subpage.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/slab.h>
  3. #include "messages.h"
  4. #include "subpage.h"
  5. #include "btrfs_inode.h"
  6. /*
  7. * Subpage (block size < folio size) support overview:
  8. *
  9. * Limitations:
  10. *
  11. * - Only support 64K page size for now
  12. * This is to make metadata handling easier, as 64K page would ensure
  13. * all nodesize would fit inside one page, thus we don't need to handle
  14. * cases where a tree block crosses several pages.
  15. *
  16. * - Only metadata read-write for now
  17. * The data read-write part is in development.
  18. *
  19. * - Metadata can't cross 64K page boundary
  20. * btrfs-progs and kernel have done that for a while, thus only ancient
  21. * filesystems could have such problem. For such case, do a graceful
  22. * rejection.
  23. *
  24. * Special behavior:
  25. *
  26. * - Metadata
  27. * Metadata read is fully supported.
  28. * Meaning when reading one tree block will only trigger the read for the
  29. * needed range, other unrelated range in the same page will not be touched.
  30. *
  31. * Metadata write support is partial.
  32. * The writeback is still for the full page, but we will only submit
  33. * the dirty extent buffers in the page.
  34. *
  35. * This means, if we have a metadata page like this:
  36. *
  37. *   Page offset
  38. *   0         16K         32K         48K        64K
  39. *   |/////////|           |///////////|
  40. *        \- Tree block A        \- Tree block B
  41. *
  42. * Even if we just want to writeback tree block A, we will also writeback
  43. * tree block B if it's also dirty.
  44. *
  45. * This may cause extra metadata writeback which results in more COW.
  46. *
  47. * Implementation:
  48. *
  49. * - Common
  50. * Both metadata and data will use a new structure, btrfs_folio_state, to
  51. * record the status of each sector inside a page. This provides the extra
  52. * granularity needed.
  53. *
  54. * - Metadata
  55. * Since we have multiple tree blocks inside one page, we can't rely on page
  56. * locking anymore, or we will have greatly reduced concurrency or even
  57. * deadlocks (hold one tree lock while trying to lock another tree lock in
  58. * the same page).
  59. *
  60. * Thus for metadata locking, subpage support relies on io_tree locking only.
  61. * This means a slightly higher tree locking latency.
  62. */
  63. int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info,
  64. struct folio *folio, enum btrfs_folio_type type)
  65. {
  66. struct btrfs_folio_state *bfs;
  67. /* For metadata we don't support large folio yet. */
  68. if (type == BTRFS_SUBPAGE_METADATA)
  69. ASSERT(!folio_test_large(folio));
  70. /*
  71. * We have cases like a dummy extent buffer page, which is not mapped
  72. * and doesn't need to be locked.
  73. */
  74. if (folio->mapping)
  75. ASSERT(folio_test_locked(folio));
  76. /* Either not subpage, or the folio already has private attached. */
  77. if (folio_test_private(folio))
  78. return 0;
  79. if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info))
  80. return 0;
  81. if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
  82. return 0;
  83. bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type);
  84. if (IS_ERR(bfs))
  85. return PTR_ERR(bfs);
  86. folio_attach_private(folio, bfs);
  87. return 0;
  88. }
  89. void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio,
  90. enum btrfs_folio_type type)
  91. {
  92. struct btrfs_folio_state *bfs;
  93. /* Either not subpage, or the folio already has private attached. */
  94. if (!folio_test_private(folio))
  95. return;
  96. if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info))
  97. return;
  98. if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
  99. return;
  100. bfs = folio_detach_private(folio);
  101. ASSERT(bfs);
  102. btrfs_free_folio_state(bfs);
  103. }
  104. struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info,
  105. size_t fsize, enum btrfs_folio_type type)
  106. {
  107. struct btrfs_folio_state *ret;
  108. unsigned int real_size;
  109. ASSERT(fs_info->sectorsize < fsize);
  110. real_size = struct_size(ret, bitmaps,
  111. BITS_TO_LONGS(btrfs_bitmap_nr_max *
  112. (fsize >> fs_info->sectorsize_bits)));
  113. ret = kzalloc(real_size, GFP_NOFS);
  114. if (!ret)
  115. return ERR_PTR(-ENOMEM);
  116. spin_lock_init(&ret->lock);
  117. if (type == BTRFS_SUBPAGE_METADATA)
  118. atomic_set(&ret->eb_refs, 0);
  119. else
  120. atomic_set(&ret->nr_locked, 0);
  121. return ret;
  122. }
  123. /*
  124. * Increase the eb_refs of current subpage.
  125. *
  126. * This is important for eb allocation, to prevent race with last eb freeing
  127. * of the same page.
  128. * With the eb_refs increased before the eb inserted into radix tree,
  129. * detach_extent_buffer_page() won't detach the folio private while we're still
  130. * allocating the extent buffer.
  131. */
  132. void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
  133. {
  134. struct btrfs_folio_state *bfs;
  135. if (!btrfs_meta_is_subpage(fs_info))
  136. return;
  137. ASSERT(folio_test_private(folio) && folio->mapping);
  138. lockdep_assert_held(&folio->mapping->i_private_lock);
  139. bfs = folio_get_private(folio);
  140. atomic_inc(&bfs->eb_refs);
  141. }
  142. void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
  143. {
  144. struct btrfs_folio_state *bfs;
  145. if (!btrfs_meta_is_subpage(fs_info))
  146. return;
  147. ASSERT(folio_test_private(folio) && folio->mapping);
  148. lockdep_assert_held(&folio->mapping->i_private_lock);
  149. bfs = folio_get_private(folio);
  150. ASSERT(atomic_read(&bfs->eb_refs));
  151. atomic_dec(&bfs->eb_refs);
  152. }
  153. static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
  154. struct folio *folio, u64 start, u32 len)
  155. {
  156. /* Basic checks */
  157. ASSERT(folio_test_private(folio) && folio_get_private(folio));
  158. ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
  159. IS_ALIGNED(len, fs_info->sectorsize), "start=%llu len=%u", start, len);
  160. /*
  161. * The range check only works for mapped page, we can still have
  162. * unmapped page like dummy extent buffer pages.
  163. */
  164. if (folio->mapping)
  165. ASSERT(folio_pos(folio) <= start &&
  166. start + len <= folio_next_pos(folio),
  167. "start=%llu len=%u folio_pos=%llu folio_size=%zu",
  168. start, len, folio_pos(folio), folio_size(folio));
  169. }
  /*
   * Evaluate to the index, inside the folio state bitmaps, of the bit tracking
   * the block at @start for the bitmap called @name.
   *
   * Every named bitmap occupies btrfs_blocks_per_folio() consecutive bits, so the
   * result is the block index within the folio plus the offset of the @name
   * bitmap. The range is validated with btrfs_subpage_assert() first.
   */
  #define subpage_calc_start_bit(fs_info, folio, name, start, len) \
  ({ \
  	const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \
  	\
  	btrfs_subpage_assert(fs_info, folio, start, len); \
  	(unsigned int)((offset_in_folio(folio, start) >> fs_info->sectorsize_bits) + \
  		       __bpf * btrfs_bitmap_nr_##name); \
  })
  180. static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
  181. {
  182. u64 orig_start = *start;
  183. u32 orig_len = *len;
  184. *start = max_t(u64, folio_pos(folio), orig_start);
  185. /*
  186. * For certain call sites like btrfs_drop_pages(), we may have pages
  187. * beyond the target range. In that case, just set @len to 0, subpage
  188. * helpers can handle @len == 0 without any problem.
  189. */
  190. if (folio_pos(folio) >= orig_start + orig_len)
  191. *len = 0;
  192. else
  193. *len = min_t(u64, folio_next_pos(folio), orig_start + orig_len) - *start;
  194. }
  195. static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
  196. struct folio *folio, u64 start, u32 len)
  197. {
  198. struct btrfs_folio_state *bfs = folio_get_private(folio);
  199. const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
  200. const int nbits = (len >> fs_info->sectorsize_bits);
  201. unsigned long flags;
  202. unsigned int cleared = 0;
  203. int bit = start_bit;
  204. bool last;
  205. btrfs_subpage_assert(fs_info, folio, start, len);
  206. spin_lock_irqsave(&bfs->lock, flags);
  207. /*
  208. * We have call sites passing @lock_page into
  209. * extent_clear_unlock_delalloc() for compression path.
  210. *
  211. * This @locked_page is locked by plain lock_page(), thus its
  212. * subpage::locked is 0. Handle them in a special way.
  213. */
  214. if (atomic_read(&bfs->nr_locked) == 0) {
  215. spin_unlock_irqrestore(&bfs->lock, flags);
  216. return true;
  217. }
  218. for_each_set_bit_from(bit, bfs->bitmaps, start_bit + nbits) {
  219. clear_bit(bit, bfs->bitmaps);
  220. cleared++;
  221. }
  222. ASSERT(atomic_read(&bfs->nr_locked) >= cleared,
  223. "atomic_read(&bfs->nr_locked)=%d cleared=%d",
  224. atomic_read(&bfs->nr_locked), cleared);
  225. last = atomic_sub_and_test(cleared, &bfs->nr_locked);
  226. spin_unlock_irqrestore(&bfs->lock, flags);
  227. return last;
  228. }
  229. /*
  230. * Handle different locked folios:
  231. *
  232. * - Non-subpage folio
  233. * Just unlock it.
  234. *
  235. * - folio locked but without any subpage locked
  236. * This happens either before writepage_delalloc() or the delalloc range is
  237. * already handled by previous folio.
  238. * We can simple unlock it.
  239. *
  240. * - folio locked with subpage range locked.
  241. * We go through the locked sectors inside the range and clear their locked
  242. * bitmap, reduce the writer lock number, and unlock the page if that's
  243. * the last locked range.
  244. */
  245. void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
  246. struct folio *folio, u64 start, u32 len)
  247. {
  248. struct btrfs_folio_state *bfs = folio_get_private(folio);
  249. ASSERT(folio_test_locked(folio));
  250. if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) {
  251. folio_unlock(folio);
  252. return;
  253. }
  254. /*
  255. * For subpage case, there are two types of locked page. With or
  256. * without locked number.
  257. *
  258. * Since we own the page lock, no one else could touch subpage::locked
  259. * and we are safe to do several atomic operations without spinlock.
  260. */
  261. if (atomic_read(&bfs->nr_locked) == 0) {
  262. /* No subpage lock, locked by plain lock_page(). */
  263. folio_unlock(folio);
  264. return;
  265. }
  266. btrfs_subpage_clamp_range(folio, &start, &len);
  267. if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len))
  268. folio_unlock(folio);
  269. }
  270. void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
  271. struct folio *folio, unsigned long bitmap)
  272. {
  273. struct btrfs_folio_state *bfs = folio_get_private(folio);
  274. const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
  275. const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked;
  276. unsigned long flags;
  277. bool last = false;
  278. int cleared = 0;
  279. int bit;
  280. if (!btrfs_is_subpage(fs_info, folio)) {
  281. folio_unlock(folio);
  282. return;
  283. }
  284. if (atomic_read(&bfs->nr_locked) == 0) {
  285. /* No subpage lock, locked by plain lock_page(). */
  286. folio_unlock(folio);
  287. return;
  288. }
  289. spin_lock_irqsave(&bfs->lock, flags);
  290. for_each_set_bit(bit, &bitmap, blocks_per_folio) {
  291. if (test_and_clear_bit(bit + start_bit, bfs->bitmaps))
  292. cleared++;
  293. }
  294. ASSERT(atomic_read(&bfs->nr_locked) >= cleared,
  295. "atomic_read(&bfs->nr_locked)=%d cleared=%d",
  296. atomic_read(&bfs->nr_locked), cleared);
  297. last = atomic_sub_and_test(cleared, &bfs->nr_locked);
  298. spin_unlock_irqrestore(&bfs->lock, flags);
  299. if (last)
  300. folio_unlock(folio);
  301. }
  /*
   * Evaluate to true when every bit of the @name bitmap of @folio is set, i.e.
   * the btrfs_blocks_per_folio() bits starting at the offset of that bitmap.
   */
  #define subpage_test_bitmap_all_set(fs_info, folio, name) \
  ({ \
  	const unsigned int __nr_blocks = btrfs_blocks_per_folio(fs_info, folio); \
  	struct btrfs_folio_state *__state = folio_get_private(folio); \
  	\
  	bitmap_test_range_all_set(__state->bitmaps, \
  				  __nr_blocks * btrfs_bitmap_nr_##name, \
  				  __nr_blocks); \
  })
  /*
   * Evaluate to true when every bit of the @name bitmap of @folio is clear, i.e.
   * the btrfs_blocks_per_folio() bits starting at the offset of that bitmap.
   */
  #define subpage_test_bitmap_all_zero(fs_info, folio, name) \
  ({ \
  	const unsigned int __nr_blocks = btrfs_blocks_per_folio(fs_info, folio); \
  	struct btrfs_folio_state *__state = folio_get_private(folio); \
  	\
  	bitmap_test_range_all_zero(__state->bitmaps, \
  				   __nr_blocks * btrfs_bitmap_nr_##name, \
  				   __nr_blocks); \
  })
  318. void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
  319. struct folio *folio, u64 start, u32 len)
  320. {
  321. struct btrfs_folio_state *bfs = folio_get_private(folio);
  322. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  323. uptodate, start, len);
  324. unsigned long flags;
  325. spin_lock_irqsave(&bfs->lock, flags);
  326. bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  327. if (subpage_test_bitmap_all_set(fs_info, folio, uptodate))
  328. folio_mark_uptodate(folio);
  329. spin_unlock_irqrestore(&bfs->lock, flags);
  330. }
  331. void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
  332. struct folio *folio, u64 start, u32 len)
  333. {
  334. struct btrfs_folio_state *bfs = folio_get_private(folio);
  335. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  336. uptodate, start, len);
  337. unsigned long flags;
  338. spin_lock_irqsave(&bfs->lock, flags);
  339. bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  340. folio_clear_uptodate(folio);
  341. spin_unlock_irqrestore(&bfs->lock, flags);
  342. }
  343. void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
  344. struct folio *folio, u64 start, u32 len)
  345. {
  346. struct btrfs_folio_state *bfs = folio_get_private(folio);
  347. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  348. dirty, start, len);
  349. unsigned long flags;
  350. spin_lock_irqsave(&bfs->lock, flags);
  351. bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  352. spin_unlock_irqrestore(&bfs->lock, flags);
  353. folio_mark_dirty(folio);
  354. }
  355. /*
  356. * Extra clear_and_test function for subpage dirty bitmap.
  357. *
  358. * Return true if we're the last bits in the dirty_bitmap and clear the
  359. * dirty_bitmap.
  360. * Return false otherwise.
  361. *
  362. * NOTE: Callers should manually clear page dirty for true case, as we have
  363. * extra handling for tree blocks.
  364. */
  365. bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
  366. struct folio *folio, u64 start, u32 len)
  367. {
  368. struct btrfs_folio_state *bfs = folio_get_private(folio);
  369. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  370. dirty, start, len);
  371. unsigned long flags;
  372. bool last = false;
  373. spin_lock_irqsave(&bfs->lock, flags);
  374. bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  375. if (subpage_test_bitmap_all_zero(fs_info, folio, dirty))
  376. last = true;
  377. spin_unlock_irqrestore(&bfs->lock, flags);
  378. return last;
  379. }
  380. void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
  381. struct folio *folio, u64 start, u32 len)
  382. {
  383. bool last;
  384. last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len);
  385. if (last)
  386. folio_clear_dirty_for_io(folio);
  387. }
  388. void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
  389. struct folio *folio, u64 start, u32 len)
  390. {
  391. struct btrfs_folio_state *bfs = folio_get_private(folio);
  392. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  393. writeback, start, len);
  394. unsigned long flags;
  395. bool keep_write;
  396. spin_lock_irqsave(&bfs->lock, flags);
  397. bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  398. /*
  399. * Don't clear the TOWRITE tag when starting writeback on a still-dirty
  400. * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it,
  401. * assume writeback is complete, and exit too early — violating sync
  402. * ordering guarantees.
  403. */
  404. keep_write = folio_test_dirty(folio);
  405. if (!folio_test_writeback(folio))
  406. __folio_start_writeback(folio, keep_write);
  407. spin_unlock_irqrestore(&bfs->lock, flags);
  408. }
  409. void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
  410. struct folio *folio, u64 start, u32 len)
  411. {
  412. struct btrfs_folio_state *bfs = folio_get_private(folio);
  413. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  414. writeback, start, len);
  415. unsigned long flags;
  416. spin_lock_irqsave(&bfs->lock, flags);
  417. bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  418. if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) {
  419. ASSERT(folio_test_writeback(folio));
  420. folio_end_writeback(folio);
  421. }
  422. spin_unlock_irqrestore(&bfs->lock, flags);
  423. }
  424. void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
  425. struct folio *folio, u64 start, u32 len)
  426. {
  427. struct btrfs_folio_state *bfs = folio_get_private(folio);
  428. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  429. ordered, start, len);
  430. unsigned long flags;
  431. spin_lock_irqsave(&bfs->lock, flags);
  432. bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  433. folio_set_ordered(folio);
  434. spin_unlock_irqrestore(&bfs->lock, flags);
  435. }
  436. void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
  437. struct folio *folio, u64 start, u32 len)
  438. {
  439. struct btrfs_folio_state *bfs = folio_get_private(folio);
  440. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  441. ordered, start, len);
  442. unsigned long flags;
  443. spin_lock_irqsave(&bfs->lock, flags);
  444. bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  445. if (subpage_test_bitmap_all_zero(fs_info, folio, ordered))
  446. folio_clear_ordered(folio);
  447. spin_unlock_irqrestore(&bfs->lock, flags);
  448. }
  449. void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
  450. struct folio *folio, u64 start, u32 len)
  451. {
  452. struct btrfs_folio_state *bfs = folio_get_private(folio);
  453. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  454. checked, start, len);
  455. unsigned long flags;
  456. spin_lock_irqsave(&bfs->lock, flags);
  457. bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  458. if (subpage_test_bitmap_all_set(fs_info, folio, checked))
  459. folio_set_checked(folio);
  460. spin_unlock_irqrestore(&bfs->lock, flags);
  461. }
  462. void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
  463. struct folio *folio, u64 start, u32 len)
  464. {
  465. struct btrfs_folio_state *bfs = folio_get_private(folio);
  466. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
  467. checked, start, len);
  468. unsigned long flags;
  469. spin_lock_irqsave(&bfs->lock, flags);
  470. bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
  471. folio_clear_checked(folio);
  472. spin_unlock_irqrestore(&bfs->lock, flags);
  473. }
  474. /*
  475. * Unlike set/clear which is dependent on each page status, for test all bits
  476. * are tested in the same way.
  477. */
  478. #define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name) \
  479. bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
  480. struct folio *folio, u64 start, u32 len) \
  481. { \
  482. struct btrfs_folio_state *bfs = folio_get_private(folio); \
  483. unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \
  484. name, start, len); \
  485. unsigned long flags; \
  486. bool ret; \
  487. \
  488. spin_lock_irqsave(&bfs->lock, flags); \
  489. ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit, \
  490. len >> fs_info->sectorsize_bits); \
  491. spin_unlock_irqrestore(&bfs->lock, flags); \
  492. return ret; \
  493. }
  494. IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
  495. IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
  496. IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
  497. IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
  498. IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
  499. /*
  500. * Note that, in selftests (extent-io-tests), we can have empty fs_info passed
  501. * in. We only test sectorsize == PAGE_SIZE cases so far, thus we can fall
  502. * back to regular sectorsize branch.
  503. */
  504. #define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func, \
  505. folio_clear_func, folio_test_func) \
  506. void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info, \
  507. struct folio *folio, u64 start, u32 len) \
  508. { \
  509. if (unlikely(!fs_info) || \
  510. !btrfs_is_subpage(fs_info, folio)) { \
  511. folio_set_func(folio); \
  512. return; \
  513. } \
  514. btrfs_subpage_set_##name(fs_info, folio, start, len); \
  515. } \
  516. void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info, \
  517. struct folio *folio, u64 start, u32 len) \
  518. { \
  519. if (unlikely(!fs_info) || \
  520. !btrfs_is_subpage(fs_info, folio)) { \
  521. folio_clear_func(folio); \
  522. return; \
  523. } \
  524. btrfs_subpage_clear_##name(fs_info, folio, start, len); \
  525. } \
  526. bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info, \
  527. struct folio *folio, u64 start, u32 len) \
  528. { \
  529. if (unlikely(!fs_info) || \
  530. !btrfs_is_subpage(fs_info, folio)) \
  531. return folio_test_func(folio); \
  532. return btrfs_subpage_test_##name(fs_info, folio, start, len); \
  533. } \
  534. void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info, \
  535. struct folio *folio, u64 start, u32 len) \
  536. { \
  537. if (unlikely(!fs_info) || \
  538. !btrfs_is_subpage(fs_info, folio)) { \
  539. folio_set_func(folio); \
  540. return; \
  541. } \
  542. btrfs_subpage_clamp_range(folio, &start, &len); \
  543. btrfs_subpage_set_##name(fs_info, folio, start, len); \
  544. } \
  545. void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
  546. struct folio *folio, u64 start, u32 len) \
  547. { \
  548. if (unlikely(!fs_info) || \
  549. !btrfs_is_subpage(fs_info, folio)) { \
  550. folio_clear_func(folio); \
  551. return; \
  552. } \
  553. btrfs_subpage_clamp_range(folio, &start, &len); \
  554. btrfs_subpage_clear_##name(fs_info, folio, start, len); \
  555. } \
  556. bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
  557. struct folio *folio, u64 start, u32 len) \
  558. { \
  559. if (unlikely(!fs_info) || \
  560. !btrfs_is_subpage(fs_info, folio)) \
  561. return folio_test_func(folio); \
  562. btrfs_subpage_clamp_range(folio, &start, &len); \
  563. return btrfs_subpage_test_##name(fs_info, folio, start, len); \
  564. } \
  565. void btrfs_meta_folio_set_##name(struct folio *folio, const struct extent_buffer *eb) \
  566. { \
  567. if (!btrfs_meta_is_subpage(eb->fs_info)) { \
  568. folio_set_func(folio); \
  569. return; \
  570. } \
  571. btrfs_subpage_set_##name(eb->fs_info, folio, eb->start, eb->len); \
  572. } \
  573. void btrfs_meta_folio_clear_##name(struct folio *folio, const struct extent_buffer *eb) \
  574. { \
  575. if (!btrfs_meta_is_subpage(eb->fs_info)) { \
  576. folio_clear_func(folio); \
  577. return; \
  578. } \
  579. btrfs_subpage_clear_##name(eb->fs_info, folio, eb->start, eb->len); \
  580. } \
  581. bool btrfs_meta_folio_test_##name(struct folio *folio, const struct extent_buffer *eb) \
  582. { \
  583. if (!btrfs_meta_is_subpage(eb->fs_info)) \
  584. return folio_test_func(folio); \
  585. return btrfs_subpage_test_##name(eb->fs_info, folio, eb->start, eb->len); \
  586. }
  587. IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate,
  588. folio_test_uptodate);
  589. IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io,
  590. folio_test_dirty);
  591. IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback,
  592. folio_test_writeback);
  593. IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered,
  594. folio_test_ordered);
  595. IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
  596. folio_test_checked);
  /*
   * Copy the @name bitmap of @folio into the unsigned long that @dst points to.
   *
   * The whole bitmap has to fit into one unsigned long, which is asserted.
   * Wrapped in do { } while (0) so the macro expands to exactly one statement.
   */
  #define GET_SUBPAGE_BITMAP(fs_info, folio, name, dst) \
  do { \
  	const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \
  	const struct btrfs_folio_state *__bfs = folio_get_private(folio); \
  	\
  	ASSERT(__bpf <= BITS_PER_LONG); \
  	*(dst) = bitmap_read(__bfs->bitmaps, \
  			     __bpf * btrfs_bitmap_nr_##name, __bpf); \
  } while (0)
  /*
   * Emit a btrfs_warn() line with the @name bitmap of @folio together with the
   * range [@start, @start + @len) being handled.
   *
   * Wrapped in do { } while (0) so the macro expands to exactly one statement.
   * The locals are prefixed so they cannot capture identifiers used in the
   * macro arguments.
   */
  #define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len) \
  do { \
  	unsigned long __bitmap; \
  	const unsigned int __nr_blocks = btrfs_blocks_per_folio(fs_info, folio); \
  	\
  	GET_SUBPAGE_BITMAP(fs_info, folio, name, &__bitmap); \
  	btrfs_warn(fs_info, \
  	"dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
  		   start, len, folio_pos(folio), __nr_blocks, &__bitmap); \
  } while (0)
  616. /*
  617. * Make sure not only the page dirty bit is cleared, but also subpage dirty bit
  618. * is cleared.
  619. */
  620. void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
  621. struct folio *folio, u64 start, u32 len)
  622. {
  623. struct btrfs_folio_state *bfs;
  624. unsigned int start_bit;
  625. unsigned int nbits;
  626. unsigned long flags;
  627. if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
  628. return;
  629. if (!btrfs_is_subpage(fs_info, folio)) {
  630. ASSERT(!folio_test_dirty(folio));
  631. return;
  632. }
  633. start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
  634. nbits = len >> fs_info->sectorsize_bits;
  635. bfs = folio_get_private(folio);
  636. ASSERT(bfs);
  637. spin_lock_irqsave(&bfs->lock, flags);
  638. if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
  639. SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len);
  640. ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
  641. }
  642. ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
  643. spin_unlock_irqrestore(&bfs->lock, flags);
  644. }
  645. /*
  646. * This is for folio already locked by plain lock_page()/folio_lock(), which
  647. * doesn't have any subpage awareness.
  648. *
  649. * This populates the involved subpage ranges so that subpage helpers can
  650. * properly unlock them.
  651. */
  652. void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
  653. struct folio *folio, u64 start, u32 len)
  654. {
  655. struct btrfs_folio_state *bfs;
  656. unsigned long flags;
  657. unsigned int start_bit;
  658. unsigned int nbits;
  659. int ret;
  660. ASSERT(folio_test_locked(folio));
  661. if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio))
  662. return;
  663. bfs = folio_get_private(folio);
  664. start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
  665. nbits = len >> fs_info->sectorsize_bits;
  666. spin_lock_irqsave(&bfs->lock, flags);
  667. /* Target range should not yet be locked. */
  668. if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
  669. SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len);
  670. ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
  671. }
  672. bitmap_set(bfs->bitmaps, start_bit, nbits);
  673. ret = atomic_add_return(nbits, &bfs->nr_locked);
  674. ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio));
  675. spin_unlock_irqrestore(&bfs->lock, flags);
  676. }
  677. /*
  678. * Clear the dirty flag for the folio.
  679. *
  680. * If the affected folio is no longer dirty, return true. Otherwise return false.
  681. */
  682. bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb)
  683. {
  684. bool last;
  685. if (!btrfs_meta_is_subpage(eb->fs_info)) {
  686. folio_clear_dirty_for_io(folio);
  687. return true;
  688. }
  689. last = btrfs_subpage_clear_and_test_dirty(eb->fs_info, folio, eb->start, eb->len);
  690. if (last) {
  691. folio_clear_dirty_for_io(folio);
  692. return true;
  693. }
  694. return false;
  695. }
  696. void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
  697. struct folio *folio, u64 start, u32 len)
  698. {
  699. struct btrfs_folio_state *bfs;
  700. const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
  701. unsigned long uptodate_bitmap;
  702. unsigned long dirty_bitmap;
  703. unsigned long writeback_bitmap;
  704. unsigned long ordered_bitmap;
  705. unsigned long checked_bitmap;
  706. unsigned long locked_bitmap;
  707. unsigned long flags;
  708. ASSERT(folio_test_private(folio) && folio_get_private(folio));
  709. ASSERT(blocks_per_folio > 1);
  710. bfs = folio_get_private(folio);
  711. spin_lock_irqsave(&bfs->lock, flags);
  712. GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap);
  713. GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap);
  714. GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap);
  715. GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap);
  716. GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap);
  717. GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap);
  718. spin_unlock_irqrestore(&bfs->lock, flags);
  719. dump_page(folio_page(folio, 0), "btrfs folio state dump");
  720. btrfs_warn(fs_info,
  721. "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
  722. start, len, folio_pos(folio),
  723. blocks_per_folio, &uptodate_bitmap,
  724. blocks_per_folio, &dirty_bitmap,
  725. blocks_per_folio, &locked_bitmap,
  726. blocks_per_folio, &writeback_bitmap,
  727. blocks_per_folio, &ordered_bitmap,
  728. blocks_per_folio, &checked_bitmap);
  729. }
  730. void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
  731. struct folio *folio,
  732. unsigned long *ret_bitmap)
  733. {
  734. struct btrfs_folio_state *bfs;
  735. unsigned long flags;
  736. ASSERT(folio_test_private(folio) && folio_get_private(folio));
  737. ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1);
  738. bfs = folio_get_private(folio);
  739. spin_lock_irqsave(&bfs->lock, flags);
  740. GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap);
  741. spin_unlock_irqrestore(&bfs->lock, flags);
  742. }