extent_io.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef BTRFS_EXTENT_IO_H
  3. #define BTRFS_EXTENT_IO_H
  4. #include <linux/rbtree.h>
  5. #include <linux/refcount.h>
  6. #include <linux/fiemap.h>
  7. #include <linux/btrfs_tree.h>
  8. #include <linux/spinlock.h>
  9. #include <linux/atomic.h>
  10. #include <linux/rwsem.h>
  11. #include <linux/list.h>
  12. #include <linux/slab.h>
  13. #include "messages.h"
  14. #include "ulist.h"
  15. #include "misc.h"
  16. struct page;
  17. struct file;
  18. struct folio;
  19. struct inode;
  20. struct fiemap_extent_info;
  21. struct readahead_control;
  22. struct address_space;
  23. struct writeback_control;
  24. struct extent_io_tree;
  25. struct extent_map_tree;
  26. struct extent_state;
  27. struct btrfs_block_group;
  28. struct btrfs_fs_info;
  29. struct btrfs_inode;
  30. struct btrfs_root;
  31. struct btrfs_trans_handle;
  32. struct btrfs_tree_parent_check;
  33. enum {
  34. EXTENT_BUFFER_UPTODATE,
  35. EXTENT_BUFFER_DIRTY,
  36. EXTENT_BUFFER_TREE_REF,
  37. EXTENT_BUFFER_STALE,
  38. EXTENT_BUFFER_WRITEBACK,
  39. EXTENT_BUFFER_UNMAPPED,
  40. /* write IO error */
  41. EXTENT_BUFFER_WRITE_ERR,
  42. /* Indicate the extent buffer is written zeroed out (for zoned) */
  43. EXTENT_BUFFER_ZONED_ZEROOUT,
  44. /* Indicate that extent buffer pages a being read */
  45. EXTENT_BUFFER_READING,
  46. };
  47. /* these are flags for __process_pages_contig */
  48. enum {
  49. ENUM_BIT(PAGE_UNLOCK),
  50. /* Page starts writeback, clear dirty bit and set writeback bit */
  51. ENUM_BIT(PAGE_START_WRITEBACK),
  52. ENUM_BIT(PAGE_END_WRITEBACK),
  53. ENUM_BIT(PAGE_SET_ORDERED),
  54. };
  55. /*
  56. * Folio private values. Every page that is controlled by the extent map has
  57. * folio private set to this value.
  58. */
  59. #define EXTENT_FOLIO_PRIVATE 1
  60. /*
  61. * The extent buffer bitmap operations are done with byte granularity instead of
  62. * word granularity for two reasons:
  63. * 1. The bitmaps must be little-endian on disk.
  64. * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
  65. * single word in a bitmap may straddle two pages in the extent buffer.
  66. */
  67. #define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
  68. #define BYTE_MASK ((1U << BITS_PER_BYTE) - 1)
  69. #define BITMAP_FIRST_BYTE_MASK(start) \
  70. ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
  71. #define BITMAP_LAST_BYTE_MASK(nbits) \
  72. (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
  73. int __init extent_buffer_init_cachep(void);
  74. void __cold extent_buffer_free_cachep(void);
  75. #define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
  76. struct extent_buffer {
  77. u64 start;
  78. u32 len;
  79. u32 folio_size;
  80. unsigned long bflags;
  81. struct btrfs_fs_info *fs_info;
  82. /*
  83. * The address where the eb can be accessed without any cross-page handling.
  84. * This can be NULL if not possible.
  85. */
  86. void *addr;
  87. spinlock_t refs_lock;
  88. refcount_t refs;
  89. int read_mirror;
  90. /* >= 0 if eb belongs to a log tree, -1 otherwise */
  91. s8 log_index;
  92. u8 folio_shift;
  93. struct rcu_head rcu_head;
  94. struct rw_semaphore lock;
  95. /*
  96. * Pointers to all the folios of the extent buffer.
  97. *
  98. * For now the folio is always order 0 (aka, a single page).
  99. */
  100. struct folio *folios[INLINE_EXTENT_BUFFER_PAGES];
  101. #ifdef CONFIG_BTRFS_DEBUG
  102. struct list_head leak_list;
  103. pid_t lock_owner;
  104. #endif
  105. };
  106. struct btrfs_eb_write_context {
  107. struct writeback_control *wbc;
  108. struct extent_buffer *eb;
  109. /* Block group @eb resides in. Only used for zoned mode. */
  110. struct btrfs_block_group *zoned_bg;
  111. };
  112. static inline unsigned long offset_in_eb_folio(const struct extent_buffer *eb,
  113. u64 start)
  114. {
  115. ASSERT(eb->folio_size);
  116. return start & (eb->folio_size - 1);
  117. }
  118. /*
  119. * Get the correct offset inside the page of extent buffer.
  120. *
  121. * @eb: target extent buffer
  122. * @start: offset inside the extent buffer
  123. *
  124. * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases.
  125. */
  126. static inline size_t get_eb_offset_in_folio(const struct extent_buffer *eb,
  127. unsigned long offset)
  128. {
  129. /*
  130. * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case
  131. * 1.1) One large folio covering the whole eb
  132. * The eb->start is aligned to folio size, thus adding it
  133. * won't cause any difference.
  134. * 1.2) Several page sized folios
  135. * The eb->start is aligned to folio (page) size, thus
  136. * adding it won't cause any difference.
  137. *
  138. * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case
  139. * In this case there would only be one page sized folio, and there
  140. * may be several different extent buffers in the page/folio.
  141. * We need to add eb->start to properly access the offset inside
  142. * that eb.
  143. */
  144. return offset_in_folio(eb->folios[0], offset + eb->start);
  145. }
  146. static inline unsigned long get_eb_folio_index(const struct extent_buffer *eb,
  147. unsigned long offset)
  148. {
  149. /*
  150. * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case
  151. * 1.1) One large folio covering the whole eb.
  152. * the folio_shift would be large enough to always make us
  153. * return 0 as index.
  154. * 1.2) Several page sized folios
  155. * The folio_shift would be PAGE_SHIFT, giving us the correct
  156. * index.
  157. *
  158. * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case
  159. * The folio would only be page sized, and always give us 0 as index.
  160. */
  161. return offset >> eb->folio_shift;
  162. }
  163. /*
  164. * Structure to record how many bytes and which ranges are set/cleared
  165. */
  166. struct extent_changeset {
  167. /* How many bytes are set/cleared in this operation */
  168. u64 bytes_changed;
  169. /* Changed ranges */
  170. struct ulist range_changed;
  171. };
  172. static inline void extent_changeset_init(struct extent_changeset *changeset)
  173. {
  174. changeset->bytes_changed = 0;
  175. ulist_init(&changeset->range_changed);
  176. }
  177. static inline struct extent_changeset *extent_changeset_alloc(void)
  178. {
  179. struct extent_changeset *ret;
  180. ret = kmalloc_obj(*ret);
  181. if (!ret)
  182. return NULL;
  183. extent_changeset_init(ret);
  184. return ret;
  185. }
  186. static inline void extent_changeset_prealloc(struct extent_changeset *changeset, gfp_t gfp_mask)
  187. {
  188. ulist_prealloc(&changeset->range_changed, gfp_mask);
  189. }
  190. static inline void extent_changeset_release(struct extent_changeset *changeset)
  191. {
  192. if (!changeset)
  193. return;
  194. changeset->bytes_changed = 0;
  195. ulist_release(&changeset->range_changed);
  196. }
  197. static inline void extent_changeset_free(struct extent_changeset *changeset)
  198. {
  199. if (!changeset)
  200. return;
  201. extent_changeset_release(changeset);
  202. kfree(changeset);
  203. }
  204. bool try_release_extent_mapping(struct folio *folio, gfp_t mask);
  205. int try_release_extent_buffer(struct folio *folio);
  206. int btrfs_read_folio(struct file *file, struct folio *folio);
  207. void extent_write_locked_range(struct inode *inode, const struct folio *locked_folio,
  208. u64 start, u64 end, struct writeback_control *wbc,
  209. bool pages_dirty);
  210. int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc);
  211. int btree_writepages(struct address_space *mapping, struct writeback_control *wbc);
  212. void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start, u64 end);
  213. void btrfs_readahead(struct readahead_control *rac);
  214. int set_folio_extent_mapped(struct folio *folio);
  215. void clear_folio_extent_mapped(struct folio *folio);
  216. struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
  217. u64 start, u64 owner_root, int level);
  218. struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
  219. u64 start);
  220. struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src);
  221. struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
  222. u64 start);
  223. void free_extent_buffer(struct extent_buffer *eb);
  224. void free_extent_buffer_stale(struct extent_buffer *eb);
  225. int read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
  226. const struct btrfs_tree_parent_check *parent_check);
  227. int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
  228. const struct btrfs_tree_parent_check *parent_check);
  229. static inline void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
  230. {
  231. wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
  232. TASK_UNINTERRUPTIBLE);
  233. }
  234. void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
  235. u64 bytenr, u64 owner_root, u64 gen, int level);
  236. void btrfs_readahead_node_child(struct extent_buffer *node, int slot);
  237. /* Note: this can be used in for loops without caching the value in a variable. */
  238. static inline int __pure num_extent_pages(const struct extent_buffer *eb)
  239. {
  240. /*
  241. * For sectorsize == PAGE_SIZE case, since nodesize is always aligned to
  242. * sectorsize, it's just eb->len >> PAGE_SHIFT.
  243. *
  244. * For sectorsize < PAGE_SIZE case, we could have nodesize < PAGE_SIZE,
  245. * thus have to ensure we get at least one page.
  246. */
  247. return (eb->len >> PAGE_SHIFT) ?: 1;
  248. }
  249. /*
  250. * This can only be determined at runtime by checking eb::folios[0].
  251. *
  252. * As we can have either one large folio covering the whole eb
  253. * (either nodesize <= PAGE_SIZE, or high order folio), or multiple
  254. * single-paged folios.
  255. *
  256. * Note: this can be used in for loops without caching the value in a variable.
  257. */
  258. static inline int __pure num_extent_folios(const struct extent_buffer *eb)
  259. {
  260. if (!eb->folios[0])
  261. return 0;
  262. if (folio_order(eb->folios[0]))
  263. return 1;
  264. return num_extent_pages(eb);
  265. }
  266. static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
  267. {
  268. return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
  269. }
  270. int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
  271. unsigned long start, unsigned long len);
  272. void read_extent_buffer(const struct extent_buffer *eb, void *dst,
  273. unsigned long start,
  274. unsigned long len);
  275. int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
  276. void __user *dst, unsigned long start,
  277. unsigned long len);
  278. void write_extent_buffer(const struct extent_buffer *eb, const void *src,
  279. unsigned long start, unsigned long len);
  280. static inline void write_extent_buffer_chunk_tree_uuid(
  281. const struct extent_buffer *eb, const void *chunk_tree_uuid)
  282. {
  283. write_extent_buffer(eb, chunk_tree_uuid,
  284. offsetof(struct btrfs_header, chunk_tree_uuid),
  285. BTRFS_FSID_SIZE);
  286. }
  287. static inline void write_extent_buffer_fsid(const struct extent_buffer *eb,
  288. const void *fsid)
  289. {
  290. write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
  291. BTRFS_FSID_SIZE);
  292. }
  293. void copy_extent_buffer_full(const struct extent_buffer *dst,
  294. const struct extent_buffer *src);
  295. void copy_extent_buffer(const struct extent_buffer *dst,
  296. const struct extent_buffer *src,
  297. unsigned long dst_offset, unsigned long src_offset,
  298. unsigned long len);
  299. void memcpy_extent_buffer(const struct extent_buffer *dst,
  300. unsigned long dst_offset, unsigned long src_offset,
  301. unsigned long len);
  302. void memmove_extent_buffer(const struct extent_buffer *dst,
  303. unsigned long dst_offset, unsigned long src_offset,
  304. unsigned long len);
  305. void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
  306. unsigned long len);
  307. bool extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
  308. unsigned long pos);
  309. void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
  310. unsigned long pos, unsigned long len);
  311. void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
  312. unsigned long start, unsigned long pos,
  313. unsigned long len);
  314. void set_extent_buffer_dirty(struct extent_buffer *eb);
  315. void set_extent_buffer_uptodate(struct extent_buffer *eb);
  316. void clear_extent_buffer_uptodate(struct extent_buffer *eb);
  317. void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
  318. const struct folio *locked_folio,
  319. struct extent_state **cached,
  320. u32 bits_to_clear, unsigned long page_ops);
  321. int extent_invalidate_folio(struct extent_io_tree *tree,
  322. struct folio *folio, size_t offset);
  323. void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
  324. struct extent_buffer *buf);
  325. int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
  326. bool nofail);
  327. int btrfs_alloc_folio_array(unsigned int nr_folios, unsigned int order,
  328. struct folio **folio_array);
  329. #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
  330. bool find_lock_delalloc_range(struct inode *inode,
  331. struct folio *locked_folio, u64 *start,
  332. u64 *end);
  333. #endif
  334. struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
  335. u64 start);
  336. #ifdef CONFIG_BTRFS_DEBUG
  337. void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info);
  338. #else
  339. #define btrfs_extent_buffer_leak_debug_check(fs_info) do {} while (0)
  340. #endif
  341. #endif