backref.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Copyright (C) 2011 STRATO. All rights reserved.
  4. */
  5. #ifndef BTRFS_BACKREF_H
  6. #define BTRFS_BACKREF_H
  7. #include <linux/types.h>
  8. #include <linux/rbtree.h>
  9. #include <linux/list.h>
  10. #include <linux/slab.h>
  11. #include <uapi/linux/btrfs.h>
  12. #include <uapi/linux/btrfs_tree.h>
  13. #include "messages.h"
  14. #include "locking.h"
  15. #include "disk-io.h"
  16. #include "extent_io.h"
  17. #include "ctree.h"
  18. struct extent_inode_elem;
  19. struct ulist;
  20. struct btrfs_extent_item;
  21. struct btrfs_trans_handle;
  22. struct btrfs_fs_info;
  23. /*
  24. * Used by implementations of iterate_extent_inodes_t (see definition below) to
  25. * signal that backref iteration can stop immediately and no error happened.
  26. * The value must be non-negative and must not be 0, 1 (which is a common return
  27. * value from things like btrfs_search_slot() and used internally in the backref
  28. * walking code) and different from BACKREF_FOUND_SHARED and
  29. * BACKREF_FOUND_NOT_SHARED
  30. */
  31. #define BTRFS_ITERATE_EXTENT_INODES_STOP 5
  32. /*
  33. * Should return 0 if no errors happened and iteration of backrefs should
  34. * continue. Can return BTRFS_ITERATE_EXTENT_INODES_STOP or any other non-zero
  35. * value to immediately stop iteration and possibly signal an error back to
  36. * the caller.
  37. */
  38. typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 num_bytes,
  39. u64 root, void *ctx);
  40. /*
  41. * Context and arguments for backref walking functions. Some of the fields are
  42. * to be filled by the caller of such functions while other are filled by the
  43. * functions themselves, as described below.
  44. */
  45. struct btrfs_backref_walk_ctx {
  46. /*
  47. * The address of the extent for which we are doing backref walking.
  48. * Can be either a data extent or a metadata extent.
  49. *
  50. * Must always be set by the top level caller.
  51. */
  52. u64 bytenr;
  53. /*
  54. * Offset relative to the target extent. This is only used for data
  55. * extents, and it's meaningful because we can have file extent items
  56. * that point only to a section of a data extent ("bookend" extents),
  57. * and we want to filter out any that don't point to a section of the
  58. * data extent containing the given offset.
  59. *
  60. * Must always be set by the top level caller.
  61. */
  62. u64 extent_item_pos;
  63. /*
  64. * If true and bytenr corresponds to a data extent, then references from
  65. * all file extent items that point to the data extent are considered,
  66. * @extent_item_pos is ignored.
  67. */
  68. bool ignore_extent_item_pos;
  69. /*
  70. * If true and bytenr corresponds to a data extent, then the inode list
  71. * (each member describing inode number, file offset and root) is not
  72. * added to each reference added to the @refs ulist.
  73. */
  74. bool skip_inode_ref_list;
  75. /* A valid transaction handle or NULL. */
  76. struct btrfs_trans_handle *trans;
  77. /*
  78. * The file system's info object, can not be NULL.
  79. *
  80. * Must always be set by the top level caller.
  81. */
  82. struct btrfs_fs_info *fs_info;
  83. /*
  84. * Time sequence acquired from btrfs_get_tree_mod_seq(), in case the
  85. * caller joined the tree mod log to get a consistent view of b+trees
  86. * while we do backref walking, or BTRFS_SEQ_LAST.
  87. * When using BTRFS_SEQ_LAST, delayed refs are not checked and it uses
  88. * commit roots when searching b+trees - this is a special case for
  89. * qgroups used during a transaction commit.
  90. */
  91. u64 time_seq;
  92. /*
  93. * Used to collect the bytenr of metadata extents that point to the
  94. * target extent.
  95. */
  96. struct ulist *refs;
  97. /*
  98. * List used to collect the IDs of the roots from which the target
  99. * extent is accessible. Can be NULL in case the caller does not care
  100. * about collecting root IDs.
  101. */
  102. struct ulist *roots;
  103. /*
  104. * Used by iterate_extent_inodes() and the main backref walk code
  105. * (find_parent_nodes()). Lookup and store functions for an optional
  106. * cache which maps the logical address (bytenr) of leaves to an array
  107. * of root IDs.
  108. */
  109. bool (*cache_lookup)(u64 leaf_bytenr, void *user_ctx,
  110. const u64 **root_ids_ret, int *root_count_ret);
  111. void (*cache_store)(u64 leaf_bytenr, const struct ulist *root_ids,
  112. void *user_ctx);
  113. /*
  114. * If this is not NULL, then the backref walking code will call this
  115. * for each indirect data extent reference as soon as it finds one,
  116. * before collecting all the remaining backrefs and before resolving
  117. * indirect backrefs. This allows for the caller to terminate backref
  118. * walking as soon as it finds one backref that matches some specific
  119. * criteria. The @cache_lookup and @cache_store callbacks should not
  120. * be NULL in order to use this callback.
  121. */
  122. iterate_extent_inodes_t *indirect_ref_iterator;
  123. /*
  124. * If this is not NULL, then the backref walking code will call this for
  125. * each extent item it's meant to process before it actually starts
  126. * processing it. If this returns anything other than 0, then it stops
  127. * the backref walking code immediately.
  128. */
  129. int (*check_extent_item)(u64 bytenr, const struct btrfs_extent_item *ei,
  130. const struct extent_buffer *leaf, void *user_ctx);
  131. /*
  132. * If this is not NULL, then the backref walking code will call this for
  133. * each extent data ref it finds (BTRFS_EXTENT_DATA_REF_KEY keys) before
  134. * processing that data ref. If this callback return false, then it will
  135. * ignore this data ref and it will never resolve the indirect data ref,
  136. * saving time searching for leaves in a fs tree with file extent items
  137. * matching the data ref.
  138. */
  139. bool (*skip_data_ref)(u64 root, u64 ino, u64 offset, void *user_ctx);
  140. /* Context object to pass to the callbacks defined above. */
  141. void *user_ctx;
  142. };
  143. struct inode_fs_paths {
  144. struct btrfs_path *btrfs_path;
  145. struct btrfs_root *fs_root;
  146. struct btrfs_data_container *fspath;
  147. };
  148. struct btrfs_backref_shared_cache_entry {
  149. u64 bytenr;
  150. u64 gen;
  151. bool is_shared;
  152. };
  153. #define BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE 8
  154. struct btrfs_backref_share_check_ctx {
  155. /* Ulists used during backref walking. */
  156. struct ulist refs;
  157. /*
  158. * The current leaf the caller of btrfs_is_data_extent_shared() is at.
  159. * Typically the caller (at the moment only fiemap) tries to determine
  160. * the sharedness of data extents point by file extent items from entire
  161. * leaves.
  162. */
  163. u64 curr_leaf_bytenr;
  164. /*
  165. * The previous leaf the caller was at in the previous call to
  166. * btrfs_is_data_extent_shared(). This may be the same as the current
  167. * leaf. On the first call it must be 0.
  168. */
  169. u64 prev_leaf_bytenr;
  170. /*
  171. * A path from a root to a leaf that has a file extent item pointing to
  172. * a given data extent should never exceed the maximum b+tree height.
  173. */
  174. struct btrfs_backref_shared_cache_entry path_cache_entries[BTRFS_MAX_LEVEL];
  175. bool use_path_cache;
  176. /*
  177. * Cache the sharedness result for the last few extents we have found,
  178. * but only for extents for which we have multiple file extent items
  179. * that point to them.
  180. * It's very common to have several file extent items that point to the
  181. * same extent (bytenr) but with different offsets and lengths. This
  182. * typically happens for COW writes, partial writes into prealloc
  183. * extents, NOCOW writes after snapshotting a root, hole punching or
  184. * reflinking within the same file (less common perhaps).
  185. * So keep a small cache with the lookup results for the extent pointed
  186. * by the last few file extent items. This cache is checked, with a
  187. * linear scan, whenever btrfs_is_data_extent_shared() is called, so
  188. * it must be small so that it does not negatively affect performance in
  189. * case we don't have multiple file extent items that point to the same
  190. * data extent.
  191. */
  192. struct {
  193. u64 bytenr;
  194. bool is_shared;
  195. } prev_extents_cache[BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE];
  196. /*
  197. * The slot in the prev_extents_cache array that will be used for
  198. * storing the sharedness result of a new data extent.
  199. */
  200. int prev_extents_cache_slot;
  201. };
  202. struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void);
  203. void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx);
  204. int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
  205. struct btrfs_path *path, struct btrfs_key *found_key,
  206. u64 *flags);
  207. int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
  208. struct btrfs_key *key, struct btrfs_extent_item *ei,
  209. u32 item_size, u64 *out_root, u8 *out_level);
  210. int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx,
  211. bool search_commit_root,
  212. iterate_extent_inodes_t *iterate, void *user_ctx);
  213. int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
  214. void *ctx, bool ignore_offset);
  215. int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
  216. int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx);
  217. int btrfs_find_all_roots(struct btrfs_backref_walk_ctx *ctx,
  218. bool skip_commit_root_sem);
  219. char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
  220. u32 name_len, unsigned long name_off,
  221. struct extent_buffer *eb_in, u64 parent,
  222. char *dest, u32 size);
  223. struct btrfs_data_container *init_data_container(u32 total_bytes);
  224. struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
  225. struct btrfs_path *path);
  226. DEFINE_FREE(inode_fs_paths, struct inode_fs_paths *,
  227. if (_T) {
  228. kvfree(_T->fspath);
  229. kfree(_T);
  230. })
  231. int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
  232. u64 start_off, struct btrfs_path *path,
  233. struct btrfs_inode_extref **ret_extref,
  234. u64 *found_off);
  235. int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
  236. u64 extent_gen,
  237. struct btrfs_backref_share_check_ctx *ctx);
  238. int __init btrfs_prelim_ref_init(void);
  239. void __cold btrfs_prelim_ref_exit(void);
  240. struct prelim_ref {
  241. struct rb_node rbnode;
  242. u64 root_id;
  243. struct btrfs_key key_for_search;
  244. u8 level;
  245. int count;
  246. struct extent_inode_elem *inode_list;
  247. u64 parent;
  248. u64 wanted_disk_byte;
  249. };
  250. /*
  251. * Iterate backrefs of one extent.
  252. *
  253. * Now it only supports iteration of tree block in commit root.
  254. */
  255. struct btrfs_backref_iter {
  256. u64 bytenr;
  257. struct btrfs_path *path;
  258. struct btrfs_fs_info *fs_info;
  259. struct btrfs_key cur_key;
  260. u32 item_ptr;
  261. u32 cur_ptr;
  262. u32 end_ptr;
  263. };
  264. struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info);
  265. /*
  266. * For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data
  267. * is btrfs_tree_block_info, without a btrfs_extent_inline_ref header.
  268. *
  269. * This helper determines if that's the case.
  270. */
  271. static inline bool btrfs_backref_has_tree_block_info(
  272. struct btrfs_backref_iter *iter)
  273. {
  274. if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY &&
  275. iter->cur_ptr - iter->item_ptr == sizeof(struct btrfs_extent_item))
  276. return true;
  277. return false;
  278. }
  279. int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
  280. int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
  281. /*
  282. * Backref cache related structures
  283. *
  284. * The whole objective of backref_cache is to build a bi-directional map
  285. * of tree blocks (represented by backref_node) and all their parents.
  286. */
  287. /*
  288. * Represent a tree block in the backref cache
  289. */
  290. struct btrfs_backref_node {
  291. union{
  292. /* Use rb_simple_node for search/insert */
  293. struct {
  294. struct rb_node rb_node;
  295. u64 bytenr;
  296. };
  297. struct rb_simple_node simple_node;
  298. };
  299. /*
  300. * This is a sanity check, whenever we COW a block we will update
  301. * new_bytenr with it's current location, and we will check this in
  302. * various places to validate that the cache makes sense, it shouldn't
  303. * be used for anything else.
  304. */
  305. u64 new_bytenr;
  306. /* Objectid of tree block owner, can be not uptodate */
  307. u64 owner;
  308. /* Link to pending, changed or detached list */
  309. struct list_head list;
  310. /* List of upper level edges, which link this node to its parents */
  311. struct list_head upper;
  312. /* List of lower level edges, which link this node to its children */
  313. struct list_head lower;
  314. /* NULL if this node is not tree root */
  315. struct btrfs_root *root;
  316. /* Extent buffer got by COWing the block */
  317. struct extent_buffer *eb;
  318. /* Level of the tree block */
  319. unsigned int level:8;
  320. /* Is the extent buffer locked */
  321. unsigned int locked:1;
  322. /* Has the block been processed */
  323. unsigned int processed:1;
  324. /* Have backrefs of this block been checked */
  325. unsigned int checked:1;
  326. /*
  327. * 1 if corresponding block has been COWed but some upper level block
  328. * pointers may not point to the new location
  329. */
  330. unsigned int pending:1;
  331. /* 1 if the backref node isn't connected to any other backref node */
  332. unsigned int detached:1;
  333. /*
  334. * For generic purpose backref cache, where we only care if it's a reloc
  335. * root, doesn't care the source subvolid.
  336. */
  337. unsigned int is_reloc_root:1;
  338. };
  339. #define LOWER 0
  340. #define UPPER 1
  341. /*
  342. * Represent an edge connecting upper and lower backref nodes.
  343. */
  344. struct btrfs_backref_edge {
  345. /*
  346. * list[LOWER] is linked to btrfs_backref_node::upper of lower level
  347. * node, and list[UPPER] is linked to btrfs_backref_node::lower of
  348. * upper level node.
  349. *
  350. * Also, build_backref_tree() uses list[UPPER] for pending edges, before
  351. * linking list[UPPER] to its upper level nodes.
  352. */
  353. struct list_head list[2];
  354. /* Two related nodes */
  355. struct btrfs_backref_node *node[2];
  356. };
  357. struct btrfs_backref_cache {
  358. /* Red black tree of all backref nodes in the cache */
  359. struct rb_root rb_root;
  360. /* For passing backref nodes to btrfs_reloc_cow_block */
  361. struct btrfs_backref_node *path[BTRFS_MAX_LEVEL];
  362. /*
  363. * List of blocks that have been COWed but some block pointers in upper
  364. * level blocks may not reflect the new location
  365. */
  366. struct list_head pending[BTRFS_MAX_LEVEL];
  367. u64 last_trans;
  368. int nr_nodes;
  369. int nr_edges;
  370. /* List of unchecked backref edges during backref cache build */
  371. struct list_head pending_edge;
  372. /* List of useless backref nodes during backref cache build */
  373. struct list_head useless_node;
  374. struct btrfs_fs_info *fs_info;
  375. /*
  376. * Whether this cache is for relocation
  377. *
  378. * Relocation backref cache require more info for reloc root compared
  379. * to generic backref cache.
  380. */
  381. bool is_reloc;
  382. };
  383. void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
  384. struct btrfs_backref_cache *cache, bool is_reloc);
  385. struct btrfs_backref_node *btrfs_backref_alloc_node(
  386. struct btrfs_backref_cache *cache, u64 bytenr, int level);
  387. struct btrfs_backref_edge *btrfs_backref_alloc_edge(
  388. struct btrfs_backref_cache *cache);
  389. void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
  390. struct btrfs_backref_node *node);
  391. void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
  392. struct btrfs_backref_edge *edge);
  393. void btrfs_backref_unlock_node_buffer(struct btrfs_backref_node *node);
  394. void btrfs_backref_drop_node_buffer(struct btrfs_backref_node *node);
  395. void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
  396. struct btrfs_backref_node *node);
  397. void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
  398. struct btrfs_backref_node *node);
  399. void btrfs_backref_release_cache(struct btrfs_backref_cache *cache);
  400. static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info,
  401. u64 bytenr, int error)
  402. {
  403. btrfs_panic(fs_info, error,
  404. "Inconsistency in backref cache found at offset %llu",
  405. bytenr);
  406. }
  407. int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
  408. struct btrfs_backref_cache *cache,
  409. struct btrfs_path *path,
  410. struct btrfs_backref_iter *iter,
  411. struct btrfs_key *node_key,
  412. struct btrfs_backref_node *cur);
  413. int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
  414. struct btrfs_backref_node *start);
  415. void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
  416. struct btrfs_backref_node *node);
  417. #endif