delayed-ref.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Copyright (C) 2008 Oracle. All rights reserved.
  4. */
  5. #ifndef BTRFS_DELAYED_REF_H
  6. #define BTRFS_DELAYED_REF_H
  7. #include <linux/types.h>
  8. #include <linux/refcount.h>
  9. #include <linux/list.h>
  10. #include <linux/rbtree.h>
  11. #include <linux/mutex.h>
  12. #include <linux/spinlock.h>
  13. #include <linux/slab.h>
  14. #include <uapi/linux/btrfs_tree.h>
  15. #include "fs.h"
  16. #include "messages.h"
  17. struct btrfs_trans_handle;
  18. struct btrfs_fs_info;
  19. /* these are the possible values of struct btrfs_delayed_ref_node->action */
  20. enum btrfs_delayed_ref_action {
  21. /* Add one backref to the tree */
  22. BTRFS_ADD_DELAYED_REF = 1,
  23. /* Delete one backref from the tree */
  24. BTRFS_DROP_DELAYED_REF,
  25. /* Record a full extent allocation */
  26. BTRFS_ADD_DELAYED_EXTENT,
  27. /* Not changing ref count on head ref */
  28. BTRFS_UPDATE_DELAYED_HEAD,
  29. } __packed;
  30. struct btrfs_data_ref {
  31. /* For EXTENT_DATA_REF */
  32. /* Inode which refers to this data extent */
  33. u64 objectid;
  34. /*
  35. * file_offset - extent_offset
  36. *
  37. * file_offset is the key.offset of the EXTENT_DATA key.
  38. * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
  39. */
  40. u64 offset;
  41. };
  42. struct btrfs_tree_ref {
  43. /*
  44. * Level of this tree block.
  45. *
  46. * Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
  47. */
  48. int level;
  49. /* For non-skinny metadata, no special member needed */
  50. };
  51. struct btrfs_delayed_ref_node {
  52. struct rb_node ref_node;
  53. /*
  54. * If action is BTRFS_ADD_DELAYED_REF, also link this node to
  55. * ref_head->ref_add_list, then we do not need to iterate the
  56. * refs rbtree in the corresponding delayed ref head
  57. * (struct btrfs_delayed_ref_head::ref_tree).
  58. */
  59. struct list_head add_list;
  60. /* the starting bytenr of the extent */
  61. u64 bytenr;
  62. /* the size of the extent */
  63. u64 num_bytes;
  64. /* seq number to keep track of insertion order */
  65. u64 seq;
  66. /* The ref_root for this ref */
  67. u64 ref_root;
  68. /*
  69. * The parent for this ref, if this isn't set the ref_root is the
  70. * reference owner.
  71. */
  72. u64 parent;
  73. /* ref count on this data structure */
  74. refcount_t refs;
  75. /*
  76. * how many refs is this entry adding or deleting. For
  77. * head refs, this may be a negative number because it is keeping
  78. * track of the total mods done to the reference count.
  79. * For individual refs, this will always be a positive number
  80. *
  81. * It may be more than one, since it is possible for a single
  82. * parent to have more than one ref on an extent
  83. */
  84. int ref_mod;
  85. unsigned int action:8;
  86. unsigned int type:8;
  87. union {
  88. struct btrfs_tree_ref tree_ref;
  89. struct btrfs_data_ref data_ref;
  90. };
  91. };
  92. struct btrfs_delayed_extent_op {
  93. struct btrfs_disk_key key;
  94. bool update_key;
  95. bool update_flags;
  96. u64 flags_to_set;
  97. };
  98. /*
  99. * the head refs are used to hold a lock on a given extent, which allows us
  100. * to make sure that only one process is running the delayed refs
  101. * at a time for a single extent. They also store the sum of all the
  102. * reference count modifications we've queued up.
  103. */
  104. struct btrfs_delayed_ref_head {
  105. u64 bytenr;
  106. u64 num_bytes;
  107. /*
  108. * the mutex is held while running the refs, and it is also
  109. * held when checking the sum of reference modifications.
  110. */
  111. struct mutex mutex;
  112. refcount_t refs;
  113. /* Protects 'ref_tree' and 'ref_add_list'. */
  114. spinlock_t lock;
  115. struct rb_root_cached ref_tree;
  116. /* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
  117. struct list_head ref_add_list;
  118. struct btrfs_delayed_extent_op *extent_op;
  119. /*
  120. * This is used to track the final ref_mod from all the refs associated
  121. * with this head ref, this is not adjusted as delayed refs are run,
  122. * this is meant to track if we need to do the csum accounting or not.
  123. */
  124. int total_ref_mod;
  125. /*
  126. * This is the current outstanding mod references for this bytenr. This
  127. * is used with lookup_extent_info to get an accurate reference count
  128. * for a bytenr, so it is adjusted as delayed refs are run so that any
  129. * on disk reference count + ref_mod is accurate.
  130. */
  131. int ref_mod;
  132. /*
  133. * The root that triggered the allocation when must_insert_reserved is
  134. * set to true.
  135. */
  136. u64 owning_root;
  137. /*
  138. * Track reserved bytes when setting must_insert_reserved. On success
  139. * or cleanup, we will need to free the reservation.
  140. */
  141. u64 reserved_bytes;
  142. /* Tree block level, for metadata only. */
  143. u8 level;
  144. /*
  145. * when a new extent is allocated, it is just reserved in memory
  146. * The actual extent isn't inserted into the extent allocation tree
  147. * until the delayed ref is processed. must_insert_reserved is
  148. * used to flag a delayed ref so the accounting can be updated
  149. * when a full insert is done.
  150. *
  151. * It is possible the extent will be freed before it is ever
  152. * inserted into the extent allocation tree. In this case
  153. * we need to update the in ram accounting to properly reflect
  154. * the free has happened.
  155. */
  156. bool must_insert_reserved;
  157. bool is_data;
  158. bool is_system;
  159. bool processing;
  160. /*
  161. * Indicate if it's currently in the data structure that tracks head
  162. * refs (struct btrfs_delayed_ref_root::head_refs).
  163. */
  164. bool tracked;
  165. };
  166. enum btrfs_delayed_ref_flags {
  167. /* Indicate that we are flushing delayed refs for the commit */
  168. BTRFS_DELAYED_REFS_FLUSHING,
  169. };
  170. struct btrfs_delayed_ref_root {
  171. /*
  172. * Track head references.
  173. * The keys correspond to the logical address of the extent ("bytenr")
  174. * right shifted by fs_info->sectorsize_bits. This is both to get a more
  175. * dense index space (optimizes xarray structure) and because indexes in
  176. * xarrays are of "unsigned long" type, meaning they are 32 bits wide on
  177. * 32 bits platforms, limiting the extent range to 4G which is too low
  178. * and makes it unusable (truncated index values) on 32 bits platforms.
  179. * Protected by the spinlock 'lock' defined below.
  180. */
  181. struct xarray head_refs;
  182. /*
  183. * Track dirty extent records.
  184. * The keys correspond to the logical address of the extent ("bytenr")
  185. * right shifted by fs_info->sectorsize_bits, for same reasons as above.
  186. */
  187. struct xarray dirty_extents;
  188. /*
  189. * Protects the xarray head_refs, its entries and the following fields:
  190. * num_heads, num_heads_ready, pending_csums and run_delayed_start.
  191. */
  192. spinlock_t lock;
  193. /* Total number of head refs, protected by the spinlock 'lock'. */
  194. unsigned long num_heads;
  195. /*
  196. * Total number of head refs ready for processing, protected by the
  197. * spinlock 'lock'.
  198. */
  199. unsigned long num_heads_ready;
  200. /*
  201. * Track space reserved for deleting csums of data extents.
  202. * Protected by the spinlock 'lock'.
  203. */
  204. u64 pending_csums;
  205. unsigned long flags;
  206. /*
  207. * Track from which bytenr to start searching ref heads.
  208. * Protected by the spinlock 'lock'.
  209. */
  210. u64 run_delayed_start;
  211. /*
  212. * To make qgroup to skip given root.
  213. * This is for snapshot, as btrfs_qgroup_inherit() will manually
  214. * modify counters for snapshot and its source, so we should skip
  215. * the snapshot in new_root/old_roots or it will get calculated twice
  216. */
  217. u64 qgroup_to_skip;
  218. };
  219. enum btrfs_ref_type {
  220. BTRFS_REF_NOT_SET,
  221. BTRFS_REF_DATA,
  222. BTRFS_REF_METADATA,
  223. } __packed;
  224. struct btrfs_ref {
  225. enum btrfs_ref_type type;
  226. enum btrfs_delayed_ref_action action;
  227. /*
  228. * Whether this extent should go through qgroup record.
  229. *
  230. * Normally false, but for certain cases like delayed subtree scan,
  231. * setting this flag can hugely reduce qgroup overhead.
  232. */
  233. bool skip_qgroup;
  234. u64 bytenr;
  235. u64 num_bytes;
  236. u64 owning_root;
  237. /*
  238. * The root that owns the reference for this reference, this will be set
  239. * or ->parent will be set, depending on what type of reference this is.
  240. */
  241. u64 ref_root;
  242. /* Bytenr of the parent tree block */
  243. u64 parent;
  244. union {
  245. struct btrfs_data_ref data_ref;
  246. struct btrfs_tree_ref tree_ref;
  247. };
  248. #ifdef CONFIG_BTRFS_DEBUG
  249. /* Through which root is this modification. */
  250. u64 real_root;
  251. #endif
  252. };
  253. extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
  254. extern struct kmem_cache *btrfs_delayed_ref_node_cachep;
  255. extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
  256. int __init btrfs_delayed_ref_init(void);
  257. void __cold btrfs_delayed_ref_exit(void);
  258. static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_info,
  259. int num_delayed_refs)
  260. {
  261. u64 num_bytes;
  262. num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_delayed_refs);
  263. /*
  264. * We have to check the mount option here because we could be enabling
  265. * the free space tree for the first time and don't have the compat_ro
  266. * option set yet.
  267. *
  268. * We need extra reservations if we have the free space tree because
  269. * we'll have to modify that tree as well.
  270. */
  271. if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
  272. num_bytes *= 2;
  273. return num_bytes;
  274. }
  275. static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *fs_info,
  276. int num_csum_items)
  277. {
  278. /*
  279. * Deleting csum items does not result in new nodes/leaves and does not
  280. * require changing the free space tree, only the csum tree, so this is
  281. * all we need.
  282. */
  283. return btrfs_calc_metadata_size(fs_info, num_csum_items);
  284. }
  285. void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
  286. bool skip_qgroup);
  287. void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset,
  288. u64 mod_root, bool skip_qgroup);
  289. static inline struct btrfs_delayed_extent_op *
  290. btrfs_alloc_delayed_extent_op(void)
  291. {
  292. return kmem_cache_alloc(btrfs_delayed_extent_op_cachep, GFP_NOFS);
  293. }
  294. static inline void
  295. btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
  296. {
  297. if (op)
  298. kmem_cache_free(btrfs_delayed_extent_op_cachep, op);
  299. }
  300. void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref);
  301. static inline u64 btrfs_ref_head_to_space_flags(
  302. struct btrfs_delayed_ref_head *head_ref)
  303. {
  304. if (head_ref->is_data)
  305. return BTRFS_BLOCK_GROUP_DATA;
  306. else if (head_ref->is_system)
  307. return BTRFS_BLOCK_GROUP_SYSTEM;
  308. return BTRFS_BLOCK_GROUP_METADATA;
  309. }
  310. static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
  311. {
  312. if (refcount_dec_and_test(&head->refs))
  313. kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
  314. }
  315. int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
  316. struct btrfs_ref *generic_ref,
  317. struct btrfs_delayed_extent_op *extent_op);
  318. int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
  319. struct btrfs_ref *generic_ref,
  320. u64 reserved);
  321. int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
  322. u64 bytenr, u64 num_bytes, u8 level,
  323. struct btrfs_delayed_extent_op *extent_op);
  324. void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
  325. struct btrfs_delayed_ref_root *delayed_refs,
  326. struct btrfs_delayed_ref_head *head);
  327. struct btrfs_delayed_ref_head *
  328. btrfs_find_delayed_ref_head(const struct btrfs_fs_info *fs_info,
  329. struct btrfs_delayed_ref_root *delayed_refs,
  330. u64 bytenr);
  331. static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
  332. {
  333. mutex_unlock(&head->mutex);
  334. }
  335. void btrfs_delete_ref_head(const struct btrfs_fs_info *fs_info,
  336. struct btrfs_delayed_ref_root *delayed_refs,
  337. struct btrfs_delayed_ref_head *head);
  338. struct btrfs_delayed_ref_head *btrfs_select_ref_head(
  339. const struct btrfs_fs_info *fs_info,
  340. struct btrfs_delayed_ref_root *delayed_refs);
  341. void btrfs_unselect_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
  342. struct btrfs_delayed_ref_head *head);
  343. struct btrfs_delayed_ref_node *btrfs_select_delayed_ref(struct btrfs_delayed_ref_head *head);
  344. int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
  345. void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums);
  346. void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
  347. void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info);
  348. void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info);
  349. void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
  350. void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
  351. int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
  352. enum btrfs_reserve_flush_enum flush);
  353. bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
  354. bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
  355. u64 root, u64 parent);
  356. void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans);
  357. static inline u64 btrfs_delayed_ref_owner(const struct btrfs_delayed_ref_node *node)
  358. {
  359. if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
  360. node->type == BTRFS_SHARED_DATA_REF_KEY)
  361. return node->data_ref.objectid;
  362. return node->tree_ref.level;
  363. }
  364. static inline u64 btrfs_delayed_ref_offset(const struct btrfs_delayed_ref_node *node)
  365. {
  366. if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
  367. node->type == BTRFS_SHARED_DATA_REF_KEY)
  368. return node->data_ref.offset;
  369. return 0;
  370. }
  371. static inline u8 btrfs_ref_type(const struct btrfs_ref *ref)
  372. {
  373. ASSERT(ref->type == BTRFS_REF_DATA || ref->type == BTRFS_REF_METADATA);
  374. if (ref->type == BTRFS_REF_DATA) {
  375. if (ref->parent)
  376. return BTRFS_SHARED_DATA_REF_KEY;
  377. else
  378. return BTRFS_EXTENT_DATA_REF_KEY;
  379. } else {
  380. if (ref->parent)
  381. return BTRFS_SHARED_BLOCK_REF_KEY;
  382. else
  383. return BTRFS_TREE_BLOCK_REF_KEY;
  384. }
  385. return 0;
  386. }
  387. #endif