delayed-ref.c 39 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2009 Oracle. All rights reserved.
  4. */
  5. #include <linux/sched.h>
  6. #include <linux/slab.h>
  7. #include <linux/sort.h>
  8. #include "messages.h"
  9. #include "ctree.h"
  10. #include "delayed-ref.h"
  11. #include "extent-tree.h"
  12. #include "transaction.h"
  13. #include "qgroup.h"
  14. #include "space-info.h"
  15. #include "tree-mod-log.h"
  16. #include "fs.h"
  17. struct kmem_cache *btrfs_delayed_ref_head_cachep;
  18. struct kmem_cache *btrfs_delayed_ref_node_cachep;
  19. struct kmem_cache *btrfs_delayed_extent_op_cachep;
  20. /*
  21. * delayed back reference update tracking. For subvolume trees
  22. * we queue up extent allocations and backref maintenance for
  23. * delayed processing. This avoids deep call chains where we
  24. * add extents in the middle of btrfs_search_slot, and it allows
  25. * us to buffer up frequently modified backrefs in an rb tree instead
  26. * of hammering updates on the extent allocation tree.
  27. */
  28. bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
  29. {
  30. struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
  31. struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
  32. bool ret = false;
  33. u64 reserved;
  34. spin_lock(&global_rsv->lock);
  35. reserved = global_rsv->reserved;
  36. spin_unlock(&global_rsv->lock);
  37. /*
  38. * Since the global reserve is just kind of magic we don't really want
  39. * to rely on it to save our bacon, so if our size is more than the
  40. * delayed_refs_rsv and the global rsv then it's time to think about
  41. * bailing.
  42. */
  43. spin_lock(&delayed_refs_rsv->lock);
  44. reserved += delayed_refs_rsv->reserved;
  45. if (delayed_refs_rsv->size >= reserved)
  46. ret = true;
  47. spin_unlock(&delayed_refs_rsv->lock);
  48. return ret;
  49. }
  50. /*
  51. * Release a ref head's reservation.
  52. *
  53. * @fs_info: the filesystem
  54. * @nr_refs: number of delayed refs to drop
  55. * @nr_csums: number of csum items to drop
  56. *
  57. * Drops the delayed ref head's count from the delayed refs rsv and free any
  58. * excess reservation we had.
  59. */
  60. void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums)
  61. {
  62. struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
  63. u64 num_bytes;
  64. u64 released;
  65. num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr_refs);
  66. num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
  67. released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
  68. if (released)
  69. trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
  70. 0, released, 0);
  71. }
  72. /*
  73. * Adjust the size of the delayed refs rsv.
  74. *
  75. * This is to be called anytime we may have adjusted trans->delayed_ref_updates
  76. * or trans->delayed_ref_csum_deletions, it'll calculate the additional size and
  77. * add it to the delayed_refs_rsv.
  78. */
  79. void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
  80. {
  81. struct btrfs_fs_info *fs_info = trans->fs_info;
  82. struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
  83. struct btrfs_block_rsv *local_rsv = &trans->delayed_rsv;
  84. u64 num_bytes;
  85. u64 reserved_bytes;
  86. if (btrfs_is_testing(fs_info))
  87. return;
  88. num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates);
  89. num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info,
  90. trans->delayed_ref_csum_deletions);
  91. if (num_bytes == 0)
  92. return;
  93. /*
  94. * Try to take num_bytes from the transaction's local delayed reserve.
  95. * If not possible, try to take as much as it's available. If the local
  96. * reserve doesn't have enough reserved space, the delayed refs reserve
  97. * will be refilled next time btrfs_delayed_refs_rsv_refill() is called
  98. * by someone or if a transaction commit is triggered before that, the
  99. * global block reserve will be used. We want to minimize using the
  100. * global block reserve for cases we can account for in advance, to
  101. * avoid exhausting it and reach -ENOSPC during a transaction commit.
  102. */
  103. spin_lock(&local_rsv->lock);
  104. reserved_bytes = min(num_bytes, local_rsv->reserved);
  105. local_rsv->reserved -= reserved_bytes;
  106. local_rsv->full = (local_rsv->reserved >= local_rsv->size);
  107. spin_unlock(&local_rsv->lock);
  108. spin_lock(&delayed_rsv->lock);
  109. delayed_rsv->size += num_bytes;
  110. delayed_rsv->reserved += reserved_bytes;
  111. delayed_rsv->full = (delayed_rsv->reserved >= delayed_rsv->size);
  112. spin_unlock(&delayed_rsv->lock);
  113. trans->delayed_ref_updates = 0;
  114. trans->delayed_ref_csum_deletions = 0;
  115. }
  116. /*
  117. * Adjust the size of the delayed refs block reserve for 1 block group item
  118. * insertion, used after allocating a block group.
  119. */
  120. void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info)
  121. {
  122. struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
  123. spin_lock(&delayed_rsv->lock);
  124. /*
  125. * Inserting a block group item does not require changing the free space
  126. * tree, only the extent tree or the block group tree, so this is all we
  127. * need.
  128. */
  129. delayed_rsv->size += btrfs_calc_insert_metadata_size(fs_info, 1);
  130. delayed_rsv->full = false;
  131. spin_unlock(&delayed_rsv->lock);
  132. }
  133. /*
  134. * Adjust the size of the delayed refs block reserve to release space for 1
  135. * block group item insertion.
  136. */
  137. void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info)
  138. {
  139. struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
  140. const u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
  141. u64 released;
  142. released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL);
  143. if (released > 0)
  144. trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
  145. 0, released, 0);
  146. }
  147. /*
  148. * Adjust the size of the delayed refs block reserve for 1 block group item
  149. * update.
  150. */
  151. void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
  152. {
  153. struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
  154. spin_lock(&delayed_rsv->lock);
  155. /*
  156. * Updating a block group item does not result in new nodes/leaves and
  157. * does not require changing the free space tree, only the extent tree
  158. * or the block group tree, so this is all we need.
  159. */
  160. delayed_rsv->size += btrfs_calc_metadata_size(fs_info, 1);
  161. delayed_rsv->full = false;
  162. spin_unlock(&delayed_rsv->lock);
  163. }
  164. /*
  165. * Adjust the size of the delayed refs block reserve to release space for 1
  166. * block group item update.
  167. */
  168. void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
  169. {
  170. struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
  171. const u64 num_bytes = btrfs_calc_metadata_size(fs_info, 1);
  172. u64 released;
  173. released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL);
  174. if (released > 0)
  175. trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
  176. 0, released, 0);
  177. }
  178. /*
  179. * Refill based on our delayed refs usage.
  180. *
  181. * @fs_info: the filesystem
  182. * @flush: control how we can flush for this reservation.
  183. *
  184. * This will refill the delayed block_rsv up to 1 items size worth of space and
  185. * will return -ENOSPC if we can't make the reservation.
  186. */
  187. int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
  188. enum btrfs_reserve_flush_enum flush)
  189. {
  190. struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
  191. struct btrfs_space_info *space_info = block_rsv->space_info;
  192. u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1);
  193. u64 num_bytes = 0;
  194. u64 refilled_bytes;
  195. u64 to_free;
  196. int ret = -ENOSPC;
  197. spin_lock(&block_rsv->lock);
  198. if (block_rsv->reserved < block_rsv->size) {
  199. num_bytes = block_rsv->size - block_rsv->reserved;
  200. num_bytes = min(num_bytes, limit);
  201. }
  202. spin_unlock(&block_rsv->lock);
  203. if (!num_bytes)
  204. return 0;
  205. ret = btrfs_reserve_metadata_bytes(space_info, num_bytes, flush);
  206. if (ret)
  207. return ret;
  208. /*
  209. * We may have raced with someone else, so check again if we the block
  210. * reserve is still not full and release any excess space.
  211. */
  212. spin_lock(&block_rsv->lock);
  213. if (block_rsv->reserved < block_rsv->size) {
  214. u64 needed = block_rsv->size - block_rsv->reserved;
  215. if (num_bytes >= needed) {
  216. block_rsv->reserved += needed;
  217. block_rsv->full = true;
  218. to_free = num_bytes - needed;
  219. refilled_bytes = needed;
  220. } else {
  221. block_rsv->reserved += num_bytes;
  222. to_free = 0;
  223. refilled_bytes = num_bytes;
  224. }
  225. } else {
  226. to_free = num_bytes;
  227. refilled_bytes = 0;
  228. }
  229. spin_unlock(&block_rsv->lock);
  230. if (to_free > 0)
  231. btrfs_space_info_free_bytes_may_use(space_info, to_free);
  232. if (refilled_bytes > 0)
  233. trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0,
  234. refilled_bytes, 1);
  235. return 0;
  236. }
  237. /*
  238. * compare two delayed data backrefs with same bytenr and type
  239. */
  240. static int comp_data_refs(const struct btrfs_delayed_ref_node *ref1,
  241. const struct btrfs_delayed_ref_node *ref2)
  242. {
  243. if (ref1->data_ref.objectid < ref2->data_ref.objectid)
  244. return -1;
  245. if (ref1->data_ref.objectid > ref2->data_ref.objectid)
  246. return 1;
  247. if (ref1->data_ref.offset < ref2->data_ref.offset)
  248. return -1;
  249. if (ref1->data_ref.offset > ref2->data_ref.offset)
  250. return 1;
  251. return 0;
  252. }
  253. static int comp_refs(const struct btrfs_delayed_ref_node *ref1,
  254. const struct btrfs_delayed_ref_node *ref2,
  255. bool check_seq)
  256. {
  257. int ret = 0;
  258. if (ref1->type < ref2->type)
  259. return -1;
  260. if (ref1->type > ref2->type)
  261. return 1;
  262. if (ref1->type == BTRFS_SHARED_BLOCK_REF_KEY ||
  263. ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
  264. if (ref1->parent < ref2->parent)
  265. return -1;
  266. if (ref1->parent > ref2->parent)
  267. return 1;
  268. } else {
  269. if (ref1->ref_root < ref2->ref_root)
  270. return -1;
  271. if (ref1->ref_root > ref2->ref_root)
  272. return 1;
  273. if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY)
  274. ret = comp_data_refs(ref1, ref2);
  275. }
  276. if (ret)
  277. return ret;
  278. if (check_seq) {
  279. if (ref1->seq < ref2->seq)
  280. return -1;
  281. if (ref1->seq > ref2->seq)
  282. return 1;
  283. }
  284. return 0;
  285. }
  286. static int cmp_refs_node(const struct rb_node *new, const struct rb_node *exist)
  287. {
  288. const struct btrfs_delayed_ref_node *new_node =
  289. rb_entry(new, struct btrfs_delayed_ref_node, ref_node);
  290. const struct btrfs_delayed_ref_node *exist_node =
  291. rb_entry(exist, struct btrfs_delayed_ref_node, ref_node);
  292. return comp_refs(new_node, exist_node, true);
  293. }
  294. static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
  295. struct btrfs_delayed_ref_node *ins)
  296. {
  297. struct rb_node *node = &ins->ref_node;
  298. struct rb_node *exist = rb_find_add_cached(node, root, cmp_refs_node);
  299. return rb_entry_safe(exist, struct btrfs_delayed_ref_node, ref_node);
  300. }
  301. static struct btrfs_delayed_ref_head *find_first_ref_head(
  302. struct btrfs_delayed_ref_root *dr)
  303. {
  304. unsigned long from = 0;
  305. lockdep_assert_held(&dr->lock);
  306. return xa_find(&dr->head_refs, &from, ULONG_MAX, XA_PRESENT);
  307. }
  308. static bool btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
  309. struct btrfs_delayed_ref_head *head)
  310. {
  311. lockdep_assert_held(&delayed_refs->lock);
  312. if (mutex_trylock(&head->mutex))
  313. return true;
  314. refcount_inc(&head->refs);
  315. spin_unlock(&delayed_refs->lock);
  316. mutex_lock(&head->mutex);
  317. spin_lock(&delayed_refs->lock);
  318. if (!head->tracked) {
  319. mutex_unlock(&head->mutex);
  320. btrfs_put_delayed_ref_head(head);
  321. return false;
  322. }
  323. btrfs_put_delayed_ref_head(head);
  324. return true;
  325. }
  326. static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info,
  327. struct btrfs_delayed_ref_root *delayed_refs,
  328. struct btrfs_delayed_ref_head *head,
  329. struct btrfs_delayed_ref_node *ref)
  330. {
  331. lockdep_assert_held(&head->lock);
  332. rb_erase_cached(&ref->ref_node, &head->ref_tree);
  333. RB_CLEAR_NODE(&ref->ref_node);
  334. if (!list_empty(&ref->add_list))
  335. list_del(&ref->add_list);
  336. btrfs_put_delayed_ref(ref);
  337. btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
  338. }
  339. static bool merge_ref(struct btrfs_fs_info *fs_info,
  340. struct btrfs_delayed_ref_root *delayed_refs,
  341. struct btrfs_delayed_ref_head *head,
  342. struct btrfs_delayed_ref_node *ref,
  343. u64 seq)
  344. {
  345. struct btrfs_delayed_ref_node *next;
  346. struct rb_node *node = rb_next(&ref->ref_node);
  347. bool done = false;
  348. while (!done && node) {
  349. int mod;
  350. next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
  351. node = rb_next(node);
  352. if (seq && next->seq >= seq)
  353. break;
  354. if (comp_refs(ref, next, false))
  355. break;
  356. if (ref->action == next->action) {
  357. mod = next->ref_mod;
  358. } else {
  359. if (ref->ref_mod < next->ref_mod) {
  360. swap(ref, next);
  361. done = true;
  362. }
  363. mod = -next->ref_mod;
  364. }
  365. drop_delayed_ref(fs_info, delayed_refs, head, next);
  366. ref->ref_mod += mod;
  367. if (ref->ref_mod == 0) {
  368. drop_delayed_ref(fs_info, delayed_refs, head, ref);
  369. done = true;
  370. } else {
  371. /*
  372. * Can't have multiples of the same ref on a tree block.
  373. */
  374. WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
  375. ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
  376. }
  377. }
  378. return done;
  379. }
  380. void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
  381. struct btrfs_delayed_ref_root *delayed_refs,
  382. struct btrfs_delayed_ref_head *head)
  383. {
  384. struct btrfs_delayed_ref_node *ref;
  385. struct rb_node *node;
  386. u64 seq = 0;
  387. lockdep_assert_held(&head->lock);
  388. if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
  389. return;
  390. /* We don't have too many refs to merge for data. */
  391. if (head->is_data)
  392. return;
  393. seq = btrfs_tree_mod_log_lowest_seq(fs_info);
  394. again:
  395. for (node = rb_first_cached(&head->ref_tree); node;
  396. node = rb_next(node)) {
  397. ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
  398. if (seq && ref->seq >= seq)
  399. continue;
  400. if (merge_ref(fs_info, delayed_refs, head, ref, seq))
  401. goto again;
  402. }
  403. }
  404. int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
  405. {
  406. int ret = 0;
  407. u64 min_seq = btrfs_tree_mod_log_lowest_seq(fs_info);
  408. if (min_seq != 0 && seq >= min_seq) {
  409. btrfs_debug(fs_info,
  410. "holding back delayed_ref %llu, lowest is %llu",
  411. seq, min_seq);
  412. ret = 1;
  413. }
  414. return ret;
  415. }
  416. struct btrfs_delayed_ref_head *btrfs_select_ref_head(
  417. const struct btrfs_fs_info *fs_info,
  418. struct btrfs_delayed_ref_root *delayed_refs)
  419. {
  420. struct btrfs_delayed_ref_head *head;
  421. unsigned long start_index;
  422. unsigned long found_index;
  423. bool found_head = false;
  424. bool locked;
  425. spin_lock(&delayed_refs->lock);
  426. again:
  427. start_index = (delayed_refs->run_delayed_start >> fs_info->sectorsize_bits);
  428. xa_for_each_start(&delayed_refs->head_refs, found_index, head, start_index) {
  429. if (!head->processing) {
  430. found_head = true;
  431. break;
  432. }
  433. }
  434. if (!found_head) {
  435. if (delayed_refs->run_delayed_start == 0) {
  436. spin_unlock(&delayed_refs->lock);
  437. return NULL;
  438. }
  439. delayed_refs->run_delayed_start = 0;
  440. goto again;
  441. }
  442. head->processing = true;
  443. WARN_ON(delayed_refs->num_heads_ready == 0);
  444. delayed_refs->num_heads_ready--;
  445. delayed_refs->run_delayed_start = head->bytenr +
  446. head->num_bytes;
  447. locked = btrfs_delayed_ref_lock(delayed_refs, head);
  448. spin_unlock(&delayed_refs->lock);
  449. /*
  450. * We may have dropped the spin lock to get the head mutex lock, and
  451. * that might have given someone else time to free the head. If that's
  452. * true, it has been removed from our list and we can move on.
  453. */
  454. if (!locked)
  455. return ERR_PTR(-EAGAIN);
  456. return head;
  457. }
  458. void btrfs_unselect_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
  459. struct btrfs_delayed_ref_head *head)
  460. {
  461. spin_lock(&delayed_refs->lock);
  462. head->processing = false;
  463. delayed_refs->num_heads_ready++;
  464. spin_unlock(&delayed_refs->lock);
  465. btrfs_delayed_ref_unlock(head);
  466. }
  467. void btrfs_delete_ref_head(const struct btrfs_fs_info *fs_info,
  468. struct btrfs_delayed_ref_root *delayed_refs,
  469. struct btrfs_delayed_ref_head *head)
  470. {
  471. const unsigned long index = (head->bytenr >> fs_info->sectorsize_bits);
  472. lockdep_assert_held(&delayed_refs->lock);
  473. lockdep_assert_held(&head->lock);
  474. xa_erase(&delayed_refs->head_refs, index);
  475. head->tracked = false;
  476. delayed_refs->num_heads--;
  477. if (!head->processing)
  478. delayed_refs->num_heads_ready--;
  479. }
  480. struct btrfs_delayed_ref_node *btrfs_select_delayed_ref(struct btrfs_delayed_ref_head *head)
  481. {
  482. struct btrfs_delayed_ref_node *ref;
  483. lockdep_assert_held(&head->mutex);
  484. lockdep_assert_held(&head->lock);
  485. if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
  486. return NULL;
  487. /*
  488. * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
  489. * This is to prevent a ref count from going down to zero, which deletes
  490. * the extent item from the extent tree, when there still are references
  491. * to add, which would fail because they would not find the extent item.
  492. */
  493. if (!list_empty(&head->ref_add_list))
  494. return list_first_entry(&head->ref_add_list,
  495. struct btrfs_delayed_ref_node, add_list);
  496. ref = rb_entry(rb_first_cached(&head->ref_tree),
  497. struct btrfs_delayed_ref_node, ref_node);
  498. ASSERT(list_empty(&ref->add_list));
  499. return ref;
  500. }
  501. /*
  502. * Helper to insert the ref_node to the tail or merge with tail.
  503. *
  504. * Return false if the ref was inserted.
  505. * Return true if the ref was merged into an existing one (and therefore can be
  506. * freed by the caller).
  507. */
  508. static bool insert_delayed_ref(struct btrfs_trans_handle *trans,
  509. struct btrfs_delayed_ref_head *href,
  510. struct btrfs_delayed_ref_node *ref)
  511. {
  512. struct btrfs_delayed_ref_root *root = &trans->transaction->delayed_refs;
  513. struct btrfs_delayed_ref_node *exist;
  514. int mod;
  515. spin_lock(&href->lock);
  516. exist = tree_insert(&href->ref_tree, ref);
  517. if (!exist) {
  518. if (ref->action == BTRFS_ADD_DELAYED_REF)
  519. list_add_tail(&ref->add_list, &href->ref_add_list);
  520. spin_unlock(&href->lock);
  521. trans->delayed_ref_updates++;
  522. return false;
  523. }
  524. /* Now we are sure we can merge */
  525. if (exist->action == ref->action) {
  526. mod = ref->ref_mod;
  527. } else {
  528. /* Need to change action */
  529. if (exist->ref_mod < ref->ref_mod) {
  530. exist->action = ref->action;
  531. mod = -exist->ref_mod;
  532. exist->ref_mod = ref->ref_mod;
  533. if (ref->action == BTRFS_ADD_DELAYED_REF)
  534. list_add_tail(&exist->add_list,
  535. &href->ref_add_list);
  536. else if (ref->action == BTRFS_DROP_DELAYED_REF) {
  537. ASSERT(!list_empty(&exist->add_list));
  538. list_del_init(&exist->add_list);
  539. } else {
  540. ASSERT(0);
  541. }
  542. } else
  543. mod = -ref->ref_mod;
  544. }
  545. exist->ref_mod += mod;
  546. /* remove existing tail if its ref_mod is zero */
  547. if (exist->ref_mod == 0)
  548. drop_delayed_ref(trans->fs_info, root, href, exist);
  549. spin_unlock(&href->lock);
  550. return true;
  551. }
  552. /*
  553. * helper function to update the accounting in the head ref
  554. * existing and update must have the same bytenr
  555. */
  556. static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
  557. struct btrfs_delayed_ref_head *existing,
  558. struct btrfs_delayed_ref_head *update)
  559. {
  560. struct btrfs_delayed_ref_root *delayed_refs =
  561. &trans->transaction->delayed_refs;
  562. struct btrfs_fs_info *fs_info = trans->fs_info;
  563. int old_ref_mod;
  564. BUG_ON(existing->is_data != update->is_data);
  565. spin_lock(&existing->lock);
  566. /*
  567. * When freeing an extent, we may not know the owning root when we
  568. * first create the head_ref. However, some deref before the last deref
  569. * will know it, so we just need to update the head_ref accordingly.
  570. */
  571. if (!existing->owning_root)
  572. existing->owning_root = update->owning_root;
  573. if (update->must_insert_reserved) {
  574. /* if the extent was freed and then
  575. * reallocated before the delayed ref
  576. * entries were processed, we can end up
  577. * with an existing head ref without
  578. * the must_insert_reserved flag set.
  579. * Set it again here
  580. */
  581. existing->must_insert_reserved = update->must_insert_reserved;
  582. existing->owning_root = update->owning_root;
  583. /*
  584. * update the num_bytes so we make sure the accounting
  585. * is done correctly
  586. */
  587. existing->num_bytes = update->num_bytes;
  588. }
  589. if (update->extent_op) {
  590. if (!existing->extent_op) {
  591. existing->extent_op = update->extent_op;
  592. } else {
  593. if (update->extent_op->update_key) {
  594. memcpy(&existing->extent_op->key,
  595. &update->extent_op->key,
  596. sizeof(update->extent_op->key));
  597. existing->extent_op->update_key = true;
  598. }
  599. if (update->extent_op->update_flags) {
  600. existing->extent_op->flags_to_set |=
  601. update->extent_op->flags_to_set;
  602. existing->extent_op->update_flags = true;
  603. }
  604. btrfs_free_delayed_extent_op(update->extent_op);
  605. }
  606. }
  607. /*
  608. * update the reference mod on the head to reflect this new operation,
  609. * only need the lock for this case cause we could be processing it
  610. * currently, for refs we just added we know we're a-ok.
  611. */
  612. old_ref_mod = existing->total_ref_mod;
  613. existing->ref_mod += update->ref_mod;
  614. existing->total_ref_mod += update->ref_mod;
  615. /*
  616. * If we are going to from a positive ref mod to a negative or vice
  617. * versa we need to make sure to adjust pending_csums accordingly.
  618. * We reserve bytes for csum deletion when adding or updating a ref head
  619. * see add_delayed_ref_head() for more details.
  620. */
  621. if (existing->is_data) {
  622. u64 csum_leaves =
  623. btrfs_csum_bytes_to_leaves(fs_info,
  624. existing->num_bytes);
  625. if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
  626. delayed_refs->pending_csums -= existing->num_bytes;
  627. btrfs_delayed_refs_rsv_release(fs_info, 0, csum_leaves);
  628. }
  629. if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
  630. delayed_refs->pending_csums += existing->num_bytes;
  631. trans->delayed_ref_csum_deletions += csum_leaves;
  632. }
  633. }
  634. spin_unlock(&existing->lock);
  635. }
  636. static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
  637. struct btrfs_ref *generic_ref,
  638. struct btrfs_qgroup_extent_record *qrecord,
  639. u64 reserved)
  640. {
  641. int count_mod = 1;
  642. bool must_insert_reserved = false;
  643. /* If reserved is provided, it must be a data extent. */
  644. BUG_ON(generic_ref->type != BTRFS_REF_DATA && reserved);
  645. switch (generic_ref->action) {
  646. case BTRFS_ADD_DELAYED_REF:
  647. /* count_mod is already set to 1. */
  648. break;
  649. case BTRFS_UPDATE_DELAYED_HEAD:
  650. count_mod = 0;
  651. break;
  652. case BTRFS_DROP_DELAYED_REF:
  653. /*
  654. * The head node stores the sum of all the mods, so dropping a ref
  655. * should drop the sum in the head node by one.
  656. */
  657. count_mod = -1;
  658. break;
  659. case BTRFS_ADD_DELAYED_EXTENT:
  660. /*
  661. * BTRFS_ADD_DELAYED_EXTENT means that we need to update the
  662. * reserved accounting when the extent is finally added, or if a
  663. * later modification deletes the delayed ref without ever
  664. * inserting the extent into the extent allocation tree.
  665. * ref->must_insert_reserved is the flag used to record that
  666. * accounting mods are required.
  667. *
  668. * Once we record must_insert_reserved, switch the action to
  669. * BTRFS_ADD_DELAYED_REF because other special casing is not
  670. * required.
  671. */
  672. must_insert_reserved = true;
  673. break;
  674. }
  675. refcount_set(&head_ref->refs, 1);
  676. head_ref->bytenr = generic_ref->bytenr;
  677. head_ref->num_bytes = generic_ref->num_bytes;
  678. head_ref->ref_mod = count_mod;
  679. head_ref->reserved_bytes = reserved;
  680. head_ref->must_insert_reserved = must_insert_reserved;
  681. head_ref->owning_root = generic_ref->owning_root;
  682. head_ref->is_data = (generic_ref->type == BTRFS_REF_DATA);
  683. head_ref->is_system = (generic_ref->ref_root == BTRFS_CHUNK_TREE_OBJECTID);
  684. head_ref->ref_tree = RB_ROOT_CACHED;
  685. INIT_LIST_HEAD(&head_ref->ref_add_list);
  686. head_ref->tracked = false;
  687. head_ref->processing = false;
  688. head_ref->total_ref_mod = count_mod;
  689. spin_lock_init(&head_ref->lock);
  690. mutex_init(&head_ref->mutex);
  691. /* If not metadata set an impossible level to help debugging. */
  692. if (generic_ref->type == BTRFS_REF_METADATA)
  693. head_ref->level = generic_ref->tree_ref.level;
  694. else
  695. head_ref->level = U8_MAX;
  696. if (qrecord) {
  697. if (generic_ref->ref_root && reserved) {
  698. qrecord->data_rsv = reserved;
  699. qrecord->data_rsv_refroot = generic_ref->ref_root;
  700. }
  701. qrecord->num_bytes = generic_ref->num_bytes;
  702. qrecord->old_roots = NULL;
  703. }
  704. }
  705. /*
  706. * Helper function to actually insert a head node into the xarray. This does all
  707. * the dirty work in terms of maintaining the correct overall modification
  708. * count.
  709. *
  710. * The caller is responsible for calling kfree() on @qrecord. More specifically,
  711. * if this function reports that it did not insert it as noted in
  712. * @qrecord_inserted_ret, then it's safe to call kfree() on it.
  713. *
  714. * Returns an error pointer in case of an error.
  715. */
  716. static noinline struct btrfs_delayed_ref_head *
  717. add_delayed_ref_head(struct btrfs_trans_handle *trans,
  718. struct btrfs_delayed_ref_head *head_ref,
  719. struct btrfs_qgroup_extent_record *qrecord,
  720. int action, bool *qrecord_inserted_ret)
  721. {
  722. struct btrfs_fs_info *fs_info = trans->fs_info;
  723. struct btrfs_delayed_ref_head *existing;
  724. struct btrfs_delayed_ref_root *delayed_refs;
  725. const unsigned long index = (head_ref->bytenr >> fs_info->sectorsize_bits);
  726. /*
  727. * If 'qrecord_inserted_ret' is provided, then the first thing we need
  728. * to do is to initialize it to false just in case we have an exit
  729. * before trying to insert the record.
  730. */
  731. if (qrecord_inserted_ret)
  732. *qrecord_inserted_ret = false;
  733. delayed_refs = &trans->transaction->delayed_refs;
  734. lockdep_assert_held(&delayed_refs->lock);
  735. #if BITS_PER_LONG == 32
  736. if (head_ref->bytenr >= MAX_LFS_FILESIZE) {
  737. if (qrecord)
  738. xa_release(&delayed_refs->dirty_extents, index);
  739. btrfs_err_rl(fs_info,
  740. "delayed ref head %llu is beyond 32bit page cache and xarray index limit",
  741. head_ref->bytenr);
  742. btrfs_err_32bit_limit(fs_info);
  743. return ERR_PTR(-EOVERFLOW);
  744. }
  745. #endif
  746. /* Record qgroup extent info if provided */
  747. if (qrecord) {
  748. /*
  749. * Setting 'qrecord' but not 'qrecord_inserted_ret' will likely
  750. * result in a memory leakage.
  751. */
  752. ASSERT(qrecord_inserted_ret != NULL);
  753. int ret;
  754. ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord,
  755. head_ref->bytenr);
  756. if (ret) {
  757. /* Clean up if insertion fails or item exists. */
  758. xa_release(&delayed_refs->dirty_extents, index);
  759. if (ret < 0)
  760. return ERR_PTR(ret);
  761. } else if (qrecord_inserted_ret) {
  762. *qrecord_inserted_ret = true;
  763. }
  764. }
  765. trace_add_delayed_ref_head(fs_info, head_ref, action);
  766. existing = xa_load(&delayed_refs->head_refs, index);
  767. if (existing) {
  768. update_existing_head_ref(trans, existing, head_ref);
  769. /*
  770. * we've updated the existing ref, free the newly
  771. * allocated ref
  772. */
  773. kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
  774. head_ref = existing;
  775. } else {
  776. existing = xa_store(&delayed_refs->head_refs, index, head_ref, GFP_ATOMIC);
  777. if (xa_is_err(existing)) {
  778. /* Memory was preallocated by the caller. */
  779. ASSERT(xa_err(existing) != -ENOMEM);
  780. return ERR_PTR(xa_err(existing));
  781. } else if (WARN_ON(existing)) {
  782. /*
  783. * Shouldn't happen we just did a lookup before under
  784. * delayed_refs->lock.
  785. */
  786. return ERR_PTR(-EEXIST);
  787. }
  788. head_ref->tracked = true;
  789. /*
  790. * We reserve the amount of bytes needed to delete csums when
  791. * adding the ref head and not when adding individual drop refs
  792. * since the csum items are deleted only after running the last
  793. * delayed drop ref (the data extent's ref count drops to 0).
  794. */
  795. if (head_ref->is_data && head_ref->ref_mod < 0) {
  796. delayed_refs->pending_csums += head_ref->num_bytes;
  797. trans->delayed_ref_csum_deletions +=
  798. btrfs_csum_bytes_to_leaves(fs_info, head_ref->num_bytes);
  799. }
  800. delayed_refs->num_heads++;
  801. delayed_refs->num_heads_ready++;
  802. }
  803. return head_ref;
  804. }
  805. /*
  806. * Initialize the structure which represents a modification to an extent.
  807. *
  808. * @fs_info: Internal to the mounted filesystem mount structure.
  809. *
  810. * @ref: The structure which is going to be initialized.
  811. *
  812. * @bytenr: The logical address of the extent for which a modification is
  813. * going to be recorded.
  814. *
  815. * @num_bytes: Size of the extent whose modification is being recorded.
  816. *
  817. * @ref_root: The id of the root where this modification has originated, this
  818. * can be either one of the well-known metadata trees or the
  819. * subvolume id which references this extent.
  820. *
  821. * @action: Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
  822. * BTRFS_ADD_DELAYED_EXTENT
  823. *
  824. * @ref_type: Holds the type of the extent which is being recorded, can be
  825. * one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
  826. * when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
  827. * BTRFS_EXTENT_DATA_REF_KEY when recording data extent
  828. */
  829. static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
  830. struct btrfs_delayed_ref_node *ref,
  831. struct btrfs_ref *generic_ref)
  832. {
  833. int action = generic_ref->action;
  834. u64 seq = 0;
  835. if (action == BTRFS_ADD_DELAYED_EXTENT)
  836. action = BTRFS_ADD_DELAYED_REF;
  837. if (btrfs_is_fstree(generic_ref->ref_root))
  838. seq = atomic64_read(&fs_info->tree_mod_seq);
  839. refcount_set(&ref->refs, 1);
  840. ref->bytenr = generic_ref->bytenr;
  841. ref->num_bytes = generic_ref->num_bytes;
  842. ref->ref_mod = 1;
  843. ref->action = action;
  844. ref->seq = seq;
  845. ref->type = btrfs_ref_type(generic_ref);
  846. ref->ref_root = generic_ref->ref_root;
  847. ref->parent = generic_ref->parent;
  848. RB_CLEAR_NODE(&ref->ref_node);
  849. INIT_LIST_HEAD(&ref->add_list);
  850. if (generic_ref->type == BTRFS_REF_DATA)
  851. ref->data_ref = generic_ref->data_ref;
  852. else
  853. ref->tree_ref = generic_ref->tree_ref;
  854. }
  855. void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
  856. bool skip_qgroup)
  857. {
  858. #ifdef CONFIG_BTRFS_DEBUG
  859. /* If @real_root not set, use @root as fallback */
  860. generic_ref->real_root = mod_root ?: generic_ref->ref_root;
  861. #endif
  862. generic_ref->tree_ref.level = level;
  863. generic_ref->type = BTRFS_REF_METADATA;
  864. if (skip_qgroup || !(btrfs_is_fstree(generic_ref->ref_root) &&
  865. (!mod_root || btrfs_is_fstree(mod_root))))
  866. generic_ref->skip_qgroup = true;
  867. else
  868. generic_ref->skip_qgroup = false;
  869. }
  870. void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset,
  871. u64 mod_root, bool skip_qgroup)
  872. {
  873. #ifdef CONFIG_BTRFS_DEBUG
  874. /* If @real_root not set, use @root as fallback */
  875. generic_ref->real_root = mod_root ?: generic_ref->ref_root;
  876. #endif
  877. generic_ref->data_ref.objectid = ino;
  878. generic_ref->data_ref.offset = offset;
  879. generic_ref->type = BTRFS_REF_DATA;
  880. if (skip_qgroup || !(btrfs_is_fstree(generic_ref->ref_root) &&
  881. (!mod_root || btrfs_is_fstree(mod_root))))
  882. generic_ref->skip_qgroup = true;
  883. else
  884. generic_ref->skip_qgroup = false;
  885. }
  886. static int add_delayed_ref(struct btrfs_trans_handle *trans,
  887. struct btrfs_ref *generic_ref,
  888. struct btrfs_delayed_extent_op *extent_op,
  889. u64 reserved)
  890. {
  891. struct btrfs_fs_info *fs_info = trans->fs_info;
  892. struct btrfs_delayed_ref_node *node;
  893. struct btrfs_delayed_ref_head *head_ref;
  894. struct btrfs_delayed_ref_head *new_head_ref;
  895. struct btrfs_delayed_ref_root *delayed_refs;
  896. struct btrfs_qgroup_extent_record *record = NULL;
  897. const unsigned long index = (generic_ref->bytenr >> fs_info->sectorsize_bits);
  898. bool qrecord_reserved = false;
  899. bool qrecord_inserted;
  900. int action = generic_ref->action;
  901. bool merged;
  902. int ret;
  903. node = kmem_cache_alloc(btrfs_delayed_ref_node_cachep, GFP_NOFS);
  904. if (!node)
  905. return -ENOMEM;
  906. head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
  907. if (!head_ref) {
  908. ret = -ENOMEM;
  909. goto free_node;
  910. }
  911. delayed_refs = &trans->transaction->delayed_refs;
  912. if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
  913. record = kzalloc_obj(*record, GFP_NOFS);
  914. if (!record) {
  915. ret = -ENOMEM;
  916. goto free_head_ref;
  917. }
  918. if (xa_reserve(&delayed_refs->dirty_extents, index, GFP_NOFS)) {
  919. ret = -ENOMEM;
  920. goto free_record;
  921. }
  922. qrecord_reserved = true;
  923. }
  924. ret = xa_reserve(&delayed_refs->head_refs, index, GFP_NOFS);
  925. if (ret) {
  926. if (qrecord_reserved)
  927. xa_release(&delayed_refs->dirty_extents, index);
  928. goto free_record;
  929. }
  930. init_delayed_ref_common(fs_info, node, generic_ref);
  931. init_delayed_ref_head(head_ref, generic_ref, record, reserved);
  932. head_ref->extent_op = extent_op;
  933. spin_lock(&delayed_refs->lock);
  934. /*
  935. * insert both the head node and the new ref without dropping
  936. * the spin lock
  937. */
  938. new_head_ref = add_delayed_ref_head(trans, head_ref, record,
  939. action, &qrecord_inserted);
  940. if (IS_ERR(new_head_ref)) {
  941. xa_release(&delayed_refs->head_refs, index);
  942. spin_unlock(&delayed_refs->lock);
  943. ret = PTR_ERR(new_head_ref);
  944. /*
  945. * It's only safe to call kfree() on 'qrecord' if
  946. * add_delayed_ref_head() has _not_ inserted it for
  947. * tracing. Otherwise we need to handle this here.
  948. */
  949. if (!qrecord_reserved || qrecord_inserted)
  950. goto free_head_ref;
  951. goto free_record;
  952. }
  953. head_ref = new_head_ref;
  954. merged = insert_delayed_ref(trans, head_ref, node);
  955. spin_unlock(&delayed_refs->lock);
  956. /*
  957. * Need to update the delayed_refs_rsv with any changes we may have
  958. * made.
  959. */
  960. btrfs_update_delayed_refs_rsv(trans);
  961. if (generic_ref->type == BTRFS_REF_DATA)
  962. trace_add_delayed_data_ref(trans->fs_info, node);
  963. else
  964. trace_add_delayed_tree_ref(trans->fs_info, node);
  965. if (merged)
  966. kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
  967. if (qrecord_inserted)
  968. return btrfs_qgroup_trace_extent_post(trans, record, generic_ref->bytenr);
  969. kfree(record);
  970. return 0;
  971. free_record:
  972. kfree(record);
  973. free_head_ref:
  974. kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
  975. free_node:
  976. kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
  977. return ret;
  978. }
  979. /*
  980. * Add a delayed tree ref. This does all of the accounting required to make sure
  981. * the delayed ref is eventually processed before this transaction commits.
  982. */
  983. int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
  984. struct btrfs_ref *generic_ref,
  985. struct btrfs_delayed_extent_op *extent_op)
  986. {
  987. ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
  988. return add_delayed_ref(trans, generic_ref, extent_op, 0);
  989. }
  990. /*
  991. * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
  992. */
  993. int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
  994. struct btrfs_ref *generic_ref,
  995. u64 reserved)
  996. {
  997. ASSERT(generic_ref->type == BTRFS_REF_DATA && generic_ref->action);
  998. return add_delayed_ref(trans, generic_ref, NULL, reserved);
  999. }
  1000. int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
  1001. u64 bytenr, u64 num_bytes, u8 level,
  1002. struct btrfs_delayed_extent_op *extent_op)
  1003. {
  1004. const unsigned long index = (bytenr >> trans->fs_info->sectorsize_bits);
  1005. struct btrfs_delayed_ref_head *head_ref;
  1006. struct btrfs_delayed_ref_head *head_ref_ret;
  1007. struct btrfs_delayed_ref_root *delayed_refs;
  1008. struct btrfs_ref generic_ref = {
  1009. .type = BTRFS_REF_METADATA,
  1010. .action = BTRFS_UPDATE_DELAYED_HEAD,
  1011. .bytenr = bytenr,
  1012. .num_bytes = num_bytes,
  1013. .tree_ref.level = level,
  1014. };
  1015. int ret;
  1016. head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
  1017. if (!head_ref)
  1018. return -ENOMEM;
  1019. init_delayed_ref_head(head_ref, &generic_ref, NULL, 0);
  1020. head_ref->extent_op = extent_op;
  1021. delayed_refs = &trans->transaction->delayed_refs;
  1022. ret = xa_reserve(&delayed_refs->head_refs, index, GFP_NOFS);
  1023. if (ret) {
  1024. kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
  1025. return ret;
  1026. }
  1027. spin_lock(&delayed_refs->lock);
  1028. head_ref_ret = add_delayed_ref_head(trans, head_ref, NULL,
  1029. BTRFS_UPDATE_DELAYED_HEAD, NULL);
  1030. if (IS_ERR(head_ref_ret)) {
  1031. xa_release(&delayed_refs->head_refs, index);
  1032. spin_unlock(&delayed_refs->lock);
  1033. kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
  1034. return PTR_ERR(head_ref_ret);
  1035. }
  1036. spin_unlock(&delayed_refs->lock);
  1037. /*
  1038. * Need to update the delayed_refs_rsv with any changes we may have
  1039. * made.
  1040. */
  1041. btrfs_update_delayed_refs_rsv(trans);
  1042. return 0;
  1043. }
  1044. void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
  1045. {
  1046. if (refcount_dec_and_test(&ref->refs)) {
  1047. WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
  1048. kmem_cache_free(btrfs_delayed_ref_node_cachep, ref);
  1049. }
  1050. }
  1051. /*
  1052. * This does a simple search for the head node for a given extent. Returns the
  1053. * head node if found, or NULL if not.
  1054. */
  1055. struct btrfs_delayed_ref_head *
  1056. btrfs_find_delayed_ref_head(const struct btrfs_fs_info *fs_info,
  1057. struct btrfs_delayed_ref_root *delayed_refs,
  1058. u64 bytenr)
  1059. {
  1060. const unsigned long index = (bytenr >> fs_info->sectorsize_bits);
  1061. lockdep_assert_held(&delayed_refs->lock);
  1062. return xa_load(&delayed_refs->head_refs, index);
  1063. }
  1064. static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent)
  1065. {
  1066. int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY;
  1067. if (type < entry->type)
  1068. return -1;
  1069. if (type > entry->type)
  1070. return 1;
  1071. if (type == BTRFS_TREE_BLOCK_REF_KEY) {
  1072. if (root < entry->ref_root)
  1073. return -1;
  1074. if (root > entry->ref_root)
  1075. return 1;
  1076. } else {
  1077. if (parent < entry->parent)
  1078. return -1;
  1079. if (parent > entry->parent)
  1080. return 1;
  1081. }
  1082. return 0;
  1083. }
  1084. /*
  1085. * Check to see if a given root/parent reference is attached to the head. This
  1086. * only checks for BTRFS_ADD_DELAYED_REF references that match, as that
  1087. * indicates the reference exists for the given root or parent. This is for
  1088. * tree blocks only.
  1089. *
  1090. * @head: the head of the bytenr we're searching.
  1091. * @root: the root objectid of the reference if it is a normal reference.
  1092. * @parent: the parent if this is a shared backref.
  1093. */
  1094. bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
  1095. u64 root, u64 parent)
  1096. {
  1097. struct rb_node *node;
  1098. bool found = false;
  1099. lockdep_assert_held(&head->mutex);
  1100. spin_lock(&head->lock);
  1101. node = head->ref_tree.rb_root.rb_node;
  1102. while (node) {
  1103. struct btrfs_delayed_ref_node *entry;
  1104. int ret;
  1105. entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
  1106. ret = find_comp(entry, root, parent);
  1107. if (ret < 0) {
  1108. node = node->rb_left;
  1109. } else if (ret > 0) {
  1110. node = node->rb_right;
  1111. } else {
  1112. /*
  1113. * We only want to count ADD actions, as drops mean the
  1114. * ref doesn't exist.
  1115. */
  1116. if (entry->action == BTRFS_ADD_DELAYED_REF)
  1117. found = true;
  1118. break;
  1119. }
  1120. }
  1121. spin_unlock(&head->lock);
  1122. return found;
  1123. }
  1124. void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
  1125. {
  1126. struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
  1127. struct btrfs_fs_info *fs_info = trans->fs_info;
  1128. spin_lock(&delayed_refs->lock);
  1129. while (true) {
  1130. struct btrfs_delayed_ref_head *head;
  1131. struct rb_node *n;
  1132. bool pin_bytes = false;
  1133. head = find_first_ref_head(delayed_refs);
  1134. if (!head)
  1135. break;
  1136. if (!btrfs_delayed_ref_lock(delayed_refs, head))
  1137. continue;
  1138. spin_lock(&head->lock);
  1139. while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
  1140. struct btrfs_delayed_ref_node *ref;
  1141. ref = rb_entry(n, struct btrfs_delayed_ref_node, ref_node);
  1142. drop_delayed_ref(fs_info, delayed_refs, head, ref);
  1143. }
  1144. if (head->must_insert_reserved)
  1145. pin_bytes = true;
  1146. btrfs_free_delayed_extent_op(head->extent_op);
  1147. btrfs_delete_ref_head(fs_info, delayed_refs, head);
  1148. spin_unlock(&head->lock);
  1149. spin_unlock(&delayed_refs->lock);
  1150. mutex_unlock(&head->mutex);
  1151. if (!btrfs_is_testing(fs_info) && pin_bytes) {
  1152. struct btrfs_block_group *bg;
  1153. bg = btrfs_lookup_block_group(fs_info, head->bytenr);
  1154. if (WARN_ON_ONCE(bg == NULL)) {
  1155. /*
  1156. * Unexpected and there's nothing we can do here
  1157. * because we are in a transaction abort path,
  1158. * so any errors can only be ignored or reported
  1159. * while attempting to cleanup all resources.
  1160. */
  1161. btrfs_err(fs_info,
  1162. "block group for delayed ref at %llu was not found while destroying ref head",
  1163. head->bytenr);
  1164. } else {
  1165. spin_lock(&bg->space_info->lock);
  1166. spin_lock(&bg->lock);
  1167. bg->pinned += head->num_bytes;
  1168. btrfs_space_info_update_bytes_pinned(bg->space_info,
  1169. head->num_bytes);
  1170. bg->reserved -= head->num_bytes;
  1171. bg->space_info->bytes_reserved -= head->num_bytes;
  1172. spin_unlock(&bg->lock);
  1173. spin_unlock(&bg->space_info->lock);
  1174. btrfs_put_block_group(bg);
  1175. }
  1176. btrfs_error_unpin_extent_range(fs_info, head->bytenr,
  1177. head->bytenr + head->num_bytes - 1);
  1178. }
  1179. if (!btrfs_is_testing(fs_info))
  1180. btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
  1181. btrfs_put_delayed_ref_head(head);
  1182. cond_resched();
  1183. spin_lock(&delayed_refs->lock);
  1184. }
  1185. if (!btrfs_is_testing(fs_info))
  1186. btrfs_qgroup_destroy_extent_records(trans);
  1187. spin_unlock(&delayed_refs->lock);
  1188. }
  1189. void __cold btrfs_delayed_ref_exit(void)
  1190. {
  1191. kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
  1192. kmem_cache_destroy(btrfs_delayed_ref_node_cachep);
  1193. kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
  1194. }
  1195. int __init btrfs_delayed_ref_init(void)
  1196. {
  1197. btrfs_delayed_ref_head_cachep = KMEM_CACHE(btrfs_delayed_ref_head, 0);
  1198. if (!btrfs_delayed_ref_head_cachep)
  1199. return -ENOMEM;
  1200. btrfs_delayed_ref_node_cachep = KMEM_CACHE(btrfs_delayed_ref_node, 0);
  1201. if (!btrfs_delayed_ref_node_cachep)
  1202. goto fail;
  1203. btrfs_delayed_extent_op_cachep = KMEM_CACHE(btrfs_delayed_extent_op, 0);
  1204. if (!btrfs_delayed_extent_op_cachep)
  1205. goto fail;
  1206. return 0;
  1207. fail:
  1208. btrfs_delayed_ref_exit();
  1209. return -ENOMEM;
  1210. }