ref-verify.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2014 Facebook. All rights reserved.
  4. */
  5. #include <linux/sched.h>
  6. #include <linux/stacktrace.h>
  7. #include "messages.h"
  8. #include "ctree.h"
  9. #include "disk-io.h"
  10. #include "locking.h"
  11. #include "delayed-ref.h"
  12. #include "ref-verify.h"
  13. #include "fs.h"
  14. #include "accessors.h"
  15. /*
  16. * Used to keep track the roots and number of refs each root has for a given
  17. * bytenr. This just tracks the number of direct references, no shared
  18. * references.
  19. */
  20. struct root_entry {
  21. u64 root_objectid;
  22. u64 num_refs;
  23. struct rb_node node;
  24. };
  25. /*
  26. * These are meant to represent what should exist in the extent tree, these can
  27. * be used to verify the extent tree is consistent as these should all match
  28. * what the extent tree says.
  29. */
  30. struct ref_entry {
  31. u64 root_objectid;
  32. u64 parent;
  33. u64 owner;
  34. u64 offset;
  35. u64 num_refs;
  36. struct rb_node node;
  37. };
  38. #define MAX_TRACE 16
  39. /*
  40. * Whenever we add/remove a reference we record the action. The action maps
  41. * back to the delayed ref action. We hold the ref we are changing in the
  42. * action so we can account for the history properly, and we record the root we
  43. * were called with since it could be different from ref_root. We also store
  44. * stack traces because that's how I roll.
  45. */
  46. struct ref_action {
  47. int action;
  48. u64 root;
  49. struct ref_entry ref;
  50. struct list_head list;
  51. unsigned long trace[MAX_TRACE];
  52. unsigned int trace_len;
  53. };
  54. /*
  55. * One of these for every block we reference, it holds the roots and references
  56. * to it as well as all of the ref actions that have occurred to it. We never
  57. * free it until we unmount the file system in order to make sure re-allocations
  58. * are happening properly.
  59. */
  60. struct block_entry {
  61. u64 bytenr;
  62. u64 len;
  63. u64 num_refs;
  64. int metadata;
  65. int from_disk;
  66. struct rb_root roots;
  67. struct rb_root refs;
  68. struct rb_node node;
  69. struct list_head actions;
  70. };
  71. static int block_entry_bytenr_key_cmp(const void *key, const struct rb_node *node)
  72. {
  73. const u64 *bytenr = key;
  74. const struct block_entry *entry = rb_entry(node, struct block_entry, node);
  75. if (entry->bytenr < *bytenr)
  76. return 1;
  77. else if (entry->bytenr > *bytenr)
  78. return -1;
  79. return 0;
  80. }
  81. static int block_entry_bytenr_cmp(struct rb_node *new, const struct rb_node *existing)
  82. {
  83. const struct block_entry *new_entry = rb_entry(new, struct block_entry, node);
  84. return block_entry_bytenr_key_cmp(&new_entry->bytenr, existing);
  85. }
  86. static struct block_entry *insert_block_entry(struct rb_root *root,
  87. struct block_entry *be)
  88. {
  89. struct rb_node *node;
  90. node = rb_find_add(&be->node, root, block_entry_bytenr_cmp);
  91. return rb_entry_safe(node, struct block_entry, node);
  92. }
  93. static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
  94. {
  95. struct rb_node *node;
  96. node = rb_find(&bytenr, root, block_entry_bytenr_key_cmp);
  97. return rb_entry_safe(node, struct block_entry, node);
  98. }
  99. static int root_entry_root_objectid_key_cmp(const void *key, const struct rb_node *node)
  100. {
  101. const u64 *objectid = key;
  102. const struct root_entry *entry = rb_entry(node, struct root_entry, node);
  103. if (entry->root_objectid < *objectid)
  104. return 1;
  105. else if (entry->root_objectid > *objectid)
  106. return -1;
  107. return 0;
  108. }
  109. static int root_entry_root_objectid_cmp(struct rb_node *new, const struct rb_node *existing)
  110. {
  111. const struct root_entry *new_entry = rb_entry(new, struct root_entry, node);
  112. return root_entry_root_objectid_key_cmp(&new_entry->root_objectid, existing);
  113. }
  114. static struct root_entry *insert_root_entry(struct rb_root *root,
  115. struct root_entry *re)
  116. {
  117. struct rb_node *node;
  118. node = rb_find_add(&re->node, root, root_entry_root_objectid_cmp);
  119. return rb_entry_safe(node, struct root_entry, node);
  120. }
  121. static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
  122. {
  123. if (ref1->root_objectid < ref2->root_objectid)
  124. return -1;
  125. if (ref1->root_objectid > ref2->root_objectid)
  126. return 1;
  127. if (ref1->parent < ref2->parent)
  128. return -1;
  129. if (ref1->parent > ref2->parent)
  130. return 1;
  131. if (ref1->owner < ref2->owner)
  132. return -1;
  133. if (ref1->owner > ref2->owner)
  134. return 1;
  135. if (ref1->offset < ref2->offset)
  136. return -1;
  137. if (ref1->offset > ref2->offset)
  138. return 1;
  139. return 0;
  140. }
  141. static int ref_entry_cmp(struct rb_node *new, const struct rb_node *existing)
  142. {
  143. struct ref_entry *new_entry = rb_entry(new, struct ref_entry, node);
  144. struct ref_entry *existing_entry = rb_entry(existing, struct ref_entry, node);
  145. return comp_refs(new_entry, existing_entry);
  146. }
  147. static struct ref_entry *insert_ref_entry(struct rb_root *root,
  148. struct ref_entry *ref)
  149. {
  150. struct rb_node *node;
  151. node = rb_find_add(&ref->node, root, ref_entry_cmp);
  152. return rb_entry_safe(node, struct ref_entry, node);
  153. }
  154. static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
  155. {
  156. struct rb_node *node;
  157. node = rb_find(&objectid, root, root_entry_root_objectid_key_cmp);
  158. return rb_entry_safe(node, struct root_entry, node);
  159. }
  160. #ifdef CONFIG_STACKTRACE
  161. static void __save_stack_trace(struct ref_action *ra)
  162. {
  163. ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2);
  164. }
  165. static void __print_stack_trace(struct btrfs_fs_info *fs_info,
  166. struct ref_action *ra)
  167. {
  168. if (ra->trace_len == 0) {
  169. btrfs_err(fs_info, " ref-verify: no stacktrace");
  170. return;
  171. }
  172. stack_trace_print(ra->trace, ra->trace_len, 2);
  173. }
  174. #else
  175. static inline void __save_stack_trace(struct ref_action *ra)
  176. {
  177. }
  178. static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
  179. struct ref_action *ra)
  180. {
  181. btrfs_err(fs_info, " ref-verify: no stacktrace support");
  182. }
  183. #endif
  184. static void free_block_entry(struct block_entry *be)
  185. {
  186. struct root_entry *re;
  187. struct ref_entry *ref;
  188. struct ref_action *ra;
  189. struct rb_node *n;
  190. while ((n = rb_first(&be->roots))) {
  191. re = rb_entry(n, struct root_entry, node);
  192. rb_erase(&re->node, &be->roots);
  193. kfree(re);
  194. }
  195. while((n = rb_first(&be->refs))) {
  196. ref = rb_entry(n, struct ref_entry, node);
  197. rb_erase(&ref->node, &be->refs);
  198. kfree(ref);
  199. }
  200. while (!list_empty(&be->actions)) {
  201. ra = list_first_entry(&be->actions, struct ref_action,
  202. list);
  203. list_del(&ra->list);
  204. kfree(ra);
  205. }
  206. kfree(be);
  207. }
  208. static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
  209. u64 bytenr, u64 len,
  210. u64 root_objectid)
  211. {
  212. struct block_entry *be = NULL, *exist;
  213. struct root_entry *re = NULL;
  214. re = kzalloc_obj(struct root_entry, GFP_NOFS);
  215. be = kzalloc_obj(struct block_entry, GFP_NOFS);
  216. if (!be || !re) {
  217. kfree(re);
  218. kfree(be);
  219. return ERR_PTR(-ENOMEM);
  220. }
  221. be->bytenr = bytenr;
  222. be->len = len;
  223. re->root_objectid = root_objectid;
  224. re->num_refs = 0;
  225. spin_lock(&fs_info->ref_verify_lock);
  226. exist = insert_block_entry(&fs_info->block_tree, be);
  227. if (exist) {
  228. if (root_objectid) {
  229. struct root_entry *exist_re;
  230. exist_re = insert_root_entry(&exist->roots, re);
  231. if (exist_re)
  232. kfree(re);
  233. } else {
  234. kfree(re);
  235. }
  236. kfree(be);
  237. return exist;
  238. }
  239. be->num_refs = 0;
  240. be->metadata = 0;
  241. be->from_disk = 0;
  242. be->roots = RB_ROOT;
  243. be->refs = RB_ROOT;
  244. INIT_LIST_HEAD(&be->actions);
  245. if (root_objectid)
  246. insert_root_entry(&be->roots, re);
  247. else
  248. kfree(re);
  249. return be;
  250. }
  251. static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
  252. u64 parent, u64 bytenr, int level)
  253. {
  254. struct block_entry *be;
  255. struct root_entry *re;
  256. struct ref_entry *ref = NULL, *exist;
  257. ref = kmalloc_obj(struct ref_entry, GFP_NOFS);
  258. if (!ref)
  259. return -ENOMEM;
  260. if (parent)
  261. ref->root_objectid = 0;
  262. else
  263. ref->root_objectid = ref_root;
  264. ref->parent = parent;
  265. ref->owner = level;
  266. ref->offset = 0;
  267. ref->num_refs = 1;
  268. be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
  269. if (IS_ERR(be)) {
  270. kfree(ref);
  271. return PTR_ERR(be);
  272. }
  273. be->num_refs++;
  274. be->from_disk = 1;
  275. be->metadata = 1;
  276. if (!parent) {
  277. ASSERT(ref_root);
  278. re = lookup_root_entry(&be->roots, ref_root);
  279. ASSERT(re);
  280. re->num_refs++;
  281. }
  282. exist = insert_ref_entry(&be->refs, ref);
  283. if (exist) {
  284. exist->num_refs++;
  285. kfree(ref);
  286. }
  287. spin_unlock(&fs_info->ref_verify_lock);
  288. return 0;
  289. }
  290. static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
  291. u64 parent, u32 num_refs, u64 bytenr,
  292. u64 num_bytes)
  293. {
  294. struct block_entry *be;
  295. struct ref_entry *ref;
  296. ref = kzalloc_obj(struct ref_entry, GFP_NOFS);
  297. if (!ref)
  298. return -ENOMEM;
  299. be = add_block_entry(fs_info, bytenr, num_bytes, 0);
  300. if (IS_ERR(be)) {
  301. kfree(ref);
  302. return PTR_ERR(be);
  303. }
  304. be->num_refs += num_refs;
  305. ref->parent = parent;
  306. ref->num_refs = num_refs;
  307. if (insert_ref_entry(&be->refs, ref)) {
  308. spin_unlock(&fs_info->ref_verify_lock);
  309. btrfs_err(fs_info, "existing shared ref when reading from disk?");
  310. kfree(ref);
  311. return -EINVAL;
  312. }
  313. spin_unlock(&fs_info->ref_verify_lock);
  314. return 0;
  315. }
  316. static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
  317. struct extent_buffer *leaf,
  318. struct btrfs_extent_data_ref *dref,
  319. u64 bytenr, u64 num_bytes)
  320. {
  321. struct block_entry *be;
  322. struct ref_entry *ref;
  323. struct root_entry *re;
  324. u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
  325. u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
  326. u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
  327. u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);
  328. ref = kzalloc_obj(struct ref_entry, GFP_NOFS);
  329. if (!ref)
  330. return -ENOMEM;
  331. be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
  332. if (IS_ERR(be)) {
  333. kfree(ref);
  334. return PTR_ERR(be);
  335. }
  336. be->num_refs += num_refs;
  337. ref->parent = 0;
  338. ref->owner = owner;
  339. ref->root_objectid = ref_root;
  340. ref->offset = offset;
  341. ref->num_refs = num_refs;
  342. if (insert_ref_entry(&be->refs, ref)) {
  343. spin_unlock(&fs_info->ref_verify_lock);
  344. btrfs_err(fs_info, "existing ref when reading from disk?");
  345. kfree(ref);
  346. return -EINVAL;
  347. }
  348. re = lookup_root_entry(&be->roots, ref_root);
  349. if (!re) {
  350. spin_unlock(&fs_info->ref_verify_lock);
  351. btrfs_err(fs_info, "missing root in new block entry?");
  352. return -EINVAL;
  353. }
  354. re->num_refs += num_refs;
  355. spin_unlock(&fs_info->ref_verify_lock);
  356. return 0;
  357. }
  358. static int process_extent_item(struct btrfs_fs_info *fs_info,
  359. struct btrfs_path *path, struct btrfs_key *key,
  360. int slot, int *tree_block_level)
  361. {
  362. struct btrfs_extent_item *ei;
  363. struct btrfs_extent_inline_ref *iref;
  364. struct btrfs_extent_data_ref *dref;
  365. struct btrfs_shared_data_ref *sref;
  366. struct extent_buffer *leaf = path->nodes[0];
  367. u32 item_size = btrfs_item_size(leaf, slot);
  368. unsigned long end, ptr;
  369. u64 offset, flags, count;
  370. int type;
  371. int ret = 0;
  372. ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
  373. flags = btrfs_extent_flags(leaf, ei);
  374. if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
  375. flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
  376. struct btrfs_tree_block_info *info;
  377. info = (struct btrfs_tree_block_info *)(ei + 1);
  378. *tree_block_level = btrfs_tree_block_level(leaf, info);
  379. iref = (struct btrfs_extent_inline_ref *)(info + 1);
  380. } else {
  381. if (key->type == BTRFS_METADATA_ITEM_KEY)
  382. *tree_block_level = key->offset;
  383. iref = (struct btrfs_extent_inline_ref *)(ei + 1);
  384. }
  385. ptr = (unsigned long)iref;
  386. end = (unsigned long)ei + item_size;
  387. while (ptr < end) {
  388. iref = (struct btrfs_extent_inline_ref *)ptr;
  389. type = btrfs_extent_inline_ref_type(leaf, iref);
  390. offset = btrfs_extent_inline_ref_offset(leaf, iref);
  391. switch (type) {
  392. case BTRFS_TREE_BLOCK_REF_KEY:
  393. ret = add_tree_block(fs_info, offset, 0, key->objectid,
  394. *tree_block_level);
  395. break;
  396. case BTRFS_SHARED_BLOCK_REF_KEY:
  397. ret = add_tree_block(fs_info, 0, offset, key->objectid,
  398. *tree_block_level);
  399. break;
  400. case BTRFS_EXTENT_DATA_REF_KEY:
  401. dref = (struct btrfs_extent_data_ref *)(&iref->offset);
  402. ret = add_extent_data_ref(fs_info, leaf, dref,
  403. key->objectid, key->offset);
  404. break;
  405. case BTRFS_SHARED_DATA_REF_KEY:
  406. sref = (struct btrfs_shared_data_ref *)(iref + 1);
  407. count = btrfs_shared_data_ref_count(leaf, sref);
  408. ret = add_shared_data_ref(fs_info, offset, count,
  409. key->objectid, key->offset);
  410. break;
  411. case BTRFS_EXTENT_OWNER_REF_KEY:
  412. if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
  413. btrfs_err(fs_info,
  414. "found extent owner ref without simple quotas enabled");
  415. ret = -EINVAL;
  416. }
  417. break;
  418. default:
  419. btrfs_err(fs_info, "invalid key type in iref");
  420. ret = -EINVAL;
  421. break;
  422. }
  423. if (ret)
  424. break;
  425. ptr += btrfs_extent_inline_ref_size(type);
  426. }
  427. return ret;
  428. }
  429. static int process_leaf(struct btrfs_root *root,
  430. struct btrfs_path *path, u64 *bytenr, u64 *num_bytes,
  431. int *tree_block_level)
  432. {
  433. struct btrfs_fs_info *fs_info = root->fs_info;
  434. struct extent_buffer *leaf = path->nodes[0];
  435. struct btrfs_extent_data_ref *dref;
  436. struct btrfs_shared_data_ref *sref;
  437. u32 count;
  438. int i = 0, ret = 0;
  439. struct btrfs_key key;
  440. int nritems = btrfs_header_nritems(leaf);
  441. for (i = 0; i < nritems; i++) {
  442. btrfs_item_key_to_cpu(leaf, &key, i);
  443. switch (key.type) {
  444. case BTRFS_EXTENT_ITEM_KEY:
  445. *num_bytes = key.offset;
  446. fallthrough;
  447. case BTRFS_METADATA_ITEM_KEY:
  448. *bytenr = key.objectid;
  449. ret = process_extent_item(fs_info, path, &key, i,
  450. tree_block_level);
  451. break;
  452. case BTRFS_TREE_BLOCK_REF_KEY:
  453. ret = add_tree_block(fs_info, key.offset, 0,
  454. key.objectid, *tree_block_level);
  455. break;
  456. case BTRFS_SHARED_BLOCK_REF_KEY:
  457. ret = add_tree_block(fs_info, 0, key.offset,
  458. key.objectid, *tree_block_level);
  459. break;
  460. case BTRFS_EXTENT_DATA_REF_KEY:
  461. dref = btrfs_item_ptr(leaf, i,
  462. struct btrfs_extent_data_ref);
  463. ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
  464. *num_bytes);
  465. break;
  466. case BTRFS_SHARED_DATA_REF_KEY:
  467. sref = btrfs_item_ptr(leaf, i,
  468. struct btrfs_shared_data_ref);
  469. count = btrfs_shared_data_ref_count(leaf, sref);
  470. ret = add_shared_data_ref(fs_info, key.offset, count,
  471. *bytenr, *num_bytes);
  472. break;
  473. default:
  474. break;
  475. }
  476. if (ret)
  477. break;
  478. }
  479. return ret;
  480. }
  481. /* Walk down to the leaf from the given level */
  482. static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
  483. int level, u64 *bytenr, u64 *num_bytes,
  484. int *tree_block_level)
  485. {
  486. struct extent_buffer *eb;
  487. int ret = 0;
  488. while (level >= 0) {
  489. if (level) {
  490. eb = btrfs_read_node_slot(path->nodes[level],
  491. path->slots[level]);
  492. if (IS_ERR(eb))
  493. return PTR_ERR(eb);
  494. btrfs_tree_read_lock(eb);
  495. path->nodes[level-1] = eb;
  496. path->slots[level-1] = 0;
  497. path->locks[level-1] = BTRFS_READ_LOCK;
  498. } else {
  499. ret = process_leaf(root, path, bytenr, num_bytes,
  500. tree_block_level);
  501. if (ret)
  502. break;
  503. }
  504. level--;
  505. }
  506. return ret;
  507. }
  508. /* Walk up to the next node that needs to be processed */
  509. static int walk_up_tree(struct btrfs_path *path, int *level)
  510. {
  511. int l;
  512. for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
  513. if (!path->nodes[l])
  514. continue;
  515. if (l) {
  516. path->slots[l]++;
  517. if (path->slots[l] <
  518. btrfs_header_nritems(path->nodes[l])) {
  519. *level = l;
  520. return 0;
  521. }
  522. }
  523. btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
  524. free_extent_buffer(path->nodes[l]);
  525. path->nodes[l] = NULL;
  526. path->slots[l] = 0;
  527. path->locks[l] = 0;
  528. }
  529. return 1;
  530. }
  531. static void dump_ref_action(struct btrfs_fs_info *fs_info,
  532. struct ref_action *ra)
  533. {
  534. btrfs_err(fs_info,
  535. " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
  536. ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
  537. ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
  538. __print_stack_trace(fs_info, ra);
  539. }
  540. /*
  541. * Dumps all the information from the block entry to printk, it's going to be
  542. * awesome.
  543. */
  544. static void dump_block_entry(struct btrfs_fs_info *fs_info,
  545. struct block_entry *be)
  546. {
  547. struct ref_entry *ref;
  548. struct root_entry *re;
  549. struct ref_action *ra;
  550. struct rb_node *n;
  551. btrfs_err(fs_info,
  552. "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
  553. be->bytenr, be->len, be->num_refs, be->metadata,
  554. be->from_disk);
  555. for (n = rb_first(&be->refs); n; n = rb_next(n)) {
  556. ref = rb_entry(n, struct ref_entry, node);
  557. btrfs_err(fs_info,
  558. " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
  559. ref->root_objectid, ref->parent, ref->owner,
  560. ref->offset, ref->num_refs);
  561. }
  562. for (n = rb_first(&be->roots); n; n = rb_next(n)) {
  563. re = rb_entry(n, struct root_entry, node);
  564. btrfs_err(fs_info, " root entry %llu, num_refs %llu",
  565. re->root_objectid, re->num_refs);
  566. }
  567. list_for_each_entry(ra, &be->actions, list)
  568. dump_ref_action(fs_info, ra);
  569. }
  570. /*
  571. * Called when we modify a ref for a bytenr.
  572. *
  573. * This will add an action item to the given bytenr and do sanity checks to make
  574. * sure we haven't messed something up. If we are making a new allocation and
  575. * this block entry has history we will delete all previous actions as long as
  576. * our sanity checks pass as they are no longer needed.
  577. */
  578. int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
  579. const struct btrfs_ref *generic_ref)
  580. {
  581. struct ref_entry *ref = NULL, *exist;
  582. struct ref_action *ra = NULL;
  583. struct block_entry *be = NULL;
  584. struct root_entry *re = NULL;
  585. int action = generic_ref->action;
  586. int ret = 0;
  587. bool metadata;
  588. u64 bytenr = generic_ref->bytenr;
  589. u64 num_bytes = generic_ref->num_bytes;
  590. u64 parent = generic_ref->parent;
  591. u64 ref_root = 0;
  592. u64 owner = 0;
  593. u64 offset = 0;
  594. if (!btrfs_test_opt(fs_info, REF_VERIFY))
  595. return 0;
  596. if (generic_ref->type == BTRFS_REF_METADATA) {
  597. if (!parent)
  598. ref_root = generic_ref->ref_root;
  599. owner = generic_ref->tree_ref.level;
  600. } else if (!parent) {
  601. ref_root = generic_ref->ref_root;
  602. owner = generic_ref->data_ref.objectid;
  603. offset = generic_ref->data_ref.offset;
  604. }
  605. metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
  606. ref = kzalloc_obj(struct ref_entry, GFP_NOFS);
  607. ra = kmalloc_obj(struct ref_action, GFP_NOFS);
  608. if (!ra || !ref) {
  609. kfree(ref);
  610. kfree(ra);
  611. ret = -ENOMEM;
  612. goto out;
  613. }
  614. ref->parent = parent;
  615. ref->owner = owner;
  616. ref->root_objectid = ref_root;
  617. ref->offset = offset;
  618. ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;
  619. memcpy(&ra->ref, ref, sizeof(struct ref_entry));
  620. /*
  621. * Save the extra info from the delayed ref in the ref action to make it
  622. * easier to figure out what is happening. The real ref's we add to the
  623. * ref tree need to reflect what we save on disk so it matches any
  624. * on-disk refs we pre-loaded.
  625. */
  626. ra->ref.owner = owner;
  627. ra->ref.offset = offset;
  628. ra->ref.root_objectid = ref_root;
  629. __save_stack_trace(ra);
  630. INIT_LIST_HEAD(&ra->list);
  631. ra->action = action;
  632. ra->root = generic_ref->real_root;
  633. /*
  634. * This is an allocation, preallocate the block_entry in case we haven't
  635. * used it before.
  636. */
  637. ret = -EINVAL;
  638. if (action == BTRFS_ADD_DELAYED_EXTENT) {
  639. /*
  640. * For subvol_create we'll just pass in whatever the parent root
  641. * is and the new root objectid, so let's not treat the passed
  642. * in root as if it really has a ref for this bytenr.
  643. */
  644. be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
  645. if (IS_ERR(be)) {
  646. kfree(ref);
  647. kfree(ra);
  648. ret = PTR_ERR(be);
  649. goto out;
  650. }
  651. be->num_refs++;
  652. if (metadata)
  653. be->metadata = 1;
  654. if (be->num_refs != 1) {
  655. btrfs_err(fs_info,
  656. "re-allocated a block that still has references to it!");
  657. dump_block_entry(fs_info, be);
  658. dump_ref_action(fs_info, ra);
  659. kfree(ref);
  660. kfree(ra);
  661. goto out_unlock;
  662. }
  663. while (!list_empty(&be->actions)) {
  664. struct ref_action *tmp;
  665. tmp = list_first_entry(&be->actions, struct ref_action,
  666. list);
  667. list_del(&tmp->list);
  668. kfree(tmp);
  669. }
  670. } else {
  671. struct root_entry *tmp;
  672. if (!parent) {
  673. re = kmalloc_obj(struct root_entry, GFP_NOFS);
  674. if (!re) {
  675. kfree(ref);
  676. kfree(ra);
  677. ret = -ENOMEM;
  678. goto out;
  679. }
  680. /*
  681. * This is the root that is modifying us, so it's the
  682. * one we want to lookup below when we modify the
  683. * re->num_refs.
  684. */
  685. ref_root = generic_ref->real_root;
  686. re->root_objectid = generic_ref->real_root;
  687. re->num_refs = 0;
  688. }
  689. spin_lock(&fs_info->ref_verify_lock);
  690. be = lookup_block_entry(&fs_info->block_tree, bytenr);
  691. if (!be) {
  692. btrfs_err(fs_info,
  693. "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
  694. action, bytenr, num_bytes);
  695. dump_ref_action(fs_info, ra);
  696. kfree(ref);
  697. kfree(ra);
  698. kfree(re);
  699. goto out_unlock;
  700. } else if (be->num_refs == 0) {
  701. btrfs_err(fs_info,
  702. "trying to do action %d for a bytenr that has 0 total references",
  703. action);
  704. dump_block_entry(fs_info, be);
  705. dump_ref_action(fs_info, ra);
  706. kfree(ref);
  707. kfree(ra);
  708. kfree(re);
  709. goto out_unlock;
  710. }
  711. if (!parent) {
  712. tmp = insert_root_entry(&be->roots, re);
  713. if (tmp) {
  714. kfree(re);
  715. re = tmp;
  716. }
  717. }
  718. }
  719. exist = insert_ref_entry(&be->refs, ref);
  720. if (exist) {
  721. if (action == BTRFS_DROP_DELAYED_REF) {
  722. if (exist->num_refs == 0) {
  723. btrfs_err(fs_info,
  724. "dropping a ref for a existing root that doesn't have a ref on the block");
  725. dump_block_entry(fs_info, be);
  726. dump_ref_action(fs_info, ra);
  727. kfree(ref);
  728. kfree(ra);
  729. goto out_unlock;
  730. }
  731. exist->num_refs--;
  732. if (exist->num_refs == 0) {
  733. rb_erase(&exist->node, &be->refs);
  734. kfree(exist);
  735. }
  736. } else if (!be->metadata) {
  737. exist->num_refs++;
  738. } else {
  739. btrfs_err(fs_info,
  740. "attempting to add another ref for an existing ref on a tree block");
  741. dump_block_entry(fs_info, be);
  742. dump_ref_action(fs_info, ra);
  743. kfree(ref);
  744. kfree(ra);
  745. goto out_unlock;
  746. }
  747. kfree(ref);
  748. } else {
  749. if (action == BTRFS_DROP_DELAYED_REF) {
  750. btrfs_err(fs_info,
  751. "dropping a ref for a root that doesn't have a ref on the block");
  752. dump_block_entry(fs_info, be);
  753. dump_ref_action(fs_info, ra);
  754. rb_erase(&ref->node, &be->refs);
  755. kfree(ref);
  756. kfree(ra);
  757. goto out_unlock;
  758. }
  759. }
  760. if (!parent && !re) {
  761. re = lookup_root_entry(&be->roots, ref_root);
  762. if (!re) {
  763. /*
  764. * This shouldn't happen because we will add our re
  765. * above when we lookup the be with !parent, but just in
  766. * case catch this case so we don't panic because I
  767. * didn't think of some other corner case.
  768. */
  769. btrfs_err(fs_info, "failed to find root %llu for %llu",
  770. generic_ref->real_root, be->bytenr);
  771. dump_block_entry(fs_info, be);
  772. dump_ref_action(fs_info, ra);
  773. kfree(ra);
  774. goto out_unlock;
  775. }
  776. }
  777. if (action == BTRFS_DROP_DELAYED_REF) {
  778. if (re)
  779. re->num_refs--;
  780. be->num_refs--;
  781. } else if (action == BTRFS_ADD_DELAYED_REF) {
  782. be->num_refs++;
  783. if (re)
  784. re->num_refs++;
  785. }
  786. list_add_tail(&ra->list, &be->actions);
  787. ret = 0;
  788. out_unlock:
  789. spin_unlock(&fs_info->ref_verify_lock);
  790. out:
  791. if (ret) {
  792. btrfs_free_ref_cache(fs_info);
  793. btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
  794. }
  795. return ret;
  796. }
  797. /* Free up the ref cache */
  798. void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
  799. {
  800. struct block_entry *be;
  801. struct rb_node *n;
  802. if (!btrfs_test_opt(fs_info, REF_VERIFY))
  803. return;
  804. spin_lock(&fs_info->ref_verify_lock);
  805. while ((n = rb_first(&fs_info->block_tree))) {
  806. be = rb_entry(n, struct block_entry, node);
  807. rb_erase(&be->node, &fs_info->block_tree);
  808. free_block_entry(be);
  809. cond_resched_lock(&fs_info->ref_verify_lock);
  810. }
  811. spin_unlock(&fs_info->ref_verify_lock);
  812. }
  813. void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
  814. u64 len)
  815. {
  816. struct block_entry *be = NULL, *entry;
  817. struct rb_node *n;
  818. if (!btrfs_test_opt(fs_info, REF_VERIFY))
  819. return;
  820. spin_lock(&fs_info->ref_verify_lock);
  821. n = fs_info->block_tree.rb_node;
  822. while (n) {
  823. entry = rb_entry(n, struct block_entry, node);
  824. if (entry->bytenr < start) {
  825. n = n->rb_right;
  826. } else if (entry->bytenr > start) {
  827. n = n->rb_left;
  828. } else {
  829. be = entry;
  830. break;
  831. }
  832. /* We want to get as close to start as possible */
  833. if (be == NULL ||
  834. (entry->bytenr < start && be->bytenr > start) ||
  835. (entry->bytenr < start && entry->bytenr > be->bytenr))
  836. be = entry;
  837. }
  838. /*
  839. * Could have an empty block group, maybe have something to check for
  840. * this case to verify we were actually empty?
  841. */
  842. if (!be) {
  843. spin_unlock(&fs_info->ref_verify_lock);
  844. return;
  845. }
  846. n = &be->node;
  847. while (n) {
  848. be = rb_entry(n, struct block_entry, node);
  849. n = rb_next(n);
  850. if (be->bytenr < start && be->bytenr + be->len > start) {
  851. btrfs_err(fs_info,
  852. "block entry overlaps a block group [%llu,%llu]!",
  853. start, len);
  854. dump_block_entry(fs_info, be);
  855. continue;
  856. }
  857. if (be->bytenr < start)
  858. continue;
  859. if (be->bytenr >= start + len)
  860. break;
  861. if (be->bytenr + be->len > start + len) {
  862. btrfs_err(fs_info,
  863. "block entry overlaps a block group [%llu,%llu]!",
  864. start, len);
  865. dump_block_entry(fs_info, be);
  866. }
  867. rb_erase(&be->node, &fs_info->block_tree);
  868. free_block_entry(be);
  869. }
  870. spin_unlock(&fs_info->ref_verify_lock);
  871. }
  872. /* Walk down all roots and build the ref tree, meant to be called at mount */
  873. int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
  874. {
  875. struct btrfs_root *extent_root;
  876. BTRFS_PATH_AUTO_FREE(path);
  877. struct extent_buffer *eb;
  878. int tree_block_level = 0;
  879. u64 bytenr = 0, num_bytes = 0;
  880. int ret, level;
  881. if (!btrfs_test_opt(fs_info, REF_VERIFY))
  882. return 0;
  883. extent_root = btrfs_extent_root(fs_info, 0);
  884. /* If the extent tree is damaged we cannot ignore it (IGNOREBADROOTS). */
  885. if (!extent_root) {
  886. btrfs_warn(fs_info, "ref-verify: extent tree not available, disabling");
  887. btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
  888. return 0;
  889. }
  890. path = btrfs_alloc_path();
  891. if (!path)
  892. return -ENOMEM;
  893. eb = btrfs_read_lock_root_node(extent_root);
  894. level = btrfs_header_level(eb);
  895. path->nodes[level] = eb;
  896. path->slots[level] = 0;
  897. path->locks[level] = BTRFS_READ_LOCK;
  898. while (1) {
  899. /*
  900. * We have to keep track of the bytenr/num_bytes we last hit
  901. * because we could have run out of space for an inline ref, and
  902. * would have had to added a ref key item which may appear on a
  903. * different leaf from the original extent item.
  904. */
  905. ret = walk_down_tree(extent_root, path, level,
  906. &bytenr, &num_bytes, &tree_block_level);
  907. if (ret)
  908. break;
  909. ret = walk_up_tree(path, &level);
  910. if (ret < 0)
  911. break;
  912. if (ret > 0) {
  913. ret = 0;
  914. break;
  915. }
  916. }
  917. if (ret) {
  918. btrfs_free_ref_cache(fs_info);
  919. btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
  920. }
  921. return ret;
  922. }