free-space-tree.c 48 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2015 Facebook. All rights reserved.
  4. */
  5. #include <linux/kernel.h>
  6. #include <linux/sched/mm.h>
  7. #include "messages.h"
  8. #include "ctree.h"
  9. #include "disk-io.h"
  10. #include "locking.h"
  11. #include "free-space-tree.h"
  12. #include "transaction.h"
  13. #include "block-group.h"
  14. #include "fs.h"
  15. #include "accessors.h"
  16. #include "extent-tree.h"
  17. #include "root-tree.h"
  18. static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
  19. struct btrfs_block_group *block_group,
  20. struct btrfs_path *path);
  21. struct btrfs_root *btrfs_free_space_root(struct btrfs_block_group *block_group)
  22. {
  23. struct btrfs_key key = {
  24. .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
  25. .type = BTRFS_ROOT_ITEM_KEY,
  26. .offset = 0,
  27. };
  28. if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
  29. key.offset = block_group->global_root_id;
  30. return btrfs_global_root(block_group->fs_info, &key);
  31. }
  32. void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
  33. {
  34. u32 bitmap_range;
  35. size_t bitmap_size;
  36. u64 num_bitmaps, total_bitmap_size;
  37. if (WARN_ON(cache->length == 0))
  38. btrfs_warn(cache->fs_info, "block group %llu length is zero",
  39. cache->start);
  40. /*
  41. * We convert to bitmaps when the disk space required for using extents
  42. * exceeds that required for using bitmaps.
  43. */
  44. bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
  45. num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
  46. bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
  47. total_bitmap_size = num_bitmaps * bitmap_size;
  48. cache->bitmap_high_thresh = div_u64(total_bitmap_size,
  49. sizeof(struct btrfs_item));
  50. /*
  51. * We allow for a small buffer between the high threshold and low
  52. * threshold to avoid thrashing back and forth between the two formats.
  53. */
  54. if (cache->bitmap_high_thresh > 100)
  55. cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
  56. else
  57. cache->bitmap_low_thresh = 0;
  58. }
  59. static int add_new_free_space_info(struct btrfs_trans_handle *trans,
  60. struct btrfs_block_group *block_group,
  61. struct btrfs_path *path)
  62. {
  63. struct btrfs_root *root = btrfs_free_space_root(block_group);
  64. struct btrfs_free_space_info *info;
  65. struct btrfs_key key;
  66. struct extent_buffer *leaf;
  67. int ret;
  68. key.objectid = block_group->start;
  69. key.type = BTRFS_FREE_SPACE_INFO_KEY;
  70. key.offset = block_group->length;
  71. ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
  72. if (ret)
  73. return ret;
  74. leaf = path->nodes[0];
  75. info = btrfs_item_ptr(leaf, path->slots[0],
  76. struct btrfs_free_space_info);
  77. btrfs_set_free_space_extent_count(leaf, info, 0);
  78. btrfs_set_free_space_flags(leaf, info, 0);
  79. btrfs_release_path(path);
  80. return 0;
  81. }
  82. struct btrfs_free_space_info *btrfs_search_free_space_info(
  83. struct btrfs_trans_handle *trans,
  84. struct btrfs_block_group *block_group,
  85. struct btrfs_path *path, int cow)
  86. {
  87. struct btrfs_fs_info *fs_info = block_group->fs_info;
  88. struct btrfs_root *root = btrfs_free_space_root(block_group);
  89. struct btrfs_key key;
  90. int ret;
  91. key.objectid = block_group->start;
  92. key.type = BTRFS_FREE_SPACE_INFO_KEY;
  93. key.offset = block_group->length;
  94. ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
  95. if (ret < 0)
  96. return ERR_PTR(ret);
  97. if (ret != 0) {
  98. btrfs_warn(fs_info, "missing free space info for %llu",
  99. block_group->start);
  100. DEBUG_WARN();
  101. return ERR_PTR(-ENOENT);
  102. }
  103. return btrfs_item_ptr(path->nodes[0], path->slots[0],
  104. struct btrfs_free_space_info);
  105. }
  106. /*
  107. * btrfs_search_slot() but we're looking for the greatest key less than the
  108. * passed key.
  109. */
  110. static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
  111. struct btrfs_root *root,
  112. struct btrfs_key *key, struct btrfs_path *p,
  113. int ins_len, int cow)
  114. {
  115. int ret;
  116. ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
  117. if (ret < 0)
  118. return ret;
  119. if (unlikely(ret == 0)) {
  120. DEBUG_WARN();
  121. return -EIO;
  122. }
  123. if (unlikely(p->slots[0] == 0)) {
  124. DEBUG_WARN("no previous slot found");
  125. return -EIO;
  126. }
  127. p->slots[0]--;
  128. return 0;
  129. }
  130. static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
  131. u64 size)
  132. {
  133. return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
  134. }
  135. static unsigned long *alloc_bitmap(u32 bitmap_size)
  136. {
  137. unsigned long *ret;
  138. unsigned int nofs_flag;
  139. u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
  140. /*
  141. * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
  142. * into the filesystem here. All callers hold a transaction handle
  143. * open, so if a GFP_KERNEL allocation recurses into the filesystem
  144. * and triggers a transaction commit, we would deadlock.
  145. */
  146. nofs_flag = memalloc_nofs_save();
  147. ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
  148. memalloc_nofs_restore(nofs_flag);
  149. return ret;
  150. }
  151. static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
  152. {
  153. u8 *p = ((u8 *)map) + BIT_BYTE(start);
  154. const unsigned int size = start + len;
  155. int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
  156. u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
  157. while (len - bits_to_set >= 0) {
  158. *p |= mask_to_set;
  159. len -= bits_to_set;
  160. bits_to_set = BITS_PER_BYTE;
  161. mask_to_set = ~0;
  162. p++;
  163. }
  164. if (len) {
  165. mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
  166. *p |= mask_to_set;
  167. }
  168. }
  169. EXPORT_FOR_TESTS
  170. int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
  171. struct btrfs_block_group *block_group,
  172. struct btrfs_path *path)
  173. {
  174. struct btrfs_fs_info *fs_info = trans->fs_info;
  175. struct btrfs_root *root = btrfs_free_space_root(block_group);
  176. struct btrfs_free_space_info *info;
  177. struct btrfs_key key, found_key;
  178. struct extent_buffer *leaf;
  179. unsigned long *bitmap;
  180. char *bitmap_cursor;
  181. u64 start, end;
  182. u64 bitmap_range, i;
  183. u32 bitmap_size, flags, expected_extent_count;
  184. u32 extent_count = 0;
  185. int done = 0, nr;
  186. int ret;
  187. bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
  188. bitmap = alloc_bitmap(bitmap_size);
  189. if (unlikely(!bitmap))
  190. return 0;
  191. start = block_group->start;
  192. end = btrfs_block_group_end(block_group);
  193. key.objectid = end - 1;
  194. key.type = (u8)-1;
  195. key.offset = (u64)-1;
  196. while (!done) {
  197. ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
  198. if (unlikely(ret)) {
  199. btrfs_abort_transaction(trans, ret);
  200. goto out;
  201. }
  202. leaf = path->nodes[0];
  203. nr = 0;
  204. path->slots[0]++;
  205. while (path->slots[0] > 0) {
  206. btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
  207. if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
  208. ASSERT(found_key.objectid == block_group->start);
  209. ASSERT(found_key.offset == block_group->length);
  210. done = 1;
  211. break;
  212. } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
  213. u64 first, last;
  214. ASSERT(found_key.objectid >= start);
  215. ASSERT(found_key.objectid < end);
  216. ASSERT(found_key.objectid + found_key.offset <= end);
  217. first = div_u64(found_key.objectid - start,
  218. fs_info->sectorsize);
  219. last = div_u64(found_key.objectid + found_key.offset - start,
  220. fs_info->sectorsize);
  221. le_bitmap_set(bitmap, first, last - first);
  222. extent_count++;
  223. nr++;
  224. path->slots[0]--;
  225. } else {
  226. ASSERT(0);
  227. }
  228. }
  229. ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
  230. if (unlikely(ret)) {
  231. btrfs_abort_transaction(trans, ret);
  232. goto out;
  233. }
  234. btrfs_release_path(path);
  235. }
  236. info = btrfs_search_free_space_info(trans, block_group, path, 1);
  237. if (IS_ERR(info)) {
  238. ret = PTR_ERR(info);
  239. btrfs_abort_transaction(trans, ret);
  240. goto out;
  241. }
  242. leaf = path->nodes[0];
  243. flags = btrfs_free_space_flags(leaf, info);
  244. flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
  245. block_group->using_free_space_bitmaps = true;
  246. block_group->using_free_space_bitmaps_cached = true;
  247. btrfs_set_free_space_flags(leaf, info, flags);
  248. expected_extent_count = btrfs_free_space_extent_count(leaf, info);
  249. btrfs_release_path(path);
  250. if (unlikely(extent_count != expected_extent_count)) {
  251. btrfs_err(fs_info,
  252. "incorrect extent count for %llu; counted %u, expected %u",
  253. block_group->start, extent_count,
  254. expected_extent_count);
  255. ret = -EIO;
  256. btrfs_abort_transaction(trans, ret);
  257. goto out;
  258. }
  259. bitmap_cursor = (char *)bitmap;
  260. bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
  261. i = start;
  262. while (i < end) {
  263. unsigned long ptr;
  264. u64 extent_size;
  265. u32 data_size;
  266. extent_size = min(end - i, bitmap_range);
  267. data_size = free_space_bitmap_size(fs_info, extent_size);
  268. key.objectid = i;
  269. key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
  270. key.offset = extent_size;
  271. ret = btrfs_insert_empty_item(trans, root, path, &key,
  272. data_size);
  273. if (unlikely(ret)) {
  274. btrfs_abort_transaction(trans, ret);
  275. goto out;
  276. }
  277. leaf = path->nodes[0];
  278. ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
  279. write_extent_buffer(leaf, bitmap_cursor, ptr,
  280. data_size);
  281. btrfs_release_path(path);
  282. i += extent_size;
  283. bitmap_cursor += data_size;
  284. }
  285. ret = 0;
  286. out:
  287. kvfree(bitmap);
  288. return ret;
  289. }
  290. EXPORT_FOR_TESTS
  291. int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
  292. struct btrfs_block_group *block_group,
  293. struct btrfs_path *path)
  294. {
  295. struct btrfs_fs_info *fs_info = trans->fs_info;
  296. struct btrfs_root *root = btrfs_free_space_root(block_group);
  297. struct btrfs_free_space_info *info;
  298. struct btrfs_key key, found_key;
  299. struct extent_buffer *leaf;
  300. unsigned long *bitmap;
  301. u64 start, end;
  302. u32 bitmap_size, flags, expected_extent_count;
  303. unsigned long nrbits, start_bit, end_bit;
  304. u32 extent_count = 0;
  305. int done = 0, nr;
  306. int ret;
  307. bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
  308. bitmap = alloc_bitmap(bitmap_size);
  309. if (unlikely(!bitmap))
  310. return 0;
  311. start = block_group->start;
  312. end = btrfs_block_group_end(block_group);
  313. key.objectid = end - 1;
  314. key.type = (u8)-1;
  315. key.offset = (u64)-1;
  316. while (!done) {
  317. ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
  318. if (unlikely(ret)) {
  319. btrfs_abort_transaction(trans, ret);
  320. goto out;
  321. }
  322. leaf = path->nodes[0];
  323. nr = 0;
  324. path->slots[0]++;
  325. while (path->slots[0] > 0) {
  326. btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
  327. if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
  328. ASSERT(found_key.objectid == block_group->start);
  329. ASSERT(found_key.offset == block_group->length);
  330. done = 1;
  331. break;
  332. } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
  333. unsigned long ptr;
  334. char *bitmap_cursor;
  335. u32 bitmap_pos, data_size;
  336. ASSERT(found_key.objectid >= start);
  337. ASSERT(found_key.objectid < end);
  338. ASSERT(found_key.objectid + found_key.offset <= end);
  339. bitmap_pos = div_u64(found_key.objectid - start,
  340. fs_info->sectorsize *
  341. BITS_PER_BYTE);
  342. bitmap_cursor = ((char *)bitmap) + bitmap_pos;
  343. data_size = free_space_bitmap_size(fs_info,
  344. found_key.offset);
  345. path->slots[0]--;
  346. ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
  347. read_extent_buffer(leaf, bitmap_cursor, ptr,
  348. data_size);
  349. nr++;
  350. } else {
  351. ASSERT(0);
  352. }
  353. }
  354. ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
  355. if (unlikely(ret)) {
  356. btrfs_abort_transaction(trans, ret);
  357. goto out;
  358. }
  359. btrfs_release_path(path);
  360. }
  361. info = btrfs_search_free_space_info(trans, block_group, path, 1);
  362. if (IS_ERR(info)) {
  363. ret = PTR_ERR(info);
  364. btrfs_abort_transaction(trans, ret);
  365. goto out;
  366. }
  367. leaf = path->nodes[0];
  368. flags = btrfs_free_space_flags(leaf, info);
  369. flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
  370. block_group->using_free_space_bitmaps = false;
  371. block_group->using_free_space_bitmaps_cached = true;
  372. btrfs_set_free_space_flags(leaf, info, flags);
  373. expected_extent_count = btrfs_free_space_extent_count(leaf, info);
  374. btrfs_release_path(path);
  375. nrbits = block_group->length >> fs_info->sectorsize_bits;
  376. start_bit = find_next_bit_le(bitmap, nrbits, 0);
  377. while (start_bit < nrbits) {
  378. end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
  379. ASSERT(start_bit < end_bit);
  380. key.objectid = start + start_bit * fs_info->sectorsize;
  381. key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
  382. key.offset = (end_bit - start_bit) * fs_info->sectorsize;
  383. ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
  384. if (unlikely(ret)) {
  385. btrfs_abort_transaction(trans, ret);
  386. goto out;
  387. }
  388. btrfs_release_path(path);
  389. extent_count++;
  390. start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
  391. }
  392. if (unlikely(extent_count != expected_extent_count)) {
  393. btrfs_err(fs_info,
  394. "incorrect extent count for %llu; counted %u, expected %u",
  395. block_group->start, extent_count,
  396. expected_extent_count);
  397. ret = -EIO;
  398. btrfs_abort_transaction(trans, ret);
  399. goto out;
  400. }
  401. ret = 0;
  402. out:
  403. kvfree(bitmap);
  404. return ret;
  405. }
  406. static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
  407. struct btrfs_block_group *block_group,
  408. struct btrfs_path *path,
  409. int new_extents)
  410. {
  411. struct btrfs_free_space_info *info;
  412. u32 flags;
  413. u32 extent_count;
  414. int ret = 0;
  415. if (new_extents == 0)
  416. return 0;
  417. info = btrfs_search_free_space_info(trans, block_group, path, 1);
  418. if (IS_ERR(info))
  419. return PTR_ERR(info);
  420. flags = btrfs_free_space_flags(path->nodes[0], info);
  421. extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
  422. extent_count += new_extents;
  423. btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
  424. btrfs_release_path(path);
  425. if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
  426. extent_count > block_group->bitmap_high_thresh) {
  427. ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
  428. } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
  429. extent_count < block_group->bitmap_low_thresh) {
  430. ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
  431. }
  432. return ret;
  433. }
  434. EXPORT_FOR_TESTS
  435. bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
  436. struct btrfs_path *path, u64 offset)
  437. {
  438. struct extent_buffer *leaf;
  439. struct btrfs_key key;
  440. u64 found_start, found_end;
  441. unsigned long ptr, i;
  442. leaf = path->nodes[0];
  443. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  444. ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
  445. found_start = key.objectid;
  446. found_end = key.objectid + key.offset;
  447. ASSERT(offset >= found_start && offset < found_end);
  448. ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
  449. i = div_u64(offset - found_start,
  450. block_group->fs_info->sectorsize);
  451. return extent_buffer_test_bit(leaf, ptr, i);
  452. }
  453. static void free_space_modify_bits(struct btrfs_trans_handle *trans,
  454. struct btrfs_block_group *block_group,
  455. struct btrfs_path *path, u64 *start, u64 *size,
  456. bool set_bits)
  457. {
  458. struct btrfs_fs_info *fs_info = block_group->fs_info;
  459. struct extent_buffer *leaf;
  460. struct btrfs_key key;
  461. u64 end = *start + *size;
  462. u64 found_start, found_end;
  463. unsigned long ptr, first, last;
  464. leaf = path->nodes[0];
  465. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  466. ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
  467. found_start = key.objectid;
  468. found_end = key.objectid + key.offset;
  469. ASSERT(*start >= found_start && *start < found_end);
  470. ASSERT(end > found_start);
  471. if (end > found_end)
  472. end = found_end;
  473. ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
  474. first = (*start - found_start) >> fs_info->sectorsize_bits;
  475. last = (end - found_start) >> fs_info->sectorsize_bits;
  476. if (set_bits)
  477. extent_buffer_bitmap_set(leaf, ptr, first, last - first);
  478. else
  479. extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
  480. btrfs_mark_buffer_dirty(trans, leaf);
  481. *size -= end - *start;
  482. *start = end;
  483. }
  484. /*
  485. * We can't use btrfs_next_item() in modify_free_space_bitmap() because
  486. * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
  487. * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
  488. * looking for.
  489. */
  490. static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
  491. struct btrfs_root *root, struct btrfs_path *p)
  492. {
  493. struct btrfs_key key;
  494. if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
  495. p->slots[0]++;
  496. return 0;
  497. }
  498. btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
  499. btrfs_release_path(p);
  500. key.objectid += key.offset;
  501. key.type = (u8)-1;
  502. key.offset = (u64)-1;
  503. return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
  504. }
  505. /*
  506. * If remove is 1, then we are removing free space, thus clearing bits in the
  507. * bitmap. If remove is 0, then we are adding free space, thus setting bits in
  508. * the bitmap.
  509. */
  510. static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
  511. struct btrfs_block_group *block_group,
  512. struct btrfs_path *path,
  513. u64 start, u64 size, bool remove)
  514. {
  515. struct btrfs_root *root = btrfs_free_space_root(block_group);
  516. struct btrfs_key key;
  517. u64 end = start + size;
  518. u64 cur_start, cur_size;
  519. bool prev_bit_set = false;
  520. bool next_bit_set = false;
  521. int new_extents;
  522. int ret;
  523. /*
  524. * Read the bit for the block immediately before the extent of space if
  525. * that block is within the block group.
  526. */
  527. if (start > block_group->start) {
  528. u64 prev_block = start - block_group->fs_info->sectorsize;
  529. key.objectid = prev_block;
  530. key.type = (u8)-1;
  531. key.offset = (u64)-1;
  532. ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
  533. if (ret)
  534. return ret;
  535. prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);
  536. /* The previous block may have been in the previous bitmap. */
  537. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  538. if (start >= key.objectid + key.offset) {
  539. ret = free_space_next_bitmap(trans, root, path);
  540. if (ret)
  541. return ret;
  542. }
  543. } else {
  544. key.objectid = start;
  545. key.type = (u8)-1;
  546. key.offset = (u64)-1;
  547. ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
  548. if (ret)
  549. return ret;
  550. }
  551. /*
  552. * Iterate over all of the bitmaps overlapped by the extent of space,
  553. * clearing/setting bits as required.
  554. */
  555. cur_start = start;
  556. cur_size = size;
  557. while (1) {
  558. free_space_modify_bits(trans, block_group, path, &cur_start,
  559. &cur_size, !remove);
  560. if (cur_size == 0)
  561. break;
  562. ret = free_space_next_bitmap(trans, root, path);
  563. if (ret)
  564. return ret;
  565. }
  566. /*
  567. * Read the bit for the block immediately after the extent of space if
  568. * that block is within the block group.
  569. */
  570. if (end < btrfs_block_group_end(block_group)) {
  571. /* The next block may be in the next bitmap. */
  572. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  573. if (end >= key.objectid + key.offset) {
  574. ret = free_space_next_bitmap(trans, root, path);
  575. if (ret)
  576. return ret;
  577. }
  578. next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
  579. }
  580. if (remove) {
  581. new_extents = -1;
  582. if (prev_bit_set) {
  583. /* Leftover on the left. */
  584. new_extents++;
  585. }
  586. if (next_bit_set) {
  587. /* Leftover on the right. */
  588. new_extents++;
  589. }
  590. } else {
  591. new_extents = 1;
  592. if (prev_bit_set) {
  593. /* Merging with neighbor on the left. */
  594. new_extents--;
  595. }
  596. if (next_bit_set) {
  597. /* Merging with neighbor on the right. */
  598. new_extents--;
  599. }
  600. }
  601. btrfs_release_path(path);
  602. return update_free_space_extent_count(trans, block_group, path, new_extents);
  603. }
  604. static int remove_free_space_extent(struct btrfs_trans_handle *trans,
  605. struct btrfs_block_group *block_group,
  606. struct btrfs_path *path,
  607. u64 start, u64 size)
  608. {
  609. struct btrfs_root *root = btrfs_free_space_root(block_group);
  610. struct btrfs_key key;
  611. u64 found_start, found_end;
  612. u64 end = start + size;
  613. int new_extents = -1;
  614. int ret;
  615. key.objectid = start;
  616. key.type = (u8)-1;
  617. key.offset = (u64)-1;
  618. ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
  619. if (ret)
  620. return ret;
  621. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  622. ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
  623. found_start = key.objectid;
  624. found_end = key.objectid + key.offset;
  625. ASSERT(start >= found_start && end <= found_end);
  626. /*
  627. * Okay, now that we've found the free space extent which contains the
  628. * free space that we are removing, there are four cases:
  629. *
  630. * 1. We're using the whole extent: delete the key we found and
  631. * decrement the free space extent count.
  632. * 2. We are using part of the extent starting at the beginning: delete
  633. * the key we found and insert a new key representing the leftover at
  634. * the end. There is no net change in the number of extents.
  635. * 3. We are using part of the extent ending at the end: delete the key
  636. * we found and insert a new key representing the leftover at the
  637. * beginning. There is no net change in the number of extents.
  638. * 4. We are using part of the extent in the middle: delete the key we
  639. * found and insert two new keys representing the leftovers on each
  640. * side. Where we used to have one extent, we now have two, so increment
  641. * the extent count. We may need to convert the block group to bitmaps
  642. * as a result.
  643. */
  644. /* Delete the existing key (cases 1-4). */
  645. ret = btrfs_del_item(trans, root, path);
  646. if (ret)
  647. return ret;
  648. /* Add a key for leftovers at the beginning (cases 3 and 4). */
  649. if (start > found_start) {
  650. key.objectid = found_start;
  651. key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
  652. key.offset = start - found_start;
  653. btrfs_release_path(path);
  654. ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
  655. if (ret)
  656. return ret;
  657. new_extents++;
  658. }
  659. /* Add a key for leftovers at the end (cases 2 and 4). */
  660. if (end < found_end) {
  661. key.objectid = end;
  662. key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
  663. key.offset = found_end - end;
  664. btrfs_release_path(path);
  665. ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
  666. if (ret)
  667. return ret;
  668. new_extents++;
  669. }
  670. btrfs_release_path(path);
  671. return update_free_space_extent_count(trans, block_group, path, new_extents);
  672. }
  673. static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
  674. {
  675. struct btrfs_free_space_info *info;
  676. u32 flags;
  677. if (bg->using_free_space_bitmaps_cached)
  678. return bg->using_free_space_bitmaps;
  679. info = btrfs_search_free_space_info(NULL, bg, path, 0);
  680. if (IS_ERR(info))
  681. return PTR_ERR(info);
  682. flags = btrfs_free_space_flags(path->nodes[0], info);
  683. btrfs_release_path(path);
  684. bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
  685. bg->using_free_space_bitmaps_cached = true;
  686. return bg->using_free_space_bitmaps;
  687. }
  688. EXPORT_FOR_TESTS
  689. int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
  690. struct btrfs_block_group *block_group,
  691. struct btrfs_path *path, u64 start, u64 size)
  692. {
  693. int ret;
  694. ret = __add_block_group_free_space(trans, block_group, path);
  695. if (ret)
  696. return ret;
  697. ret = using_bitmaps(block_group, path);
  698. if (ret < 0)
  699. return ret;
  700. if (ret)
  701. return modify_free_space_bitmap(trans, block_group, path,
  702. start, size, true);
  703. return remove_free_space_extent(trans, block_group, path, start, size);
  704. }
  705. int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
  706. u64 start, u64 size)
  707. {
  708. struct btrfs_block_group *block_group;
  709. BTRFS_PATH_AUTO_FREE(path);
  710. int ret;
  711. if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
  712. return 0;
  713. path = btrfs_alloc_path();
  714. if (unlikely(!path)) {
  715. ret = -ENOMEM;
  716. btrfs_abort_transaction(trans, ret);
  717. return ret;
  718. }
  719. block_group = btrfs_lookup_block_group(trans->fs_info, start);
  720. if (unlikely(!block_group)) {
  721. DEBUG_WARN("no block group found for start=%llu", start);
  722. ret = -ENOENT;
  723. btrfs_abort_transaction(trans, ret);
  724. return ret;
  725. }
  726. mutex_lock(&block_group->free_space_lock);
  727. ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size);
  728. mutex_unlock(&block_group->free_space_lock);
  729. if (ret)
  730. btrfs_abort_transaction(trans, ret);
  731. btrfs_put_block_group(block_group);
  732. return ret;
  733. }
  734. static int add_free_space_extent(struct btrfs_trans_handle *trans,
  735. struct btrfs_block_group *block_group,
  736. struct btrfs_path *path,
  737. u64 start, u64 size)
  738. {
  739. struct btrfs_root *root = btrfs_free_space_root(block_group);
  740. struct btrfs_key key, new_key;
  741. u64 found_start, found_end;
  742. u64 end = start + size;
  743. int new_extents = 1;
  744. int ret;
  745. /*
  746. * We are adding a new extent of free space, but we need to merge
  747. * extents. There are four cases here:
  748. *
  749. * 1. The new extent does not have any immediate neighbors to merge
  750. * with: add the new key and increment the free space extent count. We
  751. * may need to convert the block group to bitmaps as a result.
  752. * 2. The new extent has an immediate neighbor before it: remove the
  753. * previous key and insert a new key combining both of them. There is no
  754. * net change in the number of extents.
  755. * 3. The new extent has an immediate neighbor after it: remove the next
  756. * key and insert a new key combining both of them. There is no net
  757. * change in the number of extents.
  758. * 4. The new extent has immediate neighbors on both sides: remove both
  759. * of the keys and insert a new key combining all of them. Where we used
  760. * to have two extents, we now have one, so decrement the extent count.
  761. */
  762. new_key.objectid = start;
  763. new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
  764. new_key.offset = size;
  765. /* Search for a neighbor on the left. */
  766. if (start == block_group->start)
  767. goto right;
  768. key.objectid = start - 1;
  769. key.type = (u8)-1;
  770. key.offset = (u64)-1;
  771. ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
  772. if (ret)
  773. return ret;
  774. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  775. if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
  776. ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
  777. btrfs_release_path(path);
  778. goto right;
  779. }
  780. found_start = key.objectid;
  781. found_end = key.objectid + key.offset;
  782. ASSERT(found_start >= block_group->start &&
  783. found_end > block_group->start);
  784. ASSERT(found_start < start && found_end <= start);
  785. /*
  786. * Delete the neighbor on the left and absorb it into the new key (cases
  787. * 2 and 4).
  788. */
  789. if (found_end == start) {
  790. ret = btrfs_del_item(trans, root, path);
  791. if (ret)
  792. return ret;
  793. new_key.objectid = found_start;
  794. new_key.offset += key.offset;
  795. new_extents--;
  796. }
  797. btrfs_release_path(path);
  798. right:
  799. /* Search for a neighbor on the right. */
  800. if (end == btrfs_block_group_end(block_group))
  801. goto insert;
  802. key.objectid = end;
  803. key.type = (u8)-1;
  804. key.offset = (u64)-1;
  805. ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
  806. if (ret)
  807. return ret;
  808. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  809. if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
  810. ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
  811. btrfs_release_path(path);
  812. goto insert;
  813. }
  814. found_start = key.objectid;
  815. found_end = key.objectid + key.offset;
  816. ASSERT(found_start >= block_group->start &&
  817. found_end > block_group->start);
  818. ASSERT((found_start < start && found_end <= start) ||
  819. (found_start >= end && found_end > end));
  820. /*
  821. * Delete the neighbor on the right and absorb it into the new key
  822. * (cases 3 and 4).
  823. */
  824. if (found_start == end) {
  825. ret = btrfs_del_item(trans, root, path);
  826. if (ret)
  827. return ret;
  828. new_key.offset += key.offset;
  829. new_extents--;
  830. }
  831. btrfs_release_path(path);
  832. insert:
  833. /* Insert the new key (cases 1-4). */
  834. ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
  835. if (ret)
  836. return ret;
  837. btrfs_release_path(path);
  838. return update_free_space_extent_count(trans, block_group, path, new_extents);
  839. }
  840. EXPORT_FOR_TESTS
  841. int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
  842. struct btrfs_block_group *block_group,
  843. struct btrfs_path *path, u64 start, u64 size)
  844. {
  845. int ret;
  846. ret = __add_block_group_free_space(trans, block_group, path);
  847. if (ret)
  848. return ret;
  849. ret = using_bitmaps(block_group, path);
  850. if (ret < 0)
  851. return ret;
  852. if (ret)
  853. return modify_free_space_bitmap(trans, block_group, path,
  854. start, size, false);
  855. return add_free_space_extent(trans, block_group, path, start, size);
  856. }
  857. int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
  858. u64 start, u64 size)
  859. {
  860. struct btrfs_block_group *block_group;
  861. BTRFS_PATH_AUTO_FREE(path);
  862. int ret;
  863. if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
  864. return 0;
  865. path = btrfs_alloc_path();
  866. if (unlikely(!path)) {
  867. ret = -ENOMEM;
  868. btrfs_abort_transaction(trans, ret);
  869. return ret;
  870. }
  871. block_group = btrfs_lookup_block_group(trans->fs_info, start);
  872. if (unlikely(!block_group)) {
  873. DEBUG_WARN("no block group found for start=%llu", start);
  874. ret = -ENOENT;
  875. btrfs_abort_transaction(trans, ret);
  876. return ret;
  877. }
  878. mutex_lock(&block_group->free_space_lock);
  879. ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size);
  880. mutex_unlock(&block_group->free_space_lock);
  881. if (ret)
  882. btrfs_abort_transaction(trans, ret);
  883. btrfs_put_block_group(block_group);
  884. return ret;
  885. }
  886. /*
  887. * Populate the free space tree by walking the extent tree. Operations on the
  888. * extent tree that happen as a result of writes to the free space tree will go
  889. * through the normal add/remove hooks.
  890. */
  891. static int populate_free_space_tree(struct btrfs_trans_handle *trans,
  892. struct btrfs_block_group *block_group)
  893. {
  894. struct btrfs_root *extent_root;
  895. BTRFS_PATH_AUTO_FREE(path);
  896. BTRFS_PATH_AUTO_FREE(path2);
  897. struct btrfs_key key;
  898. u64 start, end;
  899. int ret;
  900. path = btrfs_alloc_path();
  901. if (!path)
  902. return -ENOMEM;
  903. path2 = btrfs_alloc_path();
  904. if (!path2)
  905. return -ENOMEM;
  906. path->reada = READA_FORWARD;
  907. ret = add_new_free_space_info(trans, block_group, path2);
  908. if (ret)
  909. return ret;
  910. extent_root = btrfs_extent_root(trans->fs_info, block_group->start);
  911. if (unlikely(!extent_root)) {
  912. btrfs_err(trans->fs_info,
  913. "missing extent root for block group at offset %llu",
  914. block_group->start);
  915. return -EUCLEAN;
  916. }
  917. mutex_lock(&block_group->free_space_lock);
  918. /*
  919. * Iterate through all of the extent and metadata items in this block
  920. * group, adding the free space between them and the free space at the
  921. * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
  922. * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
  923. * contained in.
  924. */
  925. key.objectid = block_group->start;
  926. key.type = BTRFS_EXTENT_ITEM_KEY;
  927. key.offset = 0;
  928. ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
  929. if (ret < 0)
  930. goto out_locked;
  931. /*
  932. * If ret is 1 (no key found), it means this is an empty block group,
  933. * without any extents allocated from it and there's no block group
  934. * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
  935. * because we are using the block group tree feature (so block group
  936. * items are stored in the block group tree) or this is a new block
  937. * group created in the current transaction and its block group item
  938. * was not yet inserted in the extent tree (that happens in
  939. * btrfs_create_pending_block_groups() -> insert_block_group_item()).
  940. * It also means there are no extents allocated for block groups with a
  941. * start offset beyond this block group's end offset (this is the last,
  942. * highest, block group).
  943. */
  944. start = block_group->start;
  945. end = btrfs_block_group_end(block_group);
  946. while (ret == 0) {
  947. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  948. if (key.type == BTRFS_EXTENT_ITEM_KEY ||
  949. key.type == BTRFS_METADATA_ITEM_KEY) {
  950. if (key.objectid >= end)
  951. break;
  952. if (start < key.objectid) {
  953. ret = __btrfs_add_to_free_space_tree(trans,
  954. block_group,
  955. path2, start,
  956. key.objectid -
  957. start);
  958. if (ret)
  959. goto out_locked;
  960. }
  961. start = key.objectid;
  962. if (key.type == BTRFS_METADATA_ITEM_KEY)
  963. start += trans->fs_info->nodesize;
  964. else
  965. start += key.offset;
  966. } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
  967. if (key.objectid != block_group->start)
  968. break;
  969. }
  970. ret = btrfs_next_item(extent_root, path);
  971. if (ret < 0)
  972. goto out_locked;
  973. }
  974. if (start < end) {
  975. ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
  976. start, end - start);
  977. if (ret)
  978. goto out_locked;
  979. }
  980. ret = 0;
  981. out_locked:
  982. mutex_unlock(&block_group->free_space_lock);
  983. return ret;
  984. }
  985. int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
  986. {
  987. struct btrfs_trans_handle *trans;
  988. struct btrfs_root *tree_root = fs_info->tree_root;
  989. struct btrfs_root *free_space_root;
  990. struct btrfs_block_group *block_group;
  991. struct rb_node *node;
  992. int ret;
  993. trans = btrfs_start_transaction(tree_root, 0);
  994. if (IS_ERR(trans))
  995. return PTR_ERR(trans);
  996. set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
  997. set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
  998. free_space_root = btrfs_create_tree(trans,
  999. BTRFS_FREE_SPACE_TREE_OBJECTID);
  1000. if (IS_ERR(free_space_root)) {
  1001. ret = PTR_ERR(free_space_root);
  1002. btrfs_abort_transaction(trans, ret);
  1003. btrfs_end_transaction(trans);
  1004. goto out_clear;
  1005. }
  1006. ret = btrfs_global_root_insert(free_space_root);
  1007. if (unlikely(ret)) {
  1008. btrfs_put_root(free_space_root);
  1009. btrfs_abort_transaction(trans, ret);
  1010. btrfs_end_transaction(trans);
  1011. goto out_clear;
  1012. }
  1013. node = rb_first_cached(&fs_info->block_group_cache_tree);
  1014. while (node) {
  1015. block_group = rb_entry(node, struct btrfs_block_group,
  1016. cache_node);
  1017. ret = populate_free_space_tree(trans, block_group);
  1018. if (unlikely(ret)) {
  1019. btrfs_abort_transaction(trans, ret);
  1020. btrfs_end_transaction(trans);
  1021. goto out_clear;
  1022. }
  1023. node = rb_next(node);
  1024. }
  1025. btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
  1026. btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
  1027. clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
  1028. ret = btrfs_commit_transaction(trans);
  1029. /*
  1030. * Now that we've committed the transaction any reading of our commit
  1031. * root will be safe, so we can cache from the free space tree now.
  1032. */
  1033. clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
  1034. return ret;
  1035. out_clear:
  1036. clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
  1037. clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
  1038. return ret;
  1039. }
  1040. static int clear_free_space_tree(struct btrfs_trans_handle *trans,
  1041. struct btrfs_root *root)
  1042. {
  1043. BTRFS_PATH_AUTO_FREE(path);
  1044. struct btrfs_key key;
  1045. struct rb_node *node;
  1046. int nr;
  1047. int ret;
  1048. path = btrfs_alloc_path();
  1049. if (!path)
  1050. return -ENOMEM;
  1051. key.objectid = 0;
  1052. key.type = 0;
  1053. key.offset = 0;
  1054. while (1) {
  1055. ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
  1056. if (ret < 0)
  1057. return ret;
  1058. nr = btrfs_header_nritems(path->nodes[0]);
  1059. if (!nr)
  1060. break;
  1061. path->slots[0] = 0;
  1062. ret = btrfs_del_items(trans, root, path, 0, nr);
  1063. if (ret)
  1064. return ret;
  1065. btrfs_release_path(path);
  1066. }
  1067. node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
  1068. while (node) {
  1069. struct btrfs_block_group *bg;
  1070. bg = rb_entry(node, struct btrfs_block_group, cache_node);
  1071. clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
  1072. node = rb_next(node);
  1073. cond_resched();
  1074. }
  1075. return 0;
  1076. }
  1077. int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
  1078. {
  1079. struct btrfs_trans_handle *trans;
  1080. struct btrfs_root *tree_root = fs_info->tree_root;
  1081. struct btrfs_key key = {
  1082. .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
  1083. .type = BTRFS_ROOT_ITEM_KEY,
  1084. .offset = 0,
  1085. };
  1086. struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
  1087. int ret;
  1088. trans = btrfs_start_transaction(tree_root, 0);
  1089. if (IS_ERR(trans))
  1090. return PTR_ERR(trans);
  1091. btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
  1092. btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
  1093. ret = clear_free_space_tree(trans, free_space_root);
  1094. if (unlikely(ret)) {
  1095. btrfs_abort_transaction(trans, ret);
  1096. btrfs_end_transaction(trans);
  1097. return ret;
  1098. }
  1099. ret = btrfs_del_root(trans, &free_space_root->root_key);
  1100. if (unlikely(ret)) {
  1101. btrfs_abort_transaction(trans, ret);
  1102. btrfs_end_transaction(trans);
  1103. return ret;
  1104. }
  1105. btrfs_global_root_delete(free_space_root);
  1106. spin_lock(&fs_info->trans_lock);
  1107. list_del(&free_space_root->dirty_list);
  1108. spin_unlock(&fs_info->trans_lock);
  1109. btrfs_tree_lock(free_space_root->node);
  1110. btrfs_clear_buffer_dirty(trans, free_space_root->node);
  1111. btrfs_tree_unlock(free_space_root->node);
  1112. ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
  1113. free_space_root->node, 0, 1);
  1114. btrfs_put_root(free_space_root);
  1115. if (unlikely(ret < 0)) {
  1116. btrfs_abort_transaction(trans, ret);
  1117. btrfs_end_transaction(trans);
  1118. return ret;
  1119. }
  1120. return btrfs_commit_transaction(trans);
  1121. }
  1122. int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
  1123. {
  1124. struct btrfs_trans_handle *trans;
  1125. struct btrfs_key key = {
  1126. .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
  1127. .type = BTRFS_ROOT_ITEM_KEY,
  1128. .offset = 0,
  1129. };
  1130. struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
  1131. struct rb_node *node;
  1132. int ret;
  1133. trans = btrfs_start_transaction(free_space_root, 1);
  1134. if (IS_ERR(trans))
  1135. return PTR_ERR(trans);
  1136. set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
  1137. set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
  1138. ret = clear_free_space_tree(trans, free_space_root);
  1139. if (unlikely(ret)) {
  1140. btrfs_abort_transaction(trans, ret);
  1141. btrfs_end_transaction(trans);
  1142. return ret;
  1143. }
  1144. node = rb_first_cached(&fs_info->block_group_cache_tree);
  1145. while (node) {
  1146. struct btrfs_block_group *block_group;
  1147. block_group = rb_entry(node, struct btrfs_block_group,
  1148. cache_node);
  1149. if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
  1150. &block_group->runtime_flags))
  1151. goto next;
  1152. ret = populate_free_space_tree(trans, block_group);
  1153. if (unlikely(ret)) {
  1154. btrfs_abort_transaction(trans, ret);
  1155. btrfs_end_transaction(trans);
  1156. return ret;
  1157. }
  1158. next:
  1159. if (btrfs_should_end_transaction(trans)) {
  1160. btrfs_end_transaction(trans);
  1161. trans = btrfs_start_transaction(free_space_root, 1);
  1162. if (IS_ERR(trans))
  1163. return PTR_ERR(trans);
  1164. }
  1165. node = rb_next(node);
  1166. }
  1167. btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
  1168. btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
  1169. clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
  1170. ret = btrfs_commit_transaction(trans);
  1171. clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
  1172. return ret;
  1173. }
  1174. static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
  1175. struct btrfs_block_group *block_group,
  1176. struct btrfs_path *path)
  1177. {
  1178. bool own_path = false;
  1179. int ret;
  1180. if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
  1181. &block_group->runtime_flags))
  1182. return 0;
  1183. /*
  1184. * While rebuilding the free space tree we may allocate new metadata
  1185. * block groups while modifying the free space tree.
  1186. *
  1187. * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
  1188. * can use multiple transactions, every time btrfs_end_transaction() is
  1189. * called at btrfs_rebuild_free_space_tree() we finish the creation of
  1190. * new block groups by calling btrfs_create_pending_block_groups(), and
  1191. * that in turn calls us, through btrfs_add_block_group_free_space(),
  1192. * to add a free space info item and a free space extent item for the
  1193. * block group.
  1194. *
  1195. * Then later btrfs_rebuild_free_space_tree() may find such new block
  1196. * groups and processes them with populate_free_space_tree(), which can
  1197. * fail with EEXIST since there are already items for the block group in
  1198. * the free space tree. Notice that we say "may find" because a new
  1199. * block group may be added to the block groups rbtree in a node before
  1200. * or after the block group currently being processed by the rebuild
  1201. * process. So signal the rebuild process to skip such new block groups
  1202. * if it finds them.
  1203. */
  1204. set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
  1205. if (!path) {
  1206. path = btrfs_alloc_path();
  1207. if (unlikely(!path)) {
  1208. btrfs_abort_transaction(trans, -ENOMEM);
  1209. return -ENOMEM;
  1210. }
  1211. own_path = true;
  1212. }
  1213. ret = add_new_free_space_info(trans, block_group, path);
  1214. if (unlikely(ret)) {
  1215. btrfs_abort_transaction(trans, ret);
  1216. goto out;
  1217. }
  1218. ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
  1219. block_group->start, block_group->length);
  1220. if (ret)
  1221. btrfs_abort_transaction(trans, ret);
  1222. out:
  1223. if (own_path)
  1224. btrfs_free_path(path);
  1225. return ret;
  1226. }
  1227. int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
  1228. struct btrfs_block_group *block_group)
  1229. {
  1230. int ret;
  1231. if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
  1232. return 0;
  1233. mutex_lock(&block_group->free_space_lock);
  1234. ret = __add_block_group_free_space(trans, block_group, NULL);
  1235. mutex_unlock(&block_group->free_space_lock);
  1236. return ret;
  1237. }
  1238. int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
  1239. struct btrfs_block_group *block_group)
  1240. {
  1241. struct btrfs_root *root = btrfs_free_space_root(block_group);
  1242. BTRFS_PATH_AUTO_FREE(path);
  1243. struct btrfs_key key, found_key;
  1244. struct extent_buffer *leaf;
  1245. u64 start, end;
  1246. int done = 0, nr;
  1247. int ret;
  1248. if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
  1249. return 0;
  1250. if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
  1251. /* We never added this block group to the free space tree. */
  1252. return 0;
  1253. }
  1254. path = btrfs_alloc_path();
  1255. if (unlikely(!path)) {
  1256. ret = -ENOMEM;
  1257. btrfs_abort_transaction(trans, ret);
  1258. return ret;
  1259. }
  1260. start = block_group->start;
  1261. end = btrfs_block_group_end(block_group);
  1262. key.objectid = end - 1;
  1263. key.type = (u8)-1;
  1264. key.offset = (u64)-1;
  1265. while (!done) {
  1266. ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
  1267. if (unlikely(ret)) {
  1268. btrfs_abort_transaction(trans, ret);
  1269. return ret;
  1270. }
  1271. leaf = path->nodes[0];
  1272. nr = 0;
  1273. path->slots[0]++;
  1274. while (path->slots[0] > 0) {
  1275. btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
  1276. if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
  1277. ASSERT(found_key.objectid == block_group->start);
  1278. ASSERT(found_key.offset == block_group->length);
  1279. done = 1;
  1280. nr++;
  1281. path->slots[0]--;
  1282. break;
  1283. } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
  1284. found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
  1285. ASSERT(found_key.objectid >= start);
  1286. ASSERT(found_key.objectid < end);
  1287. ASSERT(found_key.objectid + found_key.offset <= end);
  1288. nr++;
  1289. path->slots[0]--;
  1290. } else {
  1291. ASSERT(0);
  1292. }
  1293. }
  1294. ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
  1295. if (unlikely(ret)) {
  1296. btrfs_abort_transaction(trans, ret);
  1297. return ret;
  1298. }
  1299. btrfs_release_path(path);
  1300. }
  1301. return 0;
  1302. }
  1303. static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
  1304. struct btrfs_path *path,
  1305. u32 expected_extent_count)
  1306. {
  1307. struct btrfs_block_group *block_group = caching_ctl->block_group;
  1308. struct btrfs_fs_info *fs_info = block_group->fs_info;
  1309. struct btrfs_root *root;
  1310. struct btrfs_key key;
  1311. bool prev_bit_set = false;
  1312. /* Initialize to silence GCC. */
  1313. u64 extent_start = 0;
  1314. const u64 end = btrfs_block_group_end(block_group);
  1315. u64 offset;
  1316. u64 total_found = 0;
  1317. u32 extent_count = 0;
  1318. int ret;
  1319. root = btrfs_free_space_root(block_group);
  1320. while (1) {
  1321. ret = btrfs_next_item(root, path);
  1322. if (ret < 0)
  1323. return ret;
  1324. if (ret)
  1325. break;
  1326. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  1327. if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
  1328. break;
  1329. ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
  1330. ASSERT(key.objectid < end && key.objectid + key.offset <= end);
  1331. offset = key.objectid;
  1332. while (offset < key.objectid + key.offset) {
  1333. bool bit_set;
  1334. bit_set = btrfs_free_space_test_bit(block_group, path, offset);
  1335. if (!prev_bit_set && bit_set) {
  1336. extent_start = offset;
  1337. } else if (prev_bit_set && !bit_set) {
  1338. u64 space_added;
  1339. ret = btrfs_add_new_free_space(block_group,
  1340. extent_start,
  1341. offset,
  1342. &space_added);
  1343. if (ret)
  1344. return ret;
  1345. total_found += space_added;
  1346. if (total_found > CACHING_CTL_WAKE_UP) {
  1347. total_found = 0;
  1348. wake_up(&caching_ctl->wait);
  1349. }
  1350. extent_count++;
  1351. }
  1352. prev_bit_set = bit_set;
  1353. offset += fs_info->sectorsize;
  1354. }
  1355. }
  1356. if (prev_bit_set) {
  1357. ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
  1358. if (ret)
  1359. return ret;
  1360. extent_count++;
  1361. }
  1362. if (unlikely(extent_count != expected_extent_count)) {
  1363. btrfs_err(fs_info,
  1364. "incorrect extent count for %llu; counted %u, expected %u",
  1365. block_group->start, extent_count,
  1366. expected_extent_count);
  1367. DEBUG_WARN();
  1368. return -EIO;
  1369. }
  1370. return 0;
  1371. }
  1372. static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
  1373. struct btrfs_path *path,
  1374. u32 expected_extent_count)
  1375. {
  1376. struct btrfs_block_group *block_group = caching_ctl->block_group;
  1377. struct btrfs_fs_info *fs_info = block_group->fs_info;
  1378. struct btrfs_root *root;
  1379. struct btrfs_key key;
  1380. const u64 end = btrfs_block_group_end(block_group);
  1381. u64 total_found = 0;
  1382. u32 extent_count = 0;
  1383. int ret;
  1384. root = btrfs_free_space_root(block_group);
  1385. while (1) {
  1386. u64 space_added;
  1387. ret = btrfs_next_item(root, path);
  1388. if (ret < 0)
  1389. return ret;
  1390. if (ret)
  1391. break;
  1392. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  1393. if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
  1394. break;
  1395. ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
  1396. ASSERT(key.objectid < end && key.objectid + key.offset <= end);
  1397. ret = btrfs_add_new_free_space(block_group, key.objectid,
  1398. key.objectid + key.offset,
  1399. &space_added);
  1400. if (ret)
  1401. return ret;
  1402. total_found += space_added;
  1403. if (total_found > CACHING_CTL_WAKE_UP) {
  1404. total_found = 0;
  1405. wake_up(&caching_ctl->wait);
  1406. }
  1407. extent_count++;
  1408. }
  1409. if (unlikely(extent_count != expected_extent_count)) {
  1410. btrfs_err(fs_info,
  1411. "incorrect extent count for %llu; counted %u, expected %u",
  1412. block_group->start, extent_count,
  1413. expected_extent_count);
  1414. DEBUG_WARN();
  1415. return -EIO;
  1416. }
  1417. return 0;
  1418. }
  1419. int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
  1420. {
  1421. struct btrfs_block_group *block_group;
  1422. struct btrfs_free_space_info *info;
  1423. BTRFS_PATH_AUTO_FREE(path);
  1424. u32 extent_count, flags;
  1425. block_group = caching_ctl->block_group;
  1426. path = btrfs_alloc_path();
  1427. if (!path)
  1428. return -ENOMEM;
  1429. /*
  1430. * Just like caching_thread() doesn't want to deadlock on the extent
  1431. * tree, we don't want to deadlock on the free space tree.
  1432. */
  1433. path->skip_locking = true;
  1434. path->search_commit_root = true;
  1435. path->reada = READA_FORWARD;
  1436. info = btrfs_search_free_space_info(NULL, block_group, path, 0);
  1437. if (IS_ERR(info))
  1438. return PTR_ERR(info);
  1439. extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
  1440. flags = btrfs_free_space_flags(path->nodes[0], info);
  1441. /*
  1442. * We left path pointing to the free space info item, so now
  1443. * load_free_space_foo can just iterate through the free space tree from
  1444. * there.
  1445. */
  1446. if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
  1447. return load_free_space_bitmaps(caching_ctl, path, extent_count);
  1448. else
  1449. return load_free_space_extents(caching_ctl, path, extent_count);
  1450. }
  1451. static int delete_orphan_free_space_entries(struct btrfs_root *fst_root,
  1452. struct btrfs_path *path,
  1453. u64 first_bg_bytenr)
  1454. {
  1455. struct btrfs_trans_handle *trans;
  1456. int ret;
  1457. trans = btrfs_start_transaction(fst_root, 1);
  1458. if (IS_ERR(trans))
  1459. return PTR_ERR(trans);
  1460. while (true) {
  1461. struct btrfs_key key = { 0 };
  1462. int i;
  1463. ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1);
  1464. if (ret < 0)
  1465. break;
  1466. ASSERT(ret > 0);
  1467. ret = 0;
  1468. for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) {
  1469. btrfs_item_key_to_cpu(path->nodes[0], &key, i);
  1470. if (key.objectid >= first_bg_bytenr) {
  1471. /*
  1472. * Only break the for() loop and continue to
  1473. * delete items.
  1474. */
  1475. break;
  1476. }
  1477. }
  1478. /* No items to delete, finished. */
  1479. if (i == 0)
  1480. break;
  1481. ret = btrfs_del_items(trans, fst_root, path, 0, i);
  1482. if (ret < 0)
  1483. break;
  1484. btrfs_release_path(path);
  1485. }
  1486. btrfs_release_path(path);
  1487. btrfs_end_transaction(trans);
  1488. if (ret == 0)
  1489. btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries");
  1490. return ret;
  1491. }
  1492. /* Remove any free space entry before the first block group. */
  1493. int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info)
  1494. {
  1495. BTRFS_PATH_AUTO_RELEASE(path);
  1496. struct btrfs_key key = {
  1497. .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
  1498. .type = BTRFS_ROOT_ITEM_KEY,
  1499. .offset = 0,
  1500. };
  1501. struct btrfs_root *root;
  1502. struct btrfs_block_group *bg;
  1503. u64 first_bg_bytenr;
  1504. int ret;
  1505. /*
  1506. * Extent tree v2 has multiple global roots based on the block group.
  1507. * This means we cannot easily grab the global free space tree and locate
  1508. * orphan items. Furthermore this is still experimental, all users
  1509. * should use the latest btrfs-progs anyway.
  1510. */
  1511. if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
  1512. return 0;
  1513. if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
  1514. return 0;
  1515. root = btrfs_global_root(fs_info, &key);
  1516. if (!root)
  1517. return 0;
  1518. key.objectid = 0;
  1519. key.type = 0;
  1520. key.offset = 0;
  1521. bg = btrfs_lookup_first_block_group(fs_info, 0);
  1522. if (unlikely(!bg)) {
  1523. btrfs_err(fs_info, "no block group found");
  1524. return -EUCLEAN;
  1525. }
  1526. first_bg_bytenr = bg->start;
  1527. btrfs_put_block_group(bg);
  1528. ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
  1529. if (ret < 0)
  1530. return ret;
  1531. /* There should not be an all-zero key in fst. */
  1532. ASSERT(ret > 0);
  1533. /* Empty free space tree. */
  1534. if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
  1535. return 0;
  1536. btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
  1537. if (key.objectid >= first_bg_bytenr)
  1538. return 0;
  1539. btrfs_release_path(&path);
  1540. return delete_orphan_free_space_entries(root, &path, first_bg_bytenr);
  1541. }