raid-stripe-tree.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2023 Western Digital Corporation or its affiliates.
  4. */
  5. #include <linux/btrfs_tree.h>
  6. #include "ctree.h"
  7. #include "fs.h"
  8. #include "accessors.h"
  9. #include "transaction.h"
  10. #include "disk-io.h"
  11. #include "raid-stripe-tree.h"
  12. #include "volumes.h"
  13. #include "print-tree.h"
  14. static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
  15. struct btrfs_path *path,
  16. const struct btrfs_key *oldkey,
  17. u64 newlen, u64 frontpad)
  18. {
  19. struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
  20. struct btrfs_stripe_extent *extent, AUTO_KFREE(newitem);
  21. struct extent_buffer *leaf;
  22. int slot;
  23. size_t item_size;
  24. struct btrfs_key newkey = {
  25. .objectid = oldkey->objectid + frontpad,
  26. .type = BTRFS_RAID_STRIPE_KEY,
  27. .offset = newlen,
  28. };
  29. int ret;
  30. ASSERT(newlen > 0);
  31. ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
  32. leaf = path->nodes[0];
  33. slot = path->slots[0];
  34. item_size = btrfs_item_size(leaf, slot);
  35. newitem = kzalloc(item_size, GFP_NOFS);
  36. if (!newitem)
  37. return -ENOMEM;
  38. extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
  39. for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
  40. struct btrfs_raid_stride *stride = &extent->strides[i];
  41. u64 phys;
  42. phys = btrfs_raid_stride_physical(leaf, stride) + frontpad;
  43. btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys);
  44. }
  45. ret = btrfs_del_item(trans, stripe_root, path);
  46. if (ret)
  47. return ret;
  48. btrfs_release_path(path);
  49. return btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);
  50. }
  51. int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
  52. {
  53. struct btrfs_fs_info *fs_info = trans->fs_info;
  54. struct btrfs_root *stripe_root = fs_info->stripe_root;
  55. BTRFS_PATH_AUTO_FREE(path);
  56. struct btrfs_key key;
  57. struct extent_buffer *leaf;
  58. u64 found_start;
  59. u64 found_end;
  60. u64 end = start + length;
  61. int slot;
  62. int ret;
  63. if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root)
  64. return 0;
  65. if (!btrfs_is_testing(fs_info)) {
  66. struct btrfs_chunk_map *map;
  67. bool use_rst;
  68. map = btrfs_find_chunk_map(fs_info, start, length);
  69. if (!map)
  70. return -EINVAL;
  71. use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
  72. btrfs_free_chunk_map(map);
  73. if (!use_rst)
  74. return 0;
  75. }
  76. path = btrfs_alloc_path();
  77. if (!path)
  78. return -ENOMEM;
  79. while (1) {
  80. key.objectid = start;
  81. key.type = BTRFS_RAID_STRIPE_KEY;
  82. key.offset = 0;
  83. ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
  84. if (ret < 0)
  85. break;
  86. if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
  87. path->slots[0]--;
  88. leaf = path->nodes[0];
  89. slot = path->slots[0];
  90. btrfs_item_key_to_cpu(leaf, &key, slot);
  91. found_start = key.objectid;
  92. found_end = found_start + key.offset;
  93. ret = 0;
  94. /*
  95. * The stripe extent starts before the range we want to delete,
  96. * but the range spans more than one stripe extent:
  97. *
  98. * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---|
  99. * |--- keep ---|--- drop ---|
  100. *
  101. * This means we have to get the previous item, truncate its
  102. * length and then restart the search.
  103. */
  104. if (found_start > start) {
  105. if (slot == 0) {
  106. ret = btrfs_previous_item(stripe_root, path, start,
  107. BTRFS_RAID_STRIPE_KEY);
  108. if (ret) {
  109. if (ret > 0)
  110. ret = -ENOENT;
  111. break;
  112. }
  113. } else {
  114. path->slots[0]--;
  115. }
  116. leaf = path->nodes[0];
  117. slot = path->slots[0];
  118. btrfs_item_key_to_cpu(leaf, &key, slot);
  119. found_start = key.objectid;
  120. found_end = found_start + key.offset;
  121. ASSERT(found_start <= start);
  122. }
  123. if (key.type != BTRFS_RAID_STRIPE_KEY)
  124. break;
  125. /* That stripe ends before we start, we're done. */
  126. if (found_end <= start)
  127. break;
  128. trace_btrfs_raid_extent_delete(fs_info, start, end,
  129. found_start, found_end);
  130. /*
  131. * The stripe extent starts before the range we want to delete
  132. * and ends after the range we want to delete, i.e. we're
  133. * punching a hole in the stripe extent:
  134. *
  135. * |--- RAID Stripe Extent ---|
  136. * | keep |--- drop ---| keep |
  137. *
  138. * This means we need to a) truncate the existing item and b)
  139. * create a second item for the remaining range.
  140. */
  141. if (found_start < start && found_end > end) {
  142. size_t item_size;
  143. u64 diff_start = start - found_start;
  144. u64 diff_end = found_end - end;
  145. struct btrfs_stripe_extent *extent;
  146. struct btrfs_key newkey = {
  147. .objectid = end,
  148. .type = BTRFS_RAID_STRIPE_KEY,
  149. .offset = diff_end,
  150. };
  151. /* The "right" item. */
  152. ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey);
  153. if (ret)
  154. break;
  155. item_size = btrfs_item_size(leaf, path->slots[0]);
  156. extent = btrfs_item_ptr(leaf, path->slots[0],
  157. struct btrfs_stripe_extent);
  158. for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
  159. struct btrfs_raid_stride *stride = &extent->strides[i];
  160. u64 phys;
  161. phys = btrfs_raid_stride_physical(leaf, stride);
  162. phys += diff_start + length;
  163. btrfs_set_raid_stride_physical(leaf, stride, phys);
  164. }
  165. /* The "left" item. */
  166. path->slots[0]--;
  167. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  168. btrfs_partially_delete_raid_extent(trans, path, &key,
  169. diff_start, 0);
  170. break;
  171. }
  172. /*
  173. * The stripe extent starts before the range we want to delete:
  174. *
  175. * |--- RAID Stripe Extent ---|
  176. * |--- keep ---|--- drop ---|
  177. *
  178. * This means we have to duplicate the tree item, truncate the
  179. * length to the new size and then re-insert the item.
  180. */
  181. if (found_start < start) {
  182. u64 diff_start = start - found_start;
  183. btrfs_partially_delete_raid_extent(trans, path, &key,
  184. diff_start, 0);
  185. start += (key.offset - diff_start);
  186. length -= (key.offset - diff_start);
  187. if (length == 0)
  188. break;
  189. btrfs_release_path(path);
  190. continue;
  191. }
  192. /*
  193. * The stripe extent ends after the range we want to delete:
  194. *
  195. * |--- RAID Stripe Extent ---|
  196. * |--- drop ---|--- keep ---|
  197. *
  198. * This means we have to duplicate the tree item, truncate the
  199. * length to the new size and then re-insert the item.
  200. */
  201. if (found_end > end) {
  202. u64 diff_end = found_end - end;
  203. btrfs_partially_delete_raid_extent(trans, path, &key,
  204. key.offset - length,
  205. length);
  206. ASSERT(key.offset - diff_end == length);
  207. break;
  208. }
  209. /* Finally we can delete the whole item, no more special cases. */
  210. ret = btrfs_del_item(trans, stripe_root, path);
  211. if (ret)
  212. break;
  213. start += key.offset;
  214. length -= key.offset;
  215. if (length == 0)
  216. break;
  217. btrfs_release_path(path);
  218. }
  219. return ret;
  220. }
  221. static int update_raid_extent_item(struct btrfs_trans_handle *trans,
  222. struct btrfs_key *key,
  223. struct btrfs_stripe_extent *stripe_extent,
  224. const size_t item_size)
  225. {
  226. BTRFS_PATH_AUTO_FREE(path);
  227. struct extent_buffer *leaf;
  228. int ret;
  229. int slot;
  230. path = btrfs_alloc_path();
  231. if (!path)
  232. return -ENOMEM;
  233. ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
  234. 0, 1);
  235. if (ret)
  236. return (ret == 1 ? ret : -EINVAL);
  237. leaf = path->nodes[0];
  238. slot = path->slots[0];
  239. write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
  240. item_size);
  241. return ret;
  242. }
  243. EXPORT_FOR_TESTS
  244. int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
  245. struct btrfs_io_context *bioc)
  246. {
  247. struct btrfs_fs_info *fs_info = trans->fs_info;
  248. struct btrfs_key stripe_key;
  249. struct btrfs_root *stripe_root = fs_info->stripe_root;
  250. const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
  251. struct btrfs_stripe_extent AUTO_KFREE(stripe_extent);
  252. const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
  253. int ret;
  254. stripe_extent = kzalloc(item_size, GFP_NOFS);
  255. if (!unlikely(stripe_extent)) {
  256. btrfs_abort_transaction(trans, -ENOMEM);
  257. btrfs_end_transaction(trans);
  258. return -ENOMEM;
  259. }
  260. trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
  261. num_stripes);
  262. for (int i = 0; i < num_stripes; i++) {
  263. u64 devid = bioc->stripes[i].dev->devid;
  264. u64 physical = bioc->stripes[i].physical;
  265. struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
  266. btrfs_set_stack_raid_stride_devid(raid_stride, devid);
  267. btrfs_set_stack_raid_stride_physical(raid_stride, physical);
  268. }
  269. stripe_key.objectid = bioc->logical;
  270. stripe_key.type = BTRFS_RAID_STRIPE_KEY;
  271. stripe_key.offset = bioc->size;
  272. ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
  273. item_size);
  274. if (ret == -EEXIST) {
  275. ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
  276. item_size);
  277. if (ret)
  278. btrfs_abort_transaction(trans, ret);
  279. } else if (ret) {
  280. btrfs_abort_transaction(trans, ret);
  281. }
  282. return ret;
  283. }
  284. int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
  285. struct btrfs_ordered_extent *ordered_extent)
  286. {
  287. struct btrfs_io_context *bioc;
  288. int ret;
  289. if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
  290. return 0;
  291. list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
  292. ret = btrfs_insert_one_raid_extent(trans, bioc);
  293. if (ret)
  294. return ret;
  295. }
  296. while (!list_empty(&ordered_extent->bioc_list)) {
  297. bioc = list_first_entry(&ordered_extent->bioc_list,
  298. typeof(*bioc), rst_ordered_entry);
  299. list_del(&bioc->rst_ordered_entry);
  300. btrfs_put_bioc(bioc);
  301. }
  302. return 0;
  303. }
  304. int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
  305. u64 logical, u64 *length, u64 map_type,
  306. u32 stripe_index, struct btrfs_io_stripe *stripe)
  307. {
  308. struct btrfs_root *stripe_root = fs_info->stripe_root;
  309. struct btrfs_stripe_extent *stripe_extent;
  310. struct btrfs_key stripe_key;
  311. struct btrfs_key found_key;
  312. BTRFS_PATH_AUTO_FREE(path);
  313. struct extent_buffer *leaf;
  314. const u64 end = logical + *length;
  315. int num_stripes;
  316. u64 offset;
  317. u64 found_logical;
  318. u64 found_length;
  319. u64 found_end;
  320. int slot;
  321. int ret;
  322. stripe_key.objectid = logical;
  323. stripe_key.type = BTRFS_RAID_STRIPE_KEY;
  324. stripe_key.offset = 0;
  325. path = btrfs_alloc_path();
  326. if (!path)
  327. return -ENOMEM;
  328. if (stripe->rst_search_commit_root) {
  329. path->skip_locking = true;
  330. path->search_commit_root = true;
  331. }
  332. ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
  333. if (ret < 0)
  334. return ret;
  335. if (ret) {
  336. if (path->slots[0] != 0)
  337. path->slots[0]--;
  338. }
  339. while (1) {
  340. leaf = path->nodes[0];
  341. slot = path->slots[0];
  342. btrfs_item_key_to_cpu(leaf, &found_key, slot);
  343. found_logical = found_key.objectid;
  344. found_length = found_key.offset;
  345. found_end = found_logical + found_length;
  346. if (found_logical > end) {
  347. ret = -ENODATA;
  348. goto out;
  349. }
  350. if (in_range(logical, found_logical, found_length))
  351. break;
  352. ret = btrfs_next_item(stripe_root, path);
  353. if (ret)
  354. goto out;
  355. }
  356. offset = logical - found_logical;
  357. /*
  358. * If we have a logically contiguous, but physically non-continuous
  359. * range, we need to split the bio. Record the length after which we
  360. * must split the bio.
  361. */
  362. if (end > found_end)
  363. *length -= end - found_end;
  364. num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
  365. stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
  366. for (int i = 0; i < num_stripes; i++) {
  367. struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
  368. u64 devid = btrfs_raid_stride_devid(leaf, stride);
  369. u64 physical = btrfs_raid_stride_physical(leaf, stride);
  370. if (devid != stripe->dev->devid)
  371. continue;
  372. if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
  373. continue;
  374. stripe->physical = physical + offset;
  375. trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
  376. stripe->physical, devid);
  377. return 0;
  378. }
  379. /* If we're here, we haven't found the requested devid in the stripe. */
  380. ret = -ENODATA;
  381. out:
  382. if (ret > 0)
  383. ret = -ENODATA;
  384. if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
  385. btrfs_debug(fs_info,
  386. "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
  387. logical, logical + *length, stripe->dev->devid,
  388. btrfs_bg_type_to_raid_name(map_type));
  389. }
  390. return ret;
  391. }