file-item.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2007 Oracle. All rights reserved.
  4. */
  5. #include <linux/bio.h>
  6. #include <linux/slab.h>
  7. #include <linux/pagemap.h>
  8. #include <linux/highmem.h>
  9. #include <linux/sched/mm.h>
  10. #include "messages.h"
  11. #include "ctree.h"
  12. #include "disk-io.h"
  13. #include "transaction.h"
  14. #include "bio.h"
  15. #include "compression.h"
  16. #include "fs.h"
  17. #include "accessors.h"
  18. #include "file-item.h"
  19. #include "volumes.h"
/*
 * __MAX_CSUM_ITEMS(r, size): take BTRFS_LEAF_DATA_SIZE(r), subtract the size
 * of two struct btrfs_item headers, divide by @size and subtract one.  The
 * result is cast to unsigned long.
 */
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
				       sizeof(struct btrfs_item) * 2) / \
				       size) - 1))

/*
 * MAX_CSUM_ITEMS(r, size): __MAX_CSUM_ITEMS(r, size) clamped so that it never
 * exceeds PAGE_SIZE (both operands are compared as u32).
 */
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
				       PAGE_SIZE))
  25. /*
  26. * Set inode's size according to filesystem options.
  27. *
  28. * @inode: inode we want to update the disk_i_size for
  29. * @new_i_size: i_size we want to set to, 0 if we use i_size
  30. *
  31. * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read()
  32. * returns as it is perfectly fine with a file that has holes without hole file
  33. * extent items.
  34. *
  35. * However without NO_HOLES we need to only return the area that is contiguous
  36. * from the 0 offset of the file. Otherwise we could end up adjust i_size up
  37. * to an extent that has a gap in between.
  38. *
  39. * Finally new_i_size should only be set in the case of truncate where we're not
  40. * ready to use i_size_read() as the limiter yet.
  41. */
  42. void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
  43. {
  44. u64 start, end, i_size;
  45. bool found;
  46. spin_lock(&inode->lock);
  47. i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
  48. if (!inode->file_extent_tree) {
  49. inode->disk_i_size = i_size;
  50. goto out_unlock;
  51. }
  52. found = btrfs_find_contiguous_extent_bit(inode->file_extent_tree, 0, &start,
  53. &end, EXTENT_DIRTY);
  54. if (found && start == 0)
  55. i_size = min(i_size, end + 1);
  56. else
  57. i_size = 0;
  58. inode->disk_i_size = i_size;
  59. out_unlock:
  60. spin_unlock(&inode->lock);
  61. }
  62. /*
  63. * Mark range within a file as having a new extent inserted.
  64. *
  65. * @inode: inode being modified
  66. * @start: start file offset of the file extent we've inserted
  67. * @len: logical length of the file extent item
  68. *
  69. * Call when we are inserting a new file extent where there was none before.
  70. * Does not need to call this in the case where we're replacing an existing file
  71. * extent, however if not sure it's fine to call this multiple times.
  72. *
  73. * The start and len must match the file extent item, so thus must be sectorsize
  74. * aligned.
  75. */
  76. int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
  77. u64 len)
  78. {
  79. if (!inode->file_extent_tree)
  80. return 0;
  81. if (len == 0)
  82. return 0;
  83. ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
  84. return btrfs_set_extent_bit(inode->file_extent_tree, start, start + len - 1,
  85. EXTENT_DIRTY, NULL);
  86. }
  87. /*
  88. * Mark an inode range as not having a backing extent.
  89. *
  90. * @inode: inode being modified
  91. * @start: start file offset of the file extent we've inserted
  92. * @len: logical length of the file extent item
  93. *
  94. * Called when we drop a file extent, for example when we truncate. Doesn't
  95. * need to be called for cases where we're replacing a file extent, like when
  96. * we've COWed a file extent.
  97. *
  98. * The start and len must match the file extent item, so thus must be sectorsize
  99. * aligned.
  100. */
  101. int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
  102. u64 len)
  103. {
  104. if (!inode->file_extent_tree)
  105. return 0;
  106. if (len == 0)
  107. return 0;
  108. ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
  109. len == (u64)-1);
  110. return btrfs_clear_extent_bit(inode->file_extent_tree, start,
  111. start + len - 1, EXTENT_DIRTY, NULL);
  112. }
  113. static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes)
  114. {
  115. ASSERT(IS_ALIGNED(bytes, fs_info->sectorsize));
  116. return (bytes >> fs_info->sectorsize_bits) * fs_info->csum_size;
  117. }
  118. static size_t csum_size_to_bytes(const struct btrfs_fs_info *fs_info, u32 csum_size)
  119. {
  120. ASSERT(IS_ALIGNED(csum_size, fs_info->csum_size));
  121. return (csum_size / fs_info->csum_size) << fs_info->sectorsize_bits;
  122. }
  123. static inline u32 max_ordered_sum_bytes(const struct btrfs_fs_info *fs_info)
  124. {
  125. u32 max_csum_size = round_down(PAGE_SIZE - sizeof(struct btrfs_ordered_sum),
  126. fs_info->csum_size);
  127. return csum_size_to_bytes(fs_info, max_csum_size);
  128. }
  129. /*
  130. * Calculate the total size needed to allocate for an ordered sum structure
  131. * spanning @bytes in the file.
  132. */
  133. static int btrfs_ordered_sum_size(const struct btrfs_fs_info *fs_info, unsigned long bytes)
  134. {
  135. return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes);
  136. }
  137. int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
  138. struct btrfs_root *root,
  139. u64 objectid, u64 pos, u64 num_bytes)
  140. {
  141. int ret = 0;
  142. struct btrfs_file_extent_item *item;
  143. struct btrfs_key file_key;
  144. BTRFS_PATH_AUTO_FREE(path);
  145. struct extent_buffer *leaf;
  146. path = btrfs_alloc_path();
  147. if (!path)
  148. return -ENOMEM;
  149. file_key.objectid = objectid;
  150. file_key.type = BTRFS_EXTENT_DATA_KEY;
  151. file_key.offset = pos;
  152. ret = btrfs_insert_empty_item(trans, root, path, &file_key,
  153. sizeof(*item));
  154. if (ret < 0)
  155. return ret;
  156. leaf = path->nodes[0];
  157. item = btrfs_item_ptr(leaf, path->slots[0],
  158. struct btrfs_file_extent_item);
  159. btrfs_set_file_extent_disk_bytenr(leaf, item, 0);
  160. btrfs_set_file_extent_disk_num_bytes(leaf, item, 0);
  161. btrfs_set_file_extent_offset(leaf, item, 0);
  162. btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
  163. btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes);
  164. btrfs_set_file_extent_generation(leaf, item, trans->transid);
  165. btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
  166. btrfs_set_file_extent_compression(leaf, item, 0);
  167. btrfs_set_file_extent_encryption(leaf, item, 0);
  168. btrfs_set_file_extent_other_encoding(leaf, item, 0);
  169. return ret;
  170. }
  171. static struct btrfs_csum_item *
  172. btrfs_lookup_csum(struct btrfs_trans_handle *trans,
  173. struct btrfs_root *root,
  174. struct btrfs_path *path,
  175. u64 bytenr, int cow)
  176. {
  177. struct btrfs_fs_info *fs_info = root->fs_info;
  178. int ret;
  179. struct btrfs_key file_key;
  180. struct btrfs_key found_key;
  181. struct btrfs_csum_item *item;
  182. struct extent_buffer *leaf;
  183. u64 csum_offset = 0;
  184. const u32 csum_size = fs_info->csum_size;
  185. int csums_in_item;
  186. file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
  187. file_key.type = BTRFS_EXTENT_CSUM_KEY;
  188. file_key.offset = bytenr;
  189. ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
  190. if (ret < 0)
  191. goto fail;
  192. leaf = path->nodes[0];
  193. if (ret > 0) {
  194. ret = 1;
  195. if (path->slots[0] == 0)
  196. goto fail;
  197. path->slots[0]--;
  198. btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
  199. if (found_key.type != BTRFS_EXTENT_CSUM_KEY)
  200. goto fail;
  201. csum_offset = (bytenr - found_key.offset) >>
  202. fs_info->sectorsize_bits;
  203. csums_in_item = btrfs_item_size(leaf, path->slots[0]);
  204. csums_in_item /= csum_size;
  205. if (csum_offset == csums_in_item) {
  206. ret = -EFBIG;
  207. goto fail;
  208. } else if (csum_offset > csums_in_item) {
  209. goto fail;
  210. }
  211. }
  212. item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
  213. item = (struct btrfs_csum_item *)((unsigned char *)item +
  214. csum_offset * csum_size);
  215. return item;
  216. fail:
  217. if (ret > 0)
  218. ret = -ENOENT;
  219. return ERR_PTR(ret);
  220. }
  221. int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
  222. struct btrfs_root *root,
  223. struct btrfs_path *path, u64 objectid,
  224. u64 offset, int mod)
  225. {
  226. struct btrfs_key file_key;
  227. int ins_len = mod < 0 ? -1 : 0;
  228. int cow = mod != 0;
  229. file_key.objectid = objectid;
  230. file_key.type = BTRFS_EXTENT_DATA_KEY;
  231. file_key.offset = offset;
  232. return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
  233. }
  234. /*
  235. * Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and
  236. * store the result to @dst.
  237. *
  238. * Return >0 for the number of sectors we found.
  239. * Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum
  240. * for it. Caller may want to try next sector until one range is hit.
  241. * Return <0 for fatal error.
  242. */
  243. static int search_csum_tree(struct btrfs_fs_info *fs_info,
  244. struct btrfs_path *path, u64 disk_bytenr,
  245. u64 len, u8 *dst)
  246. {
  247. struct btrfs_root *csum_root;
  248. struct btrfs_csum_item *item = NULL;
  249. struct btrfs_key key;
  250. const u32 sectorsize = fs_info->sectorsize;
  251. const u32 csum_size = fs_info->csum_size;
  252. u32 itemsize;
  253. int ret;
  254. u64 csum_start;
  255. u64 csum_len;
  256. ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) &&
  257. IS_ALIGNED(len, sectorsize));
  258. /* Check if the current csum item covers disk_bytenr */
  259. if (path->nodes[0]) {
  260. item = btrfs_item_ptr(path->nodes[0], path->slots[0],
  261. struct btrfs_csum_item);
  262. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  263. itemsize = btrfs_item_size(path->nodes[0], path->slots[0]);
  264. csum_start = key.offset;
  265. csum_len = (itemsize / csum_size) * sectorsize;
  266. if (in_range(disk_bytenr, csum_start, csum_len))
  267. goto found;
  268. }
  269. /* Current item doesn't contain the desired range, search again */
  270. btrfs_release_path(path);
  271. csum_root = btrfs_csum_root(fs_info, disk_bytenr);
  272. if (unlikely(!csum_root)) {
  273. btrfs_err(fs_info,
  274. "missing csum root for extent at bytenr %llu",
  275. disk_bytenr);
  276. return -EUCLEAN;
  277. }
  278. item = btrfs_lookup_csum(NULL, csum_root, path, disk_bytenr, 0);
  279. if (IS_ERR(item)) {
  280. ret = PTR_ERR(item);
  281. goto out;
  282. }
  283. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  284. itemsize = btrfs_item_size(path->nodes[0], path->slots[0]);
  285. csum_start = key.offset;
  286. csum_len = (itemsize / csum_size) * sectorsize;
  287. ASSERT(in_range(disk_bytenr, csum_start, csum_len));
  288. found:
  289. ret = (min(csum_start + csum_len, disk_bytenr + len) -
  290. disk_bytenr) >> fs_info->sectorsize_bits;
  291. read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
  292. ret * csum_size);
  293. out:
  294. if (ret == -ENOENT || ret == -EFBIG)
  295. ret = 0;
  296. return ret;
  297. }
/*
 * Lookup the checksum for the read bio in csum tree.
 *
 * On success bbio->csum holds one checksum per block of the bio: either the
 * inline buffer bbio->csum_inline, or a kvcalloc()ed buffer when the
 * checksums do not fit in BTRFS_BIO_INLINE_CSUM_SIZE.  Blocks without a
 * checksum in the tree get an all-zero checksum.
 *
 * Return: 0 on success, including when lookup is skipped because the inode is
 * NODATASUM or the filesystem has BTRFS_FS_STATE_NO_DATA_CSUMS set;
 * -ENOMEM if allocating the path or the checksum buffer fails; or the
 * negative error returned by search_csum_tree(), in which case bbio->csum is
 * freed (if it was allocated) and set to NULL.
 */
int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
{
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct bio *bio = &bbio->bio;
	BTRFS_PATH_AUTO_FREE(path);
	const u32 sectorsize = fs_info->sectorsize;
	const u32 csum_size = fs_info->csum_size;
	u32 orig_len = bio->bi_iter.bi_size;
	u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
	int ret = 0;
	u32 bio_offset = 0;

	if ((inode->flags & BTRFS_INODE_NODATASUM) ||
	    test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state))
		return 0;

	/*
	 * This function is only called for read bio.
	 *
	 * This means two things:
	 * - All our csums should only be in csum tree
	 *   No ordered extents csums, as ordered extents are only for write
	 *   path.
	 * - No need to bother any other info from bvec
	 *   Since we're looking up csums, the only important info is the
	 *   disk_bytenr and the length, which can be extracted from bi_iter
	 *   directly.
	 */
	ASSERT(bio_op(bio) == REQ_OP_READ);
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Use the inline buffer when all checksums of the bio fit in it. */
	if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
		bbio->csum = kvcalloc(nblocks, csum_size, GFP_NOFS);
		if (!bbio->csum)
			return -ENOMEM;
	} else {
		bbio->csum = bbio->csum_inline;
	}

	/*
	 * If requested number of sectors is larger than one leaf can contain,
	 * kick the readahead for csum tree.
	 */
	if (nblocks > fs_info->csums_per_leaf)
		path->reada = READA_FORWARD;

	/*
	 * the free space stuff is only read when it hasn't been
	 * updated in the current transaction.  So, we can safely
	 * read from the commit root and sidestep a nasty deadlock
	 * between reading the free space cache and updating the csum tree.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		path->search_commit_root = true;
		path->skip_locking = true;
	}

	/*
	 * If we are searching for a csum of an extent from a past
	 * transaction, we can search in the commit root and reduce
	 * lock contention on the csum tree extent buffers.
	 *
	 * This is important because that lock is an rwsem which gets
	 * pretty heavy write load under memory pressure and sustained
	 * csum overwrites, unlike the commit_root_sem. (Memory pressure
	 * makes us writeback the nodes multiple times per transaction,
	 * which makes us cow them each time, taking the write lock.)
	 *
	 * Due to how rwsem is implemented, there is a possible
	 * priority inversion where the readers holding the lock don't
	 * get scheduled (say they're in a cgroup stuck in heavy reclaim)
	 * which then blocks writers, including transaction commit. By
	 * using a semaphore with fewer writers (only a commit switching
	 * the roots), we make this issue less likely.
	 *
	 * Note that we don't rely on btrfs_search_slot to lock the
	 * commit root csum. We call search_slot multiple times, which would
	 * create a potential race where a commit comes in between searches
	 * while we are not holding the commit_root_sem, and we get csums
	 * from across transactions.
	 */
	if (bbio->csum_search_commit_root) {
		path->search_commit_root = true;
		path->skip_locking = true;
		down_read(&fs_info->commit_root_sem);
	}

	/* Each iteration fills in the checksums for one or more blocks. */
	while (bio_offset < orig_len) {
		int count;
		u64 cur_disk_bytenr = orig_disk_bytenr + bio_offset;
		u8 *csum_dst = bbio->csum +
			(bio_offset >> fs_info->sectorsize_bits) * csum_size;

		count = search_csum_tree(fs_info, path, cur_disk_bytenr,
					 orig_len - bio_offset, csum_dst);
		if (count < 0) {
			ret = count;
			/* Only free the buffer if it is not the inline one. */
			if (bbio->csum != bbio->csum_inline)
				kvfree(bbio->csum);
			bbio->csum = NULL;
			break;
		}

		/*
		 * We didn't find a csum for this range.  We need to make sure
		 * we complain loudly about this, because we are not NODATASUM.
		 *
		 * However for the DATA_RELOC inode we could potentially be
		 * relocating data extents for a NODATASUM inode, so the inode
		 * itself won't be marked with NODATASUM, but the extent we're
		 * copying is in fact NODATASUM.  If we don't find a csum we
		 * assume this is the case.
		 */
		if (count == 0) {
			/* Zero this block's checksum and move on by one block. */
			memset(csum_dst, 0, csum_size);
			count = 1;

			if (btrfs_is_data_reloc_root(inode->root)) {
				u64 file_offset = bbio->file_offset + bio_offset;

				btrfs_set_extent_bit(&inode->io_tree, file_offset,
						     file_offset + sectorsize - 1,
						     EXTENT_NODATASUM, NULL);
			} else {
				btrfs_warn_rl(fs_info,
			"csum hole found for disk bytenr range [%llu, %llu)",
				cur_disk_bytenr, cur_disk_bytenr + sectorsize);
			}
		}
		bio_offset += count * sectorsize;
	}

	/* Pairs with the down_read() above; also reached after an error. */
	if (bbio->csum_search_commit_root)
		up_read(&fs_info->commit_root_sem);
	return ret;
}
/*
 * Search for checksums for a given logical range.
 *
 * @root:	The root where to look for checksums.
 * @start:	Logical address of target checksum range (sectorsize aligned).
 * @end:	End offset (inclusive) of the target checksum range; @end + 1
 *		must be sectorsize aligned.
 * @list:	List for adding each checksum that was found.
 *		Can be NULL in case the caller only wants to check if
 *		there any checksums for the range.
 * @nowait:	Indicate if the search must be non-blocking or not.
 *
 * Each entry added to @list is a kzalloc()ed struct btrfs_ordered_sum that
 * covers at most max_ordered_sum_bytes() bytes of the range.
 *
 * Return < 0 on error, 0 if no checksums were found, or 1 if checksums were
 * found.
 */
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
			    struct list_head *list, bool nowait)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_ordered_sum *sums;
	struct btrfs_csum_item *item;
	int ret;
	bool found_csums = false;

	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(end + 1, fs_info->sectorsize));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->nowait = nowait;

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = start;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0 && path->slots[0] > 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);

		/*
		 * There are two cases we can hit here for the previous csum
		 * item:
		 *
		 *		|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Or
		 *				|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Check if the previous csum item covers the leading part of
		 * the search range.  If so we have to start from previous csum
		 * item.
		 */
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			if (bytes_to_csum_size(fs_info, start - key.offset) <
			    btrfs_item_size(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

	/* Walk csum items forward until the range is covered or items run out. */
	while (start <= end) {
		u64 csum_end;

		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY ||
		    key.offset > end)
			break;

		/* Skip any gap between the current position and this item. */
		if (key.offset > start)
			start = key.offset;

		csum_end = key.offset + csum_size_to_bytes(fs_info,
					btrfs_item_size(leaf, path->slots[0]));
		/* This item ends before the current position, try the next. */
		if (csum_end <= start) {
			path->slots[0]++;
			continue;
		}

		found_csums = true;
		/* Caller only wants to know whether any checksum exists. */
		if (!list)
			goto out;

		csum_end = min(csum_end, end + 1);
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		/* Copy this item's checksums out in bounded chunks. */
		while (start < csum_end) {
			unsigned long offset;
			size_t size;

			size = min_t(size_t, csum_end - start,
				     max_ordered_sum_bytes(fs_info));
			sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
				       GFP_NOFS);
			if (!sums) {
				ret = -ENOMEM;
				goto out;
			}

			sums->logical = start;
			sums->len = size;

			/* Byte offset of the first wanted checksum in the item. */
			offset = bytes_to_csum_size(fs_info, start - key.offset);

			read_extent_buffer(path->nodes[0],
					   sums->sums,
					   ((unsigned long)item) + offset,
					   bytes_to_csum_size(fs_info, size));

			start += size;
			list_add_tail(&sums->list, list);
		}
		path->slots[0]++;
	}
out:
	btrfs_free_path(path);
	if (ret < 0) {
		if (list) {
			struct btrfs_ordered_sum *tmp_sums;

			/*
			 * Every entry on @list is freed, not only the ones
			 * added by this call.
			 *
			 * NOTE(review): entries are freed without being
			 * unlinked, so @list is left with stale pointers and
			 * must not be walked after an error - confirm that
			 * all callers discard it.
			 */
			list_for_each_entry_safe(sums, tmp_sums, list, list)
				kfree(sums);
		}

		return ret;
	}

	return found_csums ? 1 : 0;
}
/*
 * Do the same work as btrfs_lookup_csums_list(), the difference is in how
 * we return the result.
 *
 * @root:	 The root where to look for checksums.
 * @path:	 Optional search path.  If NULL, a temporary path is allocated
 *		 and freed before returning.  If it already points at a csum
 *		 item whose key offset is <= @start, the search continues
 *		 forward from there instead of starting a new tree search.
 * @start:	 Logical address of target checksum range (sectorsize aligned).
 * @end:	 End offset (inclusive) of the target checksum range; @end + 1
 *		 must be sectorsize aligned.
 * @csum_buf:	 Buffer receiving the checksums; the checksum of the sector at
 *		 @start is at offset 0.
 * @csum_bitmap: Bitmap in which bit N is set when a checksum was found for
 *		 sector N of the range, counted from @start.
 *
 * This version will set the corresponding bits in @csum_bitmap to represent
 * that there is a csum found.
 * Each bit represents a sector. Thus caller should ensure @csum_buf passed
 * in is large enough to contain all csums.
 *
 * Return 0 on success (whether or not any checksum was found), < 0 on error.
 */
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
			      u64 start, u64 end, u8 *csum_buf,
			      unsigned long *csum_bitmap)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_csum_item *item;
	const u64 orig_start = start;
	bool free_path = false;
	int ret;

	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(end + 1, fs_info->sectorsize));

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
		free_path = true;
	}

	/* Check if we can reuse the previous path. */
	if (path->nodes[0]) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY &&
		    key.offset <= start)
			goto search_forward;
		btrfs_release_path(path);
	}

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = start;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto fail;
	if (ret > 0 && path->slots[0] > 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);

		/*
		 * There are two cases we can hit here for the previous csum
		 * item:
		 *
		 *		|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Or
		 *				|<- search range ->|
		 *	|<- csum item ->|
		 *
		 * Check if the previous csum item covers the leading part of
		 * the search range.  If so we have to start from previous csum
		 * item.
		 */
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			if (bytes_to_csum_size(fs_info, start - key.offset) <
			    btrfs_item_size(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

search_forward:
	/* Walk csum items forward until the range is covered or items run out. */
	while (start <= end) {
		u64 csum_end;

		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto fail;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY ||
		    key.offset > end)
			break;

		/* Skip any gap between the current position and this item. */
		if (key.offset > start)
			start = key.offset;

		csum_end = key.offset + csum_size_to_bytes(fs_info,
					btrfs_item_size(leaf, path->slots[0]));
		/* This item ends before the current position, try the next. */
		if (csum_end <= start) {
			path->slots[0]++;
			continue;
		}

		csum_end = min(csum_end, end + 1);
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		while (start < csum_end) {
			unsigned long offset;
			size_t size;
			/* Destination slot is relative to the original start. */
			u8 *csum_dest = csum_buf + bytes_to_csum_size(fs_info,
						start - orig_start);

			size = min_t(size_t, csum_end - start, end + 1 - start);

			/* Byte offset of the first wanted checksum in the item. */
			offset = bytes_to_csum_size(fs_info, start - key.offset);

			read_extent_buffer(path->nodes[0], csum_dest,
					   ((unsigned long)item) + offset,
					   bytes_to_csum_size(fs_info, size));

			/* Mark every sector whose checksum was just copied. */
			bitmap_set(csum_bitmap,
				(start - orig_start) >> fs_info->sectorsize_bits,
				size >> fs_info->sectorsize_bits);

			start += size;
		}
		path->slots[0]++;
	}
	ret = 0;
fail:
	/* A caller-supplied path is left as is for possible reuse. */
	if (free_path)
		btrfs_free_path(path);
	return ret;
}
  677. static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
  678. {
  679. struct btrfs_inode *inode = bbio->inode;
  680. struct btrfs_fs_info *fs_info = inode->root->fs_info;
  681. struct bio *bio = &bbio->bio;
  682. struct btrfs_ordered_sum *sums = bbio->sums;
  683. struct bvec_iter iter = *src;
  684. phys_addr_t paddr;
  685. const u32 blocksize = fs_info->sectorsize;
  686. const u32 step = min(blocksize, PAGE_SIZE);
  687. const u32 nr_steps = blocksize / step;
  688. phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
  689. u32 offset = 0;
  690. int index = 0;
  691. btrfs_bio_for_each_block(paddr, bio, &iter, step) {
  692. paddrs[(offset / step) % nr_steps] = paddr;
  693. offset += step;
  694. if (IS_ALIGNED(offset, blocksize)) {
  695. btrfs_calculate_block_csum_pages(fs_info, paddrs, sums->sums + index);
  696. index += fs_info->csum_size;
  697. }
  698. }
  699. }
  700. static void csum_one_bio_work(struct work_struct *work)
  701. {
  702. struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, csum_work);
  703. ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
  704. ASSERT(bbio->async_csum == true);
  705. csum_one_bio(bbio, &bbio->csum_saved_iter);
  706. complete(&bbio->csum_done);
  707. }
  708. /*
  709. * Calculate checksums of the data contained inside a bio.
  710. */
  711. int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async)
  712. {
  713. struct btrfs_ordered_extent *ordered = bbio->ordered;
  714. struct btrfs_inode *inode = bbio->inode;
  715. struct btrfs_fs_info *fs_info = inode->root->fs_info;
  716. struct bio *bio = &bbio->bio;
  717. struct btrfs_ordered_sum *sums;
  718. unsigned nofs_flag;
  719. nofs_flag = memalloc_nofs_save();
  720. sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
  721. GFP_KERNEL);
  722. memalloc_nofs_restore(nofs_flag);
  723. if (!sums)
  724. return -ENOMEM;
  725. sums->logical = bbio->orig_logical;
  726. sums->len = bio->bi_iter.bi_size;
  727. INIT_LIST_HEAD(&sums->list);
  728. bbio->sums = sums;
  729. btrfs_add_ordered_sum(ordered, sums);
  730. if (!async) {
  731. csum_one_bio(bbio, &bbio->bio.bi_iter);
  732. return 0;
  733. }
  734. init_completion(&bbio->csum_done);
  735. bbio->async_csum = true;
  736. bbio->csum_saved_iter = bbio->bio.bi_iter;
  737. INIT_WORK(&bbio->csum_work, csum_one_bio_work);
  738. schedule_work(&bbio->csum_work);
  739. return 0;
  740. }
  741. /*
  742. * Nodatasum I/O on zoned file systems still requires an btrfs_ordered_sum to
  743. * record the updated logical address on Zone Append completion.
  744. * Allocate just the structure with an empty sums array here for that case.
  745. */
  746. int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
  747. {
  748. bbio->sums = kmalloc_obj(*bbio->sums, GFP_NOFS);
  749. if (!bbio->sums)
  750. return -ENOMEM;
  751. bbio->sums->len = bbio->bio.bi_iter.bi_size;
  752. bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
  753. btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
  754. return 0;
  755. }
  756. /*
  757. * Remove one checksum overlapping a range.
  758. *
  759. * This expects the key to describe the csum pointed to by the path, and it
  760. * expects the csum to overlap the range [bytenr, len]
  761. *
  762. * The csum should not be entirely contained in the range and the range should
  763. * not be entirely contained in the csum.
  764. *
  765. * This calls btrfs_truncate_item with the correct args based on the overlap,
  766. * and fixes up the key as required.
  767. */
  768. static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
  769. struct btrfs_path *path,
  770. struct btrfs_key *key,
  771. u64 bytenr, u64 len)
  772. {
  773. struct btrfs_fs_info *fs_info = trans->fs_info;
  774. struct extent_buffer *leaf;
  775. const u32 csum_size = fs_info->csum_size;
  776. u64 csum_end;
  777. u64 end_byte = bytenr + len;
  778. u32 blocksize_bits = fs_info->sectorsize_bits;
  779. leaf = path->nodes[0];
  780. csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size;
  781. csum_end <<= blocksize_bits;
  782. csum_end += key->offset;
  783. if (key->offset < bytenr && csum_end <= end_byte) {
  784. /*
  785. * [ bytenr - len ]
  786. * [ ]
  787. * [csum ]
  788. * A simple truncate off the end of the item
  789. */
  790. u32 new_size = (bytenr - key->offset) >> blocksize_bits;
  791. new_size *= csum_size;
  792. btrfs_truncate_item(trans, path, new_size, 1);
  793. } else if (key->offset >= bytenr && csum_end > end_byte &&
  794. end_byte > key->offset) {
  795. /*
  796. * [ bytenr - len ]
  797. * [ ]
  798. * [csum ]
  799. * we need to truncate from the beginning of the csum
  800. */
  801. u32 new_size = (csum_end - end_byte) >> blocksize_bits;
  802. new_size *= csum_size;
  803. btrfs_truncate_item(trans, path, new_size, 0);
  804. key->offset = end_byte;
  805. btrfs_set_item_key_safe(trans, path, key);
  806. } else {
  807. BUG();
  808. }
  809. }
  810. /*
  811. * Delete the csum items from the csum tree for a given range of bytes.
  812. */
  813. int btrfs_del_csums(struct btrfs_trans_handle *trans,
  814. struct btrfs_root *root, u64 bytenr, u64 len)
  815. {
  816. struct btrfs_fs_info *fs_info = trans->fs_info;
  817. BTRFS_PATH_AUTO_FREE(path);
  818. struct btrfs_key key;
  819. u64 end_byte = bytenr + len;
  820. u64 csum_end;
  821. struct extent_buffer *leaf;
  822. int ret = 0;
  823. const u32 csum_size = fs_info->csum_size;
  824. u32 blocksize_bits = fs_info->sectorsize_bits;
  825. ASSERT(btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID ||
  826. btrfs_root_id(root) == BTRFS_TREE_LOG_OBJECTID);
  827. path = btrfs_alloc_path();
  828. if (!path)
  829. return -ENOMEM;
  830. while (1) {
  831. key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
  832. key.type = BTRFS_EXTENT_CSUM_KEY;
  833. key.offset = end_byte - 1;
  834. ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
  835. if (ret > 0) {
  836. ret = 0;
  837. if (path->slots[0] == 0)
  838. break;
  839. path->slots[0]--;
  840. } else if (ret < 0) {
  841. break;
  842. }
  843. leaf = path->nodes[0];
  844. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  845. if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
  846. key.type != BTRFS_EXTENT_CSUM_KEY) {
  847. break;
  848. }
  849. if (key.offset >= end_byte)
  850. break;
  851. csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size;
  852. csum_end <<= blocksize_bits;
  853. csum_end += key.offset;
  854. /* this csum ends before we start, we're done */
  855. if (csum_end <= bytenr)
  856. break;
  857. /* delete the entire item, it is inside our range */
  858. if (key.offset >= bytenr && csum_end <= end_byte) {
  859. int del_nr = 1;
  860. /*
  861. * Check how many csum items preceding this one in this
  862. * leaf correspond to our range and then delete them all
  863. * at once.
  864. */
  865. if (key.offset > bytenr && path->slots[0] > 0) {
  866. int slot = path->slots[0] - 1;
  867. while (slot >= 0) {
  868. struct btrfs_key pk;
  869. btrfs_item_key_to_cpu(leaf, &pk, slot);
  870. if (pk.offset < bytenr ||
  871. pk.type != BTRFS_EXTENT_CSUM_KEY ||
  872. pk.objectid !=
  873. BTRFS_EXTENT_CSUM_OBJECTID)
  874. break;
  875. path->slots[0] = slot;
  876. del_nr++;
  877. key.offset = pk.offset;
  878. slot--;
  879. }
  880. }
  881. ret = btrfs_del_items(trans, root, path,
  882. path->slots[0], del_nr);
  883. if (ret)
  884. break;
  885. if (key.offset == bytenr)
  886. break;
  887. } else if (key.offset < bytenr && csum_end > end_byte) {
  888. unsigned long offset;
  889. unsigned long shift_len;
  890. unsigned long item_offset;
  891. /*
  892. * [ bytenr - len ]
  893. * [csum ]
  894. *
  895. * Our bytes are in the middle of the csum,
  896. * we need to split this item and insert a new one.
  897. *
  898. * But we can't drop the path because the
  899. * csum could change, get removed, extended etc.
  900. *
  901. * The trick here is the max size of a csum item leaves
  902. * enough room in the tree block for a single
  903. * item header. So, we split the item in place,
  904. * adding a new header pointing to the existing
  905. * bytes. Then we loop around again and we have
  906. * a nicely formed csum item that we can neatly
  907. * truncate.
  908. */
  909. offset = (bytenr - key.offset) >> blocksize_bits;
  910. offset *= csum_size;
  911. shift_len = (len >> blocksize_bits) * csum_size;
  912. item_offset = btrfs_item_ptr_offset(leaf,
  913. path->slots[0]);
  914. memzero_extent_buffer(leaf, item_offset + offset,
  915. shift_len);
  916. key.offset = bytenr;
  917. /*
  918. * btrfs_split_item returns -EAGAIN when the
  919. * item changed size or key
  920. */
  921. ret = btrfs_split_item(trans, root, path, &key, offset);
  922. if (unlikely(ret && ret != -EAGAIN)) {
  923. btrfs_abort_transaction(trans, ret);
  924. break;
  925. }
  926. ret = 0;
  927. key.offset = end_byte - 1;
  928. } else {
  929. truncate_one_csum(trans, path, &key, bytenr, len);
  930. if (key.offset < bytenr)
  931. break;
  932. }
  933. btrfs_release_path(path);
  934. }
  935. return ret;
  936. }
  937. static int find_next_csum_offset(struct btrfs_root *root,
  938. struct btrfs_path *path,
  939. u64 *next_offset)
  940. {
  941. const u32 nritems = btrfs_header_nritems(path->nodes[0]);
  942. struct btrfs_key found_key;
  943. int slot = path->slots[0] + 1;
  944. int ret;
  945. if (nritems == 0 || slot >= nritems) {
  946. ret = btrfs_next_leaf(root, path);
  947. if (ret < 0) {
  948. return ret;
  949. } else if (ret > 0) {
  950. *next_offset = (u64)-1;
  951. return 0;
  952. }
  953. slot = path->slots[0];
  954. }
  955. btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
  956. if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
  957. found_key.type != BTRFS_EXTENT_CSUM_KEY)
  958. *next_offset = (u64)-1;
  959. else
  960. *next_offset = found_key.offset;
  961. return 0;
  962. }
  963. int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
  964. struct btrfs_root *root,
  965. struct btrfs_ordered_sum *sums)
  966. {
  967. struct btrfs_fs_info *fs_info = root->fs_info;
  968. struct btrfs_key file_key;
  969. struct btrfs_key found_key;
  970. BTRFS_PATH_AUTO_FREE(path);
  971. struct btrfs_csum_item *item;
  972. struct btrfs_csum_item *item_end;
  973. struct extent_buffer *leaf = NULL;
  974. u64 next_offset;
  975. u64 total_bytes = 0;
  976. u64 csum_offset;
  977. u64 bytenr;
  978. u32 ins_size;
  979. int index = 0;
  980. int found_next;
  981. int ret;
  982. const u32 csum_size = fs_info->csum_size;
  983. path = btrfs_alloc_path();
  984. if (!path)
  985. return -ENOMEM;
  986. again:
  987. next_offset = (u64)-1;
  988. found_next = 0;
  989. bytenr = sums->logical + total_bytes;
  990. file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
  991. file_key.type = BTRFS_EXTENT_CSUM_KEY;
  992. file_key.offset = bytenr;
  993. item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
  994. if (!IS_ERR(item)) {
  995. ret = 0;
  996. leaf = path->nodes[0];
  997. item_end = btrfs_item_ptr(leaf, path->slots[0],
  998. struct btrfs_csum_item);
  999. item_end = (struct btrfs_csum_item *)((char *)item_end +
  1000. btrfs_item_size(leaf, path->slots[0]));
  1001. goto found;
  1002. }
  1003. ret = PTR_ERR(item);
  1004. if (ret != -EFBIG && ret != -ENOENT)
  1005. return ret;
  1006. if (ret == -EFBIG) {
  1007. u32 item_size;
  1008. /* we found one, but it isn't big enough yet */
  1009. leaf = path->nodes[0];
  1010. item_size = btrfs_item_size(leaf, path->slots[0]);
  1011. if ((item_size / csum_size) >=
  1012. MAX_CSUM_ITEMS(fs_info, csum_size)) {
  1013. /* already at max size, make a new one */
  1014. goto insert;
  1015. }
  1016. } else {
  1017. /* We didn't find a csum item, insert one. */
  1018. ret = find_next_csum_offset(root, path, &next_offset);
  1019. if (ret < 0)
  1020. return ret;
  1021. found_next = 1;
  1022. goto insert;
  1023. }
  1024. /*
  1025. * At this point, we know the tree has a checksum item that ends at an
  1026. * offset matching the start of the checksum range we want to insert.
  1027. * We try to extend that item as much as possible and then add as many
  1028. * checksums to it as they fit.
  1029. *
  1030. * First check if the leaf has enough free space for at least one
  1031. * checksum. If it has go directly to the item extension code, otherwise
  1032. * release the path and do a search for insertion before the extension.
  1033. */
  1034. if (btrfs_leaf_free_space(leaf) >= csum_size) {
  1035. btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
  1036. csum_offset = (bytenr - found_key.offset) >>
  1037. fs_info->sectorsize_bits;
  1038. goto extend_csum;
  1039. }
  1040. btrfs_release_path(path);
  1041. path->search_for_extension = true;
  1042. ret = btrfs_search_slot(trans, root, &file_key, path,
  1043. csum_size, 1);
  1044. path->search_for_extension = false;
  1045. if (ret < 0)
  1046. return ret;
  1047. if (ret > 0) {
  1048. if (path->slots[0] == 0)
  1049. goto insert;
  1050. path->slots[0]--;
  1051. }
  1052. leaf = path->nodes[0];
  1053. btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
  1054. csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits;
  1055. if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
  1056. found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
  1057. csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) {
  1058. goto insert;
  1059. }
  1060. extend_csum:
  1061. if (csum_offset == btrfs_item_size(leaf, path->slots[0]) /
  1062. csum_size) {
  1063. int extend_nr;
  1064. u64 tmp;
  1065. u32 diff;
  1066. tmp = sums->len - total_bytes;
  1067. tmp >>= fs_info->sectorsize_bits;
  1068. WARN_ON(tmp < 1);
  1069. extend_nr = max_t(int, 1, tmp);
  1070. /*
  1071. * A log tree can already have checksum items with a subset of
  1072. * the checksums we are trying to log. This can happen after
  1073. * doing a sequence of partial writes into prealloc extents and
  1074. * fsyncs in between, with a full fsync logging a larger subrange
  1075. * of an extent for which a previous fast fsync logged a smaller
  1076. * subrange. And this happens in particular due to merging file
  1077. * extent items when we complete an ordered extent for a range
  1078. * covered by a prealloc extent - this is done at
  1079. * btrfs_mark_extent_written().
  1080. *
  1081. * So if we try to extend the previous checksum item, which has
  1082. * a range that ends at the start of the range we want to insert,
  1083. * make sure we don't extend beyond the start offset of the next
  1084. * checksum item. If we are at the last item in the leaf, then
  1085. * forget the optimization of extending and add a new checksum
  1086. * item - it is not worth the complexity of releasing the path,
  1087. * getting the first key for the next leaf, repeat the btree
  1088. * search, etc, because log trees are temporary anyway and it
  1089. * would only save a few bytes of leaf space.
  1090. */
  1091. if (btrfs_root_id(root) == BTRFS_TREE_LOG_OBJECTID) {
  1092. if (path->slots[0] + 1 >=
  1093. btrfs_header_nritems(path->nodes[0])) {
  1094. ret = find_next_csum_offset(root, path, &next_offset);
  1095. if (ret < 0)
  1096. return ret;
  1097. found_next = 1;
  1098. goto insert;
  1099. }
  1100. ret = find_next_csum_offset(root, path, &next_offset);
  1101. if (ret < 0)
  1102. return ret;
  1103. tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
  1104. if (tmp <= INT_MAX)
  1105. extend_nr = min_t(int, extend_nr, tmp);
  1106. }
  1107. diff = (csum_offset + extend_nr) * csum_size;
  1108. diff = min(diff,
  1109. MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
  1110. diff = diff - btrfs_item_size(leaf, path->slots[0]);
  1111. diff = min_t(u32, btrfs_leaf_free_space(leaf), diff);
  1112. diff /= csum_size;
  1113. diff *= csum_size;
  1114. btrfs_extend_item(trans, path, diff);
  1115. ret = 0;
  1116. goto csum;
  1117. }
  1118. insert:
  1119. btrfs_release_path(path);
  1120. csum_offset = 0;
  1121. if (found_next) {
  1122. u64 tmp;
  1123. tmp = sums->len - total_bytes;
  1124. tmp >>= fs_info->sectorsize_bits;
  1125. tmp = min(tmp, (next_offset - file_key.offset) >>
  1126. fs_info->sectorsize_bits);
  1127. tmp = max_t(u64, 1, tmp);
  1128. tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
  1129. ins_size = csum_size * tmp;
  1130. } else {
  1131. ins_size = csum_size;
  1132. }
  1133. ret = btrfs_insert_empty_item(trans, root, path, &file_key,
  1134. ins_size);
  1135. if (ret < 0)
  1136. return ret;
  1137. leaf = path->nodes[0];
  1138. csum:
  1139. item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
  1140. item_end = (struct btrfs_csum_item *)((unsigned char *)item +
  1141. btrfs_item_size(leaf, path->slots[0]));
  1142. item = (struct btrfs_csum_item *)((unsigned char *)item +
  1143. csum_offset * csum_size);
  1144. found:
  1145. ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits;
  1146. ins_size *= csum_size;
  1147. ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
  1148. ins_size);
  1149. write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
  1150. ins_size);
  1151. index += ins_size;
  1152. ins_size /= csum_size;
  1153. total_bytes += ins_size * fs_info->sectorsize;
  1154. if (total_bytes < sums->len) {
  1155. btrfs_release_path(path);
  1156. cond_resched();
  1157. goto again;
  1158. }
  1159. return 0;
  1160. }
  1161. void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
  1162. const struct btrfs_path *path,
  1163. const struct btrfs_file_extent_item *fi,
  1164. struct extent_map *em)
  1165. {
  1166. struct btrfs_fs_info *fs_info = inode->root->fs_info;
  1167. struct btrfs_root *root = inode->root;
  1168. struct extent_buffer *leaf = path->nodes[0];
  1169. const int slot = path->slots[0];
  1170. struct btrfs_key key;
  1171. u64 extent_start;
  1172. u8 type = btrfs_file_extent_type(leaf, fi);
  1173. int compress_type = btrfs_file_extent_compression(leaf, fi);
  1174. btrfs_item_key_to_cpu(leaf, &key, slot);
  1175. extent_start = key.offset;
  1176. em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
  1177. em->generation = btrfs_file_extent_generation(leaf, fi);
  1178. if (type == BTRFS_FILE_EXTENT_REG ||
  1179. type == BTRFS_FILE_EXTENT_PREALLOC) {
  1180. const u64 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
  1181. em->start = extent_start;
  1182. em->len = btrfs_file_extent_end(path) - extent_start;
  1183. if (disk_bytenr == 0) {
  1184. em->disk_bytenr = EXTENT_MAP_HOLE;
  1185. em->disk_num_bytes = 0;
  1186. em->offset = 0;
  1187. return;
  1188. }
  1189. em->disk_bytenr = disk_bytenr;
  1190. em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
  1191. em->offset = btrfs_file_extent_offset(leaf, fi);
  1192. if (compress_type != BTRFS_COMPRESS_NONE) {
  1193. btrfs_extent_map_set_compression(em, compress_type);
  1194. } else {
  1195. /*
  1196. * Older kernels can create regular non-hole data
  1197. * extents with ram_bytes smaller than disk_num_bytes.
  1198. * Not a big deal, just always use disk_num_bytes
  1199. * for ram_bytes.
  1200. */
  1201. em->ram_bytes = em->disk_num_bytes;
  1202. if (type == BTRFS_FILE_EXTENT_PREALLOC)
  1203. em->flags |= EXTENT_FLAG_PREALLOC;
  1204. }
  1205. } else if (type == BTRFS_FILE_EXTENT_INLINE) {
  1206. /* Tree-checker has ensured this. */
  1207. ASSERT(extent_start == 0);
  1208. em->disk_bytenr = EXTENT_MAP_INLINE;
  1209. em->start = 0;
  1210. em->len = fs_info->sectorsize;
  1211. em->offset = 0;
  1212. btrfs_extent_map_set_compression(em, compress_type);
  1213. } else {
  1214. btrfs_err(fs_info,
  1215. "unknown file extent item type %d, inode %llu, offset %llu, "
  1216. "root %llu", type, btrfs_ino(inode), extent_start,
  1217. btrfs_root_id(root));
  1218. }
  1219. }
  1220. /*
  1221. * Returns the end offset (non inclusive) of the file extent item the given path
  1222. * points to. If it points to an inline extent, the returned offset is rounded
  1223. * up to the sector size.
  1224. */
  1225. u64 btrfs_file_extent_end(const struct btrfs_path *path)
  1226. {
  1227. const struct extent_buffer *leaf = path->nodes[0];
  1228. const int slot = path->slots[0];
  1229. struct btrfs_file_extent_item *fi;
  1230. struct btrfs_key key;
  1231. u64 end;
  1232. btrfs_item_key_to_cpu(leaf, &key, slot);
  1233. ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
  1234. fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
  1235. if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE)
  1236. end = leaf->fs_info->sectorsize;
  1237. else
  1238. end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
  1239. return end;
  1240. }