lzo.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2008 Oracle. All rights reserved.
  4. */
  5. #include <linux/kernel.h>
  6. #include <linux/slab.h>
  7. #include <linux/mm.h>
  8. #include <linux/init.h>
  9. #include <linux/err.h>
  10. #include <linux/sched.h>
  11. #include <linux/pagemap.h>
  12. #include <linux/bio.h>
  13. #include <linux/lzo.h>
  14. #include <linux/refcount.h>
  15. #include "messages.h"
  16. #include "compression.h"
  17. #include "ctree.h"
  18. #include "super.h"
  19. #include "btrfs_inode.h"
  20. #define LZO_LEN 4
  21. /*
  22. * Btrfs LZO compression format
  23. *
  24. * Regular and inlined LZO compressed data extents consist of:
  25. *
  26. * 1. Header
  27. * Fixed size. LZO_LEN (4) bytes long, LE32.
  28. * Records the total size (including the header) of compressed data.
  29. *
  30. * 2. Segment(s)
  31. * Variable size. Each segment includes one segment header, followed by data
  32. * payload.
  33. * One regular LZO compressed extent can have one or more segments.
  34. * For inlined LZO compressed extent, only one segment is allowed.
  35. * One segment represents at most one sector of uncompressed data.
  36. *
  37. * 2.1 Segment header
  38. * Fixed size. LZO_LEN (4) bytes long, LE32.
  39. * Records the total size of the segment (not including the header).
  40. * Segment header never crosses sector boundary, thus it's possible to
  41. * have at most 3 padding zeros at the end of the sector.
  42. *
  43. * 2.2 Data Payload
  44. * Variable size. Size up limit should be lzo1x_worst_compress(sectorsize)
  45. * which is 4419 for a 4KiB sectorsize.
  46. *
  47. * Example with 4K sectorsize:
  48. * Page 1:
  49. * 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10
  50. * 0x0000 | Header | SegHdr 01 | Data payload 01 ... |
  51. * ...
  52. * 0x0ff0 | SegHdr N | Data payload N ... |00|
  53. * ^^ padding zeros
  54. * Page 2:
  55. * 0x1000 | SegHdr N+1| Data payload N+1 ... |
  56. */
  57. struct workspace {
  58. void *mem;
  59. void *buf; /* where decompressed data goes */
  60. void *cbuf; /* where compressed data goes */
  61. struct list_head list;
  62. };
  63. static u32 workspace_buf_length(const struct btrfs_fs_info *fs_info)
  64. {
  65. return lzo1x_worst_compress(fs_info->sectorsize);
  66. }
  67. static u32 workspace_cbuf_length(const struct btrfs_fs_info *fs_info)
  68. {
  69. return lzo1x_worst_compress(fs_info->sectorsize);
  70. }
  71. void lzo_free_workspace(struct list_head *ws)
  72. {
  73. struct workspace *workspace = list_entry(ws, struct workspace, list);
  74. kvfree(workspace->buf);
  75. kvfree(workspace->cbuf);
  76. kvfree(workspace->mem);
  77. kfree(workspace);
  78. }
  79. struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info)
  80. {
  81. struct workspace *workspace;
  82. workspace = kzalloc_obj(*workspace);
  83. if (!workspace)
  84. return ERR_PTR(-ENOMEM);
  85. workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
  86. workspace->buf = kvmalloc(workspace_buf_length(fs_info), GFP_KERNEL | __GFP_NOWARN);
  87. workspace->cbuf = kvmalloc(workspace_cbuf_length(fs_info), GFP_KERNEL | __GFP_NOWARN);
  88. if (!workspace->mem || !workspace->buf || !workspace->cbuf)
  89. goto fail;
  90. INIT_LIST_HEAD(&workspace->list);
  91. return &workspace->list;
  92. fail:
  93. lzo_free_workspace(&workspace->list);
  94. return ERR_PTR(-ENOMEM);
  95. }
  96. static inline void write_compress_length(char *buf, size_t len)
  97. {
  98. __le32 dlen;
  99. dlen = cpu_to_le32(len);
  100. memcpy(buf, &dlen, LZO_LEN);
  101. }
  102. static inline size_t read_compress_length(const char *buf)
  103. {
  104. __le32 dlen;
  105. memcpy(&dlen, buf, LZO_LEN);
  106. return le32_to_cpu(dlen);
  107. }
  108. /*
  109. * Write data into @out_folio and queue it into @out_bio.
  110. *
  111. * Return 0 if everything is fine and @total_out will be increased.
  112. * Return <0 for error.
  113. *
  114. * The @out_folio can be NULL after a full folio is queued.
  115. * Thus the caller should check and allocate a new folio when needed.
  116. */
  117. static int write_and_queue_folio(struct bio *out_bio, struct folio **out_folio,
  118. u32 *total_out, u32 write_len)
  119. {
  120. const u32 fsize = folio_size(*out_folio);
  121. const u32 foffset = offset_in_folio(*out_folio, *total_out);
  122. ASSERT(out_folio && *out_folio);
  123. /* Should not cross folio boundary. */
  124. ASSERT(foffset + write_len <= fsize);
  125. /* We can not use bio_add_folio_nofail() which doesn't do any merge. */
  126. if (!bio_add_folio(out_bio, *out_folio, write_len, foffset)) {
  127. /*
  128. * We have allocated a bio that havs BTRFS_MAX_COMPRESSED_PAGES
  129. * vecs, and all ranges inside the same folio should have been
  130. * merged. If bio_add_folio() still failed, that means we have
  131. * reached the bvec limits.
  132. *
  133. * This should only happen at the beginning of a folio, and
  134. * caller is responsible for releasing the folio, since it's
  135. * not yet queued into the bio.
  136. */
  137. ASSERT(IS_ALIGNED(*total_out, fsize));
  138. return -E2BIG;
  139. }
  140. *total_out += write_len;
  141. /*
  142. * The full folio has been filled and queued, reset @out_folio to NULL,
  143. * so that error handling is fully handled by the bio.
  144. */
  145. if (IS_ALIGNED(*total_out, fsize))
  146. *out_folio = NULL;
  147. return 0;
  148. }
  149. /*
  150. * Copy compressed data to bio.
  151. *
  152. * @out_bio: The bio that will contain all the compressed data.
  153. * @compressed_data: The compressed data of this segment.
  154. * @compressed_size: The size of the compressed data.
  155. * @out_folio: The current output folio, will be updated if a new
  156. * folio is allocated.
  157. * @total_out: The total bytes of current output.
  158. * @max_out: The maximum size of the compressed data.
  159. *
  160. * Will do:
  161. *
  162. * - Write a segment header into the destination
  163. * - Copy the compressed buffer into the destination
  164. * - Make sure we have enough space in the last sector to fit a segment header
  165. * If not, we will pad at most (LZO_LEN (4)) - 1 bytes of zeros.
  166. * - If a full folio is filled, it will be queued into @out_bio, and @out_folio
  167. * will be updated.
  168. *
  169. * Will allocate new pages when needed.
  170. */
  171. static int copy_compressed_data_to_bio(struct btrfs_fs_info *fs_info,
  172. struct bio *out_bio,
  173. const char *compressed_data,
  174. size_t compressed_size,
  175. struct folio **out_folio,
  176. u32 *total_out, u32 max_out)
  177. {
  178. const u32 sectorsize = fs_info->sectorsize;
  179. const u32 sectorsize_bits = fs_info->sectorsize_bits;
  180. const u32 fsize = btrfs_min_folio_size(fs_info);
  181. const u32 old_size = out_bio->bi_iter.bi_size;
  182. u32 copy_start;
  183. u32 sector_bytes_left;
  184. char *kaddr;
  185. int ret;
  186. ASSERT(out_folio);
  187. /* There should be at least a lzo header queued. */
  188. ASSERT(old_size);
  189. ASSERT(old_size == *total_out);
  190. /*
  191. * We never allow a segment header crossing sector boundary, previous
  192. * run should ensure we have enough space left inside the sector.
  193. */
  194. ASSERT((old_size >> sectorsize_bits) == (old_size + LZO_LEN - 1) >> sectorsize_bits);
  195. if (!*out_folio) {
  196. *out_folio = btrfs_alloc_compr_folio(fs_info);
  197. if (!*out_folio)
  198. return -ENOMEM;
  199. }
  200. /* Write the segment header first. */
  201. kaddr = kmap_local_folio(*out_folio, offset_in_folio(*out_folio, *total_out));
  202. write_compress_length(kaddr, compressed_size);
  203. kunmap_local(kaddr);
  204. ret = write_and_queue_folio(out_bio, out_folio, total_out, LZO_LEN);
  205. if (ret < 0)
  206. return ret;
  207. copy_start = *total_out;
  208. /* Copy compressed data. */
  209. while (*total_out - copy_start < compressed_size) {
  210. u32 copy_len = min_t(u32, sectorsize - *total_out % sectorsize,
  211. copy_start + compressed_size - *total_out);
  212. u32 foffset = *total_out & (fsize - 1);
  213. /* With the range copied, we're larger than the original range. */
  214. if (((*total_out + copy_len) >> sectorsize_bits) >=
  215. max_out >> sectorsize_bits)
  216. return -E2BIG;
  217. if (!*out_folio) {
  218. *out_folio = btrfs_alloc_compr_folio(fs_info);
  219. if (!*out_folio)
  220. return -ENOMEM;
  221. }
  222. kaddr = kmap_local_folio(*out_folio, foffset);
  223. memcpy(kaddr, compressed_data + *total_out - copy_start, copy_len);
  224. kunmap_local(kaddr);
  225. ret = write_and_queue_folio(out_bio, out_folio, total_out, copy_len);
  226. if (ret < 0)
  227. return ret;
  228. }
  229. /*
  230. * Check if we can fit the next segment header into the remaining space
  231. * of the sector.
  232. */
  233. sector_bytes_left = round_up(*total_out, sectorsize) - *total_out;
  234. if (sector_bytes_left >= LZO_LEN || sector_bytes_left == 0)
  235. return 0;
  236. ASSERT(*out_folio);
  237. /* The remaining size is not enough, pad it with zeros */
  238. folio_zero_range(*out_folio, offset_in_folio(*out_folio, *total_out), sector_bytes_left);
  239. return write_and_queue_folio(out_bio, out_folio, total_out, sector_bytes_left);
  240. }
  241. int lzo_compress_bio(struct list_head *ws, struct compressed_bio *cb)
  242. {
  243. struct btrfs_inode *inode = cb->bbio.inode;
  244. struct btrfs_fs_info *fs_info = inode->root->fs_info;
  245. struct workspace *workspace = list_entry(ws, struct workspace, list);
  246. struct bio *bio = &cb->bbio.bio;
  247. const u64 start = cb->start;
  248. const u32 len = cb->len;
  249. const u32 sectorsize = fs_info->sectorsize;
  250. const u32 min_folio_size = btrfs_min_folio_size(fs_info);
  251. struct address_space *mapping = inode->vfs_inode.i_mapping;
  252. struct folio *folio_in = NULL;
  253. struct folio *folio_out = NULL;
  254. char *sizes_ptr;
  255. int ret = 0;
  256. /* Points to the file offset of input data. */
  257. u64 cur_in = start;
  258. /* Points to the current output byte. */
  259. u32 total_out = 0;
  260. ASSERT(bio->bi_iter.bi_size == 0);
  261. ASSERT(len);
  262. folio_out = btrfs_alloc_compr_folio(fs_info);
  263. if (!folio_out)
  264. return -ENOMEM;
  265. /* Queue a segment header first. */
  266. ret = write_and_queue_folio(bio, &folio_out, &total_out, LZO_LEN);
  267. /* The first header should not fail. */
  268. ASSERT(ret == 0);
  269. while (cur_in < start + len) {
  270. char *data_in;
  271. const u32 sectorsize_mask = sectorsize - 1;
  272. u32 sector_off = (cur_in - start) & sectorsize_mask;
  273. u32 in_len;
  274. size_t out_len;
  275. /* Get the input page first. */
  276. if (!folio_in) {
  277. ret = btrfs_compress_filemap_get_folio(mapping, cur_in, &folio_in);
  278. if (ret < 0)
  279. goto out;
  280. }
  281. /* Compress at most one sector of data each time. */
  282. in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off);
  283. ASSERT(in_len);
  284. data_in = kmap_local_folio(folio_in, offset_in_folio(folio_in, cur_in));
  285. ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, &out_len,
  286. workspace->mem);
  287. kunmap_local(data_in);
  288. if (unlikely(ret < 0)) {
  289. /* lzo1x_1_compress never fails. */
  290. ret = -EIO;
  291. goto out;
  292. }
  293. ret = copy_compressed_data_to_bio(fs_info, bio, workspace->cbuf, out_len,
  294. &folio_out, &total_out, len);
  295. if (ret < 0)
  296. goto out;
  297. cur_in += in_len;
  298. /*
  299. * Check if we're making it bigger after two sectors. And if
  300. * it is so, give up.
  301. */
  302. if (cur_in - start > sectorsize * 2 && cur_in - start < total_out) {
  303. ret = -E2BIG;
  304. goto out;
  305. }
  306. /* Check if we have reached input folio boundary. */
  307. if (IS_ALIGNED(cur_in, min_folio_size)) {
  308. folio_put(folio_in);
  309. folio_in = NULL;
  310. }
  311. }
  312. /*
  313. * The last folio is already queued. Bio is responsible for freeing
  314. * those folios now.
  315. */
  316. folio_out = NULL;
  317. /* Store the size of all chunks of compressed data */
  318. sizes_ptr = kmap_local_folio(bio_first_folio_all(bio), 0);
  319. write_compress_length(sizes_ptr, total_out);
  320. kunmap_local(sizes_ptr);
  321. out:
  322. /*
  323. * We can only free the folio that has no part queued into the bio.
  324. *
  325. * As any folio that is already queued into bio will be released by
  326. * the endio function of bio.
  327. */
  328. if (folio_out && IS_ALIGNED(total_out, min_folio_size)) {
  329. btrfs_free_compr_folio(folio_out);
  330. folio_out = NULL;
  331. }
  332. if (folio_in)
  333. folio_put(folio_in);
  334. return ret;
  335. }
  336. static struct folio *get_current_folio(struct compressed_bio *cb, struct folio_iter *fi,
  337. u32 *cur_folio_index, u32 cur_in)
  338. {
  339. struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
  340. const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
  341. ASSERT(cur_folio_index);
  342. /* Need to switch to the next folio. */
  343. if (cur_in >> min_folio_shift != *cur_folio_index) {
  344. /* We can only do the switch one folio a time. */
  345. ASSERT(cur_in >> min_folio_shift == *cur_folio_index + 1);
  346. bio_next_folio(fi, &cb->bbio.bio);
  347. (*cur_folio_index)++;
  348. }
  349. return fi->folio;
  350. }
  351. /*
  352. * Copy the compressed segment payload into @dest.
  353. *
  354. * For the payload there will be no padding, just need to do page switching.
  355. */
  356. static void copy_compressed_segment(struct compressed_bio *cb,
  357. struct folio_iter *fi, u32 *cur_folio_index,
  358. char *dest, u32 len, u32 *cur_in)
  359. {
  360. u32 orig_in = *cur_in;
  361. while (*cur_in < orig_in + len) {
  362. struct folio *cur_folio = get_current_folio(cb, fi, cur_folio_index, *cur_in);
  363. u32 copy_len;
  364. ASSERT(cur_folio);
  365. copy_len = min_t(u32, orig_in + len - *cur_in,
  366. folio_size(cur_folio) - offset_in_folio(cur_folio, *cur_in));
  367. ASSERT(copy_len);
  368. memcpy_from_folio(dest + *cur_in - orig_in, cur_folio,
  369. offset_in_folio(cur_folio, *cur_in), copy_len);
  370. *cur_in += copy_len;
  371. }
  372. }
  373. int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
  374. {
  375. struct workspace *workspace = list_entry(ws, struct workspace, list);
  376. struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info;
  377. const u32 sectorsize = fs_info->sectorsize;
  378. struct folio_iter fi;
  379. char *kaddr;
  380. int ret;
  381. /* Compressed data length, can be unaligned */
  382. u32 len_in;
  383. /* Offset inside the compressed data */
  384. u32 cur_in = 0;
  385. /* Bytes decompressed so far */
  386. u32 cur_out = 0;
  387. /* The current folio index number inside the bio. */
  388. u32 cur_folio_index = 0;
  389. bio_first_folio(&fi, &cb->bbio.bio, 0);
  390. /* There must be a compressed folio and matches the sectorsize. */
  391. if (unlikely(!fi.folio))
  392. return -EINVAL;
  393. ASSERT(folio_size(fi.folio) == btrfs_min_folio_size(fs_info));
  394. kaddr = kmap_local_folio(fi.folio, 0);
  395. len_in = read_compress_length(kaddr);
  396. kunmap_local(kaddr);
  397. cur_in += LZO_LEN;
  398. /*
  399. * LZO header length check
  400. *
  401. * The total length should not exceed the maximum extent length,
  402. * and all sectors should be used.
  403. * If this happens, it means the compressed extent is corrupted.
  404. */
  405. if (unlikely(len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) ||
  406. round_up(len_in, sectorsize) < cb->compressed_len)) {
  407. struct btrfs_inode *inode = cb->bbio.inode;
  408. btrfs_err(fs_info,
  409. "lzo header invalid, root %llu inode %llu offset %llu lzo len %u compressed len %u",
  410. btrfs_root_id(inode->root), btrfs_ino(inode),
  411. cb->start, len_in, cb->compressed_len);
  412. return -EUCLEAN;
  413. }
  414. /* Go through each lzo segment */
  415. while (cur_in < len_in) {
  416. struct folio *cur_folio;
  417. /* Length of the compressed segment */
  418. u32 seg_len;
  419. u32 sector_bytes_left;
  420. size_t out_len = lzo1x_worst_compress(sectorsize);
  421. /*
  422. * We should always have enough space for one segment header
  423. * inside current sector.
  424. */
  425. ASSERT(cur_in / sectorsize ==
  426. (cur_in + LZO_LEN - 1) / sectorsize);
  427. cur_folio = get_current_folio(cb, &fi, &cur_folio_index, cur_in);
  428. ASSERT(cur_folio);
  429. kaddr = kmap_local_folio(cur_folio, 0);
  430. seg_len = read_compress_length(kaddr + offset_in_folio(cur_folio, cur_in));
  431. kunmap_local(kaddr);
  432. cur_in += LZO_LEN;
  433. if (unlikely(seg_len > workspace_cbuf_length(fs_info))) {
  434. struct btrfs_inode *inode = cb->bbio.inode;
  435. /*
  436. * seg_len shouldn't be larger than we have allocated
  437. * for workspace->cbuf
  438. */
  439. btrfs_err(fs_info,
  440. "lzo segment too big, root %llu inode %llu offset %llu len %u",
  441. btrfs_root_id(inode->root), btrfs_ino(inode),
  442. cb->start, seg_len);
  443. return -EIO;
  444. }
  445. /* Copy the compressed segment payload into workspace */
  446. copy_compressed_segment(cb, &fi, &cur_folio_index, workspace->cbuf,
  447. seg_len, &cur_in);
  448. /* Decompress the data */
  449. ret = lzo1x_decompress_safe(workspace->cbuf, seg_len,
  450. workspace->buf, &out_len);
  451. if (unlikely(ret != LZO_E_OK)) {
  452. struct btrfs_inode *inode = cb->bbio.inode;
  453. btrfs_err(fs_info,
  454. "lzo decompression failed, error %d root %llu inode %llu offset %llu",
  455. ret, btrfs_root_id(inode->root), btrfs_ino(inode),
  456. cb->start);
  457. return -EIO;
  458. }
  459. /* Copy the data into inode pages */
  460. ret = btrfs_decompress_buf2page(workspace->buf, out_len, cb, cur_out);
  461. cur_out += out_len;
  462. /* All data read, exit */
  463. if (ret == 0)
  464. return 0;
  465. ret = 0;
  466. /* Check if the sector has enough space for a segment header */
  467. sector_bytes_left = sectorsize - (cur_in % sectorsize);
  468. if (sector_bytes_left >= LZO_LEN)
  469. continue;
  470. /* Skip the padding zeros */
  471. cur_in += sector_bytes_left;
  472. }
  473. return 0;
  474. }
  475. int lzo_decompress(struct list_head *ws, const u8 *data_in,
  476. struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
  477. size_t destlen)
  478. {
  479. struct workspace *workspace = list_entry(ws, struct workspace, list);
  480. struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio);
  481. const u32 sectorsize = fs_info->sectorsize;
  482. size_t in_len;
  483. size_t out_len;
  484. size_t max_segment_len = workspace_buf_length(fs_info);
  485. int ret;
  486. if (unlikely(srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2))
  487. return -EUCLEAN;
  488. in_len = read_compress_length(data_in);
  489. if (unlikely(in_len != srclen))
  490. return -EUCLEAN;
  491. data_in += LZO_LEN;
  492. in_len = read_compress_length(data_in);
  493. if (unlikely(in_len != srclen - LZO_LEN * 2))
  494. return -EUCLEAN;
  495. data_in += LZO_LEN;
  496. out_len = sectorsize;
  497. ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
  498. if (unlikely(ret != LZO_E_OK)) {
  499. struct btrfs_inode *inode = folio_to_inode(dest_folio);
  500. btrfs_err(fs_info,
  501. "lzo decompression failed, error %d root %llu inode %llu offset %llu",
  502. ret, btrfs_root_id(inode->root), btrfs_ino(inode),
  503. folio_pos(dest_folio));
  504. return -EIO;
  505. }
  506. ASSERT(out_len <= sectorsize);
  507. memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, out_len);
  508. /* Early end, considered as an error. */
  509. if (unlikely(out_len < destlen)) {
  510. folio_zero_range(dest_folio, dest_pgoff + out_len, destlen - out_len);
  511. return -EIO;
  512. }
  513. return 0;
  514. }
  515. const struct btrfs_compress_levels btrfs_lzo_compress = {
  516. .max_level = 1,
  517. .default_level = 1,
  518. };