zlib.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2008 Oracle. All rights reserved.
  4. *
  5. * Based on jffs2 zlib code:
  6. * Copyright © 2001-2007 Red Hat, Inc.
  7. * Created by David Woodhouse <dwmw2@infradead.org>
  8. */
  9. #include <linux/kernel.h>
  10. #include <linux/slab.h>
  11. #include <linux/zlib.h>
  12. #include <linux/zutil.h>
  13. #include <linux/mm.h>
  14. #include <linux/init.h>
  15. #include <linux/err.h>
  16. #include <linux/sched.h>
  17. #include <linux/pagemap.h>
  18. #include <linux/bio.h>
  19. #include <linux/refcount.h>
  20. #include "btrfs_inode.h"
  21. #include "compression.h"
  22. #include "fs.h"
  23. #include "subpage.h"
/*
 * Workspace buffer size for s390 zlib hardware support (DFLTCC): four pages.
 * The hardware path wants an input buffer of at least this size to achieve
 * the best performance.
 */
#define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE)
/* Per-workspace state shared by the zlib compression and decompression paths. */
struct workspace {
	/* zlib stream state; strm.workspace points to zlib's scratch memory. */
	z_stream strm;
	/*
	 * Intermediate data buffer: inflate writes its output here, and the
	 * compression path stages input here when it copies data out of the
	 * page cache for hardware accelerated deflate.
	 */
	char *buf;
	/* Size of @buf in bytes. */
	unsigned int buf_size;
	/* List linkage; this is the handle passed around as "ws". */
	struct list_head list;
	/* Compression level handed to zlib_deflateInit(). */
	int level;
};
  33. struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
  34. {
  35. struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
  36. struct workspace *workspace = list_entry(ws, struct workspace, list);
  37. workspace->level = level;
  38. return ws;
  39. }
  40. void zlib_free_workspace(struct list_head *ws)
  41. {
  42. struct workspace *workspace = list_entry(ws, struct workspace, list);
  43. kvfree(workspace->strm.workspace);
  44. kfree(workspace->buf);
  45. kfree(workspace);
  46. }
  47. /*
  48. * For s390 hardware acceleration, the buffer size should be at least
  49. * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
  50. *
  51. * But if bs > ps we can have large enough folios that meet the s390 hardware
  52. * handling.
  53. */
  54. static bool need_special_buffer(struct btrfs_fs_info *fs_info)
  55. {
  56. if (!zlib_deflate_dfltcc_enabled())
  57. return false;
  58. if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
  59. return false;
  60. return true;
  61. }
  62. struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
  63. {
  64. const u32 blocksize = fs_info->sectorsize;
  65. struct workspace *workspace;
  66. int workspacesize;
  67. workspace = kzalloc_obj(*workspace);
  68. if (!workspace)
  69. return ERR_PTR(-ENOMEM);
  70. workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
  71. zlib_inflate_workspacesize());
  72. workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
  73. workspace->level = level;
  74. workspace->buf = NULL;
  75. if (need_special_buffer(fs_info)) {
  76. workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
  77. __GFP_NOMEMALLOC | __GFP_NORETRY |
  78. __GFP_NOWARN | GFP_NOIO);
  79. workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
  80. }
  81. if (!workspace->buf) {
  82. workspace->buf = kmalloc(blocksize, GFP_KERNEL);
  83. workspace->buf_size = blocksize;
  84. }
  85. if (!workspace->strm.workspace || !workspace->buf)
  86. goto fail;
  87. INIT_LIST_HEAD(&workspace->list);
  88. return &workspace->list;
  89. fail:
  90. zlib_free_workspace(&workspace->list);
  91. return ERR_PTR(-ENOMEM);
  92. }
  93. /*
  94. * Helper for S390x with hardware zlib compression support.
  95. *
  96. * That hardware acceleration requires a buffer size larger than a single page
  97. * to get ideal performance, thus we need to do the memory copy rather than
  98. * use the page cache directly as input buffer.
  99. */
  100. static int copy_data_into_buffer(struct address_space *mapping,
  101. struct workspace *workspace, u64 filepos,
  102. unsigned long length)
  103. {
  104. u64 cur = filepos;
  105. /* It's only for hardware accelerated zlib code. */
  106. ASSERT(zlib_deflate_dfltcc_enabled());
  107. while (cur < filepos + length) {
  108. struct folio *folio;
  109. void *data_in;
  110. unsigned int offset;
  111. unsigned long copy_length;
  112. int ret;
  113. ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
  114. if (ret < 0)
  115. return ret;
  116. offset = offset_in_folio(folio, cur);
  117. copy_length = min(folio_size(folio) - offset,
  118. filepos + length - cur);
  119. data_in = kmap_local_folio(folio, offset);
  120. memcpy(workspace->buf + cur - filepos, data_in, copy_length);
  121. kunmap_local(data_in);
  122. folio_put(folio);
  123. cur += copy_length;
  124. }
  125. return 0;
  126. }
/*
 * Compress the file range [cb->start, cb->start + cb->len) of the inode behind
 * @cb with zlib and attach the compressed output folios to cb->bbio.bio.
 *
 * @ws:  list handle of the zlib workspace to use
 * @cb:  compressed bio describing the input range and receiving the output
 *
 * Returns 0 on success.  Returns -EIO when zlib fails to initialize or to
 * deflate, -ENOMEM when an output folio cannot be allocated, and -E2BIG when
 * the output is not smaller than the input or the bio cannot take another
 * folio.  Errors from fetching input folios are passed through unchanged.
 */
int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_inode *inode = cb->bbio.inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct address_space *mapping = inode->vfs_inode.i_mapping;
	struct bio *bio = &cb->bbio.bio;
	u64 start = cb->start;
	u32 len = cb->len;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret;
	/* Mapping of the current input folio; NULL when nothing is mapped. */
	char *data_in = NULL;
	char *cfolio_out;
	struct folio *in_folio = NULL;
	/* Output folio being filled; not yet owned by the bio. */
	struct folio *out_folio = NULL;
	const u32 blocksize = fs_info->sectorsize;
	const u64 orig_end = start + len;

	ret = zlib_deflateInit(&workspace->strm, workspace->level);
	if (unlikely(ret != Z_OK)) {
		btrfs_err(fs_info,
	"zlib compression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	workspace->strm.total_in = 0;
	workspace->strm.total_out = 0;

	out_folio = btrfs_alloc_compr_folio(fs_info);
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	cfolio_out = folio_address(out_folio);

	/* Start with no input so the first loop iteration fetches some. */
	workspace->strm.next_in = workspace->buf;
	workspace->strm.avail_in = 0;
	workspace->strm.next_out = cfolio_out;
	workspace->strm.avail_out = min_folio_size;

	while (workspace->strm.total_in < len) {
		/*
		 * Get next input pages and copy the contents to the workspace
		 * buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * For s390 hardware accelerated zlib, and our folio is smaller
			 * than the copy_length, we need to fill the buffer so that
			 * we can take full advantage of hardware acceleration.
			 */
			if (need_special_buffer(fs_info)) {
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int cur_len;

				/* Drop the previous input folio before taking the next. */
				if (data_in) {
					kunmap_local(data_in);
					folio_put(in_folio);
					data_in = NULL;
				}
				ret = btrfs_compress_filemap_get_folio(mapping,
						start, &in_folio);
				if (ret < 0)
					goto out;
				cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
				/* Feed zlib straight from the mapped folio. */
				data_in = kmap_local_folio(in_folio,
							   offset_in_folio(in_folio, start));
				start += cur_len;
				workspace->strm.next_in = data_in;
				workspace->strm.avail_in = cur_len;
			}
		}

		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
		if (unlikely(ret != Z_OK)) {
			btrfs_warn(fs_info,
		"zlib compression failed, error %d root %llu inode %llu offset %llu",
				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		}

		/* We're making it bigger, give up. */
		if (workspace->strm.total_in > blocksize * 2 &&
		    workspace->strm.total_in < workspace->strm.total_out) {
			ret = -E2BIG;
			goto out;
		}
		/* Output already as large as the whole input, give up. */
		if (workspace->strm.total_out >= len) {
			ret = -E2BIG;
			goto out;
		}
		/* Queue the full folio and allocate a new one. */
		if (workspace->strm.avail_out == 0) {
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}

			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
		/* We're all done. */
		if (workspace->strm.total_in >= len)
			break;
	}

	workspace->strm.avail_in = 0;

	/*
	 * Call deflate with Z_FINISH flush parameter providing more output
	 * space but no more input data, until it returns with Z_STREAM_END.
	 */
	while (ret != Z_STREAM_END) {
		ret = zlib_deflate(&workspace->strm, Z_FINISH);
		if (ret == Z_STREAM_END)
			break;
		if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		} else if (workspace->strm.avail_out == 0) {
			if (workspace->strm.total_out >= len) {
				ret = -E2BIG;
				goto out;
			}
			if (!bio_add_folio(bio, out_folio, folio_size(out_folio), 0)) {
				ret = -E2BIG;
				goto out;
			}
			/* Get another folio for the stream end. */
			out_folio = btrfs_alloc_compr_folio(fs_info);
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			workspace->strm.avail_out = min_folio_size;
			workspace->strm.next_out = cfolio_out;
		}
	}

	/* Queue the remaining part of the folio. */
	if (workspace->strm.total_out > bio->bi_iter.bi_size) {
		const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size;

		ASSERT(cur_len <= folio_size(out_folio));

		if (!bio_add_folio(bio, out_folio, cur_len, 0)) {
			ret = -E2BIG;
			goto out;
		}
	} else {
		/* The last folio hasn't been utilized. */
		btrfs_free_compr_folio(out_folio);
	}
	/* The folio is now either owned by the bio or freed. */
	out_folio = NULL;
	ASSERT(bio->bi_iter.bi_size == workspace->strm.total_out);
	zlib_deflateEnd(&workspace->strm);

	/* Compression that does not shrink the data is reported as failure. */
	if (workspace->strm.total_out >= workspace->strm.total_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
out:
	/* Free an output folio that never made it into the bio. */
	if (out_folio)
		btrfs_free_compr_folio(out_folio);
	/* Release the input folio that is still mapped, if any. */
	if (data_in) {
		kunmap_local(data_in);
		folio_put(in_folio);
	}

	return ret;
}
/*
 * Decompress the zlib stream stored in the folios of cb->bbio.bio.
 *
 * The compressed data (cb->compressed_len bytes) is inflated one workspace
 * buffer at a time, and each chunk of output is handed to
 * btrfs_decompress_buf2page() together with its offset in the decompressed
 * stream.
 *
 * @ws:  list handle of the zlib workspace to use
 * @cb:  compressed bio holding the compressed folios
 *
 * Returns 0 on success, -EINVAL if the bio has no folio, and -EIO when zlib
 * cannot be initialized or the stream does not finish with Z_STREAM_END.
 */
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct folio_iter fi;
	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
	int ret = 0, ret2;
	int wbits = MAX_WBITS;
	/* Mapping of the current compressed folio; NULL once unmapped. */
	char *data_in;
	size_t total_out = 0;
	size_t srclen = cb->compressed_len;
	unsigned long buf_start;

	bio_first_folio(&fi, &cb->bbio.bio, 0);

	/* We must have at least one folio here, that has the correct size. */
	if (unlikely(!fi.folio))
		return -EINVAL;
	ASSERT(folio_size(fi.folio) == min_folio_size);

	data_in = kmap_local_folio(fi.folio, 0);
	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
	workspace->strm.total_in = 0;

	workspace->strm.total_out = 0;
	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;

	/*
	 * If it's deflate, and it's got no preset dictionary, then we can tell
	 * zlib to skip the adler32 check: step over the two header bytes and
	 * request negative window bits.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		kunmap_local(data_in);
		btrfs_err(inode->root->fs_info,
	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		return -EIO;
	}
	while (workspace->strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		/* [buf_start, total_out) is the range produced by this call. */
		buf_start = total_out;
		total_out = workspace->strm.total_out;

		/* we didn't make progress in this inflate call, we're done */
		if (buf_start == total_out)
			break;

		ret2 = btrfs_decompress_buf2page(workspace->buf,
				total_out - buf_start, cb, buf_start);
		/*
		 * A zero return ends decompression successfully.
		 * NOTE(review): presumably this means no more output is
		 * wanted - confirm against btrfs_decompress_buf2page().
		 */
		if (ret2 == 0) {
			ret = 0;
			goto done;
		}

		/* Reuse the whole workspace buffer for the next chunk. */
		workspace->strm.next_out = workspace->buf;
		workspace->strm.avail_out = workspace->buf_size;

		/* Input folio exhausted: move on to the next compressed folio. */
		if (workspace->strm.avail_in == 0) {
			unsigned long tmp;
			kunmap_local(data_in);
			bio_next_folio(&fi, &cb->bbio.bio);
			if (!fi.folio) {
				/* Nothing mapped any more, skip the unmap at done. */
				data_in = NULL;
				break;
			}
			ASSERT(folio_size(fi.folio) == min_folio_size);
			data_in = kmap_local_folio(fi.folio, 0);
			workspace->strm.next_in = data_in;
			tmp = srclen - workspace->strm.total_in;
			workspace->strm.avail_in = min(tmp, min_folio_size);
		}
	}
	/* Anything other than a cleanly finished stream is an error. */
	if (unlikely(ret != Z_STREAM_END)) {
		btrfs_err(cb->bbio.inode->root->fs_info,
		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(cb->bbio.inode->root),
			  btrfs_ino(cb->bbio.inode), cb->start);
		ret = -EIO;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->strm);
	if (data_in)
		kunmap_local(data_in);
	return ret;
}
  394. int zlib_decompress(struct list_head *ws, const u8 *data_in,
  395. struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
  396. size_t destlen)
  397. {
  398. struct workspace *workspace = list_entry(ws, struct workspace, list);
  399. int ret = 0;
  400. int wbits = MAX_WBITS;
  401. unsigned long to_copy;
  402. workspace->strm.next_in = data_in;
  403. workspace->strm.avail_in = srclen;
  404. workspace->strm.total_in = 0;
  405. workspace->strm.next_out = workspace->buf;
  406. workspace->strm.avail_out = workspace->buf_size;
  407. workspace->strm.total_out = 0;
  408. /* If it's deflate, and it's got no preset dictionary, then
  409. we can tell zlib to skip the adler32 check. */
  410. if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
  411. ((data_in[0] & 0x0f) == Z_DEFLATED) &&
  412. !(((data_in[0]<<8) + data_in[1]) % 31)) {
  413. wbits = -((data_in[0] >> 4) + 8);
  414. workspace->strm.next_in += 2;
  415. workspace->strm.avail_in -= 2;
  416. }
  417. ret = zlib_inflateInit2(&workspace->strm, wbits);
  418. if (unlikely(ret != Z_OK)) {
  419. struct btrfs_inode *inode = folio_to_inode(dest_folio);
  420. btrfs_err(inode->root->fs_info,
  421. "zlib decompression init failed, error %d root %llu inode %llu offset %llu",
  422. ret, btrfs_root_id(inode->root), btrfs_ino(inode),
  423. folio_pos(dest_folio));
  424. return -EIO;
  425. }
  426. /*
  427. * Everything (in/out buf) should be at most one sector, there should
  428. * be no need to switch any input/output buffer.
  429. */
  430. ret = zlib_inflate(&workspace->strm, Z_FINISH);
  431. to_copy = min(workspace->strm.total_out, destlen);
  432. if (ret != Z_STREAM_END)
  433. goto out;
  434. memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);
  435. out:
  436. if (unlikely(to_copy != destlen)) {
  437. struct btrfs_inode *inode = folio_to_inode(dest_folio);
  438. btrfs_err(inode->root->fs_info,
  439. "zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
  440. ret, btrfs_root_id(inode->root), btrfs_ino(inode),
  441. folio_pos(dest_folio), to_copy, destlen);
  442. ret = -EIO;
  443. } else {
  444. ret = 0;
  445. }
  446. zlib_inflateEnd(&workspace->strm);
  447. if (unlikely(to_copy < destlen))
  448. folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
  449. return ret;
  450. }
/* zlib compression levels: accepted range 1..9 and the default level. */
const struct btrfs_compress_levels btrfs_zlib_compress = {
	.min_level = 1,
	.max_level = 9,
	.default_level = BTRFS_ZLIB_DEFAULT_LEVEL,
};