zstd_compress_literals.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
  2. /*
  3. * Copyright (c) Meta Platforms, Inc. and affiliates.
  4. * All rights reserved.
  5. *
  6. * This source code is licensed under both the BSD-style license (found in the
  7. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  8. * in the COPYING file in the root directory of this source tree).
  9. * You may select, at your option, one of the above-listed licenses.
  10. */
  11. /*-*************************************
  12. * Dependencies
  13. ***************************************/
  14. #include "zstd_compress_literals.h"
  15. /* **************************************************************
  16. * Debug Traces
  17. ****************************************************************/
  18. #if DEBUGLEVEL >= 2
  19. static size_t showHexa(const void* src, size_t srcSize)
  20. {
  21. const BYTE* const ip = (const BYTE*)src;
  22. size_t u;
  23. for (u=0; u<srcSize; u++) {
  24. RAWLOG(5, " %02X", ip[u]); (void)ip;
  25. }
  26. RAWLOG(5, " \n");
  27. return srcSize;
  28. }
  29. #endif
  30. /* **************************************************************
  31. * Literals compression - special cases
  32. ****************************************************************/
  33. size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
  34. {
  35. BYTE* const ostart = (BYTE*)dst;
  36. U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
  37. DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
  38. RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
  39. switch(flSize)
  40. {
  41. case 1: /* 2 - 1 - 5 */
  42. ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
  43. break;
  44. case 2: /* 2 - 2 - 12 */
  45. MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
  46. break;
  47. case 3: /* 2 - 2 - 20 */
  48. MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
  49. break;
  50. default: /* not necessary : flSize is {1,2,3} */
  51. assert(0);
  52. }
  53. ZSTD_memcpy(ostart + flSize, src, srcSize);
  54. DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
  55. return srcSize + flSize;
  56. }
  57. static int allBytesIdentical(const void* src, size_t srcSize)
  58. {
  59. assert(srcSize >= 1);
  60. assert(src != NULL);
  61. { const BYTE b = ((const BYTE*)src)[0];
  62. size_t p;
  63. for (p=1; p<srcSize; p++) {
  64. if (((const BYTE*)src)[p] != b) return 0;
  65. }
  66. return 1;
  67. }
  68. }
  69. size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
  70. {
  71. BYTE* const ostart = (BYTE*)dst;
  72. U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
  73. assert(dstCapacity >= 4); (void)dstCapacity;
  74. assert(allBytesIdentical(src, srcSize));
  75. switch(flSize)
  76. {
  77. case 1: /* 2 - 1 - 5 */
  78. ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
  79. break;
  80. case 2: /* 2 - 2 - 12 */
  81. MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
  82. break;
  83. case 3: /* 2 - 2 - 20 */
  84. MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
  85. break;
  86. default: /* not necessary : flSize is {1,2,3} */
  87. assert(0);
  88. }
  89. ostart[flSize] = *(const BYTE*)src;
  90. DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
  91. return flSize+1;
  92. }
  93. /* ZSTD_minLiteralsToCompress() :
  94. * returns minimal amount of literals
  95. * for literal compression to even be attempted.
  96. * Minimum is made tighter as compression strategy increases.
  97. */
  98. static size_t
  99. ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
  100. {
  101. assert((int)strategy >= 0);
  102. assert((int)strategy <= 9);
  103. /* btultra2 : min 8 bytes;
  104. * then 2x larger for each successive compression strategy
  105. * max threshold 64 bytes */
  106. { int const shift = MIN(9-(int)strategy, 3);
  107. size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
  108. DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
  109. return mintc;
  110. }
  111. }
  112. size_t ZSTD_compressLiterals (
  113. void* dst, size_t dstCapacity,
  114. const void* src, size_t srcSize,
  115. void* entropyWorkspace, size_t entropyWorkspaceSize,
  116. const ZSTD_hufCTables_t* prevHuf,
  117. ZSTD_hufCTables_t* nextHuf,
  118. ZSTD_strategy strategy,
  119. int disableLiteralCompression,
  120. int suspectUncompressible,
  121. int bmi2)
  122. {
  123. size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
  124. BYTE* const ostart = (BYTE*)dst;
  125. U32 singleStream = srcSize < 256;
  126. SymbolEncodingType_e hType = set_compressed;
  127. size_t cLitSize;
  128. DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
  129. disableLiteralCompression, (U32)srcSize, dstCapacity);
  130. DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
  131. /* Prepare nextEntropy assuming reusing the existing table */
  132. ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
  133. if (disableLiteralCompression)
  134. return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
  135. /* if too small, don't even attempt compression (speed opt) */
  136. if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
  137. return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
  138. RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
  139. { HUF_repeat repeat = prevHuf->repeatMode;
  140. int const flags = 0
  141. | (bmi2 ? HUF_flags_bmi2 : 0)
  142. | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
  143. | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
  144. | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
  145. typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
  146. huf_compress_f huf_compress;
  147. if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
  148. huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
  149. cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
  150. src, srcSize,
  151. HUF_SYMBOLVALUE_MAX, LitHufLog,
  152. entropyWorkspace, entropyWorkspaceSize,
  153. (HUF_CElt*)nextHuf->CTable,
  154. &repeat, flags);
  155. DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
  156. if (repeat != HUF_repeat_none) {
  157. /* reused the existing table */
  158. DEBUGLOG(5, "reusing statistics from previous huffman block");
  159. hType = set_repeat;
  160. }
  161. }
  162. { size_t const minGain = ZSTD_minGain(srcSize, strategy);
  163. if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
  164. ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
  165. return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
  166. } }
  167. if (cLitSize==1) {
  168. /* A return value of 1 signals that the alphabet consists of a single symbol.
  169. * However, in some rare circumstances, it could be the compressed size (a single byte).
  170. * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
  171. * (it's also necessary to not generate statistics).
  172. * Therefore, in such a case, actively check that all bytes are identical. */
  173. if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
  174. ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
  175. return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
  176. } }
  177. if (hType == set_compressed) {
  178. /* using a newly constructed table */
  179. nextHuf->repeatMode = HUF_repeat_check;
  180. }
  181. /* Build header */
  182. switch(lhSize)
  183. {
  184. case 3: /* 2 - 2 - 10 - 10 */
  185. if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
  186. { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
  187. MEM_writeLE24(ostart, lhc);
  188. break;
  189. }
  190. case 4: /* 2 - 2 - 14 - 14 */
  191. assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
  192. { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
  193. MEM_writeLE32(ostart, lhc);
  194. break;
  195. }
  196. case 5: /* 2 - 2 - 18 - 18 */
  197. assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
  198. { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
  199. MEM_writeLE32(ostart, lhc);
  200. ostart[4] = (BYTE)(cLitSize >> 10);
  201. break;
  202. }
  203. default: /* not possible : lhSize is {3,4,5} */
  204. assert(0);
  205. }
  206. DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
  207. return lhSize+cLitSize;
  208. }