bitstream.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
  2. /* ******************************************************************
  3. * bitstream
  4. * Part of FSE library
  5. * Copyright (c) Meta Platforms, Inc. and affiliates.
  6. *
  7. * You can contact the author at :
  8. * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  9. *
  10. * This source code is licensed under both the BSD-style license (found in the
  11. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  12. * in the COPYING file in the root directory of this source tree).
  13. * You may select, at your option, one of the above-listed licenses.
  14. ****************************************************************** */
  15. #ifndef BITSTREAM_H_MODULE
  16. #define BITSTREAM_H_MODULE
  17. /*
  18. * This API consists of small unitary functions, which must be inlined for best performance.
  19. * Since link-time-optimization is not available for all compilers,
  20. * these functions are defined into a .h to be included.
  21. */
  22. /*-****************************************
  23. * Dependencies
  24. ******************************************/
  25. #include "mem.h" /* unaligned access routines */
  26. #include "compiler.h" /* UNLIKELY() */
  27. #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
  28. #include "error_private.h" /* error codes and messages */
  29. #include "bits.h" /* ZSTD_highbit32 */
  30. /*=========================================
  31. * Target specific
  32. =========================================*/
  /* Minimum number of bits available in the accumulator after a full reload.
   * Matches the reload guarantee of (8*sizeof(bitContainer)) - 7 bits :
   * 25 for a 32-bit register, 57 for a 64-bit register.
   * STREAM_ACCUMULATOR_MIN selects between the two using MEM_32bits(). */
  #define STREAM_ACCUMULATOR_MIN_32  25
  #define STREAM_ACCUMULATOR_MIN_64  57
  #define STREAM_ACCUMULATOR_MIN \
      ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
  36. /*-******************************************
  37. * bitStream encoding API (write forward)
  38. ********************************************/
  /* Type of the local bit accumulator : same width as size_t. */
  typedef size_t BitContainerType;

  /* bitStream can mix input from multiple sources.
   * A critical property of these streams is that they encode and decode in **reverse** direction.
   * So the first bit sequence you add will be the last to be read, like a LIFO stack.
   */
  typedef struct {
      BitContainerType bitContainer;  /* bits added but not yet flushed to memory */
      unsigned         bitPos;        /* number of bits currently held in bitContainer */
      char*            startPtr;      /* beginning of the destination buffer */
      char*            ptr;           /* position of the next flush */
      char*            endPtr;        /* startPtr + dstCapacity - sizeof(bitContainer), set by BIT_initCStream() */
  } BIT_CStream_t;
  51. MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
  52. MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
  53. MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
  54. MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
  55. /* Start with initCStream, providing the size of buffer to write into.
  56. * bitStream will never write outside of this buffer.
  57. * `dstCapacity` must be > sizeof(bitC->bitContainer), otherwise @return will be an error code.
  58. *
  59. * bits are first added to a local register.
  60. * Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
  61. * Writing data into memory is an explicit operation, performed by the flushBits function.
  62. * Hence keep track how many bits are potentially stored into local register to avoid register overflow.
  63. * After a flushBits, a maximum of 7 bits might still be stored into local register.
  64. *
  65. * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
  66. *
  67. * Last operation is to close the bitStream.
  68. * The function returns the final size of CStream in bytes.
  69. * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
  70. */
  71. /*-********************************************
  72. * bitStream decoding API (read backward)
  73. **********************************************/
  74. typedef struct {
  75. BitContainerType bitContainer;
  76. unsigned bitsConsumed;
  77. const char* ptr;
  78. const char* start;
  79. const char* limitPtr;
  80. } BIT_DStream_t;
  /* Result of BIT_reloadDStream(). */
  typedef enum {
      BIT_DStream_unfinished  = 0,  /* fully refilled */
      BIT_DStream_endOfBuffer = 1,  /* still some bits left in bitstream */
      BIT_DStream_completed   = 2,  /* bitstream entirely consumed, bit-exact */
      BIT_DStream_overflow    = 3   /* user requested more bits than present in bitstream */
  } BIT_DStream_status;
  86. MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
  87. MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
  88. MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
  89. MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
  90. /* Start by invoking BIT_initDStream().
  91. * A chunk of the bitStream is then stored into a local register.
  92. * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
  93. * You can then retrieve bitFields stored into the local register, **in reverse order**.
  94. * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
  95. * A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
  96. * Otherwise, it can be less than that, so proceed accordingly.
  97. * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
  98. */
  99. /*-****************************************
  100. * unsafe API
  101. ******************************************/
  102. MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
  103. /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
  104. MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
  105. /* unsafe version; does not check buffer overflow */
  106. MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
  107. /* faster, but works only if nbBits >= 1 */
  108. /*===== Local Constants =====*/
  /* BIT_mask[n] has its n lowest bits set, for n in [0, 31]. */
  static const unsigned BIT_mask[] = {
      0x00000000U, 0x00000001U, 0x00000003U, 0x00000007U,
      0x0000000FU, 0x0000001FU, 0x0000003FU, 0x0000007FU,
      0x000000FFU, 0x000001FFU, 0x000003FFU, 0x000007FFU,
      0x00000FFFU, 0x00001FFFU, 0x00003FFFU, 0x00007FFFU,
      0x0000FFFFU, 0x0001FFFFU, 0x0003FFFFU, 0x0007FFFFU,
      0x000FFFFFU, 0x001FFFFFU, 0x003FFFFFU, 0x007FFFFFU,
      0x00FFFFFFU, 0x01FFFFFFU, 0x03FFFFFFU, 0x07FFFFFFU,
      0x0FFFFFFFU, 0x1FFFFFFFU, 0x3FFFFFFFU, 0x7FFFFFFFU
  };   /* up to 31 bits */
  /* Number of entries in BIT_mask. */
  #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
  117. /*-**************************************************************
  118. * bitStream encoding
  119. ****************************************************************/
  120. /*! BIT_initCStream() :
  121. * `dstCapacity` must be > sizeof(size_t)
  122. * @return : 0 if success,
  123. * otherwise an error code (can be tested using ERR_isError()) */
  124. MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
  125. void* startPtr, size_t dstCapacity)
  126. {
  127. bitC->bitContainer = 0;
  128. bitC->bitPos = 0;
  129. bitC->startPtr = (char*)startPtr;
  130. bitC->ptr = bitC->startPtr;
  131. bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
  132. if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
  133. return 0;
  134. }
  135. FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)
  136. {
  137. assert(nbBits < BIT_MASK_SIZE);
  138. return bitContainer & BIT_mask[nbBits];
  139. }
  140. /*! BIT_addBits() :
  141. * can add up to 31 bits into `bitC`.
  142. * Note : does not check for register overflow ! */
  143. MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
  144. BitContainerType value, unsigned nbBits)
  145. {
  146. DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
  147. assert(nbBits < BIT_MASK_SIZE);
  148. assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  149. bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
  150. bitC->bitPos += nbBits;
  151. }
  152. /*! BIT_addBitsFast() :
  153. * works only if `value` is _clean_,
  154. * meaning all high bits above nbBits are 0 */
  155. MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
  156. BitContainerType value, unsigned nbBits)
  157. {
  158. assert((value>>nbBits) == 0);
  159. assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  160. bitC->bitContainer |= value << bitC->bitPos;
  161. bitC->bitPos += nbBits;
  162. }
  163. /*! BIT_flushBitsFast() :
  164. * assumption : bitContainer has not overflowed
  165. * unsafe version; does not check buffer overflow */
  166. MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
  167. {
  168. size_t const nbBytes = bitC->bitPos >> 3;
  169. assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  170. assert(bitC->ptr <= bitC->endPtr);
  171. MEM_writeLEST(bitC->ptr, bitC->bitContainer);
  172. bitC->ptr += nbBytes;
  173. bitC->bitPos &= 7;
  174. bitC->bitContainer >>= nbBytes*8;
  175. }
  176. /*! BIT_flushBits() :
  177. * assumption : bitContainer has not overflowed
  178. * safe version; check for buffer overflow, and prevents it.
  179. * note : does not signal buffer overflow.
  180. * overflow will be revealed later on using BIT_closeCStream() */
  181. MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
  182. {
  183. size_t const nbBytes = bitC->bitPos >> 3;
  184. assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  185. assert(bitC->ptr <= bitC->endPtr);
  186. MEM_writeLEST(bitC->ptr, bitC->bitContainer);
  187. bitC->ptr += nbBytes;
  188. if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
  189. bitC->bitPos &= 7;
  190. bitC->bitContainer >>= nbBytes*8;
  191. }
  192. /*! BIT_closeCStream() :
  193. * @return : size of CStream, in bytes,
  194. * or 0 if it could not fit into dstBuffer */
  195. MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
  196. {
  197. BIT_addBitsFast(bitC, 1, 1); /* endMark */
  198. BIT_flushBits(bitC);
  199. if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
  200. return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
  201. }
  202. /*-********************************************************
  203. * bitStream decoding
  204. **********************************************************/
  205. /*! BIT_initDStream() :
  206. * Initialize a BIT_DStream_t.
  207. * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
  208. * `srcSize` must be the *exact* size of the bitStream, in bytes.
  209. * @return : size of stream (== srcSize), or an errorCode if a problem is detected
  210. */
  211. MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
  212. {
  213. if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
  214. bitD->start = (const char*)srcBuffer;
  215. bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
  216. if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
  217. bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
  218. bitD->bitContainer = MEM_readLEST(bitD->ptr);
  219. { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
  220. bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
  221. if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
  222. } else {
  223. bitD->ptr = bitD->start;
  224. bitD->bitContainer = *(const BYTE*)(bitD->start);
  225. switch(srcSize)
  226. {
  227. case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
  228. ZSTD_FALLTHROUGH;
  229. case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
  230. ZSTD_FALLTHROUGH;
  231. case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
  232. ZSTD_FALLTHROUGH;
  233. case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
  234. ZSTD_FALLTHROUGH;
  235. case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
  236. ZSTD_FALLTHROUGH;
  237. case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
  238. ZSTD_FALLTHROUGH;
  239. default: break;
  240. }
  241. { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
  242. bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
  243. if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
  244. }
  245. bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
  246. }
  247. return srcSize;
  248. }
  249. FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
  250. {
  251. return bitContainer >> start;
  252. }
  253. FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
  254. {
  255. U32 const regMask = sizeof(bitContainer)*8 - 1;
  256. /* if start > regMask, bitstream is corrupted, and result is undefined */
  257. assert(nbBits < BIT_MASK_SIZE);
  258. /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
  259. * than accessing memory. When bmi2 instruction is not present, we consider
  260. * such cpus old (pre-Haswell, 2013) and their performance is not of that
  261. * importance.
  262. */
  263. #if defined(__x86_64__) || defined(_M_X64)
  264. return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
  265. #else
  266. return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
  267. #endif
  268. }
  269. /*! BIT_lookBits() :
  270. * Provides next n bits from local register.
  271. * local register is not modified.
  272. * On 32-bits, maxNbBits==24.
  273. * On 64-bits, maxNbBits==56.
  274. * @return : value extracted */
  275. FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
  276. {
  277. /* arbitrate between double-shift and shift+mask */
  278. #if 1
  279. /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
  280. * bitstream is likely corrupted, and result is undefined */
  281. return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
  282. #else
  283. /* this code path is slower on my os-x laptop */
  284. U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
  285. return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
  286. #endif
  287. }
  288. /*! BIT_lookBitsFast() :
  289. * unsafe version; only works if nbBits >= 1 */
  290. MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
  291. {
  292. U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
  293. assert(nbBits >= 1);
  294. return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
  295. }
  296. FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
  297. {
  298. bitD->bitsConsumed += nbBits;
  299. }
  300. /*! BIT_readBits() :
  301. * Read (consume) next n bits from local register and update.
  302. * Pay attention to not read more than nbBits contained into local register.
  303. * @return : extracted value. */
  304. FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
  305. {
  306. BitContainerType const value = BIT_lookBits(bitD, nbBits);
  307. BIT_skipBits(bitD, nbBits);
  308. return value;
  309. }
  310. /*! BIT_readBitsFast() :
  311. * unsafe version; only works if nbBits >= 1 */
  312. MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
  313. {
  314. BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);
  315. assert(nbBits >= 1);
  316. BIT_skipBits(bitD, nbBits);
  317. return value;
  318. }
  319. /*! BIT_reloadDStream_internal() :
  320. * Simple variant of BIT_reloadDStream(), with two conditions:
  321. * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
  322. * 2. look window is valid after shifted down : bitD->ptr >= bitD->start
  323. */
  324. MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
  325. {
  326. assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
  327. bitD->ptr -= bitD->bitsConsumed >> 3;
  328. assert(bitD->ptr >= bitD->start);
  329. bitD->bitsConsumed &= 7;
  330. bitD->bitContainer = MEM_readLEST(bitD->ptr);
  331. return BIT_DStream_unfinished;
  332. }
  333. /*! BIT_reloadDStreamFast() :
  334. * Similar to BIT_reloadDStream(), but with two differences:
  335. * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
  336. * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
  337. * point you must use BIT_reloadDStream() to reload.
  338. */
  339. MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
  340. {
  341. if (UNLIKELY(bitD->ptr < bitD->limitPtr))
  342. return BIT_DStream_overflow;
  343. return BIT_reloadDStream_internal(bitD);
  344. }
  345. /*! BIT_reloadDStream() :
  346. * Refill `bitD` from buffer previously set in BIT_initDStream() .
  347. * This function is safe, it guarantees it will not never beyond src buffer.
  348. * @return : status of `BIT_DStream_t` internal register.
  349. * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
  350. FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
  351. {
  352. /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
  353. if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
  354. static const BitContainerType zeroFilled = 0;
  355. bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
  356. /* overflow detected, erroneous scenario or end of stream: no update */
  357. return BIT_DStream_overflow;
  358. }
  359. assert(bitD->ptr >= bitD->start);
  360. if (bitD->ptr >= bitD->limitPtr) {
  361. return BIT_reloadDStream_internal(bitD);
  362. }
  363. if (bitD->ptr == bitD->start) {
  364. /* reached end of bitStream => no update */
  365. if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
  366. return BIT_DStream_completed;
  367. }
  368. /* start < ptr < limitPtr => cautious update */
  369. { U32 nbBytes = bitD->bitsConsumed >> 3;
  370. BIT_DStream_status result = BIT_DStream_unfinished;
  371. if (bitD->ptr - nbBytes < bitD->start) {
  372. nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
  373. result = BIT_DStream_endOfBuffer;
  374. }
  375. bitD->ptr -= nbBytes;
  376. bitD->bitsConsumed -= nbBytes*8;
  377. bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
  378. return result;
  379. }
  380. }
  381. /*! BIT_endOfDStream() :
  382. * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
  383. */
  384. MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
  385. {
  386. return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
  387. }
  388. #endif /* BITSTREAM_H_MODULE */