hist.c 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
  2. /* ******************************************************************
  3. * hist : Histogram functions
  4. * part of Finite State Entropy project
  5. * Copyright (c) Meta Platforms, Inc. and affiliates.
  6. *
  7. * You can contact the author at :
  8. * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
  9. * - Public forum : https://groups.google.com/forum/#!forum/lz4c
  10. *
  11. * This source code is licensed under both the BSD-style license (found in the
  12. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  13. * in the COPYING file in the root directory of this source tree).
  14. * You may select, at your option, one of the above-listed licenses.
  15. ****************************************************************** */
  16. /* --- dependencies --- */
  17. #include "../common/mem.h" /* U32, BYTE, etc. */
  18. #include "../common/debug.h" /* assert, DEBUGLOG */
  19. #include "../common/error_private.h" /* ERROR */
  20. #include "hist.h"
  21. /* --- Error management --- */
  22. unsigned HIST_isError(size_t code) { return ERR_isError(code); }
  23. /*-**************************************************************
  24. * Histogram functions
  25. ****************************************************************/
  26. void HIST_add(unsigned* count, const void* src, size_t srcSize)
  27. {
  28. const BYTE* ip = (const BYTE*)src;
  29. const BYTE* const end = ip + srcSize;
  30. while (ip<end) {
  31. count[*ip++]++;
  32. }
  33. }
  34. unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
  35. const void* src, size_t srcSize)
  36. {
  37. const BYTE* ip = (const BYTE*)src;
  38. const BYTE* const end = ip + srcSize;
  39. unsigned maxSymbolValue = *maxSymbolValuePtr;
  40. unsigned largestCount=0;
  41. ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
  42. if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
  43. while (ip<end) {
  44. assert(*ip <= maxSymbolValue);
  45. count[*ip++]++;
  46. }
  47. while (!count[maxSymbolValue]) maxSymbolValue--;
  48. *maxSymbolValuePtr = maxSymbolValue;
  49. { U32 s;
  50. for (s=0; s<=maxSymbolValue; s++)
  51. if (count[s] > largestCount) largestCount = count[s];
  52. }
  53. return largestCount;
  54. }
  55. typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
  56. /* HIST_count_parallel_wksp() :
  57. * store histogram into 4 intermediate tables, recombined at the end.
  58. * this design makes better use of OoO cpus,
  59. * and is noticeably faster when some values are heavily repeated.
  60. * But it needs some additional workspace for intermediate tables.
  61. * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
  62. * @return : largest histogram frequency,
  63. * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
  64. static size_t HIST_count_parallel_wksp(
  65. unsigned* count, unsigned* maxSymbolValuePtr,
  66. const void* source, size_t sourceSize,
  67. HIST_checkInput_e check,
  68. U32* const workSpace)
  69. {
  70. const BYTE* ip = (const BYTE*)source;
  71. const BYTE* const iend = ip+sourceSize;
  72. size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
  73. unsigned max=0;
  74. U32* const Counting1 = workSpace;
  75. U32* const Counting2 = Counting1 + 256;
  76. U32* const Counting3 = Counting2 + 256;
  77. U32* const Counting4 = Counting3 + 256;
  78. /* safety checks */
  79. assert(*maxSymbolValuePtr <= 255);
  80. if (!sourceSize) {
  81. ZSTD_memset(count, 0, countSize);
  82. *maxSymbolValuePtr = 0;
  83. return 0;
  84. }
  85. ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
  86. /* by stripes of 16 bytes */
  87. { U32 cached = MEM_read32(ip); ip += 4;
  88. while (ip < iend-15) {
  89. U32 c = cached; cached = MEM_read32(ip); ip += 4;
  90. Counting1[(BYTE) c ]++;
  91. Counting2[(BYTE)(c>>8) ]++;
  92. Counting3[(BYTE)(c>>16)]++;
  93. Counting4[ c>>24 ]++;
  94. c = cached; cached = MEM_read32(ip); ip += 4;
  95. Counting1[(BYTE) c ]++;
  96. Counting2[(BYTE)(c>>8) ]++;
  97. Counting3[(BYTE)(c>>16)]++;
  98. Counting4[ c>>24 ]++;
  99. c = cached; cached = MEM_read32(ip); ip += 4;
  100. Counting1[(BYTE) c ]++;
  101. Counting2[(BYTE)(c>>8) ]++;
  102. Counting3[(BYTE)(c>>16)]++;
  103. Counting4[ c>>24 ]++;
  104. c = cached; cached = MEM_read32(ip); ip += 4;
  105. Counting1[(BYTE) c ]++;
  106. Counting2[(BYTE)(c>>8) ]++;
  107. Counting3[(BYTE)(c>>16)]++;
  108. Counting4[ c>>24 ]++;
  109. }
  110. ip-=4;
  111. }
  112. /* finish last symbols */
  113. while (ip<iend) Counting1[*ip++]++;
  114. { U32 s;
  115. for (s=0; s<256; s++) {
  116. Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
  117. if (Counting1[s] > max) max = Counting1[s];
  118. } }
  119. { unsigned maxSymbolValue = 255;
  120. while (!Counting1[maxSymbolValue]) maxSymbolValue--;
  121. if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
  122. *maxSymbolValuePtr = maxSymbolValue;
  123. ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */
  124. }
  125. return (size_t)max;
  126. }
  127. /* HIST_countFast_wksp() :
  128. * Same as HIST_countFast(), but using an externally provided scratch buffer.
  129. * `workSpace` is a writable buffer which must be 4-bytes aligned,
  130. * `workSpaceSize` must be >= HIST_WKSP_SIZE
  131. */
  132. size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
  133. const void* source, size_t sourceSize,
  134. void* workSpace, size_t workSpaceSize)
  135. {
  136. if (sourceSize < 1500) /* heuristic threshold */
  137. return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
  138. if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
  139. if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
  140. return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
  141. }
  142. /* HIST_count_wksp() :
  143. * Same as HIST_count(), but using an externally provided scratch buffer.
  144. * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
  145. size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
  146. const void* source, size_t sourceSize,
  147. void* workSpace, size_t workSpaceSize)
  148. {
  149. if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
  150. if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
  151. if (*maxSymbolValuePtr < 255)
  152. return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
  153. *maxSymbolValuePtr = 255;
  154. return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
  155. }