gen-hash-testvecs.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. #!/usr/bin/env python3
  2. # SPDX-License-Identifier: GPL-2.0-or-later
  3. #
  4. # Script that generates test vectors for the given hash function.
  5. #
  6. # Copyright 2025 Google LLC
  7. import hashlib
  8. import hmac
  9. import sys
  10. DATA_LENS = [0, 1, 2, 3, 16, 32, 48, 49, 63, 64, 65, 127, 128, 129, 256, 511,
  11. 513, 1000, 3333, 4096, 4128, 4160, 4224, 16384]
  12. # Generate the given number of random bytes, using the length itself as the seed
  13. # for a simple linear congruential generator (LCG). The C test code uses the
  14. # same LCG with the same seeding strategy to reconstruct the data, ensuring
  15. # reproducibility without explicitly storing the data in the test vectors.
  16. def rand_bytes(length):
  17. seed = length
  18. out = []
  19. for _ in range(length):
  20. seed = (seed * 25214903917 + 11) % 2**48
  21. out.append((seed >> 16) % 256)
  22. return bytes(out)
  23. POLY1305_KEY_SIZE = 32
  24. # A straightforward, unoptimized implementation of Poly1305.
  25. # Reference: https://cr.yp.to/mac/poly1305-20050329.pdf
  26. class Poly1305:
  27. def __init__(self, key):
  28. assert len(key) == POLY1305_KEY_SIZE
  29. self.h = 0
  30. rclamp = 0x0ffffffc0ffffffc0ffffffc0fffffff
  31. self.r = int.from_bytes(key[:16], byteorder='little') & rclamp
  32. self.s = int.from_bytes(key[16:], byteorder='little')
  33. # Note: this supports partial blocks only at the end.
  34. def update(self, data):
  35. for i in range(0, len(data), 16):
  36. chunk = data[i:i+16]
  37. c = int.from_bytes(chunk, byteorder='little') + 2**(8 * len(chunk))
  38. self.h = ((self.h + c) * self.r) % (2**130 - 5)
  39. return self
  40. # Note: gen_additional_poly1305_testvecs() relies on this being
  41. # nondestructive, i.e. not changing any field of self.
  42. def digest(self):
  43. m = (self.h + self.s) % 2**128
  44. return m.to_bytes(16, byteorder='little')
  45. POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0])
  46. POLYVAL_BLOCK_SIZE = 16
  47. # A straightforward, unoptimized implementation of POLYVAL.
  48. # Reference: https://datatracker.ietf.org/doc/html/rfc8452
  49. class Polyval:
  50. def __init__(self, key):
  51. assert len(key) == 16
  52. self.h = int.from_bytes(key, byteorder='little')
  53. self.acc = 0
  54. # Note: this supports partial blocks only at the end.
  55. def update(self, data):
  56. for i in range(0, len(data), 16):
  57. # acc += block
  58. self.acc ^= int.from_bytes(data[i:i+16], byteorder='little')
  59. # acc = (acc * h * x^-128) mod POLYVAL_POLY
  60. product = 0
  61. for j in range(128):
  62. if (self.h & (1 << j)) != 0:
  63. product ^= self.acc << j
  64. if (product & (1 << j)) != 0:
  65. product ^= POLYVAL_POLY << j
  66. self.acc = product >> 128
  67. return self
  68. def digest(self):
  69. return self.acc.to_bytes(16, byteorder='little')
  70. def hash_init(alg):
  71. if alg == 'poly1305':
  72. # Use a fixed random key here, to present Poly1305 as an unkeyed hash.
  73. # This allows all the test cases for unkeyed hashes to work on Poly1305.
  74. return Poly1305(rand_bytes(POLY1305_KEY_SIZE))
  75. if alg == 'polyval':
  76. return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE))
  77. return hashlib.new(alg)
  78. def hash_update(ctx, data):
  79. ctx.update(data)
  80. def hash_final(ctx):
  81. return ctx.digest()
  82. def compute_hash(alg, data):
  83. ctx = hash_init(alg)
  84. hash_update(ctx, data)
  85. return hash_final(ctx)
  86. def print_bytes(prefix, value, bytes_per_line):
  87. for i in range(0, len(value), bytes_per_line):
  88. line = prefix + ''.join(f'0x{b:02x}, ' for b in value[i:i+bytes_per_line])
  89. print(f'{line.rstrip()}')
  90. def print_static_u8_array_definition(name, value):
  91. print('')
  92. print(f'static const u8 {name} = {{')
  93. print_bytes('\t', value, 8)
  94. print('};')
  95. def print_c_struct_u8_array_field(name, value):
  96. print(f'\t\t.{name} = {{')
  97. print_bytes('\t\t\t', value, 8)
  98. print('\t\t},')
  99. def alg_digest_size_const(alg):
  100. if alg.startswith('blake2'):
  101. return f'{alg.upper()}_HASH_SIZE'
  102. return f"{alg.upper().replace('-', '_')}_DIGEST_SIZE"
  103. def gen_unkeyed_testvecs(alg):
  104. print('')
  105. print('static const struct {')
  106. print('\tsize_t data_len;')
  107. print(f'\tu8 digest[{alg_digest_size_const(alg)}];')
  108. print('} hash_testvecs[] = {')
  109. for data_len in DATA_LENS:
  110. data = rand_bytes(data_len)
  111. print('\t{')
  112. print(f'\t\t.data_len = {data_len},')
  113. print_c_struct_u8_array_field('digest', compute_hash(alg, data))
  114. print('\t},')
  115. print('};')
  116. data = rand_bytes(4096)
  117. ctx = hash_init(alg)
  118. for data_len in range(len(data) + 1):
  119. hash_update(ctx, compute_hash(alg, data[:data_len]))
  120. print_static_u8_array_definition(
  121. f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]',
  122. hash_final(ctx))
  123. def gen_additional_sha3_testvecs():
  124. max_len = 4096
  125. in_data = rand_bytes(max_len)
  126. for alg in ['shake128', 'shake256']:
  127. ctx = hashlib.new('sha3-256')
  128. for in_len in range(max_len + 1):
  129. out_len = (in_len * 293) % (max_len + 1)
  130. out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len)
  131. ctx.update(out)
  132. print_static_u8_array_definition(f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]',
  133. ctx.digest())
  134. def gen_hmac_testvecs(alg):
  135. ctx = hmac.new(rand_bytes(32), digestmod=alg)
  136. data = rand_bytes(4096)
  137. for data_len in range(len(data) + 1):
  138. ctx.update(data[:data_len])
  139. key_len = data_len % 293
  140. key = rand_bytes(key_len)
  141. mac = hmac.digest(key, data[:data_len], alg)
  142. ctx.update(mac)
  143. print_static_u8_array_definition(
  144. f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]',
  145. ctx.digest())
  146. def gen_additional_blake2_testvecs(alg):
  147. if alg == 'blake2s':
  148. (max_key_size, max_hash_size) = (32, 32)
  149. elif alg == 'blake2b':
  150. (max_key_size, max_hash_size) = (64, 64)
  151. else:
  152. raise ValueError(f'Unsupported alg: {alg}')
  153. hashes = b''
  154. for key_len in range(max_key_size + 1):
  155. for out_len in range(1, max_hash_size + 1):
  156. h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len))
  157. h.update(rand_bytes(100))
  158. hashes += h.digest()
  159. print_static_u8_array_definition(
  160. f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]',
  161. compute_hash(alg, hashes))
  162. def nh_extract_int(bytestr, pos, length):
  163. assert pos % 8 == 0 and length % 8 == 0
  164. return int.from_bytes(bytestr[pos//8 : pos//8 + length//8], byteorder='little')
  165. # The NH "almost-universal hash function" used in Adiantum. This is a
  166. # straightforward translation of the pseudocode from Section 6.3 of the Adiantum
  167. # paper (https://eprint.iacr.org/2018/720.pdf), except the outer loop is omitted
  168. # because we assume len(msg) <= 1024. (The kernel's nh() function is only
  169. # expected to handle up to 1024 bytes; it's just called repeatedly as needed.)
  170. def nh(key, msg):
  171. (w, s, r, u) = (32, 2, 4, 8192)
  172. l = 8 * len(msg)
  173. assert l <= u
  174. assert l % (2*s*w) == 0
  175. h = bytes()
  176. for i in range(0, 2*s*w*r, 2*s*w):
  177. p = 0
  178. for j in range(0, l, 2*s*w):
  179. for k in range(0, w*s, w):
  180. a0 = nh_extract_int(key, i + j + k, w)
  181. a1 = nh_extract_int(key, i + j + k + s*w, w)
  182. b0 = nh_extract_int(msg, j + k, w)
  183. b1 = nh_extract_int(msg, j + k + s*w, w)
  184. p += ((a0 + b0) % 2**w) * ((a1 + b1) % 2**w)
  185. h += (p % 2**64).to_bytes(8, byteorder='little')
  186. return h
  187. def gen_nh_testvecs():
  188. NH_KEY_BYTES = 1072
  189. NH_MESSAGE_BYTES = 1024
  190. key = rand_bytes(NH_KEY_BYTES)
  191. msg = rand_bytes(NH_MESSAGE_BYTES)
  192. print_static_u8_array_definition('nh_test_key[NH_KEY_BYTES]', key)
  193. print_static_u8_array_definition('nh_test_msg[NH_MESSAGE_BYTES]', msg)
  194. for length in [16, 96, 256, 1024]:
  195. print_static_u8_array_definition(f'nh_test_val{length}[NH_HASH_BYTES]',
  196. nh(key, msg[:length]))
  197. def gen_additional_poly1305_testvecs():
  198. key = b'\xff' * POLY1305_KEY_SIZE
  199. data = b''
  200. ctx = Poly1305(key)
  201. for _ in range(32):
  202. for j in range(0, 4097, 16):
  203. ctx.update(b'\xff' * j)
  204. data += ctx.digest()
  205. print_static_u8_array_definition(
  206. 'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]',
  207. Poly1305(key).update(data).digest())
  208. def gen_additional_polyval_testvecs():
  209. key = b'\xff' * POLYVAL_BLOCK_SIZE
  210. hashes = b''
  211. for data_len in range(0, 4097, 16):
  212. hashes += Polyval(key).update(b'\xff' * data_len).digest()
  213. print_static_u8_array_definition(
  214. 'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]',
  215. Polyval(key).update(hashes).digest())
  216. if len(sys.argv) != 2:
  217. sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n')
  218. sys.stderr.write('ALGORITHM may be any supported by Python hashlib; or poly1305, polyval, or sha3.\n')
  219. sys.stderr.write('Example: gen-hash-testvecs.py sha512\n')
  220. sys.exit(1)
  221. alg = sys.argv[1]
  222. print('/* SPDX-License-Identifier: GPL-2.0-or-later */')
  223. print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */')
  224. if alg.startswith('blake2'):
  225. gen_unkeyed_testvecs(alg)
  226. gen_additional_blake2_testvecs(alg)
  227. elif alg == 'nh':
  228. gen_nh_testvecs()
  229. elif alg == 'poly1305':
  230. gen_unkeyed_testvecs(alg)
  231. gen_additional_poly1305_testvecs()
  232. elif alg == 'polyval':
  233. gen_unkeyed_testvecs(alg)
  234. gen_additional_polyval_testvecs()
  235. elif alg == 'sha3':
  236. print()
  237. print('/* SHA3-256 test vectors */')
  238. gen_unkeyed_testvecs('sha3-256')
  239. print()
  240. print('/* SHAKE test vectors */')
  241. gen_additional_sha3_testvecs()
  242. else:
  243. gen_unkeyed_testvecs(alg)
  244. gen_hmac_testvecs(alg)