/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions
 *
 * Copyright (C) 2016 - 2018 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */
  10. #include <linux/linkage.h>
  11. #include <asm/assembler.h>
  12. .cpu generic+crc+crypto
  /*
   * bitle reg
   *
   * Bit-order fixup selected when a CRC macro is instantiated with
   * order=le.  Expands to nothing: reg is left untouched.
   */
  .macro      bitle, reg
  .endm
  /*
   * bitbe reg
   *
   * Bit-order fixup selected when a CRC macro is instantiated with
   * order=be.  Reverses the bit order of reg in place.
   */
  .macro      bitbe, reg
      rbit        \reg, \reg
  .endm
  /*
   * bytele reg
   *
   * Fixup applied to a single loaded byte when order=le.
   * Expands to nothing: reg is left untouched.
   */
  .macro      bytele, reg
  .endm
  /*
   * bytebe reg
   *
   * Fixup applied to a single loaded byte when order=be.
   * Reverses the bits of reg and shifts the result right by 24, so for a
   * 32-bit register holding a zero-extended byte the outcome is that byte
   * with its bits reversed, back in bits [7:0].
   */
  .macro      bytebe, reg
      rbit        \reg, \reg
      lsr         \reg, \reg, #24
  .endm
  /*
   * hwordle reg
   *
   * Fixup applied to a loaded halfword when order=le.
   * CPU_BE() is supplied by the included headers and keeps its argument
   * only on big-endian builds; there the two bytes of each halfword in
   * reg are swapped.  On little-endian builds nothing is emitted.
   */
  .macro      hwordle, reg
  CPU_BE( rev16       \reg, \reg      )
  .endm
  /*
   * hwordbe reg
   *
   * Fixup applied to a loaded halfword when order=be.
   *
   *   little-endian build: byte-reverse reg, then bit-reverse it
   *   big-endian build:    bit-reverse reg, then shift it right by 16
   *
   * Either way, when reg is a 32-bit register holding a zero-extended
   * halfword load, the result has the first memory byte in bits [7:0]
   * and the second in bits [15:8], each with its bits reversed.
   */
  .macro      hwordbe, reg
  CPU_LE( rev         \reg, \reg      )
      rbit        \reg, \reg
  CPU_BE( lsr         \reg, \reg, #16 )
  .endm
  /*
   * le reg [, reg ...]
   *
   * Data fixup for order=le, applied to every register in the list.
   * On big-endian builds each register is byte-reversed; on
   * little-endian builds nothing is emitted.
   */
  .macro      le, regs:vararg
      .irp        gpr, \regs
  CPU_BE( rev         \gpr, \gpr      )
      .endr
  .endm
  /*
   * be reg [, reg ...]
   *
   * Data fixup for order=be, applied to every register in the list in
   * two passes:
   *
   *   1. on little-endian builds only, byte-reverse every register
   *   2. bit-reverse every register
   *
   * The passes are kept separate, so all pass 1 instructions are emitted
   * before any pass 2 instruction.
   */
  .macro      be, regs:vararg
      .irp        gpr, \regs
  CPU_LE( rev         \gpr, \gpr      )
      .endr

      .irp        gpr, \regs
      rbit        \gpr, \gpr
      .endr
  .endm
  /*
   * __crc32 c, order=le
   *
   * Emits a complete leaf function body (it ends in ret) that folds a
   * buffer into a CRC using the scalar CRC instructions.
   *
   *   c      empty or "c"; pasted into the mnemonic to select
   *          crc32{b,h,w,x} or crc32c{b,h,w,x}
   *   order  le or be; selects the bit<order>, byte<order>, hword<order>
   *          and <order> fixup macros
   *
   * In:        w0 = CRC, x1 = buffer, x2 = length in bytes (unsigned)
   * Out:       w0 = updated CRC
   * Clobbers:  x1-x8, condition flags
   *
   * Layout of the work:
   *   len <  16  bits 3..0 of len select an 8/4/2/1 byte step each
   *   len >= 16  the leading len % 32 bytes are consumed branchlessly,
   *              then the rest goes through a 32-bytes-per-pass loop
   */
  .macro      __crc32, c, order=le
      bit\order       w0
      cmp             x2, #16
      b.lo            8f                      // fewer than 16 bytes (unsigned compare)

      and             x7, x2, #0x1f           // x7 := len % 32, consumed first
      and             x2, x2, #~0x1f          // x2 := bytes left for the 32-byte loop
      cbz             x7, 32f                 // multiple of 32 bytes

      /*
       * x7 is in 1..31 here.  x3:x4 get the first 16 bytes and x5:x6 the
       * 16 bytes starting at offset x7 % 16.  Both loads stay inside the
       * buffer because len >= 16.  x1 is advanced past all x7 bytes up
       * front.
       */
      and             x8, x7, #0xf
      ldp             x3, x4, [x1]
      add             x8, x8, x1
      add             x1, x1, x7
      ldp             x5, x6, [x8]
      \order          x3, x4, x5, x6

      /*
       * For each bit of x7 the CRC step is always computed into w8 and
       * only committed to w0 (csel ... eq keeps the old value when the
       * bit is clear).  When a step is committed, the not yet consumed
       * data is moved down into the low bits of x3/w3 for the next one.
       */
      tst             x7, #8                  // 8 bytes from x3, continue with x4
      crc32\c\()x     w8, w0, x3
      csel            x3, x3, x4, eq
      csel            w0, w0, w8, eq
      tst             x7, #4                  // 4 bytes, continue with the upper word
      lsr             x4, x3, #32
      crc32\c\()w     w8, w0, w3
      csel            x3, x3, x4, eq
      csel            w0, w0, w8, eq
      tst             x7, #2                  // 2 bytes, continue with the upper halfword
      lsr             w4, w3, #16
      crc32\c\()h     w8, w0, w3
      csel            w3, w3, w4, eq
      csel            w0, w0, w8, eq
      tst             x7, #1                  // 1 byte
      crc32\c\()b     w8, w0, w3
      csel            w0, w0, w8, eq
      tst             x7, #16                 // 16 bytes from x5:x6
      crc32\c\()x     w8, w0, x5
      crc32\c\()x     w8, w8, x6
      csel            w0, w0, w8, eq
      cbz             x2, 0f                  // no full 32-byte block left

      /* Main loop: 32 bytes per pass, x2 counts the remaining bytes */
  32:
      ldp             x3, x4, [x1], #32
      sub             x2, x2, #32
      ldp             x5, x6, [x1, #-16]
      \order          x3, x4, x5, x6
      crc32\c\()x     w0, w0, x3
      crc32\c\()x     w0, w0, x4
      crc32\c\()x     w0, w0, x5
      crc32\c\()x     w0, w0, x6
      cbnz            x2, 32b
  0:
      bit\order       w0
      ret

      /* Short input: x2 < 16, one optional step per length bit */
  8:
      tbz             x2, #3, 4f
      ldr             x3, [x1], #8
      \order          x3
      crc32\c\()x     w0, w0, x3
  4:
      tbz             x2, #2, 2f
      ldr             w3, [x1], #4
      \order          w3
      crc32\c\()w     w0, w0, w3
  2:
      tbz             x2, #1, 1f
      ldrh            w3, [x1], #2
      hword\order     w3
      crc32\c\()h     w0, w0, w3
  1:
      tbz             x2, #0, 0f
      ldrb            w3, [x1]
      byte\order      w3
      crc32\c\()b     w0, w0, w3
  0:
      bit\order       w0
      ret
  .endm
  /*
   * crc32_le_arm64
   *
   * __crc32 instantiated with its defaults: crc32{b,h,w,x} instructions
   * and the le fixups.  Register interface as used by __crc32:
   * w0 = CRC in/out, x1 = buffer, x2 = length in bytes.
   */
  .align      5
  SYM_FUNC_START(crc32_le_arm64)
      __crc32
  SYM_FUNC_END(crc32_le_arm64)
  /*
   * crc32c_le_arm64
   *
   * __crc32 instantiated with c, i.e. the crc32c{b,h,w,x} instructions,
   * and the default le fixups.  Register interface as used by __crc32:
   * w0 = CRC in/out, x1 = buffer, x2 = length in bytes.
   */
  .align      5
  SYM_FUNC_START(crc32c_le_arm64)
      __crc32         c
  SYM_FUNC_END(crc32c_le_arm64)
  /*
   * crc32_be_arm64
   *
   * __crc32 instantiated with order=be: crc32{b,h,w,x} instructions
   * with the be fixups applied to the CRC and to the loaded data.
   * Register interface as used by __crc32:
   * w0 = CRC in/out, x1 = buffer, x2 = length in bytes.
   */
  .align      5
  SYM_FUNC_START(crc32_be_arm64)
      __crc32         order=be
  SYM_FUNC_END(crc32_be_arm64)
  in          .req    x1
  len         .req    x2

  /*
   * crc4way insn, table, order=le
   *
   * Emits a complete leaf function body (it ends in ret) that runs four
   * independent CRC computations over four contiguous quarters of the
   * input and merges them with carry-less multiplies.
   *
   *   insn   64-bit CRC instruction to use (the callers in this file
   *          pass crc32x or crc32cx)
   *   table  label of the folding coefficient table matching insn
   *   order  le or be; selects the bit<order> and <order> fixup macros
   *
   * w0: input CRC at entry, output CRC at exit
   * x1: pointer to input buffer
   * x2: length of input in bytes
   *
   * Only whole 64-byte blocks are consumed; on return x1 points just
   * past the last consumed block and the trailing len % 64 bytes are
   * untouched.  If there is no whole block, the CRC is returned as is.
   *
   * Clobbers: x1-x17, v0-v5, condition flags
   */
  .macro      crc4way, insn, table, order=le
      bit\order       w0
      lsr             len, len, #6            // len := # of 64-byte blocks
      cbz             len, .Lc\@              // no whole block: avoid counter underflow

      /* Process up to 64 blocks of 64 bytes at a time */
  .La\@:
      mov             x3, #64
      cmp             len, #64
      csel            x3, x3, len, hi         // x3 := min(len, 64)
      sub             len, len, x3

      /* Divide the input into 4 contiguous blocks */
      add             x4, x3, x3, lsl #1      // x4 := 3 * x3
      add             x7, in, x3, lsl #4      // x7 := in + 16 * x3
      add             x8, in, x3, lsl #5      // x8 := in + 32 * x3
      add             x9, in, x4, lsl #4      // x9 := in + 16 * x4

      /* Load the folding coefficients from the lookup table */
      adr_l           x5, \table - 12         // entry 0 omitted
      add             x5, x5, x4, lsl #2      // x5 += 12 * x3
      ldp             s0, s1, [x5]
      ldr             s2, [x5, #8]

      /* Zero init partial CRCs for this iteration */
      mov             w4, wzr
      mov             w5, wzr
      mov             w6, wzr
      mov             x17, xzr

      /*
       * x3 passes of 16 bytes per lane.  The fourth lane (w6) only takes
       * the first 8 bytes of its 16 here; the second 8 bytes (x17) are
       * fed in at the top of the next pass, and the x17 of the last pass
       * is left over for the combine step.
       */
  .Lb\@:
      sub             x3, x3, #1
      \insn           w6, w6, x17
      ldp             x10, x11, [in], #16
      ldp             x12, x13, [x7], #16
      ldp             x14, x15, [x8], #16
      ldp             x16, x17, [x9], #16

      \order          x10, x11, x12, x13, x14, x15, x16, x17

      /* Apply the CRC transform to 4 16-byte blocks in parallel */
      \insn           w0, w0, x10
      \insn           w4, w4, x12
      \insn           w5, w5, x14
      \insn           w6, w6, x16
      \insn           w0, w0, x11
      \insn           w4, w4, x13
      \insn           w5, w5, x15
      cbnz            x3, .Lb\@

      /*
       * Combine the 4 partial results into w0: multiply the first three
       * CRCs by their coefficients (carry-less), XOR the low 64 bits of
       * the products into the pending 8 bytes in x17, and run that
       * through one more CRC step on top of the fourth CRC.
       */
      mov             v3.d[0], x0
      mov             v4.d[0], x4
      mov             v5.d[0], x5
      pmull           v0.1q, v0.1d, v3.1d
      pmull           v1.1q, v1.1d, v4.1d
      pmull           v2.1q, v2.1d, v5.1d
      eor             v0.8b, v0.8b, v1.8b
      eor             v0.8b, v0.8b, v2.8b
      mov             x5, v0.d[0]
      eor             x5, x5, x17
      \insn           w0, w6, x5

      mov             in, x9                  // x9 ended up just past this chunk
      cbnz            len, .La\@

  .Lc\@:
      bit\order       w0
      ret
  .endm
  /*
   * crc32c_le_arm64_4way
   *
   * crc4way instantiated with the crc32cx instruction, the .L0
   * coefficient table and the default le fixups.
   * w0 = CRC in/out, x1 = buffer, x2 = length in bytes.
   */
  .align      5
  SYM_FUNC_START(crc32c_le_arm64_4way)
      crc4way         crc32cx, .L0
  SYM_FUNC_END(crc32c_le_arm64_4way)
  /*
   * crc32_le_arm64_4way
   *
   * crc4way instantiated with the crc32x instruction, the .L1
   * coefficient table and the default le fixups.
   * w0 = CRC in/out, x1 = buffer, x2 = length in bytes.
   */
  .align      5
  SYM_FUNC_START(crc32_le_arm64_4way)
      crc4way         crc32x, .L1
  SYM_FUNC_END(crc32_le_arm64_4way)
  /*
   * crc32_be_arm64_4way
   *
   * crc4way instantiated with the crc32x instruction, the .L1
   * coefficient table and the be fixups.
   * w0 = CRC in/out, x1 = buffer, x2 = length in bytes.
   */
  .align      5
  SYM_FUNC_START(crc32_be_arm64_4way)
      crc4way         crc32x, .L1, be
  SYM_FUNC_END(crc32_be_arm64_4way)
  .section    .rodata, "a", %progbits
  .align      6

  /*
   * .L0: folding coefficients passed to crc4way together with crc32cx.
   *
   * One row of three 32-bit words per chunk size.  crc4way reads the row
   * at .L0 - 12 + 12 * n, where n (1..64) is the number of 64-byte
   * blocks handled in one outer iteration, so the first row below is
   * n = 1 and there is no row for n = 0.  The three words are loaded
   * into s0, s1 and s2, in that order.
   */
  .L0:
  .long       0xddc0152b, 0xba4fc28e, 0x493c7d27      // n = 1
  .long       0x0715ce53, 0x9e4addf8, 0xba4fc28e      // n = 2
  .long       0xc96cfdc0, 0x0715ce53, 0xddc0152b      // n = 3
  .long       0xab7aff2a, 0x0d3b6092, 0x9e4addf8      // n = 4
  .long       0x299847d5, 0x878a92a7, 0x39d3b296      // n = 5
  .long       0xb6dd949b, 0xab7aff2a, 0x0715ce53      // n = 6
  .long       0xa60ce07b, 0x83348832, 0x47db8317      // n = 7
  .long       0xd270f1a2, 0xb9e02b86, 0x0d3b6092      // n = 8
  .long       0x65863b64, 0xb6dd949b, 0xc96cfdc0      // n = 9
  .long       0xb3e32c28, 0xbac2fd7b, 0x878a92a7      // n = 10
  .long       0xf285651c, 0xce7f39f4, 0xdaece73e      // n = 11
  .long       0x271d9844, 0xd270f1a2, 0xab7aff2a      // n = 12
  .long       0x6cb08e5c, 0x2b3cac5d, 0x2162d385      // n = 13
  .long       0xcec3662e, 0x1b03397f, 0x83348832      // n = 14
  .long       0x8227bb8a, 0xb3e32c28, 0x299847d5      // n = 15
  .long       0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86      // n = 16
  .long       0xf6076544, 0x10746f3c, 0x18b33a4e      // n = 17
  .long       0x98d8d9cb, 0x271d9844, 0xb6dd949b      // n = 18
  .long       0x57a3d037, 0x93a5f730, 0x78d9ccb7      // n = 19
  .long       0x3771e98f, 0x6b749fb2, 0xbac2fd7b      // n = 20
  .long       0xe0ac139e, 0xcec3662e, 0xa60ce07b      // n = 21
  .long       0x6f345e45, 0xe6fc4e6a, 0xce7f39f4      // n = 22
  .long       0xa2b73df1, 0xb0cd4768, 0x61d82e56      // n = 23
  .long       0x86d8e4d2, 0xd7a4825c, 0xd270f1a2      // n = 24
  .long       0xa90fd27a, 0x0167d312, 0xc619809d      // n = 25
  .long       0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d      // n = 26
  .long       0x4597456a, 0x98d8d9cb, 0x65863b64      // n = 27
  .long       0xc9c8b782, 0x68bce87a, 0x1b03397f      // n = 28
  .long       0x62ec6c6d, 0x6956fc3b, 0xebb883bd      // n = 29
  .long       0x2342001e, 0x3771e98f, 0xb3e32c28      // n = 30
  .long       0xe8b6368b, 0x2178513a, 0x064f7f26      // n = 31
  .long       0x9ef68d35, 0x170076fa, 0xdd7e3b0c      // n = 32
  .long       0x0b0bf8ca, 0x6f345e45, 0xf285651c      // n = 33
  .long       0x02ee03b2, 0xff0dba97, 0x10746f3c      // n = 34
  .long       0x135c83fd, 0xf872e54c, 0xc7a68855      // n = 35
  .long       0x00bcf5f6, 0x86d8e4d2, 0x271d9844      // n = 36
  .long       0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c      // n = 37
  .long       0xded288f8, 0xb3af077a, 0x93a5f730      // n = 38
  .long       0x37170390, 0xca6ef3ac, 0x6cb08e5c      // n = 39
  .long       0xf48642e9, 0xdd66cbbb, 0x6b749fb2      // n = 40
  .long       0xb25b29f2, 0xe9e28eb4, 0x1393e203      // n = 41
  .long       0x45cddf4e, 0xc9c8b782, 0xcec3662e      // n = 42
  .long       0xdfd94fb2, 0x93e106a4, 0x96c515bb      // n = 43
  .long       0x021ac5ef, 0xd813b325, 0xe6fc4e6a      // n = 44
  .long       0x8e1450f7, 0x2342001e, 0x8227bb8a      // n = 45
  .long       0xe0cdcf86, 0x6d9a4957, 0xb0cd4768      // n = 46
  .long       0x613eee91, 0xd2c3ed1a, 0x39c7ff35      // n = 47
  .long       0xbedc6ba1, 0x9ef68d35, 0xd7a4825c      // n = 48
  .long       0x0cd1526a, 0xf2271e60, 0x0ab3844b      // n = 49
  .long       0xd6c3a807, 0x2664fd8b, 0x0167d312      // n = 50
  .long       0x1d31175f, 0x02ee03b2, 0xf6076544      // n = 51
  .long       0x4be7fd90, 0x363bd6b3, 0x26f6a60a      // n = 52
  .long       0x6eeed1c9, 0x5fabe670, 0xa741c1bf      // n = 53
  .long       0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb      // n = 54
  .long       0x2e7d11a7, 0x17f27698, 0x49c3cc9c      // n = 55
  .long       0x889774e1, 0xaa7c7ad5, 0x68bce87a      // n = 56
  .long       0x8a074012, 0xded288f8, 0x57a3d037      // n = 57
  .long       0xbd0bb25f, 0x6d390dec, 0x6956fc3b      // n = 58
  .long       0x3be3c09b, 0x6353c1cc, 0x42d98888      // n = 59
  .long       0x465a4eee, 0xf48642e9, 0x3771e98f      // n = 60
  .long       0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9      // n = 61
  .long       0xa52f58ec, 0x9a5ede41, 0x2178513a      // n = 62
  .long       0x47972100, 0x45cddf4e, 0xe0ac139e      // n = 63
  .long       0x359674f7, 0xa51b6135, 0x170076fa      // n = 64
  /*
   * .L1: folding coefficients passed to crc4way together with crc32x
   * (both the le and the be instantiation).
   *
   * One row of three 32-bit words per chunk size.  crc4way reads the row
   * at .L1 - 12 + 12 * n, where n (1..64) is the number of 64-byte
   * blocks handled in one outer iteration, so the first row below is
   * n = 1 and there is no row for n = 0.  The three words are loaded
   * into s0, s1 and s2, in that order.
   */
  .L1:
  .long       0xaf449247, 0x81256527, 0xccaa009e      // n = 1
  .long       0x57c54819, 0x1d9513d7, 0x81256527      // n = 2
  .long       0x3f41287a, 0x57c54819, 0xaf449247      // n = 3
  .long       0xf5e48c85, 0x910eeec1, 0x1d9513d7      // n = 4
  .long       0x1f0c2cdd, 0x9026d5b1, 0xae0b5394      // n = 5
  .long       0x71d54a59, 0xf5e48c85, 0x57c54819      // n = 6
  .long       0x1c63267b, 0xfe807bbd, 0x0cbec0ed      // n = 7
  .long       0xd31343ea, 0xe95c1271, 0x910eeec1      // n = 8
  .long       0xf9d9c7ee, 0x71d54a59, 0x3f41287a      // n = 9
  .long       0x9ee62949, 0xcec97417, 0x9026d5b1      // n = 10
  .long       0xa55d1514, 0xf183c71b, 0xd1df2327      // n = 11
  .long       0x21aa2b26, 0xd31343ea, 0xf5e48c85      // n = 12
  .long       0x9d842b80, 0xeea395c4, 0x3c656ced      // n = 13
  .long       0xd8110ff1, 0xcd669a40, 0xfe807bbd      // n = 14
  .long       0x3f9e9356, 0x9ee62949, 0x1f0c2cdd      // n = 15
  .long       0x1d6708a0, 0x0c30f51d, 0xe95c1271      // n = 16
  .long       0xef82aa68, 0xdb3935ea, 0xb918a347      // n = 17
  .long       0xd14bcc9b, 0x21aa2b26, 0x71d54a59      // n = 18
  .long       0x99cce860, 0x356d209f, 0xff6f2fc2      // n = 19
  .long       0xd8af8e46, 0xc352f6de, 0xcec97417      // n = 20
  .long       0xf1996890, 0xd8110ff1, 0x1c63267b      // n = 21
  .long       0x631bc508, 0xe95c7216, 0xf183c71b      // n = 22
  .long       0x8511c306, 0x8e031a19, 0x9b9bdbd0      // n = 23
  .long       0xdb3839f3, 0x1d6708a0, 0xd31343ea      // n = 24
  .long       0x7a92fffb, 0xf7003835, 0x4470ac44      // n = 25
  .long       0x6ce68f2a, 0x00eba0c8, 0xeea395c4      // n = 26
  .long       0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee      // n = 27
  .long       0xb46f7cff, 0x9a1b53c8, 0xcd669a40      // n = 28
  .long       0x60290934, 0x81b6f443, 0x6d40f445      // n = 29
  .long       0x8e976a7d, 0xd8af8e46, 0x9ee62949      // n = 30
  .long       0xdcf5088a, 0x9dbdc100, 0x145575d5      // n = 31
  .long       0x1753ab84, 0xbbf2f6d6, 0x0c30f51d      // n = 32
  .long       0x255b139e, 0x631bc508, 0xa55d1514      // n = 33
  .long       0xd784eaa8, 0xce26786c, 0xdb3935ea      // n = 34
  .long       0x6d2c864a, 0x8068c345, 0x2586d334      // n = 35
  .long       0x02072e24, 0xdb3839f3, 0x21aa2b26      // n = 36
  .long       0x06689b0a, 0x5efd72f5, 0xe0575528      // n = 37
  .long       0x1e52f5ea, 0x4117915b, 0x356d209f      // n = 38
  .long       0x1d3d1db6, 0x6ce68f2a, 0x9d842b80      // n = 39
  .long       0x3796455c, 0xb8e0e4a8, 0xc352f6de      // n = 40
  .long       0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c      // n = 41
  .long       0x28ae0976, 0xb46f7cff, 0xd8110ff1      // n = 42
  .long       0x9764bc8d, 0xd7e7a22c, 0x712510f0      // n = 43
  .long       0x13a13e18, 0x3e9a43cd, 0xe95c7216      // n = 44
  .long       0xb8ee242e, 0x8e976a7d, 0x3f9e9356      // n = 45
  .long       0x0c540e7b, 0x753c81ff, 0x8e031a19      // n = 46
  .long       0x9924c781, 0xb9220208, 0x3edcde65      // n = 47
  .long       0x3954de39, 0x1753ab84, 0x1d6708a0      // n = 48
  .long       0xf32238b5, 0xbec81497, 0x9e70b943      // n = 49
  .long       0xbbd2cd2c, 0x0925d861, 0xf7003835      // n = 50
  .long       0xcc401304, 0xd784eaa8, 0xef82aa68      // n = 51
  .long       0x4987e684, 0x6044fbb0, 0x00eba0c8      // n = 52
  .long       0x3aa11427, 0x18fe3b4a, 0x87441142      // n = 53
  .long       0x297aad60, 0x02072e24, 0xd14bcc9b      // n = 54
  .long       0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a      // n = 55
  .long       0x632d78c5, 0x3fc33de4, 0x9a1b53c8      // n = 56
  .long       0x25b8822a, 0x1e52f5ea, 0x99cce860      // n = 57
  .long       0xd4fc84bc, 0x1af62fb8, 0x81b6f443      // n = 58
  .long       0x5690aa32, 0xa91fdefb, 0x688a110e      // n = 59
  .long       0x1357a093, 0x3796455c, 0xd8af8e46      // n = 60
  .long       0x798fdd33, 0xaaa18a37, 0x357b9517      // n = 61
  .long       0xc2815395, 0x54d42691, 0x9dbdc100      // n = 62
  .long       0x21cfc0f7, 0x28ae0976, 0xf1996890      // n = 63
  .long       0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6      // n = 64