/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-cipher-core.S - Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */
  8. #include <linux/linkage.h>
  9. #include <asm/assembler.h>
  10. #include <asm/cache.h>
  /*
   * Section setup and register aliases shared by every macro in this file.
   *
   * r0-r3 hold rk, rounds, in and out when do_crypt starts.  r2 and r3 are
   * reused as the scratch registers t1 and t2: do_crypt reads all of 'in'
   * before the first round runs, and it pushes r3 ('out') on entry and
   * reloads it from the stack right before storing the result.
   * lr (t0) is likewise pushed on entry, and the return goes through
   * 'pop {..., pc}', so it is free to use as scratch as well.
   */
  .text
  .align  5                     // 2^5 = 32-byte boundary

  rk      .req    r0            // round-key pointer, advanced by 'ldm rk!'
  rounds  .req    r1            // round counter; later holds the saved CPSR
  in      .req    r2            // source block pointer
  t1      .req    r2            // scratch (same register as 'in')
  out     .req    r3            // destination block pointer
  t2      .req    r3            // scratch (same register as 'out')
  ttab    .req    ip            // base address of the lookup table in use
  t0      .req    lr            // scratch
  /*
   * __select out, in, idx
   *
   * Isolate byte number idx (0 = least significant) of register 'in'.
   *
   * ARMv7 and later: ubfx leaves the byte zero-extended in bits [7:0] of
   * 'out'.
   * Earlier architectures: a plain AND is used, so for idx > 0 the byte stays
   * at its original bit position (bits 8*idx .. 8*idx+7).  __load makes up
   * for that with a right shift when it forms the table address.
   */
  .macro  __select, out, in, idx
      .if     __LINUX_ARM_ARCH__ >= 7
          ubfx    \out, \in, #(8 * \idx), #8
      .else
          and     \out, \in, #0xff << (8 * \idx)
      .endif
  .endm
  /*
   * __load out, in, idx, sz, op
   *
   * Load a table entry from ttab, indexed by the byte that __select produced
   * in register 'in' for byte position idx.
   *
   *   sz  log2 of the table entry stride (the index is scaled by 1 << sz)
   *   op  suffix appended to 'ldr' (blank for a word load, 'b' for a byte
   *       load)
   *
   * When __select has already moved the byte down to bits [7:0] (ARMv7+, or
   * idx == 0 on any architecture) the index only needs the left shift by sz.
   * Otherwise the byte still sits at bit 8*idx, so a single right shift by
   * (8*idx - sz) both moves it down and applies the scaling.
   */
  .macro  __load, out, in, idx, sz, op
      .if     __LINUX_ARM_ARCH__ >= 7 || \idx <= 0
          ldr\op  \out, [ttab, \in, lsl #\sz]
      .else
          ldr\op  \out, [ttab, \in, lsr #(8 * \idx) - \sz]
      .endif
  .endm
  /*
   * __hround out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
   *
   * Half of one round: produces two output words (out0, out1) from the four
   * input words in0..in3.
   *
   * Each output word is the XOR of four table lookups (one per byte position
   * 0..3, rotated right by 0/24/16/8 bits respectively) and one round-key
   * word.  Two key words are fetched with 'ldm rk!', which advances rk by
   * eight bytes.
   *
   *   t3, t4   caller-supplied scratch registers for the byte 2 and byte 3
   *            lookups that feed out1
   *   enc      non-zero selects the encryption byte-to-word mapping for
   *            out1, zero selects the decryption mapping
   *   sz, op   passed through to __load (entry stride and ldr suffix)
   *   oldcpsr  optional; when given, interrupts are restored from this
   *            register once the last table lookup has been issued
   *
   * Clobbers t0, t1, t2 and the registers passed as t3 and t4.
   */
  .macro  __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
      /* out0: lookups for byte 0 and byte 1 */
      __select    \out0, \in0, 0
      __select    t0, \in1, 1
      __load      \out0, \out0, 0, \sz, \op
      __load      t0, t0, 1, \sz, \op

      /* out1: byte 0 and byte 1 sources depend on the direction */
      .if         \enc
          __select    \out1, \in1, 0
          __select    t1, \in2, 1
      .else
          __select    \out1, \in3, 0
          __select    t1, \in0, 1
      .endif
      __load      \out1, \out1, 0, \sz, \op
      __select    t2, \in2, 2
      __load      t1, t1, 1, \sz, \op
      __load      t2, t2, 2, \sz, \op

      /* Fold in the byte 1 lookup now so that t0 can be reused below */
      eor         \out0, \out0, t0, ror #24

      /* Remaining lookups: byte 3 for out0, bytes 2 and 3 for out1 */
      __select    t0, \in3, 3
      .if         \enc
          __select    \t3, \in3, 2
          __select    \t4, \in0, 3
      .else
          __select    \t3, \in1, 2
          __select    \t4, \in2, 3
      .endif
      __load      \t3, \t3, 2, \sz, \op
      __load      t0, t0, 3, \sz, \op
      __load      \t4, \t4, 3, \sz, \op

      .ifnb       \oldcpsr
          /*
           * Final round: every data-dependent table lookup has been issued
           * by now, so interrupts can safely be turned back on.
           */
          restore_irqs    \oldcpsr
      .endif

      /* Combine the lookups and mix in the next two round-key words */
      eor         \out1, \out1, t1, ror #24
      eor         \out0, \out0, t2, ror #16
      ldm         rk!, {t1, t2}
      eor         \out1, \out1, \t3, ror #16
      eor         \out0, \out0, t0, ror #8
      eor         \out1, \out1, \t4, ror #8
      eor         \out0, \out0, t1
      eor         \out1, \out1, t2
  .endm
  /*
   * fround out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
   *
   * One full forward round, built from two __hround invocations with the
   * 'enc' selector set to 1.
   *
   * The first half uses out2/out3 as scratch, which is fine because they are
   * only written for real by the second half.  The second half uses in1/in2
   * as scratch, so those two input registers do not survive the macro.
   * oldcpsr, if given, is handed to the second half only, so interrupts are
   * restored after the very last lookup of the round.
   */
  .macro  fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
      __hround    \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
      __hround    \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
  .endm
  /*
   * iround out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
   *
   * One full inverse round, built from two __hround invocations with the
   * 'enc' selector set to 0.  Compared with fround, in1 and in3 trade places
   * in the argument lists.
   *
   * The first half uses out2/out3 as scratch; the second half uses in1/in0
   * as scratch, so those two input registers do not survive the macro.
   * oldcpsr, if given, is handed to the second half only.
   */
  .macro  iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
      __hround    \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
      __hround    \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
  .endm
  /*
   * do_crypt round, ttab, ltab, bsz
   *
   * Complete body of one 16-byte block transform, including the function
   * prologue, epilogue and literal pool.
   *
   *   round  name of the round macro to expand (fround or iround)
   *   ttab   symbol of the 1024-byte table used by all rounds but the last
   *   ltab   optional symbol of a 256-byte table for the last round; when
   *          left blank the last round reads bytes from ttab + 1 instead
   *   bsz    'sz' argument for the last round (log2 of its entry stride)
   *
   * On entry: r0 = rk, r1 = rounds, r2 = in, r3 = out (see the .req aliases).
   * NOTE(review): presumably reached from C with those four values as the
   * first four arguments - confirm against the caller's prototype.
   *
   * The state lives in r4-r7 and r8-r11, which swap roles every round.
   */
  .macro  do_crypt, round, ttab, ltab, bsz
      push        {r3-r11, lr}          // r3 (out) ends up at [sp]

      // Fetch the first four key words up front so that a cache miss on
      // the key schedule overlaps with loading the input block.
      ldm         rk!, {r8-r11}

      ldr         r4, [in]
      ldr         r5, [in, #4]
      ldr         r6, [in, #8]
      ldr         r7, [in, #12]

  #ifdef CONFIG_CPU_BIG_ENDIAN
      rev_l       r4, t0
      rev_l       r5, t0
      rev_l       r6, t0
      rev_l       r7, t0
  #endif

      /* XOR the input block with the first four round-key words */
      eor         r4, r4, r8
      eor         r5, r5, r9
      eor         r6, r6, r10
      eor         r7, r7, r11

      mov_l       ttab, \ttab

      /*
       * Hardening against cache-timing attacks: with interrupts off, touch
       * the whole 1024-byte 'ft' or 'it' table so that it is resident in L1
       * before any data-dependent lookup happens.  One load per 32 bytes is
       * issued, which assumes a cacheline size of at least 32.  This makes
       * such attacks harder but may not rule them out entirely; see
       * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
       * ("Cache-timing attacks on AES") for the many difficulties involved
       * in writing truly constant-time AES software.
       */
      save_and_disable_irqs   t0
      .set        i, 0
      .rept       1024 / 128
          ldr     r8, [ttab, #i + 0]
          ldr     r9, [ttab, #i + 32]
          ldr     r10, [ttab, #i + 64]
          ldr     r11, [ttab, #i + 96]
          .set    i, i + 128
      .endr
      push        {t0}                  // oldcpsr; t0 is clobbered by the rounds

      /*
       * Main loop.  Each pass takes 4 off the round counter and expands up
       * to four rounds; when bit 1 of the counter is set, the first pass is
       * entered at label 1 and so skips two of them.  The loop is left
       * after the third round of a pass once the subtraction has reached
       * zero or borrowed, leaving exactly one round for the code after
       * label 2.
       */
      tst         rounds, #2
      bne         1f

  0:  \round      r8, r9, r10, r11, r4, r5, r6, r7
      \round      r4, r5, r6, r7, r8, r9, r10, r11

  1:  subs        rounds, rounds, #4
      \round      r8, r9, r10, r11, r4, r5, r6, r7
      bls         2f
      \round      r4, r5, r6, r7, r8, r9, r10, r11
      b           0b

  2:
      .ifnb       \ltab
          mov_l   ttab, \ltab
          // Touch the 256-byte last-round table as well (one load per
          // 32 bytes), for the same reason as the prefetch of ttab.
          .set    i, 0
          .rept   256 / 64
              ldr     t0, [ttab, #i + 0]
              ldr     t1, [ttab, #i + 32]
              .set    i, i + 64
          .endr
      .else
          // No separate last-round table: the byte loads of the last
          // round then read offset 1 of each entry of ttab.
          add     ttab, ttab, #1
      .endif

      // The round counter is no longer needed, so its register takes the
      // saved CPSR; the last round restores interrupts from it.
      pop         {rounds}              // oldcpsr
      \round      r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

  #ifdef CONFIG_CPU_BIG_ENDIAN
      rev_l       r4, t0
      rev_l       r5, t0
      rev_l       r6, t0
      rev_l       r7, t0
  #endif

      // r3 was used as scratch (t2); fetch the saved 'out' pointer back
      // from the bottom of the register save area.
      ldr         out, [sp]

      str         r4, [out]
      str         r5, [out, #4]
      str         r6, [out, #8]
      str         r7, [out, #12]

      pop         {r3-r11, pc}          // restore registers and return

      .align      3
      .ltorg                            // emit the literal pool here
  .endm
  /*
   * __aes_arm_encrypt
   *
   * Forward transform of one 16-byte block: do_crypt expanded with the
   * fround round macro and the aes_enc_tab table.  No separate last-round
   * table is given (third argument left blank), so the last round performs
   * byte loads from aes_enc_tab + 1 with an entry stride of 1 << 2 bytes.
   */
  ENTRY(__aes_arm_encrypt)
      do_crypt    fround, aes_enc_tab,, 2
  ENDPROC(__aes_arm_encrypt)
  /*
   * __aes_arm_decrypt
   *
   * Inverse transform of one 16-byte block: do_crypt expanded with the
   * iround round macro and the aes_dec_tab table.  The last round performs
   * byte loads from crypto_aes_inv_sbox with an entry stride of one byte
   * (bsz = 0).
   *
   * The entry point is placed on a 32-byte (2^5) boundary.
   */
  .align  5
  ENTRY(__aes_arm_decrypt)
      do_crypt    iround, aes_dec_tab, crypto_aes_inv_sbox, 0
  ENDPROC(__aes_arm_decrypt)