poly1305-mips.pl 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269
  1. #!/usr/bin/env perl
  2. # SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
  3. #
  4. # ====================================================================
  5. # Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
  6. # project.
  7. # ====================================================================
  8. # Poly1305 hash for MIPS.
  9. #
  10. # May 2016
  11. #
  12. # Numbers are cycles per processed byte with poly1305_blocks alone.
  13. #
  14. # IALU/gcc
  15. # R1x000 ~5.5/+130% (big-endian)
  16. # Octeon II 2.50/+70% (little-endian)
  17. #
  18. # March 2019
  19. #
  20. # Add 32-bit code path.
  21. #
  22. # October 2019
  23. #
  24. # Modulo-scheduling the reduction allows omitting the dependency chain at
  25. # the end of the inner loop and improves performance. Also optimize the
  26. # MIPS32R2 code path for the MIPS 1004K core. Per René von Dorst's suggestions.
  27. #
  28. # IALU/gcc
  29. # R1x000 ~9.8/? (big-endian)
  30. # Octeon II 3.65/+140% (little-endian)
  31. # MT7621/1004K 4.75/? (little-endian)
  32. #
  33. ######################################################################
  34. # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
  35. # widely used. Then there is a new contender: NUBI. It appears that if
  36. # one picks the latter, it's possible to arrange code in an ABI-neutral
  37. # manner. Therefore let's stick to the NUBI register layout:
  38. #
  # NUBI register layout.  Every symbolic name is bound to the assembler
  # spelling of a register: a '$' followed by the register number.
  ($zero, $at, $t0, $t1, $t2) = map { '$' . $_ } (0 .. 2, 24, 25);
  ($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) = map { '$' . $_ } (4 .. 11);
  ($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11) =
      map { '$' . $_ } (12 .. 23);
  ($gp, $tp, $sp, $fp, $ra) = map { '$' . $_ } (3, 28 .. 31);
  43. #
  44. # The return value is placed in $a0. Following coding rules facilitate
  45. # interoperability:
  46. #
  47. # - never ever touch $tp, "thread pointer", former $gp [o32 can be
  48. # excluded from the rule, because it's specified volatile];
  49. # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
  50. # old code];
  51. # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
  52. #
  53. # For reference here is register layout for N32/64 MIPS ABIs:
  54. #
  55. # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
  56. # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
  57. # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
  58. # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
  59. # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
  60. #
  61. # <appro@openssl.org>
  62. #
  63. ######################################################################
  # The flavour is the first command-line argument; a missing (or false)
  # value selects "64".  Supported flavours are o32,n32,64,nubi32,nubi64.
  $flavour = shift(@ARGV);
  $flavour = "64" unless $flavour;

  # Register that receives the return value: $a0 for nubi flavours,
  # $t0 (the former $v0) for every other flavour.
  $v0 = $t0;
  $v0 = $a0 if $flavour =~ /nubi/i;
  66. if ($flavour =~ /64|n32/i) {{{
  67. ######################################################################
  68. # 64-bit code path
  69. #
  # Symbolic register names shared by all 64-bit routines.
  # $ctx/$inp/$len/$padbit occupy $a0..$a3 and are what the generated
  # routines read as their operands.
  my ($ctx, $inp)    = ($a0, $a1);
  my ($len, $padbit) = ($a2, $a3);
  # Input words and scratch registers.  $tmp2 is $at; the generated code
  # switches the assembler to ".set noat" before using it.
  my ($in0, $in1)          = ($a4, $a5);
  my ($tmp0, $tmp1)        = ($a6, $a7);
  my ($tmp2, $tmp3, $tmp4) = ($at, $t0, $t1);
  72. $code.=<<___;
  73. #if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
  74. defined(_MIPS_ARCH_MIPS64R6)) \\
  75. && !defined(_MIPS_ARCH_MIPS64R2)
  76. # define _MIPS_ARCH_MIPS64R2
  77. #endif
  78. #if defined(_MIPS_ARCH_MIPS64R6)
  79. # define dmultu(rs,rt)
  80. # define mflo(rd,rs,rt) dmulu rd,rs,rt
  81. # define mfhi(rd,rs,rt) dmuhu rd,rs,rt
  82. #else
  83. # define dmultu(rs,rt) dmultu rs,rt
  84. # define mflo(rd,rs,rt) mflo rd
  85. # define mfhi(rd,rs,rt) mfhi rd
  86. #endif
  87. #ifdef __KERNEL__
  88. # define poly1305_init poly1305_block_init
  89. #endif
  90. #if defined(__MIPSEB__) && !defined(MIPSEB)
  91. # define MIPSEB
  92. #endif
  93. #ifdef MIPSEB
  94. # define MSB 0
  95. # define LSB 7
  96. #else
  97. # define MSB 7
  98. # define LSB 0
  99. #endif
  100. .text
  101. .set noat
  102. .set noreorder
  103. .align 5
  104. .globl poly1305_init
  105. .ent poly1305_init
  106. poly1305_init:
  107. .frame $sp,0,$ra
  108. .set reorder
  109. sd $zero,0($ctx)
  110. sd $zero,8($ctx)
  111. sd $zero,16($ctx)
  112. beqz $inp,.Lno_key
  113. #if defined(_MIPS_ARCH_MIPS64R6)
  114. andi $tmp0,$inp,7 # $inp % 8
  115. dsubu $inp,$inp,$tmp0 # align $inp
  116. sll $tmp0,$tmp0,3 # byte to bit offset
  117. ld $in0,0($inp)
  118. ld $in1,8($inp)
  119. beqz $tmp0,.Laligned_key
  120. ld $tmp2,16($inp)
  121. subu $tmp1,$zero,$tmp0
  122. # ifdef MIPSEB
  123. dsllv $in0,$in0,$tmp0
  124. dsrlv $tmp3,$in1,$tmp1
  125. dsllv $in1,$in1,$tmp0
  126. dsrlv $tmp2,$tmp2,$tmp1
  127. # else
  128. dsrlv $in0,$in0,$tmp0
  129. dsllv $tmp3,$in1,$tmp1
  130. dsrlv $in1,$in1,$tmp0
  131. dsllv $tmp2,$tmp2,$tmp1
  132. # endif
  133. or $in0,$in0,$tmp3
  134. or $in1,$in1,$tmp2
  135. .Laligned_key:
  136. #else
  137. ldl $in0,0+MSB($inp)
  138. ldl $in1,8+MSB($inp)
  139. ldr $in0,0+LSB($inp)
  140. ldr $in1,8+LSB($inp)
  141. #endif
  142. #ifdef MIPSEB
  143. # if defined(_MIPS_ARCH_MIPS64R2)
  144. dsbh $in0,$in0 # byte swap
  145. dsbh $in1,$in1
  146. dshd $in0,$in0
  147. dshd $in1,$in1
  148. # else
  149. ori $tmp0,$zero,0xFF
  150. dsll $tmp2,$tmp0,32
  151. or $tmp0,$tmp2 # 0x000000FF000000FF
  152. and $tmp1,$in0,$tmp0 # byte swap
  153. and $tmp3,$in1,$tmp0
  154. dsrl $tmp2,$in0,24
  155. dsrl $tmp4,$in1,24
  156. dsll $tmp1,24
  157. dsll $tmp3,24
  158. and $tmp2,$tmp0
  159. and $tmp4,$tmp0
  160. dsll $tmp0,8 # 0x0000FF000000FF00
  161. or $tmp1,$tmp2
  162. or $tmp3,$tmp4
  163. and $tmp2,$in0,$tmp0
  164. and $tmp4,$in1,$tmp0
  165. dsrl $in0,8
  166. dsrl $in1,8
  167. dsll $tmp2,8
  168. dsll $tmp4,8
  169. and $in0,$tmp0
  170. and $in1,$tmp0
  171. or $tmp1,$tmp2
  172. or $tmp3,$tmp4
  173. or $in0,$tmp1
  174. or $in1,$tmp3
  175. dsrl $tmp1,$in0,32
  176. dsrl $tmp3,$in1,32
  177. dsll $in0,32
  178. dsll $in1,32
  179. or $in0,$tmp1
  180. or $in1,$tmp3
  181. # endif
  182. #endif
  183. li $tmp0,1
  184. dsll $tmp0,32 # 0x0000000100000000
  185. daddiu $tmp0,-63 # 0x00000000ffffffc1
  186. dsll $tmp0,28 # 0x0ffffffc10000000
  187. daddiu $tmp0,-1 # 0x0ffffffc0fffffff
  188. and $in0,$tmp0
  189. daddiu $tmp0,-3 # 0x0ffffffc0ffffffc
  190. and $in1,$tmp0
  191. sd $in0,24($ctx)
  192. dsrl $tmp0,$in1,2
  193. sd $in1,32($ctx)
  194. daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
  195. sd $tmp0,40($ctx)
  196. .Lno_key:
  197. li $v0,0 # return 0
  198. jr $ra
  199. .end poly1305_init
  200. ___
  201. {
  # Register allocation for the 64-bit poly1305_blocks.
  #
  # Value for the .mask directive: bits 12..17 ($s0..$s5) when every one of
  # them is stored (nubi flavours), bits 16..17 ($s4,$s5) otherwise.
  my $SAVED_REGS_MASK;
  if ($flavour =~ /nubi/i) {
      $SAVED_REGS_MASK = "0x0003f000";
  }
  else {
      $SAVED_REGS_MASK = "0x00030000";
  }
  my ($h0, $h1, $h2)  = ($s0, $s1, $s2);      # hash value
  my ($r0, $r1, $rs1) = ($s3, $s4, $s5);      # key; $rs1 holds r1 + (r1 >> 2)
  # $d0/$d1 reuse the input registers: the input is accumulated into them.
  my ($d0, $d1, $d2)  = ($in0, $in1, $t2);
  my ($shr, $shl)     = ($s6, $s7);           # used on R6
  206. $code.=<<___;
  207. .align 5
  208. .globl poly1305_blocks
  209. .ent poly1305_blocks
  210. poly1305_blocks:
  211. .set noreorder
  212. dsrl $len,4 # number of complete blocks
  213. bnez $len,poly1305_blocks_internal
  214. nop
  215. jr $ra
  216. nop
  217. .end poly1305_blocks
  218. .align 5
  219. .ent poly1305_blocks_internal
  220. poly1305_blocks_internal:
  221. .set noreorder
  222. #if defined(_MIPS_ARCH_MIPS64R6)
  223. .frame $sp,8*8,$ra
  224. .mask $SAVED_REGS_MASK|0x000c0000,-8
  225. dsubu $sp,8*8
  226. sd $s7,56($sp)
  227. sd $s6,48($sp)
  228. #else
  229. .frame $sp,6*8,$ra
  230. .mask $SAVED_REGS_MASK,-8
  231. dsubu $sp,6*8
  232. #endif
  233. sd $s5,40($sp)
  234. sd $s4,32($sp)
  235. ___
  236. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  237. sd $s3,24($sp)
  238. sd $s2,16($sp)
  239. sd $s1,8($sp)
  240. sd $s0,0($sp)
  241. ___
  242. $code.=<<___;
  243. .set reorder
  244. #if defined(_MIPS_ARCH_MIPS64R6)
  245. andi $shr,$inp,7
  246. dsubu $inp,$inp,$shr # align $inp
  247. sll $shr,$shr,3 # byte to bit offset
  248. subu $shl,$zero,$shr
  249. #endif
  250. ld $h0,0($ctx) # load hash value
  251. ld $h1,8($ctx)
  252. ld $h2,16($ctx)
  253. ld $r0,24($ctx) # load key
  254. ld $r1,32($ctx)
  255. ld $rs1,40($ctx)
  256. dsll $len,4
  257. daddu $len,$inp # end of buffer
  258. b .Loop
  259. .align 4
  260. .Loop:
  261. #if defined(_MIPS_ARCH_MIPS64R6)
  262. ld $in0,0($inp) # load input
  263. ld $in1,8($inp)
  264. beqz $shr,.Laligned_inp
  265. ld $tmp2,16($inp)
  266. # ifdef MIPSEB
  267. dsllv $in0,$in0,$shr
  268. dsrlv $tmp3,$in1,$shl
  269. dsllv $in1,$in1,$shr
  270. dsrlv $tmp2,$tmp2,$shl
  271. # else
  272. dsrlv $in0,$in0,$shr
  273. dsllv $tmp3,$in1,$shl
  274. dsrlv $in1,$in1,$shr
  275. dsllv $tmp2,$tmp2,$shl
  276. # endif
  277. or $in0,$in0,$tmp3
  278. or $in1,$in1,$tmp2
  279. .Laligned_inp:
  280. #else
  281. ldl $in0,0+MSB($inp) # load input
  282. ldl $in1,8+MSB($inp)
  283. ldr $in0,0+LSB($inp)
  284. ldr $in1,8+LSB($inp)
  285. #endif
  286. daddiu $inp,16
  287. #ifdef MIPSEB
  288. # if defined(_MIPS_ARCH_MIPS64R2)
  289. dsbh $in0,$in0 # byte swap
  290. dsbh $in1,$in1
  291. dshd $in0,$in0
  292. dshd $in1,$in1
  293. # else
  294. ori $tmp0,$zero,0xFF
  295. dsll $tmp2,$tmp0,32
  296. or $tmp0,$tmp2 # 0x000000FF000000FF
  297. and $tmp1,$in0,$tmp0 # byte swap
  298. and $tmp3,$in1,$tmp0
  299. dsrl $tmp2,$in0,24
  300. dsrl $tmp4,$in1,24
  301. dsll $tmp1,24
  302. dsll $tmp3,24
  303. and $tmp2,$tmp0
  304. and $tmp4,$tmp0
  305. dsll $tmp0,8 # 0x0000FF000000FF00
  306. or $tmp1,$tmp2
  307. or $tmp3,$tmp4
  308. and $tmp2,$in0,$tmp0
  309. and $tmp4,$in1,$tmp0
  310. dsrl $in0,8
  311. dsrl $in1,8
  312. dsll $tmp2,8
  313. dsll $tmp4,8
  314. and $in0,$tmp0
  315. and $in1,$tmp0
  316. or $tmp1,$tmp2
  317. or $tmp3,$tmp4
  318. or $in0,$tmp1
  319. or $in1,$tmp3
  320. dsrl $tmp1,$in0,32
  321. dsrl $tmp3,$in1,32
  322. dsll $in0,32
  323. dsll $in1,32
  324. or $in0,$tmp1
  325. or $in1,$tmp3
  326. # endif
  327. #endif
  328. dsrl $tmp1,$h2,2 # modulo-scheduled reduction
  329. andi $h2,$h2,3
  330. dsll $tmp0,$tmp1,2
  331. daddu $d0,$h0,$in0 # accumulate input
  332. daddu $tmp1,$tmp0
  333. sltu $tmp0,$d0,$h0
  334. daddu $d0,$d0,$tmp1 # ... and residue
  335. sltu $tmp1,$d0,$tmp1
  336. daddu $d1,$h1,$in1
  337. daddu $tmp0,$tmp1
  338. sltu $tmp1,$d1,$h1
  339. daddu $d1,$tmp0
  340. dmultu ($r0,$d0) # h0*r0
  341. daddu $d2,$h2,$padbit
  342. sltu $tmp0,$d1,$tmp0
  343. mflo ($h0,$r0,$d0)
  344. mfhi ($h1,$r0,$d0)
  345. dmultu ($rs1,$d1) # h1*5*r1
  346. daddu $d2,$tmp1
  347. daddu $d2,$tmp0
  348. mflo ($tmp0,$rs1,$d1)
  349. mfhi ($tmp1,$rs1,$d1)
  350. dmultu ($r1,$d0) # h0*r1
  351. mflo ($tmp2,$r1,$d0)
  352. mfhi ($h2,$r1,$d0)
  353. daddu $h0,$tmp0
  354. daddu $h1,$tmp1
  355. sltu $tmp0,$h0,$tmp0
  356. dmultu ($r0,$d1) # h1*r0
  357. daddu $h1,$tmp0
  358. daddu $h1,$tmp2
  359. mflo ($tmp0,$r0,$d1)
  360. mfhi ($tmp1,$r0,$d1)
  361. dmultu ($rs1,$d2) # h2*5*r1
  362. sltu $tmp2,$h1,$tmp2
  363. daddu $h2,$tmp2
  364. mflo ($tmp2,$rs1,$d2)
  365. dmultu ($r0,$d2) # h2*r0
  366. daddu $h1,$tmp0
  367. daddu $h2,$tmp1
  368. mflo ($tmp3,$r0,$d2)
  369. sltu $tmp0,$h1,$tmp0
  370. daddu $h2,$tmp0
  371. daddu $h1,$tmp2
  372. sltu $tmp2,$h1,$tmp2
  373. daddu $h2,$tmp2
  374. daddu $h2,$tmp3
  375. bne $inp,$len,.Loop
  376. sd $h0,0($ctx) # store hash value
  377. sd $h1,8($ctx)
  378. sd $h2,16($ctx)
  379. .set noreorder
  380. #if defined(_MIPS_ARCH_MIPS64R6)
  381. ld $s7,56($sp)
  382. ld $s6,48($sp)
  383. #endif
  384. ld $s5,40($sp) # epilogue
  385. ld $s4,32($sp)
  386. ___
  387. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
  388. ld $s3,24($sp)
  389. ld $s2,16($sp)
  390. ld $s1,8($sp)
  391. ld $s0,0($sp)
  392. ___
  393. $code.=<<___;
  394. jr $ra
  395. #if defined(_MIPS_ARCH_MIPS64R6)
  396. daddu $sp,8*8
  397. #else
  398. daddu $sp,6*8
  399. #endif
  400. .end poly1305_blocks_internal
  401. ___
  402. }
  403. {
  # Operands of the 64-bit poly1305_emit: the hash value is loaded from $ctx
  # (offsets 0, 8 and 16), four 32-bit nonce words are loaded from $nonce and
  # added, and the 16 result bytes are stored one by one through $mac.
  404. my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
  405. $code.=<<___;
  406. .align 5
  407. .globl poly1305_emit
  408. .ent poly1305_emit
  409. poly1305_emit:
  410. .frame $sp,0,$ra
  411. .set reorder
  412. ld $tmp2,16($ctx)
  413. ld $tmp0,0($ctx)
  414. ld $tmp1,8($ctx)
  415. li $in0,-4 # final reduction
  416. dsrl $in1,$tmp2,2
  417. and $in0,$tmp2
  418. andi $tmp2,$tmp2,3
  419. daddu $in0,$in1
  420. daddu $tmp0,$tmp0,$in0
  421. sltu $in1,$tmp0,$in0
  422. daddiu $in0,$tmp0,5 # compare to modulus
  423. daddu $tmp1,$tmp1,$in1
  424. sltiu $tmp3,$in0,5
  425. sltu $tmp4,$tmp1,$in1
  426. daddu $in1,$tmp1,$tmp3
  427. daddu $tmp2,$tmp2,$tmp4
  428. sltu $tmp3,$in1,$tmp3
  429. daddu $tmp2,$tmp2,$tmp3
  430. dsrl $tmp2,2 # see if it carried/borrowed
  431. dsubu $tmp2,$zero,$tmp2
  432. xor $in0,$tmp0
  433. xor $in1,$tmp1
  434. and $in0,$tmp2
  435. and $in1,$tmp2
  436. xor $in0,$tmp0
  437. xor $in1,$tmp1
  438. lwu $tmp0,0($nonce) # load nonce
  439. lwu $tmp1,4($nonce)
  440. lwu $tmp2,8($nonce)
  441. lwu $tmp3,12($nonce)
  442. dsll $tmp1,32
  443. dsll $tmp3,32
  444. or $tmp0,$tmp1
  445. or $tmp2,$tmp3
  446. daddu $in0,$tmp0 # accumulate nonce
  447. daddu $in1,$tmp2
  448. sltu $tmp0,$in0,$tmp0
  449. daddu $in1,$tmp0
  450. dsrl $tmp0,$in0,8 # write mac value
  451. dsrl $tmp1,$in0,16
  452. dsrl $tmp2,$in0,24
  453. sb $in0,0($mac)
  454. dsrl $tmp3,$in0,32
  455. sb $tmp0,1($mac)
  456. dsrl $tmp0,$in0,40
  457. sb $tmp1,2($mac)
  458. dsrl $tmp1,$in0,48
  459. sb $tmp2,3($mac)
  460. dsrl $tmp2,$in0,56
  461. sb $tmp3,4($mac)
  462. dsrl $tmp3,$in1,8
  463. sb $tmp0,5($mac)
  464. dsrl $tmp0,$in1,16
  465. sb $tmp1,6($mac)
  466. dsrl $tmp1,$in1,24
  467. sb $tmp2,7($mac)
  468. sb $in1,8($mac)
  469. dsrl $tmp2,$in1,32
  470. sb $tmp3,9($mac)
  471. dsrl $tmp3,$in1,40
  472. sb $tmp0,10($mac)
  473. dsrl $tmp0,$in1,48
  474. sb $tmp1,11($mac)
  475. dsrl $tmp1,$in1,56
  476. sb $tmp2,12($mac)
  477. sb $tmp3,13($mac)
  478. sb $tmp0,14($mac)
  479. sb $tmp1,15($mac)
  480. jr $ra
  481. .end poly1305_emit
  482. .rdata
  483. .asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
  484. .align 2
  485. ___
  486. }
  487. }}} else {{{
  488. ######################################################################
  489. # 32-bit code path
  490. #
  # Symbolic register names shared by all 32-bit routines.
  # $ctx/$inp/$len/$padbit occupy $a0..$a3 and are what the generated
  # routines read as their operands.
  my ($ctx, $inp)    = ($a0, $a1);
  my ($len, $padbit) = ($a2, $a3);
  # Four 32-bit input words, then scratch registers.  $tmp0 is $at; the
  # generated code switches the assembler to ".set noat" before using it.
  my ($in0, $in1, $in2, $in3)     = ($a4, $a5, $a6, $a7);
  my ($tmp0, $tmp1, $tmp2, $tmp3) = ($at, $t0, $t1, $t2);
  494. $code.=<<___;
  495. #if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
  496. defined(_MIPS_ARCH_MIPS32R6)) \\
  497. && !defined(_MIPS_ARCH_MIPS32R2)
  498. # define _MIPS_ARCH_MIPS32R2
  499. #endif
  500. #if defined(_MIPS_ARCH_MIPS32R6)
  501. # define multu(rs,rt)
  502. # define mflo(rd,rs,rt) mulu rd,rs,rt
  503. # define mfhi(rd,rs,rt) muhu rd,rs,rt
  504. #else
  505. # define multu(rs,rt) multu rs,rt
  506. # define mflo(rd,rs,rt) mflo rd
  507. # define mfhi(rd,rs,rt) mfhi rd
  508. #endif
  509. #ifdef __KERNEL__
  510. # define poly1305_init poly1305_block_init
  511. #endif
  512. #if defined(__MIPSEB__) && !defined(MIPSEB)
  513. # define MIPSEB
  514. #endif
  515. #ifdef MIPSEB
  516. # define MSB 0
  517. # define LSB 3
  518. #else
  519. # define MSB 3
  520. # define LSB 0
  521. #endif
  522. .text
  523. .set noat
  524. .set noreorder
  525. .align 5
  526. .globl poly1305_init
  527. .ent poly1305_init
  528. poly1305_init:
  529. .frame $sp,0,$ra
  530. .set reorder
  531. sw $zero,0($ctx)
  532. sw $zero,4($ctx)
  533. sw $zero,8($ctx)
  534. sw $zero,12($ctx)
  535. sw $zero,16($ctx)
  536. beqz $inp,.Lno_key
  537. #if defined(_MIPS_ARCH_MIPS32R6)
  538. andi $tmp0,$inp,3 # $inp % 4
  539. subu $inp,$inp,$tmp0 # align $inp
  540. sll $tmp0,$tmp0,3 # byte to bit offset
  541. lw $in0,0($inp)
  542. lw $in1,4($inp)
  543. lw $in2,8($inp)
  544. lw $in3,12($inp)
  545. beqz $tmp0,.Laligned_key
  546. lw $tmp2,16($inp)
  547. subu $tmp1,$zero,$tmp0
  548. # ifdef MIPSEB
  549. sllv $in0,$in0,$tmp0
  550. srlv $tmp3,$in1,$tmp1
  551. sllv $in1,$in1,$tmp0
  552. or $in0,$in0,$tmp3
  553. srlv $tmp3,$in2,$tmp1
  554. sllv $in2,$in2,$tmp0
  555. or $in1,$in1,$tmp3
  556. srlv $tmp3,$in3,$tmp1
  557. sllv $in3,$in3,$tmp0
  558. or $in2,$in2,$tmp3
  559. srlv $tmp2,$tmp2,$tmp1
  560. or $in3,$in3,$tmp2
  561. # else
  562. srlv $in0,$in0,$tmp0
  563. sllv $tmp3,$in1,$tmp1
  564. srlv $in1,$in1,$tmp0
  565. or $in0,$in0,$tmp3
  566. sllv $tmp3,$in2,$tmp1
  567. srlv $in2,$in2,$tmp0
  568. or $in1,$in1,$tmp3
  569. sllv $tmp3,$in3,$tmp1
  570. srlv $in3,$in3,$tmp0
  571. or $in2,$in2,$tmp3
  572. sllv $tmp2,$tmp2,$tmp1
  573. or $in3,$in3,$tmp2
  574. # endif
  575. .Laligned_key:
  576. #else
  577. lwl $in0,0+MSB($inp)
  578. lwl $in1,4+MSB($inp)
  579. lwl $in2,8+MSB($inp)
  580. lwl $in3,12+MSB($inp)
  581. lwr $in0,0+LSB($inp)
  582. lwr $in1,4+LSB($inp)
  583. lwr $in2,8+LSB($inp)
  584. lwr $in3,12+LSB($inp)
  585. #endif
  586. #ifdef MIPSEB
  587. # if defined(_MIPS_ARCH_MIPS32R2)
  588. wsbh $in0,$in0 # byte swap
  589. wsbh $in1,$in1
  590. wsbh $in2,$in2
  591. wsbh $in3,$in3
  592. rotr $in0,$in0,16
  593. rotr $in1,$in1,16
  594. rotr $in2,$in2,16
  595. rotr $in3,$in3,16
  596. # else
  597. srl $tmp0,$in0,24 # byte swap
  598. srl $tmp1,$in0,8
  599. andi $tmp2,$in0,0xFF00
  600. sll $in0,$in0,24
  601. andi $tmp1,0xFF00
  602. sll $tmp2,$tmp2,8
  603. or $in0,$tmp0
  604. srl $tmp0,$in1,24
  605. or $tmp1,$tmp2
  606. srl $tmp2,$in1,8
  607. or $in0,$tmp1
  608. andi $tmp1,$in1,0xFF00
  609. sll $in1,$in1,24
  610. andi $tmp2,0xFF00
  611. sll $tmp1,$tmp1,8
  612. or $in1,$tmp0
  613. srl $tmp0,$in2,24
  614. or $tmp2,$tmp1
  615. srl $tmp1,$in2,8
  616. or $in1,$tmp2
  617. andi $tmp2,$in2,0xFF00
  618. sll $in2,$in2,24
  619. andi $tmp1,0xFF00
  620. sll $tmp2,$tmp2,8
  621. or $in2,$tmp0
  622. srl $tmp0,$in3,24
  623. or $tmp1,$tmp2
  624. srl $tmp2,$in3,8
  625. or $in2,$tmp1
  626. andi $tmp1,$in3,0xFF00
  627. sll $in3,$in3,24
  628. andi $tmp2,0xFF00
  629. sll $tmp1,$tmp1,8
  630. or $in3,$tmp0
  631. or $tmp2,$tmp1
  632. or $in3,$tmp2
  633. # endif
  634. #endif
  635. lui $tmp0,0x0fff
  636. ori $tmp0,0xffff # 0x0fffffff
  637. and $in0,$in0,$tmp0
  638. subu $tmp0,3 # 0x0ffffffc
  639. and $in1,$in1,$tmp0
  640. and $in2,$in2,$tmp0
  641. and $in3,$in3,$tmp0
  642. sw $in0,20($ctx)
  643. sw $in1,24($ctx)
  644. sw $in2,28($ctx)
  645. sw $in3,32($ctx)
  646. srl $tmp1,$in1,2
  647. srl $tmp2,$in2,2
  648. srl $tmp3,$in3,2
  649. addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2)
  650. addu $in2,$in2,$tmp2
  651. addu $in3,$in3,$tmp3
  652. sw $in1,36($ctx)
  653. sw $in2,40($ctx)
  654. sw $in3,44($ctx)
  655. .Lno_key:
  656. li $v0,0
  657. jr $ra
  658. .end poly1305_init
  659. ___
  660. {
  # Register allocation for the 32-bit poly1305_blocks.
  #
  # Value for the .mask directive: bits 12..23 ($s0..$s11) when every one of
  # them is stored (nubi flavours), bits 16..23 ($s4..$s11) otherwise.
  my $SAVED_REGS_MASK;
  if ($flavour =~ /nubi/i) {
      $SAVED_REGS_MASK = "0x00fff000";
  }
  else {
      $SAVED_REGS_MASK = "0x00ff0000";
  }
  my ($h0, $h1, $h2, $h3, $h4) = ($s0, $s1, $s2, $s3, $s4);   # hash value
  my ($r0, $r1, $r2, $r3)      = ($s5, $s6, $s7, $s8);        # key
  my ($rs1, $rs2, $rs3)        = ($s9, $s10, $s11);           # rN + (rN >> 2)
  my ($d0, $d1, $d2, $d3)      = ($a4, $a5, $a6, $a7);        # input words
  # Both names below refer to the same register, $t2.
  my $shr = $t2;    # used on R6
  my $one = $t2;    # used on R2
  # Prologue of the 32-bit poly1305_blocks: reserve 12 words of stack and
  # store $s4..$s11 in the upper eight (the nubi-only heredoc that follows
  # stores $s0..$s3 in the lower four).
  #
  # .frame has to state the size that is actually reserved and released,
  # i.e. 12*4 bytes.  That also agrees with ".mask ...,-4": the highest
  # saved register, $s11, sits at 4*11 = frame size - 4.
  $code.=<<___;
.globl	poly1305_blocks
.align	5
.ent	poly1305_blocks
poly1305_blocks:
	.frame	$sp,12*4,$ra
	.mask	$SAVED_REGS_MASK,-4
	.set	noreorder
	subu	$sp, $sp,4*12
	sw	$s11,4*11($sp)
	sw	$s10,4*10($sp)
	sw	$s9, 4*9($sp)
	sw	$s8, 4*8($sp)
	sw	$s7, 4*7($sp)
	sw	$s6, 4*6($sp)
	sw	$s5, 4*5($sp)
	sw	$s4, 4*4($sp)
___
  686. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  687. sw $s3, 4*3($sp)
  688. sw $s2, 4*2($sp)
  689. sw $s1, 4*1($sp)
  690. sw $s0, 4*0($sp)
  691. ___
  692. $code.=<<___;
  693. .set reorder
  694. srl $len,4 # number of complete blocks
  695. li $one,1
  696. beqz $len,.Labort
  697. #if defined(_MIPS_ARCH_MIPS32R6)
  698. andi $shr,$inp,3
  699. subu $inp,$inp,$shr # align $inp
  700. sll $shr,$shr,3 # byte to bit offset
  701. #endif
  702. lw $h0,0($ctx) # load hash value
  703. lw $h1,4($ctx)
  704. lw $h2,8($ctx)
  705. lw $h3,12($ctx)
  706. lw $h4,16($ctx)
  707. lw $r0,20($ctx) # load key
  708. lw $r1,24($ctx)
  709. lw $r2,28($ctx)
  710. lw $r3,32($ctx)
  711. lw $rs1,36($ctx)
  712. lw $rs2,40($ctx)
  713. lw $rs3,44($ctx)
  714. sll $len,4
  715. addu $len,$len,$inp # end of buffer
  716. b .Loop
  717. .align 4
  718. .Loop:
  719. #if defined(_MIPS_ARCH_MIPS32R6)
  720. lw $d0,0($inp) # load input
  721. lw $d1,4($inp)
  722. lw $d2,8($inp)
  723. lw $d3,12($inp)
  724. beqz $shr,.Laligned_inp
  725. lw $t0,16($inp)
  726. subu $t1,$zero,$shr
  727. # ifdef MIPSEB
  728. sllv $d0,$d0,$shr
  729. srlv $at,$d1,$t1
  730. sllv $d1,$d1,$shr
  731. or $d0,$d0,$at
  732. srlv $at,$d2,$t1
  733. sllv $d2,$d2,$shr
  734. or $d1,$d1,$at
  735. srlv $at,$d3,$t1
  736. sllv $d3,$d3,$shr
  737. or $d2,$d2,$at
  738. srlv $t0,$t0,$t1
  739. or $d3,$d3,$t0
  740. # else
  741. srlv $d0,$d0,$shr
  742. sllv $at,$d1,$t1
  743. srlv $d1,$d1,$shr
  744. or $d0,$d0,$at
  745. sllv $at,$d2,$t1
  746. srlv $d2,$d2,$shr
  747. or $d1,$d1,$at
  748. sllv $at,$d3,$t1
  749. srlv $d3,$d3,$shr
  750. or $d2,$d2,$at
  751. sllv $t0,$t0,$t1
  752. or $d3,$d3,$t0
  753. # endif
  754. .Laligned_inp:
  755. #else
  756. lwl $d0,0+MSB($inp) # load input
  757. lwl $d1,4+MSB($inp)
  758. lwl $d2,8+MSB($inp)
  759. lwl $d3,12+MSB($inp)
  760. lwr $d0,0+LSB($inp)
  761. lwr $d1,4+LSB($inp)
  762. lwr $d2,8+LSB($inp)
  763. lwr $d3,12+LSB($inp)
  764. #endif
  765. #ifdef MIPSEB
  766. # if defined(_MIPS_ARCH_MIPS32R2)
  767. wsbh $d0,$d0 # byte swap
  768. wsbh $d1,$d1
  769. wsbh $d2,$d2
  770. wsbh $d3,$d3
  771. rotr $d0,$d0,16
  772. rotr $d1,$d1,16
  773. rotr $d2,$d2,16
  774. rotr $d3,$d3,16
  775. # else
  776. srl $at,$d0,24 # byte swap
  777. srl $t0,$d0,8
  778. andi $t1,$d0,0xFF00
  779. sll $d0,$d0,24
  780. andi $t0,0xFF00
  781. sll $t1,$t1,8
  782. or $d0,$at
  783. srl $at,$d1,24
  784. or $t0,$t1
  785. srl $t1,$d1,8
  786. or $d0,$t0
  787. andi $t0,$d1,0xFF00
  788. sll $d1,$d1,24
  789. andi $t1,0xFF00
  790. sll $t0,$t0,8
  791. or $d1,$at
  792. srl $at,$d2,24
  793. or $t1,$t0
  794. srl $t0,$d2,8
  795. or $d1,$t1
  796. andi $t1,$d2,0xFF00
  797. sll $d2,$d2,24
  798. andi $t0,0xFF00
  799. sll $t1,$t1,8
  800. or $d2,$at
  801. srl $at,$d3,24
  802. or $t0,$t1
  803. srl $t1,$d3,8
  804. or $d2,$t0
  805. andi $t0,$d3,0xFF00
  806. sll $d3,$d3,24
  807. andi $t1,0xFF00
  808. sll $t0,$t0,8
  809. or $d3,$at
  810. or $t1,$t0
  811. or $d3,$t1
  812. # endif
  813. #endif
  814. srl $t0,$h4,2 # modulo-scheduled reduction
  815. andi $h4,$h4,3
  816. sll $at,$t0,2
  817. addu $d0,$d0,$h0 # accumulate input
  818. addu $t0,$t0,$at
  819. sltu $h0,$d0,$h0
  820. addu $d0,$d0,$t0 # ... and residue
  821. sltu $at,$d0,$t0
  822. addu $d1,$d1,$h1
  823. addu $h0,$h0,$at # carry
  824. sltu $h1,$d1,$h1
  825. addu $d1,$d1,$h0
  826. sltu $h0,$d1,$h0
  827. addu $d2,$d2,$h2
  828. addu $h1,$h1,$h0 # carry
  829. sltu $h2,$d2,$h2
  830. addu $d2,$d2,$h1
  831. sltu $h1,$d2,$h1
  832. addu $d3,$d3,$h3
  833. addu $h2,$h2,$h1 # carry
  834. sltu $h3,$d3,$h3
  835. addu $d3,$d3,$h2
  836. #if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
  837. multu $r0,$d0 # d0*r0
  838. sltu $h2,$d3,$h2
  839. maddu $rs3,$d1 # d1*s3
  840. addu $h3,$h3,$h2 # carry
  841. maddu $rs2,$d2 # d2*s2
  842. addu $h4,$h4,$padbit
  843. maddu $rs1,$d3 # d3*s1
  844. addu $h4,$h4,$h3
  845. mfhi $at
  846. mflo $h0
  847. multu $r1,$d0 # d0*r1
  848. maddu $r0,$d1 # d1*r0
  849. maddu $rs3,$d2 # d2*s3
  850. maddu $rs2,$d3 # d3*s2
  851. maddu $rs1,$h4 # h4*s1
  852. maddu $at,$one # hi*1
  853. mfhi $at
  854. mflo $h1
  855. multu $r2,$d0 # d0*r2
  856. maddu $r1,$d1 # d1*r1
  857. maddu $r0,$d2 # d2*r0
  858. maddu $rs3,$d3 # d3*s3
  859. maddu $rs2,$h4 # h4*s2
  860. maddu $at,$one # hi*1
  861. mfhi $at
  862. mflo $h2
  863. mul $t0,$r0,$h4 # h4*r0
  864. multu $r3,$d0 # d0*r3
  865. maddu $r2,$d1 # d1*r2
  866. maddu $r1,$d2 # d2*r1
  867. maddu $r0,$d3 # d3*r0
  868. maddu $rs3,$h4 # h4*s3
  869. maddu $at,$one # hi*1
  870. mfhi $at
  871. mflo $h3
  872. addiu $inp,$inp,16
  873. addu $h4,$t0,$at
  874. #else
  875. multu ($r0,$d0) # d0*r0
  876. mflo ($h0,$r0,$d0)
  877. mfhi ($h1,$r0,$d0)
  878. sltu $h2,$d3,$h2
  879. addu $h3,$h3,$h2 # carry
  880. multu ($rs3,$d1) # d1*s3
  881. mflo ($at,$rs3,$d1)
  882. mfhi ($t0,$rs3,$d1)
  883. addu $h4,$h4,$padbit
  884. addiu $inp,$inp,16
  885. addu $h4,$h4,$h3
  886. multu ($rs2,$d2) # d2*s2
  887. mflo ($a3,$rs2,$d2)
  888. mfhi ($t1,$rs2,$d2)
  889. addu $h0,$h0,$at
  890. addu $h1,$h1,$t0
  891. multu ($rs1,$d3) # d3*s1
  892. sltu $at,$h0,$at
  893. addu $h1,$h1,$at
  894. mflo ($at,$rs1,$d3)
  895. mfhi ($t0,$rs1,$d3)
  896. addu $h0,$h0,$a3
  897. addu $h1,$h1,$t1
  898. multu ($r1,$d0) # d0*r1
  899. sltu $a3,$h0,$a3
  900. addu $h1,$h1,$a3
  901. mflo ($a3,$r1,$d0)
  902. mfhi ($h2,$r1,$d0)
  903. addu $h0,$h0,$at
  904. addu $h1,$h1,$t0
  905. multu ($r0,$d1) # d1*r0
  906. sltu $at,$h0,$at
  907. addu $h1,$h1,$at
  908. mflo ($at,$r0,$d1)
  909. mfhi ($t0,$r0,$d1)
  910. addu $h1,$h1,$a3
  911. sltu $a3,$h1,$a3
  912. multu ($rs3,$d2) # d2*s3
  913. addu $h2,$h2,$a3
  914. mflo ($a3,$rs3,$d2)
  915. mfhi ($t1,$rs3,$d2)
  916. addu $h1,$h1,$at
  917. addu $h2,$h2,$t0
  918. multu ($rs2,$d3) # d3*s2
  919. sltu $at,$h1,$at
  920. addu $h2,$h2,$at
  921. mflo ($at,$rs2,$d3)
  922. mfhi ($t0,$rs2,$d3)
  923. addu $h1,$h1,$a3
  924. addu $h2,$h2,$t1
  925. multu ($rs1,$h4) # h4*s1
  926. sltu $a3,$h1,$a3
  927. addu $h2,$h2,$a3
  928. mflo ($a3,$rs1,$h4)
  929. addu $h1,$h1,$at
  930. addu $h2,$h2,$t0
  931. multu ($r2,$d0) # d0*r2
  932. sltu $at,$h1,$at
  933. addu $h2,$h2,$at
  934. mflo ($at,$r2,$d0)
  935. mfhi ($h3,$r2,$d0)
  936. addu $h1,$h1,$a3
  937. sltu $a3,$h1,$a3
  938. multu ($r1,$d1) # d1*r1
  939. addu $h2,$h2,$a3
  940. mflo ($a3,$r1,$d1)
  941. mfhi ($t1,$r1,$d1)
  942. addu $h2,$h2,$at
  943. sltu $at,$h2,$at
  944. multu ($r0,$d2) # d2*r0
  945. addu $h3,$h3,$at
  946. mflo ($at,$r0,$d2)
  947. mfhi ($t0,$r0,$d2)
  948. addu $h2,$h2,$a3
  949. addu $h3,$h3,$t1
  950. multu ($rs3,$d3) # d3*s3
  951. sltu $a3,$h2,$a3
  952. addu $h3,$h3,$a3
  953. mflo ($a3,$rs3,$d3)
  954. mfhi ($t1,$rs3,$d3)
  955. addu $h2,$h2,$at
  956. addu $h3,$h3,$t0
  957. multu ($rs2,$h4) # h4*s2
  958. sltu $at,$h2,$at
  959. addu $h3,$h3,$at
  960. mflo ($at,$rs2,$h4)
  961. addu $h2,$h2,$a3
  962. addu $h3,$h3,$t1
  963. multu ($r3,$d0) # d0*r3
  964. sltu $a3,$h2,$a3
  965. addu $h3,$h3,$a3
  966. mflo ($a3,$r3,$d0)
  967. mfhi ($t1,$r3,$d0)
  968. addu $h2,$h2,$at
  969. sltu $at,$h2,$at
  970. multu ($r2,$d1) # d1*r2
  971. addu $h3,$h3,$at
  972. mflo ($at,$r2,$d1)
  973. mfhi ($t0,$r2,$d1)
  974. addu $h3,$h3,$a3
  975. sltu $a3,$h3,$a3
  976. multu ($r0,$d3) # d3*r0
  977. addu $t1,$t1,$a3
  978. mflo ($a3,$r0,$d3)
  979. mfhi ($d3,$r0,$d3)
  980. addu $h3,$h3,$at
  981. addu $t1,$t1,$t0
  982. multu ($r1,$d2) # d2*r1
  983. sltu $at,$h3,$at
  984. addu $t1,$t1,$at
  985. mflo ($at,$r1,$d2)
  986. mfhi ($t0,$r1,$d2)
  987. addu $h3,$h3,$a3
  988. addu $t1,$t1,$d3
  989. multu ($rs3,$h4) # h4*s3
  990. sltu $a3,$h3,$a3
  991. addu $t1,$t1,$a3
  992. mflo ($a3,$rs3,$h4)
  993. addu $h3,$h3,$at
  994. addu $t1,$t1,$t0
  995. multu ($r0,$h4) # h4*r0
  996. sltu $at,$h3,$at
  997. addu $t1,$t1,$at
  998. mflo ($h4,$r0,$h4)
  999. addu $h3,$h3,$a3
  1000. sltu $a3,$h3,$a3
  1001. addu $t1,$t1,$a3
  1002. addu $h4,$h4,$t1
  1003. li $padbit,1 # if we loop, padbit is 1
  1004. #endif
  1005. bne $inp,$len,.Loop
  1006. sw $h0,0($ctx) # store hash value
  1007. sw $h1,4($ctx)
  1008. sw $h2,8($ctx)
  1009. sw $h3,12($ctx)
  1010. sw $h4,16($ctx)
  1011. .set noreorder
  1012. .Labort:
  1013. lw $s11,4*11($sp)
  1014. lw $s10,4*10($sp)
  1015. lw $s9, 4*9($sp)
  1016. lw $s8, 4*8($sp)
  1017. lw $s7, 4*7($sp)
  1018. lw $s6, 4*6($sp)
  1019. lw $s5, 4*5($sp)
  1020. lw $s4, 4*4($sp)
  1021. ___
  # Appended only for nubi flavours: reloads $s0..$s3 from the four lowest
  # stack slots, mirroring the nubi-only stores in the prologue.
  1022. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
  1023. lw $s3, 4*3($sp)
  1024. lw $s2, 4*2($sp)
  1025. lw $s1, 4*1($sp)
  1026. lw $s0, 4*0($sp)
  1027. ___
  1028. $code.=<<___;
  1029. jr $ra
  1030. addu $sp,$sp,4*12
  1031. .end poly1305_blocks
  1032. ___
  1033. }
  1034. {
  # Operands of the 32-bit poly1305_emit: hash value read from $ctx, result
  # bytes stored through $mac, four nonce words read from $nonce.  $tmp4 ($a3)
  # is one more scratch register.  Note that the generated code also reuses
  # $ctx as a scratch/carry register once the hash value has been loaded.
  1035. my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
  1036. $code.=<<___;
  1037. .align 5
  1038. .globl poly1305_emit
  1039. .ent poly1305_emit
  1040. poly1305_emit:
  1041. .frame $sp,0,$ra
  1042. .set reorder
  1043. lw $tmp4,16($ctx)
  1044. lw $tmp0,0($ctx)
  1045. lw $tmp1,4($ctx)
  1046. lw $tmp2,8($ctx)
  1047. lw $tmp3,12($ctx)
  1048. li $in0,-4 # final reduction
  1049. srl $ctx,$tmp4,2
  1050. and $in0,$in0,$tmp4
  1051. andi $tmp4,$tmp4,3
  1052. addu $ctx,$ctx,$in0
  1053. addu $tmp0,$tmp0,$ctx
  1054. sltu $ctx,$tmp0,$ctx
  1055. addiu $in0,$tmp0,5 # compare to modulus
  1056. addu $tmp1,$tmp1,$ctx
  1057. sltiu $in1,$in0,5
  1058. sltu $ctx,$tmp1,$ctx
  1059. addu $in1,$in1,$tmp1
  1060. addu $tmp2,$tmp2,$ctx
  1061. sltu $in2,$in1,$tmp1
  1062. sltu $ctx,$tmp2,$ctx
  1063. addu $in2,$in2,$tmp2
  1064. addu $tmp3,$tmp3,$ctx
  1065. sltu $in3,$in2,$tmp2
  1066. sltu $ctx,$tmp3,$ctx
  1067. addu $in3,$in3,$tmp3
  1068. addu $tmp4,$tmp4,$ctx
  1069. sltu $ctx,$in3,$tmp3
  1070. addu $ctx,$tmp4
  1071. srl $ctx,2 # see if it carried/borrowed
  1072. subu $ctx,$zero,$ctx
  1073. xor $in0,$tmp0
  1074. xor $in1,$tmp1
  1075. xor $in2,$tmp2
  1076. xor $in3,$tmp3
  1077. and $in0,$ctx
  1078. and $in1,$ctx
  1079. and $in2,$ctx
  1080. and $in3,$ctx
  1081. xor $in0,$tmp0
  1082. xor $in1,$tmp1
  1083. xor $in2,$tmp2
  1084. xor $in3,$tmp3
  1085. lw $tmp0,0($nonce) # load nonce
  1086. lw $tmp1,4($nonce)
  1087. lw $tmp2,8($nonce)
  1088. lw $tmp3,12($nonce)
  1089. addu $in0,$tmp0 # accumulate nonce
  1090. sltu $ctx,$in0,$tmp0
  1091. addu $in1,$tmp1
  1092. sltu $tmp1,$in1,$tmp1
  1093. addu $in1,$ctx
  1094. sltu $ctx,$in1,$ctx
  1095. addu $ctx,$tmp1
  1096. addu $in2,$tmp2
  1097. sltu $tmp2,$in2,$tmp2
  1098. addu $in2,$ctx
  1099. sltu $ctx,$in2,$ctx
  1100. addu $ctx,$tmp2
  1101. addu $in3,$tmp3
  1102. addu $in3,$ctx
  1103. srl $tmp0,$in0,8 # write mac value
  1104. srl $tmp1,$in0,16
  1105. srl $tmp2,$in0,24
  1106. sb $in0, 0($mac)
  1107. sb $tmp0,1($mac)
  1108. srl $tmp0,$in1,8
  1109. sb $tmp1,2($mac)
  1110. srl $tmp1,$in1,16
  1111. sb $tmp2,3($mac)
  1112. srl $tmp2,$in1,24
  1113. sb $in1, 4($mac)
  1114. sb $tmp0,5($mac)
  1115. srl $tmp0,$in2,8
  1116. sb $tmp1,6($mac)
  1117. srl $tmp1,$in2,16
  1118. sb $tmp2,7($mac)
  1119. srl $tmp2,$in2,24
  1120. sb $in2, 8($mac)
  1121. sb $tmp0,9($mac)
  1122. srl $tmp0,$in3,8
  1123. sb $tmp1,10($mac)
  1124. srl $tmp1,$in3,16
  1125. sb $tmp2,11($mac)
  1126. srl $tmp2,$in3,24
  1127. sb $in3, 12($mac)
  1128. sb $tmp0,13($mac)
  1129. sb $tmp1,14($mac)
  1130. sb $tmp2,15($mac)
  1131. jr $ra
  1132. .end poly1305_emit
  1133. .rdata
  1134. .asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
  1135. .align 2
  1136. ___
  1137. }
  1138. }}}
  # Emit the generated assembly.  The last command-line argument, when
  # present (and true), names the output file; otherwise the code is written
  # to the STDOUT inherited from the caller.
  $output = pop;
  if ($output) {
      # Three-argument open: the file name is never parsed for a mode or a
      # pipe, and a failure aborts instead of silently writing nothing.
      open(STDOUT, '>', $output) or die "can't open $output: $!";
  }
  print($code) or die "error writing output: $!";
  # Buffered write errors (for example a full disk) only surface at close.
  close(STDOUT) or die "error closing STDOUT: $!";