addmul_1.S 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
  2. # the result to a second limb vector.
  3. # Copyright (C) 1992-2026 Free Software Foundation, Inc.
  4. # This file is part of the GNU MP Library.
  5. # The GNU MP Library is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU Lesser General Public License as published by
  7. # the Free Software Foundation; either version 2.1 of the License, or (at your
  8. # option) any later version.
  9. # The GNU MP Library is distributed in the hope that it will be useful, but
  10. # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11. # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  12. # License for more details.
  13. # You should have received a copy of the GNU Lesser General Public License
  14. # along with the GNU MP Library. If not, see <https://www.gnu.org/licenses/>.
  15. # INPUT PARAMETERS
  16. # res_ptr r16
  17. # s1_ptr r17
  18. # size r18
  19. # s2_limb r19
  20. # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
  21. .set noreorder # keep the hand-scheduled instruction order exactly as written
  22. .set noat # forbid the assembler from implicitly using the $at scratch register
  23. .text
  24. .align 3 # 2^3 = 8-byte boundary for the entry point
  #
  # __mpn_addmul_1 -- for each of the `size` limbs compute
  #     res_ptr[i] = res_ptr[i] + s1_ptr[i] * s2_limb
  # propagating the carry from limb to limb, and leave the final carry-out
  # limb in $0 on return (presumably the function result under the Alpha
  # calling convention -- confirm against the C prototype).
  #
  # Register use inside the routine:
  #   $16  res_ptr  (advanced by 8 after each store, except the last)
  #   $17  s1_ptr   (advanced by 8 after each load)
  #   $18  size     (limbs still to be fetched; counted down to 0)
  #   $19  s2_limb  (multiplier, never modified)
  #   $2   current s1 limb
  #   $3   low product, then the sum that is stored to *res_ptr
  #   $5   *res_ptr, then the carry out of the final add for that limb
  #   $0,$4  high product and pending carries (roles alternate, see below)
  #
  # Structure: the next s1 limb is always loaded before the current limb's
  # result is finished, so sizes 1 and 2 are peeled off to .Lend1 / .Lend2
  # and the loop body never reads past s1_ptr[size-1].
  # NOTE: there is no test for size == 0; the first limb is loaded and
  # stored unconditionally, so callers are assumed to pass size >= 1.
  #
  25. .globl __mpn_addmul_1
  26. .ent __mpn_addmul_1 2
  27. __mpn_addmul_1:
  28. .frame $30,0,$26 # frame base $30, frame size 0, return address in $26
  # --- limb 0: start the multiply, decide whether it is the only limb ---
  29. ldq $2,0($17) # $2 = s1_limb
  30. addq $17,8,$17 # s1_ptr++
  31. subq $18,1,$18 # size--
  32. mulq $2,$19,$3 # $3 = prod_low
  33. ldq $5,0($16) # $5 = *res_ptr
  34. umulh $2,$19,$0 # $0 = prod_high
  35. beq $18,.Lend1 # jump if size was == 1
  # --- at least 2 limbs: fetch limb 1, then finish and store limb 0 ---
  36. ldq $2,0($17) # $2 = s1_limb
  37. addq $17,8,$17 # s1_ptr++
  38. subq $18,1,$18 # size--
  39. addq $5,$3,$3 # $3 = *res_ptr + prod_low
  40. cmpult $3,$5,$4 # $4 = 1 if that add wrapped (sum < addend), else 0
  41. stq $3,0($16) # store result limb 0
  42. addq $16,8,$16 # res_ptr++
  43. beq $18,.Lend2 # jump if size was == 2
  # Here $0 = high product of the previous limb, $4 = its carry, and
  # $2 = the limb to multiply next.
  44. .align 3 # put the loop head on an 8-byte boundary
  # --- main loop: one result limb per iteration, next s1 limb prefetched ---
  # On entry $0 + $4 is the carry limb owed to this position: on the first
  # pass $0 is a high product and $4 a carry bit, on later passes $4 is the
  # high product and $0 the combined carries. The sum is the same either way.
  45. .Loop: mulq $2,$19,$3 # $3 = prod_low
  46. ldq $5,0($16) # $5 = *res_ptr
  47. addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
  48. subq $18,1,$18 # size--
  49. umulh $2,$19,$4 # $4 = cy_limb
  50. ldq $2,0($17) # $2 = s1_limb
  51. addq $17,8,$17 # s1_ptr++
  52. addq $3,$0,$3 # $3 = cy_limb + prod_low
  53. cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
  54. addq $5,$3,$3 # $3 = *res_ptr + (cy_limb + prod_low)
  55. cmpult $3,$5,$5 # $5 = carry from adding *res_ptr
  56. stq $3,0($16) # store result limb
  57. addq $16,8,$16 # res_ptr++
  58. addq $5,$0,$0 # combine carries
  59. bne $18,.Loop # loop while limbs remain to be fetched
  # --- last limb (already in $2): same arithmetic as the loop body, but
  # with no further load and no pointer updates ---
  60. .Lend2: mulq $2,$19,$3 # $3 = prod_low
  61. ldq $5,0($16) # $5 = *res_ptr
  62. addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
  63. umulh $2,$19,$4 # $4 = cy_limb
  64. addq $3,$0,$3 # $3 = cy_limb + prod_low
  65. cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
  66. addq $5,$3,$3 # $3 = *res_ptr + (cy_limb + prod_low)
  67. cmpult $3,$5,$5 # $5 = carry from adding *res_ptr
  68. stq $3,0($16) # store the final result limb
  69. addq $5,$0,$0 # combine carries
  70. addq $4,$0,$0 # cy_limb = prod_high + cy
  71. ret $31,($26),1 # return via $26 with the carry-out limb in $0
  # --- size == 1: finish limb 0 and return its carry-out limb ---
  72. .Lend1: addq $5,$3,$3 # $3 = *res_ptr + prod_low
  73. cmpult $3,$5,$5 # $5 = carry from that add
  74. stq $3,0($16) # store the only result limb
  75. addq $0,$5,$0 # $0 = prod_high + carry
  76. ret $31,($26),1 # return via $26 with the carry-out limb in $0
  77. .end __mpn_addmul_1