rshift.S 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. /* x86-64 __mpn_rshift --
  2. Copyright (C) 2007-2026 Free Software Foundation, Inc.
  3. This file is part of the GNU MP Library.
  4. The GNU MP Library is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU Lesser General Public License as published by
  6. the Free Software Foundation; either version 2.1 of the License, or (at your
  7. option) any later version.
  8. The GNU MP Library is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  11. License for more details.
  12. You should have received a copy of the GNU Lesser General Public License
  13. along with the GNU MP Library; see the file COPYING.LIB. If not,
  14. see <https://www.gnu.org/licenses/>. */
  15. #include "sysdep.h"
  16. #include "asm-syntax.h"
  /* Symbolic names for the registers that carry the routine's inputs:
       rp  (%rdi) - base register of every store, i.e. the result limbs
       up  (%rsi) - base register of every load, i.e. the source limbs
       n   (%rdx) - limb counter, decremented as limbs are consumed
       cnt (%cl)  - bit count used by the shrd/shr instructions
     The visible code spells the shift count as a literal %cl and never
     expands the cnt macro.  */
  17. #define rp %rdi
  18. #define up %rsi
  19. #define n %rdx
  20. #define cnt %cl
  21. .text
  /* __mpn_rshift: shift the n-limb number at up right by %cl bits and
     store the n result limbs at rp.  Limbs are 8 bytes wide and are read
     and written in ascending address order, limb 0 (lowest address)
     first.

     In:   rp (%rdi) = destination, up (%rsi) = source, n (%rdx) = limb
           count, %cl = shift count.
           Presumably called from C as
           __mpn_rshift (rp, up, n, cnt) under the SysV AMD64 convention
           - confirm against the prototype.
     Out:  %rax = the bits shifted out of limb 0, placed in the most
           significant bits of the register; the remaining bits are zero.
           The top result limb is zero-filled from the left.
     Uses: %rax, %r8-%r11, flags; rp, up and n are modified.  No stack
           is used.

     n must be at least 1.  n = 0 is not handled: its low two bits are
     zero, so it is dispatched to L(b00), which unconditionally loads
     three limbs.
     NOTE(review): the shift count is presumably expected to be 1..63 as
     for GMP mpn_rshift - confirm with callers.

     Structure: the main loop is unrolled four ways and has four entry
     points.  n mod 4 selects a setup block that preloads the first limbs,
     computes the return value, and biases up, rp and n so that the
     fixed offsets inside the loop address the right limbs.  Every path
     with n >= 3 finishes in L(end) with exactly three limbs left.  */
  22. ENTRY (__mpn_rshift)
  /* eax = n mod 4; only the low two bits of n matter here.  */
  23. mov %edx, %eax
  24. and $3, %eax
  25. jne L(nb00)
  26. L(b00): /* n = 4, 8, 12, ... */
  27. mov (up), %r10
  28. mov 8(up), %r11
  /* Return value: with rax cleared, shrd moves the low %cl bits of
     limb 0 into the top of rax.  rax is not written again.  */
  29. xor %eax, %eax
  30. shrd %cl, %r10, %rax
  31. mov 16(up), %r8
  /* Bias the pointers for entry at L(00): its store to 24(rp) lands on
     result limb 0 and its load from 16(up) fetches source limb 3.  */
  32. lea 8(up), up
  33. lea -24(rp), rp
  34. sub $4, n
  35. jmp L(00)
  36. L(nb00):/* n mod 4 is 1, 2 or 3; falls into b01 for n = 1, 5, 9, ... */
  /* The flags set by this cmp are tested twice: by the jae here and by
     the jne at L(nb01), since jae leaves the flags untouched.  */
  37. cmp $2, %eax
  38. jae L(nb01)
  39. L(b01): mov (up), %r9
  40. xor %eax, %eax
  41. shrd %cl, %r9, %rax
  /* Borrow here means n was 1: only a single limb to shift.  */
  42. sub $2, n
  43. jb L(le1)
  44. mov 8(up), %r10
  45. mov 16(up), %r11
  /* Bias for entry at L(01): 16(rp) becomes result limb 0 and 8(up)
     becomes source limb 3.  */
  46. lea 16(up), up
  47. lea -16(rp), rp
  48. jmp L(01)
  /* n = 1: the lone limb is shifted with zero fill and stored.  */
  49. L(le1): shr %cl, %r9
  50. mov %r9, (rp)
  51. ret
  52. L(nb01):/* n mod 4 is 2 or 3; falls into b10 for n = 2, 6, 10, ... */
  /* Still testing the cmp $2 from L(nb00): not equal means n mod 4 = 3.  */
  53. jne L(b11)
  54. L(b10): mov (up), %r8
  55. mov 8(up), %r9
  56. xor %eax, %eax
  57. shrd %cl, %r8, %rax
  /* Borrow here means n was 2: finish with the two-limb tail.  */
  58. sub $3, n
  59. jb L(le2)
  60. mov 16(up), %r10
  /* Bias for entry at L(10): 8(rp) becomes result limb 0 and (up)
     becomes source limb 3.  */
  61. lea 24(up), up
  62. lea -8(rp), rp
  63. jmp L(10)
  /* n = 2: low result limb takes its top bits from the high limb; the
     high limb is zero-filled.  */
  64. L(le2): shrd %cl, %r9, %r8
  65. mov %r8, (rp)
  66. shr %cl, %r9
  67. mov %r9, 8(rp)
  68. ret
  /* Align the next label to a 16-byte boundary.  */
  69. .p2align 4
  70. L(b11): /* n = 3, 7, 11, ... */
  71. mov (up), %r11
  72. mov 8(up), %r8
  73. xor %eax, %eax
  74. shrd %cl, %r11, %rax
  75. mov 16(up), %r9
  /* up now points at source limb 4, so the loop's -8(up) is limb 3.
     rp needs no bias because this path enters the loop at its head.  */
  76. lea 32(up), up
  /* Borrow here means n was 3: the three preloaded limbs are all there
     is, so go straight to the tail.  */
  77. sub $4, n
  78. jb L(end)
  /* Main loop, aligned to 16 bytes.  Each pass stores four result limbs
     at 0, 8, 16 and 24(rp) and loads four new source limbs.  Each stage
     combines a limb with its upper neighbour via shrd, reloads the
     register that the previous stage just stored, and stores the result.
     Across the four stages the current-limb role rotates through
     r11, r8, r9, r10.  L(10), L(01) and L(00) are the join points for
     the n mod 4 = 2, 1 and 0 setup blocks respectively.  */
  79. .p2align 4
  80. L(top): shrd %cl, %r8, %r11
  81. mov -8(up), %r10
  82. mov %r11, (rp)
  83. L(10): shrd %cl, %r9, %r8
  84. mov (up), %r11
  85. mov %r8, 8(rp)
  86. L(01): shrd %cl, %r10, %r9
  87. mov 8(up), %r8
  88. mov %r9, 16(rp)
  89. L(00): shrd %cl, %r11, %r10
  90. mov 16(up), %r9
  91. mov %r10, 24(rp)
  92. add $32, up
  93. lea 32(rp), rp
  /* Continue while the subtraction does not borrow.  The setup blocks
     biased n so that the borrow occurs when three limbs remain.  */
  94. sub $4, n
  95. jnc L(top)
  /* Tail: the last three limbs are in r11, r8, r9 (low to high) and rp
     points at the slot for the first of them.  The topmost limb is
     shifted with plain shr, so zeros enter from the left.  */
  96. L(end): shrd %cl, %r8, %r11
  97. mov %r11, (rp)
  98. shrd %cl, %r9, %r8
  99. mov %r8, 8(rp)
  100. shr %cl, %r9
  101. mov %r9, 16(rp)
  102. ret
  103. END (__mpn_rshift)