lshift.S 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. /* x86-64 __mpn_lshift --
  2. Copyright (C) 2007-2026 Free Software Foundation, Inc.
  3. This file is part of the GNU MP Library.
  4. The GNU MP Library is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU Lesser General Public License as published by
  6. the Free Software Foundation; either version 2.1 of the License, or (at your
  7. option) any later version.
  8. The GNU MP Library is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  11. License for more details.
  12. You should have received a copy of the GNU Lesser General Public License
  13. along with the GNU MP Library; see the file COPYING.LIB. If not,
  14. see <https://www.gnu.org/licenses/>. */
  15. #include "sysdep.h"
  16. #include "asm-syntax.h"
  /* Symbolic names for the registers holding the routine's four inputs.
     %rdi, %rsi, %rdx and %rcx are the first four integer argument registers
     of the System V AMD64 calling convention, in that order.  */
  /* rp: pointer the result words are stored through.  */
  17. #define rp %rdi
  /* up: pointer the source words are loaded through.  */
  18. #define up %rsi
  /* n: number of 64-bit words to process; the code also reads its low half
     directly as %edx.  */
  19. #define n %rdx
  /* cnt: shift count in bits (low byte of %rcx).  The shift instructions in
     this file name %cl directly rather than going through this alias.  */
  20. #define cnt %cl
  /* Everything that follows is assembled into the code section.  */
  21. .text
  /* __mpn_lshift: shift the n-word number at up left by the bit count in %cl
     and store the n result words at rp.  A word ("limb") is 64 bits; the
     word at the highest address is the most significant one.

     In:    rp (%rdi) = destination, up (%rsi) = source,
            n (%rdx) = word count, %cl = shift count.
     Out:   %rax = the bits shifted out of the most significant word
            (zero-extended).
     Clobb: %rax, %rdi, %rsi, %rdx, %r8-%r11, flags.  %rcx is not modified
            and the stack is not used.

     Method: both pointers are first moved to the most significant word and
     the data is then walked from high addresses to low.  The main loop is
     unrolled four times; the value of n mod 4 selects one of four setup
     sequences, each of which jumps into the loop at the matching stage (or
     finishes on its own when n is too small to reach the loop).

     NOTE(review): n == 0 would take the L(b00) path and touch four words, so
     the routine presumably requires n >= 1 -- confirm against the callers.
     NOTE(review): the permitted range of the shift count is not stated in
     this file -- confirm against the caller contract.  */
  22. ENTRY (__mpn_lshift)
  23. lea -8(rp,n,8), rp /* rp -> most significant destination word */
  24. lea -8(up,n,8), up /* up -> most significant source word */
  25. mov %edx, %eax /* only the low two bits of n are needed next */
  26. and $3, %eax /* eax = n mod 4 */
  27. jne L(nb00) /* n mod 4 != 0: keep dispatching */
  28. L(b00): /* n = 4, 8, 12, ... */
  29. mov (up), %r10 /* most significant source word */
  30. mov -8(up), %r11
  31. xor %eax, %eax
  32. shld %cl, %r10, %rax /* return value: bits leaving the top word */
  33. mov -16(up), %r8
  34. lea 24(rp), rp /* bias rp so -24(rp) at L(00) is the top destination word */
  35. sub $4, n
  36. jmp L(00)
  /* Reached for every n mod 4 != 0; the L(b01) case directly below is the
     one described by the comment on this label.  */
  37. L(nb00):/* n = 1, 5, 9, ... */
  38. cmp $2, %eax /* flags are consumed here and again at L(nb01) */
  39. jae L(nb01) /* n mod 4 is 2 or 3 */
  40. L(b01): mov (up), %r9 /* most significant source word */
  41. xor %eax, %eax
  42. shld %cl, %r9, %rax /* return value: bits leaving the top word */
  43. sub $2, n
  44. jb L(le1) /* borrow: n was 1, a single word */
  45. mov -8(up), %r10
  46. mov -16(up), %r11
  47. lea -8(up), up /* lea keeps the pointer adjustments flag-neutral */
  48. lea 16(rp), rp /* bias rp so -16(rp) at L(01) is the top destination word */
  49. jmp L(01)
  /* n == 1: shift the only word and store it.  */
  50. L(le1): shl %cl, %r9
  51. mov %r9, (rp)
  52. ret
  /* Reached for n mod 4 == 2 or 3; the flags from the cmp $2 above are still
     live, so "not equal" here means n mod 4 == 3.  */
  53. L(nb01):/* n = 2, 6, 10, ... */
  54. jne L(b11)
  55. L(b10): mov (up), %r8 /* most significant source word */
  56. mov -8(up), %r9
  57. xor %eax, %eax
  58. shld %cl, %r8, %rax /* return value: bits leaving the top word */
  59. sub $3, n
  60. jb L(le2) /* borrow: n was 2, exactly two words */
  61. mov -16(up), %r10
  62. lea -16(up), up
  63. lea 8(rp), rp /* bias rp so -8(rp) at L(10) is the top destination word */
  64. jmp L(10)
  /* n == 2: the upper word takes in the high bits of the lower word; the
     lower word is shifted with zeros entering at the bottom.  */
  65. L(le2): shld %cl, %r9, %r8
  66. mov %r8, (rp)
  67. shl %cl, %r9
  68. mov %r9, -8(rp)
  69. ret
  70. .p2align 4 /* performance critical! */
  71. L(b11): /* n = 3, 7, 11, ... */
  72. mov (up), %r11 /* most significant source word */
  73. mov -8(up), %r8
  74. xor %eax, %eax
  75. shld %cl, %r11, %rax /* return value: bits leaving the top word */
  76. mov -16(up), %r9
  77. lea -24(up), up /* up -> fourth word from the top, read first by L(top) */
  78. sub $4, n
  79. jb L(end) /* borrow: n was 3; r11, r8, r9 already hold every word */
  /* Main loop, four words per pass, falling through from L(b11).  L(10),
     L(01) and L(00) are the entry points used by the other three setup
     sequences.  Each three-instruction stage completes one result word with
     shld (its low bits come from the top of the next lower source word),
     loads a further source word, and stores the completed word.  %r8-%r11
     rotate through the roles of current word and lower neighbour.  */
  80. .p2align 4
  81. L(top): shld %cl, %r8, %r11
  82. mov (up), %r10
  83. mov %r11, (rp)
  84. L(10): shld %cl, %r9, %r8
  85. mov -8(up), %r11
  86. mov %r8, -8(rp)
  87. L(01): shld %cl, %r10, %r9
  88. mov -16(up), %r8
  89. mov %r9, -16(rp)
  90. L(00): shld %cl, %r11, %r10
  91. mov -24(up), %r9
  92. mov %r10, -24(rp)
  93. add $-32, up /* step both pointers down by four words */
  94. lea -32(rp), rp
  95. sub $4, n
  96. jnc L(top) /* loop until the count borrows */
  /* Wind-down: the last three source words are already in r11, r8 and r9.
     The least significant word uses plain shl so zeros enter at the bottom.
     %rax still holds the value computed by the setup code.  */
  97. L(end): shld %cl, %r8, %r11
  98. mov %r11, (rp)
  99. shld %cl, %r9, %r8
  100. mov %r8, -8(rp)
  101. shl %cl, %r9
  102. mov %r9, -16(rp)
  103. ret
  104. END (__mpn_lshift)