wmemcmp-vx.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. /* Vector Optimized 32/64 bit S/390 version of wmemcmp.
  2. Copyright (C) 2015-2026 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library; if not, see
  14. <https://www.gnu.org/licenses/>. */
  15. #include <ifunc-wmemcmp.h>
  16. #if HAVE_WMEMCMP_Z13
  17. # include "sysdep.h"
  18. # include "asm-syntax.h"
  19. .text
  20. /* int wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n)
  21. Compare the first n characters of two wchar_t-arrays (there is no search for null characters). Returns 0 if they are equal, otherwise 1 or -1 according to a signed compare of the first differing characters.
  22. Register usage:
  23. -r0=tmp (differing character of s1)
  24. -r1=number of blocks; also tmp (max byte-count, unequal byte-index, differing character of s2, -1)
  25. -r2=s1; return value on exit
  26. -r3=s2
  27. -r4=n; converted to a byte-count, later the remaining byte-count
  28. -r5=current_len (byte offset into s1 and s2)
  29. -v16=part of s1
  30. -v17=part of s2
  31. -v18=index of unequal
  32. */
  33. ENTRY(WMEMCMP_Z13)
  34. .machine "z13"
  35. .machinemode "zarch_nohighgprs"
  36. # if !defined __s390x__
  37. llgfr %r4,%r4 /* Zero-extend the 32bit n to 64bit. */
  38. # endif /* !defined __s390x__ */
  39. clgije %r4,0,.Lend_equal /* Nothing to do if n == 0. */
  40. /* Check range of maxlen and convert to byte-count. If one of the two topmost bits of maxlen is set, maxlen * 4 does not fit and the max byte-count is used instead. */
  41. # ifdef __s390x__
  42. tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */
  43. lghi %r1,-4 /* Max byte-count is 18446744073709551612. */
  44. # else
  45. tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */
  46. llilf %r1,4294967292 /* Max byte-count is 4294967292. */
  47. # endif /* !__s390x__ */
  48. sllg %r4,%r4,2 /* Convert character-count to byte-count. The condition code of the test above is still needed by locgrne. */
  49. locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */
  50. lghi %r5,0 /* current_len = 0. */
  51. clgijh %r4,16,.Lgt16 /* More than 16 bytes -> compare in blocks. */
  52. .Lremaining: /* Compare the last r4 (at most 16) bytes at r2 and r3. */
  53. aghi %r4,-1 /* vll needs highest index. */
  54. vll %v16,%r4,0(%r2) /* Load bytes 0...r4 of s1. */
  55. vll %v17,%r4,0(%r3) /* Load bytes 0...r4 of s2. */
  56. vfenef %v18,%v16,%v17 /* Compare not equal. */
  57. vlgvb %r1,%v18,7 /* Load unequal index or 16 if not found. */
  58. clrj %r1,%r4,12,.Lfound2 /* r1 <= r4 -> unequal within loaded
  59. bytes. */
  60. .Lend_equal:
  61. lghi %r2,0 /* Return 0. */
  62. br %r14
  63. .Lfound:
  64. /* vfenefs found an unequal element.
  65. This instruction compares unsigned words, but wchar_t is signed.
  66. Thus we have to compare the found element again. */
  67. vlgvb %r1,%v18,7 /* Extract not equal byte-index. */
  68. .Lfound2:
  69. srl %r1,2 /* And convert it to character-index. */
  70. vlgvf %r0,%v16,0(%r1) /* Load character-values. */
  71. vlgvf %r1,%v17,0(%r1)
  72. cr %r0,%r1 /* Signed compare of both characters. */
  73. je .Lend_equal /* NOTE(review): the elements were reported as unequal, so this branch looks unreachable - confirm. */
  74. lghi %r2,1 /* Preset return value 1. lghi keeps the condition code of cr for locgrl. */
  75. lghi %r1,-1
  76. locgrl %r2,%r1 /* Return -1 if the s1 character is lower. */
  77. br %r14
  78. .Lgt16:
  79. clgijh %r4,64,.Lpreloop64 /* More than 64 bytes -> 64byte loop. */
  80. .Lpreloop16:
  81. srlg %r1,%r4,4 /* Split into 16byte blocks */
  82. .Lloop16:
  83. vl %v16,0(%r5,%r2) /* Load 16 bytes of s1 at current_len. */
  84. vl %v17,0(%r5,%r3) /* Load 16 bytes of s2 at current_len. */
  85. aghi %r5,16 /* current_len += 16. Done before vfenefs as aghi changes the condition code. */
  86. vfenefs %v18,%v16,%v17 /* Compare not equal. */
  87. jno .Lfound /* Jump if an unequal element was found. */
  88. brctg %r1,.Lloop16 /* Loop until all blocks are processed. */
  89. llgfr %r4,%r4 /* Clear the upper 32 bits as nilf only changes the lower ones. */
  90. nilf %r4,15 /* Get remaining bytes */
  91. locgre %r2,%r4 /* No remaining bytes -> return 0. */
  92. ber %r14
  93. la %r2,0(%r5,%r2) /* s1 += current_len. */
  94. la %r3,0(%r5,%r3) /* s2 += current_len. */
  95. j .Lremaining
  96. .Lpreloop64:
  97. srlg %r1,%r4,6 /* Split into 64byte blocks */
  98. .Lloop64:
  99. vl %v16,0(%r5,%r2) /* Four 16byte compares per iteration. */
  100. vl %v17,0(%r5,%r3)
  101. vfenefs %v18,%v16,%v17 /* Compare not equal. */
  102. jno .Lfound /* Jump if an unequal element was found. */
  103. vl %v16,16(%r5,%r2)
  104. vl %v17,16(%r5,%r3)
  105. vfenefs %v18,%v16,%v17
  106. jno .Lfound
  107. vl %v16,32(%r5,%r2)
  108. vl %v17,32(%r5,%r3)
  109. vfenefs %v18,%v16,%v17
  110. jno .Lfound
  111. vl %v16,48(%r5,%r2)
  112. vl %v17,48(%r5,%r3)
  113. aghi %r5,64 /* current_len += 64. Done before vfenefs as aghi changes the condition code. */
  114. vfenefs %v18,%v16,%v17
  115. jno .Lfound
  116. brctg %r1,.Lloop64 /* Loop until all blocks are processed. */
  117. llgfr %r4,%r4 /* Clear the upper 32 bits as nilf only changes the lower ones. */
  118. nilf %r4,63 /* Get remaining bytes */
  119. locgre %r2,%r4 /* No remaining bytes -> return 0. */
  120. ber %r14
  121. clgijh %r4,16,.Lpreloop16 /* More than 16 bytes left -> 16byte loop, r5 keeps the current offset. */
  122. la %r2,0(%r5,%r2) /* s1 += current_len. */
  123. la %r3,0(%r5,%r3) /* s2 += current_len. */
  124. j .Lremaining
  125. END(WMEMCMP_Z13)
  126. # if ! HAVE_WMEMCMP_IFUNC
  127. strong_alias (WMEMCMP_Z13, __wmemcmp) /* Without ifunc for wmemcmp, make WMEMCMP_Z13 available as __wmemcmp ... */
  128. weak_alias (__wmemcmp, wmemcmp) /* ... and as weak alias wmemcmp. */
  129. # endif /* ! HAVE_WMEMCMP_IFUNC */
  130. #endif