strnlen.S 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. /* strnlen - calculate the length of a string with limit.
  2. Copyright (C) 2013-2026 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library. If not, see
  14. <https://www.gnu.org/licenses/>. */
  15. #include <sysdep.h>
  16. /* Assumptions:
  17. *
  18. * ARMv8-a, AArch64, Advanced SIMD.
  19. * MTE compatible.
  20. */
  /* Symbolic names for the registers used by __strnlen below.  Note the
     deliberate sharing: result reuses x0 (srcin), shift and tmp are both
     x4, qdata/vdata are two views of vector register 0, and vend/dend are
     two views of vector register 2.  */
  21. #define srcin x0 /* in: start of the string (first argument) */
  22. #define cntin x1 /* in: maximum number of bytes to examine */
  23. #define result x0 /* out: return value, overwrites srcin */
  24. #define src x2 /* 16-byte aligned address of the current chunk */
  25. #define synd x3 /* syndrome: bit mask of zero-byte matches */
  26. #define shift x4 /* bit count used to discard leading mask bits */
  27. #define tmp x4 /* scratch, same register as shift */
  28. #define cntrem x5 /* running remainder of the limit in the loop */
  29. #define qdata q0 /* 128-bit view of the data register (ldr) */
  30. #define vdata v0 /* vector view of the data register */
  31. #define vhas_chr v1 /* per-byte result of the compare against zero */
  32. #define vend v2 /* compare result reduced to 64 bits */
  33. #define dend d2 /* 64-bit view of vend, moved to synd with fmov */
  34. /*
  35. Core algorithm:
  36. Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
  37. four bits per byte using the shrn instruction. A count-trailing-zeros
  38. operation then identifies the first zero byte. */
  /* __strnlen: length of a string, bounded by a limit.
     In:  srcin (x0) = start of the string, cntin (x1) = limit in bytes.
     Out: result (x0) = offset of the first zero byte, or cntin if that
          offset would not be below cntin.
     Scratch: x2-x5, v0-v2 and the condition flags.  The stack is not
     accessed.  */
  39. ENTRY (__strnlen)
  /* Round the start address down to its enclosing 16-byte chunk.  */
  40. bic src, srcin, 15
  /* A zero limit returns before any memory is read.  */
  41. cbz cntin, L(nomatch)
  /* Load the aligned chunk that holds the first byte and set every lane
     of vhas_chr whose data byte is zero.  */
  42. ld1 {vdata.16b}, [src]
  43. cmeq vhas_chr.16b, vdata.16b, 0
  /* shift = 4 * srcin.  The register-controlled lsr below uses the shift
     amount modulo 64, which is 4 * (srcin & 15): four mask bits for each
     byte of the chunk that lies before srcin.  */
  44. lsl shift, srcin, 2
  45. shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
  46. fmov synd, dend
  /* Discard the mask bits that belong to bytes before the string start.  */
  47. lsr synd, synd, shift
  /* No zero byte in the first chunk: carry on with the following chunks.  */
  48. cbz synd, L(start_loop)
  49. L(finish):
  /* rbit followed by clz counts the trailing zero bits of the mask; with
     four bits per byte, shifting right by 2 yields the byte index.  */
  50. rbit synd, synd
  51. clz synd, synd
  52. lsr result, synd, 2
  /* result = unsigned minimum of cntin and the index just computed.  */
  53. cmp cntin, result
  54. csel result, cntin, result, ls
  55. ret
  56. L(nomatch):
  /* Return the limit itself (this also covers cntin == 0).  */
  57. mov result, cntin
  58. ret
  59. L(start_loop):
  /* src - srcin is minus the offset of srcin within its chunk, so tmp
     becomes (number of string bytes covered by the first chunk) + 1.  */
  60. sub tmp, src, srcin
  61. add tmp, tmp, 17
  /* cntrem = cntin - tmp.  A borrow means the limit lies within the bytes
     already checked, so the answer is cntin.  */
  62. subs cntrem, cntin, tmp
  63. b.lo L(nomatch)
  64. /* Make sure that it won't overread by a 16-byte chunk */
  /* Bit 4 of cntrem selects the entry point of the two-chunk loop.  When
     it is clear the loop is entered at its second half.  When it is set,
     src is biased by -16 so that the pre-indexed load at L(loop32) reads
     the chunk at the original src + 16 first.  */
  65. tbz cntrem, 4, L(loop32_2)
  66. sub src, src, 16
  67. .p2align 5
  68. L(loop32):
  /* First half of an iteration: advance src by 32 (pre-index writeback)
     and test that chunk.  umaxp folds the 128-bit compare result into
     64 bits that are nonzero iff at least one byte matched.  */
  69. ldr qdata, [src, 32]!
  70. cmeq vhas_chr.16b, vdata.16b, 0
  71. umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
  72. fmov synd, dend
  73. cbnz synd, L(end)
  74. L(loop32_2):
  /* Second half: test the chunk at src + 16 without updating src, and
     charge the 32 bytes of a whole iteration to cntrem.  */
  75. ldr qdata, [src, 16]
  76. subs cntrem, cntrem, 32
  77. cmeq vhas_chr.16b, vdata.16b, 0
  /* The branch consumes the flags set by the subs above.  On a borrow the
     chunk just loaded is the last one within the limit, so the loop is
     left whether or not that chunk contains a zero byte.  */
  78. b.lo L(end_2)
  79. umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
  80. fmov synd, dend
  81. cbz synd, L(loop32)
  82. L(end_2):
  /* Make src point at the chunk that was loaded from src + 16.  */
  83. add src, src, 16
  84. L(end):
  /* Rebuild the four-bits-per-byte mask for the chunk at src.  result
     starts out as the offset of that chunk from the string start.  */
  85. shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
  86. sub result, src, srcin
  87. fmov synd, dend
  /* On big-endian builds rbit is skipped, so clz alone has to locate the
     first byte.  NOTE(review): presumably the ldr of a q register places
     the bytes in the opposite order there - confirm against the ISA.  */
  88. #ifndef __AARCH64EB__
  89. rbit synd, synd
  90. #endif
  91. clz synd, synd
  /* Add the byte index within the chunk.  When this point is reached with
     no match in the chunk, synd is 0, clz yields 64 and 16 is added; the
     minimum taken below then brings the value back to cntin.  */
  92. add result, result, synd, lsr 2
  93. cmp cntin, result
  94. csel result, cntin, result, ls
  95. ret
  96. END (__strnlen)
  /* Symbol setup through macros defined outside this file.  Going by the
     macro names, strnlen is made a weak alias of __strnlen and both names
     receive a hidden definition for references from inside the library.
     NOTE(review): confirm against the macro definitions.  */
  97. libc_hidden_def (__strnlen)
  98. weak_alias (__strnlen, strnlen)
  99. libc_hidden_def (strnlen)