| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- /* strchr - find a character in a string
- Copyright (C) 2014-2026 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <https://www.gnu.org/licenses/>. */
- #include <sysdep.h>
- /* Assumptions:
- *
- * ARMv8-a, AArch64, Advanced SIMD.
- * MTE compatible.
- */
- #define srcin x0 /* Incoming string pointer (first argument register). */
- #define chrin w1 /* Incoming character to search for (second argument register). */
- #define result x0 /* Return value; deliberately the same register as srcin. */
- #define src x2 /* srcin rounded down to 16 bytes, then advanced chunk by chunk. */
- #define tmp1 x1 /* Scratch for the syndrome; same register as chrin, so chrin must be consumed before tmp1 is first written. */
- #define tmp2 x3 /* Scratch: shift amount used only for the first chunk. */
- #define vrepchr v0 /* chrin replicated into all 16 byte lanes. */
- #define vdata v1 /* The 16-byte chunk currently being examined. */
- #define qdata q1 /* Same register as vdata, named as a Q register for ldr. */
- #define vhas_nul v2 /* Per-byte mask: NUL in the first chunk, NUL-or-match in the loop; also receives the merged syndrome bits. */
- #define vhas_chr v3 /* Per-byte mask: 0xff where the byte equals the character. */
- #define vrepmask v4 /* Constant 0x33 in every byte, used by bit/bif to merge the two masks. */
- #define vend v5 /* Narrowed or folded 64-bit result of the masks. */
- #define dend d5 /* Low 64 bits of vend, as moved to a general register. */
- /* Core algorithm.
- strchr receives the string in srcin and the character in chrin, and
- returns in result the address of the first byte equal to the character,
- or 0 if the terminating NUL is reached first. Searching for NUL itself
- returns the address of the terminator, because that byte counts as a match.
- For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
- per byte. Bits 0-1 are set if the relevant byte matched the requested
- character, bits 2-3 are set if the byte is NUL or matched (in the first
- chunk bits 2-3 are set for NUL only, which leads to the same answer).
- The count of trailing zeroes divided by 4 is the index of the first byte
- that is NUL or matched. If the count is a multiple of 4 that byte
- matched. If it is not a multiple of 4, there was no match.
- All loads are 16-byte aligned; bytes of the first chunk that lie before
- srcin are read but their syndrome bits are shifted out. */
- ENTRY (strchr)
- bic src, srcin, 15 /* src = srcin rounded down to a 16-byte boundary. */
- dup vrepchr.16b, chrin /* Replicate the character; chrin is not needed after this. */
- ld1 {vdata.16b}, [src] /* Load the aligned chunk that contains the first string byte. */
- movi vrepmask.16b, 0x33 /* Mask selecting bits 0-1 and 4-5 of every byte. */
- cmeq vhas_nul.16b, vdata.16b, 0 /* 0xff in every lane holding NUL. */
- cmeq vhas_chr.16b, vdata.16b, vrepchr.16b /* 0xff in every lane holding the character. */
- bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b /* Where the mask is set take the match bits, elsewhere keep the NUL bits. */
- lsl tmp2, srcin, 2 /* Four syndrome bits per byte; the register shift below only uses the amount modulo 64. */
- shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
- fmov tmp1, dend /* Syndrome to a general register (this overwrites chrin's register). */
- lsr tmp1, tmp1, tmp2 /* Discard syndrome bits of bytes that precede srcin. */
- cbz tmp1, L(loop) /* Neither NUL nor match in the first chunk: scan further chunks. */
- rbit tmp1, tmp1 /* rbit followed by clz counts trailing zeroes. */
- clz tmp1, tmp1
- /* Tmp1 is a multiple of 4 if the target character was
- found first. Otherwise we've found the end of string. */
- tst tmp1, 2 /* Bit 1 set means the first set bit was a NUL-only bit. */
- add result, srcin, tmp1, lsr 2 /* srcin + byte index (bit count / 4). */
- csel result, result, xzr, eq /* Keep the pointer on a match, else return 0. */
- ret
- .p2align 4
- L(loop):
- ldr qdata, [src, 16] /* Next chunk; src itself is not updated here. */
- cmeq vhas_chr.16b, vdata.16b, vrepchr.16b /* 0xff in every lane holding the character. */
- cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b /* Unsigned has_chr >= data: true for a match (0xff) or a NUL byte (0 >= 0). */
- umaxp vend.16b, vhas_nul.16b, vhas_nul.16b /* Pairwise max folds 128 bits into 64 for the zero test. */
- fmov tmp1, dend
- cbnz tmp1, L(end) /* Hit: src is 16 below the address of the chunk in vdata. */
- ldr qdata, [src, 32]! /* Pre-indexed: src += 32, then load from the new src. */
- cmeq vhas_chr.16b, vdata.16b, vrepchr.16b /* Same test as above on the second chunk of this iteration. */
- cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
- umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
- fmov tmp1, dend
- cbz tmp1, L(loop) /* Nothing found in either chunk: keep scanning. */
- sub src, src, 16 /* Make src 16 below the chunk address, matching the other exit. */
- L(end):
- #ifdef __AARCH64EB__ /* Big-endian path: no rbit, so the clz below counts from the most significant end. */
- bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b /* Where the mask is clear take the match bits, so the two bit pairs swap roles. */
- shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
- fmov tmp1, dend
- #else
- bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b /* Where the mask is set take the match bits, as in the first chunk. */
- shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
- fmov tmp1, dend
- rbit tmp1, tmp1 /* Reverse so that the clz below counts trailing zeroes. */
- #endif
- add src, src, 16 /* src now addresses the chunk that produced the hit. */
- clz tmp1, tmp1
- /* Tmp1 is a multiple of 4 if the target character was found. */
- tst tmp1, 2 /* Bit 1 set means NUL was reached before any match. */
- add result, src, tmp1, lsr 2 /* Chunk address + byte index (bit count / 4). */
- csel result, result, xzr, eq /* Keep the pointer on a match, else return 0. */
- ret
- END (strchr)
- libc_hidden_builtin_def (strchr) /* Macro defined outside this file; presumably sets up the library-internal hidden definition - confirm in the glibc headers. */
- weak_alias (strchr, index) /* Macro defined outside this file; presumably exports index as a weak alias of strchr - confirm in the glibc headers. */
|