| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458 |
- /* Conversion loop frame work.
- Copyright (C) 1998-2026 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
- /* This file provides a frame for the reader loop in all conversion modules.
- The actual code must (of course) be provided in the actual module source
- code but certain actions can be written down generically, with some
- customization options which are these:
- MIN_NEEDED_INPUT minimal number of input bytes needed for the next
- conversion.
- MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round
- of conversion.
- MAX_NEEDED_INPUT you guessed it, this is the maximal number of input
- bytes needed. It defaults to MIN_NEEDED_INPUT.
- MAX_NEEDED_OUTPUT likewise for output bytes.
- LOOPFCT name of the function created. If not specified
- the name is `loop' but this prevents the use
- of multiple functions in the same file.
- BODY this is supposed to expand to the body of the loop.
- The user must provide this.
- EXTRA_LOOP_DECLS extra arguments passed from conversion loop call.
- INIT_PARAMS code to define and initialize variables from params.
- UPDATE_PARAMS code to store result in params.
- ONEBYTE_BODY body of the specialized conversion function for a
- single byte from the current character set to INTERNAL.
- */
- #include <assert.h>
- #include <endian.h>
- #include <iconv/gconv_int.h>
- #include <stdint.h>
- #include <string.h>
- #include <wchar.h>
- #include <sys/param.h> /* For MIN. */
- #define __need_size_t
- #include <stddef.h>
- #include <libc-diag.h>
- #undef FCTNAME2
- #define FCTNAME(name) name
- /* We need at least one byte for the next round. */
- #ifndef MIN_NEEDED_INPUT
- # error "MIN_NEEDED_INPUT definition missing"
- #elif MIN_NEEDED_INPUT < 1
- # error "MIN_NEEDED_INPUT must be >= 1"
- #endif
- /* Unless told otherwise, the maximal number of input bytes needed is the same as the minimal number. */
- #ifndef MAX_NEEDED_INPUT
- # define MAX_NEEDED_INPUT MIN_NEEDED_INPUT
- #endif
- /* We produce at least one byte in the next round. */
- #ifndef MIN_NEEDED_OUTPUT
- # error "MIN_NEEDED_OUTPUT definition missing"
- #elif MIN_NEEDED_OUTPUT < 1
- # error "MIN_NEEDED_OUTPUT must be >= 1"
- #endif
- /* Let's see how many bytes we produce. */
- #ifndef MAX_NEEDED_OUTPUT
- # define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT
- #endif
- /* Default name for the function. */
- #ifndef LOOPFCT
- # define LOOPFCT loop
- #endif
- /* Make sure we have a loop body. */
- #ifndef BODY
- # error "Definition of BODY missing for function" LOOPFCT
- #endif
- /* If no arguments have to be passed to the loop function define the macro
- as empty. */
- #ifndef EXTRA_LOOP_DECLS
- # define EXTRA_LOOP_DECLS
- #endif
- /* Allow using UPDATE_PARAMS in macros where #ifdef UPDATE_PARAMS test
- isn't possible. */
- #ifndef UPDATE_PARAMS
- # define UPDATE_PARAMS do { } while (0)
- #endif
- #ifndef REINIT_PARAMS
- # define REINIT_PARAMS do { } while (0)
- #endif
- /* To make it easier for the writers of the modules, we define a macro
- to test whether we have to ignore errors. It is true only when the
- caller supplied a counter (IRREVERSIBLE is non-null) and the
- __GCONV_IGNORE_ERRORS bit is set in FLAGS. Both `irreversible' and
- `flags' must be in scope where the macro is expanded; the loop
- functions in this file declare `flags' only if LOOP_NEED_FLAGS is
- defined. */
- #define ignore_errors_p() \
- (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS))
- /* Error handling for the FROM_LOOP direction, with ignoring of errors.
- Note that we cannot use the do while (0) trick since `break' and
- `continue' must reach certain points.
- The macro first stores the value of
- __gconv_mark_illegal_input (step_data) in RESULT. Unless errors are to
- be ignored (see ignore_errors_p) it then leaves the enclosing loop with
- `break'. Otherwise it skips INCR input bytes, counts the skipped
- sequence in *IRREVERSIBLE and restarts the enclosing loop with
- `continue', leaving RESULT as it was set. */
- #define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
- { \
- result = __gconv_mark_illegal_input (step_data); \
- if (! ignore_errors_p ()) \
- break; \
- \
- /* We ignore the invalid input byte sequence. */ \
- inptr += (Incr); \
- ++*irreversible; \
- /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
- that "iconv -c" must give the same exitcode as "iconv". */ \
- continue; \
- }
- /* Error handling for the TO_LOOP direction, with use of transliteration/
- transcription functions and ignoring of errors. Note that we cannot use
- the do while (0) trick since `break' and `continue' must reach certain
- points.
- The steps are, in order:
- 1. If IRREVERSIBLE is null, set RESULT from __gconv_mark_illegal_input
- and `break'.
- 2. Run UPDATE_PARAMS, then either call __gconv_transliterate (when
- __GCONV_TRANSLIT is set in step_data->__flags) or set RESULT from
- __gconv_mark_illegal_input, then run REINIT_PARAMS.
- 3. If RESULT is now anything but __GCONV_ILLEGAL_INPUT, `break' on
- __GCONV_FULL_OUTPUT and `continue' otherwise.
- 4. If errors are not to be ignored (see ignore_errors_p), `break'.
- 5. Otherwise count the character in *IRREVERSIBLE, skip INCR input
- bytes and `continue'.
- INCR is pasted into `inptr += Incr' without parentheses, so callers
- should pass a simple expression. */
- #define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
- { \
- if (irreversible == NULL) \
- { \
- /* This means we are in call from __gconv_transliterate. In this \
- case we are not doing any error recovery ourselves. */ \
- result = __gconv_mark_illegal_input (step_data); \
- break; \
- } \
- \
- /* If needed, flush any conversion state, so that __gconv_transliterate \
- starts with current shift state. */ \
- UPDATE_PARAMS; \
- \
- /* First try the transliteration methods. */ \
- if ((step_data->__flags & __GCONV_TRANSLIT) != 0) \
- result = __gconv_transliterate \
- (step, step_data, *inptrp, \
- &inptr, inend, &outptr, irreversible); \
- else \
- result = __gconv_mark_illegal_input (step_data); \
- \
- REINIT_PARAMS; \
- \
- /* If any of them recognized the input continue with the loop. */ \
- if (result != __GCONV_ILLEGAL_INPUT) \
- { \
- if (__glibc_unlikely (result == __GCONV_FULL_OUTPUT)) \
- break; \
- \
- continue; \
- } \
- \
- /* Next see whether we have to ignore the error. If not, stop. */ \
- if (! ignore_errors_p ()) \
- break; \
- \
- /* When we come here it means we ignore the character. */ \
- ++*irreversible; \
- inptr += Incr; \
- /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
- that "iconv -c" must give the same exitcode as "iconv". */ \
- continue; \
- }
- /* With GCC 7 when compiling with -Os for 32-bit s390 the compiler
- warns that the variable 'ch', in the definition of BODY in
- sysdeps/s390/multiarch/8bit-generic.c, may be used uninitialized in
- the call to UNICODE_TAG_HANDLER in that macro. This variable is
- actually always initialized before use, in the prior loop if INDEX
- is nonzero and in the following 'if' if INDEX is zero. That code
- has a comment referencing this diagnostic disabling; updates in one
- place may require updates in the other. */
- DIAG_PUSH_NEEDS_COMMENT;
- DIAG_IGNORE_Os_NEEDS_COMMENT_GCC (7, "-Wmaybe-uninitialized");
- /* Handling of Unicode 3.1 TAG characters. Unicode recommends
- "If language codes are not relevant to the particular processing
- operation, then they should be ignored." This macro is usually
- called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr).
- When CHARACTER lies in the TAG range the macro skips INCR input bytes
- and restarts the enclosing loop with `continue'; it does not touch the
- output pointer. For any other value it does nothing. The expansion
- is a bare block rather than do-while (0) so that `continue' binds to
- the loop of the code using the macro. */
- #define UNICODE_TAG_HANDLER(Character, Incr) \
- { \
- /* TAG characters are those in the range U+E0000..U+E007F. */ \
- if (((Character) >> 7) == (0xe0000 >> 7)) \
- { \
- inptr += Incr; \
- continue; \
- } \
- }
- DIAG_POP_NEEDS_COMMENT;
- /* The function returns the status, as defined in gconv.h.
- It runs BODY repeatedly, starting at *INPTRP and *OUTPTRP, until the
- input pointer reaches INEND or the loop is left with `break'. Before
- each round it checks that MIN_NEEDED_INPUT bytes of input remain (the
- check is only made when MIN_NEEDED_INPUT > 1), stopping with
- __GCONV_INCOMPLETE_INPUT otherwise, and that MIN_NEEDED_OUTPUT bytes of
- room remain before OUTEND, stopping with __GCONV_FULL_OUTPUT otherwise.
- RESULT starts out as __GCONV_EMPTY_INPUT and is returned unchanged if
- nothing in the loop assigns to it. On return *INPTRP and *OUTPTRP hold
- the positions reached and UPDATE_PARAMS has been executed.
- The locals `state', `flags' and `data' exist only when LOOP_NEED_STATE,
- LOOP_NEED_FLAGS and LOOP_NEED_DATA, respectively, are defined. */
- static inline int
- __attribute ((always_inline))
- FCTNAME (LOOPFCT) (struct __gconv_step *step,
- struct __gconv_step_data *step_data,
- const unsigned char **inptrp, const unsigned char *inend,
- unsigned char **outptrp, const unsigned char *outend,
- size_t *irreversible EXTRA_LOOP_DECLS)
- {
- #ifdef LOOP_NEED_STATE
- mbstate_t *state = step_data->__statep;
- #endif
- #ifdef LOOP_NEED_FLAGS
- int flags = step_data->__flags;
- #endif
- #ifdef LOOP_NEED_DATA
- void *data = step->__data;
- #endif
- int result = __GCONV_EMPTY_INPUT;
- const unsigned char *inptr = *inptrp;
- unsigned char *outptr = *outptrp;
- #ifdef INIT_PARAMS
- INIT_PARAMS;
- #endif
- while (inptr != inend)
- {
- /* The `if' cases on MIN_NEEDED_INPUT > 1 and MIN_NEEDED_OUTPUT ==/!= 1
- are written this way to help the compiler generate better code. They
- will be optimized away since these macros are always constants. */
- if (MIN_NEEDED_INPUT > 1
- && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
- {
- /* We don't have enough input for another complete input
- character. */
- result = __GCONV_INCOMPLETE_INPUT;
- break;
- }
- if ((MIN_NEEDED_OUTPUT != 1
- && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
- || (MIN_NEEDED_OUTPUT == 1
- && __builtin_expect (outptr >= outend, 0)))
- {
- /* Overflow in the output buffer. */
- result = __GCONV_FULL_OUTPUT;
- break;
- }
- /* Here comes the body the user provides. It can stop with
- RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
- input characters vary in size), GCONV_ILLEGAL_INPUT, or
- GCONV_FULL_OUTPUT (if the output characters vary in size). */
- BODY
- }
- /* Update the pointers pointed to by the parameters. */
- *inptrp = inptr;
- *outptrp = outptr;
- UPDATE_PARAMS;
- return result;
- }
- #if MAX_NEEDED_INPUT > 1
- # define SINGLE(fct) SINGLE2 (fct)
- # define SINGLE2(fct) fct##_single /* The helper defined next is named
- after LOOPFCT with `_single' appended. It converts one character
- whose leading bytes were left in the conversion state by an earlier
- call. The saved bytes (or whatever UNPACK_BYTES provides) are put
- into BYTEBUF, bytes from *INPTRP are appended until BYTEBUF holds
- MAX_NEEDED_INPUT bytes or the input ends, and BODY is run exactly once
- on BYTEBUF.
- Return value and effects as implemented here:
- - __GCONV_INCOMPLETE_INPUT, returned early, if saved plus available
- bytes are fewer than MIN_NEEDED_INPUT; *INPTRP is set to INEND and
- the available bytes are appended to those in the state (or
- STORE_REST is run).
- - __GCONV_FULL_OUTPUT, returned early, if fewer than MIN_NEEDED_OUTPUT
- bytes of room remain before OUTEND.
- - __GCONV_OK if BODY consumed input; *INPTRP advances by the number of
- bytes newly taken from the caller's buffer, *OUTPTRP is updated and
- the byte count in the low three bits of state->__count is cleared
- (or CLEAR_STATE is run).
- - __GCONV_INCOMPLETE_INPUT if BODY consumed nothing and reported that
- status; *INPTRP advances past the bytes copied into BYTEBUF and all
- collected bytes are saved in the state (or STORE_REST is run).
- - Any other RESULT left by BODY is returned as is; in that case this
- function itself modifies neither *INPTRP, *OUTPTRP nor the state. */
- static inline int
- __attribute ((always_inline))
- SINGLE(LOOPFCT) (struct __gconv_step *step,
- struct __gconv_step_data *step_data,
- const unsigned char **inptrp, const unsigned char *inend,
- unsigned char **outptrp, unsigned char *outend,
- size_t *irreversible EXTRA_LOOP_DECLS)
- {
- mbstate_t *state = step_data->__statep;
- # ifdef LOOP_NEED_FLAGS
- int flags = step_data->__flags;
- # endif
- # ifdef LOOP_NEED_DATA
- void *data = step->__data;
- # endif
- int result = __GCONV_OK;
- unsigned char bytebuf[MAX_NEEDED_INPUT];
- const unsigned char *inptr = *inptrp;
- unsigned char *outptr = *outptrp;
- size_t inlen;
- # ifdef INIT_PARAMS
- INIT_PARAMS;
- # endif
- # ifdef UNPACK_BYTES
- UNPACK_BYTES
- # else
- /* Add the bytes from the state to the input buffer. The number of
- saved bytes is kept in the low three bits of state->__count. */
- assert ((state->__count & 7) <= sizeof (state->__value));
- for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
- bytebuf[inlen] = state->__value.__wchb[inlen];
- # endif
- /* Are there enough bytes in the input buffer? */
- if (MIN_NEEDED_INPUT > 1
- && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
- {
- *inptrp = inend;
- # ifdef STORE_REST
- /* Building with -O3 GCC emits a `array subscript is above array
- bounds' warning. GCC BZ #64739 has been opened for this. */
- DIAG_PUSH_NEEDS_COMMENT;
- DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Warray-bounds");
- while (inptr < inend)
- bytebuf[inlen++] = *inptr++;
- DIAG_POP_NEEDS_COMMENT;
- inptr = bytebuf;
- inptrp = &inptr;
- inend = &bytebuf[inlen];
- STORE_REST
- # else
- /* We don't have enough input for another complete input
- character. */
- size_t inlen_after = inlen + (inend - inptr);
- assert (inlen_after <= sizeof (state->__value.__wchb));
- for (; inlen < inlen_after; inlen++)
- state->__value.__wchb[inlen] = *inptr++;
- # endif
- return __GCONV_INCOMPLETE_INPUT;
- }
- /* Is there enough space in the output buffer? */
- if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend)
- || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
- /* Overflow in the output buffer. */
- return __GCONV_FULL_OUTPUT;
- /* Now add characters from the normal input buffer. */
- if (inlen >= MAX_NEEDED_INPUT || inptr >= inend)
- /* Avoid a -Wstringop-overflow= warning when this loop is
- unrolled. The compiler cannot otherwise see that this is
- unreachable because it depends on (state->__count & 7) not
- being too large after a previous conversion step.
- Starting with GCC 12, we also have to mark the inptr >= inend
- case as unreachable to omit the warning. Note that this SINGLE
- function is only used to implement the mb*towc*() or wc*tomb*()
- functions. Those functions use inptr and inend pointing to a
- variable on stack, compute the inend pointer or explicitly check
- the arguments which always leads to inptr < inend. */
- __builtin_unreachable ();
- do
- bytebuf[inlen++] = *inptr++;
- while (inlen < MAX_NEEDED_INPUT && inptr < inend);
- inptr = bytebuf;
- inend = &bytebuf[inlen];
- do
- {
- BODY
- }
- while (0);
- /* Now we either have produced an output character and consumed all the
- bytes from the state and at least one more, or the character is still
- incomplete, or we have some other error (like illegal input character,
- no space in output buffer). */
- if (__glibc_likely (inptr != bytebuf))
- {
- /* We found a new character. */
- assert (inptr - bytebuf > (state->__count & 7));
- *inptrp += inptr - bytebuf - (state->__count & 7);
- *outptrp = outptr;
- result = __GCONV_OK;
- /* Clear the state buffer. */
- # ifdef CLEAR_STATE
- CLEAR_STATE;
- # else
- state->__count &= ~7;
- # endif
- }
- else if (result == __GCONV_INCOMPLETE_INPUT)
- {
- /* This can only happen if we have less than MAX_NEEDED_INPUT bytes
- available. */
- assert (inend != &bytebuf[MAX_NEEDED_INPUT]);
- *inptrp += inend - bytebuf - (state->__count & 7);
- # ifdef STORE_REST
- inptrp = &inptr;
- STORE_REST
- # else
- /* We don't have enough input for another complete input
- character. */
- assert (inend - inptr > (state->__count & ~7));
- assert (inend - inptr <= sizeof (state->__value.__wchb));
- state->__count = (state->__count & ~7) | (inend - inptr);
- for (inlen = 0; inlen < inend - inptr; inlen++)
- state->__value.__wchb[inlen] = inptr[inlen];
- inptr = inend;
- # endif
- }
- return result;
- }
- # undef SINGLE
- # undef SINGLE2
- # ifdef ONEBYTE_BODY
- /* Define the shortcut function for btowc. The whole function body,
- braces included, comes from ONEBYTE_BODY, which can refer to the
- parameters STEP and C. FROM_ONEBYTE is then defined to the name of
- the function.
- NOTE(review): as placed here the definition is nested inside the
- `#if MAX_NEEDED_INPUT > 1' region, so it is compiled only when that
- condition holds as well. Confirm that this nesting is intended. */
- static wint_t
- gconv_btowc (struct __gconv_step *step, unsigned char c)
- ONEBYTE_BODY
- # define FROM_ONEBYTE gconv_btowc
- # endif
- #endif
- /* We remove the macro definitions so that we can include this file again
- for the definition of another function. */
- #undef MIN_NEEDED_INPUT
- #undef MAX_NEEDED_INPUT
- #undef MIN_NEEDED_OUTPUT
- #undef MAX_NEEDED_OUTPUT
- #undef LOOPFCT
- #undef BODY
- #undef LOOPFCT
- #undef EXTRA_LOOP_DECLS
- #undef INIT_PARAMS
- #undef UPDATE_PARAMS
- #undef REINIT_PARAMS
- #undef ONEBYTE_BODY
- #undef UNPACK_BYTES
- #undef CLEAR_STATE
- #undef LOOP_NEED_STATE
- #undef LOOP_NEED_FLAGS
- #undef LOOP_NEED_DATA
|