mbrtoc16.c 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. /* Copyright (C) 2011-2026 Free Software Foundation, Inc.
  2. This file is part of the GNU C Library.
  3. The GNU C Library is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public
  5. License as published by the Free Software Foundation; either
  6. version 2.1 of the License, or (at your option) any later version.
  7. The GNU C Library is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public
  12. License along with the GNU C Library; if not, see
  13. <https://www.gnu.org/licenses/>. */
  14. #include <assert.h>
  15. #include <dlfcn.h>
  16. #include <errno.h>
  17. #include <gconv.h>
  18. #include <uchar.h>
  19. #include <wcsmbsload.h>
  20. #include <pointer_guard.h>
  /* mbrtoc16 reports invalid input through errno as EILSEQ.  If the
     headers included above do not define EILSEQ, alias it to EINVAL so
     the code below still compiles. */
  21. #ifndef EILSEQ
  22. # define EILSEQ EINVAL
  23. #endif
  24. /* Private conversion state that mbrtoc16 falls back to when its PS argument is NULL. */
  25. static mbstate_t state;
  26. size_t
  27. mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
  28. {
  29. if (ps == NULL)
  30. ps = &state;
  31. /* The standard text does not say that S being NULL means the state
  32. is reset even if the second half of a surrogate still have to be
  33. returned. In fact, the error code description indicates
  34. otherwise. Therefore always first try to return a second
  35. half. */
  36. if (ps->__count & 0x80000000)
  37. {
  38. /* We have to return the second word for a surrogate. */
  39. ps->__count &= 0x7fffffff;
  40. *pc16 = ps->__value.__wch;
  41. ps->__value.__wch = L'\0';
  42. return (size_t) -3;
  43. }
  44. wchar_t wc;
  45. struct __gconv_step_data data;
  46. int status;
  47. size_t result;
  48. size_t dummy;
  49. const unsigned char *inbuf, *endbuf;
  50. unsigned char *outbuf = (unsigned char *) &wc;
  51. const struct gconv_fcts *fcts;
  52. /* Set information for this step. */
  53. data.__invocation_counter = 0;
  54. data.__internal_use = 1;
  55. data.__flags = __GCONV_IS_LAST;
  56. data.__statep = ps;
  57. /* A first special case is if S is NULL. This means put PS in the
  58. initial state. */
  59. if (s == NULL)
  60. {
  61. pc16 = NULL;
  62. s = "";
  63. n = 1;
  64. }
  65. if (n == 0)
  66. return (size_t) -2;
  67. /* Tell where we want the result. */
  68. data.__outbuf = outbuf;
  69. data.__outbufend = outbuf + sizeof (wchar_t);
  70. /* Get the conversion functions. */
  71. fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
  72. /* Do a normal conversion. */
  73. inbuf = (const unsigned char *) s;
  74. endbuf = inbuf + n;
  75. if (__glibc_unlikely (endbuf < inbuf))
  76. {
  77. endbuf = (const unsigned char *) ~(uintptr_t) 0;
  78. if (endbuf == inbuf)
  79. goto ilseq;
  80. }
  81. __gconv_fct fct = fcts->towc->__fct;
  82. if (fcts->towc->__shlib_handle != NULL)
  83. PTR_DEMANGLE (fct);
  84. status = DL_CALL_FCT (fct, (fcts->towc, &data, &inbuf, endbuf,
  85. NULL, &dummy, 0, 1));
  86. /* There must not be any problems with the conversion but illegal input
  87. characters. The output buffer must be large enough, otherwise the
  88. definition of MB_CUR_MAX is not correct. All the other possible
  89. errors also must not happen. */
  90. assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
  91. || status == __GCONV_ILLEGAL_INPUT
  92. || status == __GCONV_INCOMPLETE_INPUT
  93. || status == __GCONV_FULL_OUTPUT);
  94. if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
  95. || status == __GCONV_FULL_OUTPUT)
  96. {
  97. result = inbuf - (const unsigned char *) s;
  98. if (wc < 0x10000)
  99. {
  100. if (pc16 != NULL)
  101. *pc16 = wc;
  102. if (data.__outbuf != outbuf && wc == L'\0')
  103. {
  104. /* The converted character is the NUL character. */
  105. assert (__mbsinit (data.__statep));
  106. result = 0;
  107. }
  108. }
  109. else
  110. {
  111. /* This is a surrogate. */
  112. if (pc16 != NULL)
  113. *pc16 = 0xd7c0 + (wc >> 10);
  114. ps->__count |= 0x80000000;
  115. ps->__value.__wch = 0xdc00 + (wc & 0x3ff);
  116. }
  117. }
  118. else if (status == __GCONV_INCOMPLETE_INPUT)
  119. result = (size_t) -2;
  120. else
  121. {
  122. ilseq:
  123. result = (size_t) -1;
  124. __set_errno (EILSEQ);
  125. }
  126. return result;
  127. }