c8rtomb.c 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
/* UTF-8 to multibyte conversion.
   Copyright (C) 2022-2026 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
  15. #include <errno.h>
  16. #include <uchar.h>
  17. #include <wchar.h>
  18. /* This is the private state used if PS is NULL. */
  19. static mbstate_t state;
  20. size_t
  21. c8rtomb (char *s, char8_t c8, mbstate_t *ps)
  22. {
  23. /* This implementation depends on the converter invoked by wcrtomb not
  24. needing to retain state in either the top most bit of ps->__count or
  25. in ps->__value between invocations. This implementation uses the
  26. top most bit of ps->__count to indicate that trailing code units are
  27. expected and uses ps->__value to store previously seen code units. */
  28. wchar_t wc;
  29. if (ps == NULL)
  30. ps = &state;
  31. if (s == NULL)
  32. {
  33. /* if 's' is a null pointer, behave as if u8'\0' was passed as 'c8'. If
  34. this occurs for an incomplete code unit sequence, then an error will
  35. be reported below. */
  36. c8 = u8""[0];
  37. }
  38. if (! (ps->__count & 0x80000000))
  39. {
  40. /* Initial state. */
  41. if ((c8 >= 0x80 && c8 <= 0xC1) || c8 >= 0xF5)
  42. {
  43. /* An invalid lead code unit. */
  44. __set_errno (EILSEQ);
  45. return -1;
  46. }
  47. if (c8 >= 0xC2)
  48. {
  49. /* A valid lead code unit. */
  50. ps->__count |= 0x80000000;
  51. ps->__value.__wchb[0] = c8;
  52. ps->__value.__wchb[3] = 1;
  53. return 0;
  54. }
  55. /* A single byte (ASCII) code unit. */
  56. wc = c8;
  57. }
  58. else
  59. {
  60. char8_t cu1 = ps->__value.__wchb[0];
  61. if (ps->__value.__wchb[3] == 1)
  62. {
  63. /* A single lead code unit was previously seen. */
  64. if ((c8 < 0x80 || c8 > 0xBF)
  65. || (cu1 == 0xE0 && c8 < 0xA0)
  66. || (cu1 == 0xED && c8 > 0x9F)
  67. || (cu1 == 0xF0 && c8 < 0x90)
  68. || (cu1 == 0xF4 && c8 > 0x8F))
  69. {
  70. /* An invalid second code unit. */
  71. __set_errno (EILSEQ);
  72. return -1;
  73. }
  74. if (cu1 >= 0xE0)
  75. {
  76. /* A three or four code unit sequence. */
  77. ps->__value.__wchb[1] = c8;
  78. ++ps->__value.__wchb[3];
  79. return 0;
  80. }
  81. wc = ((cu1 & 0x1F) << 6)
  82. + (c8 & 0x3F);
  83. }
  84. else
  85. {
  86. char8_t cu2 = ps->__value.__wchb[1];
  87. /* A three or four byte code unit sequence. */
  88. if (c8 < 0x80 || c8 > 0xBF)
  89. {
  90. /* An invalid third or fourth code unit. */
  91. __set_errno (EILSEQ);
  92. return -1;
  93. }
  94. if (ps->__value.__wchb[3] == 2 && cu1 >= 0xF0)
  95. {
  96. /* A four code unit sequence. */
  97. ps->__value.__wchb[2] = c8;
  98. ++ps->__value.__wchb[3];
  99. return 0;
  100. }
  101. if (cu1 < 0xF0)
  102. {
  103. wc = ((cu1 & 0x0F) << 12)
  104. + ((cu2 & 0x3F) << 6)
  105. + (c8 & 0x3F);
  106. }
  107. else
  108. {
  109. char8_t cu3 = ps->__value.__wchb[2];
  110. wc = ((cu1 & 0x07) << 18)
  111. + ((cu2 & 0x3F) << 12)
  112. + ((cu3 & 0x3F) << 6)
  113. + (c8 & 0x3F);
  114. }
  115. }
  116. ps->__count &= 0x7fffffff;
  117. ps->__value.__wch = 0;
  118. }
  119. return wcrtomb (s, wc, ps);
  120. }