gconv_conf.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. /* Handle configuration data.
  2. Copyright (C) 1997-2026 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library; if not, see
  14. <https://www.gnu.org/licenses/>. */
  15. #include <assert.h>
  16. #include <ctype.h>
  17. #include <errno.h>
  18. #include <limits.h>
  19. #include <locale.h>
  20. #include <search.h>
  21. #include <stddef.h>
  22. #include <stdio.h>
  23. #include <stdio_ext.h>
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <unistd.h>
  27. #include <sys/param.h>
  28. #include <libc-lock.h>
  29. #include <gconv_int.h>
  30. #include <gconv_parseconfdir.h>
  31. /* This is the default path where we look for module lists. */
  32. static const char default_gconv_path[] = GCONV_PATH;
  /* One directory of the module search path.  */
  struct path_elem
  {
    /* Directory name; __gconv_get_path stores it with a trailing slash.  */
    const char *name;
    /* Length of NAME in bytes, not counting the terminating NUL.  */
    size_t len;
  };
  39. /* The path elements, as determined by the __gconv_get_path function.
  40. All path elements end in a slash. */
  41. struct path_elem *__gconv_path_elem;
  42. /* Maximum length of a single path element in __gconv_path_elem. */
  43. size_t __gconv_max_path_elem_len;
  44. /* We use the following struct if we couldn't allocate memory. */
  45. static const struct path_elem empty_path_elem = { NULL, 0 };
  /* Extension of loadable module files.  MODULE_EXT may be predefined to
     override the ".so" default; add_module appends the extension to module
     names that do not already end in it.  */
  #ifndef MODULE_EXT
  # define MODULE_EXT ".so"
  #endif
  static const char gconv_module_ext[] = MODULE_EXT;
  51. /* We have a few builtin transformations. */
  52. static struct gconv_module builtin_modules[] =
  53. {
  54. #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
  55. MinF, MaxF, MinT, MaxT) \
  56. { \
  57. .from_string = From, \
  58. .to_string = To, \
  59. .cost_hi = Cost, \
  60. .cost_lo = INT_MAX, \
  61. .module_name = Name \
  62. },
  63. #define BUILTIN_ALIAS(From, To)
  64. #include "gconv_builtin.h"
  65. #undef BUILTIN_TRANSFORMATION
  66. #undef BUILTIN_ALIAS
  67. };
  68. static const char builtin_aliases[] =
  69. {
  70. #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
  71. MinF, MaxF, MinT, MaxT)
  72. #define BUILTIN_ALIAS(From, To) From "\0" To "\0"
  73. #include "gconv_builtin.h"
  74. #undef BUILTIN_TRANSFORMATION
  75. #undef BUILTIN_ALIAS
  76. };
  /* Value of the GCONV_PATH environment variable.  While it is NULL,
     __gconv_get_path uses the default path only.  */
  const char *__gconv_path_envvar;
  79. /* Test whether there is already a matching module known. */
  80. static int
  81. detect_conflict (const char *alias)
  82. {
  83. struct gconv_module *node = __gconv_modules_db;
  84. while (node != NULL)
  85. {
  86. int cmpres = strcmp (alias, node->from_string);
  87. if (cmpres == 0)
  88. /* We have a conflict. */
  89. return 1;
  90. else if (cmpres < 0)
  91. node = node->left;
  92. else
  93. node = node->right;
  94. }
  95. return node != NULL;
  96. }
  97. /* The actual code to add aliases. */
  98. static void
  99. add_alias2 (const char *from, const char *to, const char *wp)
  100. {
  101. /* Test whether this alias conflicts with any available module. */
  102. if (detect_conflict (from))
  103. /* It does conflict, don't add the alias. */
  104. return;
  105. struct gconv_alias *new_alias = (struct gconv_alias *)
  106. malloc (sizeof (struct gconv_alias) + (wp - from));
  107. if (new_alias != NULL)
  108. {
  109. void **inserted;
  110. new_alias->fromname = memcpy ((char *) new_alias
  111. + sizeof (struct gconv_alias),
  112. from, wp - from);
  113. new_alias->toname = new_alias->fromname + (to - from);
  114. inserted = (void **) __tsearch (new_alias, &__gconv_alias_db,
  115. __gconv_alias_compare);
  116. if (inserted == NULL || *inserted != new_alias)
  117. /* Something went wrong, free this entry. */
  118. free (new_alias);
  119. }
  120. }
  121. /* Add new alias. */
  122. static void
  123. add_alias (char *rp)
  124. {
  125. /* We now expect two more string. The strings are normalized
  126. (converted to UPPER case) and stored in the alias database. */
  127. char *from, *to, *wp;
  128. while (__isspace_l (*rp, _nl_C_locobj_ptr))
  129. ++rp;
  130. from = wp = rp;
  131. while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
  132. *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
  133. if (*rp == '\0')
  134. /* There is no `to' string on the line. Ignore it. */
  135. return;
  136. *wp++ = '\0';
  137. to = ++rp;
  138. while (__isspace_l (*rp, _nl_C_locobj_ptr))
  139. ++rp;
  140. while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
  141. *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
  142. if (to == wp)
  143. /* No `to' string, ignore the line. */
  144. return;
  145. *wp++ = '\0';
  146. add_alias2 (from, to, wp);
  147. }
  148. /* Insert a data structure for a new module in the search tree. */
  149. static void
  150. insert_module (struct gconv_module *newp, int tobefreed)
  151. {
  152. struct gconv_module **rootp = &__gconv_modules_db;
  153. while (*rootp != NULL)
  154. {
  155. struct gconv_module *root = *rootp;
  156. int cmpres;
  157. cmpres = strcmp (newp->from_string, root->from_string);
  158. if (cmpres == 0)
  159. {
  160. /* Both strings are identical. Insert the string at the
  161. end of the `same' list if it is not already there. */
  162. while (strcmp (newp->from_string, root->from_string) != 0
  163. || strcmp (newp->to_string, root->to_string) != 0)
  164. {
  165. rootp = &root->same;
  166. root = *rootp;
  167. if (root == NULL)
  168. break;
  169. }
  170. if (root != NULL)
  171. {
  172. /* This is a no new conversion. But maybe the cost is
  173. better. */
  174. if (newp->cost_hi < root->cost_hi
  175. || (newp->cost_hi == root->cost_hi
  176. && newp->cost_lo < root->cost_lo))
  177. {
  178. newp->left = root->left;
  179. newp->right = root->right;
  180. newp->same = root->same;
  181. *rootp = newp;
  182. free (root);
  183. }
  184. else if (tobefreed)
  185. free (newp);
  186. return;
  187. }
  188. break;
  189. }
  190. else if (cmpres < 0)
  191. rootp = &root->left;
  192. else
  193. rootp = &root->right;
  194. }
  195. /* Plug in the new node here. */
  196. *rootp = newp;
  197. }
  198. /* Add new module. */
  199. static void
  200. add_module (char *rp, const char *directory, size_t dir_len, int modcounter)
  201. {
  202. /* We expect now
  203. 1. `from' name
  204. 2. `to' name
  205. 3. filename of the module
  206. 4. an optional cost value
  207. */
  208. struct gconv_alias fake_alias;
  209. struct gconv_module *new_module;
  210. char *from, *to, *module, *wp;
  211. int need_ext;
  212. int cost_hi;
  213. while (__isspace_l (*rp, _nl_C_locobj_ptr))
  214. ++rp;
  215. from = rp;
  216. while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
  217. {
  218. *rp = __toupper_l (*rp, _nl_C_locobj_ptr);
  219. ++rp;
  220. }
  221. if (*rp == '\0')
  222. return;
  223. *rp++ = '\0';
  224. to = wp = rp;
  225. while (__isspace_l (*rp, _nl_C_locobj_ptr))
  226. ++rp;
  227. while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
  228. *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
  229. if (*rp == '\0')
  230. return;
  231. *wp++ = '\0';
  232. do
  233. ++rp;
  234. while (__isspace_l (*rp, _nl_C_locobj_ptr));
  235. module = wp;
  236. while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
  237. *wp++ = *rp++;
  238. if (*rp == '\0')
  239. {
  240. /* There is no cost, use one by default. */
  241. *wp++ = '\0';
  242. cost_hi = 1;
  243. }
  244. else
  245. {
  246. /* There might be a cost value. */
  247. char *endp;
  248. *wp++ = '\0';
  249. cost_hi = strtol (rp, &endp, 10);
  250. if (rp == endp || cost_hi < 1)
  251. /* No useful information. */
  252. cost_hi = 1;
  253. }
  254. if (module[0] == '\0')
  255. /* No module name given. */
  256. return;
  257. if (module[0] == '/')
  258. dir_len = 0;
  259. /* See whether we must add the ending. */
  260. need_ext = 0;
  261. if (wp - module < (ptrdiff_t) sizeof (gconv_module_ext)
  262. || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext,
  263. sizeof (gconv_module_ext)) != 0)
  264. /* We must add the module extension. */
  265. need_ext = sizeof (gconv_module_ext) - 1;
  266. /* See whether we have already an alias with this name defined. */
  267. fake_alias.fromname = strndupa (from, to - from);
  268. if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL)
  269. /* This module duplicates an alias. */
  270. return;
  271. new_module = (struct gconv_module *) calloc (1,
  272. sizeof (struct gconv_module)
  273. + (wp - from)
  274. + dir_len + need_ext);
  275. if (new_module != NULL)
  276. {
  277. char *tmp;
  278. new_module->from_string = tmp = (char *) (new_module + 1);
  279. tmp = __mempcpy (tmp, from, to - from);
  280. new_module->to_string = tmp;
  281. tmp = __mempcpy (tmp, to, module - to);
  282. new_module->cost_hi = cost_hi;
  283. new_module->cost_lo = modcounter;
  284. new_module->module_name = tmp;
  285. if (dir_len != 0)
  286. tmp = __mempcpy (tmp, directory, dir_len);
  287. tmp = __mempcpy (tmp, module, wp - module);
  288. if (need_ext)
  289. memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext));
  290. /* Now insert the new module data structure in our search tree. */
  291. insert_module (new_module, 1);
  292. }
  293. }
  294. /* Determine the directories we are looking for data in. This function should
  295. only be called from __gconv_read_conf. */
  296. static void
  297. __gconv_get_path (void)
  298. {
  299. struct path_elem *result;
  300. /* This function is only ever called when __gconv_path_elem is NULL. */
  301. result = __gconv_path_elem;
  302. assert (result == NULL);
  303. /* Determine the complete path first. */
  304. char *gconv_path;
  305. size_t gconv_path_len;
  306. char *elem;
  307. char *oldp;
  308. char *cp;
  309. int nelems;
  310. char *cwd;
  311. size_t cwdlen;
  312. if (__gconv_path_envvar == NULL)
  313. {
  314. /* No user-defined path. Make a modifiable copy of the
  315. default path. */
  316. gconv_path = strdupa (default_gconv_path);
  317. gconv_path_len = sizeof (default_gconv_path);
  318. cwd = NULL;
  319. cwdlen = 0;
  320. }
  321. else
  322. {
  323. /* Append the default path to the user-defined path. */
  324. size_t user_len = strlen (__gconv_path_envvar);
  325. gconv_path_len = user_len + 1 + sizeof (default_gconv_path);
  326. gconv_path = alloca (gconv_path_len);
  327. __mempcpy (__mempcpy (__mempcpy (gconv_path, __gconv_path_envvar,
  328. user_len),
  329. ":", 1),
  330. default_gconv_path, sizeof (default_gconv_path));
  331. cwd = __getcwd (NULL, 0);
  332. cwdlen = __glibc_unlikely (cwd == NULL) ? 0 : strlen (cwd);
  333. }
  334. assert (default_gconv_path[0] == '/');
  335. /* In a first pass we calculate the number of elements. */
  336. oldp = NULL;
  337. cp = strchr (gconv_path, ':');
  338. nelems = 1;
  339. while (cp != NULL)
  340. {
  341. if (cp != oldp + 1)
  342. ++nelems;
  343. oldp = cp;
  344. cp = strchr (cp + 1, ':');
  345. }
  346. /* Allocate the memory for the result. */
  347. result = malloc ((nelems + 1)
  348. * sizeof (struct path_elem)
  349. + gconv_path_len + nelems
  350. + (nelems - 1) * (cwdlen + 1));
  351. if (result != NULL)
  352. {
  353. char *strspace = (char *) &result[nelems + 1];
  354. int n = 0;
  355. /* Separate the individual parts. */
  356. __gconv_max_path_elem_len = 0;
  357. elem = __strtok_r (gconv_path, ":", &gconv_path);
  358. assert (elem != NULL);
  359. do
  360. {
  361. result[n].name = strspace;
  362. if (elem[0] != '/')
  363. {
  364. assert (cwd != NULL);
  365. strspace = __mempcpy (strspace, cwd, cwdlen);
  366. *strspace++ = '/';
  367. }
  368. strspace = __stpcpy (strspace, elem);
  369. if (strspace[-1] != '/')
  370. *strspace++ = '/';
  371. result[n].len = strspace - result[n].name;
  372. if (result[n].len > __gconv_max_path_elem_len)
  373. __gconv_max_path_elem_len = result[n].len;
  374. *strspace++ = '\0';
  375. ++n;
  376. }
  377. while ((elem = __strtok_r (NULL, ":", &gconv_path)) != NULL);
  378. result[n].name = NULL;
  379. result[n].len = 0;
  380. }
  381. __gconv_path_elem = result ?: (struct path_elem *) &empty_path_elem;
  382. free (cwd);
  383. }
  384. /* Read all configuration files found in the user-specified and the default
  385. path. This function should only be called once during the program's
  386. lifetime. It disregards locking and synchronization because its only
  387. caller, __gconv_load_conf, handles this. */
  388. static void
  389. __gconv_read_conf (void)
  390. {
  391. int save_errno = errno;
  392. size_t cnt;
  393. /* First see whether we should use the cache. */
  394. if (__gconv_load_cache () == 0)
  395. {
  396. /* Yes, we are done. */
  397. __set_errno (save_errno);
  398. return;
  399. }
  400. #ifndef STATIC_GCONV
  401. /* Find out where we have to look. */
  402. __gconv_get_path ();
  403. for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt)
  404. gconv_parseconfdir (NULL, __gconv_path_elem[cnt].name,
  405. __gconv_path_elem[cnt].len);
  406. #endif
  407. /* Add the internal modules. */
  408. for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
  409. ++cnt)
  410. {
  411. struct gconv_alias fake_alias;
  412. fake_alias.fromname = (char *) builtin_modules[cnt].from_string;
  413. if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
  414. != NULL)
  415. /* It'll conflict so don't add it. */
  416. continue;
  417. insert_module (&builtin_modules[cnt], 0);
  418. }
  419. /* Add aliases for builtin conversions. */
  420. const char *cp = builtin_aliases;
  421. do
  422. {
  423. const char *from = cp;
  424. const char *to = strchr (from, '\0') + 1;
  425. cp = strchr (to, '\0') + 1;
  426. add_alias2 (from, to, cp);
  427. }
  428. while (*cp != '\0');
  429. /* Restore the error number. */
  430. __set_errno (save_errno);
  431. }
  432. /* This "once" variable is used to do a one-time load of the configuration. */
  433. __libc_once_define (static, once);
  434. /* Read all configuration files found in the user-specified and the default
  435. path, but do it only "once" using __gconv_read_conf to do the actual
  436. work. This is the function that must be called when reading iconv
  437. configuration. */
  438. void
  439. __gconv_load_conf (void)
  440. {
  441. __libc_once (once, __gconv_read_conf);
  442. }
  443. /* Free all resources if necessary. */
  444. void
  445. __gconv_conf_freemem (void)
  446. {
  447. if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem)
  448. free ((void *) __gconv_path_elem);
  449. }