| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383 |
- /* Copyright (C) 1996-2026 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, see <https://www.gnu.org/licenses/>. */
- #define PACKAGE _libc_intl_domainname
- #ifdef HAVE_CONFIG_H
- # include <config.h>
- #endif
- #include <argp.h>
- #include <assert.h>
- #include <ctype.h>
- #include <endian.h>
- #include <errno.h>
- #include <error.h>
- #include <fcntl.h>
- #include <iconv.h>
- #include <langinfo.h>
- #include <locale.h>
- #include <libintl.h>
- #include <limits.h>
- #include <nl_types.h>
- #include <obstack.h>
- #include <scratch_buffer.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #include <unistd_ext.h>
- #include <wchar.h>
- #include "version.h"
- #include "catgetsinfo.h"
/* Reverse the byte order of the 32-bit value W.  The argument is
   evaluated several times, so it must not have side effects.  */
#define SWAPU32(w) \
  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
/* One message of a message set.  The messages of a set form a singly
   linked list.  */
struct message_list
{
  /* Message number, either given in the input or assigned automatically
     for a symbolic name.  */
  int number;
  /* Text of the message.  */
  const char *message;
  /* File name and line where the message was defined; used for
     diagnostics and for the comments in the generated header.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the message, or NULL if it was given by number.  */
  const char *symbol;
  /* Next message in the same set.  */
  struct message_list *next;
};
/* One message set of a catalog.  */
struct set_list
{
  /* Set number as stored in the tables.  find_set stores the requested
     number plus one so that zero can mark an unoccupied table entry.  */
  int number;
  /* Nonzero once a $delset directive named this set.  */
  int deleted;
  /* Messages belonging to this set.  */
  struct message_list *messages;
  /* Highest message number handed out in this set so far.  */
  int last_message;
  /* File name and line of the $set directive that selected this set.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the set, or NULL if it was given by number.  */
  const char *symbol;
  /* Next set of the catalog.  */
  struct set_list *next;
};
/* All the information collected from the input files.  */
struct catalog
{
  /* List of all sets seen so far.  */
  struct set_list *all_sets;
  /* Set that subsequent message lines are added to.  */
  struct set_list *current_set;
  /* Number of message lines processed; used as the starting point when
     sizing the hash table in write_out.  */
  size_t total_messages;
  /* Quote character selected by $quote, or 0 for none.  */
  wint_t quote_char;
  /* Highest set number used so far; symbolic set names get the next
     higher number.  */
  int last_set;
  /* Storage for the text of the input lines and messages.  */
  struct obstack mem_pool;
};
/* If non-zero force creation of new file, not using existing one.  */
static int force_new;

/* Name of output file.  */
static const char *output_name;

/* Name of generated C header file.  */
static const char *header_name;

/* Function printing the program name and version; installed as the
   argp version hook.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
/* Key for the --new option, which has no short form.  */
#define OPT_NEW 1

/* Definitions of arguments for argp functions.  */
static const struct argp_option options[] =
{
  { "header", 'H', N_("NAME"), 0,
    N_("Create C header file NAME containing symbol definitions") },
  { "new", OPT_NEW, NULL, 0,
    N_("Do not use existing catalog, force new output file") },
  { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
  /* Terminating entry.  */
  { NULL, 0, NULL, 0, NULL }
};
/* Short description of program.  The text after the \v is printed
   after the option list.  */
static const char doc[] = N_("Generate message catalog.\
\vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
is -, output is written to standard output.\n");

/* Strings for arguments in help texts.  */
static const char args_doc[] = N_("\
-o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");

/* Prototype for option handler.  */
static error_t parse_opt (int key, char *arg, struct argp_state *state);

/* Function to print some extra text in the help message.  */
static char *more_help (int key, const char *text, void *input);

/* Data structure to communicate with argp functions.  */
static struct argp argp =
{
  options, parse_opt, args_doc, doc, NULL, more_help
};
/* Wrapper functions with error checking for standard functions.  */
#include <programs/xmalloc.h>

/* Prototypes for local functions.  */

/* Hook for error(); deliberately prints nothing.  */
static void error_print (void);
/* Parse one input file and merge it into CURRENT.  */
static struct catalog *read_input_file (struct catalog *current,
                                        const char *fname);
/* Write the binary catalog and, optionally, the C header.  */
static void write_out (struct catalog *result, const char *output_name,
                       const char *header_name);
/* Look up the set with the given number, creating it if needed.  */
static struct set_list *find_set (struct catalog *current, int number);
/* Process quotes and escape sequences of a message in place.  */
static void normalize_line (const char *fname, size_t line, iconv_t cd,
                            wchar_t *string, wchar_t quote_char,
                            wchar_t escape_char);
static void read_old (struct catalog *catalog, const char *file_name);
static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
                            iconv_t *cd_tombp, wchar_t *escape_charp);
- int
- main (int argc, char *argv[])
- {
- struct catalog *result;
- int remaining;
- /* Set program name for messages. */
- error_print_progname = error_print;
- /* Set locale via LC_ALL. */
- setlocale (LC_ALL, "");
- /* Set the text message domain. */
- textdomain (PACKAGE);
- /* Initialize local variables. */
- result = NULL;
- /* Parse and process arguments. */
- argp_parse (&argp, argc, argv, 0, &remaining, NULL);
- /* Determine output file. */
- if (output_name == NULL)
- output_name = remaining < argc ? argv[remaining++] : "-";
- /* Process all input files. */
- setlocale (LC_CTYPE, "C");
- if (remaining < argc)
- do
- result = read_input_file (result, argv[remaining]);
- while (++remaining < argc);
- else
- result = read_input_file (NULL, "-");
- /* Write out the result. */
- if (result != NULL)
- write_out (result, output_name, header_name);
- return error_message_count != 0;
- }
- /* Handle program arguments. */
- static error_t
- parse_opt (int key, char *arg, struct argp_state *state)
- {
- switch (key)
- {
- case 'H':
- header_name = arg;
- break;
- case OPT_NEW:
- force_new = 1;
- break;
- case 'o':
- output_name = arg;
- break;
- default:
- return ARGP_ERR_UNKNOWN;
- }
- return 0;
- }
- static char *
- more_help (int key, const char *text, void *input)
- {
- char *tp = NULL;
- switch (key)
- {
- case ARGP_KEY_HELP_EXTRA:
- /* We print some extra information. */
- if (asprintf (&tp, gettext ("\
- For bug reporting instructions, please see:\n\
- %s.\n"), REPORT_BUGS_TO) < 0)
- return NULL;
- return tp;
- default:
- break;
- }
- return (char *) text;
- }
- /* Print the version information. */
- static void
- print_version (FILE *stream, struct argp_state *state)
- {
- fprintf (stream, "gencat %s%s\n", PKGVERSION, VERSION);
- fprintf (stream, gettext ("\
- Copyright (C) %s Free Software Foundation, Inc.\n\
- This is free software; see the source for copying conditions. There is NO\n\
- warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
- "), "2024");
- fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
- }
/* The address of this function will be assigned to the hook in the
   error functions (see the assignment to error_print_progname in
   main).  It intentionally does nothing.  */
static void
error_print (void)
{
  /* We don't want the program name to be printed in messages.  Emacs'
     compile.el does not like this.  */
}
- static struct catalog *
- read_input_file (struct catalog *current, const char *fname)
- {
- FILE *fp;
- char *buf;
- size_t len;
- size_t line_number;
- wchar_t *wbuf;
- size_t wbufsize;
- iconv_t cd_towc = (iconv_t) -1;
- iconv_t cd_tomb = (iconv_t) -1;
- wchar_t escape_char = L'\\';
- char *codeset = NULL;
- if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0)
- {
- fp = stdin;
- fname = gettext ("*standard input*");
- }
- else
- fp = fopen (fname, "r");
- if (fp == NULL)
- {
- error (0, errno, gettext ("cannot open input file `%s'"), fname);
- return current;
- }
- /* If we haven't seen anything yet, allocate result structure. */
- if (current == NULL)
- {
- current = (struct catalog *) xcalloc (1, sizeof (*current));
- #define obstack_chunk_alloc malloc
- #define obstack_chunk_free free
- obstack_init (¤t->mem_pool);
- current->current_set = find_set (current, NL_SETD);
- }
- buf = NULL;
- len = 0;
- line_number = 0;
- wbufsize = 1024;
- wbuf = (wchar_t *) xmalloc (wbufsize);
- while (!feof (fp))
- {
- int continued;
- int used;
- size_t start_line = line_number + 1;
- char *this_line;
- do
- {
- int act_len;
- act_len = getline (&buf, &len, fp);
- if (act_len <= 0)
- break;
- ++line_number;
- /* It the line continued? */
- continued = 0;
- if (buf[act_len - 1] == '\n')
- {
- --act_len;
- /* There might be more than one backslash at the end of
- the line. Only if there is an odd number of them is
- the line continued. */
- if (act_len > 0 && buf[act_len - 1] == '\\')
- {
- int temp_act_len = act_len;
- do
- {
- --temp_act_len;
- continued = !continued;
- }
- while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\');
- if (continued)
- --act_len;
- }
- }
- /* Append to currently selected line. */
- obstack_grow (¤t->mem_pool, buf, act_len);
- }
- while (continued);
- obstack_1grow (¤t->mem_pool, '\0');
- this_line = (char *) obstack_finish (¤t->mem_pool);
- used = 0;
- if (this_line[0] == '$')
- {
- if (isblank (this_line[1]))
- {
- int cnt = 1;
- while (isblank (this_line[cnt]))
- ++cnt;
- if (strncmp (&this_line[cnt], "codeset=", 8) != 0)
- /* This is a comment line. Do nothing. */;
- else if (codeset != NULL)
- /* Ignore multiple codeset. */;
- else
- {
- int start = cnt + 8;
- cnt = start;
- while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
- ++cnt;
- if (cnt != start)
- {
- int len = cnt - start;
- codeset = xmalloc (len + 1);
- *((char *) mempcpy (codeset, &this_line[start], len))
- = '\0';
- }
- }
- }
- else if (strncmp (&this_line[1], "set", 3) == 0)
- {
- int cnt = sizeof ("set");
- int set_number;
- const char *symbol = NULL;
- while (isspace (this_line[cnt]))
- ++cnt;
- if (isdigit (this_line[cnt]))
- {
- set_number = atol (&this_line[cnt]);
- /* If the given number for the character set is
- higher than any we used for symbolic set names
- avoid clashing by using only higher numbers for
- the following symbolic definitions. */
- if (set_number > current->last_set)
- current->last_set = set_number;
- }
- else
- {
- /* See whether it is a reasonable identifier. */
- int start = cnt;
- while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
- ++cnt;
- if (cnt == start)
- {
- /* No correct character found. */
- error_at_line (0, 0, fname, start_line,
- gettext ("illegal set number"));
- set_number = 0;
- }
- else
- {
- /* We have found seomthing that looks like a
- correct identifier. */
- struct set_list *runp;
- this_line[cnt] = '\0';
- used = 1;
- symbol = &this_line[start];
- /* Test whether the identifier was already used. */
- runp = current->all_sets;
- while (runp != NULL)
- if (runp->symbol != NULL
- && strcmp (runp->symbol, symbol) == 0)
- break;
- else
- runp = runp->next;
- if (runp != NULL)
- {
- /* We cannot allow duplicate identifiers for
- message sets. */
- error_at_line (0, 0, fname, start_line,
- gettext ("duplicate set definition"));
- error_at_line (0, 0, runp->fname, runp->line,
- gettext ("\
- this is the first definition"));
- set_number = 0;
- }
- else
- /* Allocate next free message set for identifier. */
- set_number = ++current->last_set;
- }
- }
- if (set_number != 0)
- {
- /* We found a legal set number. */
- current->current_set = find_set (current, set_number);
- if (symbol != NULL)
- used = 1;
- current->current_set->symbol = symbol;
- current->current_set->fname = fname;
- current->current_set->line = start_line;
- }
- }
- else if (strncmp (&this_line[1], "delset", 6) == 0)
- {
- int cnt = sizeof ("delset");
- while (isspace (this_line[cnt]))
- ++cnt;
- if (isdigit (this_line[cnt]))
- {
- size_t set_number = atol (&this_line[cnt]);
- struct set_list *set;
- /* Mark the message set with the given number as
- deleted. */
- set = find_set (current, set_number);
- set->deleted = 1;
- }
- else
- {
- /* See whether it is a reasonable identifier. */
- int start = cnt;
- while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
- ++cnt;
- if (cnt == start)
- error_at_line (0, 0, fname, start_line,
- gettext ("illegal set number"));
- else
- {
- const char *symbol;
- struct set_list *runp;
- this_line[cnt] = '\0';
- used = 1;
- symbol = &this_line[start];
- /* We have a symbolic set name. This name must
- appear somewhere else in the catalogs read so
- far. */
- for (runp = current->all_sets; runp != NULL;
- runp = runp->next)
- {
- if (strcmp (runp->symbol, symbol) == 0)
- {
- runp->deleted = 1;
- break;
- }
- }
- if (runp == NULL)
- /* Name does not exist before. */
- error_at_line (0, 0, fname, start_line,
- gettext ("unknown set `%s'"), symbol);
- }
- }
- }
- else if (strncmp (&this_line[1], "quote", 5) == 0)
- {
- char buf[2];
- char *bufptr;
- size_t buflen;
- char *wbufptr;
- size_t wbuflen;
- int cnt;
- cnt = sizeof ("quote");
- while (isspace (this_line[cnt]))
- ++cnt;
- /* We need the conversion. */
- if (cd_towc == (iconv_t) -1
- && open_conversion (codeset, &cd_towc, &cd_tomb,
- &escape_char) != 0)
- /* Something is wrong. */
- goto out;
- /* Yes, the quote char can be '\0'; this means no quote
- char. The function using the information works on
- wide characters so we have to convert it here. */
- buf[0] = this_line[cnt];
- buf[1] = '\0';
- bufptr = buf;
- buflen = 2;
- wbufptr = (char *) wbuf;
- wbuflen = wbufsize;
- /* Flush the state. */
- iconv (cd_towc, NULL, NULL, NULL, NULL);
- iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen);
- if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2])
- error_at_line (0, 0, fname, start_line,
- gettext ("invalid quote character"));
- else
- /* Use the converted wide character. */
- current->quote_char = wbuf[0];
- }
- else
- {
- int cnt;
- cnt = 2;
- while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
- ++cnt;
- this_line[cnt] = '\0';
- error_at_line (0, 0, fname, start_line,
- gettext ("unknown directive `%s': line ignored"),
- &this_line[1]);
- }
- }
- else if (isalnum (this_line[0]) || this_line[0] == '_')
- {
- const char *ident = this_line;
- char *line = this_line;
- int message_number;
- do
- ++line;
- while (line[0] != '\0' && !isspace (line[0]));
- if (line[0] != '\0')
- *line++ = '\0'; /* Terminate the identifier. */
- /* Now we found the beginning of the message itself. */
- if (isdigit (ident[0]))
- {
- struct message_list *runp;
- struct message_list *lastp;
- message_number = atoi (ident);
- /* Find location to insert the new message. */
- runp = current->current_set->messages;
- lastp = NULL;
- while (runp != NULL)
- if (runp->number == message_number)
- break;
- else
- {
- lastp = runp;
- runp = runp->next;
- }
- if (runp != NULL)
- {
- /* Oh, oh. There is already a message with this
- number in the message set. */
- if (runp->symbol == NULL)
- {
- /* The existing message had its number specified
- by the user. Fatal collision type uh, oh. */
- error_at_line (0, 0, fname, start_line,
- gettext ("duplicated message number"));
- error_at_line (0, 0, runp->fname, runp->line,
- gettext ("this is the first definition"));
- message_number = 0;
- }
- else
- {
- /* Collision was with number auto-assigned to a
- symbolic. Change existing symbolic number
- and move to end the list (if not already there). */
- runp->number = ++current->current_set->last_message;
- if (runp->next != NULL)
- {
- struct message_list *endp;
- if (lastp == NULL)
- current->current_set->messages=runp->next;
- else
- lastp->next=runp->next;
- endp = runp->next;
- while (endp->next != NULL)
- endp = endp->next;
- endp->next = runp;
- runp->next = NULL;
- }
- }
- }
- ident = NULL; /* We don't have a symbol. */
- if (message_number != 0
- && message_number > current->current_set->last_message)
- current->current_set->last_message = message_number;
- }
- else if (ident[0] != '\0')
- {
- struct message_list *runp;
- /* Test whether the symbolic name was not used for
- another message in this message set. */
- runp = current->current_set->messages;
- while (runp != NULL)
- if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0)
- break;
- else
- runp = runp->next;
- if (runp != NULL)
- {
- /* The name is already used. */
- error_at_line (0, 0, fname, start_line, gettext ("\
- duplicated message identifier"));
- error_at_line (0, 0, runp->fname, runp->line,
- gettext ("this is the first definition"));
- message_number = 0;
- }
- else
- /* Give the message the next unused number. */
- message_number = ++current->current_set->last_message;
- }
- else
- message_number = 0;
- if (message_number != 0)
- {
- char *inbuf;
- size_t inlen;
- char *outbuf;
- size_t outlen;
- struct message_list *newp;
- size_t line_len = strlen (line) + 1;
- size_t ident_len = 0;
- /* We need the conversion. */
- if (cd_towc == (iconv_t) -1
- && open_conversion (codeset, &cd_towc, &cd_tomb,
- &escape_char) != 0)
- /* Something is wrong. */
- goto out;
- /* Convert to a wide character string. We have to
- interpret escape sequences which will be impossible
- without doing the conversion if the codeset of the
- message is stateful. */
- while (1)
- {
- inbuf = line;
- inlen = line_len;
- outbuf = (char *) wbuf;
- outlen = wbufsize;
- /* Flush the state. */
- iconv (cd_towc, NULL, NULL, NULL, NULL);
- iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen);
- if (inlen == 0)
- {
- /* The string is converted. */
- assert (outlen < wbufsize);
- assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1]
- == L'\0');
- break;
- }
- if (outlen != 0)
- {
- /* Something is wrong with this string, we ignore it. */
- error_at_line (0, 0, fname, start_line, gettext ("\
- invalid character: message ignored"));
- goto ignore;
- }
- /* The output buffer is too small. */
- wbufsize *= 2;
- wbuf = (wchar_t *) xrealloc (wbuf, wbufsize);
- }
- /* Strip quote characters, change escape sequences into
- correct characters etc. */
- normalize_line (fname, start_line, cd_towc, wbuf,
- current->quote_char, escape_char);
- if (ident)
- ident_len = line - this_line;
- /* Now the string is free of escape sequences. Convert it
- back into a multibyte character string. First free the
- memory allocated for the original string. */
- obstack_free (¤t->mem_pool, this_line);
- used = 1; /* Yes, we use the line. */
- /* Now fill in the new string. It should never happen that
- the replaced string is longer than the original. */
- inbuf = (char *) wbuf;
- inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t);
- outlen = obstack_room (¤t->mem_pool);
- obstack_blank (¤t->mem_pool, outlen);
- this_line = (char *) obstack_base (¤t->mem_pool);
- outbuf = this_line + ident_len;
- outlen -= ident_len;
- /* Flush the state. */
- iconv (cd_tomb, NULL, NULL, NULL, NULL);
- iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen);
- if (inlen != 0)
- {
- error_at_line (0, 0, fname, start_line,
- gettext ("invalid line"));
- goto ignore;
- }
- assert (outbuf[-1] == '\0');
- /* Free the memory in the obstack we don't use. */
- obstack_blank (¤t->mem_pool, -(int) outlen);
- line = obstack_finish (¤t->mem_pool);
- newp = (struct message_list *) xmalloc (sizeof (*newp));
- newp->number = message_number;
- newp->message = line + ident_len;
- /* Remember symbolic name; is NULL if no is given. */
- newp->symbol = ident ? line : NULL;
- /* Remember where we found the character. */
- newp->fname = fname;
- newp->line = start_line;
- /* Find place to insert to message. We keep them in a
- sorted single linked list. */
- if (current->current_set->messages == NULL
- || current->current_set->messages->number > message_number)
- {
- newp->next = current->current_set->messages;
- current->current_set->messages = newp;
- }
- else
- {
- struct message_list *runp;
- runp = current->current_set->messages;
- while (runp->next != NULL)
- if (runp->next->number > message_number)
- break;
- else
- runp = runp->next;
- newp->next = runp->next;
- runp->next = newp;
- }
- }
- ++current->total_messages;
- }
- else
- {
- size_t cnt;
- cnt = 0;
- /* See whether we have any non-white space character in this
- line. */
- while (this_line[cnt] != '\0' && isspace (this_line[cnt]))
- ++cnt;
- if (this_line[cnt] != '\0')
- /* Yes, some unknown characters found. */
- error_at_line (0, 0, fname, start_line,
- gettext ("malformed line ignored"));
- }
- ignore:
- /* We can save the memory for the line if it was not used. */
- if (!used)
- obstack_free (¤t->mem_pool, this_line);
- }
- /* Close the conversion modules. */
- iconv_close (cd_towc);
- iconv_close (cd_tomb);
- free (codeset);
- out:
- free (wbuf);
- if (fp != stdin)
- fclose (fp);
- return current;
- }
- static void
- write_out (struct catalog *catalog, const char *output_name,
- const char *header_name)
- {
- /* Computing the "optimal" size. */
- struct set_list *set_run;
- size_t best_total, best_size, best_depth;
- size_t act_size, act_depth;
- struct catalog_obj obj;
- struct obstack string_pool;
- const char *strings;
- size_t strings_size;
- uint32_t *array1, *array2;
- size_t cnt;
- int fd;
- struct scratch_buffer buf1;
- scratch_buffer_init (&buf1);
- struct scratch_buffer buf2;
- scratch_buffer_init (&buf2);
- /* If not otherwise told try to read file with existing
- translations. */
- if (!force_new)
- read_old (catalog, output_name);
- /* Initialize best_size with a very high value. */
- best_total = best_size = best_depth = UINT_MAX;
- /* We need some start size for testing. Let's start with
- TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
- 5. */
- act_size = 1 + catalog->total_messages / 5;
- /* We determine the size of a hash table here. Because the message
- numbers can be chosen arbitrary by the programmer we cannot use
- the simple method of accessing the array using the message
- number. The algorithm is based on the trivial hash function
- NUMBER % TABLE_SIZE, where collisions are stored in a second
- dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
- the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
- while (act_size <= best_total)
- {
- size_t deep[act_size];
- act_depth = 1;
- memset (deep, '\0', act_size * sizeof (size_t));
- set_run = catalog->all_sets;
- while (set_run != NULL)
- {
- struct message_list *message_run;
- message_run = set_run->messages;
- while (message_run != NULL)
- {
- size_t idx = (message_run->number * set_run->number) % act_size;
- ++deep[idx];
- if (deep[idx] > act_depth)
- {
- act_depth = deep[idx];
- if (act_depth * act_size > best_total)
- break;
- }
- message_run = message_run->next;
- }
- set_run = set_run->next;
- }
- if (act_depth * act_size <= best_total)
- {
- /* We have found a better solution. */
- best_total = act_depth * act_size;
- best_size = act_size;
- best_depth = act_depth;
- }
- ++act_size;
- }
- /* let's be prepared for an empty message file. */
- if (best_size == UINT_MAX)
- {
- best_size = 1;
- best_depth = 1;
- }
- /* OK, now we have the size we will use. Fill in the header, build
- the table and the second one with swapped byte order. */
- obj.magic = CATGETS_MAGIC;
- obj.plane_size = best_size;
- obj.plane_depth = best_depth;
- uint32_t array_size = best_size * best_depth * sizeof (uint32_t) * 3;
- /* Allocate room for all needed arrays. */
- if (!scratch_buffer_set_array_size (&buf1, best_size * best_depth * 3,
- sizeof (uint32_t)))
- error (EXIT_FAILURE, ENOMEM, gettext ("cannot allocate memory"));
- array1 = buf1.data;
- memset (array1, '\0', array_size);
- if (!scratch_buffer_set_array_size (&buf2, best_size * best_depth * 3,
- sizeof (uint32_t)))
- {
- scratch_buffer_free (&buf1);
- error (EXIT_FAILURE, ENOMEM, gettext ("cannot allocate memory"));
- }
- array2 = buf2.data;
- obstack_init (&string_pool);
- set_run = catalog->all_sets;
- while (set_run != NULL)
- {
- struct message_list *message_run;
- message_run = set_run->messages;
- while (message_run != NULL)
- {
- size_t idx = (((message_run->number * set_run->number) % best_size)
- * 3);
- /* Determine collision depth. */
- while (array1[idx] != 0)
- idx += best_size * 3;
- /* Store set number, message number and pointer into string
- space, relative to the first string. */
- array1[idx + 0] = set_run->number;
- array1[idx + 1] = message_run->number;
- array1[idx + 2] = obstack_object_size (&string_pool);
- /* Add current string to the continuous space containing all
- strings. */
- obstack_grow0 (&string_pool, message_run->message,
- strlen (message_run->message));
- message_run = message_run->next;
- }
- set_run = set_run->next;
- }
- strings_size = obstack_object_size (&string_pool);
- strings = obstack_finish (&string_pool);
- /* Compute ARRAY2 by changing the byte order. */
- for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt)
- array2[cnt] = SWAPU32 (array1[cnt]);
- /* Now we can write out the whole data. */
- if (strcmp (output_name, "-") == 0
- || strcmp (output_name, "/dev/stdout") == 0)
- fd = STDOUT_FILENO;
- else
- {
- fd = creat (output_name, 0666);
- if (fd < 0)
- {
- scratch_buffer_free (&buf1);
- scratch_buffer_free (&buf2);
- error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"),
- output_name);
- }
- }
- /* Write out header. */
- write_all(fd, &obj, sizeof (obj));
- /* We always write out the little endian version of the index
- arrays. */
- #if __BYTE_ORDER == __LITTLE_ENDIAN
- write_all(fd, array1, array_size);
- write_all(fd, array2, array_size);
- #elif __BYTE_ORDER == __BIG_ENDIAN
- write_all(fd, array2, array_size);
- write_all(fd, array1, array_size);
- #else
- # error Cannot handle __BYTE_ORDER byte order
- #endif
- /* Finally write the strings. */
- write_all(fd, strings, strings_size);
- if (fd != STDOUT_FILENO)
- close (fd);
- /* If requested now write out the header file. */
- if (header_name != NULL)
- {
- int first = 1;
- FILE *fp;
- /* Open output file. "-" or "/dev/stdout" means write to
- standard output. */
- if (strcmp (header_name, "-") == 0
- || strcmp (header_name, "/dev/stdout") == 0)
- fp = stdout;
- else
- {
- fp = fopen (header_name, "w");
- if (fp == NULL)
- {
- scratch_buffer_free (&buf1);
- scratch_buffer_free (&buf2);
- error (EXIT_FAILURE, errno,
- gettext ("cannot open output file `%s'"), header_name);
- }
- }
- /* Iterate over all sets and all messages. */
- set_run = catalog->all_sets;
- while (set_run != NULL)
- {
- struct message_list *message_run;
- /* If the current message set has a symbolic name write this
- out first. */
- if (set_run->symbol != NULL)
- fprintf (fp, "%s#define %sSet %#x\t/* %s:%zu */\n",
- first ? "" : "\n", set_run->symbol, set_run->number - 1,
- set_run->fname, set_run->line);
- first = 0;
- message_run = set_run->messages;
- while (message_run != NULL)
- {
- /* If the current message has a symbolic name write
- #define out. But we have to take care for the set
- not having a symbolic name. */
- if (message_run->symbol != NULL)
- {
- if (set_run->symbol == NULL)
- fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%zu */\n",
- set_run->number, message_run->symbol,
- message_run->number, message_run->fname,
- message_run->line);
- else
- fprintf (fp, "#define %s%s %#x\t/* %s:%zu */\n",
- set_run->symbol, message_run->symbol,
- message_run->number, message_run->fname,
- message_run->line);
- }
- message_run = message_run->next;
- }
- set_run = set_run->next;
- }
- if (fp != stdout)
- fclose (fp);
- }
- scratch_buffer_free (&buf1);
- scratch_buffer_free (&buf2);
- }
- static struct set_list *
- find_set (struct catalog *current, int number)
- {
- struct set_list *result = current->all_sets;
- /* We must avoid set number 0 because a set of this number signals
- in the tables that the entry is not occupied. */
- ++number;
- while (result != NULL)
- if (result->number == number)
- return result;
- else
- result = result->next;
- /* Prepare new message set. */
- result = (struct set_list *) xcalloc (1, sizeof (*result));
- result->number = number;
- result->next = current->all_sets;
- current->all_sets = result;
- return result;
- }
/* Normalize STRING *in*place* by resolving escape sequences and
   handling quote characters.

   FNAME and LINE are only used to locate diagnostics.  CD is the
   conversion descriptor used to turn the byte produced by an octal
   escape into a wide character.  QUOTE_CHAR is the active quote
   character (L'\0' if there is none) and ESCAPE_CHAR the character
   that introduces an escape sequence.

   If STRING starts with QUOTE_CHAR that character is skipped and a
   closing one is expected.  In every case the result ends at the first
   quote character that is not escaped.  */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                wchar_t quote_char, wchar_t escape_char)
{
  wchar_t *src = string;
  wchar_t *dst = string;
  int is_quoted = quote_char != L'\0' && *src == quote_char;

  if (is_quoted)
    ++src;

  /* Copy until the end of the string or the first unescaped quote.  */
  while (*src != L'\0' && *src != quote_char)
    {
      wchar_t simple;

      if (*src != escape_char)
        {
          /* Ordinary character.  */
          *dst++ = *src++;
          continue;
        }

      /* Skip the escape character and look at what follows.  */
      ++src;

      if (quote_char != L'\0' && *src == quote_char)
        {
          /* Escaped quote character.  This is an extension to XPG.  */
          *dst++ = *src++;
          continue;
        }

      /* Single letter escapes map directly to one control character.  */
      switch (*src)
        {
        case L'n':
          simple = L'\n';
          break;
        case L't':
          simple = L'\t';
          break;
        case L'v':
          simple = L'\v';
          break;
        case L'b':
          simple = L'\b';
          break;
        case L'r':
          simple = L'\r';
          break;
        case L'f':
          simple = L'\f';
          break;
        default:
          simple = L'\0';
          break;
        }

      if (simple != L'\0')
        {
          *dst++ = simple;
          ++src;
        }
      else if (*src >= L'0' && *src <= L'7')
        {
          /* Octal escape.  Collect digits as long as the value still
             fits into one byte, then convert that byte (plus its
             terminating NUL) to wide characters.  */
          char narrow[2];
          wchar_t wide[2];
          char *inptr = narrow;
          char *outptr = (char *) wide;
          size_t inleft = sizeof (narrow);
          size_t outleft = sizeof (wide);
          int value = *src++ - L'0';

          while (value <= 255 / 8 && *src >= L'0' && *src <= L'7')
            value = value * 8 + (*src++ - L'0');

          narrow[0] = (char) value;
          narrow[1] = '\0';

          /* Flush the state.  */
          iconv (cd, NULL, NULL, NULL, NULL);

          iconv (cd, &inptr, &inleft, &outptr, &outleft);

          /* Success means both bytes were consumed and exactly two
             wide characters were produced.  */
          if (inptr == narrow + 2 && (wchar_t *) outptr == wide + 2)
            *dst++ = wide[0];
          else
            error_at_line (0, 0, fname, line,
                           gettext ("invalid escape sequence"));
        }
      else if (*src == escape_char)
        {
          /* A doubled escape character stands for itself.  */
          *dst++ = escape_char;
          ++src;
        }
      /* Anything else: simply ignore the escape character.  The
         character following it is handled by the next iteration.  */
    }

  /* If we saw a quote character at the beginning we expect another
     one at the end.  */
  if (is_quoted && *src != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *dst = L'\0';
}
- static void
- read_old (struct catalog *catalog, const char *file_name)
- {
- struct catalog_info old_cat_obj;
- struct set_list *set = NULL;
- int last_set = -1;
- size_t cnt;
- /* Try to open catalog, but don't look through the NLSPATH. */
- if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0)
- {
- if (errno == ENOENT)
- /* No problem, the catalog simply does not exist. */
- return;
- else
- error (EXIT_FAILURE, errno,
- gettext ("while opening old catalog file"));
- }
- /* OK, we have the catalog loaded. Now read all messages and merge
- them. When set and message number clash for any message the new
- one is used. If the new one is empty it indicates that the
- message should be deleted. */
- for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt)
- {
- struct message_list *message, *last;
- if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0)
- /* No message in this slot. */
- continue;
- if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set)
- {
- last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1;
- set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1);
- }
- last = NULL;
- message = set->messages;
- while (message != NULL)
- {
- if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1])
- break;
- last = message;
- message = message->next;
- }
- if (message == NULL
- || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1])
- {
- /* We have found a message which is not yet in the catalog.
- Insert it at the right position. */
- struct message_list *newp;
- newp = (struct message_list *) xmalloc (sizeof (*newp));
- newp->number = old_cat_obj.name_ptr[cnt * 3 + 1];
- newp->message =
- &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]];
- newp->fname = NULL;
- newp->line = 0;
- newp->symbol = NULL;
- newp->next = message;
- if (last == NULL)
- set->messages = newp;
- else
- last->next = newp;
- ++catalog->total_messages;
- }
- else if (*message->message == '\0')
- {
- /* The new empty message has overridden the old one thus
- "deleting" it as required. Now remove the empty remains. */
- if (last == NULL)
- set->messages = message->next;
- else
- last->next = message->next;
- }
- }
- }
/* Open the conversion descriptors between CODESET and wide characters
   and determine the escape character of that codeset.

   CODESET names the character set of the input; if it is NULL the
   codeset of the locale selected by the environment is used (the
   locale is switched to "C" again afterwards).  On success
   *CD_TOWCP converts from CODESET to "WCHAR_T", *CD_TOMBP converts
   from "WCHAR_T" to CODESET, *ESCAPE_CHARP is the wide character the
   byte 0x5c is mapped to, and 0 is returned.

   If the conversions cannot be set up a diagnostic is printed, every
   descriptor which was opened is closed again, both *CD_TOWCP and
   *CD_TOMBP are set to (iconv_t) -1 and 1 is returned.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                 wchar_t *escape_charp)
{
  char locale_codeset[64];
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;

  /* If the input file does not specify the codeset use the locale's.  */
  if (codeset == NULL)
    {
      int len;

      setlocale (LC_ALL, "");
      /* The string returned by nl_langinfo may be invalidated by the
         next setlocale call, therefore keep a private copy.  */
      len = snprintf (locale_codeset, sizeof (locale_codeset), "%s",
                      nl_langinfo (CODESET));
      setlocale (LC_ALL, "C");

      /* A name which does not fit cannot be looked up reliably; leave
         CODESET as NULL so that this is handled like an unavailable
         conversion below.  */
      if (len >= 0 && (size_t) len < sizeof (locale_codeset))
        codeset = locale_codeset;
    }

  /* Get the conversion modules.  */
  if (codeset != NULL)
    {
      *cd_towcp = iconv_open ("WCHAR_T", codeset);
      *cd_tombp = iconv_open (codeset, "WCHAR_T");
    }
  else
    {
      *cd_towcp = (iconv_t) -1;
      *cd_tombp = (iconv_t) -1;
    }

  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Release whichever direction could be opened and do not hand
         a stale descriptor back to the caller.  */
      if (*cd_towcp != (iconv_t) -1)
        {
          iconv_close (*cd_towcp);
          *cd_towcp = (iconv_t) -1;
        }
      if (*cd_tombp != (iconv_t) -1)
        {
          iconv_close (*cd_tombp);
          *cd_tombp = (iconv_t) -1;
        }

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = sizeof (buf);

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);

  /* The conversion only counts as successful if both input bytes were
     consumed and the output buffer was filled exactly.  */
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
         on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}
|