The base system supports localization of error messages in strerror(). catopen(3) is used to read files that contain translated versions of error messages.
Currently these files have arbitrary encodings: most are latin1, one is UTF-8, and one is KOI8. These encodings are used regardless of the locale, so in some cases the strings look garbled. The diff below adds support for locale-specific messages, so that the proper encoding is used for output. Since we lack iconv() in base, we cannot convert messages at runtime; pre-generated message files are needed instead. Existing lib/libc/nls/*.msg files are renamed to the names shown in the libc/Makefile part of the diff, and new ones are added to support the UTF-8 locale (converted from the existing .msg files with iconv). I'm not including this change in the diff since that would mix four different character sets in a single email. While here, update catopen(3) to comply with POSIX.1-2008. I see no point in listing errno values in the catopen(3) man page: the current list is incomplete, and all errors originate from library functions that catopen(3) uses. The complete diff, which includes the new message files, passes "make build". There are distrib/sets changes which I'm not including below either.
Index: lib/libc/Makefile =================================================================== RCS file: /cvs/src/lib/libc/Makefile,v retrieving revision 1.31 diff -u -p -r1.31 Makefile --- lib/libc/Makefile 15 Aug 2012 18:08:22 -0000 1.31 +++ lib/libc/Makefile 31 May 2013 19:30:12 -0000 @@ -17,7 +17,11 @@ LDADD=-nodefaultlibs -lgcc LIBCSRCDIR=${.CURDIR} .include "${LIBCSRCDIR}/Makefile.inc" -NLS= C.msg Pig.msg da.msg de.msg es.msg fi.msg fr.msg nl.msg no.msg ru.msg sv.msg it.msg +NLS= C.msg Pig.msg da-ISO8859-1.msg da-UTF-8.msg de-ISO8859-1.msg \ + de-UTF-8.msg es-ISO8859-1.msg es-UTF-8.msg fi-ISO8859-1.msg \ + fi-UTF-8.msg fr-ISO8859-1.msg fr-UTF-8.msg it-UTF-8.msg \ + nl-ISO8859-1.msg nl-UTF-8.msg no-ISO8859-1.msg no-UTF-8.msg \ + ru-KOI8-R.msg ru-UTF-8.msg sv-ISO8859-1.msg sv-UTF-8.msg copy-to-libkern: copy-to-libkern-machind copy-to-libkern-machdep Index: lib/libc/nls/catopen.3 =================================================================== RCS file: /cvs/src/lib/libc/nls/catopen.3,v retrieving revision 1.6 diff -u -p -r1.6 catopen.3 --- lib/libc/nls/catopen.3 31 May 2007 19:19:30 -0000 1.6 +++ lib/libc/nls/catopen.3 31 May 2013 19:45:37 -0000 @@ -33,9 +33,14 @@ is used with substituted for .Ql \&%N . .Pp -The +If the +.Fa oflag +argument is NL_CAT_LOCALE, the LC_MESSAGES environment variable +is used to select the message catalog. +If the .Fa oflag -argument is reserved for future use and should be set to zero. +argument is zero, the LANG environment variable is used to select +the message catalog. .Sh RETURN VALUES Upon successful completion, .Fn catopen @@ -43,11 +48,6 @@ returns a message catalog descriptor. Otherwise, \-1 is returned and .Va errno is set to indicate the error. -.Sh ERRORS -.Bl -tag -width Er -.It Bq Er ENOMEM -Insufficient memory available. -.El .Sh SEE ALSO .Xr catclose 3 , .Xr catgets 3 @@ -55,4 +55,4 @@ Insufficient memory available. The .Fn catopen function conforms to -.St -xpg3 . +.St -p1003.1-2008 . 
Index: lib/libc/nls/catopen.c =================================================================== RCS file: /cvs/src/lib/libc/nls/catopen.c,v retrieving revision 1.14 diff -u -p -r1.14 catopen.c --- lib/libc/nls/catopen.c 12 Jul 2011 21:31:20 -0000 1.14 +++ lib/libc/nls/catopen.c 31 May 2013 19:41:34 -0000 @@ -41,11 +41,63 @@ #include <fcntl.h> #include <nl_types.h> -#define NLS_DEFAULT_PATH "/usr/share/nls/%L/%N.cat:/usr/share/nls/%N/%L" +#define NLS_DEFAULT_PATH "/usr/share/nls/%L/%N.cat:/usr/share/nls/%l-%c/%N.cat:/usr/share/nls/%l/%N.cat:" #define NLS_DEFAULT_LANG "C" static nl_catd load_msgcat(const char *); +/* + * Helpers for parsing components of locale names. + * Locale names are of the form language[_territory][.codeset]. + * See POSIX-1-2008 "8.2 Internationalization Variables" + */ + +static char * +parse_lang(const char *lang) +{ + char *sep, *locname; + + locname = strdup(lang); + if (locname == NULL) + return NULL; + + sep = strchr(locname, '_'); + if (sep) + *sep = '\0'; + + return locname; +} + +static char * +parse_territory(const char *lang) +{ + char *sep, *territory; + + sep = strchr(lang, '_'); + if (sep && strrchr(sep + 1, '.')) { + territory = strdup(sep + 1); + if (territory) { + sep = strrchr(territory, '.'); + *sep = '\0'; + return territory; + } + } + + return NULL; +} + +static char * +parse_codeset(const char *lang) +{ + char *sep; + + sep = strrchr(lang, '.'); + if (sep) + return strdup(sep + 1); + + return NULL; +} + /* ARGSUSED */ nl_catd _catopen(const char *name, int oflag) @@ -53,7 +105,7 @@ _catopen(const char *name, int oflag) char tmppath[PATH_MAX]; char *nlspath; char *lang; - char *s, *t; + char *s, *t, *p, *q; const char *u; nl_catd catd; @@ -66,7 +118,13 @@ _catopen(const char *name, int oflag) if (issetugid() != 0 || (nlspath = getenv("NLSPATH")) == NULL) nlspath = NLS_DEFAULT_PATH; - if ((lang = getenv("LANG")) == NULL) + + lang = NULL; + if (oflag & NL_CAT_LOCALE) + lang = getenv("LC_MESSAGES"); + if (lang == NULL) + 
lang = getenv("LANG"); + if (lang == NULL) lang = NLS_DEFAULT_LANG; s = nlspath; @@ -75,19 +133,39 @@ _catopen(const char *name, int oflag) while (*s && *s != ':') { if (*s == '%') { switch (*(++s)) { - case 'L': /* locale */ + case 'L': /* LANG or LC_MESSAGES */ u = lang; while (*u && t < tmppath + PATH_MAX-1) *t++ = *u++; break; - case 'N': /* name */ + case 'N': /* value of name parameter */ u = name; while (*u && t < tmppath + PATH_MAX-1) *t++ = *u++; break; - case 'l': /* lang */ - case 't': /* territory */ - case 'c': /* codeset */ + case 'l': /* language part */ + p = parse_lang(lang); + q = p; + while (q && *q && + t < tmppath + PATH_MAX-1) + *t++ = *q++; + free(p); + break; + case 't': /* territory part */ + p = parse_territory(lang); + q = p; + while (q && *q && + t < tmppath + PATH_MAX-1) + *t++ = *q++; + free(p); + break; + case 'c': /* codeset part */ + p = parse_codeset(lang); + q = p; + while (q && *q && + t < tmppath + PATH_MAX-1) + *t++ = *q++; + free(p); break; default: if (t < tmppath + PATH_MAX-1) @@ -123,6 +201,8 @@ load_msgcat(const char *path) if ((fd = open(path, O_RDONLY)) == -1) return (nl_catd) -1; + + fcntl(fd, F_SETFD, FD_CLOEXEC); if (fstat(fd, &st) != 0) { close (fd); Index: lib/libc/string/strerror_r.c =================================================================== RCS file: /cvs/src/lib/libc/string/strerror_r.c,v retrieving revision 1.7 diff -u -p -r1.7 strerror_r.c --- lib/libc/string/strerror_r.c 1 Mar 2007 16:29:09 -0000 1.7 +++ lib/libc/string/strerror_r.c 31 May 2013 19:30:13 -0000 @@ -75,7 +75,7 @@ __num2string(int num, int sign, int seti #ifdef NLS nl_catd catd; - catd = catopen("libc", 0); + catd = catopen("libc", NL_CAT_LOCALE); #endif if (0 <= num && num < max) {