On Thu, May 30, 2013 at 05:18:48PM -0430, Andres Perera wrote:
> As I mentioned, there's code that expects the prior layout, and that's
> confusing.
>
> on src/lib/libc/locale/setlocale.c, load_locale_sub() :
>
> 228 len = snprintf(name, sizeof(name), "%s/%s/%s",
> 229 _PATH_LOCALE, locname, categories[category]);
> 230 if (len < 0 || len >= sizeof(name))
> 231 return -1;

Right, thanks for pointing this out. I think the above check can just be removed. It seems to serve no purpose other than making sure that the path constructed from _PATH_LOCALE and the locname argument doesn't exceed PATH_MAX. This is redundant because the same check is performed again within _xpg4_setrunelocale(). If we assume that functions handling other LC_* categories might use different paths in the future, it makes sense to perform this overflow check only inside of the LC_*-specific functions, rather than upfront.

> on src/lib/libc/locale/setrunelocale.c, _xpg4_setrunelocale():
>
> 184 len = snprintf(path, sizeof(path),
> 185 "%s/%s/LC_CTYPE", _PATH_LOCALE, encoding);
> 186 if (len < 0 || len >= sizeof(path))
> 187 return ENAMETOOLONG;

This section is modified as part of my diff, isn't it?

> > + /* Assume "<whatever>.<encoding>" locale name. */
>
> There should be some notion of syntax for cc_LL.CTYPE, even if only
> mentioned in comments.
>
> E.g.,
>
> ISO 3166-1 for country codes and BCP 47 for language tags.
>
> glibc did not do this and directly because of that it's a mess to
> navigate their structure.

Do you mean we should specify encoding name syntax in a comment? If so, my answer would be that the recognized encoding names are exactly the names of the directories we install under /usr/share/locale (each one holding an LC_CTYPE file). A comment documenting the same would duplicate that information and risk becoming obsolete over time.

Updated diff, with the redundant check removed: Index: share/locale/ctype/Makefile =================================================================== RCS file: /cvs/src/share/locale/ctype/Makefile,v retrieving revision 1.6 diff -u -p -r1.6 Makefile --- share/locale/ctype/Makefile 16 Jul 2011 21:33:30 -0000 1.6 +++ share/locale/ctype/Makefile 30 May 2013 19:16:33 -0000 @@ -5,295 +5,81 @@ NOMAN= # defined # pull LOCALEDIR and other declarations .include <bsd.own.mk> -LOCALES += ar_SD.UTF-8 - LOCALESRC_ar_SD.UTF-8 = en_US.UTF-8 +LOCALES += UTF-8 + LOCALESRC_UTF-8 = en_US.UTF-8 -LOCALES += ar_SY.UTF-8 - LOCALESRC_ar_SY.UTF-8 = en_US.UTF-8 +LOCALES += CP1251 + LOCALESRC_CP1251 = bg_BG.CP1251 -LOCALES += bg_BG.CP1251 - LOCALESRC_bg_BG.CP1251 = bg_BG.CP1251 +LOCALES += ISO8859-1 + LOCALESRC_ISO8859-1 = en_US.ISO_8859-1 -LOCALES += ca_ES.ISO8859-1 - LOCALESRC_ca_ES.ISO8859-1 = en_US.ISO_8859-1 +LOCALES += ISO8859-15 + LOCALESRC_ISO8859-15 = en_US.DIS_8859-15 -LOCALES += ca_ES.ISO8859-15 - LOCALESRC_ca_ES.ISO8859-15 = en_US.DIS_8859-15 +LOCALES += ISO8859-2 + LOCALESRC_ISO8859-2 = en_US.ISO_8859-2 -LOCALES += cs_CZ.ISO8859-2 - LOCALESRC_cs_CZ.ISO8859-2 = en_US.ISO_8859-2 +LOCALES += ISO8859-7 + LOCALESRC_ISO8859-7 = el_GR.ISO8859-7 -LOCALES += da_DK.ISO8859-1 - LOCALESRC_da_DK.ISO8859-1 = en_US.ISO_8859-1 +LOCALES += ARMSCII-8 + LOCALESRC_ARMSCII-8 = hy_AM.ARMSCII-8 -LOCALES += da_DK.ISO8859-15 - LOCALESRC_da_DK.ISO8859-15 = en_US.DIS_8859-15 +#LOCALES += ct +# LOCALESRC_ct = ja_JP.CTEXT -LOCALES += de_AT.ISO8859-1 - LOCALESRC_de_AT.ISO8859-1 = en_US.ISO_8859-1 +#LOCALES += eucJP +# LOCALESRC_eucJP = ja_JP.eucJP -LOCALES += de_AT.ISO8859-15 - LOCALESRC_de_AT.ISO8859-15 = en_US.DIS_8859-15 +#LOCALES += ISO2022-JP +# LOCALESRC_ISO2022-JP = ja_JP.ISO-2022-JP -LOCALES += de_CH.ISO8859-1 - LOCALESRC_de_CH.ISO8859-1 = en_US.ISO_8859-1 +#LOCALES += ISO2022-JP2 +# LOCALESRC_ISO2022-JP2 = ja_JP.ISO-2022-JP-2 -LOCALES += de_CH.ISO8859-15 - LOCALESRC_de_CH.ISO8859-15 = 
en_US.DIS_8859-15 +#LOCALES += SJIS +# LOCALESRC_SJIS = ja_JP.SJIS -LOCALES += de_DE.ISO8859-1 - LOCALESRC_de_DE.ISO8859-1 = en_US.ISO_8859-1 +#LOCALES += eucKR +# LOCALESRC_eucKR = ko_KR.eucKR -LOCALES += de_DE.ISO8859-15 - LOCALESRC_de_DE.ISO8859-15 = en_US.DIS_8859-15 +LOCALES += ISO8859-4 + LOCALESRC_ISO8859-4 = en_US.ISO_8859-4 -LOCALES += de_DE.UTF-8 - LOCALESRC_de_DE.UTF-8 = en_US.UTF-8 +LOCALES += ISO8859-13 + LOCALESRC_ISO8859-13 = lt_LT.ISO8859-13 -LOCALES += el_GR.ISO8859-7 - LOCALESRC_el_GR.ISO8859-7 = el_GR.ISO8859-7 +LOCALES += CP866 + LOCALESRC_CP866 = ru_RU.CP866 -LOCALES += en_AU.ISO8859-1 - LOCALESRC_en_AU.ISO8859-1 = en_US.ISO_8859-1 +LOCALES += KOI8-R + LOCALESRC_KOI8-R = ru_RU.KOI8-R -LOCALES += en_AU.ISO8859-15 - LOCALESRC_en_AU.ISO8859-15 = en_US.DIS_8859-15 +LOCALES += ISO8859-5 + LOCALESRC_ISO8859-5 = ru_RU.ISO_8859-5 -LOCALES += en_CA.ISO8859-1 - LOCALESRC_en_CA.ISO8859-1 = en_US.ISO_8859-1 +LOCALES += KOI8-U + LOCALESRC_KOI8-U = uk_UA.KOI8-U -LOCALES += en_CA.ISO8859-15 - LOCALESRC_en_CA.ISO8859-15 = en_US.DIS_8859-15 +#LOCALES += eucCN +# LOCALESRC_eucCN = zh_CN.eucCN -LOCALES += en_GB.ISO8859-1 - LOCALESRC_en_GB.ISO8859-1 = en_US.ISO_8859-1 +#LOCALES += GB18030 +# LOCALESRC_GB18030 = zh_CN.GB18030 -LOCALES += en_GB.ISO8859-15 - LOCALESRC_en_GB.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += en_US.ISO8859-1 - LOCALESRC_en_US.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += en_US.ISO8859-15 - LOCALESRC_en_US.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += en_US.UTF-8 - LOCALESRC_en_US.UTF-8 = en_US.UTF-8 - -LOCALES += es_ES.ISO8859-1 - LOCALESRC_es_ES.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += es_ES.ISO8859-15 - LOCALESRC_es_ES.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += es_ES.UTF-8 - LOCALESRC_es_ES.UTF-8 = en_US.UTF-8 - -LOCALES += fa_IR.UTF-8 - LOCALESRC_fa_IR.UTF-8 = en_US.UTF-8 - -LOCALES += fi_FI.ISO8859-1 - LOCALESRC_fi_FI.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += fi_FI.ISO8859-15 - LOCALESRC_fi_FI.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES 
+= fr_BE.ISO8859-1 - LOCALESRC_fr_BE.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += fr_BE.ISO8859-15 - LOCALESRC_fr_BE.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += fr_BE.UTF-8 - LOCALESRC_fr_BE.UTF-8 = en_US.UTF-8 - -LOCALES += fr_CA.ISO8859-1 - LOCALESRC_fr_CA.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += fr_CA.ISO8859-15 - LOCALESRC_fr_CA.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += fr_CA.UTF-8 - LOCALESRC_fr_CA.UTF-8 = en_US.UTF-8 - -LOCALES += fr_CH.ISO8859-1 - LOCALESRC_fr_CH.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += fr_CH.ISO8859-15 - LOCALESRC_fr_CH.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += fr_CH.UTF-8 - LOCALESRC_fr_CH.UTF-8 = en_US.UTF-8 - -LOCALES += fr_FR.ISO8859-1 - LOCALESRC_fr_FR.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += fr_FR.ISO8859-15 - LOCALESRC_fr_FR.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += fr_FR.UTF-8 - LOCALESRC_fr_FR.UTF-8 = en_US.UTF-8 - -LOCALES += hr_HR.ISO8859-2 - LOCALESRC_hr_HR.ISO8859-2 = en_US.ISO_8859-2 - -LOCALES += hu_HU.ISO8859-2 - LOCALESRC_hu_HU.ISO8859-2 = en_US.ISO_8859-2 - -LOCALES += hu_HU.UTF-8 - LOCALESRC_hu_HU.UTF-8 = en_US.UTF-8 - -LOCALES += hy_AM.ARMSCII-8 - LOCALESRC_hy_AM.ARMSCII-8 = hy_AM.ARMSCII-8 - -LOCALES += is_IS.ISO8859-1 - LOCALESRC_is_IS.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += is_IS.ISO8859-15 - LOCALESRC_is_IS.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += it_CH.ISO8859-1 - LOCALESRC_it_CH.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += it_CH.ISO8859-15 - LOCALESRC_it_CH.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += it_CH.UTF-8 - LOCALESRC_it_CH.UTF-8 = en_US.UTF-8 - -LOCALES += it_IT.ISO8859-1 - LOCALESRC_it_IT.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += it_IT.ISO8859-15 - LOCALESRC_it_IT.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += it_IT.UTF-8 - LOCALESRC_it_IT.UTF-8 = en_US.UTF-8 - -#LOCALES += ja_JP.ct -# LOCALESRC_ja_JP.ct = ja_JP.CTEXT - -#LOCALES += ja_JP.eucJP -# LOCALESRC_ja_JP.eucJP = ja_JP.eucJP - -#LOCALES += ja_JP.ISO2022-JP -# LOCALESRC_ja_JP.ISO2022-JP = ja_JP.ISO-2022-JP - -#LOCALES += 
ja_JP.ISO2022-JP2 -# LOCALESRC_ja_JP.ISO2022-JP2 = ja_JP.ISO-2022-JP-2 - -#LOCALES += ja_JP.SJIS -# LOCALESRC_ja_JP.SJIS = ja_JP.SJIS - -LOCALES += ja_JP.UTF-8 - LOCALESRC_ja_JP.UTF-8 = en_US.UTF-8 - -#LOCALES += ko_KR.eucKR -# LOCALESRC_ko_KR.eucKR = ko_KR.eucKR - -LOCALES += ko_KR.UTF-8 - LOCALESRC_ko_KR.UTF-8 = en_US.UTF-8 - -LOCALES += lt_LT.ISO8859-4 - LOCALESRC_lt_LT.ISO8859-4 = en_US.ISO_8859-4 - -LOCALES += lt_LT.ISO8859-13 - LOCALESRC_lt_LT.ISO8859-13 = lt_LT.ISO8859-13 - -LOCALES += nl_BE.ISO8859-1 - LOCALESRC_nl_BE.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += nl_BE.ISO8859-15 - LOCALESRC_nl_BE.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += nl_NL.ISO8859-1 - LOCALESRC_nl_NL.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += nl_NL.ISO8859-15 - LOCALESRC_nl_NL.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += no_NO.ISO8859-1 - LOCALESRC_no_NO.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += no_NO.ISO8859-15 - LOCALESRC_no_NO.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += pl_PL.ISO8859-2 - LOCALESRC_pl_PL.ISO8859-2 = en_US.ISO_8859-2 - -LOCALES += pl_PL.UTF-8 - LOCALESRC_pl_PL.UTF-8 = en_US.UTF-8 - -LOCALES += pt_PT.ISO8859-1 - LOCALESRC_pt_PT.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += pt_PT.UTF-8 - LOCALESRC_pt_PT.UTF-8 = en_US.UTF-8 - -LOCALES += pt_PT.ISO8859-15 - LOCALESRC_pt_PT.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += ro_RO.UTF-8 - LOCALESRC_ro_RO.UTF-8 = en_US.UTF-8 - -LOCALES += ru_RU.CP866 - LOCALESRC_ru_RU.CP866 = ru_RU.CP866 - -LOCALES += ru_RU.KOI8-R - LOCALESRC_ru_RU.KOI8-R = ru_RU.KOI8-R - -LOCALES += ru_RU.ISO8859-5 - LOCALESRC_ru_RU.ISO8859-5 = ru_RU.ISO_8859-5 - -LOCALES += ru_RU.UTF-8 - LOCALESRC_ru_RU.UTF-8 = en_US.UTF-8 - -LOCALES += sk_SK.ISO8859-2 - LOCALESRC_sk_SK.ISO8859-2 = en_US.ISO_8859-2 - -LOCALES += sl_SI.ISO8859-2 - LOCALESRC_sl_SI.ISO8859-2 = en_US.ISO_8859-2 - -LOCALES += sv_SE.ISO8859-1 - LOCALESRC_sv_SE.ISO8859-1 = en_US.ISO_8859-1 - -LOCALES += sv_SE.ISO8859-15 - LOCALESRC_sv_SE.ISO8859-15 = en_US.DIS_8859-15 - -LOCALES += sv_SE.UTF-8 - 
LOCALESRC_sv_SE.UTF-8 = en_US.UTF-8 - -LOCALES += uk_UA.KOI8-U - LOCALESRC_uk_UA.KOI8-U = uk_UA.KOI8-U - -#LOCALES += zh_CN.eucCN -# LOCALESRC_zh_CN.eucCN = zh_CN.eucCN - -#LOCALES += zh_CN.GB18030 -# LOCALESRC_zh_CN.GB18030 = zh_CN.GB18030 - -LOCALES += zh_CN.UTF-8 - LOCALESRC_zh_CN.UTF-8 = en_US.UTF-8 - -#LOCALES += zh_TW.Big5 -# LOCALESRC_zh_TW.Big5 = zh_TW.BIG5 +#LOCALES += Big5 +# LOCALESRC_Big5 = zh_TW.BIG5 # XXX: EUC-TW is not EUC! -#LOCALES += zh_TW.eucTW -# LOCALESRC_zh_TW.eucTW = zh_TW.eucTW - -LOCALES += zh_TW.UTF-8 - LOCALESRC_zh_TW.UTF-8 = en_US.UTF-8 +#LOCALES += eucTW +# LOCALESRC_eucTW = zh_TW.eucTW all: ${LOCALES:S/$/.out/g} realall: ${LOCALES:S/$/.out/g} -.for locale in ${LOCALES} -LOCALESRCS+= ${LOCALESRC_${locale}} -.endfor CLEANFILES+= ${LOCALES:S/$/.out/g} -# TODO: more use of symlinks? FILES= ${LOCALES:S/$/.out/g} .for locale in ${LOCALES} FILESDIR_${locale}.out= ${LOCALEDIR}/${locale} Index: lib/libc/locale/setlocale.c =================================================================== RCS file: /cvs/src/lib/libc/locale/setlocale.c,v retrieving revision 1.18 diff -u -p -r1.18 setlocale.c --- lib/libc/locale/setlocale.c 15 Mar 2011 22:27:48 -0000 1.18 +++ lib/libc/locale/setlocale.c 31 May 2013 03:59:02 -0000 @@ -211,7 +211,6 @@ revert_to_default(int category) static int load_locale_sub(int category, const char *locname, int isspecial) { - char name[PATH_MAX]; int len; /* check for the default locales */ @@ -223,11 +222,6 @@ load_locale_sub(int category, const char /* sanity check */ if (strchr(locname, '/') != NULL) - return -1; - - len = snprintf(name, sizeof(name), "%s/%s/%s", - _PATH_LOCALE, locname, categories[category]); - if (len < 0 || len >= sizeof(name)) return -1; switch (category) { Index: lib/libc/locale/setrunelocale.c =================================================================== RCS file: /cvs/src/lib/libc/locale/setrunelocale.c,v retrieving revision 1.9 diff -u -p -r1.9 setrunelocale.c --- lib/libc/locale/setrunelocale.c 
30 May 2013 18:35:55 -0000 1.9 +++ lib/libc/locale/setrunelocale.c 30 May 2013 19:23:16 -0000 @@ -171,17 +171,27 @@ found: } int -_xpg4_setrunelocale(const char *encoding) +_xpg4_setrunelocale(const char *locname) { char path[PATH_MAX]; _RuneLocale *rl; int error, len; + const char *dot, *encoding; - if (!strcmp(encoding, "C") || !strcmp(encoding, "POSIX")) { + if (!strcmp(locname, "C") || !strcmp(locname, "POSIX")) { rl = &_DefaultRuneLocale; goto found; } + /* Assume "<whatever>.<encoding>" locale name. */ + dot = strrchr(locname, '.'); + if (dot == NULL) { + /* No encoding specified. Fall back to ASCII. */ + rl = &_DefaultRuneLocale; + goto found; + } + + encoding = dot + 1; len = snprintf(path, sizeof(path), "%s/%s/LC_CTYPE", _PATH_LOCALE, encoding); if (len < 0 || len >= sizeof(path))