On Thu, May 30, 2013 at 05:18:48PM -0430, Andres Perera wrote:
> As I mentioned, there's code that expects the prior layout, and that's
> confusing.
> 
> on src/lib/libc/locale/setlocale.c, load_locale_sub() :
> 
>    228          len = snprintf(name, sizeof(name), "%s/%s/%s",
>    229                         _PATH_LOCALE, locname, categories[category]);
>    230          if (len < 0 || len >= sizeof(name))
>    231                  return -1;

Right, thanks for pointing this out.

I think the above check can simply be removed.
Its only purpose seems to be making sure that the path constructed
from _PATH_LOCALE, the locname argument, and the category name doesn't
exceed PATH_MAX. This is redundant because an equivalent check is
performed again within _xpg4_setrunelocale(). If we assume that functions
handling other LC_* categories might use different paths in the future,
it makes sense to perform this overflow check only inside the
LC_*-specific functions, rather than up front.

> on src/lib/libc/locale/setrunelocale.c, _xpg4_setrunelocale():
> 
>    184          len = snprintf(path, sizeof(path),
>    185              "%s/%s/LC_CTYPE", _PATH_LOCALE, encoding);
>    186          if (len < 0 || len >= sizeof(path))
>    187                  return ENAMETOOLONG;

This section is modified as part of my diff, isn't it?

> > +       /* Assume "<whatever>.<encoding>" locale name. */
> 
> There should be some notion of syntax for cc_LL.CTYPE, even if only
> mentioned in comments.
> 
> E.g.,
> 
> ISO 3166-1 for country codes and BCP 47 for language tags.
> 
> glibc did not do this, and as a direct result its structure is a
> mess to navigate.

Do you mean we should specify the encoding name syntax in a comment?
If so, my answer would be that the recognized encoding names are
defined by the file names we use in /usr/share/locale. A comment
documenting the same thing would risk becoming obsolete over time.

Updated diff, with the redundant check removed:

Index: share/locale/ctype/Makefile
===================================================================
RCS file: /cvs/src/share/locale/ctype/Makefile,v
retrieving revision 1.6
diff -u -p -r1.6 Makefile
--- share/locale/ctype/Makefile 16 Jul 2011 21:33:30 -0000      1.6
+++ share/locale/ctype/Makefile 30 May 2013 19:16:33 -0000
@@ -5,295 +5,81 @@ NOMAN=       # defined
 # pull LOCALEDIR and other declarations
 .include <bsd.own.mk>
 
-LOCALES += ar_SD.UTF-8
- LOCALESRC_ar_SD.UTF-8 = en_US.UTF-8
+LOCALES += UTF-8
+ LOCALESRC_UTF-8 = en_US.UTF-8
 
-LOCALES += ar_SY.UTF-8
- LOCALESRC_ar_SY.UTF-8 = en_US.UTF-8
+LOCALES += CP1251
+ LOCALESRC_CP1251 = bg_BG.CP1251
 
-LOCALES += bg_BG.CP1251
- LOCALESRC_bg_BG.CP1251 = bg_BG.CP1251
+LOCALES += ISO8859-1
+ LOCALESRC_ISO8859-1 = en_US.ISO_8859-1
 
-LOCALES += ca_ES.ISO8859-1
- LOCALESRC_ca_ES.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += ISO8859-15
+ LOCALESRC_ISO8859-15 = en_US.DIS_8859-15
 
-LOCALES += ca_ES.ISO8859-15
- LOCALESRC_ca_ES.ISO8859-15 = en_US.DIS_8859-15
+LOCALES += ISO8859-2
+ LOCALESRC_ISO8859-2 = en_US.ISO_8859-2
 
-LOCALES += cs_CZ.ISO8859-2
- LOCALESRC_cs_CZ.ISO8859-2 = en_US.ISO_8859-2
+LOCALES += ISO8859-7
+ LOCALESRC_ISO8859-7 = el_GR.ISO8859-7
 
-LOCALES += da_DK.ISO8859-1
- LOCALESRC_da_DK.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += ARMSCII-8
+ LOCALESRC_ARMSCII-8 = hy_AM.ARMSCII-8
 
-LOCALES += da_DK.ISO8859-15
- LOCALESRC_da_DK.ISO8859-15 = en_US.DIS_8859-15
+#LOCALES += ct
+# LOCALESRC_ct = ja_JP.CTEXT
 
-LOCALES += de_AT.ISO8859-1
- LOCALESRC_de_AT.ISO8859-1 = en_US.ISO_8859-1
+#LOCALES += eucJP
+# LOCALESRC_eucJP = ja_JP.eucJP
 
-LOCALES += de_AT.ISO8859-15
- LOCALESRC_de_AT.ISO8859-15 = en_US.DIS_8859-15
+#LOCALES += ISO2022-JP
+# LOCALESRC_ISO2022-JP = ja_JP.ISO-2022-JP
 
-LOCALES += de_CH.ISO8859-1
- LOCALESRC_de_CH.ISO8859-1 = en_US.ISO_8859-1
+#LOCALES += ISO2022-JP2
+# LOCALESRC_ISO2022-JP2 = ja_JP.ISO-2022-JP-2
 
-LOCALES += de_CH.ISO8859-15
- LOCALESRC_de_CH.ISO8859-15 = en_US.DIS_8859-15
+#LOCALES += SJIS
+# LOCALESRC_SJIS = ja_JP.SJIS
 
-LOCALES += de_DE.ISO8859-1
- LOCALESRC_de_DE.ISO8859-1 = en_US.ISO_8859-1
+#LOCALES += eucKR
+# LOCALESRC_eucKR = ko_KR.eucKR
 
-LOCALES += de_DE.ISO8859-15
- LOCALESRC_de_DE.ISO8859-15 = en_US.DIS_8859-15
+LOCALES += ISO8859-4
+ LOCALESRC_ISO8859-4 = en_US.ISO_8859-4
 
-LOCALES += de_DE.UTF-8
- LOCALESRC_de_DE.UTF-8 = en_US.UTF-8
+LOCALES += ISO8859-13
+ LOCALESRC_ISO8859-13 = lt_LT.ISO8859-13
 
-LOCALES += el_GR.ISO8859-7
- LOCALESRC_el_GR.ISO8859-7 = el_GR.ISO8859-7
+LOCALES += CP866
+ LOCALESRC_CP866 = ru_RU.CP866
 
-LOCALES += en_AU.ISO8859-1
- LOCALESRC_en_AU.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += KOI8-R
+ LOCALESRC_KOI8-R = ru_RU.KOI8-R
 
-LOCALES += en_AU.ISO8859-15
- LOCALESRC_en_AU.ISO8859-15 = en_US.DIS_8859-15
+LOCALES += ISO8859-5
+ LOCALESRC_ISO8859-5 = ru_RU.ISO_8859-5
 
-LOCALES += en_CA.ISO8859-1
- LOCALESRC_en_CA.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += KOI8-U
+ LOCALESRC_KOI8-U = uk_UA.KOI8-U
 
-LOCALES += en_CA.ISO8859-15
- LOCALESRC_en_CA.ISO8859-15 = en_US.DIS_8859-15
+#LOCALES += eucCN
+# LOCALESRC_eucCN = zh_CN.eucCN
 
-LOCALES += en_GB.ISO8859-1
- LOCALESRC_en_GB.ISO8859-1 = en_US.ISO_8859-1
+#LOCALES += GB18030
+# LOCALESRC_GB18030 = zh_CN.GB18030
 
-LOCALES += en_GB.ISO8859-15
- LOCALESRC_en_GB.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += en_US.ISO8859-1
- LOCALESRC_en_US.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += en_US.ISO8859-15
- LOCALESRC_en_US.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += en_US.UTF-8
- LOCALESRC_en_US.UTF-8 = en_US.UTF-8
-
-LOCALES += es_ES.ISO8859-1
- LOCALESRC_es_ES.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += es_ES.ISO8859-15
- LOCALESRC_es_ES.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += es_ES.UTF-8
- LOCALESRC_es_ES.UTF-8 = en_US.UTF-8
-
-LOCALES += fa_IR.UTF-8
- LOCALESRC_fa_IR.UTF-8 = en_US.UTF-8
-
-LOCALES += fi_FI.ISO8859-1
- LOCALESRC_fi_FI.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fi_FI.ISO8859-15
- LOCALESRC_fi_FI.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_BE.ISO8859-1
- LOCALESRC_fr_BE.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fr_BE.ISO8859-15
- LOCALESRC_fr_BE.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_BE.UTF-8
- LOCALESRC_fr_BE.UTF-8 = en_US.UTF-8
-
-LOCALES += fr_CA.ISO8859-1
- LOCALESRC_fr_CA.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fr_CA.ISO8859-15
- LOCALESRC_fr_CA.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_CA.UTF-8
- LOCALESRC_fr_CA.UTF-8 = en_US.UTF-8
-
-LOCALES += fr_CH.ISO8859-1
- LOCALESRC_fr_CH.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fr_CH.ISO8859-15
- LOCALESRC_fr_CH.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_CH.UTF-8
- LOCALESRC_fr_CH.UTF-8 = en_US.UTF-8
-
-LOCALES += fr_FR.ISO8859-1
- LOCALESRC_fr_FR.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fr_FR.ISO8859-15
- LOCALESRC_fr_FR.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_FR.UTF-8
- LOCALESRC_fr_FR.UTF-8 = en_US.UTF-8
-
-LOCALES += hr_HR.ISO8859-2
- LOCALESRC_hr_HR.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += hu_HU.ISO8859-2
- LOCALESRC_hu_HU.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += hu_HU.UTF-8
- LOCALESRC_hu_HU.UTF-8 = en_US.UTF-8
-
-LOCALES += hy_AM.ARMSCII-8
- LOCALESRC_hy_AM.ARMSCII-8 = hy_AM.ARMSCII-8
-
-LOCALES += is_IS.ISO8859-1
- LOCALESRC_is_IS.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += is_IS.ISO8859-15
- LOCALESRC_is_IS.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += it_CH.ISO8859-1
- LOCALESRC_it_CH.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += it_CH.ISO8859-15
- LOCALESRC_it_CH.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += it_CH.UTF-8
- LOCALESRC_it_CH.UTF-8 = en_US.UTF-8
-
-LOCALES += it_IT.ISO8859-1
- LOCALESRC_it_IT.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += it_IT.ISO8859-15
- LOCALESRC_it_IT.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += it_IT.UTF-8
- LOCALESRC_it_IT.UTF-8 = en_US.UTF-8
-
-#LOCALES += ja_JP.ct
-# LOCALESRC_ja_JP.ct = ja_JP.CTEXT
-
-#LOCALES += ja_JP.eucJP
-# LOCALESRC_ja_JP.eucJP = ja_JP.eucJP
-
-#LOCALES += ja_JP.ISO2022-JP
-# LOCALESRC_ja_JP.ISO2022-JP = ja_JP.ISO-2022-JP
-
-#LOCALES += ja_JP.ISO2022-JP2
-# LOCALESRC_ja_JP.ISO2022-JP2 = ja_JP.ISO-2022-JP-2
-
-#LOCALES += ja_JP.SJIS
-# LOCALESRC_ja_JP.SJIS = ja_JP.SJIS
-
-LOCALES += ja_JP.UTF-8
- LOCALESRC_ja_JP.UTF-8 = en_US.UTF-8
-
-#LOCALES += ko_KR.eucKR
-# LOCALESRC_ko_KR.eucKR = ko_KR.eucKR
-
-LOCALES += ko_KR.UTF-8
- LOCALESRC_ko_KR.UTF-8 = en_US.UTF-8
-
-LOCALES += lt_LT.ISO8859-4
- LOCALESRC_lt_LT.ISO8859-4 = en_US.ISO_8859-4
-
-LOCALES += lt_LT.ISO8859-13
- LOCALESRC_lt_LT.ISO8859-13 = lt_LT.ISO8859-13
-
-LOCALES += nl_BE.ISO8859-1
- LOCALESRC_nl_BE.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += nl_BE.ISO8859-15
- LOCALESRC_nl_BE.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += nl_NL.ISO8859-1
- LOCALESRC_nl_NL.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += nl_NL.ISO8859-15
- LOCALESRC_nl_NL.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += no_NO.ISO8859-1
- LOCALESRC_no_NO.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += no_NO.ISO8859-15
- LOCALESRC_no_NO.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += pl_PL.ISO8859-2
- LOCALESRC_pl_PL.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += pl_PL.UTF-8
- LOCALESRC_pl_PL.UTF-8 = en_US.UTF-8
-
-LOCALES += pt_PT.ISO8859-1
- LOCALESRC_pt_PT.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += pt_PT.UTF-8
- LOCALESRC_pt_PT.UTF-8 = en_US.UTF-8
-
-LOCALES += pt_PT.ISO8859-15
- LOCALESRC_pt_PT.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += ro_RO.UTF-8
- LOCALESRC_ro_RO.UTF-8 = en_US.UTF-8
-
-LOCALES += ru_RU.CP866
- LOCALESRC_ru_RU.CP866 = ru_RU.CP866
-
-LOCALES += ru_RU.KOI8-R
- LOCALESRC_ru_RU.KOI8-R = ru_RU.KOI8-R
-
-LOCALES += ru_RU.ISO8859-5
- LOCALESRC_ru_RU.ISO8859-5 = ru_RU.ISO_8859-5
-
-LOCALES += ru_RU.UTF-8
- LOCALESRC_ru_RU.UTF-8 = en_US.UTF-8
-
-LOCALES += sk_SK.ISO8859-2
- LOCALESRC_sk_SK.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += sl_SI.ISO8859-2
- LOCALESRC_sl_SI.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += sv_SE.ISO8859-1
- LOCALESRC_sv_SE.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += sv_SE.ISO8859-15
- LOCALESRC_sv_SE.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += sv_SE.UTF-8
- LOCALESRC_sv_SE.UTF-8 = en_US.UTF-8
-
-LOCALES += uk_UA.KOI8-U
- LOCALESRC_uk_UA.KOI8-U = uk_UA.KOI8-U
-
-#LOCALES += zh_CN.eucCN
-# LOCALESRC_zh_CN.eucCN = zh_CN.eucCN
-
-#LOCALES += zh_CN.GB18030
-# LOCALESRC_zh_CN.GB18030 = zh_CN.GB18030
-
-LOCALES += zh_CN.UTF-8
- LOCALESRC_zh_CN.UTF-8 = en_US.UTF-8
-
-#LOCALES += zh_TW.Big5
-# LOCALESRC_zh_TW.Big5 = zh_TW.BIG5
+#LOCALES += Big5
+# LOCALESRC_Big5 = zh_TW.BIG5
 
 # XXX: EUC-TW is not EUC!
-#LOCALES += zh_TW.eucTW
-# LOCALESRC_zh_TW.eucTW = zh_TW.eucTW
-
-LOCALES += zh_TW.UTF-8
- LOCALESRC_zh_TW.UTF-8 = en_US.UTF-8
+#LOCALES += eucTW
+# LOCALESRC_eucTW = zh_TW.eucTW
 
 all: ${LOCALES:S/$/.out/g}
 realall: ${LOCALES:S/$/.out/g}
 
-.for locale in ${LOCALES}
-LOCALESRCS+=   ${LOCALESRC_${locale}}
-.endfor
 CLEANFILES+=   ${LOCALES:S/$/.out/g}
 
-# TODO: more use of symlinks?
 FILES= ${LOCALES:S/$/.out/g}
 .for locale in ${LOCALES}
 FILESDIR_${locale}.out=        ${LOCALEDIR}/${locale}
Index: lib/libc/locale/setlocale.c
===================================================================
RCS file: /cvs/src/lib/libc/locale/setlocale.c,v
retrieving revision 1.18
diff -u -p -r1.18 setlocale.c
--- lib/libc/locale/setlocale.c 15 Mar 2011 22:27:48 -0000      1.18
+++ lib/libc/locale/setlocale.c 31 May 2013 03:59:02 -0000
@@ -211,7 +211,6 @@ revert_to_default(int category)
 static int
 load_locale_sub(int category, const char *locname, int isspecial)
 {
-       char name[PATH_MAX];
        int len;
 
        /* check for the default locales */
@@ -223,11 +222,6 @@ load_locale_sub(int category, const char
 
        /* sanity check */
        if (strchr(locname, '/') != NULL)
-               return -1;
-
-       len = snprintf(name, sizeof(name), "%s/%s/%s",
-                      _PATH_LOCALE, locname, categories[category]);
-       if (len < 0 || len >= sizeof(name))
                return -1;
 
        switch (category) {
Index: lib/libc/locale/setrunelocale.c
===================================================================
RCS file: /cvs/src/lib/libc/locale/setrunelocale.c,v
retrieving revision 1.9
diff -u -p -r1.9 setrunelocale.c
--- lib/libc/locale/setrunelocale.c     30 May 2013 18:35:55 -0000      1.9
+++ lib/libc/locale/setrunelocale.c     30 May 2013 19:23:16 -0000
@@ -171,17 +171,27 @@ found:
 }
 
 int
-_xpg4_setrunelocale(const char *encoding)
+_xpg4_setrunelocale(const char *locname)
 {
        char path[PATH_MAX];
        _RuneLocale *rl;
        int error, len;
+       const char *dot, *encoding;
 
-       if (!strcmp(encoding, "C") || !strcmp(encoding, "POSIX")) {
+       if (!strcmp(locname, "C") || !strcmp(locname, "POSIX")) {
                rl = &_DefaultRuneLocale;
                goto found;
        }
 
+       /* Assume "<whatever>.<encoding>" locale name. */
+       dot = strrchr(locname, '.');
+       if (dot == NULL) {
+               /* No encoding specified. Fall back to ASCII. */
+               rl = &_DefaultRuneLocale;
+               goto found;
+       }
+
+       encoding = dot + 1;
        len = snprintf(path, sizeof(path),
            "%s/%s/LC_CTYPE", _PATH_LOCALE, encoding);
        if (len < 0 || len >= sizeof(path))

Reply via email to