On Thu, May 30, 2013 at 10:05:45PM +0200, Stefan Sperling wrote: > I've received several requests for adding new locale names, > both on this list, and off-list, from several people. > > I've been trying to find a way to keep /usr/share/locale reasonably > clean while also allowing people to use their preferred locale names. > > Currently, the list of supported locale names is represented by the > list of directories in /usr/share/locale. I don't think we should > continue to maintain a list of <language>_<country>.<encoding> names > because such a list cannot be maintained properly. > > Some requests that have been made are non-functional changes. > E.g. adding a <country> doesn't have a functional effect on OpenBSD. > Still, some users would like to use names containing > <theirlanguage>_<theircountry>, for whatever reason. > > There have also been requests for supporting locale names such > as "C.UTF-8". I'm not sure what the use case is but as a side-effect > of the proposal below such names would also be possible. > > POSIX doesn't specify how files in /usr/share/locale are stored. > bluhm@ suggested to change the filesystem layout such that encoding > and language are separated. libc will look up locale definition data at > specific places depending on which of the LC_* categories is being set. > > LC_CTYPE support code needs to look at the character encoding only. > It only cares about the encoding part of the locale name, which by > convention is the substring after the last dot in the locale name. 
> > The suggested new layout looks like this: > > /usr/share/locale/UTF-8/LC_CTYPE > /usr/share/locale/CP1251/LC_CTYPE > /usr/share/locale/ISO8859-1/LC_CTYPE > /usr/share/locale/ISO8859-15/LC_CTYPE > /usr/share/locale/ISO8859-2/LC_CTYPE > /usr/share/locale/ISO8859-7/LC_CTYPE > /usr/share/locale/ARMSCII-8/LC_CTYPE > /usr/share/locale/ISO8859-4/LC_CTYPE > /usr/share/locale/ISO8859-13/LC_CTYPE > /usr/share/locale/CP866/LC_CTYPE > /usr/share/locale/KOI8-R/LC_CTYPE > /usr/share/locale/ISO8859-5/LC_CTYPE > /usr/share/locale/KOI8-U/LC_CTYPE > > All other files and directories currently in /usr/share/locale > can be removed. > > If we later add support for language- or country-specific features > such as LC_COLLATE we can add directories for every language the > collation code supports: > > /usr/share/locale/en/LC_COLLATE > /usr/share/locale/es/LC_COLLATE > /usr/share/locale/de/LC_COLLATE > > Or even add country names, if necessary and supported by the > hypothetical collation code: > /usr/share/locale/it_IT/LC_COLLATE > /usr/share/locale/it_CH/LC_COLLATE > > Does anyone see problems with this plan?
I think some software expects a specific hierarchy. I remember something about checking for the supported stuff under LOCALEDIR/LOCALENAME/LC_*. I don't remember where it was, though; I need to dig through my memory. But I'm not versed enough in locales to know whether it's a real issue or not (e.g., do en_US and en_US.UTF8 mean the same thing or not)... > > Index: share/locale/ctype/Makefile > =================================================================== > RCS file: /cvs/src/share/locale/ctype/Makefile,v > retrieving revision 1.6 > diff -u -p -r1.6 Makefile > --- share/locale/ctype/Makefile 16 Jul 2011 21:33:30 -0000 1.6 > +++ share/locale/ctype/Makefile 30 May 2013 19:16:33 -0000 > @@ -5,295 +5,81 @@ NOMAN= # defined > # pull LOCALEDIR and other declarations > .include <bsd.own.mk> > > -LOCALES += ar_SD.UTF-8 > - LOCALESRC_ar_SD.UTF-8 = en_US.UTF-8 > +LOCALES += UTF-8 > + LOCALESRC_UTF-8 = en_US.UTF-8 > > -LOCALES += ar_SY.UTF-8 > - LOCALESRC_ar_SY.UTF-8 = en_US.UTF-8 > +LOCALES += CP1251 > + LOCALESRC_CP1251 = bg_BG.CP1251 > > -LOCALES += bg_BG.CP1251 > - LOCALESRC_bg_BG.CP1251 = bg_BG.CP1251 > +LOCALES += ISO8859-1 > + LOCALESRC_ISO8859-1 = en_US.ISO_8859-1 > > -LOCALES += ca_ES.ISO8859-1 > - LOCALESRC_ca_ES.ISO8859-1 = en_US.ISO_8859-1 > +LOCALES += ISO8859-15 > + LOCALESRC_ISO8859-15 = en_US.DIS_8859-15 > > -LOCALES += ca_ES.ISO8859-15 > - LOCALESRC_ca_ES.ISO8859-15 = en_US.DIS_8859-15 > +LOCALES += ISO8859-2 > + LOCALESRC_ISO8859-2 = en_US.ISO_8859-2 > > -LOCALES += cs_CZ.ISO8859-2 > - LOCALESRC_cs_CZ.ISO8859-2 = en_US.ISO_8859-2 > +LOCALES += ISO8859-7 > + LOCALESRC_ISO8859-7 = el_GR.ISO8859-7 > > -LOCALES += da_DK.ISO8859-1 > - LOCALESRC_da_DK.ISO8859-1 = en_US.ISO_8859-1 > +LOCALES += ARMSCII-8 > + LOCALESRC_ARMSCII-8 = hy_AM.ARMSCII-8 > > -LOCALES += da_DK.ISO8859-15 > - LOCALESRC_da_DK.ISO8859-15 = en_US.DIS_8859-15 > +#LOCALES += ct > +# LOCALESRC_ct = ja_JP.CTEXT > > -LOCALES += de_AT.ISO8859-1 > - LOCALESRC_de_AT.ISO8859-1 = en_US.ISO_8859-1 > +#LOCALES 
+= eucJP > +# LOCALESRC_eucJP = ja_JP.eucJP > > -LOCALES += de_AT.ISO8859-15 > - LOCALESRC_de_AT.ISO8859-15 = en_US.DIS_8859-15 > +#LOCALES += ISO2022-JP > +# LOCALESRC_ISO2022-JP = ja_JP.ISO-2022-JP > > -LOCALES += de_CH.ISO8859-1 > - LOCALESRC_de_CH.ISO8859-1 = en_US.ISO_8859-1 > +#LOCALES += ISO2022-JP2 > +# LOCALESRC_ISO2022-JP2 = ja_JP.ISO-2022-JP-2 > > -LOCALES += de_CH.ISO8859-15 > - LOCALESRC_de_CH.ISO8859-15 = en_US.DIS_8859-15 > +#LOCALES += SJIS > +# LOCALESRC_SJIS = ja_JP.SJIS > > -LOCALES += de_DE.ISO8859-1 > - LOCALESRC_de_DE.ISO8859-1 = en_US.ISO_8859-1 > +#LOCALES += eucKR > +# LOCALESRC_eucKR = ko_KR.eucKR > > -LOCALES += de_DE.ISO8859-15 > - LOCALESRC_de_DE.ISO8859-15 = en_US.DIS_8859-15 > +LOCALES += ISO8859-4 > + LOCALESRC_ISO8859-4 = en_US.ISO_8859-4 > > -LOCALES += de_DE.UTF-8 > - LOCALESRC_de_DE.UTF-8 = en_US.UTF-8 > +LOCALES += ISO8859-13 > + LOCALESRC_ISO8859-13 = lt_LT.ISO8859-13 > > -LOCALES += el_GR.ISO8859-7 > - LOCALESRC_el_GR.ISO8859-7 = el_GR.ISO8859-7 > +LOCALES += CP866 > + LOCALESRC_CP866 = ru_RU.CP866 > > -LOCALES += en_AU.ISO8859-1 > - LOCALESRC_en_AU.ISO8859-1 = en_US.ISO_8859-1 > +LOCALES += KOI8-R > + LOCALESRC_KOI8-R = ru_RU.KOI8-R > > -LOCALES += en_AU.ISO8859-15 > - LOCALESRC_en_AU.ISO8859-15 = en_US.DIS_8859-15 > +LOCALES += ISO8859-5 > + LOCALESRC_ISO8859-5 = ru_RU.ISO_8859-5 > > -LOCALES += en_CA.ISO8859-1 > - LOCALESRC_en_CA.ISO8859-1 = en_US.ISO_8859-1 > +LOCALES += KOI8-U > + LOCALESRC_KOI8-U = uk_UA.KOI8-U > > -LOCALES += en_CA.ISO8859-15 > - LOCALESRC_en_CA.ISO8859-15 = en_US.DIS_8859-15 > +#LOCALES += eucCN > +# LOCALESRC_eucCN = zh_CN.eucCN > > -LOCALES += en_GB.ISO8859-1 > - LOCALESRC_en_GB.ISO8859-1 = en_US.ISO_8859-1 > +#LOCALES += GB18030 > +# LOCALESRC_GB18030 = zh_CN.GB18030 > > -LOCALES += en_GB.ISO8859-15 > - LOCALESRC_en_GB.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += en_US.ISO8859-1 > - LOCALESRC_en_US.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += en_US.ISO8859-15 > - LOCALESRC_en_US.ISO8859-15 = 
en_US.DIS_8859-15 > - > -LOCALES += en_US.UTF-8 > - LOCALESRC_en_US.UTF-8 = en_US.UTF-8 > - > -LOCALES += es_ES.ISO8859-1 > - LOCALESRC_es_ES.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += es_ES.ISO8859-15 > - LOCALESRC_es_ES.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += es_ES.UTF-8 > - LOCALESRC_es_ES.UTF-8 = en_US.UTF-8 > - > -LOCALES += fa_IR.UTF-8 > - LOCALESRC_fa_IR.UTF-8 = en_US.UTF-8 > - > -LOCALES += fi_FI.ISO8859-1 > - LOCALESRC_fi_FI.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += fi_FI.ISO8859-15 > - LOCALESRC_fi_FI.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += fr_BE.ISO8859-1 > - LOCALESRC_fr_BE.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += fr_BE.ISO8859-15 > - LOCALESRC_fr_BE.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += fr_BE.UTF-8 > - LOCALESRC_fr_BE.UTF-8 = en_US.UTF-8 > - > -LOCALES += fr_CA.ISO8859-1 > - LOCALESRC_fr_CA.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += fr_CA.ISO8859-15 > - LOCALESRC_fr_CA.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += fr_CA.UTF-8 > - LOCALESRC_fr_CA.UTF-8 = en_US.UTF-8 > - > -LOCALES += fr_CH.ISO8859-1 > - LOCALESRC_fr_CH.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += fr_CH.ISO8859-15 > - LOCALESRC_fr_CH.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += fr_CH.UTF-8 > - LOCALESRC_fr_CH.UTF-8 = en_US.UTF-8 > - > -LOCALES += fr_FR.ISO8859-1 > - LOCALESRC_fr_FR.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += fr_FR.ISO8859-15 > - LOCALESRC_fr_FR.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += fr_FR.UTF-8 > - LOCALESRC_fr_FR.UTF-8 = en_US.UTF-8 > - > -LOCALES += hr_HR.ISO8859-2 > - LOCALESRC_hr_HR.ISO8859-2 = en_US.ISO_8859-2 > - > -LOCALES += hu_HU.ISO8859-2 > - LOCALESRC_hu_HU.ISO8859-2 = en_US.ISO_8859-2 > - > -LOCALES += hu_HU.UTF-8 > - LOCALESRC_hu_HU.UTF-8 = en_US.UTF-8 > - > -LOCALES += hy_AM.ARMSCII-8 > - LOCALESRC_hy_AM.ARMSCII-8 = hy_AM.ARMSCII-8 > - > -LOCALES += is_IS.ISO8859-1 > - LOCALESRC_is_IS.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += is_IS.ISO8859-15 > - LOCALESRC_is_IS.ISO8859-15 = 
en_US.DIS_8859-15 > - > -LOCALES += it_CH.ISO8859-1 > - LOCALESRC_it_CH.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += it_CH.ISO8859-15 > - LOCALESRC_it_CH.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += it_CH.UTF-8 > - LOCALESRC_it_CH.UTF-8 = en_US.UTF-8 > - > -LOCALES += it_IT.ISO8859-1 > - LOCALESRC_it_IT.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += it_IT.ISO8859-15 > - LOCALESRC_it_IT.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += it_IT.UTF-8 > - LOCALESRC_it_IT.UTF-8 = en_US.UTF-8 > - > -#LOCALES += ja_JP.ct > -# LOCALESRC_ja_JP.ct = ja_JP.CTEXT > - > -#LOCALES += ja_JP.eucJP > -# LOCALESRC_ja_JP.eucJP = ja_JP.eucJP > - > -#LOCALES += ja_JP.ISO2022-JP > -# LOCALESRC_ja_JP.ISO2022-JP = ja_JP.ISO-2022-JP > - > -#LOCALES += ja_JP.ISO2022-JP2 > -# LOCALESRC_ja_JP.ISO2022-JP2 = ja_JP.ISO-2022-JP-2 > - > -#LOCALES += ja_JP.SJIS > -# LOCALESRC_ja_JP.SJIS = ja_JP.SJIS > - > -LOCALES += ja_JP.UTF-8 > - LOCALESRC_ja_JP.UTF-8 = en_US.UTF-8 > - > -#LOCALES += ko_KR.eucKR > -# LOCALESRC_ko_KR.eucKR = ko_KR.eucKR > - > -LOCALES += ko_KR.UTF-8 > - LOCALESRC_ko_KR.UTF-8 = en_US.UTF-8 > - > -LOCALES += lt_LT.ISO8859-4 > - LOCALESRC_lt_LT.ISO8859-4 = en_US.ISO_8859-4 > - > -LOCALES += lt_LT.ISO8859-13 > - LOCALESRC_lt_LT.ISO8859-13 = lt_LT.ISO8859-13 > - > -LOCALES += nl_BE.ISO8859-1 > - LOCALESRC_nl_BE.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += nl_BE.ISO8859-15 > - LOCALESRC_nl_BE.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += nl_NL.ISO8859-1 > - LOCALESRC_nl_NL.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += nl_NL.ISO8859-15 > - LOCALESRC_nl_NL.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += no_NO.ISO8859-1 > - LOCALESRC_no_NO.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += no_NO.ISO8859-15 > - LOCALESRC_no_NO.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += pl_PL.ISO8859-2 > - LOCALESRC_pl_PL.ISO8859-2 = en_US.ISO_8859-2 > - > -LOCALES += pl_PL.UTF-8 > - LOCALESRC_pl_PL.UTF-8 = en_US.UTF-8 > - > -LOCALES += pt_PT.ISO8859-1 > - LOCALESRC_pt_PT.ISO8859-1 = 
en_US.ISO_8859-1 > - > -LOCALES += pt_PT.UTF-8 > - LOCALESRC_pt_PT.UTF-8 = en_US.UTF-8 > - > -LOCALES += pt_PT.ISO8859-15 > - LOCALESRC_pt_PT.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += ro_RO.UTF-8 > - LOCALESRC_ro_RO.UTF-8 = en_US.UTF-8 > - > -LOCALES += ru_RU.CP866 > - LOCALESRC_ru_RU.CP866 = ru_RU.CP866 > - > -LOCALES += ru_RU.KOI8-R > - LOCALESRC_ru_RU.KOI8-R = ru_RU.KOI8-R > - > -LOCALES += ru_RU.ISO8859-5 > - LOCALESRC_ru_RU.ISO8859-5 = ru_RU.ISO_8859-5 > - > -LOCALES += ru_RU.UTF-8 > - LOCALESRC_ru_RU.UTF-8 = en_US.UTF-8 > - > -LOCALES += sk_SK.ISO8859-2 > - LOCALESRC_sk_SK.ISO8859-2 = en_US.ISO_8859-2 > - > -LOCALES += sl_SI.ISO8859-2 > - LOCALESRC_sl_SI.ISO8859-2 = en_US.ISO_8859-2 > - > -LOCALES += sv_SE.ISO8859-1 > - LOCALESRC_sv_SE.ISO8859-1 = en_US.ISO_8859-1 > - > -LOCALES += sv_SE.ISO8859-15 > - LOCALESRC_sv_SE.ISO8859-15 = en_US.DIS_8859-15 > - > -LOCALES += sv_SE.UTF-8 > - LOCALESRC_sv_SE.UTF-8 = en_US.UTF-8 > - > -LOCALES += uk_UA.KOI8-U > - LOCALESRC_uk_UA.KOI8-U = uk_UA.KOI8-U > - > -#LOCALES += zh_CN.eucCN > -# LOCALESRC_zh_CN.eucCN = zh_CN.eucCN > - > -#LOCALES += zh_CN.GB18030 > -# LOCALESRC_zh_CN.GB18030 = zh_CN.GB18030 > - > -LOCALES += zh_CN.UTF-8 > - LOCALESRC_zh_CN.UTF-8 = en_US.UTF-8 > - > -#LOCALES += zh_TW.Big5 > -# LOCALESRC_zh_TW.Big5 = zh_TW.BIG5 > +#LOCALES += Big5 > +# LOCALESRC_Big5 = zh_TW.BIG5 > > # XXX: EUC-TW is not EUC! > -#LOCALES += zh_TW.eucTW > -# LOCALESRC_zh_TW.eucTW = zh_TW.eucTW > - > -LOCALES += zh_TW.UTF-8 > - LOCALESRC_zh_TW.UTF-8 = en_US.UTF-8 > +#LOCALES += eucTW > +# LOCALESRC_eucTW = zh_TW.eucTW > > all: ${LOCALES:S/$/.out/g} > realall: ${LOCALES:S/$/.out/g} > > -.for locale in ${LOCALES} > -LOCALESRCS+= ${LOCALESRC_${locale}} > -.endfor > CLEANFILES+= ${LOCALES:S/$/.out/g} > > -# TODO: more use of symlinks? 
> FILES= ${LOCALES:S/$/.out/g} > .for locale in ${LOCALES} > FILESDIR_${locale}.out= ${LOCALEDIR}/${locale} > Index: lib/libc/locale/setrunelocale.c > =================================================================== > RCS file: /cvs/src/lib/libc/locale/setrunelocale.c,v > retrieving revision 1.9 > diff -u -p -r1.9 setrunelocale.c > --- lib/libc/locale/setrunelocale.c 30 May 2013 18:35:55 -0000 1.9 > +++ lib/libc/locale/setrunelocale.c 30 May 2013 19:23:16 -0000 > @@ -171,17 +171,27 @@ found: > } > > int > -_xpg4_setrunelocale(const char *encoding) > +_xpg4_setrunelocale(const char *locname) > { > char path[PATH_MAX]; > _RuneLocale *rl; > int error, len; > + const char *dot, *encoding; > > - if (!strcmp(encoding, "C") || !strcmp(encoding, "POSIX")) { > + if (!strcmp(locname, "C") || !strcmp(locname, "POSIX")) { > rl = &_DefaultRuneLocale; > goto found; > } > > + /* Assume "<whatever>.<encoding>" locale name. */ > + dot = strrchr(locname, '.'); > + if (dot == NULL) { > + /* No encoding specified. Fall back to ASCII. */ > + rl = &_DefaultRuneLocale; > + goto found; > + } > + > + encoding = dot + 1; > len = snprintf(path, sizeof(path), > "%s/%s/LC_CTYPE", _PATH_LOCALE, encoding); > if (len < 0 || len >= sizeof(path)) > -- Antoine