commit: d1d1854a402b02b23b08725dac069984506f1f96 Author: Kerin Millar <kfm <AT> plushkava <DOT> net> AuthorDate: Mon Oct 20 03:54:38 2025 +0000 Commit: Kerin Millar <kfm <AT> plushkava <DOT> net> CommitDate: Mon Oct 20 05:11:44 2025 +0000 URL: https://gitweb.gentoo.org/proj/locale-gen.git/commit/?id=d1d1854a
Produce short-form aliases for a subset of valid locales

Where localedef(1) is instructed to incorporate a directory into an
archive, it is the name of the directory that determines the apparent
name of the locale. However, if the name of the directory does not
incorporate a codeset part, localedef(1) will sniff the codeset from the
contents of the directory then proceed to incorporate two locales, with
one serving as an alias of the other.

# cd /usr/lib/locale
# localedef --no-archive -i en_US -f ISO-8859-1 en_US # ambiguous
# localedef --add-to-archive --replace en_US
# locale -a
en_US
en_US.iso88591

These short-form aliases are bad for interoperability. Even among
GNU/Linux vendors, there is no consensus as to the circumstances under
which they should be offered (if at all), nor the character maps that
should be implied by effecting any of them.

That being said, locale-gen-2.23 would incorporate these aliases under
certain conditions, whereas >=locale-gen-3.0 currently does not. As such,
there is a legitimate concern as to backward-compatibility. It stands to
reason that some Gentoo users will have chosen to effect a short-form
locale. For these locales to disappear can have adverse effects on any
applications and services that they depend upon.

Address this concern by having locale-gen(8) predictably incorporate
short-form aliases for certain locale/charmap combinations. Consider the
following configuration.

# All supported combinations for the USA
en_US.UTF-8 UTF-8
en_US ISO-8859-1 # this will be aliased as "en_US"

# All supported combinations for China
zh_CN.GB18030 GB18030
zh_CN.GBK GBK
zh_CN.UTF-8 UTF-8
zh_CN GB2312 # this will be aliased as "zh_CN"

This is also the strategy employed by (at least) Fedora and Void Linux.
Reported-by: Horste Prote <prote <AT> fmi.uni-stuttgart.de> Closes: https://bugs.gentoo.org/963974 Signed-off-by: Kerin Millar <kfm <AT> plushkava.net> locale-gen | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/locale-gen b/locale-gen index 56f8eb1..b703f1d 100644 --- a/locale-gen +++ b/locale-gen @@ -64,7 +64,7 @@ umask 0022; } # Ensure that the C.UTF-8 locale is made available. - my @locales = ([ 'C', 'UTF-8', 'C.UTF-8' ]); + my @locales = ([ 'C', 'UTF-8', 'C.UTF-8', 'C.UTF-8' ]); # Compose a list of up to two configuration files to be read. my @config_files = select_config_files($prefix, %opt); @@ -116,8 +116,8 @@ umask 0022; # Integrate the compiled locales into a new locale archive. my $src_path = do { my $prior_archive = $opt{'update'} ? $dst_path : undef; - my @canonicals = map +( $_->[2] ), @locales; - generate_archive($gentoo_prefix, $locale_dir, $prior_archive, @canonicals); + my @names = map +( $_->[3] ), @locales; + generate_archive($gentoo_prefix, $locale_dir, $prior_archive, @names); }; # Install the new locale archive. @@ -381,9 +381,9 @@ sub parse_config ($fh, $path, $supported_by) { # Determine the locale name in both the form that accords with # the subdirectories of /usr/share/i18n/locales, and in the # canonical form that incorporates the <codeset> part. 
- ($locale, my $canonical) = parse_entry($locale, $charmap); + ($locale, my $canonical, my $name) = parse_entry($locale, $charmap); - push @locales, [ $locale, $charmap, $canonical ]; + push @locales, [ $locale, $charmap, $canonical, $name ]; } return @locales; @@ -391,6 +391,7 @@ sub parse_config ($fh, $path, $supported_by) { sub parse_entry ($locale, $charmap) { my $canonical; + my $name; if (2 == (my @fields = split /@/, $locale, 3)) { # de_DE@euro ISO-8859-15 => de_DE.ISO-8859-15@euro $canonical = sprintf '%s.%s@%s', $fields[0], $charmap, $fields[1]; @@ -401,8 +402,18 @@ sub parse_entry ($locale, $charmap) { } elsif (1 == @fields) { # en_US ISO-8859-1 => en_US.ISO-8859-1 $canonical = "$locale.$charmap"; + + # Where given an input path whose name does not incorporate a + # charmap, localedef(1) will incorporate it into the archive as + # an alias of its canonical name. For example, "en_US" may be + # rendered a valid name that is equivalent to "en_US.iso88591". + # It is strongly discouraged to rely on these in any capacity. + # Still, for the time being, arrange for them to exist. 
+ if ($locale ne 'C') { + $name = $locale; + } } - return $locale, $canonical; + return $locale, $canonical, $name // $canonical; } sub check_archive_dir ($prefix, $locale_dir) { @@ -454,7 +465,7 @@ sub generate_locales ($workers, @locales) { last if 0 != ($status_by{$pid} = $?); } - my ($locale, $charmap, $canonical) = $locales[$i]->@*; + my ($locale, $charmap, $canonical, $name) = $locales[$i]->@*; printf "[%*d/%d] Compiling locale: %s\n", $num_width, $i + 1, $total, $canonical; @@ -464,7 +475,7 @@ sub generate_locales ($workers, @locales) { last; } elsif ($pid == 0) { @SIG{'INT', 'TERM'} = ('DEFAULT', 'DEFAULT'); - compile_locale($locale, $charmap, $canonical); + compile_locale($locale, $charmap, $name); } } continue { last if $DEFERRED_SIGNAL; @@ -493,8 +504,8 @@ sub generate_locales ($workers, @locales) { } } -sub compile_locale ($locale, $charmap, $canonical) { - my $output_dir = "./$canonical"; +sub compile_locale ($locale, $charmap, $name) { + my $output_dir = "./$name"; run('localedef', '--no-archive', '-i', $locale, '-f', $charmap, '--', $output_dir); }
