commit:     588e3ede89a6b630420839e704cf975a28f0d981
Author:     Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Sun Oct 19 21:03:30 2025 +0000
Commit:     Kerin Millar <kfm <AT> plushkava <DOT> net>
CommitDate: Sun Oct 19 22:35:31 2025 +0000
URL:        https://gitweb.gentoo.org/proj/locale-gen.git/commit/?id=588e3ede

Validate the config strictly against the SUPPORTED file

Presently, the locale.gen configuration file supports arbitrary
locale/charmap pairings, provided that the specified locale and charmap
are both known to glibc.

This commit alters the behaviour so that only the combinations specified
by the /usr/share/i18n/SUPPORTED file are tolerated.

# echo 'en_US BIG5' | locale-gen -c -
locale-gen: Unsupported locale/charmap combination at /dev/stdin[1]: "en_US BIG5"

The reasons for this change are twofold. Firstly, to dispense with any
bickering as to precisely what should constitute a valid configuration.
Indeed, the new behaviour accords with the text of locale.gen(5), which
directly states that the supported combinations are to be found within
the SUPPORTED file, which - lest we forget - is a component of glibc.

Secondly, to lay the groundwork for an ensuing commit, which shall
pertain to a matter of backward-compatibility.

Bug: https://bugs.gentoo.org/963974
Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>

 locale-gen | 75 ++++++++++++++++++++++++--------------------------------------
 1 file changed, 29 insertions(+), 46 deletions(-)

diff --git a/locale-gen b/locale-gen
index 2465cb2..56f8eb1 100644
--- a/locale-gen
+++ b/locale-gen
@@ -7,7 +7,6 @@
 
 use v5.36;
 
-use Cwd qw(getcwd);
 use Errno qw(ENOENT);
 use File::Spec::Functions qw(canonpath catfile catdir path splitpath);
 use File::Temp qw(tempdir);
@@ -276,11 +275,7 @@ sub normalize ($canonical) {
 }
 
 sub read_config ($prefix, @paths) {
-       # Compose a dictionary of locale names known to be valid.
-       my %locale_by = map +( $_ => 1 ), get_valid_locales($prefix);
-
-       # Compose a dictionary of character maps known to be valid.
-       my %charmap_by = map +( $_ => 1 ), get_valid_charmaps($prefix);
+       my $supported_by = map_supported_combinations($prefix);
 
        # Iterate over the given paths and return the first non-empty list of
        # valid locale declarations that can be found among them, if any.
@@ -298,7 +293,7 @@ sub read_config ($prefix, @paths) {
                                die $e;
                        }
                }
-               my @locales = parse_config($fh, $path, \%locale_by, 
\%charmap_by);
+               my @locales = parse_config($fh, $path, $supported_by);
                if (my $count = scalar @locales) {
                        printf "Found %d locale declaration%s in '%s'.\n",
                                $count, plural($count), $path;
@@ -311,34 +306,21 @@ sub read_config ($prefix, @paths) {
        die "$PROGRAM: No locale declarations were found within $path_list\n";
 }
 
-sub get_valid_locales ($prefix) {
-       my $cmd = qq{ find . ! -path . -prune ! -path '*\n*' -type f -exec grep 
-lxF LC_IDENTIFICATION {} + };
-       my $top = catdir($prefix, '/usr/share/i18n/locales');
-       my $pwd = getcwd();
-       if (! chdir $top) {
-               die "$PROGRAM: Can't chdir to '$top': $!\n";
-       } elsif (! (my @paths = readpipe $cmd) || $? != 0) {
-               die "$PROGRAM: Failed to compose a list of valid locale names 
from '$top'\n";
-       } elsif (defined $pwd && ! chdir $pwd) {
-               die "$PROGRAM: Can't chdir to '$pwd': $!\n";
-       } else {
-               chomp @paths;
-               return map +basename($_), @paths;
-       }
-}
-
-sub get_valid_charmaps ($prefix) {
-       my $top = catdir($prefix, '/usr/share/i18n/charmaps');
-       if (! opendir my $dh, $top) {
-               die "$PROGRAM: Can't open '$top' for reading: $!\n";
-       } elsif (! (my @names = map +( -f "$top/$_" ? s/\.(gz|bz2)\z//nr : () 
), readdir $dh)) {
-               die "$PROGRAM: Failed to compose a list of valid character maps 
from '$top'\n";
-       } else {
-               return @names;
+sub map_supported_combinations ($prefix) {
+       my $path = catfile($prefix, '/usr/share/i18n', 'SUPPORTED');
+       my $fh = fopen($path);
+       my %supported_by;
+       while (my $line = readline $fh) {
+               chomp $line;
+               if (2 == (my ($locale, $charmap) = split ' ', $line)) {
+                       $supported_by{$locale}{$charmap} = 1;
+                       $supported_by{''}{$charmap} = 1;
+               }
        }
+       return \%supported_by;
 }
 
-sub parse_config ($fh, $path, $locale_by, $charmap_by) {
+sub parse_config ($fh, $path, $supported_by) {
        # Set up a helper routine to throw for validation errors.
        my $thrower = sub ($error, $line) {
                die sprintf "%s: %s at %s[%d]: %s\n",
@@ -386,20 +368,22 @@ sub parse_config ($fh, $path, $locale_by, $charmap_by) {
                        }
                }
 
-               # Extract the specified locale and character map. Upon success,
-               # a canonicalised representation of the locale is also returned.
-               my ($locale, $codeset, $charmap, $canonical) = 
parse_entry(@fields);
-
                # Validate both locale and character map before accepting.
-               if (! $locale_by->{$locale}) {
+               my ($locale, $charmap) = @fields;
+               if (! $supported_by->{$locale}) {
                        $thrower->('Invalid locale', $line);
-               } elsif (defined $codeset && $codeset ne $charmap) {
-                       $thrower->('Mismatching codeset/charmap', $line);
-               } elsif (! $charmap_by->{$charmap}) {
+               } elsif (! $supported_by->{''}{$charmap}) {
                        $thrower->('Invalid charmap', $line);
-               } else {
-                       push @locales, [ $locale, $charmap, $canonical ];
+               } elsif (! $supported_by->{$locale}{$charmap}) {
+                       $thrower->('Unsupported locale/charmap combination', 
$line);
                }
+
+               # Determine the locale name in both the form that accords with
+               # the subdirectories of /usr/share/i18n/locales, and in the
+               # canonical form that incorporates the <codeset> part.
+               ($locale, my $canonical) = parse_entry($locale, $charmap);
+
+               push @locales, [ $locale, $charmap, $canonical ];
        }
 
        return @locales;
@@ -407,19 +391,18 @@ sub parse_config ($fh, $path, $locale_by, $charmap_by) {
 
 sub parse_entry ($locale, $charmap) {
        my $canonical;
-       my $codeset;
        if (2 == (my @fields = split /@/, $locale, 3)) {
                # de_DE@euro ISO-8859-15 => de_DE.ISO-8859-15@euro
                $canonical = sprintf '%s.%s@%s', $fields[0], $charmap, 
$fields[1];
        } elsif (2 == (@fields = split /\./, $locale, 3)) {
                # en_US.UTF-8 UTF-8 => en_US.UTF-8
-               ($locale, $codeset) = @fields;
-               $canonical = "$locale.$codeset";
+               $locale = $fields[0];
+               $canonical = "$locale.$charmap";
        } elsif (1 == @fields) {
                # en_US ISO-8859-1 => en_US.ISO-8859-1
                $canonical = "$locale.$charmap";
        }
-       return $locale, $codeset, $charmap, $canonical;
+       return $locale, $canonical;
 }
 
 sub check_archive_dir ($prefix, $locale_dir) {

Reply via email to