commit:     d6f9f0ef8b3892b45207f7883268c3d653071192
Author:     Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Thu Jan  8 02:19:06 2026 +0000
Commit:     Kerin Millar <kfm <AT> plushkava <DOT> net>
CommitDate: Thu Jan  8 02:23:02 2026 +0000
URL:        https://gitweb.gentoo.org/proj/locale-gen.git/commit/?id=d6f9f0ef

Support locale declarations consisting of only one field

This commit adjusts the parse_config() subroutine so that a well-formed
locale declaration is one that consists of either one or two fields.
Absent the second field, the charmap value shall be taken as "UTF-8".

Backward-compatibility is maintained. That is, any configuration that
could be validated by locale-gen v3.9 shall continue to be valid, as is
demonstrated by the revised EXAMPLES section of locale.gen(5):

# The following three declarations are considered as identical.
en_US.UTF-8  UTF-8 # trailing comments are permitted
en_US        UTF-8 # the charmap may be omitted from the localename
en_US              # the charmap is taken as UTF-8

The mkconfig utility has also been adjusted so as to take advantage of
this simplified configuration scheme.

See-also: fc0c62e2a69cd2e15910f407a4092487c78c63d8
Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>

 locale-gen   | 19 +++++++++++++++----
 locale.gen.5 | 21 +++++++++------------
 mkconfig     |  2 +-
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/locale-gen b/locale-gen
index 60a15fe..a3d718e 100644
--- a/locale-gen
+++ b/locale-gen
@@ -366,12 +366,17 @@ sub parse_config ($fh, $path, $supported_by, $be_strict) {
                # Permit comments trailing locale declarations.
                $line =~ s/\h\K#\h.*//;
 
-               # Expect for two fields, separated by horizontal whitespace.
-               # For the benefit of the lax validation mode, neither field may
-               # contain any <slash> characters.
                chomp $line;
+
+               # A well-formed entry must contain either one or two fields. The
+               # first defines the localename. The second defines the charmap.
+               # If the second field is missing, a value of "UTF-8" is assumed.
+               # The <slash> character is forbidden in both fields.
+               my @fields;
                my ($locale, $charmap);
-               if (2 != (($locale, $charmap) = $line =~ 
m/^\h*([^\h\/]+)\h+([^\h\/]+)\h*\z/)) {
+               if (0 < (@fields = split /\h+/, trim_line($line), 3) < 3 && ! 
grep +( m/\// ), @fields) {
+                       ($locale, $charmap) = ($fields[0], $fields[1] // 
'UTF-8');
+               } else {
                        $thrower->('Malformed locale declaration', 1);
                }
 
@@ -759,6 +764,12 @@ sub is_eq_file ($path1, $path2) {
        return @stat1 && @stat2 && $stat1[1] == $stat2[1];
 }
 
+sub trim_line ($line) { # Returns $line with leading and trailing horizontal whitespace removed.
+       $line =~ s/^\h+//; # strip leading blanks; \h matches horizontal whitespace only
+       $line =~ s/\h+$//; # strip trailing blanks; a final newline, if present, is kept
+       return $line;
+}
+
 END {
        if ($$ == $PID) {
                if (@TEMPFILES) {

diff --git a/locale.gen.5 b/locale.gen.5
index fcb8643..7ce2185 100644
--- a/locale.gen.5
+++ b/locale.gen.5
@@ -4,7 +4,7 @@ locale.gen - configuration file for locale\-gen
 .SH "DESCRIPTION"
 The \fBlocale\-gen\fR(8) utility compiles the locales specified by the 
\fI/etc/locale.gen\fR file and integrates them into a locale archive.
 .P
-The file is comprised of zero or more lines containing two fields separated by 
one or more <blank> characters. The first field defines the \fIlocalename\fR. 
The second field defines the \fIcharmap\fR, also known as a character set or 
codeset. The <slash> character is forbidden within both fields.
+The file is comprised of zero or more lines containing one or two fields 
separated by one or more <blank> characters. The first field defines the 
\fIlocalename\fR. The second field defines the \fIcharmap\fR, also known as a 
character set or codeset. The <slash> character is forbidden within both 
fields. If only one field is specified, the \fIcharmap\fR value shall be taken 
as "UTF-8".
 .P
 Empty lines and lines beginning with zero or more <blank> characters followed 
by a <number\-sign> character shall be ignored.
 .P
@@ -31,19 +31,16 @@ A \fIcharmap\fR represents a mapping of character symbols 
and collating element
 The complete list of supported \fIcharmap\fR values can be obtained by reading 
the second column of the \fI/usr/share/i18n/SUPPORTED\fR file. The values map 
directly to the names of files within the \fI/usr/share/i18n/charmaps\fR 
directory.
 .SH "EXAMPLES"
 .EX
-# This is a comment, followed by some valid locale declarations.
-en_US        UTF\-8 # trailing comments are permitted
-en_US.UTF-8  UTF\-8 # considered as identical to the preceding entry
-zh_CN        UTF\-8
-de_DE        UTF\-8
-hi_IN        UTF\-8
-ja_JP        UTF\-8
+# The following three declarations are considered as identical.
+en_US.UTF-8  UTF-8 # trailing comments are permitted
+en_US        UTF-8 # the charmap may be omitted from the localename
+en_US              # the charmap is taken as UTF-8
 .sp
-# Invalid because the localename/charmap combination is unsupported.
-en_US        GB2312
+# Other charmaps may be specified (not recommended).
+en_US        ISO-8859-1
 .sp
-# Malformed because the charmap field is missing.
-en_US
+# Invalid because the localename/charmap pair is unsupported.
+en_US        GB2312
 .EE
 .SH "FILES"
 .TP

diff --git a/mkconfig b/mkconfig
index 3b57912..64b9eba 100755
--- a/mkconfig
+++ b/mkconfig
@@ -56,7 +56,7 @@ use File::Slurper qw(read_dir read_lines read_text);
                        if (length $territory) {
                                $comment .= " ($territory)";
                        }
-                       printf {$pipe} "# %s\037%s\037# %s\n", $locale, 
$charmap, $comment;
+                       print {$pipe} "# ${locale}\037# ${comment}\n";
                }
        }
        close $pipe or exit 1;

Reply via email to