commit: d6f9f0ef8b3892b45207f7883268c3d653071192
Author: Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Thu Jan 8 02:19:06 2026 +0000
Commit: Kerin Millar <kfm <AT> plushkava <DOT> net>
CommitDate: Thu Jan 8 02:23:02 2026 +0000
URL: https://gitweb.gentoo.org/proj/locale-gen.git/commit/?id=d6f9f0ef
Support locale declarations consisting of only one field
This commit adjusts the parse_config() subroutine so that a well-formed
locale declaration is one that consists of either one or two fields.
Absent the second field, the charmap value shall be taken as "UTF-8".
Backward-compatibility is maintained. That is, any configuration that
could be validated by locale-gen v3.9 shall continue to be valid, as is
demonstrated by the revised EXAMPLES section of locale.gen(5):
# The following three declarations are considered as identical.
en_US.UTF-8 UTF-8 # trailing comments are permitted
en_US UTF-8 # the charmap may be omitted from the localename
en_US # the charmap is taken as UTF-8
The mkconfig utility has also been adjusted so as to take advantage of
this simplified configuration scheme.
See-also: fc0c62e2a69cd2e15910f407a4092487c78c63d8
Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>
locale-gen | 19 +++++++++++++++----
locale.gen.5 | 21 +++++++++------------
mkconfig | 2 +-
3 files changed, 25 insertions(+), 17 deletions(-)
diff --git a/locale-gen b/locale-gen
index 60a15fe..a3d718e 100644
--- a/locale-gen
+++ b/locale-gen
@@ -366,12 +366,17 @@ sub parse_config ($fh, $path, $supported_by, $be_strict) {
# Permit comments trailing locale declarations.
$line =~ s/\h\K#\h.*//;
- # Expect for two fields, separated by horizontal whitespace.
- # For the benefit of the lax validation mode, neither field may
- # contain any <slash> characters.
chomp $line;
+
+ # A well-formed entry must contain either one or two fields. The
+ # first defines the localename. The second defines the charmap.
+ # If the second field is missing, a value of "UTF-8" is assumed.
+ # The <slash> character is forbidden in both fields.
+ my @fields;
my ($locale, $charmap);
- if (2 != (($locale, $charmap) = $line =~
m/^\h*([^\h\/]+)\h+([^\h\/]+)\h*\z/)) {
+ if (0 < (@fields = split /\h+/, trim_line($line), 3) < 3 && !
grep +( m/\// ), @fields) {
+ ($locale, $charmap) = ($fields[0], $fields[1] //
'UTF-8');
+ } else {
$thrower->('Malformed locale declaration', 1);
}
@@ -759,6 +764,12 @@ sub is_eq_file ($path1, $path2) {
return @stat1 && @stat2 && $stat1[1] == $stat2[1];
}
+sub trim_line ($line) {
+ # Strip leading, then trailing, horizontal whitespace; return the copy.
+ my $trimmed = $line =~ s/^\h+//r;
+ return $trimmed =~ s/\h+$//r;
+}
+
END {
if ($$ == $PID) {
if (@TEMPFILES) {
diff --git a/locale.gen.5 b/locale.gen.5
index fcb8643..7ce2185 100644
--- a/locale.gen.5
+++ b/locale.gen.5
@@ -4,7 +4,7 @@ locale.gen - configuration file for locale\-gen
.SH "DESCRIPTION"
The \fBlocale\-gen\fR(8) utility compiles the locales specified by the
\fI/etc/locale.gen\fR file and integrates them into a locale archive.
.P
-The file is comprised of zero or more lines containing two fields separated by
one or more <blank> characters. The first field defines the \fIlocalename\fR.
The second field defines the \fIcharmap\fR, also known as a character set or
codeset. The <slash> character is forbidden within both fields.
+The file is comprised of zero or more lines containing one or two fields
separated by one or more <blank> characters. The first field defines the
\fIlocalename\fR. The second field defines the \fIcharmap\fR, also known as a
character set or codeset. The <slash> character is forbidden within both
fields. If only one field is specified, the \fIcharmap\fR value shall be taken
as "UTF-8".
.P
Empty lines and lines beginning with zero or more <blank> characters followed
by a <number\-sign> character shall be ignored.
.P
@@ -31,19 +31,16 @@ A \fIcharmap\fR represents a mapping of character symbols
and collating element
The complete list of supported \fIcharmap\fR values can be obtained by reading
the second column of the \fI/usr/share/i18n/SUPPORTED\fR file. The values map
directly to the names of files within the \fI/usr/share/i18n/charmaps\fR
directory.
.SH "EXAMPLES"
.EX
-# This is a comment, followed by some valid locale declarations.
-en_US UTF\-8 # trailing comments are permitted
-en_US.UTF-8 UTF\-8 # considered as identical to the preceding entry
-zh_CN UTF\-8
-de_DE UTF\-8
-hi_IN UTF\-8
-ja_JP UTF\-8
+# The following three declarations are considered as identical.
+en_US.UTF\-8 UTF\-8 # trailing comments are permitted
+en_US UTF\-8 # the charmap may be omitted from the localename
+en_US # the charmap is taken as UTF\-8
.sp
-# Invalid because the localename/charmap combination is unsupported.
-en_US GB2312
+# Other charmaps may be specified (not recommended).
+en_US ISO\-8859\-1
.sp
-# Malformed because the charmap field is missing.
-en_US
+# Invalid because the localename/charmap pair is unsupported.
+en_US GB2312
.EE
.SH "FILES"
.TP
diff --git a/mkconfig b/mkconfig
index 3b57912..64b9eba 100755
--- a/mkconfig
+++ b/mkconfig
@@ -56,7 +56,7 @@ use File::Slurper qw(read_dir read_lines read_text);
if (length $territory) {
$comment .= " ($territory)";
}
- printf {$pipe} "# %s\037%s\037# %s\n", $locale,
$charmap, $comment;
+ print {$pipe} "# ${locale}\037# ${comment}\n";
}
}
close $pipe or exit 1;