Re: nl_langinfo and localized day/month names

Paul Eggert Sat, 05 Jul 2014 15:00:07 -0700

Thanks, I tried that out on Fedora 20 (pretending that its nl_langinfo didn't exist) and had some trouble building and testing it, indicating trouble for other POSIXish platforms. I fixed the problems I found. A few things:

* nl_langinfo (FRAC_DIGITS) returns a pointer to a character, not a string, and similarly for the other values representing tiny integers. There's no need for null-termination.


* struct tm might not have the traditional Unix v7 layout.

* Some code can be shared between rpl_nl_langinfo and nl_langinfo by hoisting it into a shared static function ctype_codeset.

* Often it's better to use char[][] constants instead of char *[] constants when strings don't vary in size much, as this avoids unnecessary relocations in shared libraries.

* I didn't follow why tmm had some members updated but not others. Does Microsoft strftime care about otherwise-unused struct tm members? If so, perhaps we should initialize all the members to a valid and consistent value; if not, let's not bother with anything but the minimal.


Revised proposed patch attached.

From 4630fdeb658f96fce13ed2d4d9d89e9955aaa273 Mon Sep 17 00:00:00 2001
From: Eli Zaretskii <[email protected]>
Date: Sat, 5 Jul 2014 14:42:47 -0700
Subject: [PATCH] nl_langinfo: CODESET on MS-Windows and more items from
 localeconv

* lib/langinfo.in.h (DECIMAL_POINT, THOUSANDS_SEP, GROUPING)
(CURRENCY_SYMBOL, INT_CURR_SYMBOL, MON_DECIMAL_POINT)
(MON_THOUSANDS_SEP, MON_GROUPING, POSITIVE_SIGN, NEGATIVE_SIGN)
(FRAC_DIGITS, INT_FRAC_DIGITS, P_CS_PRECEDES, N_CS_PRECEDES)
(P_SEP_BY_SPACE, N_SEP_BY_SPACE, P_SIGN_POSN, N_SIGN_POSN): Define.
* lib/nl_langinfo.c: Include <locale.h> and <string.h> early.
Include <time.h> if !REPLACE_NL_LANGINFO.
(ctype_codeset): New function, taken from rpl_nl_langinfo.
(rpl_nl_langinfo): Use it.
(nl_langinfo) [!REPLACE_NL_LANGINFO]: Likewise.
Compute the values of RADIXCHAR, THOUSEP, GROUPING, CRNCYSTR,
INT_CURR_SYMBOL, MON_DECIMAL_POINT, MON_THOUSANDS_SEP,
MON_GROUPING, POSITIVE_SIGN, NEGATIVE_SIGN, FRAC_DIGITS,
INT_FRAC_DIGITS, P_CS_PRECEDES, N_CS_PRECEDES, P_SEP_BY_SPACE,
N_SEP_BY_SPACE, P_SIGN_POSN, and N_SIGN_POSN from the
corresponding values returned by 'localeconv'.  Compute the values
of AM_STR, PM_STR, DAY_n, ABDAY_n, MON_n, and ABMON_n by calling
'strftime' with a suitable struct tm value.
---
 ChangeLog         |  23 ++++++
 lib/langinfo.in.h |  18 +++++
 lib/nl_langinfo.c | 215 +++++++++++++++++++++++++++++++++---------------------
 3 files changed, 172 insertions(+), 84 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index eccf03e..0e800a1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2014-07-05  Eli Zaretskii  <[email protected]>
+           Paul Eggert  <[email protected]>
+
+       nl_langinfo: CODESET on MS-Windows and more items from localeconv
+       * lib/langinfo.in.h (DECIMAL_POINT, THOUSANDS_SEP, GROUPING)
+       (CURRENCY_SYMBOL, INT_CURR_SYMBOL, MON_DECIMAL_POINT)
+       (MON_THOUSANDS_SEP, MON_GROUPING, POSITIVE_SIGN, NEGATIVE_SIGN)
+       (FRAC_DIGITS, INT_FRAC_DIGITS, P_CS_PRECEDES, N_CS_PRECEDES)
+       (P_SEP_BY_SPACE, N_SEP_BY_SPACE, P_SIGN_POSN, N_SIGN_POSN): Define.
+       * lib/nl_langinfo.c: Include <locale.h> and <string.h> early.
+       Include <time.h> if !REPLACE_NL_LANGINFO.
+       (ctype_codeset): New function, taken from rpl_nl_langinfo.
+       (rpl_nl_langinfo): Use it.
+       (nl_langinfo) [!REPLACE_NL_LANGINFO]: Likewise.
+       Compute the values of RADIXCHAR, THOUSEP, GROUPING, CRNCYSTR,
+       INT_CURR_SYMBOL, MON_DECIMAL_POINT, MON_THOUSANDS_SEP,
+       MON_GROUPING, POSITIVE_SIGN, NEGATIVE_SIGN, FRAC_DIGITS,
+       INT_FRAC_DIGITS, P_CS_PRECEDES, N_CS_PRECEDES, P_SEP_BY_SPACE,
+       N_SEP_BY_SPACE, P_SIGN_POSN, and N_SIGN_POSN from the
+       corresponding values returned by 'localeconv'.  Compute the values
+       of AM_STR, PM_STR, DAY_n, ABDAY_n, MON_n, and ABMON_n by calling
+       'strftime' with a suitable struct tm value.
+
 2014-07-05  Paul Eggert  <[email protected]>
 
        Bruno Haible has stepped down as maintainer.
diff --git a/lib/langinfo.in.h b/lib/langinfo.in.h
index 17f4b9b..d118d5d 100644
--- a/lib/langinfo.in.h
+++ b/lib/langinfo.in.h
@@ -49,7 +49,10 @@ typedef int nl_item;
 # define CODESET     10000
 /* nl_langinfo items of the LC_NUMERIC category */
 # define RADIXCHAR   10001
+# define DECIMAL_POINT RADIXCHAR
 # define THOUSEP     10002
+# define THOUSANDS_SEP THOUSEP
+# define GROUPING    10114
 /* nl_langinfo items of the LC_TIME category */
 # define D_T_FMT     10003
 # define D_FMT       10004
@@ -102,6 +105,21 @@ typedef int nl_item;
 # define ALT_DIGITS  10051
 /* nl_langinfo items of the LC_MONETARY category */
 # define CRNCYSTR    10052
+# define CURRENCY_SYMBOL   CRNCYSTR
+# define INT_CURR_SYMBOL   10100
+# define MON_DECIMAL_POINT 10101
+# define MON_THOUSANDS_SEP 10102
+# define MON_GROUPING      10103
+# define POSITIVE_SIGN     10104
+# define NEGATIVE_SIGN     10105
+# define FRAC_DIGITS       10106
+# define INT_FRAC_DIGITS   10107
+# define P_CS_PRECEDES     10108
+# define N_CS_PRECEDES     10109
+# define P_SEP_BY_SPACE    10110
+# define N_SEP_BY_SPACE    10111
+# define P_SIGN_POSN       10112
+# define N_SIGN_POSN       10113
 /* nl_langinfo items of the LC_MESSAGES category */
 # define YESEXPR     10053
 # define NOEXPR      10054
diff --git a/lib/nl_langinfo.c b/lib/nl_langinfo.c
index 287abfd..e461186 100644
--- a/lib/nl_langinfo.c
+++ b/lib/nl_langinfo.c
@@ -20,13 +20,47 @@
 /* Specification.  */
 #include <langinfo.h>
 
+#include <locale.h>
+#include <string.h>
+
+/* Return the codeset of the current locale, if this is easily deducible.
+   Otherwise, return "".  */
+static char *
+ctype_codeset (void)
+{
+  static char buf[2 + 10 + 1];
+  size_t buflen = 0;
+  char const *locale = setlocale (LC_CTYPE, NULL);
+
+  if (locale && locale[0])
+    {
+      /* If the locale name contains an encoding after the dot, return it.  */
+      char *dot = strchr (locale, '.');
+
+      if (dot)
+        {
+          /* Look for the possible @... trailer and remove it, if any.  */
+          char const *modifier = strchr (++dot, '@');
+
+          if (! modifier)
+            return dot;
+          if (modifier - dot < sizeof buf)
+            {
+              buflen = modifier - dot;
+              memcpy (buf, dot, buflen);
+            }
+        }
+    }
+
+  buf[buflen] = '\0';
+  return buf;
+}
+
+
 #if REPLACE_NL_LANGINFO
 
 /* Override nl_langinfo with support for added nl_item values.  */
 
-# include <locale.h>
-# include <string.h>
-
 # undef nl_langinfo
 
 char *
@@ -36,36 +70,7 @@ rpl_nl_langinfo (nl_item item)
     {
 # if GNULIB_defined_CODESET
     case CODESET:
-      {
-        const char *locale;
-        static char buf[2 + 10 + 1];
-
-        locale = setlocale (LC_CTYPE, NULL);
-        if (locale != NULL && locale[0] != '\0')
-          {
-            /* If the locale name contains an encoding after the dot, return
-               it.  */
-            const char *dot = strchr (locale, '.');
-
-            if (dot != NULL)
-              {
-                const char *modifier;
-
-                dot++;
-                /* Look for the possible @... trailer and remove it, if any.  */
-                modifier = strchr (dot, '@');
-                if (modifier == NULL)
-                  return dot;
-                if (modifier - dot < sizeof (buf))
-                  {
-                    memcpy (buf, dot, modifier - dot);
-                    buf [modifier - dot] = '\0';
-                    return buf;
-                  }
-              }
-          }
-        return "";
-      }
+      return ctype_codeset ();
 # endif
 # if GNULIB_defined_T_FMT_AMPM
     case T_FMT_AMPM:
@@ -128,24 +133,31 @@ rpl_nl_langinfo (nl_item item)
 
 # endif
 
-# include <locale.h>
+# include <time.h>
 
 char *
 nl_langinfo (nl_item item)
 {
+  static char nlbuf[100];
+  struct tm tmm = { 0 };
+
   switch (item)
     {
     /* nl_langinfo items of the LC_CTYPE category */
     case CODESET:
-# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
       {
-        static char buf[2 + 10 + 1];
-
-        /* The Windows API has a function returning the locale's codepage as
-           a number.  */
-        sprintf (buf, "CP%u", GetACP ());
-        return buf;
+        char *codeset = ctype_codeset ();
+        if (*codeset)
+          return codeset;
       }
+# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
+      /* The Windows API has a function returning the locale's
+         codepage as a number, but the value doesn't change according
+         to what the 'setlocale' call specified.  So use it as a
+         last resort, in case the string returned by 'setlocale'
+         doesn't specify the codepage.  */
+      sprintf (nlbuf, "CP%u", GetACP ());
+      return nlbuf;
 # elif defined __BEOS__
       return "UTF-8";
 # else
@@ -156,6 +168,8 @@ nl_langinfo (nl_item item)
       return localeconv () ->decimal_point;
     case THOUSEP:
       return localeconv () ->thousands_sep;
+    case GROUPING:
+      return localeconv () ->grouping;
     /* nl_langinfo items of the LC_TIME category.
        TODO: Really use the locale.  */
     case D_T_FMT:
@@ -170,93 +184,126 @@ nl_langinfo (nl_item item)
     case T_FMT_AMPM:
       return "%I:%M:%S %p";
     case AM_STR:
-      return "AM";
+      if (!strftime (nlbuf, sizeof nlbuf, "%p", &tmm))
+        return "AM";
+      return nlbuf;
     case PM_STR:
-      return "PM";
+      tmm.tm_hour = 12;
+      if (!strftime (nlbuf, sizeof nlbuf, "%p", &tmm))
+        return "PM";
+      return nlbuf;
     case DAY_1:
-      return "Sunday";
     case DAY_2:
-      return "Monday";
     case DAY_3:
-      return "Tuesday";
     case DAY_4:
-      return "Wednesday";
     case DAY_5:
-      return "Thursday";
     case DAY_6:
-      return "Friday";
     case DAY_7:
-      return "Saturday";
+      {
+        static char const days[][sizeof "Wednesday"] = {
+          "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
+          "Friday", "Saturday"
+        };
+        tmm.tm_wday = item - DAY_1;
+        if (!strftime (nlbuf, sizeof nlbuf, "%A", &tmm))
+          return (char *) days[item - DAY_1];
+        return nlbuf;
+      }
     case ABDAY_1:
-      return "Sun";
     case ABDAY_2:
-      return "Mon";
     case ABDAY_3:
-      return "Tue";
     case ABDAY_4:
-      return "Wed";
     case ABDAY_5:
-      return "Thu";
     case ABDAY_6:
-      return "Fri";
     case ABDAY_7:
-      return "Sat";
+      {
+        static char const abdays[][sizeof "Sun"] = {
+          "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+        };
+        tmm.tm_wday = item - ABDAY_1;
+        if (!strftime (nlbuf, sizeof nlbuf, "%a", &tmm))
+          return (char *) abdays[item - ABDAY_1];
+        return nlbuf;
+      }
     case MON_1:
-      return "January";
     case MON_2:
-      return "February";
     case MON_3:
-      return "March";
     case MON_4:
-      return "April";
     case MON_5:
-      return "May";
     case MON_6:
-      return "June";
     case MON_7:
-      return "July";
     case MON_8:
-      return "August";
     case MON_9:
-      return "September";
     case MON_10:
-      return "October";
     case MON_11:
-      return "November";
     case MON_12:
-      return "December";
+      {
+        static char const months[][sizeof "September"] = {
+          "January", "February", "March", "April", "May", "June", "July",
+          "August", "September", "October", "November", "December"
+        };
+        tmm.tm_mon = item - MON_1;
+        if (!strftime (nlbuf, sizeof nlbuf, "%B", &tmm))
+          return (char *) months[item - MON_1];
+        return nlbuf;
+      }
     case ABMON_1:
-      return "Jan";
     case ABMON_2:
-      return "Feb";
     case ABMON_3:
-      return "Mar";
     case ABMON_4:
-      return "Apr";
     case ABMON_5:
-      return "May";
     case ABMON_6:
-      return "Jun";
     case ABMON_7:
-      return "Jul";
     case ABMON_8:
-      return "Aug";
     case ABMON_9:
-      return "Sep";
     case ABMON_10:
-      return "Oct";
     case ABMON_11:
-      return "Nov";
     case ABMON_12:
-      return "Dec";
+      {
+        static char const abmonths[][sizeof "Jan"] = {
+          "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
+          "Aug", "Sep", "Oct", "Nov", "Dec"
+        };
+        tmm.tm_mon = item - ABMON_1;
+        if (!strftime (nlbuf, sizeof nlbuf, "%b", &tmm))
+          return (char *) abmonths[item - ABMON_1];
+        return nlbuf;
+      }
     case ERA:
       return "";
     case ALT_DIGITS:
       return "\0\0\0\0\0\0\0\0\0\0";
-    /* nl_langinfo items of the LC_MONETARY category
-       TODO: Really use the locale. */
+    /* nl_langinfo items of the LC_MONETARY category.  */
     case CRNCYSTR:
-      return "-";
+      return localeconv () ->currency_symbol;
+    case INT_CURR_SYMBOL:
+      return localeconv () ->int_curr_symbol;
+    case MON_DECIMAL_POINT:
+      return localeconv () ->mon_decimal_point;
+    case MON_THOUSANDS_SEP:
+      return localeconv () ->mon_thousands_sep;
+    case MON_GROUPING:
+      return localeconv () ->mon_grouping;
+    case POSITIVE_SIGN:
+      return localeconv () ->positive_sign;
+    case NEGATIVE_SIGN:
+      return localeconv () ->negative_sign;
+    case FRAC_DIGITS:
+      return & localeconv () ->frac_digits;
+    case INT_FRAC_DIGITS:
+      return & localeconv () ->int_frac_digits;
+    case P_CS_PRECEDES:
+      return & localeconv () ->p_cs_precedes;
+    case N_CS_PRECEDES:
+      return & localeconv () ->n_cs_precedes;
+    case P_SEP_BY_SPACE:
+      return & localeconv () ->p_sep_by_space;
+    case N_SEP_BY_SPACE:
+      return & localeconv () ->n_sep_by_space;
+    case P_SIGN_POSN:
+      return & localeconv () ->p_sign_posn;
+    case N_SIGN_POSN:
+      return & localeconv () ->n_sign_posn;
     /* nl_langinfo items of the LC_MESSAGES category
        TODO: Really use the locale. */
     case YESEXPR:
-- 
1.9.3

Re: nl_langinfo and localized day/month names

Reply via email to