EricWF created this revision.
EricWF added a reviewer: mclow.lists.
EricWF added a subscriber: cfe-commits.

The underlying C locales provide the `thousands_sep` and `decimal_point` as 
strings, possibly with more than one character. We currently don't handle this 
case even for `wchar_t`.

This patch properly converts the mbs -> wide character for 
`moneypunct_byname<wchar_t>`.  It doesn't handle the case for 
`moneypunct_byname<char>` because it's not clear what to do.

Note that the changes to `curr_symbol.pass.cpp` are just drive-by fixes.

https://reviews.llvm.org/D24218

Files:
  src/locale.cpp
  
test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp
  
test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp
  
test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
  test/support/test_macros.h

Index: test/support/test_macros.h
===================================================================
--- test/support/test_macros.h
+++ test/support/test_macros.h
@@ -62,6 +62,13 @@
 #endif
 #endif
 
+// Attempt to deduce GLIBC version
+#if defined(_LIBCPP_VERSION) && __has_include(<features.h>)
+#include <features.h>
+#define TEST_HAS_GLIBC
+#define TEST_GLIBC_PREREQ(major, minor) __GLIBC_PREREQ(major, minor)
+#endif
+
 /* Features that were introduced in C++14 */
 #if TEST_STD_VER >= 14
 #define TEST_HAS_EXTENDED_CONSTEXPR
Index: test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
===================================================================
--- test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
+++ test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
@@ -18,16 +18,11 @@
 
 // charT thousands_sep() const;
 
-// Failure related to GLIBC's use of U00A0 as mon_thousands_sep
-// and U002E as mon_decimal_point.
-// TODO: U00A0 should be investigated.
-// Possibly related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
-// XFAIL: linux-gnu
-
 #include <locale>
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
@@ -114,22 +109,30 @@
         Fwt f(LOCALE_fr_FR_UTF_8, 1);
         assert(f.thousands_sep() == L' ');
     }
-
+// The tests below fail due to GLIBC's use of U00A0 as mon_thousands_sep
+// and U002E as mon_decimal_point.
+// TODO: Fix thousands_sep for 'char'.
+// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
+#ifndef TEST_HAS_GLIBC
     {
         Fnf f(LOCALE_ru_RU_UTF_8, 1);
         assert(f.thousands_sep() == ' ');
     }
     {
         Fnt f(LOCALE_ru_RU_UTF_8, 1);
         assert(f.thousands_sep() == ' ');
     }
+    const wchar_t sep = L' ';
+#else
+    const wchar_t sep = L'\u00A0';
+#endif
     {
         Fwf f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.thousands_sep() == L' ');
+        assert(f.thousands_sep() == sep);
     }
     {
         Fwt f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.thousands_sep() == L' ');
+        assert(f.thousands_sep() == sep);
     }
 
     {
Index: test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp
===================================================================
--- test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp
+++ test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp
@@ -12,9 +12,6 @@
 // REQUIRES: locale.ru_RU.UTF-8
 // REQUIRES: locale.zh_CN.UTF-8
 
-// Russia uses ',' for the decimal separator. GLIBC returns '.'
-// XFAIL: linux
-
 // <locale>
 
 // class moneypunct_byname<charT, International>
@@ -25,6 +22,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
@@ -112,21 +110,30 @@
         assert(f.decimal_point() == L',');
     }
 
+// The tests below fail due to GLIBC's use of U00A0 as mon_thousands_sep
+// and U002E as mon_decimal_point.
+// TODO: Fix decimal_point for 'char'.
+// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
+#ifndef TEST_HAS_GLIBC
     {
         Fnf f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.decimal_point() == ',');
+        assert(f.decimal_point() == '.');
     }
     {
         Fnt f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.decimal_point() == ',');
+        assert(f.decimal_point() == '.');
     }
+    const wchar_t sep = L'.';
+#else
+    const wchar_t sep = L'\u002E';
+#endif
     {
         Fwf f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.decimal_point() == L',');
+        assert(f.decimal_point() == sep);
     }
     {
         Fwt f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.decimal_point() == L',');
+        assert(f.decimal_point() == sep);
     }
 
     {
Index: test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp
===================================================================
--- test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp
+++ test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp
@@ -24,6 +24,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
@@ -113,7 +114,14 @@
 
     {
         Fnf f(LOCALE_ru_RU_UTF_8, 1);
+        // GLIBC <= 2.23 uses currency_symbol="<U0440><U0443><U0431>"
+        // GLIBC >= 2.24 uses currency_symbol="<U20BD>"
+        // See also: http://www.fileformat.info/info/unicode/char/20bd/index.htm
+#if defined(TEST_GLIBC_PREREQ) && TEST_GLIBC_PREREQ(2, 24)
+        assert(f.curr_symbol() == " \xE2\x82\xBD");
+#else
         assert(f.curr_symbol() == " \xD1\x80\xD1\x83\xD0\xB1");
+#endif
     }
     {
         Fnt f(LOCALE_ru_RU_UTF_8, 1);
Index: src/locale.cpp
===================================================================
--- src/locale.cpp
+++ src/locale.cpp
@@ -5768,6 +5768,26 @@
     pat.field[3] = value;
 }
 
+static char checked_string_to_char_convert(const char* ptr) {  // Reduces a C-string locale field to a single char.
+  /*
+  // FIXME: The input may be more than one byte long; the disabled check below would reject that case.
+  if (ptr[0] && ptr[1])
+    __throw_runtime_error("locale not supported");
+  */
+  return *ptr;  // Only the first byte is returned; any bytes after it are ignored.
+}
+
+static wchar_t checked_string_to_wchar_convert(const char* ptr,  // Converts the leading multibyte character of ptr to wchar_t.
+                                               __locale_struct* loc) {  // loc is forwarded to the conversion call below.
+  mbstate_t mb = {0};  // Start from a zero-initialized conversion state.
+  wchar_t out;
+  size_t ret = __libcpp_mbrtowc_l(&out, ptr, strlen(ptr), &mb, loc);  // Presumably mirrors mbrtowc under locale loc -- TODO confirm.
+  if (ret == static_cast<size_t>(-1) || ret == static_cast<size_t>(-2)) {  // (size_t)-1 / (size_t)-2 are treated as conversion failure.
+      __throw_runtime_error("locale not supported");
+  }
+  return out;  // Callers in this patch only pass non-empty strings (they test *lc->field first).
+}
+
 template<>
 void
 moneypunct_byname<char, false>::init(const char* nm)
@@ -5780,11 +5800,11 @@
 
     lconv* lc = __libcpp_localeconv_l(loc.get());
     if (*lc->mon_decimal_point)
-        __decimal_point_ = *lc->mon_decimal_point;
+        __decimal_point_ = checked_string_to_char_convert(lc->mon_decimal_point);
     else
         __decimal_point_ = base::do_decimal_point();
     if (*lc->mon_thousands_sep)
-        __thousands_sep_ = *lc->mon_thousands_sep;
+        __thousands_sep_ = checked_string_to_char_convert(lc->mon_thousands_sep);
     else
         __thousands_sep_ = base::do_thousands_sep();
     __grouping_ = lc->mon_grouping;
@@ -5823,12 +5843,12 @@
 
     lconv* lc = __libcpp_localeconv_l(loc.get());
     if (*lc->mon_decimal_point)
-        __decimal_point_ = *lc->mon_decimal_point;
+        __decimal_point_ = checked_string_to_char_convert(lc->mon_decimal_point);
     else
         __decimal_point_ = base::do_decimal_point();
-    if (*lc->mon_thousands_sep)
-        __thousands_sep_ = *lc->mon_thousands_sep;
-    else
+    if (*lc->mon_thousands_sep) {
+        __thousands_sep_ = checked_string_to_char_convert(lc->mon_thousands_sep);
+    } else
         __thousands_sep_ = base::do_thousands_sep();
     __grouping_ = lc->mon_grouping;
     __curr_symbol_ = lc->int_curr_symbol;
@@ -5882,11 +5902,13 @@
                             " failed to construct for " + string(nm));
     lconv* lc = __libcpp_localeconv_l(loc.get());
     if (*lc->mon_decimal_point)
-        __decimal_point_ = static_cast<wchar_t>(*lc->mon_decimal_point);
+        __decimal_point_ = checked_string_to_wchar_convert(lc->mon_decimal_point,
+                                                           loc.get());
     else
         __decimal_point_ = base::do_decimal_point();
     if (*lc->mon_thousands_sep)
-        __thousands_sep_ = static_cast<wchar_t>(*lc->mon_thousands_sep);
+        __thousands_sep_ = checked_string_to_wchar_convert(lc->mon_thousands_sep,
+                                                            loc.get());
     else
         __thousands_sep_ = base::do_thousands_sep();
     __grouping_ = lc->mon_grouping;
@@ -5948,11 +5970,13 @@
 
     lconv* lc = __libcpp_localeconv_l(loc.get());
     if (*lc->mon_decimal_point)
-        __decimal_point_ = static_cast<wchar_t>(*lc->mon_decimal_point);
+        __decimal_point_ = checked_string_to_wchar_convert(lc->mon_decimal_point,
+                                                           loc.get());
     else
         __decimal_point_ = base::do_decimal_point();
     if (*lc->mon_thousands_sep)
-        __thousands_sep_ = static_cast<wchar_t>(*lc->mon_thousands_sep);
+      __thousands_sep_ = checked_string_to_wchar_convert(lc->mon_thousands_sep,
+                                                         loc.get());
     else
         __thousands_sep_ = base::do_thousands_sep();
     __grouping_ = lc->mon_grouping;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to