[libcxx] r289347 - [libc++] Fix support for multibyte thousands_sep and decimal_point in moneypunct_byname and numpunct_byname.

Eric Fiselier via cfe-commits cfe-commits at lists.llvm.org
Sat Dec 10 16:21:00 PST 2016


Author: ericwf
Date: Sat Dec 10 18:20:59 2016
New Revision: 289347

URL: http://llvm.org/viewvc/llvm-project?rev=289347&view=rev
Log:
[libc++] Fix support for multibyte thousands_sep and decimal_point in moneypunct_byname and numpunct_byname.

Summary:
The underlying C locales provide the `thousands_sep` and `decimal_point` as strings, possibly with more than one character. We currently don't handle this case even for `wchar_t`.

This patch properly converts the multibyte string to a wide character for `moneypunct_byname<wchar_t>`. For the `moneypunct_byname<char>` case, we attempt to narrow the wide character, and if that fails, we attempt to translate it to some reasonable value. For example, we translate U+00A0 (non-breaking space) into U+0020 (regular space). If none of these conversions succeeds, we simply allow the base class to provide a fallback value.


Reviewers: mclow.lists, EricWF

Subscribers: vangyzen, george.burgess.iv, cfe-commits

Differential Revision: https://reviews.llvm.org/D24218

Modified:
    libcxx/trunk/src/locale.cpp
    libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp
    libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
    libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp
    libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp

Modified: libcxx/trunk/src/locale.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/src/locale.cpp?rev=289347&r1=289346&r2=289347&view=diff
==============================================================================
--- libcxx/trunk/src/locale.cpp (original)
+++ libcxx/trunk/src/locale.cpp Sat Dec 10 18:20:59 2016
@@ -4183,6 +4183,54 @@ __widen_from_utf8<32>::~__widen_from_utf
 {
 }
 
+
+static bool checked_string_to_wchar_convert(wchar_t& dest,
+                                            const char* ptr,
+                                            __locale_struct* loc) {
+  if (*ptr == '\0')
+    return false;
+  mbstate_t mb = {};
+  wchar_t out;
+  size_t ret = __libcpp_mbrtowc_l(&out, ptr, strlen(ptr), &mb, loc);
+  if (ret == static_cast<size_t>(-1) || ret == static_cast<size_t>(-2)) {
+    return false;
+  }
+  dest = out;
+  return true;
+}
+
+static bool checked_string_to_char_convert(char& dest,
+                                           const char* ptr,
+                                           __locale_struct* __loc) {
+  if (*ptr == '\0')
+    return false;
+  if (!ptr[1]) {
+    dest = *ptr;
+    return true;
+  }
+  // First convert the MBS into a wide char then attempt to narrow it using
+  // wctob_l.
+  wchar_t wout;
+  if (!checked_string_to_wchar_convert(wout, ptr, __loc))
+    return false;
+  int res;
+  if ((res = __libcpp_wctob_l(wout, __loc)) != char_traits<char>::eof()) {
+    dest = res;
+    return true;
+  }
+  // FIXME: Work around specific multibyte sequences that we can reasonable
+  // translate into a different single byte.
+  switch (wout) {
+  case L'\u00A0': // non-breaking space
+    dest = ' ';
+    return true;
+  default:
+    return false;
+  }
+  _LIBCPP_UNREACHABLE();
+}
+
+
 // numpunct<char> && numpunct<wchar_t>
 
 locale::id numpunct< char  >::id;
@@ -4254,10 +4302,10 @@ numpunct_byname<char>::__init(const char
                                 " failed to construct for " + string(nm));
 
         lconv* lc = __libcpp_localeconv_l(loc.get());
-        if (*lc->decimal_point)
-            __decimal_point_ = *lc->decimal_point;
-        if (*lc->thousands_sep)
-            __thousands_sep_ = *lc->thousands_sep;
+        checked_string_to_char_convert(__decimal_point_, lc->decimal_point,
+                                       loc.get());
+        checked_string_to_char_convert(__thousands_sep_, lc->thousands_sep,
+                                       loc.get());
         __grouping_ = lc->grouping;
         // localization for truename and falsename is not available
     }
@@ -4288,16 +4336,16 @@ numpunct_byname<wchar_t>::__init(const c
     {
         __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
         if (loc == nullptr)
-            __throw_runtime_error("numpunct_byname<char>::numpunct_byname"
+            __throw_runtime_error("numpunct_byname<wchar_t>::numpunct_byname"
                                 " failed to construct for " + string(nm));
 
         lconv* lc = __libcpp_localeconv_l(loc.get());
-        if (*lc->decimal_point)
-            __decimal_point_ = *lc->decimal_point;
-        if (*lc->thousands_sep)
-            __thousands_sep_ = *lc->thousands_sep;
+        checked_string_to_wchar_convert(__decimal_point_, lc->decimal_point,
+                                        loc.get());
+        checked_string_to_wchar_convert(__thousands_sep_, lc->thousands_sep,
+                                        loc.get());
         __grouping_ = lc->grouping;
-        // locallization for truename and falsename is not available
+        // localization for truename and falsename is not available
     }
 }
 
@@ -5779,14 +5827,15 @@ moneypunct_byname<char, false>::init(con
                             " failed to construct for " + string(nm));
 
     lconv* lc = __libcpp_localeconv_l(loc.get());
-    if (*lc->mon_decimal_point)
-        __decimal_point_ = *lc->mon_decimal_point;
-    else
-        __decimal_point_ = base::do_decimal_point();
-    if (*lc->mon_thousands_sep)
-        __thousands_sep_ = *lc->mon_thousands_sep;
-    else
-        __thousands_sep_ = base::do_thousands_sep();
+    if (!checked_string_to_char_convert(__decimal_point_,
+                                        lc->mon_decimal_point,
+                                        loc.get()))
+      __decimal_point_ = base::do_decimal_point();
+    if (!checked_string_to_char_convert(__thousands_sep_,
+                                        lc->mon_thousands_sep,
+                                        loc.get()))
+      __thousands_sep_ = base::do_thousands_sep();
+
     __grouping_ = lc->mon_grouping;
     __curr_symbol_ = lc->currency_symbol;
     if (lc->frac_digits != CHAR_MAX)
@@ -5822,14 +5871,14 @@ moneypunct_byname<char, true>::init(cons
                             " failed to construct for " + string(nm));
 
     lconv* lc = __libcpp_localeconv_l(loc.get());
-    if (*lc->mon_decimal_point)
-        __decimal_point_ = *lc->mon_decimal_point;
-    else
-        __decimal_point_ = base::do_decimal_point();
-    if (*lc->mon_thousands_sep)
-        __thousands_sep_ = *lc->mon_thousands_sep;
-    else
-        __thousands_sep_ = base::do_thousands_sep();
+    if (!checked_string_to_char_convert(__decimal_point_,
+                                        lc->mon_decimal_point,
+                                        loc.get()))
+      __decimal_point_ = base::do_decimal_point();
+    if (!checked_string_to_char_convert(__thousands_sep_,
+                                        lc->mon_thousands_sep,
+                                        loc.get()))
+      __thousands_sep_ = base::do_thousands_sep();
     __grouping_ = lc->mon_grouping;
     __curr_symbol_ = lc->int_curr_symbol;
     if (lc->int_frac_digits != CHAR_MAX)
@@ -5881,14 +5930,14 @@ moneypunct_byname<wchar_t, false>::init(
         __throw_runtime_error("moneypunct_byname"
                             " failed to construct for " + string(nm));
     lconv* lc = __libcpp_localeconv_l(loc.get());
-    if (*lc->mon_decimal_point)
-        __decimal_point_ = static_cast<wchar_t>(*lc->mon_decimal_point);
-    else
-        __decimal_point_ = base::do_decimal_point();
-    if (*lc->mon_thousands_sep)
-        __thousands_sep_ = static_cast<wchar_t>(*lc->mon_thousands_sep);
-    else
-        __thousands_sep_ = base::do_thousands_sep();
+    if (!checked_string_to_wchar_convert(__decimal_point_,
+                                         lc->mon_decimal_point,
+                                         loc.get()))
+      __decimal_point_ = base::do_decimal_point();
+    if (!checked_string_to_wchar_convert(__thousands_sep_,
+                                         lc->mon_thousands_sep,
+                                         loc.get()))
+      __thousands_sep_ = base::do_thousands_sep();
     __grouping_ = lc->mon_grouping;
     wchar_t wbuf[100];
     mbstate_t mb = {0};
@@ -5947,14 +5996,14 @@ moneypunct_byname<wchar_t, true>::init(c
                             " failed to construct for " + string(nm));
 
     lconv* lc = __libcpp_localeconv_l(loc.get());
-    if (*lc->mon_decimal_point)
-        __decimal_point_ = static_cast<wchar_t>(*lc->mon_decimal_point);
-    else
-        __decimal_point_ = base::do_decimal_point();
-    if (*lc->mon_thousands_sep)
-        __thousands_sep_ = static_cast<wchar_t>(*lc->mon_thousands_sep);
-    else
-        __thousands_sep_ = base::do_thousands_sep();
+    if (!checked_string_to_wchar_convert(__decimal_point_,
+                                         lc->mon_decimal_point,
+                                         loc.get()))
+      __decimal_point_ = base::do_decimal_point();
+    if (!checked_string_to_wchar_convert(__thousands_sep_,
+                                         lc->mon_thousands_sep,
+                                         loc.get()))
+      __thousands_sep_ = base::do_thousands_sep();
     __grouping_ = lc->mon_grouping;
     wchar_t wbuf[100];
     mbstate_t mb = {0};

Modified: libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp?rev=289347&r1=289346&r2=289347&view=diff
==============================================================================
--- libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp (original)
+++ libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/decimal_point.pass.cpp Sat Dec 10 18:20:59 2016
@@ -12,9 +12,6 @@
 // REQUIRES: locale.ru_RU.UTF-8
 // REQUIRES: locale.zh_CN.UTF-8
 
-// Russia uses ',' for the decimal separator. GLIBC returns '.'
-// XFAIL: linux
-
 // <locale>
 
 // class moneypunct_byname<charT, International>
@@ -25,6 +22,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
@@ -111,22 +109,29 @@ int main()
         Fwt f(LOCALE_fr_FR_UTF_8, 1);
         assert(f.decimal_point() == L',');
     }
-
+// GLIBC 2.23 uses '.' as the decimal point while other C libraries use ','
+#ifndef TEST_HAS_GLIBC
+    const char sep = ',';
+    const wchar_t wsep = L',';
+#else
+    const char sep = '.';
+    const wchar_t wsep = L'.';
+#endif
     {
         Fnf f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.decimal_point() == ',');
+        assert(f.decimal_point() == sep);
     }
     {
         Fnt f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.decimal_point() == ',');
+        assert(f.decimal_point() == sep);
     }
     {
         Fwf f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.decimal_point() == L',');
+        assert(f.decimal_point() == wsep);
     }
     {
         Fwt f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.decimal_point() == L',');
+        assert(f.decimal_point() == wsep);
     }
 
     {

Modified: libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp?rev=289347&r1=289346&r2=289347&view=diff
==============================================================================
--- libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp (original)
+++ libcxx/trunk/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp Sat Dec 10 18:20:59 2016
@@ -18,16 +18,11 @@
 
 // charT thousands_sep() const;
 
-// Failure related to GLIBC's use of U00A0 as mon_thousands_sep
-// and U002E as mon_decimal_point.
-// TODO: U00A0 should be investigated.
-// Possibly related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
-// XFAIL: linux-gnu
-
 #include <locale>
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
@@ -114,22 +109,34 @@ int main()
         Fwt f(LOCALE_fr_FR_UTF_8, 1);
         assert(f.thousands_sep() == L' ');
     }
-
+// The below tests work around GLIBC's use of U00A0 as mon_thousands_sep
+// and U002E as mon_decimal_point.
+// TODO: Fix thousands_sep for 'char'.
+// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
+#ifndef TEST_HAS_GLIBC
+    const char sep = ' ';
+    const wchar_t wsep = L' ';
+#else
+    // FIXME libc++ specifically works around \u00A0 by translating it into
+    // a regular space.
+    const char sep = ' ';
+    const wchar_t wsep = L'\u00A0';
+#endif
     {
         Fnf f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.thousands_sep() == ' ');
+        assert(f.thousands_sep() == sep);
     }
     {
         Fnt f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.thousands_sep() == ' ');
+        assert(f.thousands_sep() == sep);
     }
     {
         Fwf f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.thousands_sep() == L' ');
+        assert(f.thousands_sep() == wsep);
     }
     {
         Fwt f(LOCALE_ru_RU_UTF_8, 1);
-        assert(f.thousands_sep() == L' ');
+        assert(f.thousands_sep() == wsep);
     }
 
     {

Modified: libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp?rev=289347&r1=289346&r2=289347&view=diff
==============================================================================
--- libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp (original)
+++ libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp Sat Dec 10 18:20:59 2016
@@ -16,12 +16,10 @@
 
 // string grouping() const;
 
-// TODO: investigation needed
-// XFAIL: linux-gnu
-
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main()
@@ -54,15 +52,20 @@ int main()
     }
     {
         std::locale l(LOCALE_fr_FR_UTF_8);
+#if defined(TEST_HAS_GLIBC)
+        const char* const group = "\3";
+#else
+        const char* const group = "\x7f";
+#endif
         {
             typedef char C;
             const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
-            assert(np.grouping() == "\x7F");
+            assert(np.grouping() ==  group);
         }
         {
             typedef wchar_t C;
             const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
-            assert(np.grouping() == "\x7F");
+            assert(np.grouping() == group);
         }
     }
 }

Modified: libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp?rev=289347&r1=289346&r2=289347&view=diff
==============================================================================
--- libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp (original)
+++ libcxx/trunk/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp Sat Dec 10 18:20:59 2016
@@ -16,12 +16,11 @@
 
 // char_type thousands_sep() const;
 
-// TODO: investigation needed
-// XFAIL: linux-gnu
 
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main()
@@ -54,15 +53,22 @@ int main()
     }
     {
         std::locale l(LOCALE_fr_FR_UTF_8);
+#if defined(TEST_HAS_GLIBC)
+        const char sep = ' ';
+        const wchar_t wsep = L' ';
+#else
+        const char sep = ',';
+        const wchar_t wsep = L',';
+#endif
         {
             typedef char C;
             const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
-            assert(np.thousands_sep() == ',');
+            assert(np.thousands_sep() == sep);
         }
         {
             typedef wchar_t C;
             const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
-            assert(np.thousands_sep() == L',');
+            assert(np.thousands_sep() == wsep);
         }
     }
 }




More information about the cfe-commits mailing list