[libcxx-commits] [libcxx] 38d25ae - [libcxx] [test] Use proper UTF-8 locales on Windows

Martin Storsjö via libcxx-commits libcxx-commits at lists.llvm.org
Thu Feb 17 14:03:47 PST 2022


Author: Martin Storsjö
Date: 2022-02-18T00:02:34+02:00
New Revision: 38d25aecdf72177d72ed40f3dfbbf1d3c726dc8f

URL: https://github.com/llvm/llvm-project/commit/38d25aecdf72177d72ed40f3dfbbf1d3c726dc8f
DIFF: https://github.com/llvm/llvm-project/commit/38d25aecdf72177d72ed40f3dfbbf1d3c726dc8f.diff

LOG: [libcxx] [test] Use proper UTF-8 locales on Windows

Since Windows 10 version 1803 (10.0.17134.0) (or Windows Server 2019),
the Windows Universal C Runtime (UCRT) actually does support UTF-8
locales - they're available via the same names as are commonly used on
Unices, e.g. "en_US.UTF-8".

The UTF-8 locale support unfortunately has a bug which breaks a couple of
tests that were passing previously. That bug is fixed in the very
latest version of the UCRT (UCRT 10.0.20348.0, available in Windows
11 or Windows Server 2022), so it will be resolved eventually,
provided that the CI environment is upgraded to a newer version of
Windows Server.

While the net number of xfailed/passing tests in this patch is a loss,
this does allow fixing a lot more locale tests properly for Windows
in later patches.

Intentionally not touching the ISO-8859-1/2 locales used for testing;
they're not detected and tested/used right now, and fixing that up
is another project.

Differential Revision: https://reviews.llvm.org/D119930

Added: 
    

Modified: 
    libcxx/test/libcxx/selftest/dsl/dsl.sh.py
    libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp
    libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp
    libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
    libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
    libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
    libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
    libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp
    libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp
    libcxx/test/std/re/re.traits/translate_nocase.pass.cpp
    libcxx/test/support/platform_support.h
    libcxx/utils/libcxx/test/dsl.py
    libcxx/utils/libcxx/test/features.py

Removed: 
    


################################################################################
diff  --git a/libcxx/test/libcxx/selftest/dsl/dsl.sh.py b/libcxx/test/libcxx/selftest/dsl/dsl.sh.py
index 5da2cc9f045ba..f4c48549a7c2d 100644
--- a/libcxx/test/libcxx/selftest/dsl/dsl.sh.py
+++ b/libcxx/test/libcxx/selftest/dsl/dsl.sh.py
@@ -216,6 +216,28 @@ def test_program_stderr_is_not_conflated_with_stdout(self):
         self.assertEqual(dsl.programOutput(self.config, source), "STDOUT-OUTPUT")
 
 
+class TestProgramSucceeds(SetupConfigs):
+    """
+    Tests for libcxx.test.dsl.programSucceeds
+    """
+    def test_success(self):
+        source = """
+        int main(int, char**) { return 0; }
+        """
+        self.assertTrue(dsl.programSucceeds(self.config, source))
+
+    def test_failure(self):
+        source = """
+        int main(int, char**) { return 1; }
+        """
+        self.assertFalse(dsl.programSucceeds(self.config, source))
+
+    def test_compile_failure(self):
+        source = """
+        this does not compile
+        """
+        self.assertRaises(dsl.ConfigurationCompilationError, lambda: dsl.programSucceeds(self.config, source))
+
 class TestHasLocale(SetupConfigs):
     """
     Tests for libcxx.test.dsl.hasLocale

diff  --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp
index d4f8f0f2c322c..90161432a2d34 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp
@@ -8,8 +8,6 @@
 
 // REQUIRES: locale.en_US.UTF-8
 
-// XFAIL: LIBCXX-WINDOWS-FIXME
-
 // <fstream>
 
 // int_type overflow(int_type c = traits::eof());

diff  --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp
index 7adc07da9d380..21418bd1958a4 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp
@@ -9,8 +9,6 @@
 // REQUIRES: locale.en_US.UTF-8
 // FILE_DEPENDENCIES: underflow.dat, underflow_utf8.dat
 
-// XFAIL: LIBCXX-WINDOWS-FIXME
-
 // <fstream>
 
 // int_type underflow();

diff  --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
index a85f89f46a527..5e2b81d06e5aa 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 // REQUIRES: locale.en_US.UTF-8
+// XFAIL: broken-utf8-wchar-ctype
 
 // <locale>
 

diff  --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
index 8363b57a504d3..289d569ab9fb0 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 // REQUIRES: locale.en_US.UTF-8
+// XFAIL: broken-utf8-wchar-ctype
 
 // <locale>
 

diff  --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
index 503cb198e3142..d957e1ee09919 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 // REQUIRES: locale.en_US.UTF-8
+// XFAIL: broken-utf8-wchar-ctype
 
 // <locale>
 

diff  --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
index ffb3b7ecdc1e2..7893a32204a9a 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 // REQUIRES: locale.en_US.UTF-8
+// XFAIL: broken-utf8-wchar-ctype
 
 // <locale>
 

diff  --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp
index db6f845271f40..f2153f0c6620f 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp
@@ -10,8 +10,6 @@
 // REQUIRES: locale.fr_FR.UTF-8
 // REQUIRES: locale.zh_CN.UTF-8
 
-// XFAIL: LIBCXX-WINDOWS-FIXME
-
 // <locale>
 
 // class time_get_byname<charT, InputIterator>

diff  --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp
index c2d706855a5e8..4a452f23fcd1b 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp
@@ -11,8 +11,6 @@
 // REQUIRES: locale.ru_RU.UTF-8
 // REQUIRES: locale.zh_CN.UTF-8
 
-// XFAIL: LIBCXX-WINDOWS-FIXME
-
 // <locale>
 
 // class time_get_byname<charT, InputIterator>

diff  --git a/libcxx/test/std/re/re.traits/translate_nocase.pass.cpp b/libcxx/test/std/re/re.traits/translate_nocase.pass.cpp
index d7eabc919812d..257c627f20eed 100644
--- a/libcxx/test/std/re/re.traits/translate_nocase.pass.cpp
+++ b/libcxx/test/std/re/re.traits/translate_nocase.pass.cpp
@@ -13,6 +13,7 @@
 // charT translate_nocase(charT c) const;
 
 // REQUIRES: locale.en_US.UTF-8
+// XFAIL: broken-utf8-wchar-ctype
 
 #include <regex>
 #include <cassert>

diff  --git a/libcxx/test/support/platform_support.h b/libcxx/test/support/platform_support.h
index f8183d2fb4f09..7cfb21f794944 100644
--- a/libcxx/test/support/platform_support.h
+++ b/libcxx/test/support/platform_support.h
@@ -15,30 +15,21 @@
 #define PLATFORM_SUPPORT_H
 
 // locale names
-#ifdef _WIN32
-    // WARNING: Windows does not support UTF-8 codepages.
-    // Locales are "converted" using https://docs.moodle.org/dev/Table_of_locales
-#   define LOCALE_en_US           "en-US"
-#   define LOCALE_en_US_UTF_8     "en-US"
-#   define LOCALE_cs_CZ_ISO8859_2 "cs-CZ"
-#   define LOCALE_fr_FR_UTF_8     "fr-FR"
-#   define LOCALE_fr_CA_ISO8859_1 "fr-CA"
-#   define LOCALE_ru_RU_UTF_8     "ru-RU"
-#   define LOCALE_zh_CN_UTF_8     "zh-CN"
+#define LOCALE_en_US           "en_US"
+#define LOCALE_en_US_UTF_8     "en_US.UTF-8"
+#define LOCALE_fr_FR_UTF_8     "fr_FR.UTF-8"
+#ifdef __linux__
+#    define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO-8859-1"
+#    define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO-8859-2"
+#elif defined(_WIN32)
+#    define LOCALE_fr_CA_ISO8859_1 "fr-CA"
+#    define LOCALE_cs_CZ_ISO8859_2 "cs-CZ"
 #else
-#   define LOCALE_en_US           "en_US"
-#   define LOCALE_en_US_UTF_8     "en_US.UTF-8"
-#   define LOCALE_fr_FR_UTF_8     "fr_FR.UTF-8"
-#   ifdef __linux__
-#       define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO-8859-1"
-#       define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO-8859-2"
-#   else
-#       define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO8859-1"
-#       define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO8859-2"
-#   endif
-#   define LOCALE_ru_RU_UTF_8     "ru_RU.UTF-8"
-#   define LOCALE_zh_CN_UTF_8     "zh_CN.UTF-8"
+#    define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO8859-1"
+#    define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO8859-2"
 #endif
+#define LOCALE_ru_RU_UTF_8     "ru_RU.UTF-8"
+#define LOCALE_zh_CN_UTF_8     "zh_CN.UTF-8"
 
 #include <stdio.h>
 #include <stdlib.h>

diff  --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index c50a7508cab30..791edb3406fc9 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -175,6 +175,22 @@ def programOutput(config, program, args=None):
     actualOut = actualOut.group(1) if actualOut else ""
     return actualOut
 
+@_memoizeExpensiveOperation(lambda c, p, args=None: (c.substitutions, c.environment, p, args))
+def programSucceeds(config, program, args=None):
+  """
+  Compiles a program for the test target, run it on the test target and return
+  whether it completed successfully.
+
+  Note that execution of the program is done through the %{exec} substitution,
+  which means that the program may be run on a remote host depending on what
+  %{exec} does.
+  """
+  try:
+    programOutput(config, program, args)
+  except ConfigurationRuntimeError:
+    return False
+  return True
+
 @_memoizeExpensiveOperation(lambda c, f: (c.substitutions, c.environment, f))
 def hasCompileFlag(config, flag):
   """
@@ -229,11 +245,7 @@ def hasAnyLocale(config, locales):
       }
     #endif
   """
-  try:
-    programOutput(config, program, args=[pipes.quote(l) for l in locales])
-  except ConfigurationRuntimeError:
-    return False
-  return True
+  return programSucceeds(config, program, args=[pipes.quote(l) for l in locales])
 
 @_memoizeExpensiveOperation(lambda c, flags='': (c.substitutions, c.environment, flags))
 def compilerMacros(config, flags=''):

diff  --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py
index 0dfb516ae5297..a2498cada94bb 100644
--- a/libcxx/utils/libcxx/test/features.py
+++ b/libcxx/utils/libcxx/test/features.py
@@ -73,6 +73,18 @@
             void f() { new int(3); }
           """, ['-shared'])),
 
+  # Check for a Windows UCRT bug (fixed in UCRT/Windows 10.0.20348.0):
+  # https://developercommunity.visualstudio.com/t/utf-8-locales-break-ctype-functions-for-wchar-type/1653678
+  Feature(name='broken-utf8-wchar-ctype',
+          when=lambda cfg: '_WIN32' in compilerMacros(cfg) and not programSucceeds(cfg, """
+          #include <locale.h>
+          #include <wctype.h>
+          int main(int, char**) {
+            setlocale(LC_ALL, "en_US.UTF-8");
+            return towlower(L'\\xDA') != L'\\xFA';
+          }
+          """)),
+
   # Whether Bash can run on the executor.
   # This is not always the case, for example when running on embedded systems.
   #


        


More information about the libcxx-commits mailing list