[libcxx-commits] [libcxx] [libcxx] Work around picolibc argv handling in tests. (PR #127662)

Simon Tatham via libcxx-commits libcxx-commits at lists.llvm.org
Wed Feb 19 05:23:32 PST 2025


https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/127662

>From 8b31aa204ce3c3e8601501b9d0d2d193f5741b26 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Tue, 18 Feb 2025 15:01:53 +0000
Subject: [PATCH 1/5] [libcxx] Work around picolibc argv handling in tests.

This fixes some test failures when the libcxx tests are run against an
up-to-date picolibc on embedded Arm, because those tests depend on an
unsupported locale but the `hasAnyLocale` preliminary check wrongly
concluded that the locale _was_ supported.

`hasAnyLocale` passes a set of locale strings to a test program via
the command line, and checks if the libc under test reports that any
of the locals can be successfully set via setlocale(). In some
invocations one of the locale names contains a space, e.g. the
Windows-style locale name "English_United States.1252".

Unfortunately picolibc's crt0, when running under Arm semihosting,
fetches the single command string from the host and then splits it up
at spaces without implementing any kind of quoting. So it simply isn't
possible to get a space into an argv word. As a result, we end up
testing for the locale (in this example) "English_United". In
up-to-date versions of picolibc, this is actually accepted, since it
contains no objectionable character set specification (or indeed any
at all). So the lit check wrongly concludes that libc supports that
locale, and enables some locale tests, which fail.

This patch works around the issue entirely within `hasAnyLocale()`, by
applying URL %-encoding to each locale name to remove spaces and any
likely command-line quote characters. Then the test program (included
as a string literal within the same Python function) undoes that
quoting. This should make no difference in cases where the argv
quoting worked anyway: the quoting and unquoting is unnecessary in
that situation, but should still be reliable. But in cases where argv
handling is extremely simplistic, this works around it.
---
 libcxx/utils/libcxx/test/dsl.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index 7ff4be0ee7cf9..81db5d099e903 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -274,14 +274,38 @@ def hasAnyLocale(config, locales):
     %{exec} -- this means that the command may be executed on a remote host
     depending on the %{exec} substitution.
     """
+
+    # Some locale names contain spaces, and some embedded use cases
+    # (in particular picolibc under Arm semihosting) can't support
+    # spaces in argv words. Work around this by applying URL-style
+    # %-encoding to the locale names, and undoing it in our test
+    # program.
+    percent_encode = lambda s: ''.join(
+        "%{:02x}".format(c) if c in b' %\\"\'' or c >= 128 else chr(c)
+        for c in s.encode())
+
     program = """
     #include <stddef.h>
     #if defined(_LIBCPP_VERSION) && !_LIBCPP_HAS_LOCALIZATION
       int main(int, char**) { return 1; }
     #else
+      #include <stdlib.h>
       #include <locale.h>
       int main(int argc, char** argv) {
         for (int i = 1; i < argc; i++) {
+          // %-decode argv[i] in place
+          for (char *p = argv[i], *q = argv[i]; *p; p++) {
+            if (*p == '%') {
+              char hex[3];
+              hex[0] = *++p;
+              hex[1] = *++p;
+              hex[2] = '\\0';
+              *q++ = (char)strtoul(hex, NULL, 0);
+            } else {
+              *q++ = *p;
+            }
+          }
+          // Try to set the locale
           if (::setlocale(LC_ALL, argv[i]) != NULL) {
             return 0;
           }
@@ -290,7 +314,8 @@ def hasAnyLocale(config, locales):
       }
     #endif
   """
-    return programSucceeds(config, program, args=[shlex.quote(l) for l in locales])
+    return programSucceeds(config, program, args=[
+        shlex.quote(percent_encode(l)) for l in locales])
 
 
 @_memoizeExpensiveOperation(lambda c, flags="": (c.substitutions, c.environment, flags))

>From a343def2bc351d89b1a7f5df13322023ea8574ad Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Tue, 18 Feb 2025 16:45:20 +0000
Subject: [PATCH 2/5] formatting fixes from CI

---
 libcxx/utils/libcxx/test/dsl.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index 81db5d099e903..6c1d430b86f65 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -280,9 +280,10 @@ def hasAnyLocale(config, locales):
     # spaces in argv words. Work around this by applying URL-style
     # %-encoding to the locale names, and undoing it in our test
     # program.
-    percent_encode = lambda s: ''.join(
-        "%{:02x}".format(c) if c in b' %\\"\'' or c >= 128 else chr(c)
-        for c in s.encode())
+    percent_encode = lambda s: "".join(
+        "%{:02x}".format(c) if c in b" %\\\"'" or c >= 128 else chr(c)
+        for c in s.encode()
+    )
 
     program = """
     #include <stddef.h>
@@ -314,8 +315,9 @@ def hasAnyLocale(config, locales):
       }
     #endif
   """
-    return programSucceeds(config, program, args=[
-        shlex.quote(percent_encode(l)) for l in locales])
+    return programSucceeds(
+        config, program, args=[shlex.quote(percent_encode(l)) for l in locales]
+    )
 
 
 @_memoizeExpensiveOperation(lambda c, flags="": (c.substitutions, c.environment, flags))

>From 3e8d38087ead91972ceaaee031ef450dc9db0910 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Wed, 19 Feb 2025 09:35:52 +0000
Subject: [PATCH 3/5] Change strategies to avoid using argv at all

---
 libcxx/utils/libcxx/test/dsl.py | 42 ++++++++++-----------------------
 1 file changed, 13 insertions(+), 29 deletions(-)

diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index 6c1d430b86f65..2c970d7a13a30 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -275,14 +275,11 @@ def hasAnyLocale(config, locales):
     depending on the %{exec} substitution.
     """
 
-    # Some locale names contain spaces, and some embedded use cases
-    # (in particular picolibc under Arm semihosting) can't support
-    # spaces in argv words. Work around this by applying URL-style
-    # %-encoding to the locale names, and undoing it in our test
-    # program.
-    percent_encode = lambda s: "".join(
-        "%{:02x}".format(c) if c in b" %\\\"'" or c >= 128 else chr(c)
-        for c in s.encode()
+    # Convert the locale names into C string literals, by escaping \
+    # and " and wrapping each one in double quotes.
+    name_string_literals = ", ".join(
+        '"' + locale.replace("\\", r'\\').replace("\"", r'\"') + '"'
+        for locale in locales
     )
 
     program = """
@@ -290,34 +287,21 @@ def hasAnyLocale(config, locales):
     #if defined(_LIBCPP_VERSION) && !_LIBCPP_HAS_LOCALIZATION
       int main(int, char**) { return 1; }
     #else
-      #include <stdlib.h>
       #include <locale.h>
-      int main(int argc, char** argv) {
-        for (int i = 1; i < argc; i++) {
-          // %-decode argv[i] in place
-          for (char *p = argv[i], *q = argv[i]; *p; p++) {
-            if (*p == '%') {
-              char hex[3];
-              hex[0] = *++p;
-              hex[1] = *++p;
-              hex[2] = '\\0';
-              *q++ = (char)strtoul(hex, NULL, 0);
-            } else {
-              *q++ = *p;
-            }
-          }
-          // Try to set the locale
-          if (::setlocale(LC_ALL, argv[i]) != NULL) {
+      static const char *const test_locale_names[] = {
+          """ + name_string_literals + """, nullptr,
+      };
+      int main() {
+        for (size_t i = 0; test_locale_names[i]; i++) {
+          if (::setlocale(LC_ALL, test_locale_names[i]) != NULL) {
             return 0;
           }
         }
         return 1;
       }
     #endif
-  """
-    return programSucceeds(
-        config, program, args=[shlex.quote(percent_encode(l)) for l in locales]
-    )
+    """
+    return programSucceeds(config, program)
 
 
 @_memoizeExpensiveOperation(lambda c, flags="": (c.substitutions, c.environment, flags))

>From b60a904c9963e9fa39fe03f988f874cc46fd432f Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Wed, 19 Feb 2025 10:38:22 +0000
Subject: [PATCH 4/5] formatting fixes

---
 libcxx/utils/libcxx/test/dsl.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index 2c970d7a13a30..e35fd29e6b76b 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -278,18 +278,21 @@ def hasAnyLocale(config, locales):
     # Convert the locale names into C string literals, by escaping \
     # and " and wrapping each one in double quotes.
     name_string_literals = ", ".join(
-        '"' + locale.replace("\\", r'\\').replace("\"", r'\"') + '"'
+        '"' + locale.replace("\\", r"\\").replace('"', r"\"") + '"'
         for locale in locales
     )
 
-    program = """
+    program = (
+        """
     #include <stddef.h>
     #if defined(_LIBCPP_VERSION) && !_LIBCPP_HAS_LOCALIZATION
       int main(int, char**) { return 1; }
     #else
       #include <locale.h>
       static const char *const test_locale_names[] = {
-          """ + name_string_literals + """, nullptr,
+          """
+        + name_string_literals
+        + """, nullptr,
       };
       int main() {
         for (size_t i = 0; test_locale_names[i]; i++) {
@@ -301,6 +304,7 @@ def hasAnyLocale(config, locales):
       }
     #endif
     """
+    )
     return programSucceeds(config, program)
 
 

>From 185f4bd569fb52bca3a9af6fade369c8e87deb3c Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Wed, 19 Feb 2025 13:22:44 +0000
Subject: [PATCH 5/5] Remove now-unnecessary import of shlex

---
 libcxx/utils/libcxx/test/dsl.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py
index e35fd29e6b76b..38fc957492f3d 100644
--- a/libcxx/utils/libcxx/test/dsl.py
+++ b/libcxx/utils/libcxx/test/dsl.py
@@ -9,7 +9,6 @@
 import os
 import pickle
 import platform
-import shlex
 import shutil
 import tempfile
 



More information about the libcxx-commits mailing list