[libcxx-commits] [libcxx] a1da739 - [SystemZ][z/OS] ASCII/EBCDIC support with no coexistence

Muiez Ahmed via libcxx-commits libcxx-commits at lists.llvm.org
Fri Jan 14 08:37:23 PST 2022


Author: Muiez Ahmed
Date: 2022-01-14T11:37:09-05:00
New Revision: a1da73961d291c6a205150caa6ebda71757b9add

URL: https://github.com/llvm/llvm-project/commit/a1da73961d291c6a205150caa6ebda71757b9add
DIFF: https://github.com/llvm/llvm-project/commit/a1da73961d291c6a205150caa6ebda71757b9add.diff

LOG: [SystemZ][z/OS] ASCII/EBCDIC support with no coexistence

The aim of this patch is to break up the larger patch (https://reviews.llvm.org/D111323) to be more upstream friendly. In particular, this patch adds the char encoding sensitive changes but does not use inline namespaces as before. The use of namespaces to build both versions of the library, and localization of error messages will follow in a subsequent patch.

Differential Revision: https://reviews.llvm.org/D114813

Added: 
    

Modified: 
    libcxx/CMakeLists.txt
    libcxx/include/__config
    libcxx/include/__locale
    libcxx/include/regex
    libcxx/src/locale.cpp
    libcxx/src/regex.cpp
    libcxxabi/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 82a643e74eb79..f78fb77ceb378 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -467,6 +467,9 @@ include(HandleLibcxxFlags)
 # These flags get added to CMAKE_CXX_FLAGS and CMAKE_C_FLAGS so that
 # 'config-ix' use them during feature checks. It also adds them to both
 # 'LIBCXX_COMPILE_FLAGS' and 'LIBCXX_LINK_FLAGS'
+if(ZOS)
+  add_target_flags_if_supported("-fzos-le-char-mode=ebcdic")
+endif()
 if(LIBCXX_TARGET_TRIPLE)
   add_target_flags_if_supported("--target=${LIBCXX_TARGET_TRIPLE}")
 endif()

diff  --git a/libcxx/include/__config b/libcxx/include/__config
index 654816b78f295..85f334ff1ba98 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -257,6 +257,10 @@
 #  endif // defined(__GLIBC_PREREQ)
 #endif // defined(__linux__)
 
+#if defined(__MVS__)
+#  include <features.h> // for __NATIVE_ASCII_F
+#endif
+
 #ifdef __LITTLE_ENDIAN__
 #  if __LITTLE_ENDIAN__
 #    define _LIBCPP_LITTLE_ENDIAN
@@ -1220,8 +1224,8 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
 #endif
 
 #if defined(__BIONIC__) || defined(__NuttX__) ||      \
-    defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) || \
-    defined(__MVS__) || defined(__OpenBSD__)
+    defined(__Fuchsia__) || defined(__wasi__) ||		  \
+    defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__)
 #define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
 #endif
 

diff  --git a/libcxx/include/__locale b/libcxx/include/__locale
index 3bddbc8f313e3..6181f2539475d 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -511,6 +511,33 @@ public:
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT
+#elif defined(__MVS__)
+# if defined(__NATIVE_ASCII_F) // ASCII build: use the *_A classification bits
+    typedef unsigned int mask;
+    static const mask space  = _ISSPACE_A;
+    static const mask print  = _ISPRINT_A;
+    static const mask cntrl  = _ISCNTRL_A;
+    static const mask upper  = _ISUPPER_A;
+    static const mask lower  = _ISLOWER_A;
+    static const mask alpha  = _ISALPHA_A;
+    static const mask digit  = _ISDIGIT_A;
+    static const mask punct  = _ISPUNCT_A;
+    static const mask xdigit = _ISXDIGIT_A;
+    static const mask blank  = _ISBLANK_A;
+# else // EBCDIC build: use the native __IS* classification bits
+    typedef unsigned short mask;
+    static const mask space  = __ISSPACE;
+    static const mask print  = __ISPRINT;
+    static const mask cntrl  = __ISCNTRL;
+    static const mask upper  = __ISUPPER;
+    static const mask lower  = __ISLOWER;
+    static const mask alpha  = __ISALPHA;
+    static const mask digit  = __ISDIGIT;
+    static const mask punct  = __ISPUNCT;
+    static const mask xdigit = __ISXDIGIT;
+    static const mask blank  = __ISBLANK;
+# endif
+    static const mask __regex_word = 0x8000; // must follow the mask typedef; tested by regex_traits::isctype for '_'
 #else
 # error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE?
 #endif
@@ -734,6 +761,10 @@ public:
     static const short* __classic_upper_table() _NOEXCEPT;
     static const short* __classic_lower_table() _NOEXCEPT;
 #endif
+#if defined(__MVS__)
+    static const unsigned short* __classic_upper_table() _NOEXCEPT;
+    static const unsigned short* __classic_lower_table() _NOEXCEPT;
+#endif
 
 protected:
     ~ctype();

diff  --git a/libcxx/include/regex b/libcxx/include/regex
index 8203c81659c87..59b2c9a23399c 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -1310,19 +1310,51 @@ regex_traits<_CharT>::isctype(char_type __c, char_class_type __m) const
     return (__c == '_' && (__m & __regex_word));
 }
 
+inline _LIBCPP_INLINE_VISIBILITY
+bool __is_07(unsigned char __c) // true iff __c is an octal digit, '0' <= __c && __c <= '7'
+{
+    return (__c & 0xF8u) == // drop the low three bits, compare against the code of '0'
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+        0xF0; // EBCDIC: '0'..'7' are 0xF0..0xF7
+#else
+        0x30; // ASCII: '0'..'7' are 0x30..0x37
+#endif
+}
+
+inline _LIBCPP_INLINE_VISIBILITY
+bool __is_89(unsigned char __c) // true iff __c is '8' or '9'
+{
+    return (__c & 0xFEu) == // drop the lowest bit, compare against the code of '8'
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+        0xF8; // EBCDIC: '8' and '9' are 0xF8 and 0xF9
+#else
+        0x38; // ASCII: '8' and '9' are 0x38 and 0x39
+#endif
+}
+
+inline _LIBCPP_INLINE_VISIBILITY
+unsigned char __to_lower(unsigned char __c) // flips the case bit only; callers must range-check the result
+{
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+    return __c & 0xBF; // EBCDIC: clearing bit 0x40 maps 'A' (0xC1) to 'a' (0x81)
+#else
+    return __c | 0x20; // ASCII: setting bit 0x20 maps 'A' (0x41) to 'a' (0x61)
+#endif
+}
+
 template <class _CharT>
 int
 regex_traits<_CharT>::__regex_traits_value(unsigned char __ch, int __radix)
 {
-    if ((__ch & 0xF8u) == 0x30)  // '0' <= __ch && __ch <= '7'
+    if (__is_07(__ch))  // '0' <= __ch && __ch <= '7'
         return __ch - '0';
     if (__radix != 8)
     {
-        if ((__ch & 0xFEu) == 0x38)  // '8' <= __ch && __ch <= '9'
+        if (__is_89(__ch))  // '8' <= __ch && __ch <= '9'
             return __ch - '0';
         if (__radix == 16)
         {
-            __ch |= 0x20;  // tolower
+            __ch = __to_lower(__ch);  // tolower
             if ('a' <= __ch && __ch <= 'f')
                 return __ch - ('a' - 10);
         }

diff  --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 79f03b85fab3d..2234784769dd3 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -898,7 +898,7 @@ ctype<wchar_t>::do_toupper(char_type c) const
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
     return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
     return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
 #else
     return (isascii(c) && iswlower_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'a'+L'A' : c;
@@ -912,7 +912,7 @@ ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
         *low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
         *low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low]
                              : *low;
 #else
@@ -927,7 +927,7 @@ ctype<wchar_t>::do_tolower(char_type c) const
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
     return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
     return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
 #else
     return (isascii(c) && isupper_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'A'+'a' : c;
@@ -941,7 +941,7 @@ ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
         *low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
         *low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low]
                              : *low;
 #else
@@ -1013,7 +1013,7 @@ ctype<char>::do_toupper(char_type c) const
       static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
     return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
     return isascii(c) ?
       static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
 #else
@@ -1030,7 +1030,7 @@ ctype<char>::do_toupper(char_type* low, const char_type* high) const
           static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
         *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
         *low = isascii(*low) ?
           static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
 #else
@@ -1047,7 +1047,7 @@ ctype<char>::do_tolower(char_type c) const
       static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
     return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
     return isascii(c) ?
       static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
 #else
@@ -1063,7 +1063,7 @@ ctype<char>::do_tolower(char_type* low, const char_type* high) const
         *low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
         *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
         *low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
 #else
         *low = (isascii(*low) && isupper_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low-'A'+'a' : *low;
@@ -1211,6 +1211,12 @@ ctype<char>::classic_table() noexcept
     return _ctype_ + 1;
 #elif defined(_AIX)
     return (const unsigned int *)__lc_ctype_ptr->obj->mask;
+#elif defined(__MVS__)
+# if defined(__NATIVE_ASCII_F)
+    return const_cast<const ctype<char>::mask*> (__OBJ_DATA(__lc_ctype_a)->mask);
+# else
+    return const_cast<const ctype<char>::mask*> (__ctypec);
+# endif
 #else
     // Platform not supported: abort so the person doing the port knows what to
     // fix
@@ -1259,7 +1265,26 @@ ctype<char>::__classic_upper_table() noexcept
 {
     return *__ctype_toupper_loc();
 }
-#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__
+#elif defined(__MVS__)
+const unsigned short* // z/OS (__MVS__) table indexed by the ctype do_tolower overloads
+ctype<char>::__classic_lower_table() _NOEXCEPT
+{
+# if defined(__NATIVE_ASCII_F) // ASCII build: table comes from the __lc_ctype_a object data
+  return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->lower);
+# else // EBCDIC build: table lives inside __ctype at offset __TOLOWER_INDEX
+  return const_cast<const unsigned short*>(__ctype + __TOLOWER_INDEX);
+# endif
+}
+const unsigned short * // z/OS (__MVS__) table indexed by the ctype do_toupper overloads
+ctype<char>::__classic_upper_table() _NOEXCEPT
+{
+# if defined(__NATIVE_ASCII_F) // ASCII build: table comes from the __lc_ctype_a object data
+  return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->upper);
+# else // EBCDIC build: table lives inside __ctype at offset __TOUPPER_INDEX
+  return const_cast<const unsigned short*>(__ctype + __TOUPPER_INDEX);
+# endif
+}
+#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__ || __MVS__
 
 // template <> class ctype_byname<char>
 

diff  --git a/libcxx/src/regex.cpp b/libcxx/src/regex.cpp
index 425339a5c9b8d..16ad8f0effdb0 100644
--- a/libcxx/src/regex.cpp
+++ b/libcxx/src/regex.cpp
@@ -76,6 +76,125 @@ struct collationnames
     char char_;
 };
 
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+// EBCDIC IBM-1047 code points
+// Sorted by name in EBCDIC collating order (lowercase letters sort before uppercase)
+const collationnames collatenames[] =
+{
+    {"a", 0x81},
+    {"alert", 0x2f},
+    {"ampersand", 0x50},
+    {"apostrophe", 0x7d},
+    {"asterisk", 0x5c},
+    {"b", 0x82},
+    {"backslash", 0xe0},
+    {"backspace", 0x16},
+    {"c", 0x83},
+    {"carriage-return", 0xd},
+    {"circumflex", 0x5f},
+    {"circumflex-accent", 0x5f},
+    {"colon", 0x7a},
+    {"comma", 0x6b},
+    {"commercial-at", 0x7c},
+    {"d", 0x84},
+    {"dollar-sign", 0x5b},
+    {"e", 0x85},
+    {"eight", 0xf8},
+    {"equals-sign", 0x7e},
+    {"exclamation-mark", 0x5a},
+    {"f", 0x86},
+    {"five", 0xf5},
+    {"form-feed", 0xc},
+    {"four", 0xf4},
+    {"full-stop", 0x4b},
+    {"g", 0x87},
+    {"grave-accent", 0x79},
+    {"greater-than-sign", 0x6e},
+    {"h", 0x88},
+    {"hyphen", 0x60},
+    {"hyphen-minus", 0x60},
+    {"i", 0x89},
+    {"j", 0x91},
+    {"k", 0x92},
+    {"l", 0x93},
+    {"left-brace", 0xc0},
+    {"left-curly-bracket", 0xc0},
+    {"left-parenthesis", 0x4d},
+    {"left-square-bracket", 0xad},
+    {"less-than-sign", 0x4c},
+    {"low-line", 0x6d},
+    {"m", 0x94},
+    {"n", 0x95},
+    {"newline", 0x15},
+    {"nine", 0xf9},
+    {"number-sign", 0x7b},
+    {"o", 0x96},
+    {"one", 0xf1},
+    {"p", 0x97},
+    {"percent-sign", 0x6c},
+    {"period", 0x4b},
+    {"plus-sign", 0x4e},
+    {"q", 0x98},
+    {"question-mark", 0x6f},
+    {"quotation-mark", 0x7f},
+    {"r", 0x99},
+    {"reverse-solidus", 0xe0},
+    {"right-brace", 0xd0},
+    {"right-curly-bracket", 0xd0},
+    {"right-parenthesis", 0x5d},
+    {"right-square-bracket", 0xbd},
+    {"s", 0xa2},
+    {"semicolon", 0x5e},
+    {"seven", 0xf7},
+    {"six", 0xf6},
+    {"slash", 0x61},
+    {"solidus", 0x61},
+    {"space", 0x40},
+    {"t", 0xa3},
+    {"tab", 0x5},
+    {"three", 0xf3},
+    {"tilde", 0xa1},
+    {"two", 0xf2},
+    {"u", 0xa4},
+    {"underscore", 0x6d},
+    {"v", 0xa5},
+    {"vertical-line", 0x4f},
+    {"vertical-tab", 0xb},
+    {"w", 0xa6},
+    {"x", 0xa7},
+    {"y", 0xa8},
+    {"z", 0xa9},
+    {"zero", 0xf0},
+    {"A", 0xc1},
+    {"B", 0xc2},
+    {"C", 0xc3},
+    {"D", 0xc4},
+    {"E", 0xc5},
+    {"F", 0xc6},
+    {"G", 0xc7},
+    {"H", 0xc8},
+    {"I", 0xc9},
+    {"J", 0xd1},
+    {"K", 0xd2},
+    {"L", 0xd3},
+    {"M", 0xd4},
+    {"N", 0xd5},
+    {"NUL", 0},
+    {"O", 0xd6},
+    {"P", 0xd7},
+    {"Q", 0xd8},
+    {"R", 0xd9},
+    {"S", 0xe2},
+    {"T", 0xe3},
+    {"U", 0xe4},
+    {"V", 0xe5},
+    {"W", 0xe6},
+    {"X", 0xe7},
+    {"Y", 0xe8},
+    {"Z", 0xe9}
+};
+#else
+// ASCII
 const collationnames collatenames[] =
 {
     {"A", 0x41},
@@ -190,6 +309,7 @@ const collationnames collatenames[] =
     {"z", 0x7a},
     {"zero", 0x30}
 };
+#endif
 
 struct classnames
 {

diff  --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index 89722df0bf465..b73804390ba20 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -267,6 +267,9 @@ include(HandleLibcxxabiFlags)
 #===============================================================================
 
 # Configure target flags
+if(ZOS)
+  add_target_flags_if_supported("-fzos-le-char-mode=ebcdic")
+endif()
 if(LIBCXXABI_TARGET_TRIPLE)
   add_target_flags_if_supported("--target=${LIBCXXABI_TARGET_TRIPLE}")
 endif()


        


More information about the libcxx-commits mailing list