[libc-commits] [libc] 2ba1eba - Revert "[libc][wctype] Implement internal UTF8 wctype classification functions" (#180186)

via libc-commits libc-commits at lists.llvm.org
Fri Feb 6 04:51:01 PST 2026


Author: Marcell Leleszi
Date: 2026-02-06T13:50:56+01:00
New Revision: 2ba1eba4be3f5225a5dde6223c434a8bd24bb9b6

URL: https://github.com/llvm/llvm-project/commit/2ba1eba4be3f5225a5dde6223c434a8bd24bb9b6
DIFF: https://github.com/llvm/llvm-project/commit/2ba1eba4be3f5225a5dde6223c434a8bd24bb9b6.diff

LOG: Revert "[libc][wctype] Implement internal UTF8 wctype classification functions" (#180186)

Reverts llvm/llvm-project#174607

Reason for the revert: the original change breaks libc++.

Added: 
    

Modified: 
    libc/src/__support/CMakeLists.txt
    libc/src/__support/wctype_utils.h
    libc/test/src/__support/CMakeLists.txt

Removed: 
    libc/test/src/__support/wctype_utils_test.cpp


################################################################################
diff  --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index d771d3eb1920c..64cf368713221 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -156,22 +156,12 @@ add_header_library(
     ctype_utils.h
 )
 
-list(APPEND wctype_utils_deps
-  libc.hdr.types.wchar_t
-)
-
-if ("${LIBC_CONF_WCTYPE_MODE}" STREQUAL "LIBC_WCTYPE_MODE_UTF8")
-  list(APPEND wctype_utils_deps
-      libc.src.__support.wctype.wctype_classification_utils
-  )
-endif()
-
 add_header_library(
   wctype_utils
   HDRS
     wctype_utils.h
   DEPENDS
-    ${wctype_utils_deps}
+    libc.hdr.types.wchar_t
 )
 
 add_header_library(

diff  --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h
index c738efb344f2c..7f17224104ffb 100644
--- a/libc/src/__support/wctype_utils.h
+++ b/libc/src/__support/wctype_utils.h
@@ -13,13 +13,6 @@
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"
 
-#define LIBC_WCTYPE_MODE_ASCII 0
-#define LIBC_WCTYPE_MODE_UTF8 1
-
-#if LIBC_CONF_WCTYPE_MODE == LIBC_WCTYPE_MODE_UTF8
-#include "src/__support/wctype/wctype_classification_utils.h"
-#endif
-
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
@@ -45,8 +38,8 @@ namespace internal {
 // This assumes the character ranges are contiguous, which they aren't in
 // EBCDIC. Technically we could use some smaller ranges, but that's even harder
 // to read.
-namespace ascii {
-LIBC_INLINE constexpr bool islower(wchar_t wch) {
+
+LIBC_INLINE static constexpr bool islower(wchar_t wch) {
   switch (wch) {
   case L'a':
   case L'b':
@@ -80,7 +73,7 @@ LIBC_INLINE constexpr bool islower(wchar_t wch) {
   }
 }
 
-LIBC_INLINE constexpr bool isupper(wchar_t wch) {
+LIBC_INLINE static constexpr bool isupper(wchar_t wch) {
   switch (wch) {
   case L'A':
   case L'B':
@@ -114,7 +107,7 @@ LIBC_INLINE constexpr bool isupper(wchar_t wch) {
   }
 }
 
-LIBC_INLINE constexpr bool isdigit(wchar_t wch) {
+LIBC_INLINE static constexpr bool isdigit(wchar_t wch) {
   switch (wch) {
   case L'0':
   case L'1':
@@ -132,169 +125,125 @@ LIBC_INLINE constexpr bool isdigit(wchar_t wch) {
   }
 }
 
-LIBC_INLINE constexpr bool isalpha(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t tolower(wchar_t wch) {
   switch (wch) {
-  case L'a':
-  case L'b':
-  case L'c':
-  case L'd':
-  case L'e':
-  case L'f':
-  case L'g':
-  case L'h':
-  case L'i':
-  case L'j':
-  case L'k':
-  case L'l':
-  case L'm':
-  case L'n':
-  case L'o':
-  case L'p':
-  case L'q':
-  case L'r':
-  case L's':
-  case L't':
-  case L'u':
-  case L'v':
-  case L'w':
-  case L'x':
-  case L'y':
-  case L'z':
   case L'A':
+    return L'a';
   case L'B':
+    return L'b';
   case L'C':
+    return L'c';
   case L'D':
+    return L'd';
   case L'E':
+    return L'e';
   case L'F':
+    return L'f';
   case L'G':
+    return L'g';
   case L'H':
+    return L'h';
   case L'I':
+    return L'i';
   case L'J':
+    return L'j';
   case L'K':
+    return L'k';
   case L'L':
+    return L'l';
   case L'M':
+    return L'm';
   case L'N':
+    return L'n';
   case L'O':
+    return L'o';
   case L'P':
+    return L'p';
   case L'Q':
+    return L'q';
   case L'R':
+    return L'r';
   case L'S':
+    return L's';
   case L'T':
+    return L't';
   case L'U':
+    return L'u';
   case L'V':
+    return L'v';
   case L'W':
+    return L'w';
   case L'X':
+    return L'x';
   case L'Y':
+    return L'y';
   case L'Z':
-    return true;
+    return L'z';
   default:
-    return false;
+    return wch;
   }
 }
 
-LIBC_INLINE constexpr bool isalnum(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t toupper(wchar_t wch) {
   switch (wch) {
   case L'a':
+    return L'A';
   case L'b':
+    return L'B';
   case L'c':
+    return L'C';
   case L'd':
+    return L'D';
   case L'e':
+    return L'E';
   case L'f':
+    return L'F';
   case L'g':
+    return L'G';
   case L'h':
+    return L'H';
   case L'i':
+    return L'I';
   case L'j':
+    return L'J';
   case L'k':
+    return L'K';
   case L'l':
+    return L'L';
   case L'm':
+    return L'M';
   case L'n':
+    return L'N';
   case L'o':
+    return L'O';
   case L'p':
+    return L'P';
   case L'q':
+    return L'Q';
   case L'r':
+    return L'R';
   case L's':
+    return L'S';
   case L't':
+    return L'T';
   case L'u':
+    return L'U';
   case L'v':
+    return L'V';
   case L'w':
+    return L'W';
   case L'x':
+    return L'X';
   case L'y':
+    return L'Y';
   case L'z':
-  case L'A':
-  case L'B':
-  case L'C':
-  case L'D':
-  case L'E':
-  case L'F':
-  case L'G':
-  case L'H':
-  case L'I':
-  case L'J':
-  case L'K':
-  case L'L':
-  case L'M':
-  case L'N':
-  case L'O':
-  case L'P':
-  case L'Q':
-  case L'R':
-  case L'S':
-  case L'T':
-  case L'U':
-  case L'V':
-  case L'W':
-  case L'X':
-  case L'Y':
-  case L'Z':
-  case L'0':
-  case L'1':
-  case L'2':
-  case L'3':
-  case L'4':
-  case L'5':
-  case L'6':
-  case L'7':
-  case L'8':
-  case L'9':
-    return true;
-  default:
-    return false;
-  }
-}
-
-LIBC_INLINE constexpr bool isspace(wchar_t wch) {
-  switch (wch) {
-  case L' ':
-  case L'\t':
-  case L'\n':
-  case L'\v':
-  case L'\f':
-  case L'\r':
-    return true;
-  default:
-    return false;
-  }
-}
-
-LIBC_INLINE constexpr bool isblank(wchar_t wch) {
-  switch (wch) {
-  case L' ':
-  case L'\t':
-    return true;
+    return L'Z';
   default:
-    return false;
+    return wch;
   }
 }
 
-LIBC_INLINE constexpr bool isgraph(wchar_t wch) {
-  return 0x20 < wch && wch < 0x7f;
-}
-
-LIBC_INLINE constexpr bool isprint(wchar_t wch) {
-  return (static_cast<unsigned>(wch) - ' ') < 95;
-}
-
-LIBC_INLINE constexpr bool isxdigit(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalpha(wchar_t wch) {
   switch (wch) {
   case L'a':
   case L'b':
@@ -302,304 +251,129 @@ LIBC_INLINE constexpr bool isxdigit(wchar_t wch) {
   case L'd':
   case L'e':
   case L'f':
+  case L'g':
+  case L'h':
+  case L'i':
+  case L'j':
+  case L'k':
+  case L'l':
+  case L'm':
+  case L'n':
+  case L'o':
+  case L'p':
+  case L'q':
+  case L'r':
+  case L's':
+  case L't':
+  case L'u':
+  case L'v':
+  case L'w':
+  case L'x':
+  case L'y':
+  case L'z':
   case L'A':
   case L'B':
   case L'C':
   case L'D':
   case L'E':
   case L'F':
-  case L'0':
-  case L'1':
-  case L'2':
-  case L'3':
-  case L'4':
-  case L'5':
-  case L'6':
-  case L'7':
-  case L'8':
-  case L'9':
-    return true;
-  default:
-    return false;
-  }
-}
-
-LIBC_INLINE constexpr bool iscntrl(wchar_t wch) {
-  return (wch < 0x20 || wch == 0x7f);
-}
-
-LIBC_INLINE constexpr bool ispunct(wchar_t wch) {
-  return !isalnum(wch) && isgraph(wch);
-}
-
-LIBC_INLINE constexpr wchar_t tolower(wchar_t wch) {
-  switch (wch) {
-  case L'A':
-    return L'a';
-  case L'B':
-    return L'b';
-  case L'C':
-    return L'c';
-  case L'D':
-    return L'd';
-  case L'E':
-    return L'e';
-  case L'F':
-    return L'f';
   case L'G':
-    return L'g';
   case L'H':
-    return L'h';
   case L'I':
-    return L'i';
   case L'J':
-    return L'j';
   case L'K':
-    return L'k';
   case L'L':
-    return L'l';
   case L'M':
-    return L'm';
   case L'N':
-    return L'n';
   case L'O':
-    return L'o';
   case L'P':
-    return L'p';
   case L'Q':
-    return L'q';
   case L'R':
-    return L'r';
   case L'S':
-    return L's';
   case L'T':
-    return L't';
   case L'U':
-    return L'u';
   case L'V':
-    return L'v';
   case L'W':
-    return L'w';
   case L'X':
-    return L'x';
   case L'Y':
-    return L'y';
   case L'Z':
-    return L'z';
+    return true;
   default:
-    return wch;
+    return false;
   }
 }
 
-LIBC_INLINE constexpr wchar_t toupper(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalnum(wchar_t wch) {
   switch (wch) {
   case L'a':
-    return L'A';
   case L'b':
-    return L'B';
   case L'c':
-    return L'C';
   case L'd':
-    return L'D';
   case L'e':
-    return L'E';
   case L'f':
-    return L'F';
   case L'g':
-    return L'G';
   case L'h':
-    return L'H';
   case L'i':
-    return L'I';
   case L'j':
-    return L'J';
   case L'k':
-    return L'K';
   case L'l':
-    return L'L';
   case L'm':
-    return L'M';
   case L'n':
-    return L'N';
   case L'o':
-    return L'O';
   case L'p':
-    return L'P';
   case L'q':
-    return L'Q';
   case L'r':
-    return L'R';
   case L's':
-    return L'S';
   case L't':
-    return L'T';
   case L'u':
-    return L'U';
   case L'v':
-    return L'V';
   case L'w':
-    return L'W';
   case L'x':
-    return L'X';
   case L'y':
-    return L'Y';
   case L'z':
-    return L'Z';
+  case L'A':
+  case L'B':
+  case L'C':
+  case L'D':
+  case L'E':
+  case L'F':
+  case L'G':
+  case L'H':
+  case L'I':
+  case L'J':
+  case L'K':
+  case L'L':
+  case L'M':
+  case L'N':
+  case L'O':
+  case L'P':
+  case L'Q':
+  case L'R':
+  case L'S':
+  case L'T':
+  case L'U':
+  case L'V':
+  case L'W':
+  case L'X':
+  case L'Y':
+  case L'Z':
+  case L'0':
+  case L'1':
+  case L'2':
+  case L'3':
+  case L'4':
+  case L'5':
+  case L'6':
+  case L'7':
+  case L'8':
+  case L'9':
+    return true;
   default:
-    return wch;
-  }
-}
-
-} // namespace ascii
-
-LIBC_INLINE constexpr bool islower(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::islower(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::islower(wch);
-  }
-  return lookup_properties(wch) & PropertyFlag::LOWER;
-#endif
-}
-
-LIBC_INLINE constexpr bool isupper(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::isupper(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::isupper(wch);
-  }
-  return lookup_properties(wch) & PropertyFlag::UPPER;
-#endif
-}
-
-LIBC_INLINE constexpr bool isdigit(wchar_t wch) {
-  // In C.UT8, only ASCII digits are considered digits
-  return ascii::isdigit(wch);
-}
-
-LIBC_INLINE constexpr bool isalpha(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::isalpha(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::isalpha(wch);
-  }
-  return lookup_properties(wch) & PropertyFlag::ALPHA;
-#endif
-}
-
-LIBC_INLINE constexpr bool isalnum(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::isalnum(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::isalnum(wch);
-  }
-  // Only need to check ALPHA, digit cases are covered by ASCII path
-  return lookup_properties(wch) & PropertyFlag::ALPHA;
-#endif
-}
-
-LIBC_INLINE constexpr bool isspace(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::isspace(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::isspace(wch);
-  }
-  return lookup_properties(wch) & PropertyFlag::SPACE;
-#endif
-}
-
-LIBC_INLINE constexpr bool isblank(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::isblank(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::isblank(wch);
-  }
-  return lookup_properties(wch) & PropertyFlag::BLANK;
-#endif
-}
-
-LIBC_INLINE constexpr bool isgraph(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::isgraph(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::isgraph(wch);
-  }
-  // print && !space
-  return (lookup_properties(wch) &
-          (PropertyFlag::PRINT | PropertyFlag::SPACE)) == PropertyFlag::PRINT;
-#endif
-}
-
-LIBC_INLINE constexpr bool isprint(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::isprint(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::isprint(wch);
-  }
-  return lookup_properties(wch) & PropertyFlag::PRINT;
-#endif
-}
-
-LIBC_INLINE constexpr bool isxdigit(wchar_t wch) {
-  // Hexadecimal digits are the same in C.UTF8 as in ASCII
-  return ascii::isxdigit(wch);
-}
-
-LIBC_INLINE constexpr bool iscntrl(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::iscntrl(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::iscntrl(wch);
-  }
-  return lookup_properties(wch) & PropertyFlag::CNTRL;
-#endif
-}
-
-LIBC_INLINE constexpr bool ispunct(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::ispunct(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::ispunct(wch);
-  }
-  return lookup_properties(wch) & PropertyFlag::PUNCT;
-#endif
-}
-
-LIBC_INLINE constexpr wchar_t tolower(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::tolower(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::tolower(wch);
-  }
-  // TODO: Add UTF8 implementation.
-  return wch;
-#endif
-}
-
-LIBC_INLINE constexpr wchar_t toupper(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
-  return ascii::toupper(wch);
-#else
-  if (static_cast<uint32_t>(wch) < 128) {
-    return ascii::toupper(wch);
+    return false;
   }
-  // TODO: Add UTF8 implementation.
-  return wch;
-#endif
 }
 
-LIBC_INLINE constexpr int b36_char_to_int(wchar_t wch) {
+LIBC_INLINE static constexpr int b36_char_to_int(wchar_t wch) {
   switch (wch) {
   case L'0':
     return 0;
@@ -787,6 +561,20 @@ LIBC_INLINE static constexpr wchar_t int_to_b36_wchar(int num) {
   }
 }
 
+LIBC_INLINE static constexpr bool isspace(wchar_t wch) {
+  switch (wch) {
+  case L' ':
+  case L'\t':
+  case L'\n':
+  case L'\v':
+  case L'\f':
+  case L'\r':
+    return true;
+  default:
+    return false;
+  }
+}
+
 // An overload which provides a way to compare input with specific character
 // values, when input can be of a regular or a wide character type.
 LIBC_INLINE static constexpr bool

diff  --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 64e15852c2ec3..b6729ba5eb269 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -290,18 +290,6 @@ add_libc_test(
     libc.src.__support.weak_avl
 )
 
-add_libc_test(
-  wctype_utils_test
-  SUITE
-    libc-support-tests
-  SRCS
-    wctype_utils_test.cpp
-  DEPENDS
-    libc.src.__support.wctype_utils
-    libc.src.__support.wctype.wctype_classification_utils
-    libc.src.__support.macros.properties.os
-)
-
 add_subdirectory(CPP)
 add_subdirectory(File)
 add_subdirectory(RPC)

diff  --git a/libc/test/src/__support/wctype_utils_test.cpp b/libc/test/src/__support/wctype_utils_test.cpp
deleted file mode 100644
index 9bc7e818cdbc6..0000000000000
--- a/libc/test/src/__support/wctype_utils_test.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-#include "src/__support/macros/config.h"
-#include "src/__support/wctype/wctype_classification_utils.h"
-#include "test/UnitTest/Test.h"
-
-namespace {
-
-namespace ascii_mode {
-#undef LIBC_CONF_WCTYPE_MODE
-#define LIBC_CONF_WCTYPE_MODE LIBC_WCTYPE_MODE_ASCII
-
-#undef LLVM_LIBC_SRC___SUPPORT_WCTYPE_UTILS_H
-#include "src/__support/wctype_utils.h"
-} // namespace ascii_mode
-
-namespace utf8_mode {
-#undef LIBC_CONF_WCTYPE_MODE
-#define LIBC_CONF_WCTYPE_MODE LIBC_WCTYPE_MODE_UTF8
-
-namespace LIBC_NAMESPACE_DECL {
-using ::LIBC_NAMESPACE::lookup_properties;
-using ::LIBC_NAMESPACE::PropertyFlag;
-} // namespace LIBC_NAMESPACE_DECL
-
-#undef LLVM_LIBC_SRC___SUPPORT_WCTYPE_UTILS_H
-#include "src/__support/wctype_utils.h"
-} // namespace utf8_mode
-
-struct TestCase {
-  uint32_t wc;
-  const char *name;
-  bool expected;
-};
-
-// Helper function to mark the sections of the ASCII table that are
-// punctuation characters. These are listed below:
-//  Decimal    |         Symbol
-//  -----------------------------------------
-//  33 -  47   |  ! " $ % & ' ( ) * + , - . /
-//  58 -  64   |  : ; < = > ? @
-//  91 -  96   |  [ \ ] ^ _ `
-// 123 - 126   |  { | } ~
-bool is_punctuation_character(int c) {
-  return ('!' <= c && c <= '/') || (':' <= c && c <= '@') ||
-         ('[' <= c && c <= '`') || ('{' <= c && c <= '~');
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsLowerAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::islower;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = c >= 'a' && c <= 'z';
-    EXPECT_EQ(islower(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
-      {0x03B1, "GREEK SMALL LETTER ALPHA", false},
-      {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(islower(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsLowerUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::islower;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = c >= 'a' && c <= 'z';
-    EXPECT_EQ(islower(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", true},
-      {0x03B1, "GREEK SMALL LETTER ALPHA", true},
-      {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(islower(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsUpperAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isupper;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = c >= 'A' && c <= 'Z';
-    EXPECT_EQ(isupper(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
-      {0x03B1, "GREEK SMALL LETTER ALPHA", false},
-      {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isupper(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsUpperUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isupper;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = c >= 'A' && c <= 'Z';
-    EXPECT_EQ(isupper(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
-      {0x03B1, "GREEK SMALL LETTER ALPHA", false},
-      {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isupper(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsAlphaAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isalpha;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
-    EXPECT_EQ(isalpha(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
-      {0x03B1, "GREEK SMALL LETTER ALPHA", false},
-      {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isalpha(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsAlphaUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isalpha;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
-    EXPECT_EQ(isalpha(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", true},
-      {0x03B1, "GREEK SMALL LETTER ALPHA", true},
-      {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isalpha(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsDigitAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isdigit;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= '0' && c <= '9');
-    EXPECT_EQ(isdigit(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
-      {0x0660, "ARABIC-INDIC DIGIT ZERO", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isdigit(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsDigitUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isdigit;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= '0' && c <= '9');
-    EXPECT_EQ(isdigit(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  // In C.UTF-8, isdigit only returns true for ASCII digits.
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
-      {0x0660, "ARABIC-INDIC DIGIT ZERO", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isdigit(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsAlnumAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isalnum;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
-                    (c >= 'A' && c <= 'Z');
-    EXPECT_EQ(isalnum(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
-      {0x0660, "ARABIC-INDIC DIGIT ZERO", false},
-      {0x0030, "DIGIT ZERO", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isalnum(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsAlnumUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isalnum;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
-                    (c >= 'A' && c <= 'Z');
-    EXPECT_EQ(isalnum(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", true},
-      {0x0660, "ARABIC-INDIC DIGIT ZERO", true},
-      {0x0030, "DIGIT ZERO", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isalnum(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsXDigitAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isxdigit;
-
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
-                    (c >= 'A' && c <= 'F');
-    EXPECT_EQ(isxdigit(static_cast<wchar_t>(c)), expected);
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsXDigitUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isxdigit;
-
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
-                    (c >= 'A' && c <= 'F');
-    EXPECT_EQ(isxdigit(static_cast<wchar_t>(c)), expected);
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsSpaceAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isspace;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c == ' ' || c == '\t' || c == '\n' || c == '\v' ||
-                     c == '\f' || c == '\r');
-    EXPECT_EQ(isspace(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A0, "NO-BREAK SPACE", false},
-      {0x2000, "EN QUAD", false},
-      {0x2028, "LINE SEPARATOR", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isspace(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsSpaceUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isspace;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c == ' ' || c == '\t' || c == '\n' || c == '\v' ||
-                     c == '\f' || c == '\r');
-    EXPECT_EQ(isspace(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A0, "NO-BREAK SPACE", false},
-      {0x1680, "OGHAM SPACE MARK", true},
-      {0x2000, "EN QUAD", true},
-      {0x2028, "LINE SEPARATOR", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isspace(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsBlankAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isblank;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c == ' ' || c == '\t');
-    EXPECT_EQ(isblank(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A0, "NO-BREAK SPACE", false},
-      {0x2000, "EN QUAD", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isblank(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsBlankUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isblank;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c == ' ' || c == '\t');
-    EXPECT_EQ(isblank(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A0, "NO-BREAK SPACE", false},
-      {0x1680, "OGHAM SPACE MARK", true},
-      {0x2000, "EN QUAD", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isblank(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsGraphAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isgraph;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c > 0x20 && c < 0x7f);
-    EXPECT_EQ(isgraph(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A0, "NO-BREAK SPACE", false},
-      {0x2000, "EN QUAD", false},
-      {0x2603, "SNOWMAN", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isgraph(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsGraphUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isgraph;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c > 0x20 && c < 0x7f);
-    EXPECT_EQ(isgraph(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A0, "NO-BREAK SPACE", true},
-      {0x2000, "EN QUAD", false},
-      {0x2603, "SNOWMAN", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isgraph(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsPrintAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isprint;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= 0x20 && c < 0x7f);
-    EXPECT_EQ(isprint(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A0, "NO-BREAK SPACE", false},
-      {0x2000, "EN QUAD", false},
-      {0x2603, "SNOWMAN", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isprint(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsPrintUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isprint;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c >= 0x20 && c < 0x7f);
-    EXPECT_EQ(isprint(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A0, "NO-BREAK SPACE", true},
-      {0x2000, "EN QUAD", true},
-      {0x2603, "SNOWMAN", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(isprint(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsPunctAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::isalnum;
-  using ascii_mode::LIBC_NAMESPACE::internal::isgraph;
-  using ascii_mode::LIBC_NAMESPACE::internal::ispunct;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = is_punctuation_character(c);
-    EXPECT_EQ(ispunct(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A1, "INVERTED EXCLAMATION MARK", false},
-      {0x2014, "EM DASH", false},
-      {0x20AC, "EURO SIGN", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(ispunct(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsPunctUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::isalnum;
-  using utf8_mode::LIBC_NAMESPACE::internal::isgraph;
-  using utf8_mode::LIBC_NAMESPACE::internal::ispunct;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = is_punctuation_character(c);
-    EXPECT_EQ(ispunct(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x00A1, "INVERTED EXCLAMATION MARK", true},
-      {0x2014, "EM DASH", true},
-      {0x20AC, "EURO SIGN", true},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(ispunct(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsCntrlAscii) {
-  using ascii_mode::LIBC_NAMESPACE::internal::iscntrl;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c < 0x20 || c == 0x7f);
-    EXPECT_EQ(iscntrl(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x0080, "PADDING CHARACTER", false},
-      {0x009F, "APPLICATION PROGRAM COMMAND", false},
-      {0x2028, "LINE SEPARATOR", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(iscntrl(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsCntrlUtf8) {
-  using utf8_mode::LIBC_NAMESPACE::internal::iscntrl;
-
-  // ASCII
-  for (int c = 0; c < 128; ++c) {
-    bool expected = (c < 0x20 || c == 0x7f);
-    EXPECT_EQ(iscntrl(static_cast<wchar_t>(c)), expected);
-  }
-
-  // Non ASCII
-  TestCase cases[] = {
-      {0x0080, "PADDING CHARACTER", true},
-      {0x009F, "APPLICATION PROGRAM COMMAND", true},
-      {0x2028, "LINE SEPARATOR", false},
-  };
-
-  for (const auto &tc : cases) {
-    EXPECT_EQ(iscntrl(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
-  }
-}
-
-} // namespace


        


More information about the libc-commits mailing list