[libc-commits] [libc] 2ba1eba - Revert "[libc][wctype] Implement internal UTF8 wctype classification functions" (#180186)
via libc-commits
libc-commits at lists.llvm.org
Fri Feb 6 04:51:01 PST 2026
Author: Marcell Leleszi
Date: 2026-02-06T13:50:56+01:00
New Revision: 2ba1eba4be3f5225a5dde6223c434a8bd24bb9b6
URL: https://github.com/llvm/llvm-project/commit/2ba1eba4be3f5225a5dde6223c434a8bd24bb9b6
DIFF: https://github.com/llvm/llvm-project/commit/2ba1eba4be3f5225a5dde6223c434a8bd24bb9b6.diff
LOG: Revert "[libc][wctype] Implement internal UTF8 wctype classification functions" (#180186)
Reverts llvm/llvm-project#174607
Breaks libc++
Added:
Modified:
libc/src/__support/CMakeLists.txt
libc/src/__support/wctype_utils.h
libc/test/src/__support/CMakeLists.txt
Removed:
libc/test/src/__support/wctype_utils_test.cpp
################################################################################
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index d771d3eb1920c..64cf368713221 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -156,22 +156,12 @@ add_header_library(
ctype_utils.h
)
-list(APPEND wctype_utils_deps
- libc.hdr.types.wchar_t
-)
-
-if ("${LIBC_CONF_WCTYPE_MODE}" STREQUAL "LIBC_WCTYPE_MODE_UTF8")
- list(APPEND wctype_utils_deps
- libc.src.__support.wctype.wctype_classification_utils
- )
-endif()
-
add_header_library(
wctype_utils
HDRS
wctype_utils.h
DEPENDS
- ${wctype_utils_deps}
+ libc.hdr.types.wchar_t
)
add_header_library(
diff --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h
index c738efb344f2c..7f17224104ffb 100644
--- a/libc/src/__support/wctype_utils.h
+++ b/libc/src/__support/wctype_utils.h
@@ -13,13 +13,6 @@
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"
-#define LIBC_WCTYPE_MODE_ASCII 0
-#define LIBC_WCTYPE_MODE_UTF8 1
-
-#if LIBC_CONF_WCTYPE_MODE == LIBC_WCTYPE_MODE_UTF8
-#include "src/__support/wctype/wctype_classification_utils.h"
-#endif
-
namespace LIBC_NAMESPACE_DECL {
namespace internal {
@@ -45,8 +38,8 @@ namespace internal {
// This assumes the character ranges are contiguous, which they aren't in
// EBCDIC. Technically we could use some smaller ranges, but that's even harder
// to read.
-namespace ascii {
-LIBC_INLINE constexpr bool islower(wchar_t wch) {
+
+LIBC_INLINE static constexpr bool islower(wchar_t wch) {
switch (wch) {
case L'a':
case L'b':
@@ -80,7 +73,7 @@ LIBC_INLINE constexpr bool islower(wchar_t wch) {
}
}
-LIBC_INLINE constexpr bool isupper(wchar_t wch) {
+LIBC_INLINE static constexpr bool isupper(wchar_t wch) {
switch (wch) {
case L'A':
case L'B':
@@ -114,7 +107,7 @@ LIBC_INLINE constexpr bool isupper(wchar_t wch) {
}
}
-LIBC_INLINE constexpr bool isdigit(wchar_t wch) {
+LIBC_INLINE static constexpr bool isdigit(wchar_t wch) {
switch (wch) {
case L'0':
case L'1':
@@ -132,169 +125,125 @@ LIBC_INLINE constexpr bool isdigit(wchar_t wch) {
}
}
-LIBC_INLINE constexpr bool isalpha(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t tolower(wchar_t wch) {
switch (wch) {
- case L'a':
- case L'b':
- case L'c':
- case L'd':
- case L'e':
- case L'f':
- case L'g':
- case L'h':
- case L'i':
- case L'j':
- case L'k':
- case L'l':
- case L'm':
- case L'n':
- case L'o':
- case L'p':
- case L'q':
- case L'r':
- case L's':
- case L't':
- case L'u':
- case L'v':
- case L'w':
- case L'x':
- case L'y':
- case L'z':
case L'A':
+ return L'a';
case L'B':
+ return L'b';
case L'C':
+ return L'c';
case L'D':
+ return L'd';
case L'E':
+ return L'e';
case L'F':
+ return L'f';
case L'G':
+ return L'g';
case L'H':
+ return L'h';
case L'I':
+ return L'i';
case L'J':
+ return L'j';
case L'K':
+ return L'k';
case L'L':
+ return L'l';
case L'M':
+ return L'm';
case L'N':
+ return L'n';
case L'O':
+ return L'o';
case L'P':
+ return L'p';
case L'Q':
+ return L'q';
case L'R':
+ return L'r';
case L'S':
+ return L's';
case L'T':
+ return L't';
case L'U':
+ return L'u';
case L'V':
+ return L'v';
case L'W':
+ return L'w';
case L'X':
+ return L'x';
case L'Y':
+ return L'y';
case L'Z':
- return true;
+ return L'z';
default:
- return false;
+ return wch;
}
}
-LIBC_INLINE constexpr bool isalnum(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t toupper(wchar_t wch) {
switch (wch) {
case L'a':
+ return L'A';
case L'b':
+ return L'B';
case L'c':
+ return L'C';
case L'd':
+ return L'D';
case L'e':
+ return L'E';
case L'f':
+ return L'F';
case L'g':
+ return L'G';
case L'h':
+ return L'H';
case L'i':
+ return L'I';
case L'j':
+ return L'J';
case L'k':
+ return L'K';
case L'l':
+ return L'L';
case L'm':
+ return L'M';
case L'n':
+ return L'N';
case L'o':
+ return L'O';
case L'p':
+ return L'P';
case L'q':
+ return L'Q';
case L'r':
+ return L'R';
case L's':
+ return L'S';
case L't':
+ return L'T';
case L'u':
+ return L'U';
case L'v':
+ return L'V';
case L'w':
+ return L'W';
case L'x':
+ return L'X';
case L'y':
+ return L'Y';
case L'z':
- case L'A':
- case L'B':
- case L'C':
- case L'D':
- case L'E':
- case L'F':
- case L'G':
- case L'H':
- case L'I':
- case L'J':
- case L'K':
- case L'L':
- case L'M':
- case L'N':
- case L'O':
- case L'P':
- case L'Q':
- case L'R':
- case L'S':
- case L'T':
- case L'U':
- case L'V':
- case L'W':
- case L'X':
- case L'Y':
- case L'Z':
- case L'0':
- case L'1':
- case L'2':
- case L'3':
- case L'4':
- case L'5':
- case L'6':
- case L'7':
- case L'8':
- case L'9':
- return true;
- default:
- return false;
- }
-}
-
-LIBC_INLINE constexpr bool isspace(wchar_t wch) {
- switch (wch) {
- case L' ':
- case L'\t':
- case L'\n':
- case L'\v':
- case L'\f':
- case L'\r':
- return true;
- default:
- return false;
- }
-}
-
-LIBC_INLINE constexpr bool isblank(wchar_t wch) {
- switch (wch) {
- case L' ':
- case L'\t':
- return true;
+ return L'Z';
default:
- return false;
+ return wch;
}
}
-LIBC_INLINE constexpr bool isgraph(wchar_t wch) {
- return 0x20 < wch && wch < 0x7f;
-}
-
-LIBC_INLINE constexpr bool isprint(wchar_t wch) {
- return (static_cast<unsigned>(wch) - ' ') < 95;
-}
-
-LIBC_INLINE constexpr bool isxdigit(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalpha(wchar_t wch) {
switch (wch) {
case L'a':
case L'b':
@@ -302,304 +251,129 @@ LIBC_INLINE constexpr bool isxdigit(wchar_t wch) {
case L'd':
case L'e':
case L'f':
+ case L'g':
+ case L'h':
+ case L'i':
+ case L'j':
+ case L'k':
+ case L'l':
+ case L'm':
+ case L'n':
+ case L'o':
+ case L'p':
+ case L'q':
+ case L'r':
+ case L's':
+ case L't':
+ case L'u':
+ case L'v':
+ case L'w':
+ case L'x':
+ case L'y':
+ case L'z':
case L'A':
case L'B':
case L'C':
case L'D':
case L'E':
case L'F':
- case L'0':
- case L'1':
- case L'2':
- case L'3':
- case L'4':
- case L'5':
- case L'6':
- case L'7':
- case L'8':
- case L'9':
- return true;
- default:
- return false;
- }
-}
-
-LIBC_INLINE constexpr bool iscntrl(wchar_t wch) {
- return (wch < 0x20 || wch == 0x7f);
-}
-
-LIBC_INLINE constexpr bool ispunct(wchar_t wch) {
- return !isalnum(wch) && isgraph(wch);
-}
-
-LIBC_INLINE constexpr wchar_t tolower(wchar_t wch) {
- switch (wch) {
- case L'A':
- return L'a';
- case L'B':
- return L'b';
- case L'C':
- return L'c';
- case L'D':
- return L'd';
- case L'E':
- return L'e';
- case L'F':
- return L'f';
case L'G':
- return L'g';
case L'H':
- return L'h';
case L'I':
- return L'i';
case L'J':
- return L'j';
case L'K':
- return L'k';
case L'L':
- return L'l';
case L'M':
- return L'm';
case L'N':
- return L'n';
case L'O':
- return L'o';
case L'P':
- return L'p';
case L'Q':
- return L'q';
case L'R':
- return L'r';
case L'S':
- return L's';
case L'T':
- return L't';
case L'U':
- return L'u';
case L'V':
- return L'v';
case L'W':
- return L'w';
case L'X':
- return L'x';
case L'Y':
- return L'y';
case L'Z':
- return L'z';
+ return true;
default:
- return wch;
+ return false;
}
}
-LIBC_INLINE constexpr wchar_t toupper(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalnum(wchar_t wch) {
switch (wch) {
case L'a':
- return L'A';
case L'b':
- return L'B';
case L'c':
- return L'C';
case L'd':
- return L'D';
case L'e':
- return L'E';
case L'f':
- return L'F';
case L'g':
- return L'G';
case L'h':
- return L'H';
case L'i':
- return L'I';
case L'j':
- return L'J';
case L'k':
- return L'K';
case L'l':
- return L'L';
case L'm':
- return L'M';
case L'n':
- return L'N';
case L'o':
- return L'O';
case L'p':
- return L'P';
case L'q':
- return L'Q';
case L'r':
- return L'R';
case L's':
- return L'S';
case L't':
- return L'T';
case L'u':
- return L'U';
case L'v':
- return L'V';
case L'w':
- return L'W';
case L'x':
- return L'X';
case L'y':
- return L'Y';
case L'z':
- return L'Z';
+ case L'A':
+ case L'B':
+ case L'C':
+ case L'D':
+ case L'E':
+ case L'F':
+ case L'G':
+ case L'H':
+ case L'I':
+ case L'J':
+ case L'K':
+ case L'L':
+ case L'M':
+ case L'N':
+ case L'O':
+ case L'P':
+ case L'Q':
+ case L'R':
+ case L'S':
+ case L'T':
+ case L'U':
+ case L'V':
+ case L'W':
+ case L'X':
+ case L'Y':
+ case L'Z':
+ case L'0':
+ case L'1':
+ case L'2':
+ case L'3':
+ case L'4':
+ case L'5':
+ case L'6':
+ case L'7':
+ case L'8':
+ case L'9':
+ return true;
default:
- return wch;
- }
-}
-
-} // namespace ascii
-
-LIBC_INLINE constexpr bool islower(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::islower(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::islower(wch);
- }
- return lookup_properties(wch) & PropertyFlag::LOWER;
-#endif
-}
-
-LIBC_INLINE constexpr bool isupper(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::isupper(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::isupper(wch);
- }
- return lookup_properties(wch) & PropertyFlag::UPPER;
-#endif
-}
-
-LIBC_INLINE constexpr bool isdigit(wchar_t wch) {
- // In C.UTF-8, only ASCII digits are considered digits
- return ascii::isdigit(wch);
-}
-
-LIBC_INLINE constexpr bool isalpha(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::isalpha(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::isalpha(wch);
- }
- return lookup_properties(wch) & PropertyFlag::ALPHA;
-#endif
-}
-
-LIBC_INLINE constexpr bool isalnum(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::isalnum(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::isalnum(wch);
- }
- // Only need to check ALPHA, digit cases are covered by ASCII path
- return lookup_properties(wch) & PropertyFlag::ALPHA;
-#endif
-}
-
-LIBC_INLINE constexpr bool isspace(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::isspace(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::isspace(wch);
- }
- return lookup_properties(wch) & PropertyFlag::SPACE;
-#endif
-}
-
-LIBC_INLINE constexpr bool isblank(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::isblank(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::isblank(wch);
- }
- return lookup_properties(wch) & PropertyFlag::BLANK;
-#endif
-}
-
-LIBC_INLINE constexpr bool isgraph(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::isgraph(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::isgraph(wch);
- }
- // print && !space
- return (lookup_properties(wch) &
- (PropertyFlag::PRINT | PropertyFlag::SPACE)) == PropertyFlag::PRINT;
-#endif
-}
-
-LIBC_INLINE constexpr bool isprint(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::isprint(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::isprint(wch);
- }
- return lookup_properties(wch) & PropertyFlag::PRINT;
-#endif
-}
-
-LIBC_INLINE constexpr bool isxdigit(wchar_t wch) {
- // Hexadecimal digits are the same in C.UTF8 as in ASCII
- return ascii::isxdigit(wch);
-}
-
-LIBC_INLINE constexpr bool iscntrl(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::iscntrl(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::iscntrl(wch);
- }
- return lookup_properties(wch) & PropertyFlag::CNTRL;
-#endif
-}
-
-LIBC_INLINE constexpr bool ispunct(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::ispunct(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::ispunct(wch);
- }
- return lookup_properties(wch) & PropertyFlag::PUNCT;
-#endif
-}
-
-LIBC_INLINE constexpr wchar_t tolower(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::tolower(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::tolower(wch);
- }
- // TODO: Add UTF8 implementation.
- return wch;
-#endif
-}
-
-LIBC_INLINE constexpr wchar_t toupper(wchar_t wch) {
-#if LIBC_CONF_WCTYPE_MODE != LIBC_WCTYPE_MODE_UTF8
- return ascii::toupper(wch);
-#else
- if (static_cast<uint32_t>(wch) < 128) {
- return ascii::toupper(wch);
+ return false;
}
- // TODO: Add UTF8 implementation.
- return wch;
-#endif
}
-LIBC_INLINE constexpr int b36_char_to_int(wchar_t wch) {
+LIBC_INLINE static constexpr int b36_char_to_int(wchar_t wch) {
switch (wch) {
case L'0':
return 0;
@@ -787,6 +561,20 @@ LIBC_INLINE static constexpr wchar_t int_to_b36_wchar(int num) {
}
}
+LIBC_INLINE static constexpr bool isspace(wchar_t wch) {
+ switch (wch) {
+ case L' ':
+ case L'\t':
+ case L'\n':
+ case L'\v':
+ case L'\f':
+ case L'\r':
+ return true;
+ default:
+ return false;
+ }
+}
+
// An overload which provides a way to compare input with specific character
// values, when input can be of a regular or a wide character type.
LIBC_INLINE static constexpr bool
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 64e15852c2ec3..b6729ba5eb269 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -290,18 +290,6 @@ add_libc_test(
libc.src.__support.weak_avl
)
-add_libc_test(
- wctype_utils_test
- SUITE
- libc-support-tests
- SRCS
- wctype_utils_test.cpp
- DEPENDS
- libc.src.__support.wctype_utils
- libc.src.__support.wctype.wctype_classification_utils
- libc.src.__support.macros.properties.os
-)
-
add_subdirectory(CPP)
add_subdirectory(File)
add_subdirectory(RPC)
diff --git a/libc/test/src/__support/wctype_utils_test.cpp b/libc/test/src/__support/wctype_utils_test.cpp
deleted file mode 100644
index 9bc7e818cdbc6..0000000000000
--- a/libc/test/src/__support/wctype_utils_test.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-#include "src/__support/macros/config.h"
-#include "src/__support/wctype/wctype_classification_utils.h"
-#include "test/UnitTest/Test.h"
-
-namespace {
-
-namespace ascii_mode {
-#undef LIBC_CONF_WCTYPE_MODE
-#define LIBC_CONF_WCTYPE_MODE LIBC_WCTYPE_MODE_ASCII
-
-#undef LLVM_LIBC_SRC___SUPPORT_WCTYPE_UTILS_H
-#include "src/__support/wctype_utils.h"
-} // namespace ascii_mode
-
-namespace utf8_mode {
-#undef LIBC_CONF_WCTYPE_MODE
-#define LIBC_CONF_WCTYPE_MODE LIBC_WCTYPE_MODE_UTF8
-
-namespace LIBC_NAMESPACE_DECL {
-using ::LIBC_NAMESPACE::lookup_properties;
-using ::LIBC_NAMESPACE::PropertyFlag;
-} // namespace LIBC_NAMESPACE_DECL
-
-#undef LLVM_LIBC_SRC___SUPPORT_WCTYPE_UTILS_H
-#include "src/__support/wctype_utils.h"
-} // namespace utf8_mode
-
-struct TestCase {
- uint32_t wc;
- const char *name;
- bool expected;
-};
-
-// Helper function to mark the sections of the ASCII table that are
-// punctuation characters. These are listed below:
-// Decimal | Symbol
-// -----------------------------------------
-// 33 - 47 | ! " # $ % & ' ( ) * + , - . /
-// 58 - 64 | : ; < = > ? @
-// 91 - 96 | [ \ ] ^ _ `
-// 123 - 126 | { | } ~
-bool is_punctuation_character(int c) {
- return ('!' <= c && c <= '/') || (':' <= c && c <= '@') ||
- ('[' <= c && c <= '`') || ('{' <= c && c <= '~');
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsLowerAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::islower;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = c >= 'a' && c <= 'z';
- EXPECT_EQ(islower(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
- {0x03B1, "GREEK SMALL LETTER ALPHA", false},
- {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(islower(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsLowerUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::islower;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = c >= 'a' && c <= 'z';
- EXPECT_EQ(islower(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", true},
- {0x03B1, "GREEK SMALL LETTER ALPHA", true},
- {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(islower(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsUpperAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isupper;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = c >= 'A' && c <= 'Z';
- EXPECT_EQ(isupper(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
- {0x03B1, "GREEK SMALL LETTER ALPHA", false},
- {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isupper(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsUpperUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isupper;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = c >= 'A' && c <= 'Z';
- EXPECT_EQ(isupper(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
- {0x03B1, "GREEK SMALL LETTER ALPHA", false},
- {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isupper(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsAlphaAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isalpha;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
- EXPECT_EQ(isalpha(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
- {0x03B1, "GREEK SMALL LETTER ALPHA", false},
- {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isalpha(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsAlphaUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isalpha;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
- EXPECT_EQ(isalpha(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", true},
- {0x03B1, "GREEK SMALL LETTER ALPHA", true},
- {0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isalpha(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsDigitAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isdigit;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= '0' && c <= '9');
- EXPECT_EQ(isdigit(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
- {0x0660, "ARABIC-INDIC DIGIT ZERO", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isdigit(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsDigitUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isdigit;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= '0' && c <= '9');
- EXPECT_EQ(isdigit(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- // In C.UTF-8, isdigit only returns true for ASCII digits.
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
- {0x0660, "ARABIC-INDIC DIGIT ZERO", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isdigit(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsAlnumAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isalnum;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z');
- EXPECT_EQ(isalnum(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", false},
- {0x0660, "ARABIC-INDIC DIGIT ZERO", false},
- {0x0030, "DIGIT ZERO", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isalnum(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsAlnumUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isalnum;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z');
- EXPECT_EQ(isalnum(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00E9, "LATIN SMALL LETTER E WITH ACUTE", true},
- {0x0660, "ARABIC-INDIC DIGIT ZERO", true},
- {0x0030, "DIGIT ZERO", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isalnum(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsXDigitAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isxdigit;
-
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
- (c >= 'A' && c <= 'F');
- EXPECT_EQ(isxdigit(static_cast<wchar_t>(c)), expected);
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsXDigitUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isxdigit;
-
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
- (c >= 'A' && c <= 'F');
- EXPECT_EQ(isxdigit(static_cast<wchar_t>(c)), expected);
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsSpaceAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isspace;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c == ' ' || c == '\t' || c == '\n' || c == '\v' ||
- c == '\f' || c == '\r');
- EXPECT_EQ(isspace(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A0, "NO-BREAK SPACE", false},
- {0x2000, "EN QUAD", false},
- {0x2028, "LINE SEPARATOR", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isspace(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsSpaceUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isspace;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c == ' ' || c == '\t' || c == '\n' || c == '\v' ||
- c == '\f' || c == '\r');
- EXPECT_EQ(isspace(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A0, "NO-BREAK SPACE", false},
- {0x1680, "OGHAM SPACE MARK", true},
- {0x2000, "EN QUAD", true},
- {0x2028, "LINE SEPARATOR", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isspace(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsBlankAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isblank;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c == ' ' || c == '\t');
- EXPECT_EQ(isblank(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A0, "NO-BREAK SPACE", false},
- {0x2000, "EN QUAD", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isblank(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsBlankUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isblank;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c == ' ' || c == '\t');
- EXPECT_EQ(isblank(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A0, "NO-BREAK SPACE", false},
- {0x1680, "OGHAM SPACE MARK", true},
- {0x2000, "EN QUAD", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isblank(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsGraphAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isgraph;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c > 0x20 && c < 0x7f);
- EXPECT_EQ(isgraph(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A0, "NO-BREAK SPACE", false},
- {0x2000, "EN QUAD", false},
- {0x2603, "SNOWMAN", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isgraph(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsGraphUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isgraph;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c > 0x20 && c < 0x7f);
- EXPECT_EQ(isgraph(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A0, "NO-BREAK SPACE", true},
- {0x2000, "EN QUAD", false},
- {0x2603, "SNOWMAN", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isgraph(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsPrintAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isprint;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= 0x20 && c < 0x7f);
- EXPECT_EQ(isprint(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A0, "NO-BREAK SPACE", false},
- {0x2000, "EN QUAD", false},
- {0x2603, "SNOWMAN", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isprint(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsPrintUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isprint;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c >= 0x20 && c < 0x7f);
- EXPECT_EQ(isprint(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A0, "NO-BREAK SPACE", true},
- {0x2000, "EN QUAD", true},
- {0x2603, "SNOWMAN", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(isprint(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsPunctAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::isalnum;
- using ascii_mode::LIBC_NAMESPACE::internal::isgraph;
- using ascii_mode::LIBC_NAMESPACE::internal::ispunct;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = is_punctuation_character(c);
- EXPECT_EQ(ispunct(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A1, "INVERTED EXCLAMATION MARK", false},
- {0x2014, "EM DASH", false},
- {0x20AC, "EURO SIGN", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(ispunct(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsPunctUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::isalnum;
- using utf8_mode::LIBC_NAMESPACE::internal::isgraph;
- using utf8_mode::LIBC_NAMESPACE::internal::ispunct;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = is_punctuation_character(c);
- EXPECT_EQ(ispunct(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x00A1, "INVERTED EXCLAMATION MARK", true},
- {0x2014, "EM DASH", true},
- {0x20AC, "EURO SIGN", true},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(ispunct(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsCntrlAscii) {
- using ascii_mode::LIBC_NAMESPACE::internal::iscntrl;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c < 0x20 || c == 0x7f);
- EXPECT_EQ(iscntrl(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x0080, "PADDING CHARACTER", false},
- {0x009F, "APPLICATION PROGRAM COMMAND", false},
- {0x2028, "LINE SEPARATOR", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(iscntrl(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-TEST(LlvmLibcWctypeUtilsTest, IsCntrlUtf8) {
- using utf8_mode::LIBC_NAMESPACE::internal::iscntrl;
-
- // ASCII
- for (int c = 0; c < 128; ++c) {
- bool expected = (c < 0x20 || c == 0x7f);
- EXPECT_EQ(iscntrl(static_cast<wchar_t>(c)), expected);
- }
-
- // Non ASCII
- TestCase cases[] = {
- {0x0080, "PADDING CHARACTER", true},
- {0x009F, "APPLICATION PROGRAM COMMAND", true},
- {0x2028, "LINE SEPARATOR", false},
- };
-
- for (const auto &tc : cases) {
- EXPECT_EQ(iscntrl(static_cast<wchar_t>(tc.wc)), tc.expected) << tc.name;
- }
-}
-
-} // namespace
More information about the libc-commits
mailing list