[libc-commits] [libc] [libc] Use function overloads to make string parsing code more generic. (PR #167417)
via libc-commits
libc-commits at lists.llvm.org
Mon Nov 10 15:49:59 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: Alexey Samsonov (vonosmas)
<details>
<summary>Changes</summary>
ctype_utils/wctype_utils were chaged in 120689e46679c6db37cd9e839ec0721e80a22d4f and e7f7973899f76773ae6e9a6b1e8c7e9f9cc5cb56, respectively to operate on char/wchar_t. Now we can switch to the overloaded names (e.g. have noth `isspace(char` and `isspace(wchar_t)`) to simplify the templatized strtointeger implementation from 315dfe5865962d8a3d60e21d1fffce5214fe54ef and make it easier to potentially add templatized strtofloat implementation.
---
Full diff: https://github.com/llvm/llvm-project/pull/167417.diff
4 Files Affected:
- (modified) libc/src/__support/ctype_utils.h (+7)
- (modified) libc/src/__support/str_to_integer.h (+15-43)
- (modified) libc/src/__support/wctype_utils.h (+17-10)
- (modified) libc/src/wctype/iswalpha.cpp (+1-1)
``````````diff
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 61b7a0aeb5b67..b0f9ece5d1a25 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -578,6 +578,13 @@ LIBC_INLINE static constexpr bool isgraph(char ch) {
return 0x20 < ch && ch < 0x7f;
}
+// An overload which provides a way to compare input with specific character
+// values, when input can be of a regular or a wide character type.
+LIBC_INLINE static constexpr bool is_one_of(char ch, char c_value,
+ [[maybe_unused]] wchar_t) {
+ return (ch == c_value);
+}
+
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index ba3f49fa2f47b..f9154ffc90c7e 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -31,22 +31,14 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal {
// Returns the idx to the first character in src that is not a whitespace
-// character (as determined by isspace() / iswspace())
+// character (as determined by isspace())
template <typename CharType>
LIBC_INLINE size_t
first_non_whitespace(const CharType *__restrict src,
size_t src_len = cpp::numeric_limits<size_t>::max()) {
size_t src_cur = 0;
- while (src_cur < src_len) {
- if constexpr (cpp::is_same_v<CharType, char>) {
- if (!internal::isspace(src[src_cur]))
- break;
- } else {
- if (!internal::iswspace(src[src_cur]))
- break;
- }
- ++src_cur;
- }
+ for (; src_cur < src_len && internal::isspace(src[src_cur]); ++src_cur)
+ ;
return src_cur;
}
@@ -54,11 +46,11 @@ first_non_whitespace(const CharType *__restrict src,
// plus sign, minus sign, or neither.
template <typename CharType>
LIBC_INLINE static int get_sign(const CharType *__restrict src) {
- if constexpr (cpp::is_same_v<CharType, char>) {
- return (src[0] == '+') ? 1 : (src[0] == '-' ? -1 : 0);
- } else {
- return (src[0] == L'+') ? 1 : (src[0] == L'-' ? -1 : 0);
- }
+ if (is_one_of(src[0], '+', L'+'))
+ return 1;
+ if (is_one_of(src[0], '-', L'-'))
+ return -1;
+ return 0;
}
// checks if the next 3 characters of the string pointer are the start of a
@@ -68,13 +60,9 @@ LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
size_t src_len) {
if (src_len < 3)
return false;
- if constexpr (cpp::is_same_v<CharType, char>) {
- return src[0] == '0' && tolower(src[1]) == 'x' && isalnum(src[2]) &&
- b36_char_to_int(src[2]) < 16;
- } else {
- return src[0] == L'0' && towlower(src[1]) == L'x' && iswalnum(src[2]) &&
- b36_wchar_to_int(src[2]) < 16;
- }
+ return is_one_of(src[0], '0', L'0') &&
+ is_one_of(tolower(src[1]), 'x', L'x') && isalnum(src[2]) &&
+ b36_char_to_int(src[2]) < 16;
}
// Takes the address of the string pointer and parses the base from the start of
@@ -90,14 +78,8 @@ LIBC_INLINE static int infer_base(const CharType *__restrict src,
// An octal number is defined as "the prefix 0 optionally followed by a
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
// number that starts with 0, including just 0, is an octal number.
- if (src_len > 0) {
- if constexpr (cpp::is_same_v<CharType, char>) {
- if (src[0] == '0')
- return 8;
- } else {
- if (src[0] == L'0')
- return 8;
- }
+ if (src_len > 0 && internal::is_one_of(src[0], '0', L'0')) {
+ return 8;
}
// A decimal number is defined as beginning "with a nonzero digit and
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
@@ -150,18 +132,8 @@ strtointeger(const CharType *__restrict src, int base,
bool is_number = false;
int error_val = 0;
ResultType result = 0;
- while (src_cur < src_len) {
- int cur_digit;
- if constexpr (cpp::is_same_v<CharType, char>) {
- if (!isalnum(src[src_cur]))
- break;
- cur_digit = b36_char_to_int(src[src_cur]);
- } else {
- if (!iswalnum(src[src_cur]))
- break;
- cur_digit = b36_wchar_to_int(src[src_cur]);
- }
-
+ while (src_cur < src_len && isalnum(src[src_cur])) {
+ int cur_digit = b36_char_to_int(src[src_cur]);
if (cur_digit >= base)
break;
diff --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h
index 60b6afb928475..f04efd21db595 100644
--- a/libc/src/__support/wctype_utils.h
+++ b/libc/src/__support/wctype_utils.h
@@ -31,7 +31,7 @@ namespace internal {
// Similarly, do not change these fumarks to show your new solution is faster,
// as well as a way to support non-Anctions to use case ranges. e.g.
-// bool iswlower(wchar_t ch) {
+// bool islower(wchar_t ch) {
// switch(ch) {
// case L'a'...L'z':
// return true;
@@ -41,7 +41,7 @@ namespace internal {
// EBCDIC. Technically we could use some smaller ranges, but that's even harder
// to read.
-LIBC_INLINE static constexpr bool iswlower(wchar_t wch) {
+LIBC_INLINE static constexpr bool islower(wchar_t wch) {
switch (wch) {
case L'a':
case L'b':
@@ -75,7 +75,7 @@ LIBC_INLINE static constexpr bool iswlower(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr bool iswupper(wchar_t wch) {
+LIBC_INLINE static constexpr bool isupper(wchar_t wch) {
switch (wch) {
case L'A':
case L'B':
@@ -109,7 +109,7 @@ LIBC_INLINE static constexpr bool iswupper(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) {
+LIBC_INLINE static constexpr bool isdigit(wchar_t wch) {
switch (wch) {
case L'0':
case L'1':
@@ -127,7 +127,7 @@ LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t tolower(wchar_t wch) {
switch (wch) {
case L'A':
return L'a';
@@ -186,7 +186,7 @@ LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t toupper(wchar_t wch) {
switch (wch) {
case L'a':
return L'A';
@@ -245,7 +245,7 @@ LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalpha(wchar_t wch) {
switch (wch) {
case L'a':
case L'b':
@@ -305,7 +305,7 @@ LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalnum(wchar_t wch) {
switch (wch) {
case L'a':
case L'b':
@@ -375,7 +375,7 @@ LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr int b36_wchar_to_int(wchar_t wch) {
+LIBC_INLINE static constexpr int b36_char_to_int(wchar_t wch) {
switch (wch) {
case L'0':
return 0;
@@ -563,7 +563,7 @@ LIBC_INLINE static constexpr wchar_t int_to_b36_wchar(int num) {
}
}
-LIBC_INLINE static constexpr bool iswspace(wchar_t wch) {
+LIBC_INLINE static constexpr bool isspace(wchar_t wch) {
switch (wch) {
case L' ':
case L'\t':
@@ -577,6 +577,13 @@ LIBC_INLINE static constexpr bool iswspace(wchar_t wch) {
}
}
+// An overload which provides a way to compare input with specific character
+// values, when input can be of a regular or a wide character type.
+LIBC_INLINE static constexpr bool is_one_of(wchar_t ch, [[maybe_unused]] char,
+ wchar_t wc_value) {
+ return (ch == wc_value);
+}
+
// ------------------------------------------------------
// Rationale: Since these classification functions are
// called in other functions, we will avoid the overhead
diff --git a/libc/src/wctype/iswalpha.cpp b/libc/src/wctype/iswalpha.cpp
index e151363b88d0b..01ceac8f68b23 100644
--- a/libc/src/wctype/iswalpha.cpp
+++ b/libc/src/wctype/iswalpha.cpp
@@ -15,7 +15,7 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) {
- return internal::iswalpha(static_cast<wchar_t>(c));
+ return internal::isalpha(static_cast<wchar_t>(c));
}
} // namespace LIBC_NAMESPACE_DECL
``````````
</details>
https://github.com/llvm/llvm-project/pull/167417
More information about the libc-commits
mailing list