[libc-commits] [libc] [llvm] [libc] Change ctype to be encoding independent (PR #110574)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Tue Dec 3 11:37:57 PST 2024
https://github.com/michaelrj-google updated https://github.com/llvm/llvm-project/pull/110574
>From 79a0b0c0ddd562ae4cc237940ad70e85307cbcfe Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Wed, 25 Sep 2024 14:52:14 -0700
Subject: [PATCH 1/5] [libc] Change ctype to be encoding independent
The previous implementation of the ctype functions assumed ASCII.
This patch replaces it with a switch/case implementation that looks odd, but
is actually easier for the compiler to understand and optimize, and that
makes these functions independent of the character encoding.
---
libc/src/__support/ctype_utils.h | 298 ++++++++++++++++++++++++++++---
1 file changed, 275 insertions(+), 23 deletions(-)
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 91f6ce8cabd8d0..2e9b23d3bc37f3 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -15,44 +15,296 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-// ------------------------------------------------------
-// Rationale: Since these classification functions are
-// called in other functions, we will avoid the overhead
-// of a function call by inlining them.
-// ------------------------------------------------------
+// -----------------------------------------------------------------------------
+// ****************** WARNING ******************
+// ****************** DO NOT TRY TO OPTIMIZE THESE FUNCTIONS! ******************
+// -----------------------------------------------------------------------------
+// This switch/case form is easier for the compiler to understand, and is
+// optimized into a form that is almost always the same as or better than
+// versions written by hand (see https://godbolt.org/z/qvrebqvvr). Also this
+// form makes these functions encoding independent. If you want to rewrite these
+// functions, make sure you have benchmarks to show your new solution is faster,
+// as well as a way to support non-ASCII character encodings.
-LIBC_INLINE static constexpr bool isalpha(unsigned ch) {
- return (ch | 32) - 'a' < 26;
+LIBC_INLINE static constexpr bool islower(int ch) {
+ switch (ch) {
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+ return true;
+ default:
+ return false;
+ }
}
-LIBC_INLINE static constexpr bool isdigit(unsigned ch) {
- return (ch - '0') < 10;
+LIBC_INLINE static constexpr bool isupper(int ch) {
+ switch (ch) {
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+ return true;
+ default:
+ return false;
+ }
}
-LIBC_INLINE static constexpr bool isalnum(unsigned ch) {
- return isalpha(ch) || isdigit(ch);
+LIBC_INLINE static constexpr bool isdigit(int ch) {
+ switch (ch) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return true;
+ default:
+ return false;
+ }
}
-LIBC_INLINE static constexpr bool isgraph(unsigned ch) {
- return 0x20 < ch && ch < 0x7f;
+LIBC_INLINE static constexpr int tolower(int ch) {
+ switch (ch) {
+ case 'A':
+ return 'a';
+ case 'B':
+ return 'b';
+ case 'C':
+ return 'c';
+ case 'D':
+ return 'd';
+ case 'E':
+ return 'e';
+ case 'F':
+ return 'f';
+ case 'G':
+ return 'g';
+ case 'H':
+ return 'h';
+ case 'I':
+ return 'i';
+ case 'J':
+ return 'j';
+ case 'K':
+ return 'k';
+ case 'L':
+ return 'l';
+ case 'M':
+ return 'm';
+ case 'N':
+ return 'n';
+ case 'O':
+ return 'o';
+ case 'P':
+ return 'p';
+ case 'Q':
+ return 'q';
+ case 'R':
+ return 'r';
+ case 'S':
+ return 's';
+ case 'T':
+ return 't';
+ case 'U':
+ return 'u';
+ case 'V':
+ return 'v';
+ case 'W':
+ return 'w';
+ case 'X':
+ return 'x';
+ case 'Y':
+ return 'y';
+ case 'Z':
+ return 'z';
+ default:
+ return ch;
+ }
+}
+
+LIBC_INLINE static constexpr bool isalpha(int ch) {
+ switch (tolower(ch)) {
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+ return true;
+ default:
+ return false;
+ }
}
-LIBC_INLINE static constexpr bool islower(unsigned ch) {
- return (ch - 'a') < 26;
+LIBC_INLINE static constexpr bool isalnum(int ch) {
+ return isalpha(ch) || isdigit(ch);
}
-LIBC_INLINE static constexpr bool isupper(unsigned ch) {
- return (ch - 'A') < 26;
+LIBC_INLINE static constexpr int b36_char_to_int(int ch) {
+ switch (tolower(ch)) {
+ case '0':
+ return 0;
+ case '1':
+ return 1;
+ case '2':
+ return 2;
+ case '3':
+ return 3;
+ case '4':
+ return 4;
+ case '5':
+ return 5;
+ case '6':
+ return 6;
+ case '7':
+ return 7;
+ case '8':
+ return 8;
+ case '9':
+ return 9;
+ case 'a':
+ return 10;
+ case 'b':
+ return 11;
+ case 'c':
+ return 12;
+ case 'd':
+ return 13;
+ case 'e':
+ return 14;
+ case 'f':
+ return 15;
+ case 'g':
+ return 16;
+ case 'h':
+ return 17;
+ case 'i':
+ return 18;
+ case 'j':
+ return 19;
+ case 'k':
+ return 20;
+ case 'l':
+ return 21;
+ case 'm':
+ return 22;
+ case 'n':
+ return 23;
+ case 'o':
+ return 24;
+ case 'p':
+ return 25;
+ case 'q':
+ return 26;
+ case 'r':
+ return 27;
+ case 's':
+ return 28;
+ case 't':
+ return 29;
+ case 'u':
+ return 30;
+ case 'v':
+ return 31;
+ case 'w':
+ return 32;
+ case 'x':
+ return 33;
+ case 'y':
+ return 34;
+ case 'z':
+ return 35;
+ default:
+ return 0;
+ }
}
-LIBC_INLINE static constexpr bool isspace(unsigned ch) {
- return ch == ' ' || (ch - '\t') < 5;
+LIBC_INLINE static constexpr bool isspace(int ch) {
+ switch (ch) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\v':
+ case '\f':
+ case '\r':
+ return true;
+ default:
+ return false;
+ }
}
-LIBC_INLINE static constexpr int tolower(int ch) {
- if (isupper(ch))
- return ch + ('a' - 'A');
- return ch;
+// not yet encoding independent.
+LIBC_INLINE static constexpr bool isgraph(int ch) {
+ return 0x20 < ch && ch < 0x7f;
}
} // namespace internal
>From 795ba04531233172c6619fe3e469279ecb0d155d Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Thu, 14 Nov 2024 11:28:48 -0800
Subject: [PATCH 2/5] Move functions to use ctype utils, cleanup
---
libc/src/__support/ctype_utils.h | 266 +++++++++++++++++-
libc/src/__support/integer_literals.h | 28 +-
libc/src/ctype/isxdigit.cpp | 3 +-
libc/src/ctype/isxdigit_l.cpp | 3 +-
libc/src/stdio/printf_core/fixed_converter.h | 14 +-
.../stdio/printf_core/float_dec_converter.h | 6 +-
.../stdio/printf_core/float_hex_converter.h | 17 +-
.../printf_core/float_inf_nan_converter.h | 9 +-
libc/src/stdio/printf_core/int_converter.h | 21 +-
libc/src/stdio/scanf_core/converter_utils.h | 10 -
libc/src/stdio/scanf_core/float_converter.cpp | 18 +-
libc/src/stdio/scanf_core/int_converter.cpp | 12 +-
libc/src/stdio/scanf_core/ptr_converter.cpp | 4 +-
.../llvm-project-overlay/libc/BUILD.bazel | 2 +
14 files changed, 329 insertions(+), 84 deletions(-)
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 2e9b23d3bc37f3..8521857ce765de 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -171,8 +171,67 @@ LIBC_INLINE static constexpr int tolower(int ch) {
}
}
+LIBC_INLINE static constexpr int toupper(int ch) {
+ switch (ch) {
+ case 'a':
+ return 'A';
+ case 'b':
+ return 'B';
+ case 'c':
+ return 'C';
+ case 'd':
+ return 'D';
+ case 'e':
+ return 'E';
+ case 'f':
+ return 'F';
+ case 'g':
+ return 'G';
+ case 'h':
+ return 'H';
+ case 'i':
+ return 'I';
+ case 'j':
+ return 'J';
+ case 'k':
+ return 'K';
+ case 'l':
+ return 'L';
+ case 'm':
+ return 'M';
+ case 'n':
+ return 'N';
+ case 'o':
+ return 'O';
+ case 'p':
+ return 'P';
+ case 'q':
+ return 'Q';
+ case 'r':
+ return 'R';
+ case 's':
+ return 'S';
+ case 't':
+ return 'T';
+ case 'u':
+ return 'U';
+ case 'v':
+ return 'V';
+ case 'w':
+ return 'W';
+ case 'x':
+ return 'X';
+ case 'y':
+ return 'Y';
+ case 'z':
+ return 'Z';
+ default:
+ return ch;
+ }
+}
+
LIBC_INLINE static constexpr bool isalpha(int ch) {
- switch (tolower(ch)) {
+ switch (ch) {
case 'a':
case 'b':
case 'c':
@@ -199,6 +258,32 @@ LIBC_INLINE static constexpr bool isalpha(int ch) {
case 'x':
case 'y':
case 'z':
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
return true;
default:
return false;
@@ -206,11 +291,77 @@ LIBC_INLINE static constexpr bool isalpha(int ch) {
}
LIBC_INLINE static constexpr bool isalnum(int ch) {
- return isalpha(ch) || isdigit(ch);
+ switch (ch) {
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return true;
+ default:
+ return false;
+ }
}
LIBC_INLINE static constexpr int b36_char_to_int(int ch) {
- switch (tolower(ch)) {
+ switch (ch) {
case '0':
return 0;
case '1':
@@ -232,62 +383,171 @@ LIBC_INLINE static constexpr int b36_char_to_int(int ch) {
case '9':
return 9;
case 'a':
+ case 'A':
return 10;
case 'b':
+ case 'B':
return 11;
case 'c':
+ case 'C':
return 12;
case 'd':
+ case 'D':
return 13;
case 'e':
+ case 'E':
return 14;
case 'f':
+ case 'F':
return 15;
case 'g':
+ case 'G':
return 16;
case 'h':
+ case 'H':
return 17;
case 'i':
+ case 'I':
return 18;
case 'j':
+ case 'J':
return 19;
case 'k':
+ case 'K':
return 20;
case 'l':
+ case 'L':
return 21;
case 'm':
+ case 'M':
return 22;
case 'n':
+ case 'N':
return 23;
case 'o':
+ case 'O':
return 24;
case 'p':
+ case 'P':
return 25;
case 'q':
+ case 'Q':
return 26;
case 'r':
+ case 'R':
return 27;
case 's':
+ case 'S':
return 28;
case 't':
+ case 'T':
return 29;
case 'u':
+ case 'U':
return 30;
case 'v':
+ case 'V':
return 31;
case 'w':
+ case 'W':
return 32;
case 'x':
+ case 'X':
return 33;
case 'y':
+ case 'Y':
return 34;
case 'z':
+ case 'Z':
return 35;
default:
return 0;
}
}
+LIBC_INLINE static constexpr int int_to_b36_char(int num) {
+ // Can't actually use LIBC_ASSERT here because it depends on integer_to_string
+ // which depends on this.
+
+ // LIBC_ASSERT(num < 36);
+ switch (num) {
+ case 0:
+ return '0';
+ case 1:
+ return '1';
+ case 2:
+ return '2';
+ case 3:
+ return '3';
+ case 4:
+ return '4';
+ case 5:
+ return '5';
+ case 6:
+ return '6';
+ case 7:
+ return '7';
+ case 8:
+ return '8';
+ case 9:
+ return '9';
+ case 10:
+ return 'a';
+ case 11:
+ return 'b';
+ case 12:
+ return 'c';
+ case 13:
+ return 'd';
+ case 14:
+ return 'e';
+ case 15:
+ return 'f';
+ case 16:
+ return 'g';
+ case 17:
+ return 'h';
+ case 18:
+ return 'i';
+ case 19:
+ return 'j';
+ case 20:
+ return 'k';
+ case 21:
+ return 'l';
+ case 22:
+ return 'm';
+ case 23:
+ return 'n';
+ case 24:
+ return 'o';
+ case 25:
+ return 'p';
+ case 26:
+ return 'q';
+ case 27:
+ return 'r';
+ case 28:
+ return 's';
+ case 29:
+ return 't';
+ case 30:
+ return 'u';
+ case 31:
+ return 'v';
+ case 32:
+ return 'w';
+ case 33:
+ return 'x';
+ case 34:
+ return 'y';
+ case 35:
+ return 'z';
+ default:
+ return '!';
+ }
+}
+
LIBC_INLINE static constexpr bool isspace(int ch) {
switch (ch) {
case ' ':
diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h
index 4c5c4c41666811..0298ec7d088d69 100644
--- a/libc/src/__support/integer_literals.h
+++ b/libc/src/__support/integer_literals.h
@@ -13,12 +13,13 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
#define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
-#include "src/__support/CPP/limits.h" // CHAR_BIT
+#include "src/__support/CPP/limits.h" // CHAR_BIT
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"
-#include "src/__support/uint128.h" // UInt128
-#include <stddef.h> // size_t
-#include <stdint.h> // uintxx_t
+#include "src/__support/uint128.h" // UInt128
+#include <stddef.h> // size_t
+#include <stdint.h> // uintxx_t
namespace LIBC_NAMESPACE_DECL {
@@ -75,26 +76,13 @@ template <typename T, int base> struct DigitBuffer {
push(*str);
}
- // Returns the digit for a particular character.
- // Returns INVALID_DIGIT if the character is invalid.
- LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) {
- const auto to_lower = [](char c) { return c | 32; };
- const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
- const auto is_alpha = [](char c) {
- return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
- };
- if (is_digit(c))
- return static_cast<uint8_t>(c - '0');
- if (base > 10 && is_alpha(c))
- return static_cast<uint8_t>(to_lower(c) - 'a' + 10);
- return INVALID_DIGIT;
- }
-
// Adds a single character to this buffer.
LIBC_INLINE constexpr void push(char c) {
if (c == '\'')
return; // ' is valid but not taken into account.
- const uint8_t value = get_digit_value(c);
+ const int b36_val = internal::b36_char_to_int(c);
+ const uint8_t value = static_cast<uint8_t>(
+ b36_val < base && (b36_val != 0 || c == '0') ? b36_val : INVALID_DIGIT);
if (value == INVALID_DIGIT || size >= MAX_DIGITS) {
// During constant evaluation `__builtin_unreachable` will halt the
// compiler as it is not executable. This is preferable over `assert` that
diff --git a/libc/src/ctype/isxdigit.cpp b/libc/src/ctype/isxdigit.cpp
index 6b730c354db083..81f645c6f49fc9 100644
--- a/libc/src/ctype/isxdigit.cpp
+++ b/libc/src/ctype/isxdigit.cpp
@@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) {
const unsigned ch = static_cast<unsigned>(c);
- return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
+ return static_cast<int>(internal::isalnum(ch) &&
+ internal::b36_char_to_int(ch) < 16);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/ctype/isxdigit_l.cpp b/libc/src/ctype/isxdigit_l.cpp
index 8a5c7d4d28ab1c..eddfd20a2da3b4 100644
--- a/libc/src/ctype/isxdigit_l.cpp
+++ b/libc/src/ctype/isxdigit_l.cpp
@@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) {
const unsigned ch = static_cast<unsigned>(c);
- return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
+ return static_cast<int>(internal::isalnum(ch) &&
+ internal::b36_char_to_int(ch) < 16);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/printf_core/fixed_converter.h b/libc/src/stdio/printf_core/fixed_converter.h
index c8812d77b62e34..ba0a62d9fcb87f 100644
--- a/libc/src/stdio/printf_core/fixed_converter.h
+++ b/libc/src/stdio/printf_core/fixed_converter.h
@@ -11,6 +11,7 @@
#include "include/llvm-libc-macros/stdfix-macros.h"
#include "src/__support/CPP/string_view.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/fixed_point/fx_bits.h"
#include "src/__support/fixed_point/fx_rep.h"
#include "src/__support/integer_to_string.h"
@@ -68,10 +69,6 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
using LARep = fixed_point::FXRep<unsigned long accum>;
using StorageType = LARep::StorageType;
- // All of the letters will be defined relative to variable a, which will be
- // the appropriate case based on the name of the conversion. This converts any
- // conversion name into the letter 'a' with the appropriate case.
- const char a = (to_conv.conv_name & 32) | 'A';
FormatFlags flags = to_conv.flags;
bool is_negative;
@@ -179,9 +176,9 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
// unspecified.
RoundDirection round;
char first_digit_after = fraction_digits[precision];
- if (first_digit_after > '5') {
+ if (internal::b36_char_to_int(first_digit_after) > 5) {
round = RoundDirection::Up;
- } else if (first_digit_after < '5') {
+ } else if (internal::b36_char_to_int(first_digit_after) < 5) {
round = RoundDirection::Down;
} else {
// first_digit_after == '5'
@@ -204,7 +201,8 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
keep_rounding = false;
char cur_digit = fraction_digits[digit_to_round];
// if the digit should not be rounded up
- if (round == RoundDirection::Even && ((cur_digit - '0') % 2) == 0) {
+ if (round == RoundDirection::Even &&
+ (internal::b36_char_to_int(cur_digit) % 2) == 0) {
// break out of the loop
break;
}
@@ -246,7 +244,7 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
char sign_char = 0;
// Check if the conv name is uppercase
- if (a == 'A') {
+ if (internal::isupper(to_conv.conv_name)) {
// These flags are only for signed conversions, so this removes them if the
// conversion is unsigned.
flags = FormatFlags(flags &
diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h
index e39ba6ecea8d48..d93457fcafd7f9 100644
--- a/libc/src/stdio/printf_core/float_dec_converter.h
+++ b/libc/src/stdio/printf_core/float_dec_converter.h
@@ -13,6 +13,7 @@
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/big_int.h" // is_big_int_v
+#include "src/__support/ctype_utils.h"
#include "src/__support/float_to_string.h"
#include "src/__support/integer_to_string.h"
#include "src/__support/libc_assert.h"
@@ -587,8 +588,6 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
int exponent = float_bits.get_explicit_exponent();
StorageType mantissa = float_bits.get_explicit_mantissa();
- const char a = (to_conv.conv_name & 32) | 'A';
-
char sign_char = 0;
if (float_bits.is_neg())
@@ -734,7 +733,8 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
round = get_round_direction(last_digit, truncated, float_bits.sign());
RET_IF_RESULT_NEGATIVE(float_writer.write_last_block(
- digits, maximum, round, final_exponent, a + 'E' - 'A'));
+ digits, maximum, round, final_exponent,
+ internal::islower(to_conv.conv_name) ? 'e' : 'E'));
RET_IF_RESULT_NEGATIVE(float_writer.right_pad());
return WRITE_OK;
diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h
index 0b3ff3dd1cbfdc..5d9c42882a589b 100644
--- a/libc/src/stdio/printf_core/float_hex_converter.h
+++ b/libc/src/stdio/printf_core/float_hex_converter.h
@@ -12,6 +12,7 @@
#include "src/__support/CPP/string_view.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
@@ -31,7 +32,6 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
// All of the letters will be defined relative to variable a, which will be
// the appropriate case based on the name of the conversion. This converts any
// conversion name into the letter 'a' with the appropriate case.
- const char a = (to_conv.conv_name & 32) | 'A';
bool is_negative;
int exponent;
@@ -138,9 +138,11 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
size_t mant_cur = mant_len;
size_t first_non_zero = 1;
for (; mant_cur > 0; --mant_cur, mantissa >>= 4) {
- char mant_mod_16 = static_cast<char>(mantissa) & 15;
- char new_digit = static_cast<char>(
- (mant_mod_16 > 9) ? (mant_mod_16 - 10 + a) : (mant_mod_16 + '0'));
+ char mant_mod_16 = static_cast<char>(mantissa % 16);
+ char new_digit = static_cast<char>(internal::int_to_b36_char(mant_mod_16));
+ if (internal::isupper(to_conv.conv_name)) {
+ new_digit = static_cast<char>(internal::toupper(new_digit));
+ }
mant_buffer[mant_cur - 1] = new_digit;
if (new_digit != '0' && first_non_zero < mant_cur)
first_non_zero = mant_cur;
@@ -168,7 +170,8 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
size_t exp_cur = EXP_LEN;
for (; exponent > 0; --exp_cur, exponent /= 10) {
- exp_buffer[exp_cur - 1] = static_cast<char>((exponent % 10) + '0');
+ exp_buffer[exp_cur - 1] =
+ static_cast<char>(internal::int_to_b36_char(exponent % 10));
}
if (exp_cur == EXP_LEN) { // if nothing else was written, write a 0.
exp_buffer[EXP_LEN - 1] = '0';
@@ -187,7 +190,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
constexpr size_t PREFIX_LEN = 2;
char prefix[PREFIX_LEN];
prefix[0] = '0';
- prefix[1] = a + ('x' - 'a');
+ prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X';
const cpp::string_view prefix_str(prefix, PREFIX_LEN);
// If the precision is greater than the actual result, pad with 0s
@@ -200,7 +203,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
constexpr cpp::string_view HEXADECIMAL_POINT(".");
// This is for the letter 'p' before the exponent.
- const char exp_separator = a + ('p' - 'a');
+ const char exp_separator = internal::islower(to_conv.conv_name) ? 'p' : 'P';
constexpr int EXP_SEPARATOR_LEN = 1;
padding = static_cast<int>(to_conv.min_width - (sign_char > 0 ? 1 : 0) -
diff --git a/libc/src/stdio/printf_core/float_inf_nan_converter.h b/libc/src/stdio/printf_core/float_inf_nan_converter.h
index a7da682b835bee..3e41612e21c9fc 100644
--- a/libc/src/stdio/printf_core/float_inf_nan_converter.h
+++ b/libc/src/stdio/printf_core/float_inf_nan_converter.h
@@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FLOAT_INF_NAN_CONVERTER_H
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
@@ -26,8 +27,6 @@ using StorageType = fputil::FPBits<long double>::StorageType;
LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
// All of the letters will be defined relative to variable a, which will be
// the appropriate case based on the case of the conversion.
- const char a = (to_conv.conv_name & 32) | 'A';
-
bool is_negative;
StorageType mantissa;
if (to_conv.length_modifier == LengthModifier::L) {
@@ -66,9 +65,11 @@ LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
if (sign_char)
RET_IF_RESULT_NEGATIVE(writer->write(sign_char));
if (mantissa == 0) { // inf
- RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "inf" : "INF"));
+ RET_IF_RESULT_NEGATIVE(
+ writer->write(internal::islower(to_conv.conv_name) ? "inf" : "INF"));
} else { // nan
- RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "nan" : "NAN"));
+ RET_IF_RESULT_NEGATIVE(
+ writer->write(internal::islower(to_conv.conv_name) ? "nan" : "NAN"));
}
if (padding > 0 && ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index f345e86b97a691..d0af229f89be51 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -11,6 +11,7 @@
#include "src/__support/CPP/span.h"
#include "src/__support/CPP/string_view.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/integer_to_string.h"
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/converter_utils.h"
@@ -23,11 +24,6 @@
namespace LIBC_NAMESPACE_DECL {
namespace printf_core {
-// These functions only work on characters that are already known to be in the
-// alphabet. Their behavior is undefined otherwise.
-LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
-LIBC_INLINE constexpr bool is_lower(char a) { return (a & 32) > 0; }
-
namespace details {
using HexFmt = IntegerToString<uintmax_t, radix::Hex>;
@@ -49,14 +45,14 @@ LIBC_INLINE constexpr size_t num_buf_size() {
LIBC_INLINE cpp::optional<cpp::string_view>
num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) {
- if (to_lower(conv_name) == 'x') {
- if (is_lower(conv_name))
+ if (internal::tolower(conv_name) == 'x') {
+ if (internal::islower(conv_name))
return HexFmt::format_to(bufref, num);
else
return HexFmtUppercase::format_to(bufref, num);
} else if (conv_name == 'o') {
return OctFmt::format_to(bufref, num);
- } else if (to_lower(conv_name) == 'b') {
+ } else if (internal::tolower(conv_name) == 'b') {
return BinFmt::format_to(bufref, num);
} else {
return DecFmt::format_to(bufref, num);
@@ -72,7 +68,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
bool is_negative = false;
FormatFlags flags = to_conv.flags;
- const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
// If the conversion is signed, then handle negative values.
if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') {
@@ -116,16 +111,16 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
// conversions. Since hexadecimal is unsigned these will never conflict.
size_t prefix_len;
char prefix[2];
- if ((to_lower(to_conv.conv_name) == 'x') &&
+ if ((internal::tolower(to_conv.conv_name) == 'x') &&
((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
prefix_len = 2;
prefix[0] = '0';
- prefix[1] = a + ('x' - 'a');
- } else if ((to_lower(to_conv.conv_name) == 'b') &&
+ prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X';
+ } else if ((internal::tolower(to_conv.conv_name) == 'b') &&
((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
prefix_len = 2;
prefix[0] = '0';
- prefix[1] = a + ('b' - 'a');
+ prefix[1] = internal::islower(to_conv.conv_name) ? 'b' : 'B';
} else {
prefix_len = (sign_char == 0 ? 0 : 1);
prefix[0] = sign_char;
diff --git a/libc/src/stdio/scanf_core/converter_utils.h b/libc/src/stdio/scanf_core/converter_utils.h
index 61954556b838a0..6f4d16cffb19cb 100644
--- a/libc/src/stdio/scanf_core/converter_utils.h
+++ b/libc/src/stdio/scanf_core/converter_utils.h
@@ -19,16 +19,6 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
-
-LIBC_INLINE constexpr int b36_char_to_int(char input) {
- if (internal::isdigit(input))
- return input - '0';
- if (internal::isalpha(input))
- return to_lower(input) + 10 - 'a';
- return 0;
-}
-
LIBC_INLINE void write_int_with_length(uintmax_t output_val,
const FormatSection &to_conv) {
if ((to_conv.flags & NO_WRITE) != 0) {
diff --git a/libc/src/stdio/scanf_core/float_converter.cpp b/libc/src/stdio/scanf_core/float_converter.cpp
index b2d60a249a5a77..9c714d0727214e 100644
--- a/libc/src/stdio/scanf_core/float_converter.cpp
+++ b/libc/src/stdio/scanf_core/float_converter.cpp
@@ -55,11 +55,12 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
// Handle inf
- if (to_lower(cur_char) == inf_string[0]) {
+ if (internal::tolower(cur_char) == inf_string[0]) {
size_t inf_index = 0;
- for (; inf_index < sizeof(inf_string) && out_str.length() < max_width &&
- to_lower(cur_char) == inf_string[inf_index];
+ for (;
+ inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width &&
+ internal::tolower(cur_char) == inf_string[inf_index];
++inf_index) {
if (!out_str.append(cur_char)) {
return ALLOCATION_FAILURE;
@@ -78,11 +79,12 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
static const char nan_string[] = "nan";
// Handle nan
- if (to_lower(cur_char) == nan_string[0]) {
+ if (internal::tolower(cur_char) == nan_string[0]) {
size_t nan_index = 0;
- for (; nan_index < sizeof(nan_string) && out_str.length() < max_width &&
- to_lower(cur_char) == nan_string[nan_index];
+ for (;
+ nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width &&
+ internal::tolower(cur_char) == nan_string[nan_index];
++nan_index) {
if (!out_str.append(cur_char)) {
return ALLOCATION_FAILURE;
@@ -117,7 +119,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
}
// If that next character is an 'x' then this is a hexadecimal number.
- if (to_lower(cur_char) == 'x') {
+ if (internal::tolower(cur_char) == 'x') {
base = 16;
if (!out_str.append(cur_char)) {
@@ -163,7 +165,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
// Handle the exponent, which has an exponent mark, an optional sign, and
// decimal digits.
- if (to_lower(cur_char) == exponent_mark) {
+ if (internal::tolower(cur_char) == exponent_mark) {
if (!out_str.append(cur_char)) {
return ALLOCATION_FAILURE;
}
diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp
index ecdac52e84bbde..fce817245c0108 100644
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ b/libc/src/stdio/scanf_core/int_converter.cpp
@@ -80,7 +80,8 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
is_signed = true;
} else if (to_conv.conv_name == 'o') {
base = 8;
- } else if (to_lower(to_conv.conv_name) == 'x' || to_conv.conv_name == 'p') {
+ } else if (internal::tolower(to_conv.conv_name) == 'x' ||
+ to_conv.conv_name == 'p') {
base = 16;
} else if (to_conv.conv_name == 'd') {
base = 10;
@@ -122,7 +123,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
return READ_OK;
}
- if (to_lower(cur_char) == 'x') {
+ if (internal::tolower(cur_char) == 'x') {
// This is a valid hex prefix.
is_number = false;
@@ -175,17 +176,18 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
const uintmax_t max_div_by_base = MAX / base;
- if (internal::isalnum(cur_char) && b36_char_to_int(cur_char) < base) {
+ if (internal::isalnum(cur_char) &&
+ internal::b36_char_to_int(cur_char) < base) {
is_number = true;
}
bool has_overflow = false;
size_t i = 0;
for (; i < max_width && internal::isalnum(cur_char) &&
- b36_char_to_int(cur_char) < base;
+ internal::b36_char_to_int(cur_char) < base;
++i, cur_char = reader->getc()) {
- uintmax_t cur_digit = b36_char_to_int(cur_char);
+ uintmax_t cur_digit = internal::b36_char_to_int(cur_char);
if (result == MAX) {
has_overflow = true;
diff --git a/libc/src/stdio/scanf_core/ptr_converter.cpp b/libc/src/stdio/scanf_core/ptr_converter.cpp
index 1a42a389d74b41..37f002d3da6985 100644
--- a/libc/src/stdio/scanf_core/ptr_converter.cpp
+++ b/libc/src/stdio/scanf_core/ptr_converter.cpp
@@ -8,6 +8,7 @@
#include "src/stdio/scanf_core/ptr_converter.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/scanf_core/converter_utils.h"
#include "src/stdio/scanf_core/core_structs.h"
@@ -24,7 +25,8 @@ int convert_pointer(Reader *reader, const FormatSection &to_conv) {
// Check if it's exactly the nullptr string, if so then it's a nullptr.
char cur_char = reader->getc();
size_t i = 0;
- for (; i < sizeof(nullptr_string) && to_lower(cur_char) == nullptr_string[i];
+ for (; i < (sizeof(nullptr_string) - 1) &&
+ internal::tolower(cur_char) == nullptr_string[i];
++i) {
cur_char = reader->getc();
}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 69a0dada23b4cd..a4c47483805724 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -740,6 +740,7 @@ libc_support_library(
hdrs = ["src/__support/integer_literals.h"],
deps = [
":__support_cpp_limits",
+ ":__support_ctype_utils",
":__support_uint128",
],
)
@@ -4450,6 +4451,7 @@ libc_support_library(
":__support_cpp_limits",
":__support_cpp_span",
":__support_cpp_string_view",
+ ":__support_ctype_utils",
":__support_float_to_string",
":__support_fputil_fenv_impl",
":__support_fputil_fp_bits",
>From d9db3ace0b2312c228595955238798fb07b436e3 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Wed, 20 Nov 2024 16:41:26 -0800
Subject: [PATCH 3/5] Fix everything else
A bunch of string conversion/testing code needed to be updated. I'm
gonna need to clean it up more later but that's gonna be a followup.
Also I need to set up the ctype function in bazel. Blarg.
---
libc/src/__support/high_precision_decimal.h | 10 +++--
libc/src/__support/integer_to_string.h | 7 +--
libc/src/__support/str_to_float.h | 2 +-
libc/src/__support/str_to_integer.h | 10 +----
libc/src/ctype/toupper.cpp | 6 +--
libc/src/ctype/toupper_l.cpp | 4 +-
.../stdio/printf_core/float_hex_converter.h | 3 --
libc/test/UnitTest/MemoryMatcher.cpp | 4 +-
.../src/__support/CPP/stringview_test.cpp | 5 ++-
libc/test/src/ctype/isalnum_test.cpp | 38 +++++++++++++---
libc/test/src/ctype/isalpha_test.cpp | 31 ++++++++++++-
libc/test/src/ctype/isdigit_test.cpp | 31 +++++++++++--
libc/test/src/ctype/islower_test.cpp | 33 ++++++++++++--
libc/test/src/ctype/isupper_test.cpp | 33 ++++++++++++--
libc/test/src/ctype/isxdigit_test.cpp | 35 +++++++++++++--
libc/test/src/ctype/tolower_test.cpp | 44 +++++++++++++++++--
libc/test/src/ctype/toupper_test.cpp | 44 +++++++++++++++++--
libc/test/src/stdlib/StrtolTest.h | 27 ++++++------
libc/test/src/string/strcmp_test.cpp | 20 ++++-----
.../llvm-project-overlay/libc/BUILD.bazel | 1 +
.../libc/test/UnitTest/BUILD.bazel | 5 ++-
21 files changed, 312 insertions(+), 81 deletions(-)
diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h
index ac11649d1d1686..6f3d0470fdfbfd 100644
--- a/libc/src/__support/high_precision_decimal.h
+++ b/libc/src/__support/high_precision_decimal.h
@@ -178,9 +178,11 @@ class HighPrecisionDecimal {
if (digit_index >= this->num_digits) {
return new_digits - 1;
}
- if (this->digits[digit_index] != power_of_five[digit_index] - '0') {
+ if (this->digits[digit_index] !=
+ internal::b36_char_to_int(power_of_five[digit_index])) {
return new_digits -
- ((this->digits[digit_index] < power_of_five[digit_index] - '0')
+ ((this->digits[digit_index] <
+ internal::b36_char_to_int(power_of_five[digit_index]))
? 1
: 0);
}
@@ -337,8 +339,8 @@ class HighPrecisionDecimal {
}
++total_digits;
if (this->num_digits < MAX_NUM_DIGITS) {
- this->digits[this->num_digits] =
- static_cast<uint8_t>(num_string[num_cur] - '0');
+ this->digits[this->num_digits] = static_cast<uint8_t>(
+ internal::b36_char_to_int(num_string[num_cur]));
++this->num_digits;
} else if (num_string[num_cur] != '0') {
this->truncated = true;
diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h
index 40d45a05ceadb2..496060773d9a84 100644
--- a/libc/src/__support/integer_to_string.h
+++ b/libc/src/__support/integer_to_string.h
@@ -69,6 +69,7 @@
#include "src/__support/CPP/type_traits.h"
#include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
@@ -214,9 +215,9 @@ template <typename T, typename Fmt = radix::Dec> class IntegerToString {
using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
LIBC_INLINE static char digit_char(uint8_t digit) {
- if (digit < 10)
- return '0' + static_cast<char>(digit);
- return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
+ const char result = static_cast<char>(internal::int_to_b36_char(digit));
+ return static_cast<char>(Fmt::IS_UPPERCASE ? internal::toupper(result)
+ : result);
}
LIBC_INLINE static void
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index 80ea334d15c03f..b4d5646822df37 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -909,7 +909,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
cpp::numeric_limits<StorageType>::max() / BASE;
while (true) {
if (isdigit(src[index])) {
- uint32_t digit = src[index] - '0';
+ uint32_t digit = b36_char_to_int(src[index]);
seen_digit = true;
if (mantissa < bitstype_max_div_by_base) {
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index 86611f9a6902da..8e569e8a7feb08 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -42,14 +42,6 @@ first_non_whitespace(const char *__restrict src,
return src + src_cur;
}
-LIBC_INLINE int b36_char_to_int(char input) {
- if (isdigit(input))
- return input - '0';
- if (isalpha(input))
- return (input | 32) + 10 - 'a';
- return 0;
-}
-
// checks if the next 3 characters of the string pointer are the start of a
// hexadecimal number. Does not advance the string pointer.
LIBC_INLINE bool
@@ -57,7 +49,7 @@ is_hex_start(const char *__restrict src,
size_t src_len = cpp::numeric_limits<size_t>::max()) {
if (src_len < 3)
return false;
- return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
+ return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
b36_char_to_int(*(src + 2)) < 16;
}
diff --git a/libc/src/ctype/toupper.cpp b/libc/src/ctype/toupper.cpp
index b5a23fc7f588bd..1e1e8fc4007118 100644
--- a/libc/src/ctype/toupper.cpp
+++ b/libc/src/ctype/toupper.cpp
@@ -14,10 +14,6 @@
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(int, toupper, (int c)) {
- if (internal::islower(c))
- return c - ('a' - 'A');
- return c;
-}
+LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); }
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/ctype/toupper_l.cpp b/libc/src/ctype/toupper_l.cpp
index f536ff36236160..a435ca1ab5d410 100644
--- a/libc/src/ctype/toupper_l.cpp
+++ b/libc/src/ctype/toupper_l.cpp
@@ -15,9 +15,7 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) {
- if (internal::islower(c))
- return c - ('a' - 'A');
- return c;
+ return internal::toupper(c);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h
index 5d9c42882a589b..accca52d281aaa 100644
--- a/libc/src/stdio/printf_core/float_hex_converter.h
+++ b/libc/src/stdio/printf_core/float_hex_converter.h
@@ -29,9 +29,6 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
const FormatSection &to_conv) {
using LDBits = fputil::FPBits<long double>;
using StorageType = LDBits::StorageType;
- // All of the letters will be defined relative to variable a, which will be
- // the appropriate case based on the name of the conversion. This converts any
- // conversion name into the letter 'a' with the appropriate case.
bool is_negative;
int exponent;
diff --git a/libc/test/UnitTest/MemoryMatcher.cpp b/libc/test/UnitTest/MemoryMatcher.cpp
index 244f25572c378d..3cd5174fd7f75b 100644
--- a/libc/test/UnitTest/MemoryMatcher.cpp
+++ b/libc/test/UnitTest/MemoryMatcher.cpp
@@ -8,6 +8,7 @@
#include "MemoryMatcher.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "test/UnitTest/Test.h"
@@ -40,7 +41,8 @@ bool MemoryMatcher::match(MemoryView actualValue) {
static void display(char C) {
const auto print = [](unsigned char I) {
- tlog << static_cast<char>(I < 10 ? '0' + I : 'A' + I - 10);
+ tlog << static_cast<char>(LIBC_NAMESPACE::internal::toupper(
+ LIBC_NAMESPACE::internal::int_to_b36_char(I)));
};
print(static_cast<unsigned char>(C) / 16);
print(static_cast<unsigned char>(C) & 15);
diff --git a/libc/test/src/__support/CPP/stringview_test.cpp b/libc/test/src/__support/CPP/stringview_test.cpp
index 6b68f2a1c47a9d..c9348243745a71 100644
--- a/libc/test/src/__support/CPP/stringview_test.cpp
+++ b/libc/test/src/__support/CPP/stringview_test.cpp
@@ -109,8 +109,6 @@ TEST(LlvmLibcStringViewTest, Observer) {
ASSERT_EQ(ABC.back(), 'c');
}
-bool isDigit(char c) { return c >= '0' && c <= '9'; }
-
TEST(LlvmLibcStringViewTest, FindFirstOf) {
string_view Tmp("abca");
ASSERT_TRUE(Tmp.find_first_of('a') == 0);
@@ -236,6 +234,9 @@ TEST(LlvmLibcStringViewTest, FindFirstNotOf) {
TEST(LlvmLibcStringViewTest, Contains) {
string_view Empty;
+ static_assert(
+ 'a' < 'z',
+ "This test only supports character encodings where 'a' is below 'z'");
for (char c = 'a'; c < 'z'; ++c)
EXPECT_FALSE(Empty.contains(c));
diff --git a/libc/test/src/ctype/isalnum_test.cpp b/libc/test/src/ctype/isalnum_test.cpp
index b71d36111d7256..92915db4e4bcb2 100644
--- a/libc/test/src/ctype/isalnum_test.cpp
+++ b/libc/test/src/ctype/isalnum_test.cpp
@@ -6,18 +6,46 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/span.h"
#include "src/ctype/isalnum.h"
#include "test/UnitTest/Test.h"
+TEST(LlvmLibcIsAlNum, SimpleTest) {
+ EXPECT_NE(LIBC_NAMESPACE::isalnum('a'), 0);
+ EXPECT_NE(LIBC_NAMESPACE::isalnum('B'), 0);
+ EXPECT_NE(LIBC_NAMESPACE::isalnum('3'), 0);
+
+ EXPECT_EQ(LIBC_NAMESPACE::isalnum(' '), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isalnum('?'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isalnum('\0'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isalnum(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+constexpr char ALNUM_ARRAY[] = {
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+};
+
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+ for (size_t i = 0; i < arr.size(); ++i) {
+ if (static_cast<int>(arr[i]) == ch)
+ return true;
+ }
+ return false;
+}
+
TEST(LlvmLibcIsAlNum, DefaultLocale) {
// Loops through all characters, verifying that numbers and letters
// return non-zero integer and everything else returns a zero.
- for (int c = -255; c < 255; ++c) {
- if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
- ('0' <= c && c <= '9'))
- EXPECT_NE(LIBC_NAMESPACE::isalnum(c), 0);
+ for (int ch = -255; ch < 255; ++ch) {
+ if (in_span(ch, ALNUM_ARRAY))
+ EXPECT_NE(LIBC_NAMESPACE::isalnum(ch), 0);
else
- EXPECT_EQ(LIBC_NAMESPACE::isalnum(c), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isalnum(ch), 0);
}
}
diff --git a/libc/test/src/ctype/isalpha_test.cpp b/libc/test/src/ctype/isalpha_test.cpp
index 10cdb962ee2ee5..ed4bc13969f595 100644
--- a/libc/test/src/ctype/isalpha_test.cpp
+++ b/libc/test/src/ctype/isalpha_test.cpp
@@ -6,15 +6,44 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/span.h"
#include "src/ctype/isalpha.h"
#include "test/UnitTest/Test.h"
+TEST(LlvmLibcIsAlpha, SimpleTest) {
+ EXPECT_NE(LIBC_NAMESPACE::isalpha('a'), 0);
+ EXPECT_NE(LIBC_NAMESPACE::isalpha('B'), 0);
+
+ EXPECT_EQ(LIBC_NAMESPACE::isalpha('3'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isalpha(' '), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isalpha('?'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isalpha('\0'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isalpha(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+constexpr char ALPHA_ARRAY[] = {
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+};
+
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+ for (size_t i = 0; i < arr.size(); ++i) {
+ if (static_cast<int>(arr[i]) == ch)
+ return true;
+ }
+ return false;
+}
+
TEST(LlvmLibcIsAlpha, DefaultLocale) {
// Loops through all characters, verifying that letters return a
// non-zero integer and everything else returns zero.
+ // TODO: Make this test fully encoding independent.
for (int ch = -255; ch < 255; ++ch) {
- if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
+ if (in_span(ch, ALPHA_ARRAY))
EXPECT_NE(LIBC_NAMESPACE::isalpha(ch), 0);
else
EXPECT_EQ(LIBC_NAMESPACE::isalpha(ch), 0);
diff --git a/libc/test/src/ctype/isdigit_test.cpp b/libc/test/src/ctype/isdigit_test.cpp
index a9f84db3ef7e87..fc459ab506e49f 100644
--- a/libc/test/src/ctype/isdigit_test.cpp
+++ b/libc/test/src/ctype/isdigit_test.cpp
@@ -6,15 +6,40 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/span.h"
#include "src/ctype/isdigit.h"
#include "test/UnitTest/Test.h"
+TEST(LlvmLibcIsDigit, SimpleTest) {
+ EXPECT_NE(LIBC_NAMESPACE::isdigit('3'), 0);
+
+ EXPECT_EQ(LIBC_NAMESPACE::isdigit('a'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isdigit('B'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isdigit(' '), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isdigit('?'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isdigit('\0'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isdigit(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+constexpr char DIGIT_ARRAY[] = {
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+};
+
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+ for (size_t i = 0; i < arr.size(); ++i) {
+ if (static_cast<int>(arr[i]) == ch)
+ return true;
+ }
+ return false;
+}
+
TEST(LlvmLibcIsDigit, DefaultLocale) {
- // Loops through all characters, verifying that numbers return a
- // non-zero integer and everything else returns zero.
+ // Loops through all characters, verifying that digits return a
+ // non-zero integer and everything else returns zero.
for (int ch = -255; ch < 255; ++ch) {
- if ('0' <= ch && ch <= '9')
+ if (in_span(ch, DIGIT_ARRAY))
EXPECT_NE(LIBC_NAMESPACE::isdigit(ch), 0);
else
EXPECT_EQ(LIBC_NAMESPACE::isdigit(ch), 0);
diff --git a/libc/test/src/ctype/islower_test.cpp b/libc/test/src/ctype/islower_test.cpp
index ba7caf65b6fd39..474feb09a4cf0a 100644
--- a/libc/test/src/ctype/islower_test.cpp
+++ b/libc/test/src/ctype/islower_test.cpp
@@ -6,14 +6,41 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/span.h"
#include "src/ctype/islower.h"
+
#include "test/UnitTest/Test.h"
+TEST(LlvmLibcIsLower, SimpleTest) {
+ EXPECT_NE(LIBC_NAMESPACE::islower('a'), 0);
+
+ EXPECT_EQ(LIBC_NAMESPACE::islower('B'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::islower('3'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::islower(' '), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::islower('?'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::islower('\0'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::islower(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+constexpr char LOWER_ARRAY[] = {
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+ for (size_t i = 0; i < arr.size(); ++i) {
+ if (static_cast<int>(arr[i]) == ch)
+ return true;
+ }
+ return false;
+}
+
TEST(LlvmLibcIsLower, DefaultLocale) {
- // Loops through all characters, verifying that lowercase letters
- // return a non-zero integer and everything else returns zero.
+ // Loops through all characters, verifying that lowercase letters
+ // return a non-zero integer and everything else returns zero.
for (int ch = -255; ch < 255; ++ch) {
- if ('a' <= ch && ch <= 'z')
+ if (in_span(ch, LOWER_ARRAY))
EXPECT_NE(LIBC_NAMESPACE::islower(ch), 0);
else
EXPECT_EQ(LIBC_NAMESPACE::islower(ch), 0);
diff --git a/libc/test/src/ctype/isupper_test.cpp b/libc/test/src/ctype/isupper_test.cpp
index 05b2fd069ef069..667acc198f3ff0 100644
--- a/libc/test/src/ctype/isupper_test.cpp
+++ b/libc/test/src/ctype/isupper_test.cpp
@@ -6,14 +6,41 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/span.h"
#include "src/ctype/isupper.h"
+
#include "test/UnitTest/Test.h"
+TEST(LlvmLibcIsUpper, SimpleTest) {
+ EXPECT_NE(LIBC_NAMESPACE::isupper('B'), 0);
+
+ EXPECT_EQ(LIBC_NAMESPACE::isupper('a'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isupper('3'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isupper(' '), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isupper('?'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isupper('\0'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isupper(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+constexpr char UPPER_ARRAY[] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+};
+
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+ for (size_t i = 0; i < arr.size(); ++i) {
+ if (static_cast<int>(arr[i]) == ch)
+ return true;
+ }
+ return false;
+}
+
TEST(LlvmLibcIsUpper, DefaultLocale) {
- // Loops through all characters, verifying that uppercase letters
- // return a non-zero integer and everything else returns zero.
+ // Loops through all characters, verifying that uppercase letters
+ // return a non-zero integer and everything else returns zero.
for (int ch = -255; ch < 255; ++ch) {
- if ('A' <= ch && ch <= 'Z')
+ if (in_span(ch, UPPER_ARRAY))
EXPECT_NE(LIBC_NAMESPACE::isupper(ch), 0);
else
EXPECT_EQ(LIBC_NAMESPACE::isupper(ch), 0);
diff --git a/libc/test/src/ctype/isxdigit_test.cpp b/libc/test/src/ctype/isxdigit_test.cpp
index b8f27a968540cb..29d27a0e497fbe 100644
--- a/libc/test/src/ctype/isxdigit_test.cpp
+++ b/libc/test/src/ctype/isxdigit_test.cpp
@@ -6,13 +6,42 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/span.h"
#include "src/ctype/isxdigit.h"
+
#include "test/UnitTest/Test.h"
-TEST(LlvmLibcIsXDigit, DefaultLocale) {
+TEST(LlvmLibcIsXdigit, SimpleTest) {
+ EXPECT_NE(LIBC_NAMESPACE::isxdigit('a'), 0);
+ EXPECT_NE(LIBC_NAMESPACE::isxdigit('B'), 0);
+ EXPECT_NE(LIBC_NAMESPACE::isxdigit('3'), 0);
+
+ EXPECT_EQ(LIBC_NAMESPACE::isxdigit('z'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isxdigit(' '), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isxdigit('?'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isxdigit('\0'), 0);
+ EXPECT_EQ(LIBC_NAMESPACE::isxdigit(-1), 0);
+}
+
+// TODO: Merge the ctype tests using this framework.
+constexpr char XDIGIT_ARRAY[] = {
+ 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E',
+ 'F', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+};
+
+bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+ for (size_t i = 0; i < arr.size(); ++i) {
+ if (static_cast<int>(arr[i]) == ch)
+ return true;
+ }
+ return false;
+}
+
+TEST(LlvmLibcIsXdigit, DefaultLocale) {
+ // Loops through all characters, verifying that hexadecimal digits
+ // return a non-zero integer and everything else returns zero.
for (int ch = -255; ch < 255; ++ch) {
- if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') ||
- ('A' <= ch && ch <= 'F'))
+ if (in_span(ch, XDIGIT_ARRAY))
EXPECT_NE(LIBC_NAMESPACE::isxdigit(ch), 0);
else
EXPECT_EQ(LIBC_NAMESPACE::isxdigit(ch), 0);
diff --git a/libc/test/src/ctype/tolower_test.cpp b/libc/test/src/ctype/tolower_test.cpp
index 3770ce4ea68b69..4c5ef9543be272 100644
--- a/libc/test/src/ctype/tolower_test.cpp
+++ b/libc/test/src/ctype/tolower_test.cpp
@@ -6,14 +6,52 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/span.h"
#include "src/ctype/tolower.h"
+
#include "test/UnitTest/Test.h"
+TEST(LlvmLibcToLower, SimpleTest) {
+ EXPECT_EQ(LIBC_NAMESPACE::tolower('a'), int('a'));
+ EXPECT_EQ(LIBC_NAMESPACE::tolower('B'), int('b'));
+ EXPECT_EQ(LIBC_NAMESPACE::tolower('3'), int('3'));
+
+ EXPECT_EQ(LIBC_NAMESPACE::tolower(' '), int(' '));
+ EXPECT_EQ(LIBC_NAMESPACE::tolower('?'), int('?'));
+ EXPECT_EQ(LIBC_NAMESPACE::tolower('\0'), int('\0'));
+ EXPECT_EQ(LIBC_NAMESPACE::tolower(-1), int(-1));
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Invariant: UPPER_ARR and LOWER_ARR are both the complete alphabet in the same
+// order.
+constexpr char UPPER_ARR[] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+};
+constexpr char LOWER_ARR[] = {
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+static_assert(
+ sizeof(UPPER_ARR) == sizeof(LOWER_ARR),
+ "There must be the same number of uppercase and lowercase letters.");
+
+int span_index(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+ for (size_t i = 0; i < arr.size(); ++i) {
+ if (static_cast<int>(arr[i]) == ch)
+ return static_cast<int>(i);
+ }
+ return -1;
+}
+
TEST(LlvmLibcToLower, DefaultLocale) {
for (int ch = -255; ch < 255; ++ch) {
- // This follows pattern 'A' + 32 = 'a'.
- if ('A' <= ch && ch <= 'Z')
- EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), ch + 32);
+ int char_index = span_index(ch, UPPER_ARR);
+ if (char_index != -1)
+ EXPECT_EQ(LIBC_NAMESPACE::tolower(ch),
+ static_cast<int>(LOWER_ARR[char_index]));
else
EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), ch);
}
diff --git a/libc/test/src/ctype/toupper_test.cpp b/libc/test/src/ctype/toupper_test.cpp
index 0413b43fb6009b..7346f31824a550 100644
--- a/libc/test/src/ctype/toupper_test.cpp
+++ b/libc/test/src/ctype/toupper_test.cpp
@@ -6,14 +6,52 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/span.h"
#include "src/ctype/toupper.h"
+
#include "test/UnitTest/Test.h"
+TEST(LlvmLibcToUpper, SimpleTest) {
+ EXPECT_EQ(LIBC_NAMESPACE::toupper('a'), int('A'));
+ EXPECT_EQ(LIBC_NAMESPACE::toupper('B'), int('B'));
+ EXPECT_EQ(LIBC_NAMESPACE::toupper('3'), int('3'));
+
+ EXPECT_EQ(LIBC_NAMESPACE::toupper(' '), int(' '));
+ EXPECT_EQ(LIBC_NAMESPACE::toupper('?'), int('?'));
+ EXPECT_EQ(LIBC_NAMESPACE::toupper('\0'), int('\0'));
+ EXPECT_EQ(LIBC_NAMESPACE::toupper(-1), int(-1));
+}
+
+// TODO: Merge the ctype tests using this framework.
+// Invariant: UPPER_ARR and LOWER_ARR are both the complete alphabet in the same
+// order.
+constexpr char UPPER_ARR[] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+};
+constexpr char LOWER_ARR[] = {
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+static_assert(
+ sizeof(UPPER_ARR) == sizeof(LOWER_ARR),
+ "There must be the same number of uppercase and lowercase letters.");
+
+int span_index(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
+ for (size_t i = 0; i < arr.size(); ++i) {
+ if (static_cast<int>(arr[i]) == ch)
+ return static_cast<int>(i);
+ }
+ return -1;
+}
+
TEST(LlvmLibcToUpper, DefaultLocale) {
for (int ch = -255; ch < 255; ++ch) {
- // This follows pattern 'a' - 32 = 'A'.
- if ('a' <= ch && ch <= 'z')
- EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), ch - 32);
+ int char_index = span_index(ch, LOWER_ARR);
+ if (char_index != -1)
+ EXPECT_EQ(LIBC_NAMESPACE::toupper(ch),
+ static_cast<int>(UPPER_ARR[char_index]));
else
EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), ch);
}
diff --git a/libc/test/src/stdlib/StrtolTest.h b/libc/test/src/stdlib/StrtolTest.h
index 8a67848e4c3302..6cfaddcbedeb6a 100644
--- a/libc/test/src/stdlib/StrtolTest.h
+++ b/libc/test/src/stdlib/StrtolTest.h
@@ -8,6 +8,7 @@
#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/type_traits.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/errno/libc_errno.h"
#include "test/UnitTest/Test.h"
@@ -16,14 +17,6 @@
using LIBC_NAMESPACE::cpp::is_signed_v;
-static inline char int_to_b36_char(int input) {
- if (input < 0 || input > 36)
- return '0';
- if (input < 10)
- return static_cast<char>('0' + input);
- return static_cast<char>('A' + input - 10);
-}
-
template <typename ReturnT>
struct StrtoTest : public LIBC_NAMESPACE::testing::Test {
using FunctionT = ReturnT (*)(const char *, char **, int);
@@ -207,7 +200,8 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test {
char small_string[4] = {'\0', '\0', '\0', '\0'};
for (int base = 2; base <= 36; ++base) {
for (int first_digit = 0; first_digit <= 36; ++first_digit) {
- small_string[0] = int_to_b36_char(first_digit);
+ small_string[0] =
+ LIBC_NAMESPACE::internal::int_to_b36_char(first_digit);
if (first_digit < base) {
LIBC_NAMESPACE::libc_errno = 0;
ASSERT_EQ(func(small_string, nullptr, base),
@@ -223,9 +217,11 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test {
for (int base = 2; base <= 36; ++base) {
for (int first_digit = 0; first_digit <= 36; ++first_digit) {
- small_string[0] = int_to_b36_char(first_digit);
+ small_string[0] =
+ LIBC_NAMESPACE::internal::int_to_b36_char(first_digit);
for (int second_digit = 0; second_digit <= 36; ++second_digit) {
- small_string[1] = int_to_b36_char(second_digit);
+ small_string[1] =
+ LIBC_NAMESPACE::internal::int_to_b36_char(second_digit);
if (first_digit < base && second_digit < base) {
LIBC_NAMESPACE::libc_errno = 0;
ASSERT_EQ(
@@ -248,11 +244,14 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test {
for (int base = 2; base <= 36; ++base) {
for (int first_digit = 0; first_digit <= 36; ++first_digit) {
- small_string[0] = int_to_b36_char(first_digit);
+ small_string[0] =
+ LIBC_NAMESPACE::internal::int_to_b36_char(first_digit);
for (int second_digit = 0; second_digit <= 36; ++second_digit) {
- small_string[1] = int_to_b36_char(second_digit);
+ small_string[1] =
+ LIBC_NAMESPACE::internal::int_to_b36_char(second_digit);
for (int third_digit = 0; third_digit <= limit; ++third_digit) {
- small_string[2] = int_to_b36_char(third_digit);
+ small_string[2] =
+ LIBC_NAMESPACE::internal::int_to_b36_char(third_digit);
if (first_digit < base && second_digit < base &&
third_digit < base) {
diff --git a/libc/test/src/string/strcmp_test.cpp b/libc/test/src/string/strcmp_test.cpp
index ef58dc608c83b0..234447610222f7 100644
--- a/libc/test/src/string/strcmp_test.cpp
+++ b/libc/test/src/string/strcmp_test.cpp
@@ -25,13 +25,13 @@ TEST(LlvmLibcStrCmpTest, EmptyStringShouldNotEqualNonEmptyString) {
const char *s2 = "abc";
int result = LIBC_NAMESPACE::strcmp(empty, s2);
// This should be '\0' - 'a' = -97
- ASSERT_EQ(result, -97);
+ ASSERT_EQ(result, '\0' - 'a');
// Similar case if empty string is second argument.
const char *s3 = "123";
result = LIBC_NAMESPACE::strcmp(s3, empty);
// This should be '1' - '\0' = 49
- ASSERT_EQ(result, 49);
+ ASSERT_EQ(result, '1' - '\0');
}
TEST(LlvmLibcStrCmpTest, EqualStringsShouldReturnZero) {
@@ -50,12 +50,12 @@ TEST(LlvmLibcStrCmpTest, ShouldReturnResultOfFirstDifference) {
const char *s2 = "___C55__";
int result = LIBC_NAMESPACE::strcmp(s1, s2);
// This should return 'B' - 'C' = -1.
- ASSERT_EQ(result, -1);
+ ASSERT_EQ(result, 'B' - 'C');
// Verify operands reversed.
result = LIBC_NAMESPACE::strcmp(s2, s1);
// This should return 'C' - 'B' = 1.
- ASSERT_EQ(result, 1);
+ ASSERT_EQ(result, 'C' - 'B');
}
TEST(LlvmLibcStrCmpTest, CapitalizedLetterShouldNotBeEqual) {
@@ -63,12 +63,12 @@ TEST(LlvmLibcStrCmpTest, CapitalizedLetterShouldNotBeEqual) {
const char *s2 = "abCd";
int result = LIBC_NAMESPACE::strcmp(s1, s2);
// 'c' - 'C' = 32.
- ASSERT_EQ(result, 32);
+ ASSERT_EQ(result, 'c' - 'C');
// Verify operands reversed.
result = LIBC_NAMESPACE::strcmp(s2, s1);
// 'C' - 'c' = -32.
- ASSERT_EQ(result, -32);
+ ASSERT_EQ(result, 'C' - 'c');
}
TEST(LlvmLibcStrCmpTest, UnequalLengthStringsShouldNotReturnZero) {
@@ -76,12 +76,12 @@ TEST(LlvmLibcStrCmpTest, UnequalLengthStringsShouldNotReturnZero) {
const char *s2 = "abcd";
int result = LIBC_NAMESPACE::strcmp(s1, s2);
// '\0' - 'd' = -100.
- ASSERT_EQ(result, -100);
+ ASSERT_EQ(result, '\0' - 'd');
// Verify operands reversed.
result = LIBC_NAMESPACE::strcmp(s2, s1);
// 'd' - '\0' = 100.
- ASSERT_EQ(result, 100);
+ ASSERT_EQ(result, 'd' - '\0');
}
TEST(LlvmLibcStrCmpTest, StringArgumentSwapChangesSign) {
@@ -89,11 +89,11 @@ TEST(LlvmLibcStrCmpTest, StringArgumentSwapChangesSign) {
const char *b = "b";
int result = LIBC_NAMESPACE::strcmp(b, a);
// 'b' - 'a' = 1.
- ASSERT_EQ(result, 1);
+ ASSERT_EQ(result, 'b' - 'a');
result = LIBC_NAMESPACE::strcmp(a, b);
// 'a' - 'b' = -1.
- ASSERT_EQ(result, -1);
+ ASSERT_EQ(result, 'a' - 'b');
}
TEST(LlvmLibcStrCmpTest, Case) {
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index a4c47483805724..fd790fbdc6e40b 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -773,6 +773,7 @@ libc_support_library(
":__support_cpp_span",
":__support_cpp_string_view",
":__support_cpp_type_traits",
+ ":__support_ctype_utils",
],
)
diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel
index f387741e95d8bd..6db3456edbb708 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel
@@ -63,12 +63,12 @@ libc_support_library(
"//libc:__support_stringutil",
"//libc:__support_uint128",
"//libc:errno",
- "//libc:llvm_libc_macros_stdfix_macros",
- "//llvm:Support",
"//libc:func_aligned_alloc",
"//libc:func_free",
"//libc:func_malloc",
"//libc:func_realloc",
+ "//libc:llvm_libc_macros_stdfix_macros",
+ "//llvm:Support",
],
)
@@ -121,6 +121,7 @@ libc_support_library(
"//libc:__support_cpp_bitset",
"//libc:__support_cpp_span",
"//libc:__support_cpp_type_traits",
+ "//libc:__support_ctype_utils",
"//libc:__support_macros_config",
],
)
>From 8d25d9079f7305e483a293e4cf3f1bd1ef140dc3 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 3 Dec 2024 11:24:08 -0800
Subject: [PATCH 4/5] address comments
---
libc/src/__support/ctype_utils.h | 11 +++++++++++
libc/src/stdio/printf_core/float_hex_converter.h | 3 +--
libc/test/src/ctype/isalnum_test.cpp | 3 +--
libc/test/src/ctype/isalpha_test.cpp | 3 +--
libc/test/src/ctype/isdigit_test.cpp | 3 +--
libc/test/src/ctype/islower_test.cpp | 3 +--
libc/test/src/ctype/isupper_test.cpp | 3 +--
libc/test/src/ctype/isxdigit_test.cpp | 3 +--
libc/test/src/ctype/tolower_test.cpp | 3 +--
libc/test/src/ctype/toupper_test.cpp | 3 +--
10 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 8521857ce765de..be0f25330af9ef 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -26,6 +26,17 @@ namespace internal {
// functions, make sure you have benchmarks to show your new solution is faster,
// as well as a way to support non-ASCII character encodings.
+// Similarly, do not change these functions to use case ranges. e.g.
+// bool islower(int ch) {
+// switch(ch) {
+// case 'a'...'z':
+// return true;
+// }
+// }
+// This assumes the character ranges are contiguous, which they aren't in
+// EBCDIC. Technically we could use some smaller ranges, but that's even harder
+// to read.
+
LIBC_INLINE static constexpr bool islower(int ch) {
switch (ch) {
case 'a':
diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h
index accca52d281aaa..b264b5cf207283 100644
--- a/libc/src/stdio/printf_core/float_hex_converter.h
+++ b/libc/src/stdio/printf_core/float_hex_converter.h
@@ -137,9 +137,8 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
for (; mant_cur > 0; --mant_cur, mantissa >>= 4) {
char mant_mod_16 = static_cast<char>(mantissa % 16);
char new_digit = static_cast<char>(internal::int_to_b36_char(mant_mod_16));
- if (internal::isupper(to_conv.conv_name)) {
+ if (internal::isupper(to_conv.conv_name))
new_digit = static_cast<char>(internal::toupper(new_digit));
- }
mant_buffer[mant_cur - 1] = new_digit;
if (new_digit != '0' && first_non_zero < mant_cur)
first_non_zero = mant_cur;
diff --git a/libc/test/src/ctype/isalnum_test.cpp b/libc/test/src/ctype/isalnum_test.cpp
index 92915db4e4bcb2..18ddd2b14b8c88 100644
--- a/libc/test/src/ctype/isalnum_test.cpp
+++ b/libc/test/src/ctype/isalnum_test.cpp
@@ -32,10 +32,9 @@ constexpr char ALNUM_ARRAY[] = {
};
bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
+ for (size_t i = 0; i < arr.size(); ++i)
if (static_cast<int>(arr[i]) == ch)
return true;
- }
return false;
}
diff --git a/libc/test/src/ctype/isalpha_test.cpp b/libc/test/src/ctype/isalpha_test.cpp
index ed4bc13969f595..e54b580dbe264e 100644
--- a/libc/test/src/ctype/isalpha_test.cpp
+++ b/libc/test/src/ctype/isalpha_test.cpp
@@ -31,10 +31,9 @@ constexpr char ALPHA_ARRAY[] = {
};
bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
+ for (size_t i = 0; i < arr.size(); ++i)
if (static_cast<int>(arr[i]) == ch)
return true;
- }
return false;
}
diff --git a/libc/test/src/ctype/isdigit_test.cpp b/libc/test/src/ctype/isdigit_test.cpp
index fc459ab506e49f..adea55e59c74df 100644
--- a/libc/test/src/ctype/isdigit_test.cpp
+++ b/libc/test/src/ctype/isdigit_test.cpp
@@ -28,10 +28,9 @@ constexpr char DIGIT_ARRAY[] = {
};
bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
+ for (size_t i = 0; i < arr.size(); ++i)
if (static_cast<int>(arr[i]) == ch)
return true;
- }
return false;
}
diff --git a/libc/test/src/ctype/islower_test.cpp b/libc/test/src/ctype/islower_test.cpp
index 474feb09a4cf0a..f9414bd8cbd095 100644
--- a/libc/test/src/ctype/islower_test.cpp
+++ b/libc/test/src/ctype/islower_test.cpp
@@ -29,10 +29,9 @@ constexpr char LOWER_ARRAY[] = {
};
bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
+ for (size_t i = 0; i < arr.size(); ++i)
if (static_cast<int>(arr[i]) == ch)
return true;
- }
return false;
}
diff --git a/libc/test/src/ctype/isupper_test.cpp b/libc/test/src/ctype/isupper_test.cpp
index 667acc198f3ff0..94def1a9dcccdb 100644
--- a/libc/test/src/ctype/isupper_test.cpp
+++ b/libc/test/src/ctype/isupper_test.cpp
@@ -29,10 +29,9 @@ constexpr char UPPER_ARRAY[] = {
};
bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
+ for (size_t i = 0; i < arr.size(); ++i)
if (static_cast<int>(arr[i]) == ch)
return true;
- }
return false;
}
diff --git a/libc/test/src/ctype/isxdigit_test.cpp b/libc/test/src/ctype/isxdigit_test.cpp
index 29d27a0e497fbe..d7253d549907bf 100644
--- a/libc/test/src/ctype/isxdigit_test.cpp
+++ b/libc/test/src/ctype/isxdigit_test.cpp
@@ -30,10 +30,9 @@ constexpr char XDIGIT_ARRAY[] = {
};
bool in_span(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
+ for (size_t i = 0; i < arr.size(); ++i)
if (static_cast<int>(arr[i]) == ch)
return true;
- }
return false;
}
diff --git a/libc/test/src/ctype/tolower_test.cpp b/libc/test/src/ctype/tolower_test.cpp
index 4c5ef9543be272..59432c43297b35 100644
--- a/libc/test/src/ctype/tolower_test.cpp
+++ b/libc/test/src/ctype/tolower_test.cpp
@@ -39,10 +39,9 @@ static_assert(
"There must be the same number of uppercase and lowercase letters.");
int span_index(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
+ for (size_t i = 0; i < arr.size(); ++i)
if (static_cast<int>(arr[i]) == ch)
return static_cast<int>(i);
- }
return -1;
}
diff --git a/libc/test/src/ctype/toupper_test.cpp b/libc/test/src/ctype/toupper_test.cpp
index 7346f31824a550..045b00bbb4b935 100644
--- a/libc/test/src/ctype/toupper_test.cpp
+++ b/libc/test/src/ctype/toupper_test.cpp
@@ -39,10 +39,9 @@ static_assert(
"There must be the same number of uppercase and lowercase letters.");
int span_index(int ch, LIBC_NAMESPACE::cpp::span<const char> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
+ for (size_t i = 0; i < arr.size(); ++i)
if (static_cast<int>(arr[i]) == ch)
return static_cast<int>(i);
- }
return -1;
}
>From c3e76f8531dfd349811b4c266bccbfa5080405ea Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 3 Dec 2024 11:37:32 -0800
Subject: [PATCH 5/5] accept another diff
---
libc/src/__support/integer_to_string.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h
index 496060773d9a84..ea620087584cb0 100644
--- a/libc/src/__support/integer_to_string.h
+++ b/libc/src/__support/integer_to_string.h
@@ -215,7 +215,7 @@ template <typename T, typename Fmt = radix::Dec> class IntegerToString {
using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
LIBC_INLINE static char digit_char(uint8_t digit) {
- const char result = static_cast<char>(internal::int_to_b36_char(digit));
+ const int result = internal::int_to_b36_char(digit);
return static_cast<char>(Fmt::IS_UPPERCASE ? internal::toupper(result)
: result);
}
More information about the libc-commits
mailing list