[libc-commits] [libc] [libc] Add a smaller b36_char_to_int (PR #180841)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Tue Mar 17 12:50:03 PDT 2026
https://github.com/michaelrj-google updated https://github.com/llvm/llvm-project/pull/180841
>From bd8f554b41e6e9b11ee90c41326266d116ad3f4c Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 10 Feb 2026 21:16:57 +0000
Subject: [PATCH 1/2] [libc] Add a smaller b36_char_to_int
For ASCII systems, b36_char_to_int gets compiled into a jump table. That
jump table ends up being pretty large because it covers the range from
'0' (48) to 'z' (122). For size-constrained systems that can assume
ASCII, this patch adds a new flag, LIBC_CONF_CTYPE_SMALLER_ASCII, which
selects a smaller implementation that doesn't compile into a jump
table.
---
libc/cmake/modules/LLVMLibCCompileOptionRules.cmake | 4 ++++
libc/config/baremetal/config.json | 5 +++++
libc/config/config.json | 6 ++++++
libc/docs/configure.rst | 2 ++
libc/src/__support/ctype_utils.h | 12 ++++++++++++
5 files changed, 29 insertions(+)
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index b15a9fab09c38..1ce027114f73b 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -147,6 +147,10 @@ function(_get_compile_options_from_config output_var)
endif()
endif()
+ if(LIBC_CONF_CTYPE_SMALLER_ASCII)
+ list(APPEND config_options "-DLIBC_COPT_CTYPE_SMALLER_ASCII")
+ endif()
+
if(LIBC_CONF_PRINTF_DISABLE_WIDE)
list(APPEND config_options "-DLIBC_COPT_PRINTF_DISABLE_WIDE")
endif()
diff --git a/libc/config/baremetal/config.json b/libc/config/baremetal/config.json
index 5edc045b5782f..1c52cd0093e1c 100644
--- a/libc/config/baremetal/config.json
+++ b/libc/config/baremetal/config.json
@@ -70,5 +70,10 @@
"LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR": {
"value": false
}
+ },
+ "ctype": {
+ "LIBC_CONF_CTYPE_SMALLER_ASCII": {
+ "value": true
+ }
}
}
diff --git a/libc/config/config.json b/libc/config/config.json
index 088e94fb2c22f..10ac6b0dc3e9a 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -99,6 +99,12 @@
"doc": "Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled."
}
},
+ "ctype": {
+ "LIBC_CONF_CTYPE_SMALLER_ASCII": {
+ "value": true,
+ "doc": "Shrinks b36_char_to_int by assuming the character encoding is ASCII."
+ }
+ },
"codegen": {
"LIBC_CONF_KEEP_FRAME_POINTER": {
"value": true,
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 1e91a1f14da5e..06d20df261bc1 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -28,6 +28,8 @@ to learn about the defaults for your platform and target.
* **"codegen" options**
- ``LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR``: Enable -fstack-protector-strong to defend against stack smashing attack.
- ``LIBC_CONF_KEEP_FRAME_POINTER``: Keep frame pointer in functions for better debugging experience.
+* **"ctype" options**
+ - ``LIBC_CONF_CTYPE_SMALLER_ASCII``: Shrinks b36_char_to_int by assuming the character encoding is ASCII.
* **"errno" options**
- ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM_INLINE.
* **"fenv" options**
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 515eca18f9e4c..6389309c0a67f 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -371,6 +371,7 @@ LIBC_INLINE constexpr bool isalnum(char ch) {
}
}
+#ifndef LIBC_COPT_SMALL_ASCII_CTYPE
LIBC_INLINE constexpr int b36_char_to_int(char ch) {
switch (ch) {
case '0':
@@ -475,6 +476,17 @@ LIBC_INLINE constexpr int b36_char_to_int(char ch) {
return 0;
}
}
+#else // LIBC_COPT_SMALL_ASCII_CTYPE
+// This version assumes ASCII for the tolower, but generates smaller code since
+// the switch version of this function ends up with a table.
+LIBC_INLINE constexpr int b36_char_to_int(char ch) {
+ if (isdigit(input))
+ return input - '0';
+ if (isalpha(input))
+ return (input | 32) + 10 - 'a';
+ return 0;
+}
+#endif // LIBC_COPT_SMALL_ASCII_CTYPE
LIBC_INLINE constexpr char int_to_b36_char(int num) {
// Can't actually use LIBC_ASSERT here because it depends on integer_to_string
>From 140c1b850468938ee5da365bb04ed2afa3560605 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Fri, 20 Feb 2026 23:29:05 +0000
Subject: [PATCH 2/2] fix build flag, code, config
---
libc/config/config.json | 2 +-
libc/src/__support/ctype_utils.h | 14 ++++++++------
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/libc/config/config.json b/libc/config/config.json
index 10ac6b0dc3e9a..603fa005fcc57 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -101,7 +101,7 @@
},
"ctype": {
"LIBC_CONF_CTYPE_SMALLER_ASCII": {
- "value": true,
+ "value": false,
"doc": "Shrinks b36_char_to_int by assuming the character encoding is ASCII."
}
},
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 6389309c0a67f..e52f6ec425e28 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -371,7 +371,7 @@ LIBC_INLINE constexpr bool isalnum(char ch) {
}
}
-#ifndef LIBC_COPT_SMALL_ASCII_CTYPE
+#ifndef LIBC_COPT_CTYPE_SMALLER_ASCII
LIBC_INLINE constexpr int b36_char_to_int(char ch) {
switch (ch) {
case '0':
@@ -478,12 +478,14 @@ LIBC_INLINE constexpr int b36_char_to_int(char ch) {
}
#else // LIBC_COPT_SMALL_ASCII_CTYPE
// This version assumes ASCII for the tolower, but generates smaller code since
-// the switch version of this function ends up with a table.
+// the switch version of this function ends up with a table. This should only be
+// used when the target is known to be ASCII.
LIBC_INLINE constexpr int b36_char_to_int(char ch) {
- if (isdigit(input))
- return input - '0';
- if (isalpha(input))
- return (input | 32) + 10 - 'a';
+ if (ch >= '0' && ch <= '9')
+ return ch - '0';
+ char ch_unsafe_lower = ch | 32;
+ if (ch_unsafe_lower >= 'a' && ch_unsafe_lower <= 'z')
+ return ch_unsafe_lower - 'a' + 10;
return 0;
}
#endif // LIBC_COPT_SMALL_ASCII_CTYPE
More information about the libc-commits
mailing list