[libc-commits] [libc] [libc] Add a smaller b36_char_to_int (PR #180841)

Michael Jones via libc-commits libc-commits at lists.llvm.org
Tue Mar 17 12:50:03 PDT 2026


https://github.com/michaelrj-google updated https://github.com/llvm/llvm-project/pull/180841

>From bd8f554b41e6e9b11ee90c41326266d116ad3f4c Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 10 Feb 2026 21:16:57 +0000
Subject: [PATCH 1/2] [libc] Add a smaller b36_char_to_int

For ASCII systems, b36_char_to_int gets compiled into a jump table. That
jump table ends up being pretty large because it covers the range from
'0' (48) to 'z' (122). For size-constrained systems that can assume
ASCII, this patch adds a new flag, LIBC_CONF_CTYPE_SMALLER_ASCII, that
selects a smaller implementation that doesn't compile into a jump
table.
---
 libc/cmake/modules/LLVMLibCCompileOptionRules.cmake |  4 ++++
 libc/config/baremetal/config.json                   |  5 +++++
 libc/config/config.json                             |  6 ++++++
 libc/docs/configure.rst                             |  2 ++
 libc/src/__support/ctype_utils.h                    | 12 ++++++++++++
 5 files changed, 29 insertions(+)

diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index b15a9fab09c38..1ce027114f73b 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -147,6 +147,10 @@ function(_get_compile_options_from_config output_var)
     endif()
   endif()
 
+  if(LIBC_CONF_CTYPE_SMALLER_ASCII)
+    list(APPEND config_options "-DLIBC_COPT_CTYPE_SMALLER_ASCII")
+  endif()
+
   if(LIBC_CONF_PRINTF_DISABLE_WIDE)
     list(APPEND config_options "-DLIBC_COPT_PRINTF_DISABLE_WIDE")
   endif()
diff --git a/libc/config/baremetal/config.json b/libc/config/baremetal/config.json
index 5edc045b5782f..1c52cd0093e1c 100644
--- a/libc/config/baremetal/config.json
+++ b/libc/config/baremetal/config.json
@@ -70,5 +70,10 @@
     "LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR": {
       "value": false
     }
+  },
+  "ctype": {
+    "LIBC_CONF_CTYPE_SMALLER_ASCII": {
+      "value": true
+    }
   }
 }
diff --git a/libc/config/config.json b/libc/config/config.json
index 088e94fb2c22f..10ac6b0dc3e9a 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -99,6 +99,12 @@
       "doc": "Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled."
     }
   },
+  "ctype": {
+    "LIBC_CONF_CTYPE_SMALLER_ASCII": {
+      "value": true,
+      "doc": "Shrinks b36_char_to_int by assuming the character encoding is ASCII."
+    }
+  },
   "codegen": {
     "LIBC_CONF_KEEP_FRAME_POINTER": {
       "value": true,
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 1e91a1f14da5e..06d20df261bc1 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -28,6 +28,8 @@ to learn about the defaults for your platform and target.
 * **"codegen" options**
     - ``LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR``: Enable -fstack-protector-strong to defend against stack smashing attack.
     - ``LIBC_CONF_KEEP_FRAME_POINTER``: Keep frame pointer in functions for better debugging experience.
+* **"ctype" options**
+    - ``LIBC_CONF_CTYPE_SMALLER_ASCII``: Shrinks b36_char_to_int by assuming the character encoding is ASCII.
 * **"errno" options**
     - ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM_INLINE.
 * **"fenv" options**
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 515eca18f9e4c..6389309c0a67f 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -371,6 +371,7 @@ LIBC_INLINE constexpr bool isalnum(char ch) {
   }
 }
 
+#ifndef LIBC_COPT_SMALL_ASCII_CTYPE
 LIBC_INLINE constexpr int b36_char_to_int(char ch) {
   switch (ch) {
   case '0':
@@ -475,6 +476,17 @@ LIBC_INLINE constexpr int b36_char_to_int(char ch) {
     return 0;
   }
 }
+#else  // LIBC_COPT_SMALL_ASCII_CTYPE
+// This version assumes ASCII for the tolower, but generates smaller code since
+// the switch version of this function ends up with a table.
+LIBC_INLINE constexpr int b36_char_to_int(char ch) {
+  if (isdigit(input))
+    return input - '0';
+  if (isalpha(input))
+    return (input | 32) + 10 - 'a';
+  return 0;
+}
+#endif // LIBC_COPT_SMALL_ASCII_CTYPE
 
 LIBC_INLINE constexpr char int_to_b36_char(int num) {
   // Can't actually use LIBC_ASSERT here because it depends on integer_to_string

>From 140c1b850468938ee5da365bb04ed2afa3560605 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Fri, 20 Feb 2026 23:29:05 +0000
Subject: [PATCH 2/2] fix build flag, code, config

---
 libc/config/config.json          |  2 +-
 libc/src/__support/ctype_utils.h | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/libc/config/config.json b/libc/config/config.json
index 10ac6b0dc3e9a..603fa005fcc57 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -101,7 +101,7 @@
   },
   "ctype": {
     "LIBC_CONF_CTYPE_SMALLER_ASCII": {
-      "value": true,
+      "value": false,
       "doc": "Shrinks b36_char_to_int by assuming the character encoding is ASCII."
     }
   },
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 6389309c0a67f..e52f6ec425e28 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -371,7 +371,7 @@ LIBC_INLINE constexpr bool isalnum(char ch) {
   }
 }
 
-#ifndef LIBC_COPT_SMALL_ASCII_CTYPE
+#ifndef LIBC_COPT_CTYPE_SMALLER_ASCII
 LIBC_INLINE constexpr int b36_char_to_int(char ch) {
   switch (ch) {
   case '0':
@@ -478,12 +478,14 @@ LIBC_INLINE constexpr int b36_char_to_int(char ch) {
 }
 #else  // LIBC_COPT_SMALL_ASCII_CTYPE
 // This version assumes ASCII for the tolower, but generates smaller code since
-// the switch version of this function ends up with a table.
+// the switch version of this function ends up with a table. This should only be
+// used when the target is known to be ASCII.
 LIBC_INLINE constexpr int b36_char_to_int(char ch) {
-  if (isdigit(input))
-    return input - '0';
-  if (isalpha(input))
-    return (input | 32) + 10 - 'a';
+  if (ch >= '0' && ch <= '9')
+    return ch - '0';
+  char ch_unsafe_lower = ch | 32;
+  if (ch_unsafe_lower >= 'a' && ch_unsafe_lower <= 'z')
+    return ch_unsafe_lower - 'a' + 10;
   return 0;
 }
 #endif // LIBC_COPT_SMALL_ASCII_CTYPE



More information about the libc-commits mailing list