[libc-commits] [libc] [libc] Add C23 binary prefix (0b/0B) support to strtointeger (PR #202243)

Shubh Pachchigar via libc-commits libc-commits at lists.llvm.org
Sun Jun 7 18:32:21 PDT 2026


https://github.com/shubhe25p updated https://github.com/llvm/llvm-project/pull/202243

>From d43abbb079a6a8a928fcc52bdaa16912cf60391f Mon Sep 17 00:00:00 2001
From: shubhe25p <shubhp at Mac.lan>
Date: Sun, 7 Jun 2026 18:31:20 -0700
Subject: [PATCH] [libc] Add C23 binary prefix (0b/0B) support to strtointeger
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The C23 standard (N3220 §7.24.1.7) requires strtol and related functions
to recognize the 0b/0B prefix when base is 2 or 0. This change implements
that support and adds relevant tests.
---
 libc/src/__support/str_to_integer.h           |  19 +++
 .../src/__support/str_to_integer_test.cpp     | 108 +++++++++++++++++-
 libc/test/src/stdlib/StrtolTest.h             |  25 +++-
 libc/test/src/wchar/WcstolTest.h              |  25 +++-
 4 files changed, 170 insertions(+), 7 deletions(-)

diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index 36f1d05eb2844..e6e599738bf44 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -65,6 +65,18 @@ LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
          b36_char_to_int(src[2]) < 16;
 }
 
+// checks if the next 3 characters of the string pointer are the start of a
+// binary number. Does not advance the string pointer.
+template <typename CharType>
+LIBC_INLINE static bool is_binary_start(const CharType *__restrict src,
+                                        size_t src_len) {
+  if (src_len < 3)
+    return false;
+  return is_char_or_wchar(src[0], '0', L'0') &&
+         is_char_or_wchar(tolower(src[1]), 'b', L'b') && isalnum(src[2]) &&
+         b36_char_to_int(src[2]) < 2;
+}
+
 // Takes the address of the string pointer and parses the base from the start of
 // it.
 template <typename CharType>
@@ -75,6 +87,10 @@ LIBC_INLINE static int infer_base(const CharType *__restrict src,
   // with values 10 through 15 respectively." (C standard 6.4.4.1)
   if (is_hex_start(src, src_len))
     return 16;
+  // A binary number is defined as "the prefix 0b or 0B optionally followed
+  // by a sequence of letters and digits." (C standard 7.24.1.7)
+  if (is_binary_start(src, src_len))
+    return 2;
   // An octal number is defined as "the prefix 0 optionally followed by a
   // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
   // number that starts with 0, including just 0, is an octal number.
@@ -120,6 +136,9 @@ strtointeger(const CharType *__restrict src, int base,
   if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
     src_cur = src_cur + 2;
 
+  if (base == 2 && is_binary_start(src + src_cur, src_len - src_cur))
+    src_cur = src_cur + 2;
+
   constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
   ResultType constexpr NEGATIVE_MAX =
       !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
diff --git a/libc/test/src/__support/str_to_integer_test.cpp b/libc/test/src/__support/str_to_integer_test.cpp
index e5ac1d6cbb7b3..1d3e8c8b1cc24 100644
--- a/libc/test/src/__support/str_to_integer_test.cpp
+++ b/libc/test/src/__support/str_to_integer_test.cpp
@@ -12,7 +12,7 @@
 #include "test/UnitTest/Test.h"
 
 // This file is for testing the src_len argument and other internal interface
-// features. Primary testing is done in stdlib/StrolTest.cpp through the public
+// features. Primary testing is done in stdlib/StrtolTest.h through the public
 // interface.
 
 TEST(LlvmLibcStrToIntegerTest, SimpleLength) {
@@ -239,3 +239,109 @@ TEST(LlvmLibcStrToIntegerTest, CombinedTests) {
   EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
   ASSERT_EQ(result.value, 0);
 }
+
+TEST(LlvmLibcStrToIntegerTest, Base2PrefixAutoSelect) {
+  auto result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 10);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
+  ASSERT_EQ(result.value, 0b10101);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 6);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(result.value, 0b1010);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 5);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(5));
+  ASSERT_EQ(result.value, 0b101);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 2);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
+  ASSERT_EQ(result.value, 0);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 0);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(result.value, 0);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("-0b10101", 0, 10);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(8));
+  ASSERT_EQ(result.value, -0b10101);
+}
+
+TEST(LlvmLibcStrToIntegerTest, Base2PrefixManualSelect) {
+  auto result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 10);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(5));
+  ASSERT_EQ(result.value, 0b10101);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 4);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(4));
+  ASSERT_EQ(result.value, 0b1010);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 3);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(3));
+  ASSERT_EQ(result.value, 0b101);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 2);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(2));
+  ASSERT_EQ(result.value, 0b10);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 1);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
+  ASSERT_EQ(result.value, 0b1);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 0);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(result.value, 0);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("-10101", 2, 10);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(result.value, -0b10101);
+}
+
+TEST(LlvmLibcStrToIntegerTest, Base2PrefixManualSelectWithPrefix) {
+  auto result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 10);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
+  ASSERT_EQ(result.value, 0b10101);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 4);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(4));
+  ASSERT_EQ(result.value, 0b10);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 3);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(3));
+  ASSERT_EQ(result.value, 0b1);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 2);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
+  ASSERT_EQ(result.value, 0);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 1);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
+  ASSERT_EQ(result.value, 0);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 0);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(result.value, 0);
+
+  result = LIBC_NAMESPACE::internal::strtointeger<int>("-0b10101", 2, 10);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(8));
+  ASSERT_EQ(result.value, -0b10101);
+}
diff --git a/libc/test/src/stdlib/StrtolTest.h b/libc/test/src/stdlib/StrtolTest.h
index 423e90c5b1ab6..8cdf9a66c4302 100644
--- a/libc/test/src/stdlib/StrtolTest.h
+++ b/libc/test/src/stdlib/StrtolTest.h
@@ -241,9 +241,13 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
                   static_cast<ReturnT>(second_digit + (first_digit * base)));
               ASSERT_ERRNO_SUCCESS();
             } else if (first_digit < base) {
-              // if the base is 16 there is a special case for the prefix 0X.
-              // The number is treated as a one digit hexadecimal.
-              if (base == 16 && first_digit == 0 && second_digit == 33) {
+              /* Special case: if the string is "0x?" for base 16 or "0b?" for
+               * base 2, the "0x"/"0b" prefix is consumed and the third
+               * character is parsed as the actual digit. If the third character
+               * is also invalid for the base, the result is 0.
+               */
+              if ((base == 16 && first_digit == 0 && second_digit == 33) ||
+                  (base == 2 && first_digit == 0 && second_digit == 11)) {
                 if (third_digit < base) {
                   ASSERT_EQ(func(small_string, nullptr, base),
                             static_cast<ReturnT>(third_digit));
@@ -401,6 +405,16 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
     ASSERT_ERRNO_SUCCESS();
     EXPECT_EQ(str_end - base_eight_with_prefix, ptrdiff_t(6));
 
+    const char *base_two_with_prefix = "0b10101";
+    ASSERT_EQ(func(base_two_with_prefix, &str_end, 0), ReturnT(0b10101));
+    ASSERT_ERRNO_SUCCESS();
+    EXPECT_EQ(str_end - base_two_with_prefix, ptrdiff_t(7));
+
+    const char *octal_not_binary = "010101";
+    ASSERT_EQ(func(octal_not_binary, &str_end, 0), ReturnT(010101));
+    ASSERT_ERRNO_SUCCESS();
+    EXPECT_EQ(str_end - octal_not_binary, ptrdiff_t(6));
+
     const char *just_zero = "0";
     ASSERT_EQ(func(just_zero, &str_end, 0), ReturnT(0));
     ASSERT_ERRNO_SUCCESS();
@@ -415,6 +429,11 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
     ASSERT_EQ(func(just_zero_eight, &str_end, 0), ReturnT(0));
     ASSERT_ERRNO_SUCCESS();
     EXPECT_EQ(str_end - just_zero_eight, ptrdiff_t(1));
+
+    const char *just_zero_b = "0b";
+    ASSERT_EQ(func(just_zero_b, &str_end, 0), ReturnT(0));
+    ASSERT_ERRNO_SUCCESS();
+    EXPECT_EQ(str_end - just_zero_b, ptrdiff_t(1));
   }
 };
 
diff --git a/libc/test/src/wchar/WcstolTest.h b/libc/test/src/wchar/WcstolTest.h
index ef1c5ec0ecafe..c67882dd6ddf8 100644
--- a/libc/test/src/wchar/WcstolTest.h
+++ b/libc/test/src/wchar/WcstolTest.h
@@ -242,9 +242,13 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
                   static_cast<ReturnT>(second_digit + (first_digit * base)));
               ASSERT_ERRNO_SUCCESS();
             } else if (first_digit < base) {
-              // if the base is 16 there is a special case for the prefix 0X.
-              // The number is treated as a one digit hexadecimal.
-              if (base == 16 && first_digit == 0 && second_digit == 33) {
+              /* Special case: if the string is "0x?" for base 16 or "0b?" for
+               * base 2, the "0x"/"0b" prefix is consumed and the third
+               * character is parsed as the actual digit. If the third character
+               * is also invalid for the base, the result is 0.
+               */
+              if ((base == 16 && first_digit == 0 && second_digit == 33) ||
+                  (base == 2 && first_digit == 0 && second_digit == 11)) {
                 if (third_digit < base) {
                   ASSERT_EQ(func(small_string, nullptr, base),
                             static_cast<ReturnT>(third_digit));
@@ -402,6 +406,16 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
     ASSERT_ERRNO_SUCCESS();
     EXPECT_EQ(str_end - base_eight_with_prefix, ptrdiff_t(6));
 
+    const wchar_t *base_two_with_prefix = L"0b10101";
+    ASSERT_EQ(func(base_two_with_prefix, &str_end, 0), ReturnT(0b10101));
+    ASSERT_ERRNO_SUCCESS();
+    EXPECT_EQ(str_end - base_two_with_prefix, ptrdiff_t(7));
+
+    const wchar_t *octal_not_binary = L"010101";
+    ASSERT_EQ(func(octal_not_binary, &str_end, 0), ReturnT(010101));
+    ASSERT_ERRNO_SUCCESS();
+    EXPECT_EQ(str_end - octal_not_binary, ptrdiff_t(6));
+
     const wchar_t *just_zero = L"0";
     ASSERT_EQ(func(just_zero, &str_end, 0), ReturnT(0));
     ASSERT_ERRNO_SUCCESS();
@@ -416,6 +430,11 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
     ASSERT_EQ(func(just_zero_eight, &str_end, 0), ReturnT(0));
     ASSERT_ERRNO_SUCCESS();
     EXPECT_EQ(str_end - just_zero_eight, ptrdiff_t(1));
+
+    const wchar_t *just_zero_b = L"0b";
+    ASSERT_EQ(func(just_zero_b, &str_end, 0), ReturnT(0));
+    ASSERT_ERRNO_SUCCESS();
+    EXPECT_EQ(str_end - just_zero_b, ptrdiff_t(1));
   }
 };
 



More information about the libc-commits mailing list