[libc-commits] [libc] [libc] Add C23 binary prefix (0b/0B) support to strtointeger (PR #202243)
Shubh Pachchigar via libc-commits
libc-commits at lists.llvm.org
Sun Jun 7 18:14:30 PDT 2026
https://github.com/shubhe25p created https://github.com/llvm/llvm-project/pull/202243
The C23 standard (N3220 §7.24.1.7) requires strtol and related functions to recognize the 0b/0B prefix when base is 2 or 0. This change implements that support and adds relevant tests.
From 45ee6335b65019ee881acc43b2bfd4f40c15d5d7 Mon Sep 17 00:00:00 2001
From: shubhe25p <shubhp at Mac.lan>
Date: Sun, 7 Jun 2026 18:12:36 -0700
Subject: [PATCH] [libc] Add C23 binary prefix (0b/0B) support to strtointeger
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The C23 standard (N3220 §7.24.1.7) requires strtol and related functions
to recognize the 0b/0B prefix when base is 2 or 0. This change implements
that support and adds relevant tests.
---
libc/src/__support/str_to_integer.h | 19 +++
.../src/__support/str_to_integer_test.cpp | 108 +++++++++++++++++-
libc/test/src/stdlib/StrtolTest.h | 25 +++-
3 files changed, 148 insertions(+), 4 deletions(-)
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index 36f1d05eb2844..e6e599738bf44 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -65,6 +65,18 @@ LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
b36_char_to_int(src[2]) < 16;
}
+// checks if the next 3 characters of the string pointer are the start of a
+// binary number. Does not advance the string pointer.
+template <typename CharType>
+LIBC_INLINE static bool is_binary_start(const CharType *__restrict src,
+ size_t src_len) {
+ if (src_len < 3)
+ return false;
+ return is_char_or_wchar(src[0], '0', L'0') &&
+ is_char_or_wchar(tolower(src[1]), 'b', L'b') && isalnum(src[2]) &&
+ b36_char_to_int(src[2]) < 2;
+}
+
// Takes the address of the string pointer and parses the base from the start of
// it.
template <typename CharType>
@@ -75,6 +87,10 @@ LIBC_INLINE static int infer_base(const CharType *__restrict src,
// with values 10 through 15 respectively." (C standard 6.4.4.1)
if (is_hex_start(src, src_len))
return 16;
+ // A binary number is defined as "the prefix 0b or 0B optionally followed
+ // by a sequence of letters and digits." (C standard 7.24.1.7)
+ if (is_binary_start(src, src_len))
+ return 2;
// An octal number is defined as "the prefix 0 optionally followed by a
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
// number that starts with 0, including just 0, is an octal number.
@@ -120,6 +136,9 @@ strtointeger(const CharType *__restrict src, int base,
if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
src_cur = src_cur + 2;
+ if (base == 2 && is_binary_start(src + src_cur, src_len - src_cur))
+ src_cur = src_cur + 2;
+
constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
ResultType constexpr NEGATIVE_MAX =
!IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
diff --git a/libc/test/src/__support/str_to_integer_test.cpp b/libc/test/src/__support/str_to_integer_test.cpp
index e5ac1d6cbb7b3..1d3e8c8b1cc24 100644
--- a/libc/test/src/__support/str_to_integer_test.cpp
+++ b/libc/test/src/__support/str_to_integer_test.cpp
@@ -12,7 +12,7 @@
#include "test/UnitTest/Test.h"
// This file is for testing the src_len argument and other internal interface
-// features. Primary testing is done in stdlib/StrolTest.cpp through the public
+// features. Primary testing is done in stdlib/StrtolTest.h through the public
// interface.
TEST(LlvmLibcStrToIntegerTest, SimpleLength) {
@@ -239,3 +239,109 @@ TEST(LlvmLibcStrToIntegerTest, CombinedTests) {
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, 0);
}
+
+TEST(LlvmLibcStrToIntegerTest, Base2PrefixAutoSelect) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 10);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
+ ASSERT_EQ(result.value, 0b10101);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 6);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
+ ASSERT_EQ(result.value, 0b1010);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 5);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(5));
+ ASSERT_EQ(result.value, 0b101);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 2);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
+ ASSERT_EQ(result.value, 0);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 0, 0);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
+ ASSERT_EQ(result.value, 0);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("-0b10101", 0, 10);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(8));
+ ASSERT_EQ(result.value, -0b10101);
+}
+
+TEST(LlvmLibcStrToIntegerTest, Base2PrefixManualSelect) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 10);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(5));
+ ASSERT_EQ(result.value, 0b10101);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 4);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(4));
+ ASSERT_EQ(result.value, 0b1010);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 3);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(3));
+ ASSERT_EQ(result.value, 0b101);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 2);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(2));
+ ASSERT_EQ(result.value, 0b10);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 1);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
+ ASSERT_EQ(result.value, 0b1);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("10101", 2, 0);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
+ ASSERT_EQ(result.value, 0);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("-10101", 2, 10);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
+ ASSERT_EQ(result.value, -0b10101);
+}
+
+TEST(LlvmLibcStrToIntegerTest, Base2PrefixManualSelectWithPrefix) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 10);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
+ ASSERT_EQ(result.value, 0b10101);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 4);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(4));
+ ASSERT_EQ(result.value, 0b10);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 3);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(3));
+ ASSERT_EQ(result.value, 0b1);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 2);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
+ ASSERT_EQ(result.value, 0);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 1);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
+ ASSERT_EQ(result.value, 0);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("0b10101", 2, 0);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
+ ASSERT_EQ(result.value, 0);
+
+ result = LIBC_NAMESPACE::internal::strtointeger<int>("-0b10101", 2, 10);
+ EXPECT_FALSE(result.has_error());
+ EXPECT_EQ(result.parsed_len, ptrdiff_t(8));
+ ASSERT_EQ(result.value, -0b10101);
+}
diff --git a/libc/test/src/stdlib/StrtolTest.h b/libc/test/src/stdlib/StrtolTest.h
index 423e90c5b1ab6..8cdf9a66c4302 100644
--- a/libc/test/src/stdlib/StrtolTest.h
+++ b/libc/test/src/stdlib/StrtolTest.h
@@ -241,9 +241,13 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
static_cast<ReturnT>(second_digit + (first_digit * base)));
ASSERT_ERRNO_SUCCESS();
} else if (first_digit < base) {
- // if the base is 16 there is a special case for the prefix 0X.
- // The number is treated as a one digit hexadecimal.
- if (base == 16 && first_digit == 0 && second_digit == 33) {
+ /* Special case: for "0x?" in base 16 or "0b?" in base 2, the prefix
+ * is consumed only if the third character is a valid digit, which is
+ * then the parsed value. Otherwise only the leading "0" is parsed
+ * and the result is 0.
+ */
+ if ((base == 16 && first_digit == 0 && second_digit == 33) ||
+ (base == 2 && first_digit == 0 && second_digit == 11)) {
if (third_digit < base) {
ASSERT_EQ(func(small_string, nullptr, base),
static_cast<ReturnT>(third_digit));
@@ -401,6 +405,16 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
ASSERT_ERRNO_SUCCESS();
EXPECT_EQ(str_end - base_eight_with_prefix, ptrdiff_t(6));
+ const char *base_two_with_prefix = "0b10101";
+ ASSERT_EQ(func(base_two_with_prefix, &str_end, 0), ReturnT(0b10101));
+ ASSERT_ERRNO_SUCCESS();
+ EXPECT_EQ(str_end - base_two_with_prefix, ptrdiff_t(7));
+
+ const char *octal_not_binary = "010101";
+ ASSERT_EQ(func(octal_not_binary, &str_end, 0), ReturnT(010101));
+ ASSERT_ERRNO_SUCCESS();
+ EXPECT_EQ(str_end - octal_not_binary, ptrdiff_t(6));
+
const char *just_zero = "0";
ASSERT_EQ(func(just_zero, &str_end, 0), ReturnT(0));
ASSERT_ERRNO_SUCCESS();
@@ -415,6 +429,11 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest {
ASSERT_EQ(func(just_zero_eight, &str_end, 0), ReturnT(0));
ASSERT_ERRNO_SUCCESS();
EXPECT_EQ(str_end - just_zero_eight, ptrdiff_t(1));
+
+ const char *just_zero_b = "0b";
+ ASSERT_EQ(func(just_zero_b, &str_end, 0), ReturnT(0));
+ ASSERT_ERRNO_SUCCESS();
+ EXPECT_EQ(str_end - just_zero_b, ptrdiff_t(1));
}
};
More information about the libc-commits
mailing list