[libc] [llvm] [libc] Templatize strtointeger implementation. (PR #165884)
Alexey Samsonov via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 31 10:00:03 PDT 2025
https://github.com/vonosmas updated https://github.com/llvm/llvm-project/pull/165884
>From e1dae470eafab4f979ac074220b1f01bb4bed0a7 Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Fri, 31 Oct 2025 15:57:20 +0000
Subject: [PATCH 1/3] [libc] Templatize strtointeger implementation.
* Removes the copy-pasted implementation of wcstointeger,
and migrates the wcsto* family of functions to use a templated
version of strtointeger.
* Fixes the out-of-bounds read in the original implementation(s)
when the entire input string consists of whitespace
(then the sign check can access OOB memory).
The code is currently slightly peppered with "if constexpr" statements
to distinguish between char and wchar_t. We can probably
simplify it in subsequent changes by:
* using overloads, so that internal::isalnum() is overloaded for
both char and wchar_t (since C++ luckily allows us to reuse names).
* this wouldn't help for direct comparisons with literals -
for those we could use a somewhat ugly workaround like is_char_literal(c, '0', L'0')
---
libc/src/__support/CMakeLists.txt | 12 --
libc/src/__support/str_to_integer.h | 100 +++++++----
libc/src/__support/wcs_to_integer.h | 155 ------------------
libc/src/wchar/CMakeLists.txt | 8 +-
libc/src/wchar/wcstol.cpp | 4 +-
libc/src/wchar/wcstoll.cpp | 4 +-
libc/src/wchar/wcstoul.cpp | 4 +-
libc/src/wchar/wcstoull.cpp | 4 +-
libc/test/src/__support/CMakeLists.txt | 2 +-
.../src/__support/wcs_to_integer_test.cpp | 100 +++++------
.../llvm-project-overlay/libc/BUILD.bazel | 1 +
11 files changed, 135 insertions(+), 259 deletions(-)
delete mode 100644 libc/src/__support/wcs_to_integer.h
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 0ef09a9b8c9d0..b7af751ec3f27 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -179,19 +179,7 @@ add_header_library(
DEPENDS
.ctype_utils
.str_to_num_result
- libc.hdr.errno_macros
- libc.src.__support.CPP.limits
- libc.src.__support.CPP.type_traits
- libc.src.__support.common
-)
-
-add_header_library(
- wcs_to_integer
- HDRS
- wcs_to_integer.h
- DEPENDS
.wctype_utils
- .str_to_num_result
libc.hdr.errno_macros
libc.src.__support.CPP.limits
libc.src.__support.CPP.type_traits
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index d332c929f2c31..0f87d7a8c8fbf 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -25,36 +25,70 @@
#include "src/__support/macros/config.h"
#include "src/__support/str_to_num_result.h"
#include "src/__support/uint128.h"
+#include "src/__support/wctype_utils.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
// Returns the idx to the first character in src that is not a whitespace
-// character (as determined by isspace())
+// character (as determined by isspace() / iswspace())
+template <typename CharType>
LIBC_INLINE size_t
-first_non_whitespace(const char *__restrict src,
+first_non_whitespace(const CharType *__restrict src,
size_t src_len = cpp::numeric_limits<size_t>::max()) {
size_t src_cur = 0;
- while (src_cur < src_len && internal::isspace(src[src_cur])) {
+ while (src_cur < src_len) {
+ if constexpr (cpp::is_same_v<CharType, char>) {
+ if (!internal::isspace(src[src_cur])) break;
+ } else {
+ if (!internal::iswspace(src[src_cur])) break;
+ }
++src_cur;
}
return src_cur;
}
+// Returns 1 if 'src' starts with + or - sign, and returns 0 otherwise.
+// Writes the sign value to |is_positive|.
+template <typename CharType>
+LIBC_INLINE static size_t
+consume_sign(const CharType *__restrict src, bool *is_positive) {
+ *is_positive = true;
+ if constexpr (cpp::is_same_v<CharType, char>) {
+ if (*src == '+' || *src == '-') {
+ *is_positive = (*src == '+');
+ return 1;
+ }
+ } else {
+ if (*src == L'+' || *src == L'-') {
+ *is_positive = (*src == L'+');
+ return 1;
+ }
+ }
+ return 0;
+}
+
// checks if the next 3 characters of the string pointer are the start of a
// hexadecimal number. Does not advance the string pointer.
-LIBC_INLINE bool
-is_hex_start(const char *__restrict src,
- size_t src_len = cpp::numeric_limits<size_t>::max()) {
+template<typename CharType>
+LIBC_INLINE static bool
+is_hex_start(const CharType *__restrict src, size_t src_len) {
if (src_len < 3)
return false;
- return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
- b36_char_to_int(*(src + 2)) < 16;
+ if constexpr (cpp::is_same_v<CharType, char>) {
+ return src[0] == '0' && tolower(src[1]) == 'x' && isalnum(src[2]) &&
+ b36_char_to_int(src[2]) < 16;
+ } else {
+ return src[0] == L'0' && towlower(src[1]) == L'x' && iswalnum(src[2]) &&
+ b36_wchar_to_int(src[2]) < 16;
+ }
}
// Takes the address of the string pointer and parses the base from the start of
// it.
-LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
+template<typename CharType>
+LIBC_INLINE static int infer_base(const CharType *__restrict src,
+ size_t src_len) {
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
// sequence of the decimal digits and the letters a (or A) through f (or F)
// with values 10 through 15 respectively." (C standard 6.4.4.1)
@@ -63,8 +97,13 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
// An octal number is defined as "the prefix 0 optionally followed by a
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
// number that starts with 0, including just 0, is an octal number.
- if (src_len > 0 && src[0] == '0')
- return 8;
+ if (src_len > 0) {
+ if constexpr (cpp::is_same_v<CharType, char>) {
+ if (src[0] == '0') return 8;
+ } else {
+ if (src[0] == L'0') return 8;
+ }
+ }
// A decimal number is defined as beginning "with a nonzero digit and
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
return 10;
@@ -77,32 +116,26 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
// -----------------------------------------------------------------------------
// Takes a pointer to a string and the base to convert to. This function is used
// as the backend for all of the string to int functions.
-template <class T>
+template <typename T, typename CharType>
LIBC_INLINE StrToNumResult<T>
-strtointeger(const char *__restrict src, int base,
+strtointeger(const CharType *__restrict src, int base,
const size_t src_len = cpp::numeric_limits<size_t>::max()) {
using ResultType = make_integral_or_big_int_unsigned_t<T>;
- ResultType result = 0;
-
- bool is_number = false;
- size_t src_cur = 0;
- int error_val = 0;
-
if (src_len == 0)
return {0, 0, 0};
if (base < 0 || base == 1 || base > 36)
return {0, 0, EINVAL};
- src_cur = first_non_whitespace(src, src_len);
-
- char result_sign = '+';
- if (src[src_cur] == '+' || src[src_cur] == '-') {
- result_sign = src[src_cur];
- ++src_cur;
+ size_t src_cur = first_non_whitespace(src, src_len);
+ if (src_cur == src_len) {
+ return {0, 0, 0};
}
+ bool is_positive;
+ src_cur += consume_sign(src + src_cur, &is_positive);
+
if (base == 0)
base = infer_base(src + src_cur, src_len - src_cur);
@@ -110,8 +143,6 @@ strtointeger(const char *__restrict src, int base,
src_cur = src_cur + 2;
constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
- const bool is_positive = (result_sign == '+');
-
ResultType constexpr NEGATIVE_MAX =
!IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
: cpp::numeric_limits<T>::max();
@@ -120,8 +151,19 @@ strtointeger(const char *__restrict src, int base,
ResultType const abs_max_div_by_base =
abs_max / static_cast<ResultType>(base);
- while (src_cur < src_len && isalnum(src[src_cur])) {
- int cur_digit = b36_char_to_int(src[src_cur]);
+ bool is_number = false;
+ int error_val = 0;
+ ResultType result = 0;
+ while (src_cur < src_len) {
+ int cur_digit;
+ if constexpr (cpp::is_same_v<CharType, char>) {
+ if (!isalnum(src[src_cur])) break;
+ cur_digit = b36_char_to_int(src[src_cur]);
+ } else {
+ if (!iswalnum(src[src_cur])) break;
+ cur_digit = b36_wchar_to_int(src[src_cur]);
+ }
+
if (cur_digit >= base)
break;
diff --git a/libc/src/__support/wcs_to_integer.h b/libc/src/__support/wcs_to_integer.h
deleted file mode 100644
index 4254bd860f77a..0000000000000
--- a/libc/src/__support/wcs_to_integer.h
+++ /dev/null
@@ -1,155 +0,0 @@
-//===-- Widechar string to integer conversion utils -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H
-#define LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H
-
-#include "hdr/errno_macros.h" // For ERANGE
-#include "src/__support/CPP/limits.h"
-#include "src/__support/CPP/type_traits.h"
-#include "src/__support/CPP/type_traits/make_unsigned.h"
-#include "src/__support/big_int.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/str_to_num_result.h"
-#include "src/__support/uint128.h"
-#include "src/__support/wctype_utils.h"
-
-namespace LIBC_NAMESPACE_DECL {
-namespace internal {
-
-// Returns the idx of the first character in src that is not a whitespace
-// character (as determined by iswspace())
-LIBC_INLINE size_t
-first_non_whitespace(const wchar_t *__restrict src,
- size_t src_len = cpp::numeric_limits<size_t>::max()) {
- size_t src_cur = 0;
- while (src_cur < src_len && internal::iswspace(src[src_cur])) {
- ++src_cur;
- }
- return src_cur;
-}
-
-// checks if the next 3 characters of the string pointer are the start of a
-// hexadecimal number. Does not advance the string pointer.
-LIBC_INLINE bool
-is_hex_start(const wchar_t *__restrict src,
- size_t src_len = cpp::numeric_limits<size_t>::max()) {
- if (src_len < 3)
- return false;
- return *src == L'0' && towlower(*(src + 1)) == L'x' && iswalnum(*(src + 2)) &&
- b36_wchar_to_int(*(src + 2)) < 16;
-}
-
-// Takes the address of the string pointer and parses the base from the start of
-// it.
-LIBC_INLINE int infer_base(const wchar_t *__restrict src, size_t src_len) {
- // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
- // sequence of the decimal digits and the letters a (or A) through f (or F)
- // with values 10 through 15 respectively." (C standard 6.4.4.1)
- if (is_hex_start(src, src_len))
- return 16;
- // An octal number is defined as "the prefix 0 optionally followed by a
- // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
- // number that starts with 0, including just 0, is an octal number.
- if (src_len > 0 && src[0] == L'0')
- return 8;
- // A decimal number is defined as beginning "with a nonzero digit and
- // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
- return 10;
-}
-
-template <class T>
-LIBC_INLINE StrToNumResult<T>
-wcstointeger(const wchar_t *__restrict src, int base,
- const size_t src_len = cpp::numeric_limits<size_t>::max()) {
- using ResultType = make_integral_or_big_int_unsigned_t<T>;
-
- ResultType result = 0;
-
- bool is_number = false;
- size_t src_cur = 0;
- int error_val = 0;
-
- if (src_len == 0)
- return {0, 0, 0};
-
- if (base < 0 || base == 1 || base > 36)
- return {0, 0, EINVAL};
-
- src_cur = first_non_whitespace(src, src_len);
-
- wchar_t result_sign = L'+';
- if (src[src_cur] == L'+' || src[src_cur] == L'-') {
- result_sign = src[src_cur];
- ++src_cur;
- }
-
- if (base == 0)
- base = infer_base(src + src_cur, src_len - src_cur);
-
- if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
- src_cur = src_cur + 2;
-
- constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
- const bool is_positive = (result_sign == L'+');
-
- ResultType constexpr NEGATIVE_MAX =
- !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
- : cpp::numeric_limits<T>::max();
- ResultType const abs_max =
- (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
- ResultType const abs_max_div_by_base =
- abs_max / static_cast<ResultType>(base);
-
- while (src_cur < src_len && iswalnum(src[src_cur])) {
- int cur_digit = b36_wchar_to_int(src[src_cur]);
- if (cur_digit >= base)
- break;
-
- is_number = true;
- ++src_cur;
-
- // If the number has already hit the maximum value for the current type then
- // the result cannot change, but we still need to advance src to the end of
- // the number.
- if (result == abs_max) {
- error_val = ERANGE;
- continue;
- }
-
- if (result > abs_max_div_by_base) {
- result = abs_max;
- error_val = ERANGE;
- } else {
- result = result * static_cast<ResultType>(base);
- }
- if (result > abs_max - static_cast<ResultType>(cur_digit)) {
- result = abs_max;
- error_val = ERANGE;
- } else {
- result = result + static_cast<ResultType>(cur_digit);
- }
- }
-
- ptrdiff_t str_len = is_number ? static_cast<ptrdiff_t>(src_cur) : 0;
-
- if (error_val == ERANGE) {
- if (is_positive || IS_UNSIGNED)
- return {cpp::numeric_limits<T>::max(), str_len, error_val};
- else // T is signed and there is a negative overflow
- return {cpp::numeric_limits<T>::min(), str_len, error_val};
- }
-
- return {static_cast<T>(is_positive ? result : -result), str_len, error_val};
-}
-
-} // namespace internal
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif // LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index adde382bf0950..ba27cd77f6bac 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -63,7 +63,7 @@ add_entrypoint_object(
wcstol.h
DEPENDS
libc.src.errno.errno
- libc.src.__support.wcs_to_integer
+ libc.src.__support.str_to_integer
)
add_entrypoint_object(
@@ -74,7 +74,7 @@ add_entrypoint_object(
wcstoll.h
DEPENDS
libc.src.errno.errno
- libc.src.__support.wcs_to_integer
+ libc.src.__support.str_to_integer
)
add_entrypoint_object(
@@ -85,7 +85,7 @@ add_entrypoint_object(
wcstoul.h
DEPENDS
libc.src.errno.errno
- libc.src.__support.wcs_to_integer
+ libc.src.__support.str_to_integer
)
add_entrypoint_object(
@@ -96,7 +96,7 @@ add_entrypoint_object(
wcstoull.h
DEPENDS
libc.src.errno.errno
- libc.src.__support.wcs_to_integer
+ libc.src.__support.str_to_integer
)
add_entrypoint_object(
diff --git a/libc/src/wchar/wcstol.cpp b/libc/src/wchar/wcstol.cpp
index a05718f706dfd..a56b5f91272cd 100644
--- a/libc/src/wchar/wcstol.cpp
+++ b/libc/src/wchar/wcstol.cpp
@@ -10,14 +10,14 @@
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
-#include "src/__support/wcs_to_integer.h"
+#include "src/__support/str_to_integer.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(long, wcstol,
(const wchar_t *__restrict str, wchar_t **__restrict str_end,
int base)) {
- auto result = internal::wcstointeger<long>(str, base);
+ auto result = internal::strtointeger<long>(str, base);
if (result.has_error())
libc_errno = result.error;
diff --git a/libc/src/wchar/wcstoll.cpp b/libc/src/wchar/wcstoll.cpp
index de1299d681cdb..6229d24172b51 100644
--- a/libc/src/wchar/wcstoll.cpp
+++ b/libc/src/wchar/wcstoll.cpp
@@ -10,14 +10,14 @@
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
-#include "src/__support/wcs_to_integer.h"
+#include "src/__support/str_to_integer.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(long long, wcstoll,
(const wchar_t *__restrict str, wchar_t **__restrict str_end,
int base)) {
- auto result = internal::wcstointeger<long long>(str, base);
+ auto result = internal::strtointeger<long long>(str, base);
if (result.has_error())
libc_errno = result.error;
diff --git a/libc/src/wchar/wcstoul.cpp b/libc/src/wchar/wcstoul.cpp
index 79b8c9b5c9fa3..c5639bee1d649 100644
--- a/libc/src/wchar/wcstoul.cpp
+++ b/libc/src/wchar/wcstoul.cpp
@@ -10,14 +10,14 @@
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
-#include "src/__support/wcs_to_integer.h"
+#include "src/__support/str_to_integer.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(unsigned long, wcstoul,
(const wchar_t *__restrict str, wchar_t **__restrict str_end,
int base)) {
- auto result = internal::wcstointeger<unsigned long>(str, base);
+ auto result = internal::strtointeger<unsigned long>(str, base);
if (result.has_error())
libc_errno = result.error;
diff --git a/libc/src/wchar/wcstoull.cpp b/libc/src/wchar/wcstoull.cpp
index 768e03c4bd189..2ab24e9b2b2a1 100644
--- a/libc/src/wchar/wcstoull.cpp
+++ b/libc/src/wchar/wcstoull.cpp
@@ -10,14 +10,14 @@
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
-#include "src/__support/wcs_to_integer.h"
+#include "src/__support/str_to_integer.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(unsigned long long, wcstoull,
(const wchar_t *__restrict str, wchar_t **__restrict str_end,
int base)) {
- auto result = internal::wcstointeger<unsigned long long>(str, base);
+ auto result = internal::strtointeger<unsigned long long>(str, base);
if (result.has_error())
libc_errno = result.error;
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index a02514106a307..138866b4cc869 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -151,7 +151,7 @@ add_libc_test(
wcs_to_integer_test.cpp
DEPENDS
libc.src.__support.integer_literals
- libc.src.__support.wcs_to_integer
+ libc.src.__support.str_to_integer
)
add_libc_test(
diff --git a/libc/test/src/__support/wcs_to_integer_test.cpp b/libc/test/src/__support/wcs_to_integer_test.cpp
index 4554968be67ce..45bbf0bdd0651 100644
--- a/libc/test/src/__support/wcs_to_integer_test.cpp
+++ b/libc/test/src/__support/wcs_to_integer_test.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "src/__support/wcs_to_integer.h"
+#include "src/__support/str_to_integer.h"
#include <stddef.h>
#include "test/UnitTest/Test.h"
@@ -14,224 +14,224 @@
// This file is for testing the src_len argument and other internal interface
// features. Primary testing is done through the public interface.
-TEST(LlvmLibcStrToIntegerTest, SimpleLength) {
- auto result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"12345", 10, 10);
+TEST(LlvmLibcWcsToIntegerTest, SimpleLength) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>(L"12345", 10, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(5));
ASSERT_EQ(result.value, 12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"12345", 10, 2);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"12345", 10, 2);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(2));
ASSERT_EQ(result.value, 12);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"12345", 10, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"12345", 10, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
}
-TEST(LlvmLibcStrToIntegerTest, LeadingSpaces) {
+TEST(LlvmLibcWcsToIntegerTest, LeadingSpaces) {
auto result =
- LIBC_NAMESPACE::internal::wcstointeger<int>(L" 12345", 10, 15);
+ LIBC_NAMESPACE::internal::strtointeger<int>(L" 12345", 10, 15);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(10));
ASSERT_EQ(result.value, 12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L" 12345", 10, 10);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L" 12345", 10, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(10));
ASSERT_EQ(result.value, 12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L" 12345", 10, 7);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L" 12345", 10, 7);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
ASSERT_EQ(result.value, 12);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L" 12345", 10, 5);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L" 12345", 10, 5);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L" 12345", 10, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L" 12345", 10, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
}
-TEST(LlvmLibcStrToIntegerTest, LeadingSign) {
- auto result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"+12345", 10, 10);
+TEST(LlvmLibcWcsToIntegerTest, LeadingSign) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>(L"+12345", 10, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, 12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"-12345", 10, 10);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"-12345", 10, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, -12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"+12345", 10, 6);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"+12345", 10, 6);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, 12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"-12345", 10, 6);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"-12345", 10, 6);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, -12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"+12345", 10, 3);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"+12345", 10, 3);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(3));
ASSERT_EQ(result.value, 12);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"-12345", 10, 3);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"-12345", 10, 3);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(3));
ASSERT_EQ(result.value, -12);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"+12345", 10, 1);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"+12345", 10, 1);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"-12345", 10, 1);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"-12345", 10, 1);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"+12345", 10, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"+12345", 10, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"-12345", 10, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"-12345", 10, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
}
-TEST(LlvmLibcStrToIntegerTest, Base16PrefixAutoSelect) {
- auto result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 0, 10);
+TEST(LlvmLibcWcsToIntegerTest, Base16PrefixAutoSelect) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 0, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
ASSERT_EQ(result.value, 0x12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 0, 7);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 0, 7);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
ASSERT_EQ(result.value, 0x12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 0, 5);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 0, 5);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(5));
ASSERT_EQ(result.value, 0x123);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 0, 2);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 0, 2);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 0, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 0, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
}
-TEST(LlvmLibcStrToIntegerTest, Base16PrefixManualSelect) {
- auto result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 16, 10);
+TEST(LlvmLibcWcsToIntegerTest, Base16PrefixManualSelect) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 16, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
ASSERT_EQ(result.value, 0x12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 16, 7);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 16, 7);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
ASSERT_EQ(result.value, 0x12345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 16, 5);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 16, 5);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(5));
ASSERT_EQ(result.value, 0x123);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 16, 2);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 16, 2);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"0x12345", 16, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"0x12345", 16, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
}
-TEST(LlvmLibcStrToIntegerTest, Base8PrefixAutoSelect) {
- auto result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 0, 10);
+TEST(LlvmLibcWcsToIntegerTest, Base8PrefixAutoSelect) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 0, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, 012345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 0, 6);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 0, 6);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, 012345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 0, 4);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 0, 4);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(4));
ASSERT_EQ(result.value, 0123);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 0, 1);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 0, 1);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 0, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 0, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
}
-TEST(LlvmLibcStrToIntegerTest, Base8PrefixManualSelect) {
- auto result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 8, 10);
+TEST(LlvmLibcWcsToIntegerTest, Base8PrefixManualSelect) {
+ auto result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 8, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, 012345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 8, 6);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 8, 6);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, 012345);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 8, 4);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 8, 4);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(4));
ASSERT_EQ(result.value, 0123);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 8, 1);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 8, 1);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(1));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"012345", 8, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L"012345", 8, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
}
-TEST(LlvmLibcStrToIntegerTest, CombinedTests) {
+TEST(LlvmLibcWcsToIntegerTest, CombinedTests) {
auto result =
- LIBC_NAMESPACE::internal::wcstointeger<int>(L" -0x123", 0, 10);
+ LIBC_NAMESPACE::internal::strtointeger<int>(L" -0x123", 0, 10);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(10));
ASSERT_EQ(result.value, -0x123);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L" -0x123", 0, 8);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L" -0x123", 0, 8);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(8));
ASSERT_EQ(result.value, -0x1);
- result = LIBC_NAMESPACE::internal::wcstointeger<int>(L" -0x123", 0, 7);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(L" -0x123", 0, 7);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
ASSERT_EQ(result.value, 0);
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 599bc4b3d8bbf..5a1e0b53b021c 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1000,6 +1000,7 @@ libc_support_library(
":__support_ctype_utils",
":__support_str_to_num_result",
":__support_uint128",
+ ":__support_wctype_utils",
":hdr_errno_macros",
],
)
>From 5570118e77f5aa436d1ce373070b20bcb1c4d595 Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Fri, 31 Oct 2025 16:29:44 +0000
Subject: [PATCH 2/3] add a test case surfacing OOB access under sanitizers.
---
libc/test/src/__support/str_to_integer_test.cpp | 6 ++++--
libc/test/src/__support/wcs_to_integer_test.cpp | 6 ++++--
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/libc/test/src/__support/str_to_integer_test.cpp b/libc/test/src/__support/str_to_integer_test.cpp
index 1ec882b212b8a..e5ac1d6cbb7b3 100644
--- a/libc/test/src/__support/str_to_integer_test.cpp
+++ b/libc/test/src/__support/str_to_integer_test.cpp
@@ -49,12 +49,14 @@ TEST(LlvmLibcStrToIntegerTest, LeadingSpaces) {
EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
ASSERT_EQ(result.value, 12);
- result = LIBC_NAMESPACE::internal::strtointeger<int>(" 12345", 10, 5);
+ // Use a non-null-terminated buffer to test for possible OOB access.
+ char buf[5] = {' ', ' ', ' ', ' ', ' '};
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(buf, 10, 5);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::strtointeger<int>(" 12345", 10, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(buf, 10, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
diff --git a/libc/test/src/__support/wcs_to_integer_test.cpp b/libc/test/src/__support/wcs_to_integer_test.cpp
index 45bbf0bdd0651..38af778ca2440 100644
--- a/libc/test/src/__support/wcs_to_integer_test.cpp
+++ b/libc/test/src/__support/wcs_to_integer_test.cpp
@@ -48,12 +48,14 @@ TEST(LlvmLibcWcsToIntegerTest, LeadingSpaces) {
EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
ASSERT_EQ(result.value, 12);
- result = LIBC_NAMESPACE::internal::strtointeger<int>(L" 12345", 10, 5);
+ // Use a non-null-terminated buffer to test for possible OOB access.
+ wchar_t buf[5] = {L' ', L' ', L' ', L' ', L' '};
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(buf, 10, 5);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
- result = LIBC_NAMESPACE::internal::strtointeger<int>(L" 12345", 10, 0);
+ result = LIBC_NAMESPACE::internal::strtointeger<int>(buf, 10, 0);
EXPECT_FALSE(result.has_error());
EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
ASSERT_EQ(result.value, 0);
>From 1033f85afc435b75324899ef50e82ccd8c8b790f Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Fri, 31 Oct 2025 16:59:43 +0000
Subject: [PATCH 3/3] run clang-format
---
libc/src/__support/str_to_integer.h | 32 +++++++++++++++++------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index 0f87d7a8c8fbf..2fea5f674215e 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -39,9 +39,11 @@ first_non_whitespace(const CharType *__restrict src,
size_t src_cur = 0;
while (src_cur < src_len) {
if constexpr (cpp::is_same_v<CharType, char>) {
- if (!internal::isspace(src[src_cur])) break;
+ if (!internal::isspace(src[src_cur]))
+ break;
} else {
- if (!internal::iswspace(src[src_cur])) break;
+ if (!internal::iswspace(src[src_cur]))
+ break;
}
++src_cur;
}
@@ -51,8 +53,8 @@ first_non_whitespace(const CharType *__restrict src,
// Returns 1 if 'src' starts with + or - sign, and returns 0 otherwise.
// Writes the sign value to |is_positive|.
template <typename CharType>
-LIBC_INLINE static size_t
-consume_sign(const CharType *__restrict src, bool *is_positive) {
+LIBC_INLINE static size_t consume_sign(const CharType *__restrict src,
+ bool *is_positive) {
*is_positive = true;
if constexpr (cpp::is_same_v<CharType, char>) {
if (*src == '+' || *src == '-') {
@@ -70,9 +72,9 @@ consume_sign(const CharType *__restrict src, bool *is_positive) {
// checks if the next 3 characters of the string pointer are the start of a
// hexadecimal number. Does not advance the string pointer.
-template<typename CharType>
-LIBC_INLINE static bool
-is_hex_start(const CharType *__restrict src, size_t src_len) {
+template <typename CharType>
+LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
+ size_t src_len) {
if (src_len < 3)
return false;
if constexpr (cpp::is_same_v<CharType, char>) {
@@ -86,7 +88,7 @@ is_hex_start(const CharType *__restrict src, size_t src_len) {
// Takes the address of the string pointer and parses the base from the start of
// it.
-template<typename CharType>
+template <typename CharType>
LIBC_INLINE static int infer_base(const CharType *__restrict src,
size_t src_len) {
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
@@ -99,9 +101,11 @@ LIBC_INLINE static int infer_base(const CharType *__restrict src,
// number that starts with 0, including just 0, is an octal number.
if (src_len > 0) {
if constexpr (cpp::is_same_v<CharType, char>) {
- if (src[0] == '0') return 8;
+ if (src[0] == '0')
+ return 8;
} else {
- if (src[0] == L'0') return 8;
+ if (src[0] == L'0')
+ return 8;
}
}
// A decimal number is defined as beginning "with a nonzero digit and
@@ -153,14 +157,16 @@ strtointeger(const CharType *__restrict src, int base,
bool is_number = false;
int error_val = 0;
- ResultType result = 0;
+ ResultType result = 0;
while (src_cur < src_len) {
int cur_digit;
if constexpr (cpp::is_same_v<CharType, char>) {
- if (!isalnum(src[src_cur])) break;
+ if (!isalnum(src[src_cur]))
+ break;
cur_digit = b36_char_to_int(src[src_cur]);
} else {
- if (!iswalnum(src[src_cur])) break;
+ if (!iswalnum(src[src_cur]))
+ break;
cur_digit = b36_wchar_to_int(src[src_cur]);
}
More information about the llvm-commits
mailing list