[libc-commits] [libc] 23ace05 - [libc] add int to string for extended bases
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Wed Aug 10 11:28:19 PDT 2022
Author: Michael Jones
Date: 2022-08-10T11:28:14-07:00
New Revision: 23ace05e0a7aa8d47da60e6baa4ad6a2d68a83c8
URL: https://github.com/llvm/llvm-project/commit/23ace05e0a7aa8d47da60e6baa4ad6a2d68a83c8
DIFF: https://github.com/llvm/llvm-project/commit/23ace05e0a7aa8d47da60e6baa4ad6a2d68a83c8.diff
LOG: [libc] add int to string for extended bases
The default IntegerToString class only supports base 10; this patch adds
a version which supports any base between 2 and 36 inclusive. This will
be used in an upcoming patch.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D131301
Added:
Modified:
libc/src/__support/integer_to_string.h
libc/test/src/__support/integer_to_string_test.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h
index bd50c5830ad84..f9d5f24ddc747 100644
--- a/libc/src/__support/integer_to_string.h
+++ b/libc/src/__support/integer_to_string.h
@@ -9,14 +9,23 @@
#ifndef LLVM_LIBC_SRC_SUPPORT_INTEGER_TO_STRING_H
#define LLVM_LIBC_SRC_SUPPORT_INTEGER_TO_STRING_H
+#include "src/__support/CPP/ArrayRef.h"
#include "src/__support/CPP/StringView.h"
+#include "src/__support/CPP/optional.h"
#include "src/__support/CPP/type_traits.h"
namespace __llvm_libc {
-template <typename T> class IntegerToString {
+template <typename T, uint8_t BASE = 10> class IntegerToString {
public:
- // We size the string buffer using an approximation algorithm:
+ static constexpr inline size_t floor_log_2(size_t num) {
+ size_t i = 0;
+ for (; num > 1; num /= 2) {
+ ++i;
+ }
+ return i;
+ }
+ // We size the string buffer for base 10 using an approximation algorithm:
//
// size = ceil(sizeof(T) * 5 / 2)
//
@@ -34,8 +43,20 @@ template <typename T> class IntegerToString {
// overhead is small enough to tolerate. In the actual formula below, we
// add an additional byte to accommodate the '-' sign in case of signed
// integers.
- static constexpr size_t BUFSIZE =
- (sizeof(T) * 5 + 1) / 2 + (cpp::is_signed<T>() ? 1 : 0);
+ // For other bases, we approximate by rounding the base down to the nearest
+ // power of two, since the space needed is then easy to calculate and it
+ // won't overestimate by too much.
+
+ template <uint8_t STATIC_BASE> static constexpr size_t bufsize() {
+ constexpr size_t BITS_PER_DIGIT = floor_log_2(STATIC_BASE);
+ constexpr size_t BUFSIZE_COMMON =
+ ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT);
+ constexpr size_t BUFSIZE_BASE10 = (sizeof(T) * 5 + 1) / 2;
+ return (cpp::is_signed<T>() ? 1 : 0) +
+ (STATIC_BASE == 10 ? BUFSIZE_BASE10 : BUFSIZE_COMMON);
+ }
+
+ static constexpr size_t BUFSIZE = bufsize<BASE>();
private:
static_assert(cpp::is_integral_v<T>,
@@ -44,34 +65,73 @@ template <typename T> class IntegerToString {
using UnsignedType = cpp::make_unsigned_t<T>;
char strbuf[BUFSIZE] = {'\0'};
- size_t len = 0;
+ cpp::StringView str_view;
- constexpr void convert(UnsignedType val) {
- size_t buffptr = BUFSIZE;
- if (val == 0) {
- strbuf[buffptr - 1] = '0';
+ static inline constexpr cpp::StringView
+ convert_alpha_numeric(T val, cpp::MutableArrayRef<char> &buffer,
+ bool lowercase, const uint8_t conv_base) {
+ UnsignedType uval = val < 0 ? UnsignedType(-val) : UnsignedType(val);
+
+ const char a = lowercase ? 'a' : 'A';
+
+ size_t len = 0;
+
+ size_t buffptr = buffer.size();
+ if (uval == 0) {
+ buffer[buffptr - 1] = '0';
--buffptr;
} else {
- for (; val > 0; --buffptr, val /= 10)
- strbuf[buffptr - 1] = (val % 10) + '0';
+ for (; uval > 0; --buffptr, uval /= conv_base) {
+ UnsignedType digit = (uval % conv_base);
+ buffer[buffptr - 1] = digit < 10 ? digit + '0' : digit + a - 10;
+ }
}
- len = BUFSIZE - buffptr;
- }
+ len = buffer.size() - buffptr;
-public:
- constexpr explicit IntegerToString(T val) {
- convert(val < 0 ? UnsignedType(-val) : UnsignedType(val));
if (val < 0) {
// This branch will be taken only for negative signed values.
++len;
- strbuf[BUFSIZE - len] = '-';
+ buffer[buffer.size() - len] = '-';
}
+ cpp::StringView buff_str(buffer.data() + buffer.size() - len, len);
+ return buff_str;
+ }
+
+ // This function exists to check at compile time that the base is valid, as
+ // well as to convert the templated call into a non-templated call. This
+ // allows the compiler to decide to do strength reduction and constant folding
+ // on the base or not, depending on if size or performance is required.
+ template <uint8_t STATIC_BASE = BASE,
+ cpp::enable_if_t<2 <= STATIC_BASE && STATIC_BASE <= 36, int> = 0>
+ static inline constexpr cpp::StringView
+ convert_internal(T val, cpp::MutableArrayRef<char> &buffer, bool lowercase) {
+ return convert_alpha_numeric(val, buffer, lowercase, STATIC_BASE);
+ }
+
+public:
+ template <uint8_t STATIC_BASE = BASE>
+ static inline cpp::optional<cpp::StringView>
+ convert(T val, cpp::MutableArrayRef<char> &buffer, bool lowercase) {
+ // If this function can actually be a constexpr, then the below "if" will be
+ // optimized out.
+ if (buffer.size() < bufsize<STATIC_BASE>())
+ return cpp::optional<cpp::StringView>();
+ return cpp::optional<cpp::StringView>(
+ convert_internal<STATIC_BASE>(val, buffer, lowercase));
}
- cpp::StringView str() const {
- return cpp::StringView(strbuf + BUFSIZE - len, len);
+ constexpr explicit IntegerToString(T val) {
+ cpp::MutableArrayRef<char> bufref(strbuf, BUFSIZE);
+ str_view = convert_internal<BASE>(val, bufref, true);
}
+ constexpr explicit IntegerToString(T val, bool lowercase) {
+ cpp::MutableArrayRef<char> bufref(strbuf, BUFSIZE);
+ str_view = convert_internal<BASE>(val, bufref, lowercase);
+ }
+
+ cpp::StringView str() const { return str_view; }
+
operator cpp::StringView() const { return str(); }
};
diff --git a/libc/test/src/__support/integer_to_string_test.cpp b/libc/test/src/__support/integer_to_string_test.cpp
index 77123ff83e419..ce61254443b0e 100644
--- a/libc/test/src/__support/integer_to_string_test.cpp
+++ b/libc/test/src/__support/integer_to_string_test.cpp
@@ -14,6 +14,7 @@
#include "limits.h"
using __llvm_libc::integer_to_string;
+using __llvm_libc::IntegerToString;
using __llvm_libc::cpp::StringView;
TEST(LlvmLibcIntegerToStringTest, UINT8) {
@@ -183,3 +184,60 @@ TEST(LlvmLibcIntegerToStringTest, INT64) {
EXPECT_EQ(integer_to_string(int64_t(INT64_MIN)).str(),
(StringView("-9223372036854775808")));
}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_10) {
+ EXPECT_EQ((IntegerToString<uint64_t, 10>(int64_t(0)).str()), StringView("0"));
+ EXPECT_EQ((IntegerToString<uint64_t, 10>(int64_t(1234567890123456789)).str()),
+ StringView("1234567890123456789"));
+}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_8) {
+ EXPECT_EQ((IntegerToString<uint64_t, 8>(int64_t(0)).str()), StringView("0"));
+ EXPECT_EQ((IntegerToString<uint64_t, 8>(int64_t(012345)).str()),
+ StringView("12345"));
+ EXPECT_EQ(
+ (IntegerToString<uint64_t, 8>(int64_t(0123456701234567012345)).str()),
+ StringView("123456701234567012345"));
+ EXPECT_EQ(
+ (IntegerToString<uint64_t, 8>(int64_t(01777777777777777777777)).str()),
+ StringView("1777777777777777777777"));
+}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_16) {
+ EXPECT_EQ((IntegerToString<uint64_t, 16>(int64_t(0)).str()), StringView("0"));
+ EXPECT_EQ((IntegerToString<uint64_t, 16>(int64_t(0x12345)).str()),
+ StringView("12345"));
+ EXPECT_EQ((IntegerToString<uint64_t, 16>(int64_t(0x123456789abcdef)).str()),
+ StringView("123456789abcdef"));
+ EXPECT_EQ(
+ (IntegerToString<uint64_t, 16>(int64_t(0x123456789abcdef), false).str()),
+ StringView("123456789ABCDEF"));
+ EXPECT_EQ((IntegerToString<uint64_t, 16>(int64_t(0xffffffffffffffff)).str()),
+ StringView("ffffffffffffffff"));
+}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_2) {
+ EXPECT_EQ((IntegerToString<uint64_t, 2>(int64_t(0)).str()), StringView("0"));
+ EXPECT_EQ((IntegerToString<uint64_t, 2>(int64_t(0xf0c)).str()),
+ StringView("111100001100"));
+ EXPECT_EQ((IntegerToString<uint64_t, 2>(int64_t(0x123abc)).str()),
+ StringView("100100011101010111100"));
+ EXPECT_EQ(
+ (IntegerToString<uint64_t, 2>(int64_t(0xffffffffffffffff)).str()),
+ StringView(
+ "1111111111111111111111111111111111111111111111111111111111111111"));
+}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_36) {
+ EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(0)).str()), StringView("0"));
+ EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(12345)).str()),
+ StringView("9ix"));
+ EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(1047601316295595)).str()),
+ StringView("abcdefghij"));
+ EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(2092218013456445)).str()),
+ StringView("klmnopqrst"));
+ EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(1867590395), false).str()),
+ StringView("UVWXYZ"));
+ EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(0xffffffffffffffff)).str()),
+ StringView("3w5e11264sgsf"));
+}
More information about the libc-commits
mailing list