[libc-commits] [libc] 23ace05 - [libc] add int to string for extended bases

Michael Jones via libc-commits libc-commits at lists.llvm.org
Wed Aug 10 11:28:19 PDT 2022


Author: Michael Jones
Date: 2022-08-10T11:28:14-07:00
New Revision: 23ace05e0a7aa8d47da60e6baa4ad6a2d68a83c8

URL: https://github.com/llvm/llvm-project/commit/23ace05e0a7aa8d47da60e6baa4ad6a2d68a83c8
DIFF: https://github.com/llvm/llvm-project/commit/23ace05e0a7aa8d47da60e6baa4ad6a2d68a83c8.diff

LOG: [libc] add int to string for extended bases

The default IntegerToString class only supports base 10, this patch adds
a version which supports any base between 2 and 36 inclusive. This will
be used in an upcoming patch.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D131301

Added: 
    

Modified: 
    libc/src/__support/integer_to_string.h
    libc/test/src/__support/integer_to_string_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h
index bd50c5830ad84..f9d5f24ddc747 100644
--- a/libc/src/__support/integer_to_string.h
+++ b/libc/src/__support/integer_to_string.h
@@ -9,14 +9,23 @@
 #ifndef LLVM_LIBC_SRC_SUPPORT_INTEGER_TO_STRING_H
 #define LLVM_LIBC_SRC_SUPPORT_INTEGER_TO_STRING_H
 
+#include "src/__support/CPP/ArrayRef.h"
 #include "src/__support/CPP/StringView.h"
+#include "src/__support/CPP/optional.h"
 #include "src/__support/CPP/type_traits.h"
 
 namespace __llvm_libc {
 
-template <typename T> class IntegerToString {
+template <typename T, uint8_t BASE = 10> class IntegerToString {
 public:
-  // We size the string buffer using an approximation algorithm:
+  static constexpr inline size_t floor_log_2(size_t num) {
+    size_t i = 0;
+    for (; num > 1; num /= 2) {
+      ++i;
+    }
+    return i;
+  }
+  // We size the string buffer for base 10 using an approximation algorithm:
   //
   //   size = ceil(sizeof(T) * 5 / 2)
   //
@@ -34,8 +43,20 @@ template <typename T> class IntegerToString {
   // overhead is small enough to tolerate. In the actual formula below, we
   // add an additional byte to accommodate the '-' sign in case of signed
   // integers.
-  static constexpr size_t BUFSIZE =
-      (sizeof(T) * 5 + 1) / 2 + (cpp::is_signed<T>() ? 1 : 0);
+  // For other bases, we approximate by rounding down to the nearest power of
+  // two base, since the space needed is easy to calculate and it won't
+  // overestimate by too much.
+
+  template <uint8_t STATIC_BASE> static constexpr size_t bufsize() {
+    constexpr size_t BITS_PER_DIGIT = floor_log_2(STATIC_BASE);
+    constexpr size_t BUFSIZE_COMMON =
+        ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT);
+    constexpr size_t BUFSIZE_BASE10 = (sizeof(T) * 5 + 1) / 2;
+    return (cpp::is_signed<T>() ? 1 : 0) +
+           (STATIC_BASE == 10 ? BUFSIZE_BASE10 : BUFSIZE_COMMON);
+  }
+
+  static constexpr size_t BUFSIZE = bufsize<BASE>();
 
 private:
   static_assert(cpp::is_integral_v<T>,
@@ -44,34 +65,73 @@ template <typename T> class IntegerToString {
   using UnsignedType = cpp::make_unsigned_t<T>;
 
   char strbuf[BUFSIZE] = {'\0'};
-  size_t len = 0;
+  cpp::StringView str_view;
 
-  constexpr void convert(UnsignedType val) {
-    size_t buffptr = BUFSIZE;
-    if (val == 0) {
-      strbuf[buffptr - 1] = '0';
+  static inline constexpr cpp::StringView
+  convert_alpha_numeric(T val, cpp::MutableArrayRef<char> &buffer,
+                        bool lowercase, const uint8_t conv_base) {
+    UnsignedType uval = val < 0 ? UnsignedType(-val) : UnsignedType(val);
+
+    const char a = lowercase ? 'a' : 'A';
+
+    size_t len = 0;
+
+    size_t buffptr = buffer.size();
+    if (uval == 0) {
+      buffer[buffptr - 1] = '0';
       --buffptr;
     } else {
-      for (; val > 0; --buffptr, val /= 10)
-        strbuf[buffptr - 1] = (val % 10) + '0';
+      for (; uval > 0; --buffptr, uval /= conv_base) {
+        UnsignedType digit = (uval % conv_base);
+        buffer[buffptr - 1] = digit < 10 ? digit + '0' : digit + a - 10;
+      }
     }
-    len = BUFSIZE - buffptr;
-  }
+    len = buffer.size() - buffptr;
 
-public:
-  constexpr explicit IntegerToString(T val) {
-    convert(val < 0 ? UnsignedType(-val) : UnsignedType(val));
     if (val < 0) {
       // This branch will be taken only for negative signed values.
       ++len;
-      strbuf[BUFSIZE - len] = '-';
+      buffer[buffer.size() - len] = '-';
     }
+    cpp::StringView buff_str(buffer.data() + buffer.size() - len, len);
+    return buff_str;
+  }
+
+  // This function exists to check at compile time that the base is valid, as
+  // well as to convert the templated call into a non-templated call. This
+  // allows the compiler to decide to do strength reduction and constant folding
+  // on the base or not, depending on if size or performance is required.
+  template <uint8_t STATIC_BASE = BASE,
+            cpp::enable_if_t<2 <= STATIC_BASE && STATIC_BASE <= 36, int> = 0>
+  static inline constexpr cpp::StringView
+  convert_internal(T val, cpp::MutableArrayRef<char> &buffer, bool lowercase) {
+    return convert_alpha_numeric(val, buffer, lowercase, STATIC_BASE);
+  }
+
+public:
+  template <uint8_t STATIC_BASE = BASE>
+  static inline cpp::optional<cpp::StringView>
+  convert(T val, cpp::MutableArrayRef<char> &buffer, bool lowercase) {
+    // If This function can actually be a constexpr, then the below "if" will be
+    // optimized out.
+    if (buffer.size() < bufsize<STATIC_BASE>())
+      return cpp::optional<cpp::StringView>();
+    return cpp::optional<cpp::StringView>(
+        convert_internal<STATIC_BASE>(val, buffer, lowercase));
   }
 
-  cpp::StringView str() const {
-    return cpp::StringView(strbuf + BUFSIZE - len, len);
+  constexpr explicit IntegerToString(T val) {
+    cpp::MutableArrayRef<char> bufref(strbuf, BUFSIZE);
+    str_view = convert_internal<BASE>(val, bufref, true);
   }
 
+  constexpr explicit IntegerToString(T val, bool lowercase) {
+    cpp::MutableArrayRef<char> bufref(strbuf, BUFSIZE);
+    str_view = convert_internal<BASE>(val, bufref, lowercase);
+  }
+
+  cpp::StringView str() const { return str_view; }
+
   operator cpp::StringView() const { return str(); }
 };
 

diff  --git a/libc/test/src/__support/integer_to_string_test.cpp b/libc/test/src/__support/integer_to_string_test.cpp
index 77123ff83e419..ce61254443b0e 100644
--- a/libc/test/src/__support/integer_to_string_test.cpp
+++ b/libc/test/src/__support/integer_to_string_test.cpp
@@ -14,6 +14,7 @@
 #include "limits.h"
 
 using __llvm_libc::integer_to_string;
+using __llvm_libc::IntegerToString;
 using __llvm_libc::cpp::StringView;
 
 TEST(LlvmLibcIntegerToStringTest, UINT8) {
@@ -183,3 +184,60 @@ TEST(LlvmLibcIntegerToStringTest, INT64) {
   EXPECT_EQ(integer_to_string(int64_t(INT64_MIN)).str(),
             (StringView("-9223372036854775808")));
 }
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_10) {
+  EXPECT_EQ((IntegerToString<uint64_t, 10>(int64_t(0)).str()), StringView("0"));
+  EXPECT_EQ((IntegerToString<uint64_t, 10>(int64_t(1234567890123456789)).str()),
+            StringView("1234567890123456789"));
+}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_8) {
+  EXPECT_EQ((IntegerToString<uint64_t, 8>(int64_t(0)).str()), StringView("0"));
+  EXPECT_EQ((IntegerToString<uint64_t, 8>(int64_t(012345)).str()),
+            StringView("12345"));
+  EXPECT_EQ(
+      (IntegerToString<uint64_t, 8>(int64_t(0123456701234567012345)).str()),
+      StringView("123456701234567012345"));
+  EXPECT_EQ(
+      (IntegerToString<uint64_t, 8>(int64_t(01777777777777777777777)).str()),
+      StringView("1777777777777777777777"));
+}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_16) {
+  EXPECT_EQ((IntegerToString<uint64_t, 16>(int64_t(0)).str()), StringView("0"));
+  EXPECT_EQ((IntegerToString<uint64_t, 16>(int64_t(0x12345)).str()),
+            StringView("12345"));
+  EXPECT_EQ((IntegerToString<uint64_t, 16>(int64_t(0x123456789abcdef)).str()),
+            StringView("123456789abcdef"));
+  EXPECT_EQ(
+      (IntegerToString<uint64_t, 16>(int64_t(0x123456789abcdef), false).str()),
+      StringView("123456789ABCDEF"));
+  EXPECT_EQ((IntegerToString<uint64_t, 16>(int64_t(0xffffffffffffffff)).str()),
+            StringView("ffffffffffffffff"));
+}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_2) {
+  EXPECT_EQ((IntegerToString<uint64_t, 2>(int64_t(0)).str()), StringView("0"));
+  EXPECT_EQ((IntegerToString<uint64_t, 2>(int64_t(0xf0c)).str()),
+            StringView("111100001100"));
+  EXPECT_EQ((IntegerToString<uint64_t, 2>(int64_t(0x123abc)).str()),
+            StringView("100100011101010111100"));
+  EXPECT_EQ(
+      (IntegerToString<uint64_t, 2>(int64_t(0xffffffffffffffff)).str()),
+      StringView(
+          "1111111111111111111111111111111111111111111111111111111111111111"));
+}
+
+TEST(LlvmLibcIntegerToStringTest, UINT64_Base_36) {
+  EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(0)).str()), StringView("0"));
+  EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(12345)).str()),
+            StringView("9ix"));
+  EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(1047601316295595)).str()),
+            StringView("abcdefghij"));
+  EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(2092218013456445)).str()),
+            StringView("klmnopqrst"));
+  EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(1867590395), false).str()),
+            StringView("UVWXYZ"));
+  EXPECT_EQ((IntegerToString<uint64_t, 36>(int64_t(0xffffffffffffffff)).str()),
+            StringView("3w5e11264sgsf"));
+}


        


More information about the libc-commits mailing list