[libcxx-commits] [libcxx] 3561ee5 - [libc++] Improve charconv base10 algorithm.

Mark de Wever via libcxx-commits libcxx-commits at lists.llvm.org
Tue Jun 21 08:45:46 PDT 2022


Author: Mark de Wever
Date: 2022-06-21T17:45:40+02:00
New Revision: 3561ee586ed0ec8909527c09020b42bd09718da1

URL: https://github.com/llvm/llvm-project/commit/3561ee586ed0ec8909527c09020b42bd09718da1
DIFF: https://github.com/llvm/llvm-project/commit/3561ee586ed0ec8909527c09020b42bd09718da1.diff

LOG: [libc++] Improve charconv base10 algorithm.

This change is a preparation to add the 128-bit integral output.

Before
```
--------------------------------------------------------------
Benchmark                    Time             CPU   Iterations
--------------------------------------------------------------
BM_to_chars_good/2        20.1 ns         20.1 ns     35045000
BM_to_chars_good/3         117 ns          117 ns      5916000
BM_to_chars_good/4        83.7 ns         83.7 ns      8401000
BM_to_chars_good/5        70.6 ns         70.6 ns      9915000
BM_to_chars_good/6        59.9 ns         59.9 ns     11678000
BM_to_chars_good/7        53.9 ns         53.8 ns     12995000
BM_to_chars_good/8        19.0 ns         19.0 ns     37110000
BM_to_chars_good/9        45.9 ns         45.8 ns     15278000
BM_to_chars_good/10       9.24 ns         9.24 ns     75343000
BM_to_chars_good/11       42.6 ns         42.6 ns     16449000
BM_to_chars_good/12       38.8 ns         38.8 ns     18101000
BM_to_chars_good/13       38.8 ns         38.8 ns     17999000
BM_to_chars_good/14       37.7 ns         37.6 ns     18571000
BM_to_chars_good/15       35.8 ns         35.8 ns     19660000
BM_to_chars_good/16       15.4 ns         15.4 ns     46129000
BM_to_chars_good/17       32.3 ns         32.3 ns     21763000
BM_to_chars_good/18       32.8 ns         32.8 ns     21396000
BM_to_chars_good/19       33.4 ns         33.4 ns     21078000
BM_to_chars_good/20       33.3 ns         33.3 ns     21020000
BM_to_chars_good/21       32.3 ns         32.3 ns     21807000
BM_to_chars_good/22       31.6 ns         31.6 ns     22057000
BM_to_chars_good/23       30.7 ns         30.7 ns     22938000
BM_to_chars_good/24       28.3 ns         28.3 ns     24659000
BM_to_chars_good/25       28.2 ns         28.2 ns     24790000
BM_to_chars_good/26       28.4 ns         28.4 ns     24410000
BM_to_chars_good/27       28.7 ns         28.7 ns     24423000
BM_to_chars_good/28       28.9 ns         28.9 ns     24139000
BM_to_chars_good/29       28.9 ns         28.9 ns     24347000
BM_to_chars_good/30       29.2 ns         29.2 ns     24141000
BM_to_chars_good/31       29.6 ns         29.6 ns     23699000
BM_to_chars_good/32       29.5 ns         29.5 ns     23933000
BM_to_chars_good/33       28.9 ns         28.9 ns     24042000
BM_to_chars_good/34       28.7 ns         28.7 ns     24361000
BM_to_chars_good/35       28.3 ns         28.3 ns     24703000
BM_to_chars_good/36       28.1 ns         28.1 ns     24924000
BM_to_chars_bad/2         6.16 ns         6.15 ns    114101000
BM_to_chars_bad/3         14.5 ns         14.5 ns     48244000
BM_to_chars_bad/4         16.9 ns         16.9 ns     41974000
BM_to_chars_bad/5         12.5 ns         12.5 ns     56080000
BM_to_chars_bad/6         10.9 ns         10.9 ns     64036000
BM_to_chars_bad/7         14.5 ns         14.5 ns     47294000
BM_to_chars_bad/8         6.36 ns         6.35 ns    110430000
BM_to_chars_bad/9         12.4 ns         12.4 ns     56448000
BM_to_chars_bad/10        5.13 ns         5.13 ns    137596000
BM_to_chars_bad/11        9.88 ns         9.88 ns     69015000
BM_to_chars_bad/12        10.8 ns         10.8 ns     63990000
BM_to_chars_bad/13        10.7 ns         10.7 ns     65066000
BM_to_chars_bad/14        9.71 ns         9.71 ns     71775000
BM_to_chars_bad/15        9.18 ns         9.18 ns     75267000
BM_to_chars_bad/16        6.12 ns         6.12 ns    115000000
BM_to_chars_bad/17        10.7 ns         10.7 ns     65504000
BM_to_chars_bad/18        10.6 ns         10.6 ns     65685000
BM_to_chars_bad/19        9.98 ns         9.98 ns     69894000
BM_to_chars_bad/20        9.74 ns         9.74 ns     72098000
BM_to_chars_bad/21        9.25 ns         9.25 ns     75184000
BM_to_chars_bad/22        9.10 ns         9.10 ns     75602000
BM_to_chars_bad/23        9.48 ns         9.48 ns     72824000
BM_to_chars_bad/24        9.27 ns         9.27 ns     75112000
BM_to_chars_bad/25        9.61 ns         9.61 ns     72080000
BM_to_chars_bad/26        9.72 ns         9.72 ns     72178000
BM_to_chars_bad/27        10.0 ns         10.0 ns     69733000
BM_to_chars_bad/28        10.3 ns         10.3 ns     67409000
BM_to_chars_bad/29        9.97 ns         9.97 ns     69193000
BM_to_chars_bad/30        10.1 ns         10.1 ns     69007000
BM_to_chars_bad/31        9.68 ns         9.68 ns     72232000
BM_to_chars_bad/32        8.99 ns         8.99 ns     76825000
BM_to_chars_bad/33        8.82 ns         8.82 ns     79293000
BM_to_chars_bad/34        8.64 ns         8.64 ns     80441000
BM_to_chars_bad/35        8.96 ns         8.96 ns     75320000
BM_to_chars_bad/36        8.87 ns         8.87 ns     77293000

```

After
```
--------------------------------------------------------------
Benchmark                    Time             CPU   Iterations
--------------------------------------------------------------
BM_to_chars_good/2        14.7 ns         14.7 ns     47583000
BM_to_chars_good/3         101 ns          101 ns      6901000
BM_to_chars_good/4        68.4 ns         68.4 ns     10088000
BM_to_chars_good/5        58.2 ns         58.2 ns     12007000
BM_to_chars_good/6        51.1 ns         51.1 ns     13687000
BM_to_chars_good/7        45.6 ns         45.6 ns     15323000
BM_to_chars_good/8        14.6 ns         14.6 ns     47795000
BM_to_chars_good/9        40.7 ns         40.7 ns     17371000
BM_to_chars_good/10       7.48 ns         7.48 ns     90931000
BM_to_chars_good/11       37.6 ns         37.6 ns     18542000
BM_to_chars_good/12       35.2 ns         35.2 ns     19922000
BM_to_chars_good/13       34.9 ns         34.9 ns     20105000
BM_to_chars_good/14       33.5 ns         33.5 ns     20863000
BM_to_chars_good/15       31.9 ns         31.9 ns     22014000
BM_to_chars_good/16       11.7 ns         11.7 ns     60012000
BM_to_chars_good/17       28.9 ns         28.9 ns     24148000
BM_to_chars_good/18       29.0 ns         29.0 ns     24317000
BM_to_chars_good/19       28.7 ns         28.7 ns     24363000
BM_to_chars_good/20       28.1 ns         28.1 ns     24899000
BM_to_chars_good/21       27.5 ns         27.5 ns     25499000
BM_to_chars_good/22       26.9 ns         26.9 ns     25929000
BM_to_chars_good/23       26.2 ns         26.2 ns     26828000
BM_to_chars_good/24       25.1 ns         25.1 ns     27742000
BM_to_chars_good/25       25.3 ns         25.3 ns     27720000
BM_to_chars_good/26       25.2 ns         25.2 ns     27789000
BM_to_chars_good/27       25.3 ns         25.3 ns     27777000
BM_to_chars_good/28       25.3 ns         25.3 ns     27643000
BM_to_chars_good/29       25.3 ns         25.3 ns     27750000
BM_to_chars_good/30       25.4 ns         25.4 ns     27566000
BM_to_chars_good/31       25.4 ns         25.4 ns     27611000
BM_to_chars_good/32       25.8 ns         25.8 ns     27218000
BM_to_chars_good/33       25.7 ns         25.7 ns     27070000
BM_to_chars_good/34       26.1 ns         26.1 ns     26693000
BM_to_chars_good/35       26.4 ns         26.4 ns     26486000
BM_to_chars_good/36       26.3 ns         26.3 ns     26619000
BM_to_chars_bad/2         5.99 ns         5.99 ns    118787000
BM_to_chars_bad/3         14.3 ns         14.3 ns     48567000
BM_to_chars_bad/4         16.0 ns         16.0 ns     43239000
BM_to_chars_bad/5         12.6 ns         12.6 ns     55354000
BM_to_chars_bad/6         10.7 ns         10.7 ns     65491000
BM_to_chars_bad/7         14.4 ns         14.4 ns     48723000
BM_to_chars_bad/8         6.50 ns         6.50 ns    104967000
BM_to_chars_bad/9         12.0 ns         12.0 ns     56552000
BM_to_chars_bad/10        5.16 ns         5.16 ns    136380000
BM_to_chars_bad/11        10.5 ns         10.5 ns     66764000
BM_to_chars_bad/12        10.7 ns         10.7 ns     65534000
BM_to_chars_bad/13        11.0 ns         11.0 ns     63426000
BM_to_chars_bad/14        9.90 ns         9.90 ns     68575000
BM_to_chars_bad/15        9.52 ns         9.52 ns     70932000
BM_to_chars_bad/16        6.14 ns         6.14 ns    111762000
BM_to_chars_bad/17        10.6 ns         10.6 ns     65883000
BM_to_chars_bad/18        10.5 ns         10.5 ns     67606000
BM_to_chars_bad/19        9.96 ns         9.96 ns     68898000
BM_to_chars_bad/20        9.40 ns         9.41 ns     73116000
BM_to_chars_bad/21        9.12 ns         9.12 ns     78647000
BM_to_chars_bad/22        8.95 ns         8.95 ns     80211000
BM_to_chars_bad/23        9.50 ns         9.49 ns     73571000
BM_to_chars_bad/24        9.29 ns         9.29 ns     74690000
BM_to_chars_bad/25        9.65 ns         9.65 ns     72877000
BM_to_chars_bad/26        9.78 ns         9.78 ns     70171000
BM_to_chars_bad/27        10.1 ns         10.1 ns     69543000
BM_to_chars_bad/28        10.4 ns         10.4 ns     67582000
BM_to_chars_bad/29       10.00 ns        10.00 ns     70806000
BM_to_chars_bad/30        9.99 ns         9.99 ns     70340000
BM_to_chars_bad/31        9.56 ns         9.56 ns     74159000
BM_to_chars_bad/32        8.97 ns         8.97 ns     78052000
BM_to_chars_bad/33        8.86 ns         8.86 ns     78586000
BM_to_chars_bad/34        8.81 ns         8.81 ns     78562000
BM_to_chars_bad/35        8.90 ns         8.90 ns     77384000
BM_to_chars_bad/36        9.04 ns         9.04 ns     77263000

```

Reviewed By: #libc, ldionne

Differential Revision: https://reviews.llvm.org/D127764

Added: 
    

Modified: 
    libcxx/include/__charconv/to_chars_base_10.h
    libcxx/include/charconv
    libcxx/src/charconv.cpp

Removed: 
    


################################################################################
diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h
index 1c6804a852460..91c209559aff9 100644
--- a/libcxx/include/__charconv/to_chars_base_10.h
+++ b/libcxx/include/__charconv/to_chars_base_10.h
@@ -10,10 +10,10 @@
 #ifndef _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H
 #define _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H
 
+#include <__algorithm/copy_n.h>
 #include <__charconv/tables.h>
 #include <__config>
 #include <cstdint>
-#include <cstring>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -25,98 +25,97 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __itoa {
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append1(char* __buffer, _Tp __value) noexcept {
-  *__buffer = '0' + static_cast<char>(__value);
-  return __buffer + 1;
+_LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) noexcept {
+  *__first = '0' + static_cast<char>(__value);
+  return __first + 1;
 }
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append2(char* __buffer, _Tp __value) noexcept {
-  std::memcpy(__buffer, &__table<>::__digits_base_10[(__value)*2], 2);
-  return __buffer + 2;
+_LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) noexcept {
+  return std::copy_n(&__table<>::__digits_base_10[__value * 2], 2, __first);
 }
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append3(char* __buffer, _Tp __value) noexcept {
-  return __itoa::__append2(__itoa::__append1(__buffer, (__value) / 100), (__value) % 100);
+_LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) noexcept {
+  return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100);
 }
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append4(char* __buffer, _Tp __value) noexcept {
-  return __itoa::__append2(__itoa::__append2(__buffer, (__value) / 100), (__value) % 100);
+_LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) noexcept {
+  return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100);
 }
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append2_no_zeros(char* __buffer, _Tp __value) noexcept {
-  if (__value < 10)
-    return __itoa::__append1(__buffer, __value);
-  else
-    return __itoa::__append2(__buffer, __value);
+_LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) noexcept {
+  return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000);
 }
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append4_no_zeros(char* __buffer, _Tp __value) noexcept {
-  if (__value < 100)
-    return __itoa::__append2_no_zeros(__buffer, __value);
-  else if (__value < 1000)
-    return __itoa::__append3(__buffer, __value);
-  else
-    return __itoa::__append4(__buffer, __value);
+_LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) noexcept {
+  return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000);
+}
+
+_LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) noexcept {
+  return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000);
 }
 
+_LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) noexcept {
+  return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000);
+}
+
+_LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) noexcept {
+  return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000);
+}
+
+// This function is used for uint32_t and uint64_t.
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append8_no_zeros(char* __buffer, _Tp __value) noexcept {
-  if (__value < 10000)
-    __buffer = __itoa::__append4_no_zeros(__buffer, __value);
-  else {
-    __buffer = __itoa::__append4_no_zeros(__buffer, __value / 10000);
-    __buffer = __itoa::__append4(__buffer, __value % 10000);
-  }
-  return __buffer;
+_LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept {
+  return __itoa::__append8(__itoa::__append2(__first, static_cast<uint32_t>(__value / 100000000)),
+                           static_cast<uint32_t>(__value % 100000000));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(uint32_t __value, char* __buffer) noexcept {
-  if (__value < 100000000)
-    __buffer = __itoa::__append8_no_zeros(__buffer, __value);
-  else {
-    // __value = aabbbbcccc in decimal
-    const uint32_t __a = __value / 100000000; // 1 to 42
-    __value %= 100000000;
-
-    __buffer = __itoa::__append2_no_zeros(__buffer, __a);
-    __buffer = __itoa::__append4(__buffer, __value / 10000);
-    __buffer = __itoa::__append4(__buffer, __value % 10000);
+_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(char* __first, uint32_t __value) noexcept {
+  if (__value < 1000000) {
+    if (__value < 10000) {
+      if (__value < 100) {
+        // 0 <= __value < 100
+        if (__value < 10)
+          return __itoa::__append1(__first, __value);
+        return __itoa::__append2(__first, __value);
+      }
+      // 100 <= __value < 10'000
+      if (__value < 1000)
+        return __itoa::__append3(__first, __value);
+      return __itoa::__append4(__first, __value);
+    }
+
+    // 10'000 <= __value < 1'000'000
+    if (__value < 100000)
+      return __itoa::__append5(__first, __value);
+    return __itoa::__append6(__first, __value);
   }
 
-  return __buffer;
+  // __value >= 1'000'000
+  if (__value < 100000000) {
+    // 1'000'000 <= __value < 100'000'000
+    if (__value < 10000000)
+      return __itoa::__append7(__first, __value);
+    return __itoa::__append8(__first, __value);
+  }
+
+  // 100'000'000 <= __value < max
+  if (__value < 1000000000)
+    return __itoa::__append9(__first, __value);
+  return __itoa::__append10(__first, __value);
 }
 
-_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(uint64_t __value, char* __buffer) noexcept {
-  if (__value < 100000000)
-    __buffer = __itoa::__append8_no_zeros(__buffer, static_cast<uint32_t>(__value));
-  else if (__value < 10000000000000000) {
-    const uint32_t __v0 = static_cast<uint32_t>(__value / 100000000);
-    const uint32_t __v1 = static_cast<uint32_t>(__value % 100000000);
-
-    __buffer = __itoa::__append8_no_zeros(__buffer, __v0);
-    __buffer = __itoa::__append4(__buffer, __v1 / 10000);
-    __buffer = __itoa::__append4(__buffer, __v1 % 10000);
-  } else {
-    const uint32_t __a = static_cast<uint32_t>(__value / 10000000000000000); // 1 to 1844
-    __value %= 10000000000000000;
-
-    __buffer = __itoa::__append4_no_zeros(__buffer, __a);
-
-    const uint32_t __v0 = static_cast<uint32_t>(__value / 100000000);
-    const uint32_t __v1 = static_cast<uint32_t>(__value % 100000000);
-    __buffer = __itoa::__append4(__buffer, __v0 / 10000);
-    __buffer = __itoa::__append4(__buffer, __v0 % 10000);
-    __buffer = __itoa::__append4(__buffer, __v1 / 10000);
-    __buffer = __itoa::__append4(__buffer, __v1 % 10000);
-  }
+_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(char* __buffer, uint64_t __value) noexcept {
+  if (__value <= UINT32_MAX)
+    return __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value));
 
-  return __buffer;
+  // Numbers in the range UINT32_MAX < val < 10'000'000'000 always contain 10
+  // digits and are output after this if statement.
+  if (__value >= 10000000000) {
+    // This function properly determines the first non-zero leading digit.
+    __buffer = __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value / 10000000000));
+    __value %= 10000000000;
+  }
+  return __itoa::__append10(__buffer, __value);
 }
 
 } // namespace __itoa

diff --git a/libcxx/include/charconv b/libcxx/include/charconv
index ce2897cf7c281..b88ef46752a33 100644
--- a/libcxx/include/charconv
+++ b/libcxx/include/charconv
@@ -125,9 +125,9 @@ struct _LIBCPP_HIDDEN __traits_base
         return __t - (__v < __table<>::__pow10_64[__t]) + 1;
     }
 
-    static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p)
+    static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v)
     {
-        return __itoa::__base_10_u64(__v, __p);
+        return __itoa::__base_10_u64(__p, __v);
     }
 
     static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_64)& __pow() { return __table<>::__pow10_64; }
@@ -145,9 +145,9 @@ struct _LIBCPP_HIDDEN
         return __t - (__v < __table<>::__pow10_32[__t]) + 1;
     }
 
-    static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p)
+    static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v)
     {
-        return __itoa::__base_10_u32(__v, __p);
+        return __itoa::__base_10_u32(__p, __v);
     }
 
     static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_32)& __pow() { return __table<>::__pow10_32; }
@@ -262,7 +262,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type)
     auto __diff = __last - __first;
 
     if (__tx::digits <= __diff || __tx::__width(__value) <= __diff)
-        return {__tx::__convert(__value, __first), errc(0)};
+        return {__tx::__convert(__first, __value), errc(0)};
     else
         return {__last, errc::value_too_large};
 }

diff --git a/libcxx/src/charconv.cpp b/libcxx/src/charconv.cpp
index 4139dea5f3aa0..05ee2cbd4ac87 100644
--- a/libcxx/src/charconv.cpp
+++ b/libcxx/src/charconv.cpp
@@ -21,13 +21,13 @@ namespace __itoa
 _LIBCPP_FUNC_VIS char*
 __u32toa(uint32_t value, char* buffer) noexcept
 {
-	return __base_10_u32(value, buffer);
+	return __base_10_u32(buffer, value);
 }
 
 _LIBCPP_FUNC_VIS char*
 __u64toa(uint64_t value, char* buffer) noexcept
 {
-	return __base_10_u64(value, buffer);
+	return __base_10_u64(buffer, value);
 }
 
 }  // namespace __itoa


        


More information about the libcxx-commits mailing list