[libcxx-commits] [libcxx] 3561ee5 - [libc++] Improve charconv base10 algorithm.
Mark de Wever via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Jun 21 08:45:46 PDT 2022
Author: Mark de Wever
Date: 2022-06-21T17:45:40+02:00
New Revision: 3561ee586ed0ec8909527c09020b42bd09718da1
URL: https://github.com/llvm/llvm-project/commit/3561ee586ed0ec8909527c09020b42bd09718da1
DIFF: https://github.com/llvm/llvm-project/commit/3561ee586ed0ec8909527c09020b42bd09718da1.diff
LOG: [libc++] Improve charconv base10 algorithm.
This change is a preparation to add the 128-bit integral output.
Before
```
--------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------
BM_to_chars_good/2 20.1 ns 20.1 ns 35045000
BM_to_chars_good/3 117 ns 117 ns 5916000
BM_to_chars_good/4 83.7 ns 83.7 ns 8401000
BM_to_chars_good/5 70.6 ns 70.6 ns 9915000
BM_to_chars_good/6 59.9 ns 59.9 ns 11678000
BM_to_chars_good/7 53.9 ns 53.8 ns 12995000
BM_to_chars_good/8 19.0 ns 19.0 ns 37110000
BM_to_chars_good/9 45.9 ns 45.8 ns 15278000
BM_to_chars_good/10 9.24 ns 9.24 ns 75343000
BM_to_chars_good/11 42.6 ns 42.6 ns 16449000
BM_to_chars_good/12 38.8 ns 38.8 ns 18101000
BM_to_chars_good/13 38.8 ns 38.8 ns 17999000
BM_to_chars_good/14 37.7 ns 37.6 ns 18571000
BM_to_chars_good/15 35.8 ns 35.8 ns 19660000
BM_to_chars_good/16 15.4 ns 15.4 ns 46129000
BM_to_chars_good/17 32.3 ns 32.3 ns 21763000
BM_to_chars_good/18 32.8 ns 32.8 ns 21396000
BM_to_chars_good/19 33.4 ns 33.4 ns 21078000
BM_to_chars_good/20 33.3 ns 33.3 ns 21020000
BM_to_chars_good/21 32.3 ns 32.3 ns 21807000
BM_to_chars_good/22 31.6 ns 31.6 ns 22057000
BM_to_chars_good/23 30.7 ns 30.7 ns 22938000
BM_to_chars_good/24 28.3 ns 28.3 ns 24659000
BM_to_chars_good/25 28.2 ns 28.2 ns 24790000
BM_to_chars_good/26 28.4 ns 28.4 ns 24410000
BM_to_chars_good/27 28.7 ns 28.7 ns 24423000
BM_to_chars_good/28 28.9 ns 28.9 ns 24139000
BM_to_chars_good/29 28.9 ns 28.9 ns 24347000
BM_to_chars_good/30 29.2 ns 29.2 ns 24141000
BM_to_chars_good/31 29.6 ns 29.6 ns 23699000
BM_to_chars_good/32 29.5 ns 29.5 ns 23933000
BM_to_chars_good/33 28.9 ns 28.9 ns 24042000
BM_to_chars_good/34 28.7 ns 28.7 ns 24361000
BM_to_chars_good/35 28.3 ns 28.3 ns 24703000
BM_to_chars_good/36 28.1 ns 28.1 ns 24924000
BM_to_chars_bad/2 6.16 ns 6.15 ns 114101000
BM_to_chars_bad/3 14.5 ns 14.5 ns 48244000
BM_to_chars_bad/4 16.9 ns 16.9 ns 41974000
BM_to_chars_bad/5 12.5 ns 12.5 ns 56080000
BM_to_chars_bad/6 10.9 ns 10.9 ns 64036000
BM_to_chars_bad/7 14.5 ns 14.5 ns 47294000
BM_to_chars_bad/8 6.36 ns 6.35 ns 110430000
BM_to_chars_bad/9 12.4 ns 12.4 ns 56448000
BM_to_chars_bad/10 5.13 ns 5.13 ns 137596000
BM_to_chars_bad/11 9.88 ns 9.88 ns 69015000
BM_to_chars_bad/12 10.8 ns 10.8 ns 63990000
BM_to_chars_bad/13 10.7 ns 10.7 ns 65066000
BM_to_chars_bad/14 9.71 ns 9.71 ns 71775000
BM_to_chars_bad/15 9.18 ns 9.18 ns 75267000
BM_to_chars_bad/16 6.12 ns 6.12 ns 115000000
BM_to_chars_bad/17 10.7 ns 10.7 ns 65504000
BM_to_chars_bad/18 10.6 ns 10.6 ns 65685000
BM_to_chars_bad/19 9.98 ns 9.98 ns 69894000
BM_to_chars_bad/20 9.74 ns 9.74 ns 72098000
BM_to_chars_bad/21 9.25 ns 9.25 ns 75184000
BM_to_chars_bad/22 9.10 ns 9.10 ns 75602000
BM_to_chars_bad/23 9.48 ns 9.48 ns 72824000
BM_to_chars_bad/24 9.27 ns 9.27 ns 75112000
BM_to_chars_bad/25 9.61 ns 9.61 ns 72080000
BM_to_chars_bad/26 9.72 ns 9.72 ns 72178000
BM_to_chars_bad/27 10.0 ns 10.0 ns 69733000
BM_to_chars_bad/28 10.3 ns 10.3 ns 67409000
BM_to_chars_bad/29 9.97 ns 9.97 ns 69193000
BM_to_chars_bad/30 10.1 ns 10.1 ns 69007000
BM_to_chars_bad/31 9.68 ns 9.68 ns 72232000
BM_to_chars_bad/32 8.99 ns 8.99 ns 76825000
BM_to_chars_bad/33 8.82 ns 8.82 ns 79293000
BM_to_chars_bad/34 8.64 ns 8.64 ns 80441000
BM_to_chars_bad/35 8.96 ns 8.96 ns 75320000
BM_to_chars_bad/36 8.87 ns 8.87 ns 77293000
```
After
```
--------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------
BM_to_chars_good/2 14.7 ns 14.7 ns 47583000
BM_to_chars_good/3 101 ns 101 ns 6901000
BM_to_chars_good/4 68.4 ns 68.4 ns 10088000
BM_to_chars_good/5 58.2 ns 58.2 ns 12007000
BM_to_chars_good/6 51.1 ns 51.1 ns 13687000
BM_to_chars_good/7 45.6 ns 45.6 ns 15323000
BM_to_chars_good/8 14.6 ns 14.6 ns 47795000
BM_to_chars_good/9 40.7 ns 40.7 ns 17371000
BM_to_chars_good/10 7.48 ns 7.48 ns 90931000
BM_to_chars_good/11 37.6 ns 37.6 ns 18542000
BM_to_chars_good/12 35.2 ns 35.2 ns 19922000
BM_to_chars_good/13 34.9 ns 34.9 ns 20105000
BM_to_chars_good/14 33.5 ns 33.5 ns 20863000
BM_to_chars_good/15 31.9 ns 31.9 ns 22014000
BM_to_chars_good/16 11.7 ns 11.7 ns 60012000
BM_to_chars_good/17 28.9 ns 28.9 ns 24148000
BM_to_chars_good/18 29.0 ns 29.0 ns 24317000
BM_to_chars_good/19 28.7 ns 28.7 ns 24363000
BM_to_chars_good/20 28.1 ns 28.1 ns 24899000
BM_to_chars_good/21 27.5 ns 27.5 ns 25499000
BM_to_chars_good/22 26.9 ns 26.9 ns 25929000
BM_to_chars_good/23 26.2 ns 26.2 ns 26828000
BM_to_chars_good/24 25.1 ns 25.1 ns 27742000
BM_to_chars_good/25 25.3 ns 25.3 ns 27720000
BM_to_chars_good/26 25.2 ns 25.2 ns 27789000
BM_to_chars_good/27 25.3 ns 25.3 ns 27777000
BM_to_chars_good/28 25.3 ns 25.3 ns 27643000
BM_to_chars_good/29 25.3 ns 25.3 ns 27750000
BM_to_chars_good/30 25.4 ns 25.4 ns 27566000
BM_to_chars_good/31 25.4 ns 25.4 ns 27611000
BM_to_chars_good/32 25.8 ns 25.8 ns 27218000
BM_to_chars_good/33 25.7 ns 25.7 ns 27070000
BM_to_chars_good/34 26.1 ns 26.1 ns 26693000
BM_to_chars_good/35 26.4 ns 26.4 ns 26486000
BM_to_chars_good/36 26.3 ns 26.3 ns 26619000
BM_to_chars_bad/2 5.99 ns 5.99 ns 118787000
BM_to_chars_bad/3 14.3 ns 14.3 ns 48567000
BM_to_chars_bad/4 16.0 ns 16.0 ns 43239000
BM_to_chars_bad/5 12.6 ns 12.6 ns 55354000
BM_to_chars_bad/6 10.7 ns 10.7 ns 65491000
BM_to_chars_bad/7 14.4 ns 14.4 ns 48723000
BM_to_chars_bad/8 6.50 ns 6.50 ns 104967000
BM_to_chars_bad/9 12.0 ns 12.0 ns 56552000
BM_to_chars_bad/10 5.16 ns 5.16 ns 136380000
BM_to_chars_bad/11 10.5 ns 10.5 ns 66764000
BM_to_chars_bad/12 10.7 ns 10.7 ns 65534000
BM_to_chars_bad/13 11.0 ns 11.0 ns 63426000
BM_to_chars_bad/14 9.90 ns 9.90 ns 68575000
BM_to_chars_bad/15 9.52 ns 9.52 ns 70932000
BM_to_chars_bad/16 6.14 ns 6.14 ns 111762000
BM_to_chars_bad/17 10.6 ns 10.6 ns 65883000
BM_to_chars_bad/18 10.5 ns 10.5 ns 67606000
BM_to_chars_bad/19 9.96 ns 9.96 ns 68898000
BM_to_chars_bad/20 9.40 ns 9.41 ns 73116000
BM_to_chars_bad/21 9.12 ns 9.12 ns 78647000
BM_to_chars_bad/22 8.95 ns 8.95 ns 80211000
BM_to_chars_bad/23 9.50 ns 9.49 ns 73571000
BM_to_chars_bad/24 9.29 ns 9.29 ns 74690000
BM_to_chars_bad/25 9.65 ns 9.65 ns 72877000
BM_to_chars_bad/26 9.78 ns 9.78 ns 70171000
BM_to_chars_bad/27 10.1 ns 10.1 ns 69543000
BM_to_chars_bad/28 10.4 ns 10.4 ns 67582000
BM_to_chars_bad/29 10.00 ns 10.00 ns 70806000
BM_to_chars_bad/30 9.99 ns 9.99 ns 70340000
BM_to_chars_bad/31 9.56 ns 9.56 ns 74159000
BM_to_chars_bad/32 8.97 ns 8.97 ns 78052000
BM_to_chars_bad/33 8.86 ns 8.86 ns 78586000
BM_to_chars_bad/34 8.81 ns 8.81 ns 78562000
BM_to_chars_bad/35 8.90 ns 8.90 ns 77384000
BM_to_chars_bad/36 9.04 ns 9.04 ns 77263000
```
Reviewed By: #libc, ldionne
Differential Revision: https://reviews.llvm.org/D127764
Added:
Modified:
libcxx/include/__charconv/to_chars_base_10.h
libcxx/include/charconv
libcxx/src/charconv.cpp
Removed:
################################################################################
diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h
index 1c6804a852460..91c209559aff9 100644
--- a/libcxx/include/__charconv/to_chars_base_10.h
+++ b/libcxx/include/__charconv/to_chars_base_10.h
@@ -10,10 +10,10 @@
#ifndef _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H
#define _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H
+#include <__algorithm/copy_n.h>
#include <__charconv/tables.h>
#include <__config>
#include <cstdint>
-#include <cstring>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -25,98 +25,97 @@ _LIBCPP_BEGIN_NAMESPACE_STD
namespace __itoa {
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append1(char* __buffer, _Tp __value) noexcept {
- *__buffer = '0' + static_cast<char>(__value);
- return __buffer + 1;
+_LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) noexcept {
+ *__first = '0' + static_cast<char>(__value);
+ return __first + 1;
}
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append2(char* __buffer, _Tp __value) noexcept {
- std::memcpy(__buffer, &__table<>::__digits_base_10[(__value)*2], 2);
- return __buffer + 2;
+_LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) noexcept {
+ return std::copy_n(&__table<>::__digits_base_10[__value * 2], 2, __first);
}
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append3(char* __buffer, _Tp __value) noexcept {
- return __itoa::__append2(__itoa::__append1(__buffer, (__value) / 100), (__value) % 100);
+_LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) noexcept {
+ return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100);
}
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append4(char* __buffer, _Tp __value) noexcept {
- return __itoa::__append2(__itoa::__append2(__buffer, (__value) / 100), (__value) % 100);
+_LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) noexcept {
+ return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100);
}
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append2_no_zeros(char* __buffer, _Tp __value) noexcept {
- if (__value < 10)
- return __itoa::__append1(__buffer, __value);
- else
- return __itoa::__append2(__buffer, __value);
+_LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) noexcept {
+ return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000);
}
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append4_no_zeros(char* __buffer, _Tp __value) noexcept {
- if (__value < 100)
- return __itoa::__append2_no_zeros(__buffer, __value);
- else if (__value < 1000)
- return __itoa::__append3(__buffer, __value);
- else
- return __itoa::__append4(__buffer, __value);
+_LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) noexcept {
+ return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000);
+}
+
+_LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) noexcept {
+ return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000);
}
+_LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) noexcept {
+ return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000);
+}
+
+_LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) noexcept {
+ return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000);
+}
+
+// This function is used for uint32_t and uint64_t.
template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI char* __append8_no_zeros(char* __buffer, _Tp __value) noexcept {
- if (__value < 10000)
- __buffer = __itoa::__append4_no_zeros(__buffer, __value);
- else {
- __buffer = __itoa::__append4_no_zeros(__buffer, __value / 10000);
- __buffer = __itoa::__append4(__buffer, __value % 10000);
- }
- return __buffer;
+_LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept {
+ return __itoa::__append8(__itoa::__append2(__first, static_cast<uint32_t>(__value / 100000000)),
+ static_cast<uint32_t>(__value % 100000000));
}
-_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(uint32_t __value, char* __buffer) noexcept {
- if (__value < 100000000)
- __buffer = __itoa::__append8_no_zeros(__buffer, __value);
- else {
- // __value = aabbbbcccc in decimal
- const uint32_t __a = __value / 100000000; // 1 to 42
- __value %= 100000000;
-
- __buffer = __itoa::__append2_no_zeros(__buffer, __a);
- __buffer = __itoa::__append4(__buffer, __value / 10000);
- __buffer = __itoa::__append4(__buffer, __value % 10000);
+_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(char* __first, uint32_t __value) noexcept {
+ if (__value < 1000000) {
+ if (__value < 10000) {
+ if (__value < 100) {
+ // 0 <= __value < 100
+ if (__value < 10)
+ return __itoa::__append1(__first, __value);
+ return __itoa::__append2(__first, __value);
+ }
+ // 100 <= __value < 10'000
+ if (__value < 1000)
+ return __itoa::__append3(__first, __value);
+ return __itoa::__append4(__first, __value);
+ }
+
+ // 10'000 <= __value < 1'000'000
+ if (__value < 100000)
+ return __itoa::__append5(__first, __value);
+ return __itoa::__append6(__first, __value);
}
- return __buffer;
+ // __value >= 1'000'000
+ if (__value < 100000000) {
+ // 1'000'000 <= __value < 100'000'000
+ if (__value < 10000000)
+ return __itoa::__append7(__first, __value);
+ return __itoa::__append8(__first, __value);
+ }
+
+ // 100'000'000 <= __value < max
+ if (__value < 1000000000)
+ return __itoa::__append9(__first, __value);
+ return __itoa::__append10(__first, __value);
}
-_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(uint64_t __value, char* __buffer) noexcept {
- if (__value < 100000000)
- __buffer = __itoa::__append8_no_zeros(__buffer, static_cast<uint32_t>(__value));
- else if (__value < 10000000000000000) {
- const uint32_t __v0 = static_cast<uint32_t>(__value / 100000000);
- const uint32_t __v1 = static_cast<uint32_t>(__value % 100000000);
-
- __buffer = __itoa::__append8_no_zeros(__buffer, __v0);
- __buffer = __itoa::__append4(__buffer, __v1 / 10000);
- __buffer = __itoa::__append4(__buffer, __v1 % 10000);
- } else {
- const uint32_t __a = static_cast<uint32_t>(__value / 10000000000000000); // 1 to 1844
- __value %= 10000000000000000;
-
- __buffer = __itoa::__append4_no_zeros(__buffer, __a);
-
- const uint32_t __v0 = static_cast<uint32_t>(__value / 100000000);
- const uint32_t __v1 = static_cast<uint32_t>(__value % 100000000);
- __buffer = __itoa::__append4(__buffer, __v0 / 10000);
- __buffer = __itoa::__append4(__buffer, __v0 % 10000);
- __buffer = __itoa::__append4(__buffer, __v1 / 10000);
- __buffer = __itoa::__append4(__buffer, __v1 % 10000);
- }
+_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(char* __buffer, uint64_t __value) noexcept {
+ if (__value <= UINT32_MAX)
+ return __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value));
- return __buffer;
+ // Numbers in the range UINT32_MAX < val < 10'000'000'000 always contain 10
+ // digits and are output after this if statement.
+ if (__value >= 10000000000) {
+ // This function properly determines the first non-zero leading digit.
+ __buffer = __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value / 10000000000));
+ __value %= 10000000000;
+ }
+ return __itoa::__append10(__buffer, __value);
}
} // namespace __itoa
diff --git a/libcxx/include/charconv b/libcxx/include/charconv
index ce2897cf7c281..b88ef46752a33 100644
--- a/libcxx/include/charconv
+++ b/libcxx/include/charconv
@@ -125,9 +125,9 @@ struct _LIBCPP_HIDDEN __traits_base
return __t - (__v < __table<>::__pow10_64[__t]) + 1;
}
- static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p)
+ static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v)
{
- return __itoa::__base_10_u64(__v, __p);
+ return __itoa::__base_10_u64(__p, __v);
}
static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_64)& __pow() { return __table<>::__pow10_64; }
@@ -145,9 +145,9 @@ struct _LIBCPP_HIDDEN
return __t - (__v < __table<>::__pow10_32[__t]) + 1;
}
- static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p)
+ static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v)
{
- return __itoa::__base_10_u32(__v, __p);
+ return __itoa::__base_10_u32(__p, __v);
}
static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_32)& __pow() { return __table<>::__pow10_32; }
@@ -262,7 +262,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type)
auto __diff = __last - __first;
if (__tx::digits <= __diff || __tx::__width(__value) <= __diff)
- return {__tx::__convert(__value, __first), errc(0)};
+ return {__tx::__convert(__first, __value), errc(0)};
else
return {__last, errc::value_too_large};
}
diff --git a/libcxx/src/charconv.cpp b/libcxx/src/charconv.cpp
index 4139dea5f3aa0..05ee2cbd4ac87 100644
--- a/libcxx/src/charconv.cpp
+++ b/libcxx/src/charconv.cpp
@@ -21,13 +21,13 @@ namespace __itoa
_LIBCPP_FUNC_VIS char*
__u32toa(uint32_t value, char* buffer) noexcept
{
- return __base_10_u32(value, buffer);
+ return __base_10_u32(buffer, value);
}
_LIBCPP_FUNC_VIS char*
__u64toa(uint64_t value, char* buffer) noexcept
{
- return __base_10_u64(value, buffer);
+ return __base_10_u64(buffer, value);
}
} // namespace __itoa
More information about the libcxx-commits
mailing list