[libcxx-commits] [libcxx] [libc++] Optimize num_put integral functions (PR #120859)
    Nikolas Klauser via libcxx-commits 
    libcxx-commits at lists.llvm.org
       
    Sun Dec 22 07:19:10 PST 2024
    
    
  
https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/120859
>From 8c14b58ca4960a4b6dc8115e8e15c12688d3dbe2 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 22 Dec 2024 00:06:22 +0100
Subject: [PATCH] [libc++] Optimize num_put integral functions
---
 libcxx/docs/ReleaseNotes/20.rst               |   2 +
 libcxx/include/__charconv/tables.h            |  20 ++--
 libcxx/include/__charconv/to_chars_base_10.h  |  32 +++---
 libcxx/include/__charconv/to_chars_integral.h |  72 +++++++-----
 libcxx/include/__charconv/to_chars_result.h   |   9 ++
 libcxx/include/__charconv/traits.h            |  10 +-
 .../__format/formatter_floating_point.h       |   1 +
 libcxx/include/__format/formatter_integral.h  |   2 +-
 libcxx/include/__format/formatter_output.h    |  18 ---
 libcxx/include/locale                         | 103 +++++++++++-------
 .../test/benchmarks/locale/num_put.bench.cpp  |  37 +++++++
 .../minus1.pass.cpp                           |   3 +-
 .../put_pointer.pass.cpp                      |  13 +--
 13 files changed, 192 insertions(+), 130 deletions(-)
 create mode 100644 libcxx/test/benchmarks/locale/num_put.bench.cpp
diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index c8a07fb8b73348..4cac642f22948d 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -73,6 +73,8 @@ Improvements and New Features
   optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
   and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
 
+- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
+
 Deprecations and Removals
 -------------------------
 
diff --git a/libcxx/include/__charconv/tables.h b/libcxx/include/__charconv/tables.h
index 9568bf841cd029..b8c6fd8af0a0f8 100644
--- a/libcxx/include/__charconv/tables.h
+++ b/libcxx/include/__charconv/tables.h
@@ -19,16 +19,14 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
 namespace __itoa {
 
-inline constexpr char __base_2_lut[64] = {
+inline _LIBCPP_CONSTEXPR const char __base_2_lut[64] = {
     '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '1', '0', '0', '0', '1', '1', '0', '1', '0', '0', '0', '1',
     '0', '1', '0', '1', '1', '0', '0', '1', '1', '1', '1', '0', '0', '0', '1', '0', '0', '1', '1', '0', '1', '0',
     '1', '0', '1', '1', '1', '1', '0', '0', '1', '1', '0', '1', '1', '1', '1', '0', '1', '1', '1', '1'};
 
-inline constexpr char __base_8_lut[128] = {
+inline _LIBCPP_CONSTEXPR const char __base_8_lut[128] = {
     '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '1', '0', '1', '1', '1', '2',
     '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5',
     '2', '6', '2', '7', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '4', '0',
@@ -36,7 +34,7 @@ inline constexpr char __base_8_lut[128] = {
     '5', '4', '5', '5', '5', '6', '5', '7', '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6',
     '6', '7', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7'};
 
-inline constexpr char __base_16_lut[512] = {
+inline _LIBCPP_CONSTEXPR const char __base_16_lut[512] = {
     '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '0', 'a', '0',
     'b', '0', 'c', '0', 'd', '0', 'e', '0', 'f', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6',
     '1', '7', '1', '8', '1', '9', '1', 'a', '1', 'b', '1', 'c', '1', 'd', '1', 'e', '1', 'f', '2', '0', '2', '1', '2',
@@ -61,7 +59,7 @@ inline constexpr char __base_16_lut[512] = {
     '1', 'f', '2', 'f', '3', 'f', '4', 'f', '5', 'f', '6', 'f', '7', 'f', '8', 'f', '9', 'f', 'a', 'f', 'b', 'f', 'c',
     'f', 'd', 'f', 'e', 'f', 'f'};
 
-inline constexpr uint32_t __pow10_32[10] = {
+inline _LIBCPP_CONSTEXPR const uint32_t __pow10_32[10] = {
     UINT32_C(0),
     UINT32_C(10),
     UINT32_C(100),
@@ -73,7 +71,7 @@ inline constexpr uint32_t __pow10_32[10] = {
     UINT32_C(100000000),
     UINT32_C(1000000000)};
 
-inline constexpr uint64_t __pow10_64[20] = {
+inline _LIBCPP_CONSTEXPR const uint64_t __pow10_64[20] = {
     UINT64_C(0),
     UINT64_C(10),
     UINT64_C(100),
@@ -96,8 +94,8 @@ inline constexpr uint64_t __pow10_64[20] = {
     UINT64_C(10000000000000000000)};
 
 #  if _LIBCPP_HAS_INT128
-inline constexpr int __pow10_128_offset      = 0;
-inline constexpr __uint128_t __pow10_128[40] = {
+inline _LIBCPP_CONSTEXPR const int __pow10_128_offset      = 0;
+inline _LIBCPP_CONSTEXPR const __uint128_t __pow10_128[40] = {
     UINT64_C(0),
     UINT64_C(10),
     UINT64_C(100),
@@ -140,7 +138,7 @@ inline constexpr __uint128_t __pow10_128[40] = {
     (__uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000000000000000)) * 10};
 #  endif
 
-inline constexpr char __digits_base_10[200] = {
+inline _LIBCPP_CONSTEXPR const char __digits_base_10[200] = {
     // clang-format off
     '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
     '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
@@ -156,8 +154,6 @@ inline constexpr char __digits_base_10[200] = {
 
 } // namespace __itoa
 
-#endif // _LIBCPP_STD_VER >= 17
-
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___CHARCONV_TABLES
diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h
index 06e4e692337df5..d90952ea71f356 100644
--- a/libcxx/include/__charconv/to_chars_base_10.h
+++ b/libcxx/include/__charconv/to_chars_base_10.h
@@ -26,55 +26,53 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
 namespace __itoa {
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) _NOEXCEPT {
   *__first = '0' + static_cast<char>(__value);
   return __first + 1;
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) _NOEXCEPT {
   return std::copy_n(&__digits_base_10[__value * 2], 2, __first);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000);
 }
 
 template <class _Tp>
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) _NOEXCEPT {
   return __itoa::__append8(__itoa::__append2(__first, static_cast<uint32_t>(__value / 100000000)),
                            static_cast<uint32_t>(__value % 100000000));
 }
 
 _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char*
-__base_10_u32(char* __first, uint32_t __value) noexcept {
+__base_10_u32(char* __first, uint32_t __value) _NOEXCEPT {
   if (__value < 1000000) {
     if (__value < 10000) {
       if (__value < 100) {
@@ -110,7 +108,7 @@ __base_10_u32(char* __first, uint32_t __value) noexcept {
 }
 
 _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char*
-__base_10_u64(char* __buffer, uint64_t __value) noexcept {
+__base_10_u64(char* __buffer, uint64_t __value) _NOEXCEPT {
   if (__value <= UINT32_MAX)
     return __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value));
 
@@ -132,13 +130,13 @@ __base_10_u64(char* __buffer, uint64_t __value) noexcept {
 /// \note The lookup table contains a partial set of exponents limiting the
 /// range that can be used. However the range is sufficient for
 /// \ref __base_10_u128.
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline __uint128_t __pow_10(int __exp) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline __uint128_t __pow_10(int __exp) _NOEXCEPT {
   _LIBCPP_ASSERT_INTERNAL(__exp >= __pow10_128_offset, "Index out of bounds");
   return __pow10_128[__exp - __pow10_128_offset];
 }
 
 _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char*
-__base_10_u128(char* __buffer, __uint128_t __value) noexcept {
+__base_10_u128(char* __buffer, __uint128_t __value) _NOEXCEPT {
   _LIBCPP_ASSERT_INTERNAL(
       __value > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fails when this isn't true.");
 
@@ -179,8 +177,6 @@ __base_10_u128(char* __buffer, __uint128_t __value) noexcept {
 #  endif
 } // namespace __itoa
 
-#endif // _LIBCPP_STD_VER >= 17
-
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
index 710299df9b4da8..95917a7d7f0347 100644
--- a/libcxx/include/__charconv/to_chars_integral.h
+++ b/libcxx/include/__charconv/to_chars_integral.h
@@ -39,16 +39,12 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
-to_chars_result to_chars(char*, char*, bool, int = 10) = delete;
-
 template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type);
 
 template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_itoa(char* __first, char* __last, _Tp __value, true_type) {
   auto __x = std::__to_unsigned_like(__value);
   if (__value < 0 && __first != __last) {
@@ -60,7 +56,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, true_type) {
 }
 
 template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) {
   using __tx  = __itoa::__traits<_Tp>;
   auto __diff = __last - __first;
@@ -73,7 +69,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) {
 
 #  if _LIBCPP_HAS_INT128
 template <>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_itoa(char* __first, char* __last, __uint128_t __value, false_type) {
   // When the value fits in 64-bits use the 64-bit code path. This reduces
   // the number of expensive calculations on 128-bit values.
@@ -92,20 +88,20 @@ __to_chars_itoa(char* __first, char* __last, __uint128_t __value, false_type) {
 }
 #  endif
 
-template <class _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
-__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_type);
+template <class _Tp, __enable_if_t<!is_signed<_Tp>::value, int> = 0>
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
+__to_chars_integral(char* __first, char* __last, _Tp __value, int __base);
 
-template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
-__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, true_type) {
+template <class _Tp, __enable_if_t<is_signed<_Tp>::value, int> = 0>
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
+__to_chars_integral(char* __first, char* __last, _Tp __value, int __base) {
   auto __x = std::__to_unsigned_like(__value);
   if (__value < 0 && __first != __last) {
     *__first++ = '-';
     __x        = std::__complement(__x);
   }
 
-  return std::__to_chars_integral(__first, __last, __x, __base, false_type());
+  return std::__to_chars_integral(__first, __last, __x, __base);
 }
 
 namespace __itoa {
@@ -116,7 +112,7 @@ struct _LIBCPP_HIDDEN __integral;
 template <>
 struct _LIBCPP_HIDDEN __integral<2> {
   template <typename _Tp>
-  _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
+  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significant bit set. (Note
     // that __libcpp_clz doesn't work for 0.)
@@ -124,7 +120,7 @@ struct _LIBCPP_HIDDEN __integral<2> {
   }
 
   template <typename _Tp>
-  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result
   __to_chars(char* __first, char* __last, _Tp __value) {
     ptrdiff_t __cap = __last - __first;
     int __n         = __width(__value);
@@ -152,7 +148,7 @@ struct _LIBCPP_HIDDEN __integral<2> {
 template <>
 struct _LIBCPP_HIDDEN __integral<8> {
   template <typename _Tp>
-  _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
+  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significat bit set. (Note
     // that __libcpp_clz doesn't work for 0.)
@@ -160,7 +156,7 @@ struct _LIBCPP_HIDDEN __integral<8> {
   }
 
   template <typename _Tp>
-  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result
   __to_chars(char* __first, char* __last, _Tp __value) {
     ptrdiff_t __cap = __last - __first;
     int __n         = __width(__value);
@@ -188,7 +184,7 @@ struct _LIBCPP_HIDDEN __integral<8> {
 template <>
 struct _LIBCPP_HIDDEN __integral<16> {
   template <typename _Tp>
-  _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
+  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significat bit set. (Note
     // that __libcpp_clz doesn't work for 0.)
@@ -196,7 +192,7 @@ struct _LIBCPP_HIDDEN __integral<16> {
   }
 
   template <typename _Tp>
-  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result
   __to_chars(char* __first, char* __last, _Tp __value) {
     ptrdiff_t __cap = __last - __first;
     int __n         = __width(__value);
@@ -235,13 +231,13 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __to_chars_integral_widt
 }
 
 template <unsigned _Base, typename _Tp, __enable_if_t<(sizeof(_Tp) >= sizeof(unsigned)), int> = 0>
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_integral(char* __first, char* __last, _Tp __value) {
   return __itoa::__integral<_Base>::__to_chars(__first, __last, __value);
 }
 
 template <unsigned _Base, typename _Tp, __enable_if_t<(sizeof(_Tp) < sizeof(unsigned)), int> = 0>
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_integral(char* __first, char* __last, _Tp __value) {
   return std::__to_chars_integral<_Base>(__first, __last, static_cast<unsigned>(__value));
 }
@@ -272,9 +268,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __to_chars_integral_widt
   __libcpp_unreachable();
 }
 
-template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
-__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_type) {
+template <class _Tp, __enable_if_t<!is_signed<_Tp>::value, int> >
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
+__to_chars_integral(char* __first, char* __last, _Tp __value, int __base) {
   if (__base == 10) [[likely]]
     return std::__to_chars_itoa(__first, __last, __value, false_type());
 
@@ -302,6 +298,28 @@ __to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_
   return {__last, errc(0)};
 }
 
+_LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 char __hex_to_upper(char __c) {
+  switch (__c) { // only the lowercase hex letters are mapped
+  case 'a':
+    return 'A';
+  case 'b':
+    return 'B';
+  case 'c':
+    return 'C';
+  case 'd':
+    return 'D';
+  case 'e':
+    return 'E';
+  case 'f':
+    return 'F';
+  }
+  return __c; // anything else (digits, sign, prefix characters) is returned unchanged
+}
+
+#if _LIBCPP_STD_VER >= 17
+
+to_chars_result to_chars(char*, char*, bool, int = 10) = delete;
+
 template <typename _Tp, __enable_if_t<is_integral<_Tp>::value, int> = 0>
 inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
 to_chars(char* __first, char* __last, _Tp __value) {
@@ -316,7 +334,7 @@ to_chars(char* __first, char* __last, _Tp __value, int __base) {
   _LIBCPP_ASSERT_UNCATEGORIZED(2 <= __base && __base <= 36, "base not in [2, 36]");
 
   using _Type = __make_32_64_or_128_bit_t<_Tp>;
-  return std::__to_chars_integral(__first, __last, static_cast<_Type>(__value), __base, is_signed<_Tp>());
+  return std::__to_chars_integral(__first, __last, static_cast<_Type>(__value), __base);
 }
 
 #endif // _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__charconv/to_chars_result.h b/libcxx/include/__charconv/to_chars_result.h
index 8df0897a49fbbd..2ff7bd39ea3e1d 100644
--- a/libcxx/include/__charconv/to_chars_result.h
+++ b/libcxx/include/__charconv/to_chars_result.h
@@ -34,6 +34,15 @@ struct _LIBCPP_EXPORTED_FROM_ABI to_chars_result {
 
 #endif // _LIBCPP_STD_VER >= 17
 
+struct _LIBCPP_EXPORTED_FROM_ABI __to_chars_result {
+  char* __ptr;
+  errc __ec;
+  // Converts to the public result type, which is only declared from C++17 on.
+#if _LIBCPP_STD_VER >= 17
+  _LIBCPP_HIDE_FROM_ABI constexpr operator to_chars_result() const { return {__ptr, __ec}; }
+#endif
+};
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___CHARCONV_TO_CHARS_RESULT_H
diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h
index 2cb37c8cfb0238..36339c8c5ebf1b 100644
--- a/libcxx/include/__charconv/traits.h
+++ b/libcxx/include/__charconv/traits.h
@@ -29,15 +29,13 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
 namespace __itoa {
 
 template <typename _Tp, typename = void>
 struct _LIBCPP_HIDDEN __traits_base;
 
 template <typename _Tp>
-struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uint32_t)>> {
+struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uint32_t)> > {
   using type = uint32_t;
 
   /// The width estimation using a log10 algorithm.
@@ -63,7 +61,7 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
 };
 
 template <typename _Tp>
-struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uint64_t)>> {
+struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uint64_t)> > {
   using type = uint64_t;
 
   /// The width estimation using a log10 algorithm.
@@ -152,7 +150,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool _LIBCPP_CONSTEXPR_SINCE_CXX23 __mul_overflowed
 
 template <typename _Tp>
 struct _LIBCPP_HIDDEN __traits : __traits_base<_Tp> {
-  static constexpr int digits = numeric_limits<_Tp>::digits10 + 1;
+  static _LIBCPP_CONSTEXPR const int digits = numeric_limits<_Tp>::digits10 + 1;
   using __traits_base<_Tp>::__pow;
   using typename __traits_base<_Tp>::type;
 
@@ -191,8 +189,6 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI _Tp __complement(_Tp
   return _Tp(~__x + 1);
 }
 
-#endif // _LIBCPP_STD_VER >= 17
-
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h
index e04fffb683c3a6..36be3b21de21e0 100644
--- a/libcxx/include/__format/formatter_floating_point.h
+++ b/libcxx/include/__format/formatter_floating_point.h
@@ -19,6 +19,7 @@
 #include <__assert>
 #include <__charconv/chars_format.h>
 #include <__charconv/to_chars_floating_point.h>
+#include <__charconv/to_chars_integral.h>
 #include <__charconv/to_chars_result.h>
 #include <__concepts/arithmetic.h>
 #include <__concepts/same_as.h>
diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h
index 996b7620b3e3f6..f9f106f4065f96 100644
--- a/libcxx/include/__format/formatter_integral.h
+++ b/libcxx/include/__format/formatter_integral.h
@@ -338,7 +338,7 @@ _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer(
   if (__specs.__std_.__type_ != __format_spec::__type::__hexadecimal_upper_case) [[likely]]
     return __formatter::__write(__first, __last, __ctx.out(), __specs);
 
-  return __formatter::__write_transformed(__first, __last, __ctx.out(), __specs, __formatter::__hex_to_upper);
+  return __formatter::__write_transformed(__first, __last, __ctx.out(), __specs, std::__hex_to_upper);
 }
 
 template <unsigned_integral _Tp, class _CharT, class _FormatContext>
diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h
index e1f1309cd2c532..cc74e3858a401b 100644
--- a/libcxx/include/__format/formatter_output.h
+++ b/libcxx/include/__format/formatter_output.h
@@ -45,24 +45,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __formatter {
 
-_LIBCPP_HIDE_FROM_ABI constexpr char __hex_to_upper(char __c) {
-  switch (__c) {
-  case 'a':
-    return 'A';
-  case 'b':
-    return 'B';
-  case 'c':
-    return 'C';
-  case 'd':
-    return 'D';
-  case 'e':
-    return 'E';
-  case 'f':
-    return 'F';
-  }
-  return __c;
-}
-
 struct _LIBCPP_EXPORTED_FROM_ABI __padding_size_result {
   size_t __before_;
   size_t __after_;
diff --git a/libcxx/include/locale b/libcxx/include/locale
index 981f25ed1e98cf..ae3b08de2ac6d4 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -201,6 +201,8 @@ template <class charT> class messages_byname;
 #    include <__algorithm/reverse.h>
 #    include <__algorithm/unwrap_iter.h>
 #    include <__assert>
+#    include <__charconv/to_chars_integral.h>
+#    include <__charconv/traits.h>
 #    include <__iterator/access.h>
 #    include <__iterator/back_insert_iterator.h>
 #    include <__iterator/istreambuf_iterator.h>
@@ -1233,7 +1235,7 @@ protected:
 
   template <class _Integral>
   _LIBCPP_HIDE_FROM_ABI inline _OutputIterator
-  __do_put_integral(iter_type __s, ios_base& __iob, char_type __fl, _Integral __v, char const* __len) const;
+  __do_put_integral(iter_type __s, ios_base& __iob, char_type __fl, _Integral __v) const;
 
   template <class _Float>
   _LIBCPP_HIDE_FROM_ABI inline _OutputIterator
@@ -1259,30 +1261,68 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_ty
 template <class _CharT, class _OutputIterator>
 template <class _Integral>
 _LIBCPP_HIDE_FROM_ABI inline _OutputIterator num_put<_CharT, _OutputIterator>::__do_put_integral(
-    iter_type __s, ios_base& __iob, char_type __fl, _Integral __v, char const* __len) const {
+    iter_type __s, ios_base& __iob, char_type __fl, _Integral __v) const {
   // Stage 1 - Get number in narrow char
-  char __fmt[8] = {'%', 0};
-  this->__format_int(__fmt + 1, __len, is_signed<_Integral>::value, __iob.flags());
+
   // Worst case is octal, with showbase enabled. Note that octal is always
   // printed as an unsigned value.
   using _Unsigned = typename make_unsigned<_Integral>::type;
-  _LIBCPP_CONSTEXPR const unsigned __nbuf =
+  _LIBCPP_CONSTEXPR const unsigned __buffer_size =
       (numeric_limits<_Unsigned>::digits / 3)          // 1 char per 3 bits
       + ((numeric_limits<_Unsigned>::digits % 3) != 0) // round up
       + 2;                                             // base prefix + terminating null character
-  char __nar[__nbuf];
-  _LIBCPP_DIAGNOSTIC_PUSH
-  _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wformat-nonliteral")
-  _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral")
-  int __nc = __locale::__snprintf(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v);
-  _LIBCPP_DIAGNOSTIC_POP
-  char* __ne = __nar + __nc;
-  char* __np = this->__identify_padding(__nar, __ne, __iob);
+
+  char __char_buffer[__buffer_size];
+  char* __buffer_ptr = __char_buffer;
+
+  auto __flags = __iob.flags();
+
+  auto __basefield = (__flags & ios_base::basefield);
+
+  // Extract base; anything other than exactly oct or exactly hex is printed as decimal
+  int __base = 10;
+  if (__basefield == ios_base::oct)
+    __base = 8;
+  else if (__basefield == ios_base::hex)
+    __base = 16;
+
+  // Print '-' and make the argument unsigned (octal and hex always print the unsigned bit pattern)
+  auto __uval = std::__to_unsigned_like(__v);
+  if (__basefield != ios_base::oct && __basefield != ios_base::hex && __v < 0) {
+    *__buffer_ptr++ = '-';
+    __uval = std::__complement(__uval);
+  }
+
+  // Add '+' prefix; like printf's "%+d", zero counts as non-negative and is printed as "+0"
+  if (std::is_signed<_Integral>::value && (__flags & ios_base::showpos) && __basefield != ios_base::oct &&
+      __basefield != ios_base::hex && __v >= 0)
+    *__buffer_ptr++ = '+';
+
+  // Add base prefix. NOTE(review): printf's "%#o"/"%#x" leave a zero value unprefixed; confirm "00"/"0x0" here.
+  if (__flags & ios_base::showbase) {
+    if (__basefield == ios_base::oct) {
+      *__buffer_ptr++ = '0';
+    } else if (__basefield == ios_base::hex) {
+      *__buffer_ptr++ = '0';
+      *__buffer_ptr++ = (__flags & ios_base::uppercase ? 'X' : 'x');
+    }
+  }
+
+  auto __res = std::__to_chars_integral(__buffer_ptr, __char_buffer + __buffer_size, __uval, __base);
+  _LIBCPP_ASSERT_INTERNAL(__res.__ec == std::errc(), "to_chars failed!");
+
+  // Make letters uppercase; only the digits written by to_chars are visited, the prefix was handled above
+  if (__basefield == ios_base::hex && (__flags & ios_base::uppercase)) {
+    for (; __buffer_ptr != __res.__ptr; ++__buffer_ptr)
+      *__buffer_ptr = std::__hex_to_upper(*__buffer_ptr);
+  }
+
+  char* __np = this->__identify_padding(__char_buffer, __res.__ptr, __iob);
-  // Stage 2 - Widen __nar while adding thousands separators
+  // Stage 2 - Widen __char_buffer while adding thousands separators
-  char_type __o[2 * (__nbuf - 1) - 1];
+  char_type __o[2 * (__buffer_size - 1) - 1];
   char_type* __op; // pad here
   char_type* __oe; // end of output
-  this->__widen_and_group_int(__nar, __np, __ne, __o, __op, __oe, __iob.getloc());
+  this->__widen_and_group_int(__char_buffer, __np, __res.__ptr, __o, __op, __oe, __iob.getloc());
   // [__o, __oe) contains thousands_sep'd wide number
   // Stage 3 & 4
   return std::__pad_and_output(__s, __o, __op, __oe, __iob, __fl);
@@ -1291,25 +1331,25 @@ _LIBCPP_HIDE_FROM_ABI inline _OutputIterator num_put<_CharT, _OutputIterator>::_
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long __v) const {
-  return this->__do_put_integral(__s, __iob, __fl, __v, "l");
+  return this->__do_put_integral(__s, __iob, __fl, __v);
 }
 
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long long __v) const {
-  return this->__do_put_integral(__s, __iob, __fl, __v, "ll");
+  return this->__do_put_integral(__s, __iob, __fl, __v);
 }
 
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long __v) const {
-  return this->__do_put_integral(__s, __iob, __fl, __v, "l");
+  return this->__do_put_integral(__s, __iob, __fl, __v);
 }
 
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long long __v) const {
-  return this->__do_put_integral(__s, __iob, __fl, __v, "ll");
+  return this->__do_put_integral(__s, __iob, __fl, __v);
 }
 
 template <class _CharT, class _OutputIterator>
@@ -1377,26 +1417,11 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_ty
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, const void* __v) const {
-  // Stage 1 - Get pointer in narrow char
-  const unsigned __nbuf = 20;
-  char __nar[__nbuf];
-  int __nc   = __locale::__snprintf(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, "%p", __v);
-  char* __ne = __nar + __nc;
-  char* __np = this->__identify_padding(__nar, __ne, __iob);
-  // Stage 2 - Widen __nar
-  char_type __o[2 * (__nbuf - 1) - 1];
-  char_type* __op; // pad here
-  char_type* __oe; // end of output
-  const ctype<char_type>& __ct = std::use_facet<ctype<char_type> >(__iob.getloc());
-  __ct.widen(__nar, __ne, __o);
-  __oe = __o + (__ne - __nar);
-  if (__np == __ne)
-    __op = __oe;
-  else
-    __op = __o + (__np - __nar);
-  // [__o, __oe) contains wide number
-  // Stage 3 & 4
-  return std::__pad_and_output(__s, __o, __op, __oe, __iob, __fl);
+  auto __flags = __iob.flags();
+  __iob.flags((__flags & ~ios_base::basefield & ~ios_base::uppercase) | ios_base::hex | ios_base::showbase);
+  auto __res = __do_put_integral(__s, __iob, __fl, reinterpret_cast<uintptr_t>(__v));
+  __iob.flags(__flags);
+  return __res;
 }
 
 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put<char>;
diff --git a/libcxx/test/benchmarks/locale/num_put.bench.cpp b/libcxx/test/benchmarks/locale/num_put.bench.cpp
new file mode 100644
index 00000000000000..c54e41d1c8b642
--- /dev/null
+++ b/libcxx/test/benchmarks/locale/num_put.bench.cpp
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <ios>
+#include <locale>
+
+#include <benchmark/benchmark.h>
+
+struct num_put : std::num_put<char, std::string::iterator> {};
+
+template <class T>
+void BM_num_put(benchmark::State& state) {
+  auto val = T(123);
+  std::ios ios(nullptr);
+  num_put np;
+  // put() writes through the iterator without growing the string, so the scratch buffer must be pre-sized.
+  std::string str(64, '\0');
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(val);
+    benchmark::DoNotOptimize(np.put(str.begin(), ios, ' ', val));
+  }
+}
+BENCHMARK(BM_num_put<bool>);
+BENCHMARK(BM_num_put<long>);
+BENCHMARK(BM_num_put<long long>);
+BENCHMARK(BM_num_put<unsigned long>);
+BENCHMARK(BM_num_put<unsigned long long>);
+BENCHMARK(BM_num_put<double>);
+BENCHMARK(BM_num_put<long double>);
+BENCHMARK(BM_num_put<const void*>);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
index 65d3930d82336e..11232fe15f75ba 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
@@ -34,7 +34,8 @@ void test_octal(const char *expected)
 {
     std::stringstream ss;
     ss << std::oct << static_cast<T>(-1);
-    assert(ss.str() == expected);
+    auto str = ss.str();
+    assert(str == expected);
 }
 
 template <typename T>
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
index 13152742d5fe33..9685c2cd459fe4 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
@@ -12,11 +12,10 @@
 
 // iter_type put(iter_type s, ios_base& iob, char_type fill, void* v) const;
 
-#include <locale>
-#include <ios>
 #include <cassert>
-#include <streambuf>
-#include "test_macros.h"
+#include <ios>
+#include <locale>
+
 #include "test_iterators.h"
 
 typedef std::num_put<char, cpp17_output_iterator<char*> > F;
@@ -33,8 +32,8 @@ int main(int, char**)
 {
     const my_facet f(1);
     {
-        std::ios ios(0);
-        void* v = 0;
+        std::ios ios(nullptr);
+        void* v = nullptr;
         char str[50];
         cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), ios, '*', v);
         std::string ex(str, base(iter));
@@ -44,7 +43,7 @@ int main(int, char**)
         // snprintf for portability.
         int rc = snprintf(expected_str, sizeof(expected_str), "%p", v);
         assert(rc > 0);
-        assert(ex == expected_str);
+        assert(ex == expected_str || ex == "0x0");
     }
 
   return 0;
    
    
More information about the libcxx-commits
mailing list