[libcxx-commits] [libcxx] [libc++] Optimize num_put integral functions (PR #120859)

Fri Feb 28 04:41:18 PST 2025

https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/120859

>From 92fa847291f4ddaf9fd1270d1598c04f8b343dd0 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 22 Dec 2024 00:06:22 +0100
Subject: [PATCH] [libc++] Optimize num_put integral functions

---
 libcxx/docs/ReleaseNotes/20.rst               |   2 +
 libcxx/include/__charconv/tables.h            |  20 ++--
 libcxx/include/__charconv/to_chars_base_10.h  |  32 +++---
 libcxx/include/__charconv/to_chars_integral.h |  72 +++++++-----
 libcxx/include/__charconv/to_chars_result.h   |   9 ++
 libcxx/include/__charconv/traits.h            |  10 +-
 .../__format/formatter_floating_point.h       |   1 +
 libcxx/include/__format/formatter_integral.h  |   2 +-
 libcxx/include/__format/formatter_output.h    |  18 ---
 libcxx/include/locale                         | 104 +++++++++++-------
 libcxx/include/module.modulemap               |   5 +-
 .../test/benchmarks/locale/num_put.bench.cpp  |  39 +++++++
 .../put_pointer.pass.cpp                      |  44 +++-----
 13 files changed, 209 insertions(+), 149 deletions(-)
 create mode 100644 libcxx/test/benchmarks/locale/num_put.bench.cpp

diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index 57ab0c167544b..c7b59545b4fbc 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -120,6 +120,8 @@ Improvements and New Features
 
 - Added :ref:`hardening mode <hardening>` support for ``forward_list`` and ``bitset``.
 
+- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
+
 Deprecations and Removals
 -------------------------
 
diff --git a/libcxx/include/__charconv/tables.h b/libcxx/include/__charconv/tables.h
index 9568bf841cd02..b8c6fd8af0a0f 100644
--- a/libcxx/include/__charconv/tables.h
+++ b/libcxx/include/__charconv/tables.h
@@ -19,16 +19,14 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
 namespace __itoa {
 
-inline constexpr char __base_2_lut[64] = {
+inline _LIBCPP_CONSTEXPR const char __base_2_lut[64] = {
     '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '1', '0', '0', '0', '1', '1', '0', '1', '0', '0', '0', '1',
     '0', '1', '0', '1', '1', '0', '0', '1', '1', '1', '1', '0', '0', '0', '1', '0', '0', '1', '1', '0', '1', '0',
     '1', '0', '1', '1', '1', '1', '0', '0', '1', '1', '0', '1', '1', '1', '1', '0', '1', '1', '1', '1'};
 
-inline constexpr char __base_8_lut[128] = {
+inline _LIBCPP_CONSTEXPR const char __base_8_lut[128] = {
     '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '1', '0', '1', '1', '1', '2',
     '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5',
     '2', '6', '2', '7', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '4', '0',
@@ -36,7 +34,7 @@ inline constexpr char __base_8_lut[128] = {
     '5', '4', '5', '5', '5', '6', '5', '7', '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6',
     '6', '7', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7'};
 
-inline constexpr char __base_16_lut[512] = {
+inline _LIBCPP_CONSTEXPR const char __base_16_lut[512] = {
     '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '0', 'a', '0',
     'b', '0', 'c', '0', 'd', '0', 'e', '0', 'f', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6',
     '1', '7', '1', '8', '1', '9', '1', 'a', '1', 'b', '1', 'c', '1', 'd', '1', 'e', '1', 'f', '2', '0', '2', '1', '2',
@@ -61,7 +59,7 @@ inline constexpr char __base_16_lut[512] = {
     '1', 'f', '2', 'f', '3', 'f', '4', 'f', '5', 'f', '6', 'f', '7', 'f', '8', 'f', '9', 'f', 'a', 'f', 'b', 'f', 'c',
     'f', 'd', 'f', 'e', 'f', 'f'};
 
-inline constexpr uint32_t __pow10_32[10] = {
+inline _LIBCPP_CONSTEXPR const uint32_t __pow10_32[10] = {
     UINT32_C(0),
     UINT32_C(10),
     UINT32_C(100),
@@ -73,7 +71,7 @@ inline constexpr uint32_t __pow10_32[10] = {
     UINT32_C(100000000),
     UINT32_C(1000000000)};
 
-inline constexpr uint64_t __pow10_64[20] = {
+inline _LIBCPP_CONSTEXPR const uint64_t __pow10_64[20] = {
     UINT64_C(0),
     UINT64_C(10),
     UINT64_C(100),
@@ -96,8 +94,8 @@ inline constexpr uint64_t __pow10_64[20] = {
     UINT64_C(10000000000000000000)};
 
 #  if _LIBCPP_HAS_INT128
-inline constexpr int __pow10_128_offset      = 0;
-inline constexpr __uint128_t __pow10_128[40] = {
+inline _LIBCPP_CONSTEXPR const int __pow10_128_offset      = 0;
+inline _LIBCPP_CONSTEXPR const __uint128_t __pow10_128[40] = {
     UINT64_C(0),
     UINT64_C(10),
     UINT64_C(100),
@@ -140,7 +138,7 @@ inline constexpr __uint128_t __pow10_128[40] = {
     (__uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000000000000000)) * 10};
 #  endif
 
-inline constexpr char __digits_base_10[200] = {
+inline _LIBCPP_CONSTEXPR const char __digits_base_10[200] = {
     // clang-format off
     '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
     '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
@@ -156,8 +154,6 @@ inline constexpr char __digits_base_10[200] = {
 
 } // namespace __itoa
 
-#endif // _LIBCPP_STD_VER >= 17
-
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___CHARCONV_TABLES
diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h
index 06e4e692337df..d90952ea71f35 100644
--- a/libcxx/include/__charconv/to_chars_base_10.h
+++ b/libcxx/include/__charconv/to_chars_base_10.h
@@ -26,55 +26,53 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
 namespace __itoa {
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) _NOEXCEPT {
   *__first = '0' + static_cast<char>(__value);
   return __first + 1;
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) _NOEXCEPT {
   return std::copy_n(&__digits_base_10[__value * 2], 2, __first);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000);
 }
 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) _NOEXCEPT {
   return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000);
 }
 
 template <class _Tp>
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) _NOEXCEPT {
   return __itoa::__append8(__itoa::__append2(__first, static_cast<uint32_t>(__value / 100000000)),
                            static_cast<uint32_t>(__value % 100000000));
 }
 
 _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char*
-__base_10_u32(char* __first, uint32_t __value) noexcept {
+__base_10_u32(char* __first, uint32_t __value) _NOEXCEPT {
   if (__value < 1000000) {
     if (__value < 10000) {
       if (__value < 100) {
@@ -110,7 +108,7 @@ __base_10_u32(char* __first, uint32_t __value) noexcept {
 }
 
 _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char*
-__base_10_u64(char* __buffer, uint64_t __value) noexcept {
+__base_10_u64(char* __buffer, uint64_t __value) _NOEXCEPT {
   if (__value <= UINT32_MAX)
     return __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value));
 
@@ -132,13 +130,13 @@ __base_10_u64(char* __buffer, uint64_t __value) noexcept {
 /// \note The lookup table contains a partial set of exponents limiting the
 /// range that can be used. However the range is sufficient for
 /// \ref __base_10_u128.
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline __uint128_t __pow_10(int __exp) noexcept {
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline __uint128_t __pow_10(int __exp) _NOEXCEPT {
   _LIBCPP_ASSERT_INTERNAL(__exp >= __pow10_128_offset, "Index out of bounds");
   return __pow10_128[__exp - __pow10_128_offset];
 }
 
 _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char*
-__base_10_u128(char* __buffer, __uint128_t __value) noexcept {
+__base_10_u128(char* __buffer, __uint128_t __value) _NOEXCEPT {
   _LIBCPP_ASSERT_INTERNAL(
       __value > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fails when this isn't true.");
 
@@ -179,8 +177,6 @@ __base_10_u128(char* __buffer, __uint128_t __value) noexcept {
 #  endif
 } // namespace __itoa
 
-#endif // _LIBCPP_STD_VER >= 17
-
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
index 710299df9b4da..238c96d7c7a04 100644
--- a/libcxx/include/__charconv/to_chars_integral.h
+++ b/libcxx/include/__charconv/to_chars_integral.h
@@ -39,16 +39,12 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
-to_chars_result to_chars(char*, char*, bool, int = 10) = delete;
-
 template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type);
 
 template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_itoa(char* __first, char* __last, _Tp __value, true_type) {
   auto __x = std::__to_unsigned_like(__value);
   if (__value < 0 && __first != __last) {
@@ -60,7 +56,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, true_type) {
 }
 
 template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) {
   using __tx  = __itoa::__traits<_Tp>;
   auto __diff = __last - __first;
@@ -73,7 +69,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) {
 
 #  if _LIBCPP_HAS_INT128
 template <>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_itoa(char* __first, char* __last, __uint128_t __value, false_type) {
   // When the value fits in 64-bits use the 64-bit code path. This reduces
   // the number of expensive calculations on 128-bit values.
@@ -92,20 +88,20 @@ __to_chars_itoa(char* __first, char* __last, __uint128_t __value, false_type) {
 }
 #  endif
 
-template <class _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
-__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_type);
+template <class _Tp, __enable_if_t<!is_signed<_Tp>::value, int> = 0>
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
+__to_chars_integral(char* __first, char* __last, _Tp __value, int __base);
 
-template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
-__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, true_type) {
+template <class _Tp, __enable_if_t<is_signed<_Tp>::value, int> = 0>
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
+__to_chars_integral(char* __first, char* __last, _Tp __value, int __base) {
   auto __x = std::__to_unsigned_like(__value);
   if (__value < 0 && __first != __last) {
     *__first++ = '-';
     __x        = std::__complement(__x);
   }
 
-  return std::__to_chars_integral(__first, __last, __x, __base, false_type());
+  return std::__to_chars_integral(__first, __last, __x, __base);
 }
 
 namespace __itoa {
@@ -116,7 +112,7 @@ struct _LIBCPP_HIDDEN __integral;
 template <>
 struct _LIBCPP_HIDDEN __integral<2> {
   template <typename _Tp>
-  _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
+  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significant bit set. (Note
     // that __libcpp_clz doesn't work for 0.)
@@ -124,7 +120,7 @@ struct _LIBCPP_HIDDEN __integral<2> {
   }
 
   template <typename _Tp>
-  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result
   __to_chars(char* __first, char* __last, _Tp __value) {
     ptrdiff_t __cap = __last - __first;
     int __n         = __width(__value);
@@ -152,7 +148,7 @@ struct _LIBCPP_HIDDEN __integral<2> {
 template <>
 struct _LIBCPP_HIDDEN __integral<8> {
   template <typename _Tp>
-  _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
+  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significat bit set. (Note
     // that __libcpp_clz doesn't work for 0.)
@@ -160,7 +156,7 @@ struct _LIBCPP_HIDDEN __integral<8> {
   }
 
   template <typename _Tp>
-  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result
   __to_chars(char* __first, char* __last, _Tp __value) {
     ptrdiff_t __cap = __last - __first;
     int __n         = __width(__value);
@@ -188,7 +184,7 @@ struct _LIBCPP_HIDDEN __integral<8> {
 template <>
 struct _LIBCPP_HIDDEN __integral<16> {
   template <typename _Tp>
-  _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
+  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significat bit set. (Note
     // that __libcpp_clz doesn't work for 0.)
@@ -196,7 +192,7 @@ struct _LIBCPP_HIDDEN __integral<16> {
   }
 
   template <typename _Tp>
-  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result
   __to_chars(char* __first, char* __last, _Tp __value) {
     ptrdiff_t __cap = __last - __first;
     int __n         = __width(__value);
@@ -235,13 +231,13 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __to_chars_integral_widt
 }
 
 template <unsigned _Base, typename _Tp, __enable_if_t<(sizeof(_Tp) >= sizeof(unsigned)), int> = 0>
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_integral(char* __first, char* __last, _Tp __value) {
   return __itoa::__integral<_Base>::__to_chars(__first, __last, __value);
 }
 
 template <unsigned _Base, typename _Tp, __enable_if_t<(sizeof(_Tp) < sizeof(unsigned)), int> = 0>
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
+_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
 __to_chars_integral(char* __first, char* __last, _Tp __value) {
   return std::__to_chars_integral<_Base>(__first, __last, static_cast<unsigned>(__value));
 }
@@ -272,9 +268,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __to_chars_integral_widt
   __libcpp_unreachable();
 }
 
-template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
-__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_type) {
+template <class _Tp, __enable_if_t<!is_signed<_Tp>::value, int> >
+inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result
+__to_chars_integral(char* __first, char* __last, _Tp __value, int __base) {
   if (__base == 10) [[likely]]
     return std::__to_chars_itoa(__first, __last, __value, false_type());
 
@@ -302,6 +298,28 @@ __to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_
   return {__last, errc(0)};
 }
 
+_LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 char __hex_to_upper(char __c) {
+  switch (__c) {
+  case 'a':
+    return 'A';
+  case 'b':
+    return 'B';
+  case 'c':
+    return 'C';
+  case 'd':
+    return 'D';
+  case 'e':
+    return 'E';
+  case 'f':
+    return 'F';
+  }
+  return __c;
+}
+
+#if _LIBCPP_STD_VER >= 17
+
+to_chars_result to_chars(char*, char*, bool, int = 10) = delete;
+
 template <typename _Tp, __enable_if_t<is_integral<_Tp>::value, int> = 0>
 inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
 to_chars(char* __first, char* __last, _Tp __value) {
@@ -316,7 +334,7 @@ to_chars(char* __first, char* __last, _Tp __value, int __base) {
   _LIBCPP_ASSERT_UNCATEGORIZED(2 <= __base && __base <= 36, "base not in [2, 36]");
 
   using _Type = __make_32_64_or_128_bit_t<_Tp>;
-  return std::__to_chars_integral(__first, __last, static_cast<_Type>(__value), __base, is_signed<_Tp>());
+  return std::__to_chars_integral(__first, __last, static_cast<_Type>(__value), __base);
 }
 
 #endif // _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__charconv/to_chars_result.h b/libcxx/include/__charconv/to_chars_result.h
index 8df0897a49fbb..41dea4ab14723 100644
--- a/libcxx/include/__charconv/to_chars_result.h
+++ b/libcxx/include/__charconv/to_chars_result.h
@@ -34,6 +34,15 @@ struct _LIBCPP_EXPORTED_FROM_ABI to_chars_result {
 
 #endif // _LIBCPP_STD_VER >= 17
 
+struct __to_chars_result {
+  char* __ptr;
+  errc __ec;
+
+#if _LIBCPP_STD_VER >= 17
+  _LIBCPP_HIDE_FROM_ABI constexpr operator to_chars_result() { return {__ptr, __ec}; }
+#endif
+};
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___CHARCONV_TO_CHARS_RESULT_H
diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h
index dd1fa2354436a..085a3f7886f31 100644
--- a/libcxx/include/__charconv/traits.h
+++ b/libcxx/include/__charconv/traits.h
@@ -30,15 +30,13 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
 namespace __itoa {
 
 template <typename _Tp, typename = void>
 struct _LIBCPP_HIDDEN __traits_base;
 
 template <typename _Tp>
-struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uint32_t)>> {
+struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uint32_t)> > {
   using type = uint32_t;
 
   /// The width estimation using a log10 algorithm.
@@ -64,7 +62,7 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
 };
 
 template <typename _Tp>
-struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uint64_t)>> {
+struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uint64_t)> > {
   using type = uint64_t;
 
   /// The width estimation using a log10 algorithm.
@@ -153,7 +151,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool _LIBCPP_CONSTEXPR_SINCE_CXX23 __mul_overflowed
 
 template <typename _Tp>
 struct _LIBCPP_HIDDEN __traits : __traits_base<_Tp> {
-  static constexpr int digits = numeric_limits<_Tp>::digits10 + 1;
+  static _LIBCPP_CONSTEXPR const int digits = numeric_limits<_Tp>::digits10 + 1;
   using __traits_base<_Tp>::__pow;
   using typename __traits_base<_Tp>::type;
 
@@ -192,8 +190,6 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI _Tp __complement(_Tp
   return _Tp(~__x + 1);
 }
 
-#endif // _LIBCPP_STD_VER >= 17
-
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h
index ac4be9b619355..abd3e9351fd1e 100644
--- a/libcxx/include/__format/formatter_floating_point.h
+++ b/libcxx/include/__format/formatter_floating_point.h
@@ -19,6 +19,7 @@
 #include <__assert>
 #include <__charconv/chars_format.h>
 #include <__charconv/to_chars_floating_point.h>
+#include <__charconv/to_chars_integral.h>
 #include <__charconv/to_chars_result.h>
 #include <__concepts/arithmetic.h>
 #include <__concepts/same_as.h>
diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h
index 996b7620b3e3f..f9f106f4065f9 100644
--- a/libcxx/include/__format/formatter_integral.h
+++ b/libcxx/include/__format/formatter_integral.h
@@ -338,7 +338,7 @@ _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer(
   if (__specs.__std_.__type_ != __format_spec::__type::__hexadecimal_upper_case) [[likely]]
     return __formatter::__write(__first, __last, __ctx.out(), __specs);
 
-  return __formatter::__write_transformed(__first, __last, __ctx.out(), __specs, __formatter::__hex_to_upper);
+  return __formatter::__write_transformed(__first, __last, __ctx.out(), __specs, std::__hex_to_upper);
 }
 
 template <unsigned_integral _Tp, class _CharT, class _FormatContext>
diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h
index e1f1309cd2c53..cc74e3858a401 100644
--- a/libcxx/include/__format/formatter_output.h
+++ b/libcxx/include/__format/formatter_output.h
@@ -45,24 +45,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __formatter {
 
-_LIBCPP_HIDE_FROM_ABI constexpr char __hex_to_upper(char __c) {
-  switch (__c) {
-  case 'a':
-    return 'A';
-  case 'b':
-    return 'B';
-  case 'c':
-    return 'C';
-  case 'd':
-    return 'D';
-  case 'e':
-    return 'E';
-  case 'f':
-    return 'F';
-  }
-  return __c;
-}
-
 struct _LIBCPP_EXPORTED_FROM_ABI __padding_size_result {
   size_t __before_;
   size_t __after_;
diff --git a/libcxx/include/locale b/libcxx/include/locale
index 4f2716fa53d62..046f3cc1708c4 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -201,6 +201,8 @@ template <class charT> class messages_byname;
 #    include <__algorithm/reverse.h>
 #    include <__algorithm/unwrap_iter.h>
 #    include <__assert>
+#    include <__charconv/to_chars_integral.h>
+#    include <__charconv/traits.h>
 #    include <__iterator/access.h>
 #    include <__iterator/back_insert_iterator.h>
 #    include <__iterator/istreambuf_iterator.h>
@@ -210,6 +212,7 @@ template <class charT> class messages_byname;
 #    include <__memory/addressof.h>
 #    include <__memory/unique_ptr.h>
 #    include <__new/exceptions.h>
+#    include <__system_error/errc.h>
 #    include <__type_traits/make_unsigned.h>
 #    include <cerrno>
 #    include <cstdio>
@@ -1221,7 +1224,7 @@ protected:
 
   template <class _Integral>
   _LIBCPP_HIDE_FROM_ABI inline _OutputIterator
-  __do_put_integral(iter_type __s, ios_base& __iob, char_type __fl, _Integral __v, char const* __len) const;
+  __do_put_integral(iter_type __s, ios_base& __iob, char_type __fl, _Integral __v) const;
 
   template <class _Float>
   _LIBCPP_HIDE_FROM_ABI inline _OutputIterator
@@ -1247,30 +1250,68 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_ty
 template <class _CharT, class _OutputIterator>
 template <class _Integral>
 _LIBCPP_HIDE_FROM_ABI inline _OutputIterator num_put<_CharT, _OutputIterator>::__do_put_integral(
-    iter_type __s, ios_base& __iob, char_type __fl, _Integral __v, char const* __len) const {
+    iter_type __s, ios_base& __iob, char_type __fl, _Integral __v) const {
   // Stage 1 - Get number in narrow char
-  char __fmt[8] = {'%', 0};
-  this->__format_int(__fmt + 1, __len, is_signed<_Integral>::value, __iob.flags());
+
   // Worst case is octal, with showbase enabled. Note that octal is always
   // printed as an unsigned value.
   using _Unsigned = typename make_unsigned<_Integral>::type;
-  _LIBCPP_CONSTEXPR const unsigned __nbuf =
+  _LIBCPP_CONSTEXPR const unsigned __buffer_size =
       (numeric_limits<_Unsigned>::digits / 3)          // 1 char per 3 bits
       + ((numeric_limits<_Unsigned>::digits % 3) != 0) // round up
       + 2;                                             // base prefix + terminating null character
-  char __nar[__nbuf];
-  _LIBCPP_DIAGNOSTIC_PUSH
-  _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wformat-nonliteral")
-  _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral")
-  int __nc = __locale::__snprintf(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v);
-  _LIBCPP_DIAGNOSTIC_POP
-  char* __ne = __nar + __nc;
-  char* __np = this->__identify_padding(__nar, __ne, __iob);
+
+  char __char_buffer[__buffer_size];
+  char* __buffer_ptr = __char_buffer;
+
+  auto __flags = __iob.flags();
+
+  auto __basefield = (__flags & ios_base::basefield);
+
+  // Extract base
+  int __base = 10;
+  if (__basefield == ios_base::oct)
+    __base = 8;
+  else if (__basefield == ios_base::hex)
+    __base = 16;
+
+  // Print '-' and make the argument unsigned
+  auto __uval = std::__to_unsigned_like(__v);
+  if (__basefield != ios_base::oct && __basefield != ios_base::hex && __v < 0) {
+    *__buffer_ptr++ = '-';
+    __uval          = std::__complement(__uval);
+  }
+
+  // Maybe add '+' prefix. showpos applies to every non-negative value, including zero (printf's "%+d").
+  if (std::is_signed<_Integral>::value && (__flags & ios_base::showpos) && __basefield != ios_base::oct &&
+      __basefield != ios_base::hex && __v >= 0)
+    *__buffer_ptr++ = '+';
+
+  // Add base prefix
+  if (__flags & ios_base::showbase) {
+    if (__basefield == ios_base::oct) {
+      *__buffer_ptr++ = '0';
+    } else if (__basefield == ios_base::hex) {
+      *__buffer_ptr++ = '0';
+      *__buffer_ptr++ = (__flags & ios_base::uppercase ? 'X' : 'x');
+    }
+  }
+
+  auto __res = std::__to_chars_integral(__buffer_ptr, __char_buffer + __buffer_size, __uval, __base);
+  _LIBCPP_ASSERT_INTERNAL(__res.__ec == std::errc(), "to_chars: invalid maximum buffer size computed?");
+
+  // Make letters uppercase
+  if (__flags & ios_base::hex && __flags & ios_base::uppercase) {
+    for (; __buffer_ptr != __res.__ptr; ++__buffer_ptr)
+      *__buffer_ptr = std::__hex_to_upper(*__buffer_ptr);
+  }
+
+  char* __np = this->__identify_padding(__char_buffer, __res.__ptr, __iob);
   // Stage 2 - Widen __nar while adding thousands separators
-  char_type __o[2 * (__nbuf - 1) - 1];
+  char_type __o[2 * (__buffer_size - 1) - 1];
   char_type* __op; // pad here
   char_type* __oe; // end of output
-  this->__widen_and_group_int(__nar, __np, __ne, __o, __op, __oe, __iob.getloc());
+  this->__widen_and_group_int(__char_buffer, __np, __res.__ptr, __o, __op, __oe, __iob.getloc());
   // [__o, __oe) contains thousands_sep'd wide number
   // Stage 3 & 4
   return std::__pad_and_output(__s, __o, __op, __oe, __iob, __fl);
@@ -1279,25 +1320,25 @@ _LIBCPP_HIDE_FROM_ABI inline _OutputIterator num_put<_CharT, _OutputIterator>::_
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long __v) const {
-  return this->__do_put_integral(__s, __iob, __fl, __v, "l");
+  return this->__do_put_integral(__s, __iob, __fl, __v);
 }
 
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long long __v) const {
-  return this->__do_put_integral(__s, __iob, __fl, __v, "ll");
+  return this->__do_put_integral(__s, __iob, __fl, __v);
 }
 
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long __v) const {
-  return this->__do_put_integral(__s, __iob, __fl, __v, "l");
+  return this->__do_put_integral(__s, __iob, __fl, __v);
 }
 
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long long __v) const {
-  return this->__do_put_integral(__s, __iob, __fl, __v, "ll");
+  return this->__do_put_integral(__s, __iob, __fl, __v);
 }
 
 template <class _CharT, class _OutputIterator>
@@ -1365,26 +1406,11 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_ty
 template <class _CharT, class _OutputIterator>
 _OutputIterator
 num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, const void* __v) const {
-  // Stage 1 - Get pointer in narrow char
-  const unsigned __nbuf = 20;
-  char __nar[__nbuf];
-  int __nc   = __locale::__snprintf(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, "%p", __v);
-  char* __ne = __nar + __nc;
-  char* __np = this->__identify_padding(__nar, __ne, __iob);
-  // Stage 2 - Widen __nar
-  char_type __o[2 * (__nbuf - 1) - 1];
-  char_type* __op; // pad here
-  char_type* __oe; // end of output
-  const ctype<char_type>& __ct = std::use_facet<ctype<char_type> >(__iob.getloc());
-  __ct.widen(__nar, __ne, __o);
-  __oe = __o + (__ne - __nar);
-  if (__np == __ne)
-    __op = __oe;
-  else
-    __op = __o + (__np - __nar);
-  // [__o, __oe) contains wide number
-  // Stage 3 & 4
-  return std::__pad_and_output(__s, __o, __op, __oe, __iob, __fl);
+  auto __flags = __iob.flags();
+  __iob.flags((__flags & ~ios_base::basefield & ~ios_base::uppercase) | ios_base::hex | ios_base::showbase);
+  auto __res = __do_put_integral(__s, __iob, __fl, reinterpret_cast<uintptr_t>(__v));
+  __iob.flags(__flags);
+  return __res;
 }
 
 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put<char>;
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 0f2016e7f5d15..2071b6b52eaef 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -917,7 +917,10 @@ module std [system] {
     module to_chars                   { header "__charconv/to_chars.h" }
     module to_chars_base_10           { header "__charconv/to_chars_base_10.h" }
     module to_chars_floating_point    { header "__charconv/to_chars_floating_point.h" }
-    module to_chars_integral          { header "__charconv/to_chars_integral.h" }
+    module to_chars_integral          {
+      header "__charconv/to_chars_integral.h"
+      export std.charconv.to_chars_result
+    }
     module to_chars_result            { header "__charconv/to_chars_result.h" }
     module traits                     { header "__charconv/traits.h" }
 
diff --git a/libcxx/test/benchmarks/locale/num_put.bench.cpp b/libcxx/test/benchmarks/locale/num_put.bench.cpp
new file mode 100644
index 0000000000000..a39bd09e190f9
--- /dev/null
+++ b/libcxx/test/benchmarks/locale/num_put.bench.cpp
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+#include <ios>
+#include <locale>
+
+#include <benchmark/benchmark.h>
+
+struct num_put : std::num_put<char, char*> {}; // output iterator is a raw pointer into a caller-provided buffer
+
+// Times num_put::put for a T holding 123, formatted via a std::ios with a null stream buffer into a stack buffer.
+template <class T>
+void BM_num_put(benchmark::State& state) {
+  auto val = T(123);
+  std::ios ios(nullptr);
+  num_put np;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(val);
+    char buf[64]; // real storage for the output; an empty std::string's begin() has no writable elements
+    benchmark::DoNotOptimize(np.put(buf, ios, ' ', val));
+  }
+}
+BENCHMARK(BM_num_put<bool>);
+BENCHMARK(BM_num_put<long>);
+BENCHMARK(BM_num_put<long long>);
+BENCHMARK(BM_num_put<unsigned long>);
+BENCHMARK(BM_num_put<unsigned long long>);
+BENCHMARK(BM_num_put<double>);
+BENCHMARK(BM_num_put<long double>);
+BENCHMARK(BM_num_put<const void*>);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
index 13152742d5fe3..61fdcde26d11d 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
@@ -12,40 +12,32 @@
 
 // iter_type put(iter_type s, ios_base& iob, char_type fill, void* v) const;
 
-#include <locale>
-#include <ios>
+// XFAIL: FROZEN-CXX03-HEADERS-FIXME
+
 #include <cassert>
-#include <streambuf>
-#include "test_macros.h"
+#include <ios>
+#include <locale>
+
 #include "test_iterators.h"
 
 typedef std::num_put<char, cpp17_output_iterator<char*> > F;
 
-class my_facet
-    : public F
-{
+class my_facet : public F {
 public:
-    explicit my_facet(std::size_t refs = 0)
-        : F(refs) {}
+  explicit my_facet(std::size_t refs = 0) : F(refs) {}
 };
 
-int main(int, char**)
-{
-    const my_facet f(1);
-    {
-        std::ios ios(0);
-        void* v = 0;
-        char str[50];
-        cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), ios, '*', v);
-        std::string ex(str, base(iter));
-        char expected_str[32] = {};
-        // num_put::put uses %p for pointer types, but the exact format of %p is
-        // implementation defined behavior for the C library. Compare output to
-        // snprintf for portability.
-        int rc = snprintf(expected_str, sizeof(expected_str), "%p", v);
-        assert(rc > 0);
-        assert(ex == expected_str);
-    }
+int main(int, char**) {
+  const my_facet f(1);
+  {
+    std::ios ios(nullptr);
+    void* v = nullptr;
+    char str[50];
+    cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), ios, '*', v);
+    std::string ex(str, base(iter));
+    assert(!ex.empty());        // portable part: only require that some text was produced for the null pointer
+    LIBCPP_ASSERT(ex == "0x0"); // exact spelling is no longer compared to snprintf; presumably a libc++-only check
+  }
 
   return 0;
 }