[libcxx-commits] [libcxx] [libc++] Optimize num_get integral functions (PR #121795)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Thu Feb 27 13:48:47 PST 2025


https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/121795

>From 0ffa10e1e2325d91d114f0b180a01478cfea7cb8 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 5 Jan 2025 13:14:32 +0100
Subject: [PATCH] [libc++] Optimize num_get integral functions

---
 libcxx/docs/ReleaseNotes/21.rst               |   2 +
 libcxx/include/CMakeLists.txt                 |   1 -
 libcxx/include/__algorithm/simd_utils.h       |  26 ++
 libcxx/include/__locale_dir/locale_base_api.h |  19 -
 .../locale_base_api/bsd_locale_fallbacks.h    |  10 -
 .../__locale_dir/locale_base_api/ibm.h        |  11 -
 .../__locale_dir/locale_base_api/musl.h       |  31 --
 .../include/__locale_dir/support/bsd_like.h   |  15 -
 libcxx/include/__locale_dir/support/fuchsia.h |   7 -
 libcxx/include/__locale_dir/support/linux.h   |  23 -
 .../__locale_dir/support/no_locale/strtonum.h |   9 -
 libcxx/include/__locale_dir/support/windows.h |  25 --
 .../__support/xlocale/__strtonum_fallback.h   |   8 -
 libcxx/include/locale                         | 413 +++++++-----------
 libcxx/src/locale.cpp                         |  48 ++
 .../facet.num.get.members/get_long.pass.cpp   |  12 +
 16 files changed, 255 insertions(+), 405 deletions(-)
 delete mode 100644 libcxx/include/__locale_dir/locale_base_api/musl.h

diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 30c6cc5eaf0c2..435e882932e0e 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -49,6 +49,8 @@ Improvements and New Features
 
 - Updated formatting library to Unicode 16.0.0.
 
+- The ``num_get::do_get`` integral overloads have been optimized, resulting in a performance improvement of up to 2.8x.
+
 Deprecations and Removals
 -------------------------
 
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index ee1d7b872b364..6f78a1ba84adb 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -506,7 +506,6 @@ set(files
   __locale_dir/locale_base_api/android.h
   __locale_dir/locale_base_api/bsd_locale_fallbacks.h
   __locale_dir/locale_base_api/ibm.h
-  __locale_dir/locale_base_api/musl.h
   __locale_dir/locale_base_api/openbsd.h
   __locale_dir/pad_and_output.h
   __locale_dir/support/apple.h
diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h
index 4e03723a32854..fd033bb6a73f7 100644
--- a/libcxx/include/__algorithm/simd_utils.h
+++ b/libcxx/include/__algorithm/simd_utils.h
@@ -116,11 +116,37 @@ template <class _VecT, class _Iter>
   }(make_index_sequence<__simd_vector_size_v<_VecT>>{});
 }
 
+// Load the first _Np elements, zero the rest
+_LIBCPP_DIAGNOSTIC_PUSH
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi")
+template <class _VecT, size_t _Np, class _Iter>
+[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __partial_load(_Iter __iter) noexcept {
+  return [=]<size_t... _LoadIndices, size_t... _ZeroIndices>(
+             index_sequence<_LoadIndices...>, index_sequence<_ZeroIndices...>) _LIBCPP_ALWAYS_INLINE noexcept {
+    return _VecT{__iter[_LoadIndices]..., ((void)_ZeroIndices, 0)...};
+  }(make_index_sequence<_Np>{}, make_index_sequence<__simd_vector_size_v<_VecT> - _Np>{});
+}
+
+// Create a vector where every elements is __val
+template <class _VecT>
+[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT
+__broadcast(__simd_vector_underlying_type_t<_VecT> __val) {
+  return [&]<std::size_t... _Indices>(index_sequence<_Indices...>) {
+    return _VecT{((void)_Indices, __val)...};
+  }(make_index_sequence<__simd_vector_size_v<_VecT>>());
+}
+_LIBCPP_DIAGNOSTIC_POP
+
 template <class _Tp, size_t _Np>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
   return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
 }
 
+template <class _Tp, size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __none_of(__simd_vector<_Tp, _Np> __vec) noexcept {
+  return !__builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
+}
+
 template <class _Tp, size_t _Np>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
   using __mask_vec = __simd_vector<bool, _Np>;
diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h
index 92916944227d7..cc03333c8b6b9 100644
--- a/libcxx/include/__locale_dir/locale_base_api.h
+++ b/libcxx/include/__locale_dir/locale_base_api.h
@@ -57,8 +57,6 @@
 //  float               __strtof(const char*, char**, __locale_t);
 //  double              __strtod(const char*, char**, __locale_t);
 //  long double         __strtold(const char*, char**, __locale_t);
-//  long long           __strtoll(const char*, char**, __locale_t);
-//  unsigned long long  __strtoull(const char*, char**, __locale_t);
 // }
 //
 // Character manipulation functions
@@ -108,7 +106,6 @@
 //
 //  int     __snprintf(char*, size_t, __locale_t, const char*, ...); // required by the headers
 //  int     __asprintf(char**, __locale_t, const char*, ...);        // required by the headers
-//  int     __sscanf(const char*, __locale_t, const char*, ...);     // required by the headers
 // }
 
 #if defined(__APPLE__)
@@ -133,8 +130,6 @@
 #    include <__locale_dir/locale_base_api/android.h>
 #  elif defined(__OpenBSD__)
 #    include <__locale_dir/locale_base_api/openbsd.h>
-#  elif defined(__wasi__) || _LIBCPP_HAS_MUSL_LIBC
-#    include <__locale_dir/locale_base_api/musl.h>
 #  endif
 
 #  include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h>
@@ -194,15 +189,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return strtoll_l(__nptr, __endptr, __base, __loc);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return strtoull_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
@@ -309,11 +295,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
     char** __s, __locale_t __loc, const char* __format, _Args&&... __args) {
   return std::__libcpp_asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
 }
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return std::__libcpp_sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
-}
 _LIBCPP_DIAGNOSTIC_POP
 #  undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
index b62a1b737e97f..8cdbe0cd15051 100644
--- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
+++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
@@ -125,16 +125,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l(
   return __res;
 }
 
-inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l(
-    const char* __s, locale_t __l, const char* __format, ...) {
-  va_list __va;
-  va_start(__va, __format);
-  __locale_guard __current(__l);
-  int __res = vsscanf(__s, __format, __va);
-  va_end(__va);
-  return __res;
-}
-
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/ibm.h b/libcxx/include/__locale_dir/locale_base_api/ibm.h
index 1d1d15df9f799..47a83eac7df36 100644
--- a/libcxx/include/__locale_dir/locale_base_api/ibm.h
+++ b/libcxx/include/__locale_dir/locale_base_api/ibm.h
@@ -53,11 +53,6 @@ struct __setAndRestore {
 
 // The following are not POSIX routines.  These are quick-and-dirty hacks
 // to make things pretend to work
-inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t locale) {
-  __setAndRestore __newloc(locale);
-  return ::strtoll(__nptr, __endptr, __base);
-}
-
 inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr, locale_t locale) {
   __setAndRestore __newloc(locale);
   return ::strtod(__nptr, __endptr);
@@ -73,12 +68,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double strtold_l(const char* __nptr, char** __
   return ::strtold(__nptr, __endptr);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t locale) {
-  __setAndRestore __newloc(locale);
-  return ::strtoull(__nptr, __endptr, __base);
-}
-
 inline _LIBCPP_HIDE_FROM_ABI
 _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char* fmt, va_list ap) {
   const size_t buff_size = 256;
diff --git a/libcxx/include/__locale_dir/locale_base_api/musl.h b/libcxx/include/__locale_dir/locale_base_api/musl.h
deleted file mode 100644
index 1653214cdba1e..0000000000000
--- a/libcxx/include/__locale_dir/locale_base_api/musl.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// -*- C++ -*-
-//===-----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This adds support for the extended locale functions that are currently
-// missing from the Musl C library.
-//
-// This only works when the specified locale is "C" or "POSIX", but that's
-// about as good as we can do without implementing full xlocale support
-// in Musl.
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
-#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
-
-#include <cstdlib>
-#include <cwchar>
-
-inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoll(__nptr, __endptr, __base);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoull(__nptr, __endptr, __base);
-}
-
-#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
diff --git a/libcxx/include/__locale_dir/support/bsd_like.h b/libcxx/include/__locale_dir/support/bsd_like.h
index 54eb397358d7a..d9a1f7b536cfd 100644
--- a/libcxx/include/__locale_dir/support/bsd_like.h
+++ b/libcxx/include/__locale_dir/support/bsd_like.h
@@ -77,15 +77,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return ::strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::strtoll_l(__nptr, __endptr, __base, __loc);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::strtoull_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
@@ -219,12 +210,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
     char** __s, __locale_t __loc, const char* __format, _Args&&... __args) {
   return ::asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); // non-standard
 }
-
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return ::sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
-}
 _LIBCPP_DIAGNOSTIC_POP
 #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/libcxx/include/__locale_dir/support/fuchsia.h b/libcxx/include/__locale_dir/support/fuchsia.h
index 4b9e63facb19e..528bfeb0cb6e1 100644
--- a/libcxx/include/__locale_dir/support/fuchsia.h
+++ b/libcxx/include/__locale_dir/support/fuchsia.h
@@ -141,13 +141,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
   __locale_guard __current(__loc);
   return ::asprintf(__s, __format, std::forward<_Args>(__args)...); // non-standard
 }
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  __locale_guard __current(__loc);
-  return std::sscanf(__s, __format, std::forward<_Args>(__args)...);
-}
-
 _LIBCPP_DIAGNOSTIC_POP
 #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h
index f1662c0112603..284c6abff34a7 100644
--- a/libcxx/include/__locale_dir/support/linux.h
+++ b/libcxx/include/__locale_dir/support/linux.h
@@ -94,15 +94,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return ::strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::strtoll_l(__nptr, __endptr, __base, __loc);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::strtoull_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
@@ -257,20 +248,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(
   va_end(__va);
   return __res;
 }
-
-#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs
-_LIBCPP_HIDE_FROM_ABI
-#endif
-inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, ...) {
-  va_list __va;
-  va_start(__va, __format);
-  __locale_guard __current(__loc);
-  int __res = std::vsscanf(__s, __format, __va);
-  va_end(__va);
-  return __res;
-}
-
 } // namespace __locale
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/__locale_dir/support/no_locale/strtonum.h b/libcxx/include/__locale_dir/support/no_locale/strtonum.h
index 0e7a32993e736..59544e10e4a4c 100644
--- a/libcxx/include/__locale_dir/support/no_locale/strtonum.h
+++ b/libcxx/include/__locale_dir/support/no_locale/strtonum.h
@@ -34,15 +34,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return std::strtold(__nptr, __endptr);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t) {
-  return std::strtoll(__nptr, __endptr, __base);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t) {
-  return std::strtoull(__nptr, __endptr, __base);
-}
-
 } // namespace __locale
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h
index 0d3089c150081..645fe29f25c40 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -186,14 +186,6 @@ inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr
   return ::_strtod_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::_strtoi64_l(__nptr, __endptr, __base, __loc);
-}
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::_strtoui64_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
@@ -286,23 +278,6 @@ _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snpri
 _LIBCPP_EXPORTED_FROM_ABI
 _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(char** __ret, __locale_t __loc, const char* __format, ...);
 
-_LIBCPP_DIAGNOSTIC_PUSH
-_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat")
-_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates
-#ifdef _LIBCPP_COMPILER_CLANG_BASED
-#  define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__)
-#else
-#  define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */
-#endif
-
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __dest, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return ::_sscanf_l(__dest, __format, __loc, std::forward<_Args>(__args)...);
-}
-_LIBCPP_DIAGNOSTIC_POP
-#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
-
 #if defined(_LIBCPP_BUILDING_LIBRARY)
 struct __locale_guard {
   _LIBCPP_HIDE_FROM_ABI __locale_guard(__locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) {
diff --git a/libcxx/include/__support/xlocale/__strtonum_fallback.h b/libcxx/include/__support/xlocale/__strtonum_fallback.h
index 5275aead35af9..90bd59d36c248 100644
--- a/libcxx/include/__support/xlocale/__strtonum_fallback.h
+++ b/libcxx/include/__support/xlocale/__strtonum_fallback.h
@@ -34,12 +34,4 @@ inline _LIBCPP_HIDE_FROM_ABI long double strtold_l(const char* __nptr, char** __
   return ::strtold(__nptr, __endptr);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoll(__nptr, __endptr, __base);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoull(__nptr, __endptr, __base);
-}
-
 #endif // _LIBCPP___SUPPORT_XLOCALE_STRTONUM_FALLBACK_H
diff --git a/libcxx/include/locale b/libcxx/include/locale
index 86e327fde5695..023ec151fe880 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -198,7 +198,9 @@ template <class charT> class messages_byname;
 #    include <__algorithm/equal.h>
 #    include <__algorithm/find.h>
 #    include <__algorithm/max.h>
+#    include <__algorithm/min.h>
 #    include <__algorithm/reverse.h>
+#    include <__algorithm/simd_utils.h>
 #    include <__algorithm/unwrap_iter.h>
 #    include <__assert>
 #    include <__iterator/access.h>
@@ -375,7 +377,7 @@ struct _LIBCPP_EXPORTED_FROM_ABI __num_get_base {
   static int __get_base(ios_base&);
   static const char __src[33]; // "0123456789abcdefABCDEFxX+-pPiInN"
   // count of leading characters in __src used for parsing integers ("012..X+-")
-  static const size_t __int_chr_cnt = 26;
+  static inline const size_t __int_chr_cnt = 26;
   // count of leading characters in __src used for parsing floating-point values ("012..-pP")
   static const size_t __fp_chr_cnt = 28;
 };
@@ -403,7 +405,8 @@ struct __num_get : protected __num_get_base {
 
   [[__deprecated__("This exists only for ABI compatibility")]] static string
   __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep);
-  static int __stage2_int_loop(
+
+  [[__deprecated__("This exists only for ABI compatibility")]] static int __stage2_int_loop(
       _CharT __ct,
       int __base,
       char* __a,
@@ -415,11 +418,22 @@ struct __num_get : protected __num_get_base {
       unsigned*& __g_end,
       _CharT* __atoms);
 
-  _LIBCPP_HIDE_FROM_ABI static string __stage2_int_prep(ios_base& __iob, _CharT& __thousands_sep) {
-    locale __loc                 = __iob.getloc();
-    const numpunct<_CharT>& __np = use_facet<numpunct<_CharT> >(__loc);
-    __thousands_sep              = __np.thousands_sep();
-    return __np.grouping();
+  _LIBCPP_HIDE_FROM_ABI static ptrdiff_t __atoms_offset(const _CharT* __atoms, _CharT __val) {
+#    if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS
+    if constexpr (is_same<_CharT, char>::value) {
+      _LIBCPP_DIAGNOSTIC_PUSH
+      _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi") // This should be removed once https://llvm.org/PR128361 is fixed
+      using __vec   = __simd_vector<char, 32>;
+      __vec __chars = std::__broadcast<__vec>(__val);
+      __vec __cmp   = std::__partial_load<__vec, __int_chr_cnt>(__atoms);
+      auto __res    = __chars == __cmp;
+      if (std::__none_of(__res))
+        return __int_chr_cnt;
+      return std::min(__int_chr_cnt, std::__find_first_set(__res));
+      _LIBCPP_DIAGNOSTIC_POP
+    }
+#    endif
+    return std::find(__atoms, __atoms + __int_chr_cnt, __val) - __atoms;
   }
 
   _LIBCPP_HIDE_FROM_ABI const _CharT* __do_widen(ios_base& __iob, _CharT* __atoms) const {
@@ -452,54 +466,6 @@ string __num_get<_CharT>::__stage2_float_prep(
   return __np.grouping();
 }
 
-template <class _CharT>
-int __num_get<_CharT>::__stage2_int_loop(
-    _CharT __ct,
-    int __base,
-    char* __a,
-    char*& __a_end,
-    unsigned& __dc,
-    _CharT __thousands_sep,
-    const string& __grouping,
-    unsigned* __g,
-    unsigned*& __g_end,
-    _CharT* __atoms) {
-  if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) {
-    *__a_end++ = __ct == __atoms[24] ? '+' : '-';
-    __dc       = 0;
-    return 0;
-  }
-  if (__grouping.size() != 0 && __ct == __thousands_sep) {
-    if (__g_end - __g < __num_get_buf_sz) {
-      *__g_end++ = __dc;
-      __dc       = 0;
-    }
-    return 0;
-  }
-  ptrdiff_t __f = std::find(__atoms, __atoms + __int_chr_cnt, __ct) - __atoms;
-  if (__f >= 24)
-    return -1;
-  switch (__base) {
-  case 8:
-  case 10:
-    if (__f >= __base)
-      return -1;
-    break;
-  case 16:
-    if (__f < 22)
-      break;
-    if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') {
-      __dc       = 0;
-      *__a_end++ = __src[__f];
-      return 0;
-    }
-    return -1;
-  }
-  *__a_end++ = __src[__f];
-  ++__dc;
-  return 0;
-}
-
 template <class _CharT>
 int __num_get<_CharT>::__stage2_float_loop(
     _CharT __ct,
@@ -637,43 +603,39 @@ protected:
   _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS iter_type
   __do_get_floating_point(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Fp& __v) const;
 
-  template <class _Signed>
-  _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS iter_type
-  __do_get_signed(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Signed& __v) const;
-
-  template <class _Unsigned>
+  template <class _MaybeSigned>
   _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS iter_type
-  __do_get_unsigned(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Unsigned& __v) const;
+  __do_get_integral(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _MaybeSigned& __v) const;
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const;
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long& __v) const {
-    return this->__do_get_signed(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long long& __v) const {
-    return this->__do_get_signed(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned short& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned int& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long long& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, float& __v) const {
@@ -695,65 +657,6 @@ protected:
 template <class _CharT, class _InputIterator>
 locale::id num_get<_CharT, _InputIterator>::id;
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp
-__num_get_signed_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) {
-  if (__a != __a_end) {
-    __libcpp_remove_reference_t<decltype(errno)> __save_errno = errno;
-    errno                                                     = 0;
-    char* __p2;
-    long long __ll = __locale::__strtoll(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE);
-    __libcpp_remove_reference_t<decltype(errno)> __current_errno = errno;
-    if (__current_errno == 0)
-      errno = __save_errno;
-    if (__p2 != __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    } else if (__current_errno == ERANGE || __ll < numeric_limits<_Tp>::min() || numeric_limits<_Tp>::max() < __ll) {
-      __err = ios_base::failbit;
-      if (__ll > 0)
-        return numeric_limits<_Tp>::max();
-      else
-        return numeric_limits<_Tp>::min();
-    }
-    return static_cast<_Tp>(__ll);
-  }
-  __err = ios_base::failbit;
-  return 0;
-}
-
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp
-__num_get_unsigned_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) {
-  if (__a != __a_end) {
-    const bool __negate = *__a == '-';
-    if (__negate && ++__a == __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    }
-    __libcpp_remove_reference_t<decltype(errno)> __save_errno = errno;
-    errno                                                     = 0;
-    char* __p2;
-    unsigned long long __ll = __locale::__strtoull(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE);
-    __libcpp_remove_reference_t<decltype(errno)> __current_errno = errno;
-    if (__current_errno == 0)
-      errno = __save_errno;
-    if (__p2 != __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    } else if (__current_errno == ERANGE || numeric_limits<_Tp>::max() < __ll) {
-      __err = ios_base::failbit;
-      return numeric_limits<_Tp>::max();
-    }
-    _Tp __res = static_cast<_Tp>(__ll);
-    if (__negate)
-      __res = -__res;
-    return __res;
-  }
-  __err = ios_base::failbit;
-  return 0;
-}
-
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _Tp __do_strtod(const char* __a, char** __p2);
 
@@ -822,112 +725,157 @@ _InputIterator num_get<_CharT, _InputIterator>::do_get(
   return __b;
 }
 
-// signed
-
 template <class _CharT, class _InputIterator>
-template <class _Signed>
-_InputIterator num_get<_CharT, _InputIterator>::__do_get_signed(
-    iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Signed& __v) const {
+template <class _MaybeSigned>
+_InputIterator num_get<_CharT, _InputIterator>::__do_get_integral(
+    iter_type __first, iter_type __last, ios_base& __iob, ios_base::iostate& __err, _MaybeSigned& __v) const {
+  using _Unsigned = __make_unsigned_t<_MaybeSigned>;
+
   // Stage 1
   int __base = this->__get_base(__iob);
-  // Stage 2
-  char_type __thousands_sep;
-  const int __atoms_size = __num_get_base::__int_chr_cnt;
-  char_type __atoms1[__atoms_size];
-  const char_type* __atoms = this->__do_widen(__iob, __atoms1);
-  string __grouping        = this->__stage2_int_prep(__iob, __thousands_sep);
-  string __buf;
-  __buf.resize(__buf.capacity());
-  char* __a     = &__buf[0];
-  char* __a_end = __a;
+
+  // Stages 2 & 3
+  // These are combined into a single step where we parse the characters and calculate the value in one go instead of
+  // storing the relevant characters first (in an allocated buffer) and parse the characters after we extracted them.
+  // This makes the whole process significantly faster, since we avoid potential allocations and copies.
+
+  const auto& __numpunct    = use_facet<numpunct<_CharT> >(__iob.getloc());
+  char_type __thousands_sep = __numpunct.thousands_sep();
+  string __grouping         = __numpunct.grouping();
+
+  char_type __atoms_buffer[__num_get_base::__int_chr_cnt];
+  const char_type* __atoms = this->__do_widen(__iob, __atoms_buffer);
   unsigned __g[__num_get_base::__num_get_buf_sz];
   unsigned* __g_end = __g;
   unsigned __dc     = 0;
-  for (; __b != __e; ++__b) {
-    if (__a_end == __a + __buf.size()) {
-      size_t __tmp = __buf.size();
-      __buf.resize(2 * __buf.size());
-      __buf.resize(__buf.capacity());
-      __a     = &__buf[0];
-      __a_end = __a + __tmp;
+
+  if (__first == __last) {
+    __err |= ios_base::eofbit;
+    return __first;
+  }
+
+  while (!__grouping.empty() && *__first == __thousands_sep) {
+    ++__first;
+    if (__g_end - __g < this->__num_get_buf_sz) {
+      *__g_end++ = __dc;
+      __dc       = 0;
     }
-    if (this->__stage2_int_loop(
-            *__b,
-            __base,
-            __a,
-            __a_end,
-            __dc,
-            __thousands_sep,
-            __grouping,
-            __g,
-            __g_end,
-            const_cast<char_type*>(__atoms)))
-      break;
   }
-  if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz)
-    *__g_end++ = __dc;
-  // Stage 3
-  __v = std::__num_get_signed_integral<_Signed>(__a, __a_end, __err, __base);
-  // Digit grouping checked
-  __check_grouping(__grouping, __g, __g_end, __err);
-  // EOF checked
-  if (__b == __e)
+
+  bool __negate = false;
+  // __c == '+' || __c == '-'
+  if (auto __c = *__first; __c == __atoms[24] || __c == __atoms[25]) {
+    __negate = __c == __atoms[25];
+    ++__first;
+  }
+
+  if (__first == __last) {
     __err |= ios_base::eofbit;
-  return __b;
-}
+    return __first;
+  }
 
-// unsigned
+  bool __parsed_num = false;
 
-template <class _CharT, class _InputIterator>
-template <class _Unsigned>
-_InputIterator num_get<_CharT, _InputIterator>::__do_get_unsigned(
-    iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Unsigned& __v) const {
-  // Stage 1
-  int __base = this->__get_base(__iob);
-  // Stage 2
-  char_type __thousands_sep;
-  const int __atoms_size = __num_get_base::__int_chr_cnt;
-  char_type __atoms1[__atoms_size];
-  const char_type* __atoms = this->__do_widen(__iob, __atoms1);
-  string __grouping        = this->__stage2_int_prep(__iob, __thousands_sep);
-  string __buf;
-  __buf.resize(__buf.capacity());
-  char* __a     = &__buf[0];
-  char* __a_end = __a;
-  unsigned __g[__num_get_base::__num_get_buf_sz];
-  unsigned* __g_end = __g;
-  unsigned __dc     = 0;
-  for (; __b != __e; ++__b) {
-    if (__a_end == __a + __buf.size()) {
-      size_t __tmp = __buf.size();
-      __buf.resize(2 * __buf.size());
-      __buf.resize(__buf.capacity());
-      __a     = &__buf[0];
-      __a_end = __a + __tmp;
+  // If we don't have a pre-set base, figure it out
+  if (__base == 0) {
+    auto __c = *__first;
+    // __c == '0'
+    if (__c == __atoms[0]) {
+      ++__first;
+      if (__first == __last) {
+        __err |= ios_base::eofbit;
+        return __first;
+      }
+      // __c2 == 'x' || __c2 == 'X'
+      if (auto __c2 = *__first; __c2 == __atoms[22] || __c2 == __atoms[23]) {
+        __base = 16;
+        ++__first;
+      } else {
+        __base = 8;
+      }
+    } else {
+      __base = 10;
     }
-    if (this->__stage2_int_loop(
-            *__b,
-            __base,
-            __a,
-            __a_end,
-            __dc,
-            __thousands_sep,
-            __grouping,
-            __g,
-            __g_end,
-            const_cast<char_type*>(__atoms)))
+  } else if (__base == 16) {
+    // Try to swallow '0x'
+
+    // *__first == '0'
+    if (*__first == __atoms[0]) {
+      ++__first;
+      if (__first == __last) {
+        __err |= ios_base::eofbit;
+        return __first;
+      }
+      // __c == 'x' || __c == 'X'
+      if (auto __c = *__first; __c == __atoms[22] || __c == __atoms[23])
+        ++__first;
+      else
+        __parsed_num = true; // We only swallowed '0', so we've started to parse a number
+    }
+  }
+
+  // Calculate the actual number
+  _Unsigned __val   = 0;
+  bool __overflowed = false;
+  for (; __first != __last; ++__first) {
+    auto __c = *__first;
+    if (!__grouping.empty() && __c == __thousands_sep) {
+      if (__g_end - __g < this->__num_get_buf_sz) {
+        *__g_end++ = __dc;
+        __dc       = 0;
+      }
+      continue;
+    }
+    auto __offset = this->__atoms_offset(__atoms, __c);
+    if (__offset >= 22)
+      break;
+
+    if (__base == 16 && __offset >= 16)
+      __offset -= 6;
+    if (__offset >= __base)
       break;
+    __overflowed |= __builtin_mul_overflow(__val, __base, &__val) || __builtin_add_overflow(__val, __offset, &__val);
+    __parsed_num = true;
+    ++__dc;
   }
+
+  if (!__parsed_num) {
+    __err |= ios_base::failbit;
+    __v = 0;
+  } else if (__overflowed) {
+    __err |= ios_base::failbit;
+    __v = is_signed<_MaybeSigned>::value && __negate
+            ? numeric_limits<_MaybeSigned>::min()
+            : numeric_limits<_MaybeSigned>::max();
+  } else if (!__negate) {
+    if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max())) {
+      __err |= ios_base::failbit;
+      __v = numeric_limits<_MaybeSigned>::max();
+    } else {
+      __v = __val;
+    }
+  } else if (is_signed<_MaybeSigned>::value) {
+    if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) {
+      __err |= ios_base::failbit;
+      __v = numeric_limits<_MaybeSigned>::min();
+    } else if (__val == static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) {
+      __v = numeric_limits<_MaybeSigned>::min();
+    } else {
+      __v = -__val;
+    }
+  } else {
+    __v = -__val;
+  }
+
   if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz)
     *__g_end++ = __dc;
-  // Stage 3
-  __v = std::__num_get_unsigned_integral<_Unsigned>(__a, __a_end, __err, __base);
+
   // Digit grouping checked
   __check_grouping(__grouping, __g, __g_end, __err);
   // EOF checked
-  if (__b == __e)
+  if (__first == __last)
     __err |= ios_base::eofbit;
-  return __b;
+  return __first;
 }
 
 // floating point
@@ -1005,40 +953,13 @@ _InputIterator num_get<_CharT, _InputIterator>::__do_get_floating_point(
 template <class _CharT, class _InputIterator>
 _InputIterator num_get<_CharT, _InputIterator>::do_get(
     iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const {
-  // Stage 1
-  int __base = 16;
-  // Stage 2
-  char_type __atoms[__num_get_base::__int_chr_cnt];
-  char_type __thousands_sep = char_type();
-  string __grouping;
-  std::use_facet<ctype<_CharT> >(__iob.getloc())
-      .widen(__num_get_base::__src, __num_get_base::__src + __num_get_base::__int_chr_cnt, __atoms);
-  string __buf;
-  __buf.resize(__buf.capacity());
-  char* __a     = &__buf[0];
-  char* __a_end = __a;
-  unsigned __g[__num_get_base::__num_get_buf_sz];
-  unsigned* __g_end = __g;
-  unsigned __dc     = 0;
-  for (; __b != __e; ++__b) {
-    if (__a_end == __a + __buf.size()) {
-      size_t __tmp = __buf.size();
-      __buf.resize(2 * __buf.size());
-      __buf.resize(__buf.capacity());
-      __a     = &__buf[0];
-      __a_end = __a + __tmp;
-    }
-    if (this->__stage2_int_loop(*__b, __base, __a, __a_end, __dc, __thousands_sep, __grouping, __g, __g_end, __atoms))
-      break;
-  }
-  // Stage 3
-  __buf.resize(__a_end - __a);
-  if (__locale::__sscanf(__buf.c_str(), _LIBCPP_GET_C_LOCALE, "%p", &__v) != 1)
-    __err = ios_base::failbit;
-  // EOF checked
-  if (__b == __e)
-    __err |= ios_base::eofbit;
-  return __b;
+  auto __flags = __iob.flags();
+  __iob.flags((__flags & ~ios_base::basefield & ~ios_base::uppercase) | ios_base::hex | ios_base::showbase);
+  uintptr_t __ptr;
+  auto __res = __do_get_integral(__b, __e, __iob, __err, __ptr);
+  __iob.flags(__flags);
+  __v = reinterpret_cast<void*>(__ptr);
+  return __res;
 }
 
 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get<char>;
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 9ea59a3a19ca4..6cd83da904a5d 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -5660,6 +5660,54 @@ string __num_get<_CharT>::__stage2_int_prep(ios_base& __iob, _CharT* __atoms, _C
   return __np.grouping();
 }
 
+template <class _CharT>
+int __num_get<_CharT>::__stage2_int_loop(
+    _CharT __ct,
+    int __base,
+    char* __a,
+    char*& __a_end,
+    unsigned& __dc,
+    _CharT __thousands_sep,
+    const string& __grouping,
+    unsigned* __g,
+    unsigned*& __g_end,
+    _CharT* __atoms) {
+  if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) {
+    *__a_end++ = __ct == __atoms[24] ? '+' : '-';
+    __dc       = 0;
+    return 0;
+  }
+  if (__grouping.size() != 0 && __ct == __thousands_sep) {
+    if (__g_end - __g < __num_get_buf_sz) {
+      *__g_end++ = __dc;
+      __dc       = 0;
+    }
+    return 0;
+  }
+  ptrdiff_t __f = __atoms_offset(__atoms, __ct);
+  if (__f >= 24)
+    return -1;
+  switch (__base) {
+  case 8:
+  case 10:
+    if (__f >= __base)
+      return -1;
+    break;
+  case 16:
+    if (__f < 22)
+      break;
+    if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') {
+      __dc       = 0;
+      *__a_end++ = __src[__f];
+      return 0;
+    }
+    return -1;
+  }
+  *__a_end++ = __src[__f];
+  ++__dc;
+  return 0;
+}
+
 template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<char>;
 _LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<wchar_t>;)
 
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
index 03b74ebf53936..6bf38bd8c72a1 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
@@ -102,6 +102,18 @@ int main(int, char**)
         assert(err == ios.goodbit);
         assert(v == 291);
     }
+    {
+        const char str[] = "a123";
+        std::dec(ios);
+        std::ios_base::iostate err = ios.goodbit;
+        cpp17_input_iterator<const char*> iter =
+            f.get(cpp17_input_iterator<const char*>(str),
+                  cpp17_input_iterator<const char*>(str+sizeof(str)),
+                  ios, err, v);
+        assert(base(iter) == str);
+        assert(err == ios.failbit);
+        assert(v == 0);
+    }
     {
         const char str[] = "0x123";
         std::hex(ios);



More information about the libcxx-commits mailing list