[libcxx-commits] [libcxx] [libc++] Avoid more <format> code in <vector> (PR #185596)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Tue Mar 10 02:26:54 PDT 2026


https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/185596

>From dfd5f73365b13b81c3c3526570ab5d9550341f7e Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Mon, 16 Feb 2026 09:06:02 +0100
Subject: [PATCH] [libc++] Avoid more <format> code in <vector>

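Move the data members of `__format_spec::__parser` (together with the
enums and `__code_point` they depend on) into a new `__parser_data`
struct in `<__format/parser_std_format_spec_data.h>`, and make
`formatter<bool>` store a `__parser_data` instead of a full `__parser`.
`formatter_bool.h` then no longer has to include
`parser_std_format_spec.h`; the parsing logic moves into
`__formatter_bool_parse` in `formatter_bool_impl.h`.

This makes it ~45% faster to parse `<vector>` on my system.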
---
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__format/formatter_bool.h      |  14 +-
 libcxx/include/__format/formatter_bool_impl.h |  19 ++-
 libcxx/include/__format/formatter_string.h    |   2 +-
 libcxx/include/__format/formatter_tuple.h     |   2 +-
 .../include/__format/parser_std_format_spec.h | 125 ++++-------------
 .../__format/parser_std_format_spec_data.h    | 132 ++++++++++++++++++
 libcxx/include/__format/range_formatter.h     |   2 +-
 libcxx/include/__thread/formatter.h           |   2 +-
 libcxx/include/module.modulemap.in            |   6 +-
 libcxx/include/vector                         |   2 +-
 .../test/libcxx/transitive_includes/cxx26.csv |   2 -
 12 files changed, 194 insertions(+), 115 deletions(-)
 create mode 100644 libcxx/include/__format/parser_std_format_spec_data.h

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 53165f0336b2d..49e993f3845c7 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -415,6 +415,7 @@ set(files
   __format/formatter_tuple.h
   __format/indic_conjunct_break_table.h
   __format/parser_std_format_spec.h
+  __format/parser_std_format_spec_data.h
   __format/range_default_formatter.h
   __format/range_format.h
   __format/range_formatter.h
diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h
index 2280ff131bcde..be34a1b0de452 100644
--- a/libcxx/include/__format/formatter_bool.h
+++ b/libcxx/include/__format/formatter_bool.h
@@ -13,7 +13,7 @@
 #include <__config>
 #include <__format/concepts.h>
 #include <__format/formatter.h>
-#include <__format/parser_std_format_spec.h>
+#include <__format/parser_std_format_spec_data.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -23,17 +23,19 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 20
 
+template <__fmt_char_type _CharT, class _ParseContext>
+_LIBCPP_HIDE_FROM_ABI constexpr
+    typename _ParseContext::iterator __formatter_bool_parse(__format_spec::__parser_data<_CharT>&, _ParseContext&);
+
 template <__fmt_char_type _CharT, class _FormatContext>
 _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
-__formatter_bool_format(bool __value, __format_spec::__parser<_CharT>, _FormatContext&);
+__formatter_bool_format(bool __value, __format_spec::__parser_data<_CharT>, _FormatContext&);
 
 template <__fmt_char_type _CharT>
 struct formatter<bool, _CharT> {
   template <class _ParseContext>
   _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) {
-    typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral);
-    __format_spec::__process_parsed_bool(__parser_, "a bool");
-    return __result;
+    return std::__formatter_bool_parse(__parser_, __ctx);
   }
 
   template <class _FormatContext>
@@ -41,7 +43,7 @@ struct formatter<bool, _CharT> {
     return std::__formatter_bool_format(__value, __parser_, __ctx);
   }
 
-  __format_spec::__parser<_CharT> __parser_;
+  __format_spec::__parser_data<_CharT> __parser_;
 };
 
 #  if _LIBCPP_STD_VER >= 23
diff --git a/libcxx/include/__format/formatter_bool_impl.h b/libcxx/include/__format/formatter_bool_impl.h
index ef1c1a0e77b50..2474bb91988fb 100644
--- a/libcxx/include/__format/formatter_bool_impl.h
+++ b/libcxx/include/__format/formatter_bool_impl.h
@@ -16,6 +16,7 @@
 #include <__format/formatter_bool.h>
 #include <__format/formatter_integral.h>
 #include <__format/parser_std_format_spec.h>
+#include <__utility/scope_guard.h>
 #include <__utility/unreachable.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -26,11 +27,25 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-// This function is separated from formatter<bool> to avoid pulling in a bunch of code from <format> that we aren't
+// These functions are separated from formatter<bool> to avoid pulling in a bunch of code from <format> that we aren't
 // required to provide in other headers where we need formatter<bool> itself to be complete.
+
+template <__fmt_char_type _CharT, class _ParseContext>
+_LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator
+__formatter_bool_parse(__format_spec::__parser_data<_CharT>& __parser_data, _ParseContext& __ctx) {
+  __format_spec::__parser<_CharT> __parser(__parser_data);
+  __scope_guard __guard([&] { __parser_data = __parser; });
+
+  typename _ParseContext::iterator __result = __parser.__parse(__ctx, __format_spec::__fields_integral);
+  __format_spec::__process_parsed_bool(__parser, "a bool");
+  return __result;
+}
+
 template <__fmt_char_type _CharT, class _FormatContext>
 _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
-__formatter_bool_format(bool __value, __format_spec::__parser<_CharT> __parser, _FormatContext& __ctx) {
+__formatter_bool_format(bool __value, __format_spec::__parser_data<_CharT> __parser_data, _FormatContext& __ctx) {
+  __format_spec::__parser<_CharT> __parser(__parser_data);
+
   switch (__parser.__type_) {
   case __format_spec::__type::__default:
   case __format_spec::__type::__string:
diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h
index bad6a4d2bb899..8ca922edbe721 100644
--- a/libcxx/include/__format/formatter_string.h
+++ b/libcxx/include/__format/formatter_string.h
@@ -55,7 +55,7 @@ struct __formatter_string {
   _LIBCPP_HIDE_FROM_ABI constexpr void set_debug_format() { __parser_.__type_ = __format_spec::__type::__debug; }
 #  endif
 
-  __format_spec::__parser<_CharT> __parser_{.__alignment_ = __format_spec::__alignment::__left};
+  __format_spec::__parser<_CharT> __parser_ = {{.__alignment_ = __format_spec::__alignment::__left}};
 };
 
 // Formatter const char*.
diff --git a/libcxx/include/__format/formatter_tuple.h b/libcxx/include/__format/formatter_tuple.h
index 0b095e6f71af4..f27c5cb4c6596 100644
--- a/libcxx/include/__format/formatter_tuple.h
+++ b/libcxx/include/__format/formatter_tuple.h
@@ -126,7 +126,7 @@ struct __formatter_tuple {
     return std::ranges::copy(__closing_bracket_, __ctx.out()).out;
   }
 
-  __format_spec::__parser<_CharT> __parser_{.__alignment_ = __format_spec::__alignment::__left};
+  __format_spec::__parser<_CharT> __parser_ = {{.__alignment_ = __format_spec::__alignment::__left}};
 
 private:
   tuple<formatter<remove_cvref_t<_Args>, _CharT>...> __underlying_;
diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
index 99ab3dc23c295..822dfce2c365b 100644
--- a/libcxx/include/__format/parser_std_format_spec.h
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -26,6 +26,7 @@
 #include <__format/format_error.h>
 #include <__format/format_parse_context.h>
 #include <__format/format_string.h>
+#include <__format/parser_std_format_spec_data.h>
 #include <__format/unicode.h>
 #include <__format/width_estimation_table.h>
 #include <__iterator/concepts.h>
@@ -174,50 +175,6 @@ inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_bra
 inline constexpr __fields __fields_fill_align_width{};
 #  endif
 
-enum class __alignment : uint8_t {
-  /// No alignment is set in the format string.
-  __default,
-  __left,
-  __center,
-  __right,
-  __zero_padding
-};
-
-enum class __sign : uint8_t {
-  /// No sign is set in the format string.
-  ///
-  /// The sign isn't allowed for certain format-types. By using this value
-  /// it's possible to detect whether or not the user explicitly set the sign
-  /// flag. For formatting purposes it behaves the same as \ref __minus.
-  __default,
-  __minus,
-  __plus,
-  __space
-};
-
-enum class __type : uint8_t {
-  __default = 0,
-  __string,
-  __binary_lower_case,
-  __binary_upper_case,
-  __octal,
-  __decimal,
-  __hexadecimal_lower_case,
-  __hexadecimal_upper_case,
-  __pointer_lower_case,
-  __pointer_upper_case,
-  __char,
-  __hexfloat_lower_case,
-  __hexfloat_upper_case,
-  __scientific_lower_case,
-  __scientific_upper_case,
-  __fixed_lower_case,
-  __fixed_upper_case,
-  __general_lower_case,
-  __general_upper_case,
-  __debug
-};
-
 _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) {
   uint32_t __shift = static_cast<uint32_t>(__t);
   if (__shift == 0)
@@ -256,25 +213,6 @@ struct __chrono {
   bool __month_name_           : 1;
 };
 
-// The fill UCS scalar value.
-//
-// This is always an array, with 1, 2, or 4 elements.
-// The size of the data structure is always 32-bits.
-template <class _CharT>
-struct __code_point;
-
-template <>
-struct __code_point<char> {
-  char __data[4] = {' '};
-};
-
-#  if _LIBCPP_HAS_WIDE_CHARACTERS
-template <>
-struct __code_point<wchar_t> {
-  wchar_t __data[4 / sizeof(wchar_t)] = {L' '};
-};
-#  endif
-
 /// Contains the parsed formatting specifications.
 ///
 /// This contains information for both the std-format-spec and the
@@ -335,8 +273,31 @@ static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
 /// set to zero. That way they can be repurposed if a future revision of the
 /// Standards adds new fields to std-format-spec.
 template <class _CharT>
-class __parser {
+class __parser : public __parser_data<_CharT> {
 public:
+  using __base _LIBCPP_NODEBUG = __parser_data<_CharT>;
+
+  using __base::__alignment_;
+  using __base::__alternate_form_;
+  using __base::__clear_brackets_;
+  using __base::__day_of_year_;
+  using __base::__fill_;
+  using __base::__hour_;
+  using __base::__locale_specific_form_;
+  using __base::__month_name_;
+  using __base::__precision_;
+  using __base::__precision_as_arg_;
+  using __base::__sign_;
+  using __base::__type_;
+  using __base::__week_of_year_;
+  using __base::__weekday_;
+  using __base::__weekday_name_;
+  using __base::__width_;
+  using __base::__width_as_arg_;
+
+  __parser() = default;
+  _LIBCPP_HIDE_FROM_ABI constexpr __parser(__parser_data<_CharT> __data) : __parser_data<_CharT>(__data) {}
+
   // Parses the format specification.
   //
   // Depending on whether the parsing is done compile-time or run-time
@@ -484,7 +445,7 @@ class __parser {
         __format_spec::__throw_invalid_option_format_error(__id, "locale-specific form");
     }
 
-    if ((__create_type_mask(__type_) & __type_mask) == 0) {
+    if ((__format_spec::__create_type_mask(__type_) & __type_mask) == 0) {
       if (std::is_constant_evaluated())
         std::__throw_format_error("The format specifier uses an invalid value for the type option");
       else
@@ -521,40 +482,6 @@ class __parser {
         .__fill_{__fill_}};
   }
 
-  __alignment __alignment_     : 3 {__alignment::__default};
-  __sign __sign_               : 2 {__sign::__default};
-  bool __alternate_form_       : 1 {false};
-  bool __locale_specific_form_ : 1 {false};
-  bool __clear_brackets_       : 1 {false};
-  __type __type_{__type::__default};
-
-  // These flags are only used for formatting chrono. Since the struct has
-  // padding space left it's added to this structure.
-  bool __hour_ : 1 {false};
-
-  bool __weekday_name_ : 1 {false};
-  bool __weekday_      : 1 {false};
-
-  bool __day_of_year_  : 1 {false};
-  bool __week_of_year_ : 1 {false};
-
-  bool __month_name_ : 1 {false};
-
-  uint8_t __reserved_0_ : 2 {0};
-  uint8_t __reserved_1_ : 6 {0};
-  // These two flags are only used internally and not part of the
-  // __parsed_specifications. Therefore put them at the end.
-  bool __width_as_arg_     : 1 {false};
-  bool __precision_as_arg_ : 1 {false};
-
-  /// The requested width, either the value or the arg-id.
-  int32_t __width_{0};
-
-  /// The requested precision, either the value or the arg-id.
-  int32_t __precision_{-1};
-
-  __code_point<_CharT> __fill_{};
-
 private:
   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
     switch (__c) {
diff --git a/libcxx/include/__format/parser_std_format_spec_data.h b/libcxx/include/__format/parser_std_format_spec_data.h
new file mode 100644
index 0000000000000..b4e50cfb35a9e
--- /dev/null
+++ b/libcxx/include/__format/parser_std_format_spec_data.h
@@ -0,0 +1,132 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_DATA_H
+#define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_DATA_H
+
+#include <__config>
+#include <cstdint>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 20
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace __format_spec {
+
+enum class __alignment : uint8_t {
+  /// No alignment is set in the format string.
+  __default,
+  __left,
+  __center,
+  __right,
+  __zero_padding
+};
+
+enum class __sign : uint8_t {
+  /// No sign is set in the format string.
+  ///
+  /// The sign isn't allowed for certain format-types. By using this value
+  /// it's possible to detect whether or not the user explicitly set the sign
+  /// flag. For formatting purposes it behaves the same as \ref __minus.
+  __default,
+  __minus,
+  __plus,
+  __space
+};
+
+enum class __type : uint8_t {
+  __default = 0,
+  __string,
+  __binary_lower_case,
+  __binary_upper_case,
+  __octal,
+  __decimal,
+  __hexadecimal_lower_case,
+  __hexadecimal_upper_case,
+  __pointer_lower_case,
+  __pointer_upper_case,
+  __char,
+  __hexfloat_lower_case,
+  __hexfloat_upper_case,
+  __scientific_lower_case,
+  __scientific_upper_case,
+  __fixed_lower_case,
+  __fixed_upper_case,
+  __general_lower_case,
+  __general_upper_case,
+  __debug
+};
+
+// The fill UCS scalar value.
+//
+// This is always an array, with 1, 2, or 4 elements.
+// The size of the data structure is always 32-bits.
+template <class _CharT>
+struct __code_point;
+
+template <>
+struct __code_point<char> {
+  char __data[4] = {' '};
+};
+
+#  if _LIBCPP_HAS_WIDE_CHARACTERS
+template <>
+struct __code_point<wchar_t> {
+  wchar_t __data[4 / sizeof(wchar_t)] = {L' '};
+};
+#  endif
+
+template <class _CharT>
+struct __parser_data {
+  __alignment __alignment_     : 3 {__alignment::__default};
+  __sign __sign_               : 2 {__sign::__default};
+  bool __alternate_form_       : 1 {false};
+  bool __locale_specific_form_ : 1 {false};
+  bool __clear_brackets_       : 1 {false};
+  __type __type_{__type::__default};
+
+  // These flags are only used for formatting chrono. Since the struct has
+  // padding space left it's added to this structure.
+  bool __hour_ : 1 {false};
+
+  bool __weekday_name_ : 1 {false};
+  bool __weekday_      : 1 {false};
+
+  bool __day_of_year_  : 1 {false};
+  bool __week_of_year_ : 1 {false};
+
+  bool __month_name_ : 1 {false};
+
+  uint8_t __reserved_0_ : 2 {0};
+  uint8_t __reserved_1_ : 6 {0};
+  // These two flags are only used internally and not part of the
+  // __parsed_specifications. Therefore put them at the end.
+  bool __width_as_arg_     : 1 {false};
+  bool __precision_as_arg_ : 1 {false};
+
+  /// The requested width, either the value or the arg-id.
+  int32_t __width_{0};
+
+  /// The requested precision, either the value or the arg-id.
+  int32_t __precision_{-1};
+
+  __code_point<_CharT> __fill_{};
+};
+
+} // namespace __format_spec
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 20
+
+#endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_DATA_H
diff --git a/libcxx/include/__format/range_formatter.h b/libcxx/include/__format/range_formatter.h
index 06d2b4cb4b9f4..0240ef10b3580 100644
--- a/libcxx/include/__format/range_formatter.h
+++ b/libcxx/include/__format/range_formatter.h
@@ -207,7 +207,7 @@ struct range_formatter {
     return ranges::copy(__closing_bracket_, __ctx.out()).out;
   }
 
-  __format_spec::__parser<_CharT> __parser_{.__alignment_ = __format_spec::__alignment::__left};
+  __format_spec::__parser<_CharT> __parser_ = {{.__alignment_ = __format_spec::__alignment::__left}};
 
 private:
   template <contiguous_iterator _Iterator>
diff --git a/libcxx/include/__thread/formatter.h b/libcxx/include/__thread/formatter.h
index 826607d47b469..f4169265aeecc 100644
--- a/libcxx/include/__thread/formatter.h
+++ b/libcxx/include/__thread/formatter.h
@@ -68,7 +68,7 @@ struct formatter<__thread_id, _CharT> {
     return __formatter::__format_integer(reinterpret_cast<_Cp>(__get_underlying_id(__id)), __ctx, __specs);
   }
 
-  __format_spec::__parser<_CharT> __parser_{.__alignment_ = __format_spec::__alignment::__right};
+  __format_spec::__parser<_CharT> __parser_ = {{.__alignment_ = __format_spec::__alignment::__right}};
 };
 
 #  endif // _LIBCPP_HAS_THREADS
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index d6e8289b7c8b0..d56a9e2d16ee7 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -1384,7 +1384,11 @@ module std [system] {
     module formatter_tuple                    { header "__format/formatter_tuple.h" }
     module fwd                                { header "__fwd/format.h" }
     module indic_conjunct_break_table         { header "__format/indic_conjunct_break_table.h" }
-    module parser_std_format_spec             { header "__format/parser_std_format_spec.h" }
+    module parser_std_format_spec             {
+      header "__format/parser_std_format_spec.h"
+      export std.format.parser_std_format_spec_data
+    }
+    module parser_std_format_spec_data        { header "__format/parser_std_format_spec_data.h" }
     module range_default_formatter            { header "__format/range_default_formatter.h" }
     module range_format                       { header "__format/range_format.h" }
     module range_formatter                    { header "__format/range_formatter.h" }
diff --git a/libcxx/include/vector b/libcxx/include/vector
index 7f260a096ca60..21ef0e4476d1c 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -354,6 +354,7 @@ template<class T, class charT> requires is-vector-bool-reference<T> // Since C++
 #    include <clocale>
 #    include <cstddef>
 #    include <cstdlib>
+#    include <string>
 #    include <typeinfo>
 #  endif
 
@@ -368,7 +369,6 @@ template<class T, class charT> requires is-vector-bool-reference<T> // Since C++
 #      include <locale>
 #    endif
 #    include <optional>
-#    include <string>
 #    include <string_view>
 #    include <tuple>
 #    include <type_traits>
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 12716c7f496b1..c8b6d136c6147 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -757,7 +757,6 @@ queue initializer_list
 queue iosfwd
 queue limits
 queue stdexcept
-queue string
 queue string_view
 queue tuple
 queue vector
@@ -1129,7 +1128,6 @@ vector initializer_list
 vector iosfwd
 vector limits
 vector stdexcept
-vector string
 vector string_view
 vector tuple
 vector version



More information about the libcxx-commits mailing list