[libcxx-commits] [libcxx] [libc++] Add build flags to optimize <format> and <charconv> for code size. (PR #98003)

Robbie Litchfield via libcxx-commits libcxx-commits at lists.llvm.org
Mon Jul 8 02:52:02 PDT 2024


https://github.com/BlamKiwi updated https://github.com/llvm/llvm-project/pull/98003

>From 3dee8a480ce6cbf649af17e2814d52c6df181d08 Mon Sep 17 00:00:00 2001
From: Robbie Litchfield <blam.kiwi at gmail.com>
Date: Mon, 8 Jul 2024 17:11:00 +1200
Subject: [PATCH 1/4] Add build flag to enable code size optimizations

---
 libcxx/CMakeLists.txt           | 9 +++++++++
 libcxx/include/__config_site.in | 1 +
 2 files changed, 10 insertions(+)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index e098bd574eec7..6d70116a228e5 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -99,6 +99,14 @@ option(LIBCXX_ENABLE_WIDE_CHARACTERS
    support the C functionality for wide characters. When wide characters are
    not supported, several parts of the library will be disabled, notably the
    wide character specializations of std::basic_string." ON)
+option(LIBCXX_FORMAT_OPTIMIZE_SIZE
+  "Whether to optimize <format> for code size over performance. Optimizing
+   for size can be useful when porting to platforms that have limited memory
+   resources. When optimizing for size, libc++ may take codepaths that eliminate 
+   expensive lookup tables or allow the compiler to agressively remove unused 
+   functions and data. When optimizing for size, formatting and <charconv> may 
+   become significantly slower. Disabling localization, unicode and wide character
+   support can further minimize code size." OFF)
 
 # To use time zone support in libc++ the platform needs to have the IANA
 # database installed. Libc++ will fail to build if this is enabled on a
@@ -783,6 +791,7 @@ config_define_if_not(LIBCXX_ENABLE_UNICODE _LIBCPP_HAS_NO_UNICODE)
 config_define_if_not(LIBCXX_ENABLE_WIDE_CHARACTERS _LIBCPP_HAS_NO_WIDE_CHARACTERS)
 config_define_if_not(LIBCXX_ENABLE_TIME_ZONE_DATABASE _LIBCPP_HAS_NO_TIME_ZONE_DATABASE)
 config_define_if_not(LIBCXX_ENABLE_VENDOR_AVAILABILITY_ANNOTATIONS _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS)
+config_define_if(LIBCXX_FORMAT_OPTIMIZE_SIZE _LIBCPP_FORMAT_OPTIMIZE_SIZE)
 
 # TODO(LLVM 19): Produce a deprecation warning.
 if (LIBCXX_ENABLE_ASSERTIONS)
diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in
index 67022146c9082..39b5ccbc1f147 100644
--- a/libcxx/include/__config_site.in
+++ b/libcxx/include/__config_site.in
@@ -31,6 +31,7 @@
 #cmakedefine _LIBCPP_HAS_NO_STD_MODULES
 #cmakedefine _LIBCPP_HAS_NO_TIME_ZONE_DATABASE
 #cmakedefine _LIBCPP_INSTRUMENTED_WITH_ASAN
+#cmakedefine __LIBCPP_FORMAT_OPTIMIZE_SIZE
 
 // PSTL backends
 #cmakedefine _LIBCPP_PSTL_BACKEND_SERIAL

>From 64760bbc50fa9403e475749c40d92d4156c16a72 Mon Sep 17 00:00:00 2001
From: Robbie Litchfield <blam.kiwi at gmail.com>
Date: Mon, 8 Jul 2024 20:10:28 +1200
Subject: [PATCH 2/4] Change basic_format_arg_value to dispatch formatting
 behind a function pointer

---
 libcxx/include/__config_site.in            |  2 +-
 libcxx/include/__format/format_arg.h       | 82 +++++++++++++++++-----
 libcxx/include/__format/format_functions.h | 15 +---
 3 files changed, 68 insertions(+), 31 deletions(-)

diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in
index 39b5ccbc1f147..fff37c342336c 100644
--- a/libcxx/include/__config_site.in
+++ b/libcxx/include/__config_site.in
@@ -31,7 +31,7 @@
 #cmakedefine _LIBCPP_HAS_NO_STD_MODULES
 #cmakedefine _LIBCPP_HAS_NO_TIME_ZONE_DATABASE
 #cmakedefine _LIBCPP_INSTRUMENTED_WITH_ASAN
-#cmakedefine __LIBCPP_FORMAT_OPTIMIZE_SIZE
+#cmakedefine _LIBCPP_FORMAT_OPTIMIZE_SIZE
 
 // PSTL backends
 #cmakedefine _LIBCPP_PSTL_BACKEND_SERIAL
diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h
index aa02f81dc40e2..cbcf7a1cdb0ef 100644
--- a/libcxx/include/__format/format_arg.h
+++ b/libcxx/include/__format/format_arg.h
@@ -250,29 +250,79 @@ class __basic_format_arg_value {
     __handle __handle_;
   };
 
+  // Visiting a value for formatting is dispatched via a function pointer to prevent
+  // unused formatters from being instantiated. This can lead to significant code size 
+  // reductions on statically linked targets esp. at lower optimization levels.
+
+  void (__basic_format_arg_value::*__format_)(
+      basic_format_parse_context<_CharT>& __parse_ctx, _Context& __ctx, bool __parse) const;
+
+  _LIBCPP_HIDE_FROM_ABI void
+  __visit(basic_format_parse_context<_CharT>& __parse_ctx, _Context& __ctx, bool __parse) const {
+    ((*this).*__format_)(__parse_ctx, __ctx, __parse);
+  }
+
+  template <auto __member>
+  _LIBCPP_HIDE_FROM_ABI void
+  __format(basic_format_parse_context<_CharT>& __parse_ctx, _Context& __ctx, bool __parse) const {
+    using _Tp = std::decay_t<decltype((*this).*__member)>;
+    if constexpr (same_as<_Tp, monostate>)
+      std::__throw_format_error("The argument index value is too large for the number of arguments supplied");
+    else if constexpr (same_as<_Tp, __handle>)
+      __handle_.__format_(__parse_ctx, __ctx, __handle_.__ptr_);
+    else {
+      formatter<_Tp, _CharT> __formatter;
+      if (__parse)
+        __parse_ctx.advance_to(__formatter.parse(__parse_ctx));
+      __ctx.advance_to(__formatter.format((*this).*__member, __ctx));
+    }
+  }
+
   // These constructors contain the exact storage type used. If adjustments are
   // required, these will be done in __create_format_arg.
 
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value() noexcept : __monostate_() {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(bool __value) noexcept : __boolean_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(_CharT __value) noexcept : __char_type_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(int __value) noexcept : __int_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(unsigned __value) noexcept : __unsigned_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(long long __value) noexcept : __long_long_(__value) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value() noexcept
+      : __monostate_(), __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__monostate_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(bool __value) noexcept
+      : __boolean_(__value), __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__boolean_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(_CharT __value) noexcept
+      : __char_type_(__value),
+        __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__char_type_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(int __value) noexcept
+      : __int_(__value),
+        __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__basic_format_arg_value::__int_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(unsigned __value) noexcept
+      : __unsigned_(__value), __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__unsigned_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(long long __value) noexcept
+      : __long_long_(__value),
+        __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__long_long_>) {}
   _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(unsigned long long __value) noexcept
-      : __unsigned_long_long_(__value) {}
+      : __unsigned_long_long_(__value),
+        __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__unsigned_long_long_>) {}
 #  ifndef _LIBCPP_HAS_NO_INT128
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__int128_t __value) noexcept : __i128_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__uint128_t __value) noexcept : __u128_(__value) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__int128_t __value) noexcept
+      : __i128_(__value), __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__i128_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__uint128_t __value) noexcept
+      : __u128_(__value), __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__u128_>) {}
 #  endif
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(float __value) noexcept : __float_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(double __value) noexcept : __double_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(long double __value) noexcept : __long_double_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(const _CharT* __value) noexcept : __const_char_type_ptr_(__value) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(float __value) noexcept
+      : __float_(__value), __format_(&__format<&__basic_format_arg_value::__basic_format_arg_value::__float_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(double __value) noexcept
+      : __double_(__value), __format_(&__format<&__basic_format_arg_value::__basic_format_arg_value::__double_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(long double __value) noexcept
+      : __long_double_(__value),
+        __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__long_double_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(const _CharT* __value) noexcept
+      : __const_char_type_ptr_(__value),
+        __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__const_char_type_ptr_>) {}
   _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(basic_string_view<_CharT> __value) noexcept
-      : __string_view_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(const void* __value) noexcept : __ptr_(__value) {}
-  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__handle&& __value) noexcept : __handle_(std::move(__value)) {}
+      : __string_view_(__value),
+        __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__string_view_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(const void* __value) noexcept
+      : __ptr_(__value), __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__ptr_>) {}
+  _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__handle&& __value) noexcept
+      : __handle_(std::move(__value)),
+        __format_(&__basic_format_arg_value::__format<&__basic_format_arg_value::__handle_>) {}
 };
 
 template <class _Context>
diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h
index d14b49aff1495..74befaabda3cd 100644
--- a/libcxx/include/__format/format_functions.h
+++ b/libcxx/include/__format/format_functions.h
@@ -273,20 +273,7 @@ __handle_replacement_field(_Iterator __begin, _Iterator __end, _ParseCtx& __pars
     else if (__parse)
       __format::__compile_time_visit_format_arg(__parse_ctx, __ctx, __type);
   } else
-    std::__visit_format_arg(
-        [&](auto __arg) {
-          if constexpr (same_as<decltype(__arg), monostate>)
-            std::__throw_format_error("The argument index value is too large for the number of arguments supplied");
-          else if constexpr (same_as<decltype(__arg), typename basic_format_arg<_Ctx>::handle>)
-            __arg.format(__parse_ctx, __ctx);
-          else {
-            formatter<decltype(__arg), _CharT> __formatter;
-            if (__parse)
-              __parse_ctx.advance_to(__formatter.parse(__parse_ctx));
-            __ctx.advance_to(__formatter.format(__arg, __ctx));
-          }
-        },
-        __ctx.arg(__r.__value));
+    __ctx.arg(__r.__value).__value_.__visit(__parse_ctx, __ctx, __parse);
 
   __begin = __parse_ctx.begin();
   if (__begin == __end || *__begin != _CharT('}'))

>From 16ebdfd2860d913108dae9e3eb2f5b5ea4c18747 Mon Sep 17 00:00:00 2001
From: Robbie Litchfield <blam.kiwi at gmail.com>
Date: Mon, 8 Jul 2024 20:25:57 +1200
Subject: [PATCH 3/4] Add build flag to control <charconv> optimize for size

---
 libcxx/CMakeLists.txt                         | 12 ++++--------
 libcxx/include/__charconv/to_chars_integral.h |  7 +++++++
 libcxx/include/__config_site.in               |  2 +-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 6d70116a228e5..f66fc386246a1 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -99,14 +99,10 @@ option(LIBCXX_ENABLE_WIDE_CHARACTERS
    support the C functionality for wide characters. When wide characters are
    not supported, several parts of the library will be disabled, notably the
    wide character specializations of std::basic_string." ON)
-option(LIBCXX_FORMAT_OPTIMIZE_SIZE
-  "Whether to optimize <format> for code size over performance. Optimizing
+option(LIBCXX_CHARCONV_OPTIMIZE_SIZE
+  "Whether to optimize <charconv> for code size over performance. Optimizing
    for size can be useful when porting to platforms that have limited memory
-   resources. When optimizing for size, libc++ may take codepaths that eliminate 
-   expensive lookup tables or allow the compiler to agressively remove unused 
-   functions and data. When optimizing for size, formatting and <charconv> may 
-   become significantly slower. Disabling localization, unicode and wide character
-   support can further minimize code size." OFF)
+   resources." OFF)
 
 # To use time zone support in libc++ the platform needs to have the IANA
 # database installed. Libc++ will fail to build if this is enabled on a
@@ -791,7 +787,7 @@ config_define_if_not(LIBCXX_ENABLE_UNICODE _LIBCPP_HAS_NO_UNICODE)
 config_define_if_not(LIBCXX_ENABLE_WIDE_CHARACTERS _LIBCPP_HAS_NO_WIDE_CHARACTERS)
 config_define_if_not(LIBCXX_ENABLE_TIME_ZONE_DATABASE _LIBCPP_HAS_NO_TIME_ZONE_DATABASE)
 config_define_if_not(LIBCXX_ENABLE_VENDOR_AVAILABILITY_ANNOTATIONS _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS)
-config_define_if(LIBCXX_FORMAT_OPTIMIZE_SIZE _LIBCPP_FORMAT_OPTIMIZE_SIZE)
+config_define_if(LIBCXX_CHARCONV_OPTIMIZE_SIZE _LIBCPP_CHARCONV_OPTIMIZE_SIZE)
 
 # TODO(LLVM 19): Produce a deprecation warning.
 if (LIBCXX_ENABLE_ASSERTIONS)
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
index 0369f4dfb9bda..5ffb7202e848b 100644
--- a/libcxx/include/__charconv/to_chars_integral.h
+++ b/libcxx/include/__charconv/to_chars_integral.h
@@ -274,6 +274,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __to_chars_integral_widt
 template <typename _Tp>
 inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
 __to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_type) {
+# ifndef _LIBCPP_CHARCONV_OPTIMIZE_SIZE
   if (__base == 10) [[likely]]
     return std::__to_chars_itoa(__first, __last, __value, false_type());
 
@@ -285,6 +286,7 @@ __to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_
   case 16:
     return std::__to_chars_integral<16>(__first, __last, __value);
   }
+# endif
 
   ptrdiff_t __cap = __last - __first;
   int __n         = std::__to_chars_integral_width(__value, __base);
@@ -306,7 +308,12 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result
 to_chars(char* __first, char* __last, _Tp __value) {
   using _Type = __make_32_64_or_128_bit_t<_Tp>;
   static_assert(!is_same<_Type, void>::value, "unsupported integral type used in to_chars");
+
+# ifdef _LIBCPP_CHARCONV_OPTIMIZE_SIZE
+  return std::__to_chars_integral(__first, __last, static_cast<_Type>(__value), 10, is_signed<_Tp>());
+# else
   return std::__to_chars_itoa(__first, __last, static_cast<_Type>(__value), is_signed<_Tp>());
+# endif
 }
 
 template <typename _Tp, __enable_if_t<is_integral<_Tp>::value, int> = 0>
diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in
index fff37c342336c..20d9d75c27fff 100644
--- a/libcxx/include/__config_site.in
+++ b/libcxx/include/__config_site.in
@@ -31,7 +31,7 @@
 #cmakedefine _LIBCPP_HAS_NO_STD_MODULES
 #cmakedefine _LIBCPP_HAS_NO_TIME_ZONE_DATABASE
 #cmakedefine _LIBCPP_INSTRUMENTED_WITH_ASAN
-#cmakedefine _LIBCPP_FORMAT_OPTIMIZE_SIZE
+#cmakedefine _LIBCPP_CHARCONV_OPTIMIZE_SIZE
 
 // PSTL backends
 #cmakedefine _LIBCPP_PSTL_BACKEND_SERIAL

>From 8172ce196896dca50dab9a79bb800da452781ec4 Mon Sep 17 00:00:00 2001
From: Robbie Litchfield <blam.kiwi at gmail.com>
Date: Mon, 8 Jul 2024 20:55:24 +1200
Subject: [PATCH 4/4] Change format spec to not create temporary strings if
 exceptions are disabled

---
 libcxx/include/__format/parser_std_format_spec.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
index 150bdde89f3b3..694ce47ddad9b 100644
--- a/libcxx/include/__format/parser_std_format_spec.h
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -54,13 +54,21 @@ namespace __format_spec {
 
 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void
 __throw_invalid_option_format_error(const char* __id, const char* __option) {
+# ifndef _LIBCPP_HAS_NO_EXCEPTIONS
   std::__throw_format_error(
       (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str());
+# else
+  std::__throw_format_error("The format specifier does not allow the given option");
+# endif
 }
 
 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) {
+# ifndef _LIBCPP_HAS_NO_EXCEPTIONS
   std::__throw_format_error(
       (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str());
+# else
+  std::__throw_format_error("The format specifier uses an invalid value for the type option");
+# endif
 }
 
 template <contiguous_iterator _Iterator, class _ParseContext>



More information about the libcxx-commits mailing list