[libcxx-commits] [libcxx] 6589729 - [libc++][format] Improves parsing speed.

Mark de Wever via libcxx-commits libcxx-commits at lists.llvm.org
Wed Jul 13 08:39:14 PDT 2022


Author: Mark de Wever
Date: 2022-07-13T17:39:09+02:00
New Revision: 65897292065ef73c6ac74c4b3f86f0431c91cb49

URL: https://github.com/llvm/llvm-project/commit/65897292065ef73c6ac74c4b3f86f0431c91cb49
DIFF: https://github.com/llvm/llvm-project/commit/65897292065ef73c6ac74c4b3f86f0431c91cb49.diff

LOG: [libc++][format] Improves parsing speed.

A format string like "{}" is quite common. In this case avoid parsing
the format-spec when it's not present. Before the parsing was always
called, therefore some refactoring is done to make sure the formatters
work properly when their parse member isn't called.

>From the wording it's not entirely clear whether this optimization is
allowed

[tab:formatter]
```
  and the range [pc.begin(), pc.end()) from the last call to f.parse(pc).
```
Implies there's always a call to `f.parse` even when the format-spec
isn't present. Therefore this optimization isn't done for handle
classes; it's unclear whether that would break user defined formatters.

The improvements give a small reduciton is code size:
 719408	  12472	    488	 732368	  b2cd0	before
 718824	  12472	    488	 731784	  b2a88	after

The performance benefits when not using a format-spec are:

```
Comparing ./formatter_int.libcxx.out-baseline to ./formatter_int.libcxx.out
Benchmark                                                               Time             CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------------------
BM_Basic<uint32_t>                                                   -0.0688         -0.0687            67            62            67            62
BM_Basic<int32_t>                                                    -0.1105         -0.1107            73            65            73            65
BM_Basic<uint64_t>                                                   -0.1053         -0.1049            95            85            95            85
BM_Basic<int64_t>                                                    -0.0889         -0.0888            93            85            93            85
BM_BasicLow<__uint128_t>                                             -0.0655         -0.0655            96            90            96            90
BM_BasicLow<__int128_t>                                              -0.0693         -0.0694            97            90            97            90
BM_Basic<__uint128_t>                                                -0.0359         -0.0359           256           247           256           247
BM_Basic<__int128_t>                                                 -0.0414         -0.0414           239           229           239           229
```

For the cases where a format-spec is used the results remain similar,
some are faster some are slower, differing per run.

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D129426

Added: 
    

Modified: 
    libcxx/include/__format/formatter_bool.h
    libcxx/include/__format/formatter_char.h
    libcxx/include/__format/formatter_integral.h
    libcxx/include/__format/formatter_output.h
    libcxx/include/__format/parser_std_format_spec.h
    libcxx/include/format

Removed: 
    


################################################################################
diff  --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h
index 4c9d3fc774731..cdb0631f87d46 100644
--- a/libcxx/include/__format/formatter_bool.h
+++ b/libcxx/include/__format/formatter_bool.h
@@ -47,6 +47,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<bool, _CharT>
 
   _LIBCPP_HIDE_FROM_ABI auto format(bool __value, auto& __ctx) const -> decltype(__ctx.out()) {
     switch (__parser_.__type_) {
+    case __format_spec::__type::__default:
     case __format_spec::__type::__string:
       return __formatter::__format_bool(__value, __ctx, __parser_.__get_parsed_std_specifications(__ctx));
 

diff  --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h
index cd54abba348a1..a3ca36ec0a62c 100644
--- a/libcxx/include/__format/formatter_char.h
+++ b/libcxx/include/__format/formatter_char.h
@@ -41,7 +41,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_char {
   }
 
   _LIBCPP_HIDE_FROM_ABI auto format(_CharT __value, auto& __ctx) const -> decltype(__ctx.out()) {
-    if (__parser_.__type_ == __format_spec::__type::__char)
+    if (__parser_.__type_ == __format_spec::__type::__default || __parser_.__type_ == __format_spec::__type::__char)
       return __formatter::__format_char(__value, __ctx.out(), __parser_.__get_parsed_std_specifications(__ctx));
 
     if constexpr (sizeof(_CharT) <= sizeof(int))

diff  --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h
index 4ad6de0ec66f4..d6fa5ec18eb83 100644
--- a/libcxx/include/__format/formatter_integral.h
+++ b/libcxx/include/__format/formatter_integral.h
@@ -207,10 +207,6 @@ _LIBCPP_HIDE_FROM_ABI auto __format_integer(
     char* __end,
     const char* __prefix,
     int __base) -> decltype(__ctx.out()) {
-  _LIBCPP_ASSERT(
-      __specs.__alignment_ != __format_spec::__alignment::__default,
-      "the caller should adjust the default to the value required by the type");
-
   char* __first = __formatter::__insert_sign(__begin, __negative, __specs.__std_.__sign_);
   if (__specs.__std_.__alternate_form_ && __prefix)
     while (*__prefix)
@@ -280,6 +276,7 @@ _LIBCPP_HIDE_FROM_ABI auto __format_integer(
     return __formatter::__format_integer(
         __value, __ctx, __specs, __negative, __array.begin(), __array.end(), __value != 0 ? "0" : nullptr, 8);
   }
+  case __format_spec::__type::__default:
   case __format_spec::__type::__decimal: {
     array<char, __formatter::__buffer_size<decltype(__value), 10>()> __array;
     return __formatter::__format_integer(

diff  --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h
index fabc04b9a0fe3..c59cbbeeb5dd7 100644
--- a/libcxx/include/__format/formatter_output.h
+++ b/libcxx/include/__format/formatter_output.h
@@ -59,14 +59,11 @@ struct _LIBCPP_TYPE_VIS __padding_size_result {
 _LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result
 __padding_size(size_t __size, size_t __width, __format_spec::__alignment __align) {
   _LIBCPP_ASSERT(__width > __size, "don't call this function when no padding is required");
-  _LIBCPP_ASSERT(__align != __format_spec::__alignment::__default,
-                 "the caller should adjust the default to the value required by the type");
   _LIBCPP_ASSERT(__align != __format_spec::__alignment::__zero_padding,
                  "the caller should have handled the zero-padding");
 
   size_t __fill = __width - __size;
   switch (__align) {
-  case __format_spec::__alignment::__default:
   case __format_spec::__alignment::__zero_padding:
     __libcpp_unreachable();
 
@@ -81,6 +78,7 @@ __padding_size(size_t __size, size_t __width, __format_spec::__alignment __align
     size_t __after = __fill - __before;
     return {__before, __after};
   }
+  case __format_spec::__alignment::__default:
   case __format_spec::__alignment::__right:
     return {__fill, 0};
   }
@@ -91,9 +89,6 @@ template <class _OutIt, class _CharT>
 _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first,
                                                               const char* __last, string&& __grouping, _CharT __sep,
                                                               __format_spec::__parsed_specifications<_CharT> __specs) {
-  _LIBCPP_ASSERT(__specs.__alignment_ != __format_spec::__alignment::__default,
-                 "the caller should adjust the default to the value required by the type");
-
   int __size = (__first - __begin) +    // [sign][prefix]
                (__last - __first) +     // data
                (__grouping.size() - 1); // number of separator characters

diff  --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
index 319ef24d3ea7f..034fc55a44dce 100644
--- a/libcxx/include/__format/parser_std_format_spec.h
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -1040,18 +1040,10 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT
   __format_spec::__process_display_type_bool_string(__parser);
 }
 
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_integer(__parser<_CharT>& __parser) {
-  if (__parser.__alignment_ == __alignment::__default)
-    __parser.__alignment_ = __alignment::__right;
-}
-
 template <class _CharT>
 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) {
   switch (__parser.__type_) {
   case __format_spec::__type::__default:
-    __parser.__type_ = __format_spec::__type::__string;
-    [[fallthrough]];
   case __format_spec::__type::__string:
     __format_spec::__process_display_type_bool_string(__parser);
     break;
@@ -1062,7 +1054,6 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __p
   case __format_spec::__type::__decimal:
   case __format_spec::__type::__hexadecimal_lower_case:
   case __format_spec::__type::__hexadecimal_upper_case:
-    __process_display_type_integer(__parser);
     break;
 
   default:
@@ -1074,8 +1065,6 @@ template <class _CharT>
 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) {
   switch (__parser.__type_) {
   case __format_spec::__type::__default:
-    __parser.__type_ = __format_spec::__type::__char;
-    [[fallthrough]];
   case __format_spec::__type::__char:
     __format_spec::__process_display_type_char(__parser);
     break;
@@ -1086,7 +1075,6 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __p
   case __format_spec::__type::__decimal:
   case __format_spec::__type::__hexadecimal_lower_case:
   case __format_spec::__type::__hexadecimal_upper_case:
-    __format_spec::__process_display_type_integer(__parser);
     break;
 
   default:
@@ -1098,15 +1086,12 @@ template <class _CharT>
 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) {
   switch (__parser.__type_) {
   case __format_spec::__type::__default:
-    __parser.__type_ = __format_spec::__type::__decimal;
-    [[fallthrough]];
   case __format_spec::__type::__binary_lower_case:
   case __format_spec::__type::__binary_upper_case:
   case __format_spec::__type::__octal:
   case __format_spec::__type::__decimal:
   case __format_spec::__type::__hexadecimal_lower_case:
   case __format_spec::__type::__hexadecimal_upper_case:
-    __format_spec::__process_display_type_integer(__parser);
     break;
 
   case __format_spec::__type::__char:
@@ -1120,8 +1105,6 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>&
 
 template <class _CharT>
 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) {
-  __format_spec::__process_display_type_integer(__parser);
-
   switch (__parser.__type_) {
   case __format_spec::__type::__default:
     // When no precision specified then it keeps default since that

diff  --git a/libcxx/include/format b/libcxx/include/format
index dd7d08dbbe0b0..60197d24523f2 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -376,6 +376,7 @@ __handle_replacement_field(const _CharT* __begin, const _CharT* __end,
   __format::__parse_number_result __r =
       __format::__parse_arg_id(__begin, __end, __parse_ctx);
 
+  bool __parse = *__r.__ptr == _CharT(':');
   switch (*__r.__ptr) {
   case _CharT(':'):
     // The arg-id has a format-specifier, advance the input to the format-spec.
@@ -395,7 +396,7 @@ __handle_replacement_field(const _CharT* __begin, const _CharT* __end,
     if (__type == __arg_t::__handle)
       __ctx.__handle(__r.__value).__parse(__parse_ctx);
     else
-      __format::__compile_time_visit_format_arg(__parse_ctx, __ctx, __type);
+        __format::__compile_time_visit_format_arg(__parse_ctx, __ctx, __type);
   } else
     _VSTD::visit_format_arg(
         [&](auto __arg) {
@@ -405,7 +406,8 @@ __handle_replacement_field(const _CharT* __begin, const _CharT* __end,
             __arg.format(__parse_ctx, __ctx);
           else {
             formatter<decltype(__arg), _CharT> __formatter;
-            __parse_ctx.advance_to(__formatter.parse(__parse_ctx));
+            if (__parse)
+              __parse_ctx.advance_to(__formatter.parse(__parse_ctx));
             __ctx.advance_to(__formatter.format(__arg, __ctx));
           }
         },


        


More information about the libcxx-commits mailing list