[llvm-branch-commits] [libcxx] [libc++][format][6/7] Optimizes formatted_size. (PR #101835)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Aug 3 09:07:06 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Mark de Wever (mordante)
<details>
<summary>Changes</summary>
__formatted_size_buffer is not part of the public library interface, so these changes are not an ABI break.
Before
----------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1 47.6 ns 47.6 ns 14729701 bytes_per_second=20.055Mi/s
BM_formatted_size_string<char>/2 23.8 ns 23.8 ns 29445832 bytes_per_second=80.1657Mi/s
BM_formatted_size_string<char>/4 11.9 ns 11.9 ns 58813944 bytes_per_second=320.771Mi/s
BM_formatted_size_string<char>/8 5.95 ns 5.95 ns 117601928 bytes_per_second=1.25307Gi/s
BM_formatted_size_string<char>/16 2.97 ns 2.97 ns 235800528 bytes_per_second=5.02342Gi/s
BM_formatted_size_string<char>/32 1.49 ns 1.49 ns 471177152 bytes_per_second=20.0428Gi/s
BM_formatted_size_string<char>/64 0.745 ns 0.744 ns 942161408 bytes_per_second=80.1172Gi/s
BM_formatted_size_string<char>/128 0.377 ns 0.376 ns 1861005568 bytes_per_second=316.85Gi/s
BM_formatted_size_string<char>/256 0.200 ns 0.200 ns 3503415296 bytes_per_second=1.16508Ti/s
BM_formatted_size_string<char>/512 0.111 ns 0.110 ns 6351184384 bytes_per_second=4.21671Ti/s
BM_formatted_size_string<char>/1024 0.067 ns 0.067 ns 10441098240 bytes_per_second=13.9409Ti/s
BM_formatted_size_string<char>/2048 0.045 ns 0.045 ns 15404886016 bytes_per_second=41.0182Ti/s
BM_formatted_size_string<char>/4096 0.036 ns 0.036 ns 19634089984 bytes_per_second=104.431Ti/s
BM_formatted_size_string<char>/8192 0.030 ns 0.030 ns 23265501184 bytes_per_second=247.504Ti/s
BM_formatted_size_string<char>/16384 0.027 ns 0.027 ns 25556238336 bytes_per_second=545.066Ti/s
BM_formatted_size_string<char>/32768 0.027 ns 0.027 ns 26036731904 bytes_per_second=1.08282Pi/s
BM_formatted_size_string<char>/65536 0.027 ns 0.027 ns 26192379904 bytes_per_second=2.17753Pi/s
BM_formatted_size_string<char>/131072 0.026 ns 0.026 ns 26622033920 bytes_per_second=4.4243Pi/s
BM_formatted_size_string<char>/262144 0.027 ns 0.027 ns 25452085248 bytes_per_second=8.48295Pi/s
BM_formatted_size_string<char>/524288 0.028 ns 0.028 ns 24593825792 bytes_per_second=16.3639Pi/s
BM_formatted_size_string<char>/1048576 0.028 ns 0.028 ns 24775753728 bytes_per_second=32.8865Pi/s
BM_formatted_size_string<wchar_t>/1 47.0 ns 46.9 ns 14912646 bytes_per_second=81.3132Mi/s
BM_formatted_size_string<wchar_t>/2 23.4 ns 23.3 ns 29964298 bytes_per_second=326.845Mi/s
BM_formatted_size_string<wchar_t>/4 11.7 ns 11.7 ns 59734380 bytes_per_second=1.26993Gi/s
BM_formatted_size_string<wchar_t>/8 5.84 ns 5.84 ns 120131824 bytes_per_second=5.10665Gi/s
BM_formatted_size_string<wchar_t>/16 2.92 ns 2.92 ns 239874128 bytes_per_second=20.4273Gi/s
BM_formatted_size_string<wchar_t>/32 1.48 ns 1.48 ns 473672928 bytes_per_second=80.6502Gi/s
BM_formatted_size_string<wchar_t>/64 0.797 ns 0.796 ns 877660480 bytes_per_second=299.351Gi/s
BM_formatted_size_string<wchar_t>/128 0.450 ns 0.449 ns 1568318336 bytes_per_second=1.03679Ti/s
BM_formatted_size_string<wchar_t>/256 0.273 ns 0.273 ns 2571588096 bytes_per_second=3.41553Ti/s
BM_formatted_size_string<wchar_t>/512 0.181 ns 0.181 ns 3868106240 bytes_per_second=10.2834Ti/s
BM_formatted_size_string<wchar_t>/1024 0.143 ns 0.143 ns 4902914048 bytes_per_second=26.0513Ti/s
BM_formatted_size_string<wchar_t>/2048 0.121 ns 0.121 ns 5801660416 bytes_per_second=61.775Ti/s
BM_formatted_size_string<wchar_t>/4096 0.110 ns 0.110 ns 6380191744 bytes_per_second=135.763Ti/s
BM_formatted_size_string<wchar_t>/8192 0.107 ns 0.107 ns 6531432448 bytes_per_second=278.051Ti/s
BM_formatted_size_string<wchar_t>/16384 0.107 ns 0.107 ns 6531842048 bytes_per_second=556.731Ti/s
BM_formatted_size_string<wchar_t>/32768 0.106 ns 0.105 ns 6648430592 bytes_per_second=1.10374Pi/s
BM_formatted_size_string<wchar_t>/65536 0.110 ns 0.110 ns 6352273408 bytes_per_second=2.11267Pi/s
BM_formatted_size_string<wchar_t>/131072 0.114 ns 0.114 ns 6139805696 bytes_per_second=4.07511Pi/s
BM_formatted_size_string<wchar_t>/262144 0.114 ns 0.114 ns 6169821184 bytes_per_second=8.20475Pi/s
BM_formatted_size_string<wchar_t>/524288 0.114 ns 0.113 ns 6168248320 bytes_per_second=16.4288Pi/s
BM_formatted_size_string<wchar_t>/1048576 0.120 ns 0.120 ns 5817499648 bytes_per_second=30.9853Pi/s
After
----------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1 44.6 ns 44.4 ns 15768750 bytes_per_second=21.4556Mi/s
BM_formatted_size_string<char>/2 22.3 ns 22.2 ns 31477648 bytes_per_second=85.8719Mi/s
BM_formatted_size_string<char>/4 11.1 ns 11.1 ns 63031396 bytes_per_second=343.327Mi/s
BM_formatted_size_string<char>/8 5.57 ns 5.56 ns 126105968 bytes_per_second=1.34101Gi/s
BM_formatted_size_string<char>/16 2.78 ns 2.78 ns 251793552 bytes_per_second=5.36504Gi/s
BM_formatted_size_string<char>/32 1.39 ns 1.39 ns 504195136 bytes_per_second=21.4672Gi/s
BM_formatted_size_string<char>/64 0.696 ns 0.694 ns 1008341184 bytes_per_second=85.8541Gi/s
BM_formatted_size_string<char>/128 0.348 ns 0.347 ns 2016425984 bytes_per_second=343.386Gi/s
BM_formatted_size_string<char>/256 0.174 ns 0.174 ns 4031530496 bytes_per_second=1.33811Ti/s
BM_formatted_size_string<char>/512 0.087 ns 0.087 ns 8058140160 bytes_per_second=5.36282Ti/s
BM_formatted_size_string<char>/1024 0.044 ns 0.043 ns 16131134464 bytes_per_second=21.4476Ti/s
BM_formatted_size_string<char>/2048 0.022 ns 0.022 ns 32242241536 bytes_per_second=85.938Ti/s
BM_formatted_size_string<char>/4096 0.011 ns 0.011 ns 64521842688 bytes_per_second=343.596Ti/s
BM_formatted_size_string<char>/8192 0.005 ns 0.005 ns 129048797184 bytes_per_second=1.34065Pi/s
BM_formatted_size_string<char>/16384 0.003 ns 0.003 ns 258152202240 bytes_per_second=5.37696Pi/s
BM_formatted_size_string<char>/32768 0.001 ns 0.001 ns 516496818176 bytes_per_second=21.4635Pi/s
BM_formatted_size_string<char>/65536 0.001 ns 0.001 ns 1000000061440 bytes_per_second=85.868Pi/s
BM_formatted_size_string<char>/131072 0.000 ns 0.000 ns 1000000061440 bytes_per_second=343.451Pi/s
BM_formatted_size_string<char>/262144 0.000 ns 0.000 ns 1000000192512 bytes_per_second=1.33951Ei/s
BM_formatted_size_string<char>/524288 0.000 ns 0.000 ns 1000000192512 bytes_per_second=5.35714Ei/s
BM_formatted_size_string<char>/1048576 0.000 ns 0.000 ns 1000000716800 bytes_per_second=21.4307Ei/s
BM_formatted_size_string<wchar_t>/1 43.3 ns 43.2 ns 16196853 bytes_per_second=88.2127Mi/s
BM_formatted_size_string<wchar_t>/2 21.6 ns 21.6 ns 32428704 bytes_per_second=353.328Mi/s
BM_formatted_size_string<wchar_t>/4 10.8 ns 10.8 ns 64858356 bytes_per_second=1.38065Gi/s
BM_formatted_size_string<wchar_t>/8 5.42 ns 5.40 ns 129503096 bytes_per_second=5.51619Gi/s
BM_formatted_size_string<wchar_t>/16 2.71 ns 2.70 ns 259444512 bytes_per_second=22.056Gi/s
BM_formatted_size_string<wchar_t>/32 1.35 ns 1.35 ns 517963680 bytes_per_second=88.217Gi/s
BM_formatted_size_string<wchar_t>/64 0.678 ns 0.676 ns 1035243264 bytes_per_second=352.499Gi/s
BM_formatted_size_string<wchar_t>/128 0.339 ns 0.338 ns 2069611264 bytes_per_second=1.37816Ti/s
BM_formatted_size_string<wchar_t>/256 0.169 ns 0.169 ns 4146004480 bytes_per_second=5.51363Ti/s
BM_formatted_size_string<wchar_t>/512 0.085 ns 0.085 ns 8282438656 bytes_per_second=22.023Ti/s
BM_formatted_size_string<wchar_t>/1024 0.042 ns 0.042 ns 16622990336 bytes_per_second=88.6687Ti/s
BM_formatted_size_string<wchar_t>/2048 0.021 ns 0.021 ns 33236293632 bytes_per_second=354.449Ti/s
BM_formatted_size_string<wchar_t>/4096 0.011 ns 0.011 ns 66449469440 bytes_per_second=1.38568Pi/s
BM_formatted_size_string<wchar_t>/8192 0.005 ns 0.005 ns 133098782720 bytes_per_second=5.54274Pi/s
BM_formatted_size_string<wchar_t>/16384 0.003 ns 0.003 ns 266227466240 bytes_per_second=22.1089Pi/s
BM_formatted_size_string<wchar_t>/32768 0.001 ns 0.001 ns 532288503808 bytes_per_second=88.5187Pi/s
BM_formatted_size_string<wchar_t>/65536 0.001 ns 0.001 ns 1000000061440 bytes_per_second=354.202Pi/s
BM_formatted_size_string<wchar_t>/131072 0.000 ns 0.000 ns 1000000061440 bytes_per_second=1.38438Ei/s
BM_formatted_size_string<wchar_t>/262144 0.000 ns 0.000 ns 1000000192512 bytes_per_second=5.54102Ei/s
BM_formatted_size_string<wchar_t>/524288 0.000 ns 0.000 ns 1000000192512 bytes_per_second=22.0527Ei/s
BM_formatted_size_string<wchar_t>/1048576 0.000 ns 0.000 ns 1000000716800 bytes_per_second=88.673Ei/s
Comparison
Benchmark Time CPU Time Old Time New CPU Old CPU New
----------------------------------------------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1 -0.0639 -0.0653 48 45 48 44
BM_formatted_size_string<char>/2 -0.0651 -0.0665 24 22 24 22
BM_formatted_size_string<char>/4 -0.0642 -0.0657 12 11 12 11
BM_formatted_size_string<char>/8 -0.0643 -0.0656 6 6 6 6
BM_formatted_size_string<char>/16 -0.0623 -0.0637 3 3 3 3
BM_formatted_size_string<char>/32 -0.0650 -0.0664 1 1 1 1
BM_formatted_size_string<char>/64 -0.0655 -0.0668 1 1 1 1
BM_formatted_size_string<char>/128 -0.0756 -0.0773 0 0 0 0
BM_formatted_size_string<char>/256 -0.1280 -0.1293 0 0 0 0
BM_formatted_size_string<char>/512 -0.2126 -0.2137 0 0 0 0
BM_formatted_size_string<char>/1024 -0.3489 -0.3500 0 0 0 0
BM_formatted_size_string<char>/2048 -0.5220 -0.5227 0 0 0 0
BM_formatted_size_string<char>/4096 -0.6955 -0.6961 0 0 0 0
BM_formatted_size_string<char>/8192 -0.8194 -0.8197 0 0 0 0
BM_formatted_size_string<char>/16384 -0.9009 -0.9010 0 0 0 0
BM_formatted_size_string<char>/32768 -0.9495 -0.9496 0 0 0 0
BM_formatted_size_string<char>/65536 -0.9746 -0.9746 0 0 0 0
BM_formatted_size_string<char>/131072 -0.9871 -0.9871 0 0 0 0
BM_formatted_size_string<char>/262144 -0.9938 -0.9938 0 0 0 0
BM_formatted_size_string<char>/524288 -0.9970 -0.9970 0 0 0 0
BM_formatted_size_string<char>/1048576 -0.9985 -0.9985 0 0 0 0
BM_formatted_size_string<wchar_t>/1 -0.0769 -0.0782 47 43 47 43
BM_formatted_size_string<wchar_t>/2 -0.0737 -0.0750 23 22 23 22
BM_formatted_size_string<wchar_t>/4 -0.0788 -0.0802 12 11 12 11
BM_formatted_size_string<wchar_t>/8 -0.0729 -0.0742 6 5 6 5
BM_formatted_size_string<wchar_t>/16 -0.0724 -0.0738 3 3 3 3
BM_formatted_size_string<wchar_t>/32 -0.0844 -0.0858 1 1 1 1
BM_formatted_size_string<wchar_t>/64 -0.1493 -0.1508 1 1 1 1
BM_formatted_size_string<wchar_t>/128 -0.2466 -0.2477 0 0 0 0
BM_formatted_size_string<wchar_t>/256 -0.3796 -0.3805 0 0 0 0
BM_formatted_size_string<wchar_t>/512 -0.5323 -0.5331 0 0 0 0
BM_formatted_size_string<wchar_t>/1024 -0.7058 -0.7062 0 0 0 0
BM_formatted_size_string<wchar_t>/2048 -0.8255 -0.8257 0 0 0 0
BM_formatted_size_string<wchar_t>/4096 -0.9042 -0.9043 0 0 0 0
BM_formatted_size_string<wchar_t>/8192 -0.9509 -0.9510 0 0 0 0
BM_formatted_size_string<wchar_t>/16384 -0.9754 -0.9754 0 0 0 0
BM_formatted_size_string<wchar_t>/32768 -0.9875 -0.9875 0 0 0 0
BM_formatted_size_string<wchar_t>/65536 -0.9940 -0.9940 0 0 0 0
BM_formatted_size_string<wchar_t>/131072 -0.9971 -0.9971 0 0 0 0
BM_formatted_size_string<wchar_t>/262144 -0.9986 -0.9986 0 0 0 0
BM_formatted_size_string<wchar_t>/524288 -0.9993 -0.9993 0 0 0 0
BM_formatted_size_string<wchar_t>/1048576 -0.9997 -0.9997 0 0 0 0
OVERALL_GEOMEAN -0.8740 -0.8742 0 0 0 0
---
Full diff: https://github.com/llvm/llvm-project/pull/101835.diff
1 File Affected:
- (modified) libcxx/include/__format/buffer.h (+29-22)
``````````diff
diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h
index c41ea73b3eb7e..ca2334f93fd04 100644
--- a/libcxx/include/__format/buffer.h
+++ b/libcxx/include/__format/buffer.h
@@ -603,28 +603,6 @@ class _LIBCPP_TEMPLATE_VIS __format_buffer {
typename __writer_selector<_OutIt, _CharT>::type __writer_;
};
-/// A buffer that counts the number of insertions.
-///
-/// Since \ref formatted_size only needs to know the size, the output itself is
-/// discarded.
-template <__fmt_char_type _CharT>
-class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer {
-public:
- _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return __output_.__make_output_iterator(); }
-
- _LIBCPP_HIDE_FROM_ABI void __flush(const _CharT*, size_t __n) { __size_ += __n; }
-
- _LIBCPP_HIDE_FROM_ABI size_t __result() && {
- __output_.__flush(0);
- return __size_;
- }
-
-private:
- __internal_storage<_CharT> __storage_;
- __output_buffer<_CharT> __output_{__storage_.__begin(), __storage_.__buffer_size, this};
- size_t __size_{0};
-};
-
// ***** ***** ***** LLVM-20 classes ***** ***** *****
// A dynamically growing buffer.
@@ -817,6 +795,35 @@ class _LIBCPP_TEMPLATE_VIS __format_to_n_buffer : private __buffer_selector<_Out
__max_output_size __max_output_size_;
};
+// A buffer that counts the number of insertions.
+//
+// Since formatted_size only needs to know the size, the output itself is
+// discarded.
+template <__fmt_char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer : private __output_buffer<_CharT> {
+public:
+ using _Base = __output_buffer<_CharT>;
+
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __formatted_size_buffer()
+ : _Base{nullptr, 0, __prepare_write, std::addressof(__max_output_size_)} {}
+
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return _Base::__make_output_iterator(); }
+
+ // This function does not need to be r-value qualified, however this is
+ // consistent with similar objects.
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __result() && { return __max_output_size_.__code_units_written(); }
+
+private:
+ __max_output_size __max_output_size_{0};
+
+ _LIBCPP_HIDE_FROM_ABI static void
+ __prepare_write([[maybe_unused]] __output_buffer<_CharT>& __buffer, [[maybe_unused]] size_t __size_hint) {
+ // Note this function does not satisfy the requirement of giving a 1 code unit buffer.
+ _LIBCPP_ASSERT_INTERNAL(
+ false, "Since __max_output_size_.__max_size_ == 0 there should never be call to this function.");
+ }
+};
+
// ***** ***** ***** LLVM-19 and LLVM-20 class ***** ***** *****
// A dynamically growing buffer intended to be used for retargeting a context.
``````````
</details>
https://github.com/llvm/llvm-project/pull/101835
More information about the llvm-branch-commits
mailing list