[llvm-branch-commits] [libcxx] [libc++][format][6/7] Optimizes formatted_size. (PR #101835)

Mark de Wever via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Aug 3 09:06:37 PDT 2024


https://github.com/mordante created https://github.com/llvm/llvm-project/pull/101835

__formatted_size_buffer is not used in the public library interface, so these changes do not break the ABI.

Before
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                47.6 ns         47.6 ns     14729701 bytes_per_second=20.055Mi/s
BM_formatted_size_string<char>/2                23.8 ns         23.8 ns     29445832 bytes_per_second=80.1657Mi/s
BM_formatted_size_string<char>/4                11.9 ns         11.9 ns     58813944 bytes_per_second=320.771Mi/s
BM_formatted_size_string<char>/8                5.95 ns         5.95 ns    117601928 bytes_per_second=1.25307Gi/s
BM_formatted_size_string<char>/16               2.97 ns         2.97 ns    235800528 bytes_per_second=5.02342Gi/s
BM_formatted_size_string<char>/32               1.49 ns         1.49 ns    471177152 bytes_per_second=20.0428Gi/s
BM_formatted_size_string<char>/64              0.745 ns        0.744 ns    942161408 bytes_per_second=80.1172Gi/s
BM_formatted_size_string<char>/128             0.377 ns        0.376 ns   1861005568 bytes_per_second=316.85Gi/s
BM_formatted_size_string<char>/256             0.200 ns        0.200 ns   3503415296 bytes_per_second=1.16508Ti/s
BM_formatted_size_string<char>/512             0.111 ns        0.110 ns   6351184384 bytes_per_second=4.21671Ti/s
BM_formatted_size_string<char>/1024            0.067 ns        0.067 ns   10441098240 bytes_per_second=13.9409Ti/s
BM_formatted_size_string<char>/2048            0.045 ns        0.045 ns   15404886016 bytes_per_second=41.0182Ti/s
BM_formatted_size_string<char>/4096            0.036 ns        0.036 ns   19634089984 bytes_per_second=104.431Ti/s
BM_formatted_size_string<char>/8192            0.030 ns        0.030 ns   23265501184 bytes_per_second=247.504Ti/s
BM_formatted_size_string<char>/16384           0.027 ns        0.027 ns   25556238336 bytes_per_second=545.066Ti/s
BM_formatted_size_string<char>/32768           0.027 ns        0.027 ns   26036731904 bytes_per_second=1.08282Pi/s
BM_formatted_size_string<char>/65536           0.027 ns        0.027 ns   26192379904 bytes_per_second=2.17753Pi/s
BM_formatted_size_string<char>/131072          0.026 ns        0.026 ns   26622033920 bytes_per_second=4.4243Pi/s
BM_formatted_size_string<char>/262144          0.027 ns        0.027 ns   25452085248 bytes_per_second=8.48295Pi/s
BM_formatted_size_string<char>/524288          0.028 ns        0.028 ns   24593825792 bytes_per_second=16.3639Pi/s
BM_formatted_size_string<char>/1048576         0.028 ns        0.028 ns   24775753728 bytes_per_second=32.8865Pi/s
BM_formatted_size_string<wchar_t>/1             47.0 ns         46.9 ns     14912646 bytes_per_second=81.3132Mi/s
BM_formatted_size_string<wchar_t>/2             23.4 ns         23.3 ns     29964298 bytes_per_second=326.845Mi/s
BM_formatted_size_string<wchar_t>/4             11.7 ns         11.7 ns     59734380 bytes_per_second=1.26993Gi/s
BM_formatted_size_string<wchar_t>/8             5.84 ns         5.84 ns    120131824 bytes_per_second=5.10665Gi/s
BM_formatted_size_string<wchar_t>/16            2.92 ns         2.92 ns    239874128 bytes_per_second=20.4273Gi/s
BM_formatted_size_string<wchar_t>/32            1.48 ns         1.48 ns    473672928 bytes_per_second=80.6502Gi/s
BM_formatted_size_string<wchar_t>/64           0.797 ns        0.796 ns    877660480 bytes_per_second=299.351Gi/s
BM_formatted_size_string<wchar_t>/128          0.450 ns        0.449 ns   1568318336 bytes_per_second=1.03679Ti/s
BM_formatted_size_string<wchar_t>/256          0.273 ns        0.273 ns   2571588096 bytes_per_second=3.41553Ti/s
BM_formatted_size_string<wchar_t>/512          0.181 ns        0.181 ns   3868106240 bytes_per_second=10.2834Ti/s
BM_formatted_size_string<wchar_t>/1024         0.143 ns        0.143 ns   4902914048 bytes_per_second=26.0513Ti/s
BM_formatted_size_string<wchar_t>/2048         0.121 ns        0.121 ns   5801660416 bytes_per_second=61.775Ti/s
BM_formatted_size_string<wchar_t>/4096         0.110 ns        0.110 ns   6380191744 bytes_per_second=135.763Ti/s
BM_formatted_size_string<wchar_t>/8192         0.107 ns        0.107 ns   6531432448 bytes_per_second=278.051Ti/s
BM_formatted_size_string<wchar_t>/16384        0.107 ns        0.107 ns   6531842048 bytes_per_second=556.731Ti/s
BM_formatted_size_string<wchar_t>/32768        0.106 ns        0.105 ns   6648430592 bytes_per_second=1.10374Pi/s
BM_formatted_size_string<wchar_t>/65536        0.110 ns        0.110 ns   6352273408 bytes_per_second=2.11267Pi/s
BM_formatted_size_string<wchar_t>/131072       0.114 ns        0.114 ns   6139805696 bytes_per_second=4.07511Pi/s
BM_formatted_size_string<wchar_t>/262144       0.114 ns        0.114 ns   6169821184 bytes_per_second=8.20475Pi/s
BM_formatted_size_string<wchar_t>/524288       0.114 ns        0.113 ns   6168248320 bytes_per_second=16.4288Pi/s
BM_formatted_size_string<wchar_t>/1048576      0.120 ns        0.120 ns   5817499648 bytes_per_second=30.9853Pi/s

After
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                44.6 ns         44.4 ns     15768750 bytes_per_second=21.4556Mi/s
BM_formatted_size_string<char>/2                22.3 ns         22.2 ns     31477648 bytes_per_second=85.8719Mi/s
BM_formatted_size_string<char>/4                11.1 ns         11.1 ns     63031396 bytes_per_second=343.327Mi/s
BM_formatted_size_string<char>/8                5.57 ns         5.56 ns    126105968 bytes_per_second=1.34101Gi/s
BM_formatted_size_string<char>/16               2.78 ns         2.78 ns    251793552 bytes_per_second=5.36504Gi/s
BM_formatted_size_string<char>/32               1.39 ns         1.39 ns    504195136 bytes_per_second=21.4672Gi/s
BM_formatted_size_string<char>/64              0.696 ns        0.694 ns   1008341184 bytes_per_second=85.8541Gi/s
BM_formatted_size_string<char>/128             0.348 ns        0.347 ns   2016425984 bytes_per_second=343.386Gi/s
BM_formatted_size_string<char>/256             0.174 ns        0.174 ns   4031530496 bytes_per_second=1.33811Ti/s
BM_formatted_size_string<char>/512             0.087 ns        0.087 ns   8058140160 bytes_per_second=5.36282Ti/s
BM_formatted_size_string<char>/1024            0.044 ns        0.043 ns   16131134464 bytes_per_second=21.4476Ti/s
BM_formatted_size_string<char>/2048            0.022 ns        0.022 ns   32242241536 bytes_per_second=85.938Ti/s
BM_formatted_size_string<char>/4096            0.011 ns        0.011 ns   64521842688 bytes_per_second=343.596Ti/s
BM_formatted_size_string<char>/8192            0.005 ns        0.005 ns   129048797184 bytes_per_second=1.34065Pi/s
BM_formatted_size_string<char>/16384           0.003 ns        0.003 ns   258152202240 bytes_per_second=5.37696Pi/s
BM_formatted_size_string<char>/32768           0.001 ns        0.001 ns   516496818176 bytes_per_second=21.4635Pi/s
BM_formatted_size_string<char>/65536           0.001 ns        0.001 ns   1000000061440 bytes_per_second=85.868Pi/s
BM_formatted_size_string<char>/131072          0.000 ns        0.000 ns   1000000061440 bytes_per_second=343.451Pi/s
BM_formatted_size_string<char>/262144          0.000 ns        0.000 ns   1000000192512 bytes_per_second=1.33951Ei/s
BM_formatted_size_string<char>/524288          0.000 ns        0.000 ns   1000000192512 bytes_per_second=5.35714Ei/s
BM_formatted_size_string<char>/1048576         0.000 ns        0.000 ns   1000000716800 bytes_per_second=21.4307Ei/s
BM_formatted_size_string<wchar_t>/1             43.3 ns         43.2 ns     16196853 bytes_per_second=88.2127Mi/s
BM_formatted_size_string<wchar_t>/2             21.6 ns         21.6 ns     32428704 bytes_per_second=353.328Mi/s
BM_formatted_size_string<wchar_t>/4             10.8 ns         10.8 ns     64858356 bytes_per_second=1.38065Gi/s
BM_formatted_size_string<wchar_t>/8             5.42 ns         5.40 ns    129503096 bytes_per_second=5.51619Gi/s
BM_formatted_size_string<wchar_t>/16            2.71 ns         2.70 ns    259444512 bytes_per_second=22.056Gi/s
BM_formatted_size_string<wchar_t>/32            1.35 ns         1.35 ns    517963680 bytes_per_second=88.217Gi/s
BM_formatted_size_string<wchar_t>/64           0.678 ns        0.676 ns   1035243264 bytes_per_second=352.499Gi/s
BM_formatted_size_string<wchar_t>/128          0.339 ns        0.338 ns   2069611264 bytes_per_second=1.37816Ti/s
BM_formatted_size_string<wchar_t>/256          0.169 ns        0.169 ns   4146004480 bytes_per_second=5.51363Ti/s
BM_formatted_size_string<wchar_t>/512          0.085 ns        0.085 ns   8282438656 bytes_per_second=22.023Ti/s
BM_formatted_size_string<wchar_t>/1024         0.042 ns        0.042 ns   16622990336 bytes_per_second=88.6687Ti/s
BM_formatted_size_string<wchar_t>/2048         0.021 ns        0.021 ns   33236293632 bytes_per_second=354.449Ti/s
BM_formatted_size_string<wchar_t>/4096         0.011 ns        0.011 ns   66449469440 bytes_per_second=1.38568Pi/s
BM_formatted_size_string<wchar_t>/8192         0.005 ns        0.005 ns   133098782720 bytes_per_second=5.54274Pi/s
BM_formatted_size_string<wchar_t>/16384        0.003 ns        0.003 ns   266227466240 bytes_per_second=22.1089Pi/s
BM_formatted_size_string<wchar_t>/32768        0.001 ns        0.001 ns   532288503808 bytes_per_second=88.5187Pi/s
BM_formatted_size_string<wchar_t>/65536        0.001 ns        0.001 ns   1000000061440 bytes_per_second=354.202Pi/s
BM_formatted_size_string<wchar_t>/131072       0.000 ns        0.000 ns   1000000061440 bytes_per_second=1.38438Ei/s
BM_formatted_size_string<wchar_t>/262144       0.000 ns        0.000 ns   1000000192512 bytes_per_second=5.54102Ei/s
BM_formatted_size_string<wchar_t>/524288       0.000 ns        0.000 ns   1000000192512 bytes_per_second=22.0527Ei/s
BM_formatted_size_string<wchar_t>/1048576      0.000 ns        0.000 ns   1000000716800 bytes_per_second=88.673Ei/s

Comparison
Benchmark                                                   Time             CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                         -0.0639         -0.0653            48            45            48            44
BM_formatted_size_string<char>/2                         -0.0651         -0.0665            24            22            24            22
BM_formatted_size_string<char>/4                         -0.0642         -0.0657            12            11            12            11
BM_formatted_size_string<char>/8                         -0.0643         -0.0656             6             6             6             6
BM_formatted_size_string<char>/16                        -0.0623         -0.0637             3             3             3             3
BM_formatted_size_string<char>/32                        -0.0650         -0.0664             1             1             1             1
BM_formatted_size_string<char>/64                        -0.0655         -0.0668             1             1             1             1
BM_formatted_size_string<char>/128                       -0.0756         -0.0773             0             0             0             0
BM_formatted_size_string<char>/256                       -0.1280         -0.1293             0             0             0             0
BM_formatted_size_string<char>/512                       -0.2126         -0.2137             0             0             0             0
BM_formatted_size_string<char>/1024                      -0.3489         -0.3500             0             0             0             0
BM_formatted_size_string<char>/2048                      -0.5220         -0.5227             0             0             0             0
BM_formatted_size_string<char>/4096                      -0.6955         -0.6961             0             0             0             0
BM_formatted_size_string<char>/8192                      -0.8194         -0.8197             0             0             0             0
BM_formatted_size_string<char>/16384                     -0.9009         -0.9010             0             0             0             0
BM_formatted_size_string<char>/32768                     -0.9495         -0.9496             0             0             0             0
BM_formatted_size_string<char>/65536                     -0.9746         -0.9746             0             0             0             0
BM_formatted_size_string<char>/131072                    -0.9871         -0.9871             0             0             0             0
BM_formatted_size_string<char>/262144                    -0.9938         -0.9938             0             0             0             0
BM_formatted_size_string<char>/524288                    -0.9970         -0.9970             0             0             0             0
BM_formatted_size_string<char>/1048576                   -0.9985         -0.9985             0             0             0             0
BM_formatted_size_string<wchar_t>/1                      -0.0769         -0.0782            47            43            47            43
BM_formatted_size_string<wchar_t>/2                      -0.0737         -0.0750            23            22            23            22
BM_formatted_size_string<wchar_t>/4                      -0.0788         -0.0802            12            11            12            11
BM_formatted_size_string<wchar_t>/8                      -0.0729         -0.0742             6             5             6             5
BM_formatted_size_string<wchar_t>/16                     -0.0724         -0.0738             3             3             3             3
BM_formatted_size_string<wchar_t>/32                     -0.0844         -0.0858             1             1             1             1
BM_formatted_size_string<wchar_t>/64                     -0.1493         -0.1508             1             1             1             1
BM_formatted_size_string<wchar_t>/128                    -0.2466         -0.2477             0             0             0             0
BM_formatted_size_string<wchar_t>/256                    -0.3796         -0.3805             0             0             0             0
BM_formatted_size_string<wchar_t>/512                    -0.5323         -0.5331             0             0             0             0
BM_formatted_size_string<wchar_t>/1024                   -0.7058         -0.7062             0             0             0             0
BM_formatted_size_string<wchar_t>/2048                   -0.8255         -0.8257             0             0             0             0
BM_formatted_size_string<wchar_t>/4096                   -0.9042         -0.9043             0             0             0             0
BM_formatted_size_string<wchar_t>/8192                   -0.9509         -0.9510             0             0             0             0
BM_formatted_size_string<wchar_t>/16384                  -0.9754         -0.9754             0             0             0             0
BM_formatted_size_string<wchar_t>/32768                  -0.9875         -0.9875             0             0             0             0
BM_formatted_size_string<wchar_t>/65536                  -0.9940         -0.9940             0             0             0             0
BM_formatted_size_string<wchar_t>/131072                 -0.9971         -0.9971             0             0             0             0
BM_formatted_size_string<wchar_t>/262144                 -0.9986         -0.9986             0             0             0             0
BM_formatted_size_string<wchar_t>/524288                 -0.9993         -0.9993             0             0             0             0
BM_formatted_size_string<wchar_t>/1048576                -0.9997         -0.9997             0             0             0             0
OVERALL_GEOMEAN                                          -0.8740         -0.8742             0             0             0             0

>From b4563824913a27cc40d8f1415e96949aeb60b0a3 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq at xs4all.nl>
Date: Sat, 30 Mar 2024 17:35:56 +0100
Subject: [PATCH] [libc++][format][6/7] Optimizes formatted_size.

__formatted_size_buffer is not used in the public library interface so the
changes are not an ABI break.

Before
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                47.6 ns         47.6 ns     14729701 bytes_per_second=20.055Mi/s
BM_formatted_size_string<char>/2                23.8 ns         23.8 ns     29445832 bytes_per_second=80.1657Mi/s
BM_formatted_size_string<char>/4                11.9 ns         11.9 ns     58813944 bytes_per_second=320.771Mi/s
BM_formatted_size_string<char>/8                5.95 ns         5.95 ns    117601928 bytes_per_second=1.25307Gi/s
BM_formatted_size_string<char>/16               2.97 ns         2.97 ns    235800528 bytes_per_second=5.02342Gi/s
BM_formatted_size_string<char>/32               1.49 ns         1.49 ns    471177152 bytes_per_second=20.0428Gi/s
BM_formatted_size_string<char>/64              0.745 ns        0.744 ns    942161408 bytes_per_second=80.1172Gi/s
BM_formatted_size_string<char>/128             0.377 ns        0.376 ns   1861005568 bytes_per_second=316.85Gi/s
BM_formatted_size_string<char>/256             0.200 ns        0.200 ns   3503415296 bytes_per_second=1.16508Ti/s
BM_formatted_size_string<char>/512             0.111 ns        0.110 ns   6351184384 bytes_per_second=4.21671Ti/s
BM_formatted_size_string<char>/1024            0.067 ns        0.067 ns   10441098240 bytes_per_second=13.9409Ti/s
BM_formatted_size_string<char>/2048            0.045 ns        0.045 ns   15404886016 bytes_per_second=41.0182Ti/s
BM_formatted_size_string<char>/4096            0.036 ns        0.036 ns   19634089984 bytes_per_second=104.431Ti/s
BM_formatted_size_string<char>/8192            0.030 ns        0.030 ns   23265501184 bytes_per_second=247.504Ti/s
BM_formatted_size_string<char>/16384           0.027 ns        0.027 ns   25556238336 bytes_per_second=545.066Ti/s
BM_formatted_size_string<char>/32768           0.027 ns        0.027 ns   26036731904 bytes_per_second=1.08282Pi/s
BM_formatted_size_string<char>/65536           0.027 ns        0.027 ns   26192379904 bytes_per_second=2.17753Pi/s
BM_formatted_size_string<char>/131072          0.026 ns        0.026 ns   26622033920 bytes_per_second=4.4243Pi/s
BM_formatted_size_string<char>/262144          0.027 ns        0.027 ns   25452085248 bytes_per_second=8.48295Pi/s
BM_formatted_size_string<char>/524288          0.028 ns        0.028 ns   24593825792 bytes_per_second=16.3639Pi/s
BM_formatted_size_string<char>/1048576         0.028 ns        0.028 ns   24775753728 bytes_per_second=32.8865Pi/s
BM_formatted_size_string<wchar_t>/1             47.0 ns         46.9 ns     14912646 bytes_per_second=81.3132Mi/s
BM_formatted_size_string<wchar_t>/2             23.4 ns         23.3 ns     29964298 bytes_per_second=326.845Mi/s
BM_formatted_size_string<wchar_t>/4             11.7 ns         11.7 ns     59734380 bytes_per_second=1.26993Gi/s
BM_formatted_size_string<wchar_t>/8             5.84 ns         5.84 ns    120131824 bytes_per_second=5.10665Gi/s
BM_formatted_size_string<wchar_t>/16            2.92 ns         2.92 ns    239874128 bytes_per_second=20.4273Gi/s
BM_formatted_size_string<wchar_t>/32            1.48 ns         1.48 ns    473672928 bytes_per_second=80.6502Gi/s
BM_formatted_size_string<wchar_t>/64           0.797 ns        0.796 ns    877660480 bytes_per_second=299.351Gi/s
BM_formatted_size_string<wchar_t>/128          0.450 ns        0.449 ns   1568318336 bytes_per_second=1.03679Ti/s
BM_formatted_size_string<wchar_t>/256          0.273 ns        0.273 ns   2571588096 bytes_per_second=3.41553Ti/s
BM_formatted_size_string<wchar_t>/512          0.181 ns        0.181 ns   3868106240 bytes_per_second=10.2834Ti/s
BM_formatted_size_string<wchar_t>/1024         0.143 ns        0.143 ns   4902914048 bytes_per_second=26.0513Ti/s
BM_formatted_size_string<wchar_t>/2048         0.121 ns        0.121 ns   5801660416 bytes_per_second=61.775Ti/s
BM_formatted_size_string<wchar_t>/4096         0.110 ns        0.110 ns   6380191744 bytes_per_second=135.763Ti/s
BM_formatted_size_string<wchar_t>/8192         0.107 ns        0.107 ns   6531432448 bytes_per_second=278.051Ti/s
BM_formatted_size_string<wchar_t>/16384        0.107 ns        0.107 ns   6531842048 bytes_per_second=556.731Ti/s
BM_formatted_size_string<wchar_t>/32768        0.106 ns        0.105 ns   6648430592 bytes_per_second=1.10374Pi/s
BM_formatted_size_string<wchar_t>/65536        0.110 ns        0.110 ns   6352273408 bytes_per_second=2.11267Pi/s
BM_formatted_size_string<wchar_t>/131072       0.114 ns        0.114 ns   6139805696 bytes_per_second=4.07511Pi/s
BM_formatted_size_string<wchar_t>/262144       0.114 ns        0.114 ns   6169821184 bytes_per_second=8.20475Pi/s
BM_formatted_size_string<wchar_t>/524288       0.114 ns        0.113 ns   6168248320 bytes_per_second=16.4288Pi/s
BM_formatted_size_string<wchar_t>/1048576      0.120 ns        0.120 ns   5817499648 bytes_per_second=30.9853Pi/s

After
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                44.6 ns         44.4 ns     15768750 bytes_per_second=21.4556Mi/s
BM_formatted_size_string<char>/2                22.3 ns         22.2 ns     31477648 bytes_per_second=85.8719Mi/s
BM_formatted_size_string<char>/4                11.1 ns         11.1 ns     63031396 bytes_per_second=343.327Mi/s
BM_formatted_size_string<char>/8                5.57 ns         5.56 ns    126105968 bytes_per_second=1.34101Gi/s
BM_formatted_size_string<char>/16               2.78 ns         2.78 ns    251793552 bytes_per_second=5.36504Gi/s
BM_formatted_size_string<char>/32               1.39 ns         1.39 ns    504195136 bytes_per_second=21.4672Gi/s
BM_formatted_size_string<char>/64              0.696 ns        0.694 ns   1008341184 bytes_per_second=85.8541Gi/s
BM_formatted_size_string<char>/128             0.348 ns        0.347 ns   2016425984 bytes_per_second=343.386Gi/s
BM_formatted_size_string<char>/256             0.174 ns        0.174 ns   4031530496 bytes_per_second=1.33811Ti/s
BM_formatted_size_string<char>/512             0.087 ns        0.087 ns   8058140160 bytes_per_second=5.36282Ti/s
BM_formatted_size_string<char>/1024            0.044 ns        0.043 ns   16131134464 bytes_per_second=21.4476Ti/s
BM_formatted_size_string<char>/2048            0.022 ns        0.022 ns   32242241536 bytes_per_second=85.938Ti/s
BM_formatted_size_string<char>/4096            0.011 ns        0.011 ns   64521842688 bytes_per_second=343.596Ti/s
BM_formatted_size_string<char>/8192            0.005 ns        0.005 ns   129048797184 bytes_per_second=1.34065Pi/s
BM_formatted_size_string<char>/16384           0.003 ns        0.003 ns   258152202240 bytes_per_second=5.37696Pi/s
BM_formatted_size_string<char>/32768           0.001 ns        0.001 ns   516496818176 bytes_per_second=21.4635Pi/s
BM_formatted_size_string<char>/65536           0.001 ns        0.001 ns   1000000061440 bytes_per_second=85.868Pi/s
BM_formatted_size_string<char>/131072          0.000 ns        0.000 ns   1000000061440 bytes_per_second=343.451Pi/s
BM_formatted_size_string<char>/262144          0.000 ns        0.000 ns   1000000192512 bytes_per_second=1.33951Ei/s
BM_formatted_size_string<char>/524288          0.000 ns        0.000 ns   1000000192512 bytes_per_second=5.35714Ei/s
BM_formatted_size_string<char>/1048576         0.000 ns        0.000 ns   1000000716800 bytes_per_second=21.4307Ei/s
BM_formatted_size_string<wchar_t>/1             43.3 ns         43.2 ns     16196853 bytes_per_second=88.2127Mi/s
BM_formatted_size_string<wchar_t>/2             21.6 ns         21.6 ns     32428704 bytes_per_second=353.328Mi/s
BM_formatted_size_string<wchar_t>/4             10.8 ns         10.8 ns     64858356 bytes_per_second=1.38065Gi/s
BM_formatted_size_string<wchar_t>/8             5.42 ns         5.40 ns    129503096 bytes_per_second=5.51619Gi/s
BM_formatted_size_string<wchar_t>/16            2.71 ns         2.70 ns    259444512 bytes_per_second=22.056Gi/s
BM_formatted_size_string<wchar_t>/32            1.35 ns         1.35 ns    517963680 bytes_per_second=88.217Gi/s
BM_formatted_size_string<wchar_t>/64           0.678 ns        0.676 ns   1035243264 bytes_per_second=352.499Gi/s
BM_formatted_size_string<wchar_t>/128          0.339 ns        0.338 ns   2069611264 bytes_per_second=1.37816Ti/s
BM_formatted_size_string<wchar_t>/256          0.169 ns        0.169 ns   4146004480 bytes_per_second=5.51363Ti/s
BM_formatted_size_string<wchar_t>/512          0.085 ns        0.085 ns   8282438656 bytes_per_second=22.023Ti/s
BM_formatted_size_string<wchar_t>/1024         0.042 ns        0.042 ns   16622990336 bytes_per_second=88.6687Ti/s
BM_formatted_size_string<wchar_t>/2048         0.021 ns        0.021 ns   33236293632 bytes_per_second=354.449Ti/s
BM_formatted_size_string<wchar_t>/4096         0.011 ns        0.011 ns   66449469440 bytes_per_second=1.38568Pi/s
BM_formatted_size_string<wchar_t>/8192         0.005 ns        0.005 ns   133098782720 bytes_per_second=5.54274Pi/s
BM_formatted_size_string<wchar_t>/16384        0.003 ns        0.003 ns   266227466240 bytes_per_second=22.1089Pi/s
BM_formatted_size_string<wchar_t>/32768        0.001 ns        0.001 ns   532288503808 bytes_per_second=88.5187Pi/s
BM_formatted_size_string<wchar_t>/65536        0.001 ns        0.001 ns   1000000061440 bytes_per_second=354.202Pi/s
BM_formatted_size_string<wchar_t>/131072       0.000 ns        0.000 ns   1000000061440 bytes_per_second=1.38438Ei/s
BM_formatted_size_string<wchar_t>/262144       0.000 ns        0.000 ns   1000000192512 bytes_per_second=5.54102Ei/s
BM_formatted_size_string<wchar_t>/524288       0.000 ns        0.000 ns   1000000192512 bytes_per_second=22.0527Ei/s
BM_formatted_size_string<wchar_t>/1048576      0.000 ns        0.000 ns   1000000716800 bytes_per_second=88.673Ei/s

Comparison
Benchmark                                                   Time             CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                         -0.0639         -0.0653            48            45            48            44
BM_formatted_size_string<char>/2                         -0.0651         -0.0665            24            22            24            22
BM_formatted_size_string<char>/4                         -0.0642         -0.0657            12            11            12            11
BM_formatted_size_string<char>/8                         -0.0643         -0.0656             6             6             6             6
BM_formatted_size_string<char>/16                        -0.0623         -0.0637             3             3             3             3
BM_formatted_size_string<char>/32                        -0.0650         -0.0664             1             1             1             1
BM_formatted_size_string<char>/64                        -0.0655         -0.0668             1             1             1             1
BM_formatted_size_string<char>/128                       -0.0756         -0.0773             0             0             0             0
BM_formatted_size_string<char>/256                       -0.1280         -0.1293             0             0             0             0
BM_formatted_size_string<char>/512                       -0.2126         -0.2137             0             0             0             0
BM_formatted_size_string<char>/1024                      -0.3489         -0.3500             0             0             0             0
BM_formatted_size_string<char>/2048                      -0.5220         -0.5227             0             0             0             0
BM_formatted_size_string<char>/4096                      -0.6955         -0.6961             0             0             0             0
BM_formatted_size_string<char>/8192                      -0.8194         -0.8197             0             0             0             0
BM_formatted_size_string<char>/16384                     -0.9009         -0.9010             0             0             0             0
BM_formatted_size_string<char>/32768                     -0.9495         -0.9496             0             0             0             0
BM_formatted_size_string<char>/65536                     -0.9746         -0.9746             0             0             0             0
BM_formatted_size_string<char>/131072                    -0.9871         -0.9871             0             0             0             0
BM_formatted_size_string<char>/262144                    -0.9938         -0.9938             0             0             0             0
BM_formatted_size_string<char>/524288                    -0.9970         -0.9970             0             0             0             0
BM_formatted_size_string<char>/1048576                   -0.9985         -0.9985             0             0             0             0
BM_formatted_size_string<wchar_t>/1                      -0.0769         -0.0782            47            43            47            43
BM_formatted_size_string<wchar_t>/2                      -0.0737         -0.0750            23            22            23            22
BM_formatted_size_string<wchar_t>/4                      -0.0788         -0.0802            12            11            12            11
BM_formatted_size_string<wchar_t>/8                      -0.0729         -0.0742             6             5             6             5
BM_formatted_size_string<wchar_t>/16                     -0.0724         -0.0738             3             3             3             3
BM_formatted_size_string<wchar_t>/32                     -0.0844         -0.0858             1             1             1             1
BM_formatted_size_string<wchar_t>/64                     -0.1493         -0.1508             1             1             1             1
BM_formatted_size_string<wchar_t>/128                    -0.2466         -0.2477             0             0             0             0
BM_formatted_size_string<wchar_t>/256                    -0.3796         -0.3805             0             0             0             0
BM_formatted_size_string<wchar_t>/512                    -0.5323         -0.5331             0             0             0             0
BM_formatted_size_string<wchar_t>/1024                   -0.7058         -0.7062             0             0             0             0
BM_formatted_size_string<wchar_t>/2048                   -0.8255         -0.8257             0             0             0             0
BM_formatted_size_string<wchar_t>/4096                   -0.9042         -0.9043             0             0             0             0
BM_formatted_size_string<wchar_t>/8192                   -0.9509         -0.9510             0             0             0             0
BM_formatted_size_string<wchar_t>/16384                  -0.9754         -0.9754             0             0             0             0
BM_formatted_size_string<wchar_t>/32768                  -0.9875         -0.9875             0             0             0             0
BM_formatted_size_string<wchar_t>/65536                  -0.9940         -0.9940             0             0             0             0
BM_formatted_size_string<wchar_t>/131072                 -0.9971         -0.9971             0             0             0             0
BM_formatted_size_string<wchar_t>/262144                 -0.9986         -0.9986             0             0             0             0
BM_formatted_size_string<wchar_t>/524288                 -0.9993         -0.9993             0             0             0             0
BM_formatted_size_string<wchar_t>/1048576                -0.9997         -0.9997             0             0             0             0
OVERALL_GEOMEAN                                          -0.8740         -0.8742             0             0             0             0
---
 libcxx/include/__format/buffer.h | 51 ++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h
index c41ea73b3eb7e..ca2334f93fd04 100644
--- a/libcxx/include/__format/buffer.h
+++ b/libcxx/include/__format/buffer.h
@@ -603,28 +603,6 @@ class _LIBCPP_TEMPLATE_VIS __format_buffer {
   typename __writer_selector<_OutIt, _CharT>::type __writer_;
 };
 
-/// A buffer that counts the number of insertions.
-///
-/// Since \ref formatted_size only needs to know the size, the output itself is
-/// discarded.
-template <__fmt_char_type _CharT>
-class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer {
-public:
-  _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return __output_.__make_output_iterator(); }
-
-  _LIBCPP_HIDE_FROM_ABI void __flush(const _CharT*, size_t __n) { __size_ += __n; }
-
-  _LIBCPP_HIDE_FROM_ABI size_t __result() && {
-    __output_.__flush(0);
-    return __size_;
-  }
-
-private:
-  __internal_storage<_CharT> __storage_;
-  __output_buffer<_CharT> __output_{__storage_.__begin(), __storage_.__buffer_size, this};
-  size_t __size_{0};
-};
-
 // ***** ***** ***** LLVM-20 classes ***** ***** *****
 
 // A dynamically growing buffer.
@@ -817,6 +795,35 @@ class _LIBCPP_TEMPLATE_VIS __format_to_n_buffer : private __buffer_selector<_Out
   __max_output_size __max_output_size_;
 };
 
+// A buffer that only counts the number of code units written to it.
+//
+// Since formatted_size only needs to know the size, the output itself is
+// discarded.
+template <__fmt_char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer : private __output_buffer<_CharT> {
+public:
+  using _Base = __output_buffer<_CharT>;
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __formatted_size_buffer()
+      : _Base{nullptr, 0, __prepare_write, std::addressof(__max_output_size_)} {}
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return _Base::__make_output_iterator(); }
+
+  // The r-value qualifier is not needed for correctness; it is only used to
+  // be consistent with similar objects.
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __result() && { return __max_output_size_.__code_units_written(); }
+
+private:
+  __max_output_size __max_output_size_{0};
+
+  _LIBCPP_HIDE_FROM_ABI static void
+  __prepare_write([[maybe_unused]] __output_buffer<_CharT>& __buffer, [[maybe_unused]] size_t __size_hint) {
+    // Note: this function does not satisfy the requirement of giving a 1 code unit buffer.
+    _LIBCPP_ASSERT_INTERNAL(
+        false, "Since __max_output_size_.__max_size_ == 0 there should never be call to this function.");
+  }
+};
+
 // ***** ***** ***** LLVM-19 and LLVM-20 class ***** ***** *****
 
 // A dynamically growing buffer intended to be used for retargeting a context.



More information about the llvm-branch-commits mailing list