[libcxx-commits] [libcxx] [libc++] Improve the performance of std::make_heap a bit (PR #154092)

via libcxx-commits libcxx-commits at lists.llvm.org
Mon Aug 25 09:49:14 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libcxx

Author: Nikolas Klauser (philnik777)

<details>
<summary>Changes</summary>

```
----------------------------------------------------------------------------------------------------
Benchmark                                                                        old             new
----------------------------------------------------------------------------------------------------
BM_MakeHeap_uint32_Random_1                                                  1.02 ns         1.07 ns
BM_MakeHeap_uint32_Random_4                                                  3.35 ns         2.87 ns
BM_MakeHeap_uint32_Random_16                                                 3.11 ns         2.68 ns
BM_MakeHeap_uint32_Random_64                                                 3.24 ns         2.85 ns
BM_MakeHeap_uint32_Random_256                                                3.10 ns         2.77 ns
BM_MakeHeap_uint32_Random_1024                                               3.03 ns         2.63 ns
BM_MakeHeap_uint32_Random_16384                                              3.04 ns         2.63 ns
BM_MakeHeap_uint32_Random_262144                                             3.06 ns         2.65 ns
BM_MakeHeap_uint32_Ascending_1                                               1.04 ns         1.07 ns
BM_MakeHeap_uint32_Ascending_4                                               1.30 ns        0.879 ns
BM_MakeHeap_uint32_Ascending_16                                              1.23 ns        0.958 ns
BM_MakeHeap_uint32_Ascending_64                                              1.38 ns        0.879 ns
BM_MakeHeap_uint32_Ascending_256                                             1.59 ns        0.933 ns
BM_MakeHeap_uint32_Ascending_1024                                            1.54 ns        0.968 ns
BM_MakeHeap_uint32_Ascending_16384                                           1.58 ns         1.01 ns
BM_MakeHeap_uint32_Ascending_262144                                          1.69 ns         1.11 ns
BM_MakeHeap_uint32_Descending_1                                              1.07 ns         1.05 ns
BM_MakeHeap_uint32_Descending_4                                             0.809 ns        0.549 ns
BM_MakeHeap_uint32_Descending_16                                            0.705 ns        0.520 ns
BM_MakeHeap_uint32_Descending_64                                            0.692 ns        0.405 ns
BM_MakeHeap_uint32_Descending_256                                           0.729 ns        0.429 ns
BM_MakeHeap_uint32_Descending_1024                                          0.682 ns        0.378 ns
BM_MakeHeap_uint32_Descending_16384                                         0.666 ns        0.362 ns
BM_MakeHeap_uint32_Descending_262144                                        0.667 ns        0.361 ns
BM_MakeHeap_uint32_SingleElement_1                                           1.07 ns         1.10 ns
BM_MakeHeap_uint32_SingleElement_4                                           1.30 ns        0.666 ns
BM_MakeHeap_uint32_SingleElement_16                                          1.35 ns        0.861 ns
BM_MakeHeap_uint32_SingleElement_64                                          1.46 ns        0.827 ns
BM_MakeHeap_uint32_SingleElement_256                                         1.60 ns        0.896 ns
BM_MakeHeap_uint32_SingleElement_1024                                        1.55 ns        0.942 ns
BM_MakeHeap_uint32_SingleElement_16384                                       1.59 ns        0.986 ns
BM_MakeHeap_uint32_SingleElement_262144                                      1.64 ns         1.07 ns
BM_MakeHeap_uint32_PipeOrgan_1                                               1.04 ns         1.09 ns
BM_MakeHeap_uint32_PipeOrgan_4                                              0.989 ns        0.836 ns
BM_MakeHeap_uint32_PipeOrgan_16                                              1.24 ns        0.958 ns
BM_MakeHeap_uint32_PipeOrgan_64                                              1.41 ns        0.885 ns
BM_MakeHeap_uint32_PipeOrgan_256                                             1.57 ns        0.912 ns
BM_MakeHeap_uint32_PipeOrgan_1024                                            1.55 ns        0.951 ns
BM_MakeHeap_uint32_PipeOrgan_16384                                           1.58 ns        0.985 ns
BM_MakeHeap_uint32_PipeOrgan_262144                                          1.66 ns         1.12 ns
BM_MakeHeap_uint32_Heap_1                                                    1.08 ns         1.09 ns
BM_MakeHeap_uint32_Heap_4                                                   0.800 ns        0.572 ns
BM_MakeHeap_uint32_Heap_16                                                  0.704 ns        0.532 ns
BM_MakeHeap_uint32_Heap_64                                                  0.694 ns        0.405 ns
BM_MakeHeap_uint32_Heap_256                                                 0.736 ns        0.427 ns
BM_MakeHeap_uint32_Heap_1024                                                0.682 ns        0.381 ns
BM_MakeHeap_uint32_Heap_16384                                               0.665 ns        0.363 ns
BM_MakeHeap_uint32_Heap_262144                                              0.666 ns        0.372 ns
BM_MakeHeap_uint32_QuickSortAdversary_1                                      1.05 ns         1.09 ns
BM_MakeHeap_uint32_QuickSortAdversary_4                                      1.30 ns        0.843 ns
BM_MakeHeap_uint32_QuickSortAdversary_16                                     1.23 ns        0.946 ns
BM_MakeHeap_uint32_QuickSortAdversary_64                                     1.25 ns        0.725 ns
BM_MakeHeap_uint32_QuickSortAdversary_256                                    1.30 ns        0.757 ns
BM_MakeHeap_uint32_QuickSortAdversary_1024                                   1.26 ns        0.749 ns
BM_MakeHeap_uint32_QuickSortAdversary_16384                                 0.809 ns        0.455 ns
BM_MakeHeap_uint32_QuickSortAdversary_262144                                0.721 ns        0.376 ns
BM_MakeHeap_uint64_Random_1                                                  1.06 ns         1.04 ns
BM_MakeHeap_uint64_Random_4                                                  3.41 ns         2.86 ns
BM_MakeHeap_uint64_Random_16                                                 3.21 ns         2.79 ns
BM_MakeHeap_uint64_Random_64                                                 3.40 ns         3.09 ns
BM_MakeHeap_uint64_Random_256                                                3.22 ns         2.98 ns
BM_MakeHeap_uint64_Random_1024                                               3.10 ns         2.82 ns
BM_MakeHeap_uint64_Random_16384                                              3.10 ns         2.81 ns
BM_MakeHeap_uint64_Random_262144                                             3.18 ns         2.93 ns
BM_MakeHeap_uint64_Ascending_1                                               1.09 ns         1.08 ns
BM_MakeHeap_uint64_Ascending_4                                               1.35 ns        0.893 ns
BM_MakeHeap_uint64_Ascending_16                                              1.32 ns         1.01 ns
BM_MakeHeap_uint64_Ascending_64                                              1.50 ns        0.951 ns
BM_MakeHeap_uint64_Ascending_256                                             1.68 ns         1.01 ns
BM_MakeHeap_uint64_Ascending_1024                                            1.65 ns         1.00 ns
BM_MakeHeap_uint64_Ascending_16384                                           1.76 ns         1.09 ns
BM_MakeHeap_uint64_Ascending_262144                                          2.05 ns         1.32 ns
BM_MakeHeap_uint64_Descending_1                                              1.13 ns         1.07 ns
BM_MakeHeap_uint64_Descending_4                                             0.825 ns        0.589 ns
BM_MakeHeap_uint64_Descending_16                                            0.742 ns        0.531 ns
BM_MakeHeap_uint64_Descending_64                                            0.757 ns        0.502 ns
BM_MakeHeap_uint64_Descending_256                                           0.769 ns        0.463 ns
BM_MakeHeap_uint64_Descending_1024                                          0.711 ns        0.414 ns
BM_MakeHeap_uint64_Descending_16384                                         0.692 ns        0.385 ns
BM_MakeHeap_uint64_Descending_262144                                        0.692 ns        0.397 ns
BM_MakeHeap_uint64_SingleElement_1                                           1.09 ns         1.10 ns
BM_MakeHeap_uint64_SingleElement_4                                           1.33 ns        0.745 ns
BM_MakeHeap_uint64_SingleElement_16                                          1.41 ns        0.952 ns
BM_MakeHeap_uint64_SingleElement_64                                          1.58 ns        0.944 ns
BM_MakeHeap_uint64_SingleElement_256                                         1.71 ns        0.975 ns
BM_MakeHeap_uint64_SingleElement_1024                                        1.59 ns         1.01 ns
BM_MakeHeap_uint64_SingleElement_16384                                       1.72 ns         1.10 ns
BM_MakeHeap_uint64_SingleElement_262144                                      2.01 ns         1.42 ns
BM_MakeHeap_uint64_PipeOrgan_1                                               1.10 ns         1.15 ns
BM_MakeHeap_uint64_PipeOrgan_4                                               1.04 ns        0.889 ns
BM_MakeHeap_uint64_PipeOrgan_16                                              1.32 ns         1.04 ns
BM_MakeHeap_uint64_PipeOrgan_64                                              1.49 ns        0.978 ns
BM_MakeHeap_uint64_PipeOrgan_256                                             1.62 ns        0.995 ns
BM_MakeHeap_uint64_PipeOrgan_1024                                            1.58 ns        0.994 ns
BM_MakeHeap_uint64_PipeOrgan_16384                                           1.66 ns         1.10 ns
BM_MakeHeap_uint64_PipeOrgan_262144                                          1.84 ns         1.36 ns
BM_MakeHeap_uint64_Heap_1                                                    1.06 ns         1.08 ns
BM_MakeHeap_uint64_Heap_4                                                   0.793 ns        0.606 ns
BM_MakeHeap_uint64_Heap_16                                                  0.732 ns        0.525 ns
BM_MakeHeap_uint64_Heap_64                                                  0.747 ns        0.493 ns
BM_MakeHeap_uint64_Heap_256                                                 0.759 ns        0.446 ns
BM_MakeHeap_uint64_Heap_1024                                                0.706 ns        0.402 ns
BM_MakeHeap_uint64_Heap_16384                                               0.662 ns        0.384 ns
BM_MakeHeap_uint64_Heap_262144                                              0.676 ns        0.382 ns
BM_MakeHeap_uint64_QuickSortAdversary_1                                      1.05 ns         1.03 ns
BM_MakeHeap_uint64_QuickSortAdversary_4                                      1.31 ns        0.885 ns
BM_MakeHeap_uint64_QuickSortAdversary_16                                     1.25 ns        0.964 ns
BM_MakeHeap_uint64_QuickSortAdversary_64                                     1.28 ns        0.804 ns
BM_MakeHeap_uint64_QuickSortAdversary_256                                    1.32 ns        0.825 ns
BM_MakeHeap_uint64_QuickSortAdversary_1024                                   1.28 ns        0.768 ns
BM_MakeHeap_uint64_QuickSortAdversary_16384                                 0.831 ns        0.490 ns
BM_MakeHeap_uint64_QuickSortAdversary_262144                                0.709 ns        0.398 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1                                    1.02 ns         1.07 ns
BM_MakeHeap_pair<uint32, uint32>_Random_4                                    3.94 ns         3.72 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16                                   4.58 ns         3.85 ns
BM_MakeHeap_pair<uint32, uint32>_Random_64                                   5.32 ns         4.20 ns
BM_MakeHeap_pair<uint32, uint32>_Random_256                                  5.46 ns         4.27 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1024                                 5.53 ns         4.21 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16384                                5.58 ns         4.29 ns
BM_MakeHeap_pair<uint32, uint32>_Random_262144                               5.56 ns         4.44 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1                                 1.07 ns         1.03 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_4                                 1.96 ns         1.61 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16                                2.10 ns         2.06 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_64                                2.35 ns         2.28 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_256                               2.58 ns         2.41 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1024                              2.57 ns         2.45 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16384                             2.61 ns         2.56 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_262144                            2.76 ns         2.79 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1                                1.07 ns         1.05 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_4                                1.21 ns         1.05 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16                               1.14 ns        0.980 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_64                               1.16 ns         1.01 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_256                              1.18 ns        0.973 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1024                             1.13 ns        0.945 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16384                            1.11 ns        0.960 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_262144                           1.11 ns        0.929 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1                             1.08 ns         1.09 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_4                             1.94 ns         1.36 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16                            2.22 ns         1.90 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_64                            2.46 ns         2.19 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_256                           2.55 ns         2.38 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1024                          2.51 ns         2.45 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16384                         2.57 ns         2.56 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_262144                        2.74 ns         2.83 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1                                 1.07 ns         1.11 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_4                                 1.42 ns         1.54 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16                                2.01 ns         1.99 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_64                                2.33 ns         2.27 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_256                               2.48 ns         2.40 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1024                              2.48 ns         2.46 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16384                             2.54 ns         2.57 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_262144                            2.68 ns         2.76 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1                                      1.08 ns         1.08 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_4                                      1.24 ns         1.02 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16                                     1.16 ns         1.02 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_64                                     1.17 ns         1.00 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_256                                    1.20 ns        0.992 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1024                                   1.13 ns        0.942 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16384                                  1.12 ns        0.936 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_262144                                 1.13 ns        0.945 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1                        1.08 ns         1.06 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_4                        1.92 ns         1.93 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16                       2.13 ns         2.07 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_64                       2.36 ns         2.29 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_256                      2.56 ns         2.41 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1024                     2.50 ns         2.46 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16384                    2.57 ns         2.57 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_262144                   2.67 ns         2.89 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1                          0.979 ns         1.05 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_4                           5.14 ns         4.88 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16                          7.93 ns         5.95 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_64                          9.08 ns         6.63 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_256                         9.10 ns         6.47 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1024                        9.03 ns         6.43 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16384                       9.09 ns         6.60 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_262144                      9.19 ns         6.86 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1                        1.04 ns         1.02 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_4                        2.13 ns         2.55 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16                       1.87 ns         2.67 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_64                       2.27 ns         3.06 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_256                      2.41 ns         3.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1024                     2.27 ns         3.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16384                    2.19 ns         3.40 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_262144                   2.37 ns         3.78 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1                      0.981 ns         1.04 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_4                       1.38 ns         1.61 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16                      1.18 ns         1.35 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_64                      1.29 ns         1.35 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_256                     1.20 ns         1.28 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1024                    1.06 ns         1.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16384                   1.01 ns         1.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_262144                 0.982 ns         1.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1                   0.967 ns         1.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_4                    2.34 ns         2.22 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16                   2.46 ns         2.86 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_64                   2.74 ns         3.61 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_256                  2.75 ns         3.74 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1024                 2.71 ns         3.69 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16384                2.71 ns         4.03 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_262144               2.92 ns         4.58 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1                       0.968 ns         1.01 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_4                        1.95 ns         2.15 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16                       1.89 ns         2.41 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_64                       2.45 ns         3.00 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_256                      2.61 ns         3.09 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1024                     2.49 ns         3.14 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16384                    2.38 ns         3.38 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_262144                   2.48 ns         3.90 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1                             1.04 ns        0.964 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_4                             1.43 ns         1.43 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16                            1.16 ns         1.29 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_64                            1.26 ns         1.28 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_256                           1.27 ns         1.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1024                          1.28 ns         1.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16384                         1.19 ns         1.17 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_262144                        1.19 ns         1.14 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1               1.05 ns        0.988 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_4               2.22 ns         2.77 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16              1.92 ns         2.54 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_64              2.27 ns         2.83 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_256             2.43 ns         2.94 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1024            2.25 ns         2.89 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16384           2.18 ns         3.06 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_262144          2.35 ns         3.39 ns
BM_MakeHeap_string_Random_1                                                  1.35 ns         1.25 ns
BM_MakeHeap_string_Random_4                                                  10.9 ns         10.2 ns
BM_MakeHeap_string_Random_16                                                 23.3 ns         20.8 ns
BM_MakeHeap_string_Random_64                                                 27.4 ns         24.6 ns
BM_MakeHeap_string_Random_256                                                28.8 ns         25.6 ns
BM_MakeHeap_string_Random_1024                                               30.1 ns         27.9 ns
BM_MakeHeap_string_Random_16384                                              35.1 ns         35.5 ns
BM_MakeHeap_string_Random_262144                                             68.9 ns         82.1 ns
BM_MakeHeap_string_Ascending_1                                               1.32 ns         1.25 ns
BM_MakeHeap_string_Ascending_4                                               8.25 ns         8.83 ns
BM_MakeHeap_string_Ascending_16                                              18.7 ns         18.6 ns
BM_MakeHeap_string_Ascending_64                                              20.9 ns         23.3 ns
BM_MakeHeap_string_Ascending_256                                             21.8 ns         23.7 ns
BM_MakeHeap_string_Ascending_1024                                            24.4 ns         26.8 ns
BM_MakeHeap_string_Ascending_16384                                           35.3 ns         35.6 ns
BM_MakeHeap_string_Ascending_262144                                          55.2 ns         70.2 ns
BM_MakeHeap_string_Descending_1                                              1.25 ns         1.25 ns
BM_MakeHeap_string_Descending_4                                              7.69 ns         8.02 ns
BM_MakeHeap_string_Descending_16                                             19.5 ns         18.0 ns
BM_MakeHeap_string_Descending_64                                             18.2 ns         18.1 ns
BM_MakeHeap_string_Descending_256                                            18.0 ns         18.0 ns
BM_MakeHeap_string_Descending_1024                                           20.9 ns         20.1 ns
BM_MakeHeap_string_Descending_16384                                          27.3 ns         25.4 ns
BM_MakeHeap_string_Descending_262144                                         30.7 ns         33.2 ns
BM_MakeHeap_string_SingleElement_1                                           1.32 ns         1.29 ns
BM_MakeHeap_string_SingleElement_4                                           8.21 ns         7.52 ns
BM_MakeHeap_string_SingleElement_16                                          15.6 ns         14.1 ns
BM_MakeHeap_string_SingleElement_64                                          19.4 ns         18.9 ns
BM_MakeHeap_string_SingleElement_256                                         14.4 ns         14.3 ns
BM_MakeHeap_string_SingleElement_1024                                        11.3 ns         11.4 ns
BM_MakeHeap_string_SingleElement_16384                                       12.3 ns         11.4 ns
BM_MakeHeap_string_SingleElement_262144                                      29.3 ns         29.5 ns
BM_MakeHeap_string_PipeOrgan_1                                               1.30 ns         1.34 ns
BM_MakeHeap_string_PipeOrgan_4                                               8.01 ns         8.54 ns
BM_MakeHeap_string_PipeOrgan_16                                              18.7 ns         18.4 ns
BM_MakeHeap_string_PipeOrgan_64                                              21.7 ns         22.9 ns
BM_MakeHeap_string_PipeOrgan_256                                             21.9 ns         23.6 ns
BM_MakeHeap_string_PipeOrgan_1024                                            24.5 ns         26.2 ns
BM_MakeHeap_string_PipeOrgan_16384                                           35.6 ns         35.1 ns
BM_MakeHeap_string_PipeOrgan_262144                                          55.0 ns         69.0 ns
BM_MakeHeap_string_Heap_1                                                    1.26 ns         1.24 ns
BM_MakeHeap_string_Heap_4                                                    8.07 ns         7.95 ns
BM_MakeHeap_string_Heap_16                                                   22.0 ns         18.8 ns
BM_MakeHeap_string_Heap_64                                                   19.4 ns         16.9 ns
BM_MakeHeap_string_Heap_256                                                  14.7 ns         12.3 ns
BM_MakeHeap_string_Heap_1024                                                 10.5 ns         8.55 ns
BM_MakeHeap_string_Heap_16384                                                10.2 ns         8.72 ns
BM_MakeHeap_string_Heap_262144                                               18.2 ns         17.2 ns
BM_MakeHeap_string_QuickSortAdversary_1                                      1.25 ns         1.35 ns
BM_MakeHeap_string_QuickSortAdversary_4                                      10.6 ns         10.3 ns
BM_MakeHeap_string_QuickSortAdversary_16                                     25.7 ns         23.2 ns
BM_MakeHeap_string_QuickSortAdversary_64                                     25.9 ns         24.0 ns
BM_MakeHeap_string_QuickSortAdversary_256                                    23.5 ns         21.4 ns
BM_MakeHeap_string_QuickSortAdversary_1024                                   22.3 ns         19.9 ns
BM_MakeHeap_string_QuickSortAdversary_16384                                  23.4 ns         21.7 ns
BM_MakeHeap_string_QuickSortAdversary_262144                                 49.0 ns         53.2 ns
BM_MakeHeap_float_Random_1                                                   1.09 ns         1.05 ns
BM_MakeHeap_float_Random_4                                                   4.07 ns         4.40 ns
BM_MakeHeap_float_Random_16                                                  5.49 ns         4.59 ns
BM_MakeHeap_float_Random_64                                                  6.45 ns         4.84 ns
BM_MakeHeap_float_Random_256                                                 6.67 ns         4.81 ns
BM_MakeHeap_float_Random_1024                                                6.69 ns         4.74 ns
BM_MakeHeap_float_Random_16384                                               6.67 ns         4.73 ns
BM_MakeHeap_float_Random_262144                                              6.58 ns         4.65 ns
BM_MakeHeap_float_Ascending_1                                                1.12 ns         1.04 ns
BM_MakeHeap_float_Ascending_4                                                1.10 ns        0.937 ns
BM_MakeHeap_float_Ascending_16                                               1.26 ns         1.37 ns
BM_MakeHeap_float_Ascending_64                                               1.38 ns         1.28 ns
BM_MakeHeap_float_Ascending_256                                              1.39 ns         1.30 ns
BM_MakeHeap_float_Ascending_1024                                             1.32 ns         1.39 ns
BM_MakeHeap_float_Ascending_16384                                            1.24 ns         1.39 ns
BM_MakeHeap_float_Ascending_262144                                           1.31 ns         1.71 ns
BM_MakeHeap_float_Descending_1                                               1.05 ns         1.05 ns
BM_MakeHeap_float_Descending_4                                              0.835 ns        0.655 ns
BM_MakeHeap_float_Descending_16                                             0.728 ns        0.605 ns
BM_MakeHeap_float_Descending_64                                             0.628 ns        0.516 ns
BM_MakeHeap_float_Descending_256                                            0.656 ns        0.554 ns
BM_MakeHeap_float_Descending_1024                                           0.611 ns        0.515 ns
BM_MakeHeap_float_Descending_16384                                          0.597 ns        0.500 ns
BM_MakeHeap_float_Descending_262144                                         0.592 ns        0.502 ns
BM_MakeHeap_float_SingleElement_1                                            1.06 ns         1.03 ns
BM_MakeHeap_float_SingleElement_4                                            1.08 ns        0.968 ns
BM_MakeHeap_float_SingleElement_16                                           1.08 ns         1.24 ns
BM_MakeHeap_float_SingleElement_64                                           1.13 ns         1.26 ns
BM_MakeHeap_float_SingleElement_256                                          1.20 ns         1.29 ns
BM_MakeHeap_float_SingleElement_1024                                         1.16 ns         1.37 ns
BM_MakeHeap_float_SingleElement_16384                                        1.11 ns         1.39 ns
BM_MakeHeap_float_SingleElement_262144                                       1.15 ns         1.62 ns
BM_MakeHeap_float_PipeOrgan_1                                                1.08 ns         1.07 ns
BM_MakeHeap_float_PipeOrgan_4                                               0.966 ns         1.02 ns
BM_MakeHeap_float_PipeOrgan_16                                               1.12 ns         1.31 ns
BM_MakeHeap_float_PipeOrgan_64                                               1.06 ns         1.27 ns
BM_MakeHeap_float_PipeOrgan_256                                              1.18 ns         1.28 ns
BM_MakeHeap_float_PipeOrgan_1024                                             1.14 ns         1.39 ns
BM_MakeHeap_float_PipeOrgan_16384                                            1.09 ns         1.39 ns
BM_MakeHeap_float_PipeOrgan_262144                                           1.15 ns         1.68 ns
BM_MakeHeap_float_Heap_1                                                     1.04 ns         1.03 ns
BM_MakeHeap_float_Heap_4                                                    0.821 ns        0.656 ns
BM_MakeHeap_float_Heap_16                                                   0.771 ns        0.618 ns
BM_MakeHeap_float_Heap_64                                                   0.683 ns        0.519 ns
BM_MakeHeap_float_Heap_256                                                  0.658 ns        0.529 ns
BM_MakeHeap_float_Heap_1024                                                 0.668 ns        0.478 ns
BM_MakeHeap_float_Heap_16384                                                0.717 ns        0.451 ns
BM_MakeHeap_float_Heap_262144                                               0.683 ns        0.428 ns
BM_MakeHeap_float_QuickSortAdversary_1                                       1.03 ns         1.08 ns
BM_MakeHeap_float_QuickSortAdversary_4                                       1.07 ns         1.13 ns
BM_MakeHeap_float_QuickSortAdversary_16                                      1.04 ns         1.36 ns
BM_MakeHeap_float_QuickSortAdversary_64                                     0.987 ns         1.10 ns
BM_MakeHeap_float_QuickSortAdversary_256                                     1.13 ns         1.11 ns
BM_MakeHeap_float_QuickSortAdversary_1024                                    1.01 ns         1.23 ns
BM_MakeHeap_float_QuickSortAdversary_16384                                  0.643 ns        0.633 ns
BM_MakeHeap_float_QuickSortAdversary_262144                                 0.579 ns        0.695 ns
```

Fixes #120752

---
Full diff: https://github.com/llvm/llvm-project/pull/154092.diff


4 Files Affected:

- (modified) libcxx/include/__algorithm/make_heap.h (+10-4) 
- (modified) libcxx/include/__algorithm/partial_sort.h (+1-1) 
- (modified) libcxx/include/__algorithm/partial_sort_copy.h (+1-1) 
- (modified) libcxx/include/__algorithm/sift_down.h (+19-18) 


``````````diff
diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index e8f0cdb27333a..7eb5d6b89492d 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -12,6 +12,7 @@
 #include <__algorithm/comp.h>
 #include <__algorithm/comp_ref_type.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/push_heap.h>
 #include <__algorithm/sift_down.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
@@ -31,13 +32,18 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare&& __comp) {
   __comp_ref_type<_Compare> __comp_ref = __comp;
 
-  using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
-  difference_type __n   = __last - __first;
+  using __diff_t = __iter_diff_t<_RandomAccessIterator>;
+  const __diff_t __n   = __last - __first;
+
+  const __diff_t __odd_n = (__n & 1) ? __n : __n - 1;
+
   if (__n > 1) {
     // start from the first parent, there is no need to consider children
-    for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) {
-      std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
+
+    for (__diff_t __start = (__odd_n - 2) / 2; __start >= 0; --__start) {
+      std::__sift_down<_AlgPolicy, true>(__first, __comp_ref, __odd_n, __start);
     }
+    std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n);
   }
 }
 
diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h
index 7f8d0c49147e3..4b39ae0cf2dfc 100644
--- a/libcxx/include/__algorithm/partial_sort.h
+++ b/libcxx/include/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
   for (; __i != __last; ++__i) {
     if (__comp(*__i, *__first)) {
       _IterOps<_AlgPolicy>::iter_swap(__i, __first);
-      std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+      std::__sift_down<_AlgPolicy, false>(__first, __comp, __len, 0);
     }
   }
   std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);
diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 172f53b290d54..2230dfc9cc4ad 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
     for (; __first != __last; ++__first)
       if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
         *__result_first = *__first;
-        std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+        std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0);
       }
     std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
   }
diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index 42803e30631fb..e01c9b2b00f86 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -24,59 +24,60 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
+template <class _AlgPolicy, bool __assume_both_children, class _Compare, class _RandomAccessIterator>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __sift_down(_RandomAccessIterator __first,
             _Compare&& __comp,
-            typename iterator_traits<_RandomAccessIterator>::difference_type __len,
-            _RandomAccessIterator __start) {
+            __iter_diff_t<_RandomAccessIterator> __len,
+            __iter_diff_t<_RandomAccessIterator> __start) {
   using _Ops = _IterOps<_AlgPolicy>;
 
   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
   // left-child of __start is at 2 * __start + 1
   // right-child of __start is at 2 * __start + 2
-  difference_type __child = __start - __first;
+  difference_type __child = __start;
 
   if (__len < 2 || (__len - 2) / 2 < __child)
     return;
 
-  __child                         = 2 * __child + 1;
-  _RandomAccessIterator __child_i = __first + __child;
+  __child = 2 * __child + 1;
 
-  if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
+  if _LIBCPP_CONSTEXPR (__assume_both_children) {
+    // right-child exists and is greater than left-child
+    __child += __comp(__first[__child], __first[__child + 1]);
+  } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
     // right-child exists and is greater than left-child
-    ++__child_i;
     ++__child;
   }
 
   // check if we are in heap-order
-  if (__comp(*__child_i, *__start))
+  if (__comp(__first[__child], __first[__start]))
     // we are, __start is larger than its largest child
     return;
 
-  value_type __top(_Ops::__iter_move(__start));
+  value_type __top(_Ops::__iter_move(__first + __start));
   do {
     // we are not in heap-order, swap the parent with its largest child
-    *__start = _Ops::__iter_move(__child_i);
-    __start  = __child_i;
+    __first[__start] = _Ops::__iter_move(__first + __child);
+    __start          = __child;
 
     if ((__len - 2) / 2 < __child)
       break;
 
     // recompute the child based off of the updated parent
-    __child   = 2 * __child + 1;
-    __child_i = __first + __child;
+    __child = 2 * __child + 1;
 
-    if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
+    if _LIBCPP_CONSTEXPR (__assume_both_children) {
+      __child += __comp(__first[__child], __first[__child + 1]);
+    } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
       // right-child exists and is greater than left-child
-      ++__child_i;
       ++__child;
     }
 
     // check if we are in heap-order
-  } while (!__comp(*__child_i, __top));
-  *__start = std::move(__top);
+  } while (!__comp(__first[__child], __top));
+  __first[__start] = std::move(__top);
 }
 
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>

``````````

</details>


https://github.com/llvm/llvm-project/pull/154092


More information about the libcxx-commits mailing list