[libcxx-commits] [libcxx] [libc++] Optimize heap operations (PR #159917)

via libcxx-commits libcxx-commits at lists.llvm.org
Sat Sep 20 04:49:52 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libcxx

Author: None (nukyan)

<details>
<summary>Changes</summary>

Reopening of #<!-- -->121480. These changes are intended to clean up the code and slightly improve performance.

Here is the output of `libcxx/test/benchmarks/algorithms/make_heap.bench.cpp`.
```
------------------------------------------------------------------------------------------
Benchmark                                                               Before       After
------------------------------------------------------------------------------------------
BM_MakeHeap_uint32_Random_1                                            1.22 ns     1.68 ns
BM_MakeHeap_uint32_Random_4                                            3.65 ns     3.80 ns
BM_MakeHeap_uint32_Random_16                                           4.19 ns     4.18 ns
BM_MakeHeap_uint32_Random_64                                           4.54 ns     4.50 ns
BM_MakeHeap_uint32_Random_256                                          4.40 ns     4.27 ns
BM_MakeHeap_uint32_Random_1024                                         4.31 ns     4.17 ns
BM_MakeHeap_uint32_Random_16384                                        4.55 ns     4.39 ns
BM_MakeHeap_uint32_Random_262144                                       4.63 ns     4.43 ns
BM_MakeHeap_uint32_Ascending_1                                         1.19 ns     1.66 ns
BM_MakeHeap_uint32_Ascending_4                                         1.10 ns     1.21 ns
BM_MakeHeap_uint32_Ascending_16                                        1.25 ns     1.30 ns
BM_MakeHeap_uint32_Ascending_64                                        1.18 ns     1.13 ns
BM_MakeHeap_uint32_Ascending_256                                       1.24 ns     1.16 ns
BM_MakeHeap_uint32_Ascending_1024                                      1.28 ns     1.21 ns
BM_MakeHeap_uint32_Ascending_16384                                     1.45 ns     1.39 ns
BM_MakeHeap_uint32_Ascending_262144                                    1.61 ns     1.52 ns
BM_MakeHeap_uint32_Descending_1                                        1.18 ns     1.69 ns
BM_MakeHeap_uint32_Descending_4                                       0.702 ns    0.831 ns
BM_MakeHeap_uint32_Descending_16                                      0.638 ns    0.628 ns
BM_MakeHeap_uint32_Descending_64                                      0.565 ns    0.522 ns
BM_MakeHeap_uint32_Descending_256                                     0.587 ns    0.521 ns
BM_MakeHeap_uint32_Descending_1024                                    0.579 ns    0.523 ns
BM_MakeHeap_uint32_Descending_16384                                   0.536 ns    0.465 ns
BM_MakeHeap_uint32_Descending_262144                                  0.535 ns    0.472 ns
BM_MakeHeap_uint32_SingleElement_1                                     1.18 ns     1.70 ns
BM_MakeHeap_uint32_SingleElement_4                                    0.814 ns    0.926 ns
BM_MakeHeap_uint32_SingleElement_16                                    1.23 ns     1.27 ns
BM_MakeHeap_uint32_SingleElement_64                                    1.16 ns     1.08 ns
BM_MakeHeap_uint32_SingleElement_256                                   1.23 ns     1.15 ns
BM_MakeHeap_uint32_SingleElement_1024                                  1.27 ns     1.19 ns
BM_MakeHeap_uint32_SingleElement_16384                                 1.45 ns     1.39 ns
BM_MakeHeap_uint32_SingleElement_262144                                1.69 ns     1.80 ns
BM_MakeHeap_uint32_PipeOrgan_1                                         1.19 ns     1.69 ns
BM_MakeHeap_uint32_PipeOrgan_4                                         1.09 ns     1.17 ns
BM_MakeHeap_uint32_PipeOrgan_16                                        1.30 ns     1.27 ns
BM_MakeHeap_uint32_PipeOrgan_64                                        1.22 ns     1.16 ns
BM_MakeHeap_uint32_PipeOrgan_256                                       1.28 ns     1.20 ns
BM_MakeHeap_uint32_PipeOrgan_1024                                      1.34 ns     1.24 ns
BM_MakeHeap_uint32_PipeOrgan_16384                                     1.47 ns     1.38 ns
BM_MakeHeap_uint32_PipeOrgan_262144                                    1.69 ns     1.71 ns
BM_MakeHeap_uint32_Heap_1                                              1.21 ns     1.68 ns
BM_MakeHeap_uint32_Heap_4                                             0.701 ns    0.831 ns
BM_MakeHeap_uint32_Heap_16                                            0.638 ns    0.629 ns
BM_MakeHeap_uint32_Heap_64                                            0.564 ns    0.525 ns
BM_MakeHeap_uint32_Heap_256                                           0.586 ns    0.522 ns
BM_MakeHeap_uint32_Heap_1024                                          0.578 ns    0.519 ns
BM_MakeHeap_uint32_Heap_16384                                         0.536 ns    0.466 ns
BM_MakeHeap_uint32_Heap_262144                                        0.536 ns    0.472 ns
BM_MakeHeap_uint32_QuickSortAdversary_1                                1.18 ns     1.68 ns
BM_MakeHeap_uint32_QuickSortAdversary_4                                1.10 ns     1.20 ns
BM_MakeHeap_uint32_QuickSortAdversary_16                               1.25 ns     1.30 ns
BM_MakeHeap_uint32_QuickSortAdversary_64                               1.01 ns    0.929 ns
BM_MakeHeap_uint32_QuickSortAdversary_256                              1.03 ns    0.963 ns
BM_MakeHeap_uint32_QuickSortAdversary_1024                             1.06 ns     1.04 ns
BM_MakeHeap_uint32_QuickSortAdversary_16384                           0.659 ns    0.610 ns
BM_MakeHeap_uint32_QuickSortAdversary_262144                          0.566 ns    0.509 ns
BM_MakeHeap_uint64_Random_1                                            1.24 ns     1.71 ns
BM_MakeHeap_uint64_Random_4                                            3.76 ns     3.99 ns
BM_MakeHeap_uint64_Random_16                                           3.94 ns     3.73 ns
BM_MakeHeap_uint64_Random_64                                           4.45 ns     4.39 ns
BM_MakeHeap_uint64_Random_256                                          4.17 ns     4.06 ns
BM_MakeHeap_uint64_Random_1024                                         3.86 ns     3.81 ns
BM_MakeHeap_uint64_Random_16384                                        4.07 ns     4.05 ns
BM_MakeHeap_uint64_Random_262144                                       4.34 ns     4.25 ns
BM_MakeHeap_uint64_Ascending_1                                         1.21 ns     1.70 ns
BM_MakeHeap_uint64_Ascending_4                                         1.18 ns     1.28 ns
BM_MakeHeap_uint64_Ascending_16                                        1.24 ns     1.28 ns
BM_MakeHeap_uint64_Ascending_64                                        1.32 ns     1.29 ns
BM_MakeHeap_uint64_Ascending_256                                       1.50 ns     1.47 ns
BM_MakeHeap_uint64_Ascending_1024                                      1.46 ns     1.41 ns
BM_MakeHeap_uint64_Ascending_16384                                     1.71 ns     1.67 ns
BM_MakeHeap_uint64_Ascending_262144                                    2.34 ns     2.29 ns
BM_MakeHeap_uint64_Descending_1                                        1.20 ns     1.72 ns
BM_MakeHeap_uint64_Descending_4                                       0.798 ns    0.952 ns
BM_MakeHeap_uint64_Descending_16                                      0.683 ns    0.703 ns
BM_MakeHeap_uint64_Descending_64                                      0.662 ns    0.619 ns
BM_MakeHeap_uint64_Descending_256                                     0.839 ns    0.793 ns
BM_MakeHeap_uint64_Descending_1024                                    0.688 ns    0.613 ns
BM_MakeHeap_uint64_Descending_16384                                   0.646 ns    0.573 ns
BM_MakeHeap_uint64_Descending_262144                                  0.657 ns    0.583 ns
BM_MakeHeap_uint64_SingleElement_1                                     1.27 ns     1.67 ns
BM_MakeHeap_uint64_SingleElement_4                                    0.892 ns    0.996 ns
BM_MakeHeap_uint64_SingleElement_16                                    1.17 ns     1.17 ns
BM_MakeHeap_uint64_SingleElement_64                                    1.30 ns     1.23 ns
BM_MakeHeap_uint64_SingleElement_256                                   1.49 ns     1.44 ns
BM_MakeHeap_uint64_SingleElement_1024                                  1.47 ns     1.40 ns
BM_MakeHeap_uint64_SingleElement_16384                                 1.77 ns     1.76 ns
BM_MakeHeap_uint64_SingleElement_262144                                2.50 ns     2.45 ns
BM_MakeHeap_uint64_PipeOrgan_1                                         1.28 ns     1.66 ns
BM_MakeHeap_uint64_PipeOrgan_4                                         1.14 ns     1.25 ns
BM_MakeHeap_uint64_PipeOrgan_16                                        1.27 ns     1.27 ns
BM_MakeHeap_uint64_PipeOrgan_64                                        1.33 ns     1.27 ns
BM_MakeHeap_uint64_PipeOrgan_256                                       1.49 ns     1.44 ns
BM_MakeHeap_uint64_PipeOrgan_1024                                      1.47 ns     1.39 ns
BM_MakeHeap_uint64_PipeOrgan_16384                                     1.71 ns     1.66 ns
BM_MakeHeap_uint64_PipeOrgan_262144                                    2.33 ns     2.28 ns
BM_MakeHeap_uint64_Heap_1                                              1.27 ns     1.70 ns
BM_MakeHeap_uint64_Heap_4                                             0.798 ns    0.976 ns
BM_MakeHeap_uint64_Heap_16                                            0.685 ns    0.706 ns
BM_MakeHeap_uint64_Heap_64                                            0.663 ns    0.617 ns
BM_MakeHeap_uint64_Heap_256                                           0.839 ns    0.792 ns
BM_MakeHeap_uint64_Heap_1024                                          0.688 ns    0.614 ns
BM_MakeHeap_uint64_Heap_16384                                         0.644 ns    0.573 ns
BM_MakeHeap_uint64_Heap_262144                                        0.651 ns    0.581 ns
BM_MakeHeap_uint64_QuickSortAdversary_1                                1.23 ns     1.73 ns
BM_MakeHeap_uint64_QuickSortAdversary_4                                1.18 ns     1.28 ns
BM_MakeHeap_uint64_QuickSortAdversary_16                               1.24 ns     1.28 ns
BM_MakeHeap_uint64_QuickSortAdversary_64                               1.14 ns     1.06 ns
BM_MakeHeap_uint64_QuickSortAdversary_256                              1.31 ns     1.27 ns
BM_MakeHeap_uint64_QuickSortAdversary_1024                             1.16 ns     1.09 ns
BM_MakeHeap_uint64_QuickSortAdversary_16384                           0.796 ns    0.728 ns
BM_MakeHeap_uint64_QuickSortAdversary_262144                          0.697 ns    0.620 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1                              1.19 ns     1.71 ns
BM_MakeHeap_pair<uint32, uint32>_Random_4                              4.74 ns     4.64 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16                             5.77 ns     5.93 ns
BM_MakeHeap_pair<uint32, uint32>_Random_64                             6.72 ns     6.63 ns
BM_MakeHeap_pair<uint32, uint32>_Random_256                            6.68 ns     6.69 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1024                           6.63 ns     6.64 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16384                          6.94 ns     7.05 ns
BM_MakeHeap_pair<uint32, uint32>_Random_262144                         7.00 ns     7.23 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1                           1.18 ns     1.71 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_4                           1.98 ns     1.94 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16                          2.40 ns     2.31 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_64                          2.72 ns     2.61 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_256                         2.94 ns     2.86 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1024                        2.94 ns     2.84 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16384                       2.98 ns     2.86 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_262144                      3.42 ns     3.31 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1                          1.20 ns     1.67 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_4                          1.26 ns     1.27 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16                         1.35 ns     1.30 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_64                         1.40 ns     1.32 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_256                        1.43 ns     1.35 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1024                       1.34 ns     1.27 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16384                      1.31 ns     1.22 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_262144                     1.31 ns     1.22 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1                       1.20 ns     1.68 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_4                       1.98 ns     1.95 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16                      2.56 ns     2.49 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_64                      2.83 ns     2.73 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_256                     3.04 ns     2.94 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1024                    2.99 ns     2.89 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16384                   3.02 ns     2.89 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_262144                  3.46 ns     3.33 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1                           1.18 ns     1.68 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_4                           1.65 ns     1.70 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16                          2.38 ns     2.31 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_64                          2.75 ns     2.66 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_256                         3.00 ns     2.88 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1024                        2.95 ns     2.88 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16384                       2.99 ns     2.88 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_262144                      3.42 ns     3.32 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1                                1.18 ns     1.68 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_4                                1.26 ns     1.26 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16                               1.35 ns     1.30 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_64                               1.40 ns     1.32 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_256                              1.43 ns     1.35 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1024                             1.34 ns     1.26 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16384                            1.31 ns     1.22 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_262144                           1.31 ns     1.22 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1                  1.20 ns     1.69 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_4                  1.98 ns     1.94 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16                 2.47 ns     2.38 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_64                 2.73 ns     2.64 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_256                2.93 ns     2.85 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1024               2.91 ns     2.82 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16384              2.96 ns     2.86 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_262144             3.42 ns     3.31 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1                     1.30 ns     2.05 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_4                     6.23 ns     6.53 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16                    9.31 ns     9.32 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_64                    10.4 ns     10.6 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_256                   9.96 ns     10.3 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1024                  9.67 ns     9.93 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16384                 10.3 ns     10.3 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_262144                11.0 ns     11.0 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1                  1.30 ns     2.04 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_4                  2.54 ns     3.03 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16                 2.66 ns     2.80 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_64                 3.39 ns     3.62 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_256                3.47 ns     3.76 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1024               3.36 ns     3.44 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16384              3.56 ns     3.58 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_262144             4.88 ns     4.96 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1                 1.30 ns     1.90 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_4                 1.88 ns     2.42 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16                1.73 ns     2.04 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_64                2.11 ns     2.55 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_256               1.87 ns     2.23 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1024              1.65 ns     1.94 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16384             1.59 ns     1.80 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_262144            1.67 ns     1.86 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1              1.29 ns     2.11 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_4              2.88 ns     3.27 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16             3.32 ns     3.43 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_64             4.16 ns     4.46 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_256            4.04 ns     4.36 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1024           3.86 ns     3.81 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16384          4.26 ns     4.21 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_262144         5.50 ns     5.46 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1                  1.32 ns     1.75 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_4                  2.17 ns     2.35 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16                 2.58 ns     2.71 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_64                 3.52 ns     3.77 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_256                3.52 ns     3.85 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1024               3.39 ns     3.54 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16384              3.63 ns     3.73 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_262144             4.94 ns     5.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1                       1.42 ns     2.03 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_4                       2.00 ns     2.44 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16                      1.60 ns     1.81 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_64                      2.06 ns     2.32 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_256                     1.79 ns     2.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1024                    1.60 ns     1.81 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16384                   1.58 ns     1.74 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_262144                  1.68 ns     1.78 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1         1.46 ns     2.00 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_4         2.72 ns     2.96 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16        2.60 ns     2.75 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_64        3.39 ns     3.65 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_256       3.39 ns     3.59 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1024      3.18 ns     3.36 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16384     3.43 ns     3.63 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_262144    4.63 ns     4.81 ns
BM_MakeHeap_string_Random_1                                            1.90 ns     2.50 ns
BM_MakeHeap_string_Random_4                                            18.1 ns     18.7 ns
BM_MakeHeap_string_Random_16                                           26.7 ns     27.6 ns
BM_MakeHeap_string_Random_64                                           32.5 ns     33.8 ns
BM_MakeHeap_string_Random_256                                          32.4 ns     32.9 ns
BM_MakeHeap_string_Random_1024                                         34.1 ns     33.9 ns
BM_MakeHeap_string_Random_16384                                        61.1 ns     61.2 ns
BM_MakeHeap_string_Random_262144                                        124 ns      122 ns
BM_MakeHeap_string_Ascending_1                                         1.90 ns     2.46 ns
BM_MakeHeap_string_Ascending_4                                         14.6 ns     15.2 ns
BM_MakeHeap_string_Ascending_16                                        21.8 ns     22.5 ns
BM_MakeHeap_string_Ascending_64                                        28.8 ns     29.7 ns
BM_MakeHeap_string_Ascending_256                                       28.6 ns     29.5 ns
BM_MakeHeap_string_Ascending_1024                                      28.6 ns     29.0 ns
BM_MakeHeap_string_Ascending_16384                                     56.9 ns     56.5 ns
BM_MakeHeap_string_Ascending_262144                                     116 ns      112 ns
BM_MakeHeap_string_Descending_1                                        1.89 ns     2.47 ns
BM_MakeHeap_string_Descending_4                                        13.0 ns     13.1 ns
BM_MakeHeap_string_Descending_16                                       18.6 ns     18.6 ns
BM_MakeHeap_string_Descending_64                                       23.1 ns     22.7 ns
BM_MakeHeap_string_Descending_256                                      21.6 ns     21.7 ns
BM_MakeHeap_string_Descending_1024                                     21.2 ns     21.1 ns
BM_MakeHeap_string_Descending_16384                                    36.2 ns     36.2 ns
BM_MakeHeap_string_Descending_262144                                   42.6 ns     41.0 ns
BM_MakeHeap_string_SingleElement_1                                     2.02 ns     2.42 ns
BM_MakeHeap_string_SingleElement_4                                     13.9 ns     13.9 ns
BM_MakeHeap_string_SingleElement_16                                    20.9 ns     21.2 ns
BM_MakeHeap_string_SingleElement_64                                    27.4 ns     26.9 ns
BM_MakeHeap_string_SingleElement_256                                   23.1 ns     23.1 ns
BM_MakeHeap_string_SingleElement_1024                                  20.8 ns     20.3 ns
BM_MakeHeap_string_SingleElement_16384                                 25.6 ns     26.4 ns
BM_MakeHeap_string_SingleElement_262144                                44.8 ns     44.3 ns
BM_MakeHeap_string_PipeOrgan_1                                         2.00 ns     2.43 ns
BM_MakeHeap_string_PipeOrgan_4                                         14.1 ns     14.0 ns
BM_MakeHeap_string_PipeOrgan_16                                        22.2 ns     22.0 ns
BM_MakeHeap_string_PipeOrgan_64                                        29.5 ns     28.9 ns
BM_MakeHeap_string_PipeOrgan_256                                       29.8 ns     29.2 ns
BM_MakeHeap_string_PipeOrgan_1024                                      29.3 ns     29.1 ns
BM_MakeHeap_string_PipeOrgan_16384                                     57.1 ns     56.8 ns
BM_MakeHeap_string_PipeOrgan_262144                                     114 ns      107 ns
BM_MakeHeap_string_Heap_1                                              1.94 ns     2.41 ns
BM_MakeHeap_string_Heap_4                                              12.8 ns     13.0 ns
BM_MakeHeap_string_Heap_16                                             18.2 ns     18.4 ns
BM_MakeHeap_string_Heap_64                                             22.2 ns     22.4 ns
BM_MakeHeap_string_Heap_256                                            18.3 ns     18.3 ns
BM_MakeHeap_string_Heap_1024                                           14.8 ns     14.4 ns
BM_MakeHeap_string_Heap_16384                                          17.6 ns     19.3 ns
BM_MakeHeap_string_Heap_262144                                         23.1 ns     23.2 ns
BM_MakeHeap_string_QuickSortAdversary_1                                2.06 ns     2.44 ns
BM_MakeHeap_string_QuickSortAdversary_4                                18.9 ns     18.7 ns
BM_MakeHeap_string_QuickSortAdversary_16                               28.7 ns     27.4 ns
BM_MakeHeap_string_QuickSortAdversary_64                               33.6 ns     32.2 ns
BM_MakeHeap_string_QuickSortAdversary_256                              30.9 ns     30.2 ns
BM_MakeHeap_string_QuickSortAdversary_1024                             30.0 ns     29.9 ns
BM_MakeHeap_string_QuickSortAdversary_16384                            39.5 ns     39.1 ns
BM_MakeHeap_string_QuickSortAdversary_262144                           63.3 ns     61.7 ns
BM_MakeHeap_float_Random_1                                             1.71 ns     1.68 ns
BM_MakeHeap_float_Random_4                                             4.33 ns     4.43 ns
BM_MakeHeap_float_Random_16                                            4.59 ns     4.60 ns
BM_MakeHeap_float_Random_64                                            5.26 ns     5.27 ns
BM_MakeHeap_float_Random_256                                           4.83 ns     4.96 ns
BM_MakeHeap_float_Random_1024                                          4.73 ns     4.69 ns
BM_MakeHeap_float_Random_16384                                         4.74 ns     4.69 ns
BM_MakeHeap_float_Random_262144                                        4.79 ns     4.72 ns
BM_MakeHeap_float_Ascending_1                                          1.75 ns     1.69 ns
BM_MakeHeap_float_Ascending_4                                          1.39 ns     1.44 ns
BM_MakeHeap_float_Ascending_16                                         1.60 ns     1.69 ns
BM_MakeHeap_float_Ascending_64                                         1.66 ns     1.59 ns
BM_MakeHeap_float_Ascending_256                                        1.77 ns     1.67 ns
BM_MakeHeap_float_Ascending_1024                                       1.83 ns     1.72 ns
BM_MakeHeap_float_Ascending_16384                                      1.94 ns     1.83 ns
BM_MakeHeap_float_Ascending_262144                                     2.05 ns     1.96 ns
BM_MakeHeap_float_Descending_1                                         1.72 ns     1.62 ns
BM_MakeHeap_float_Descending_4                                        0.837 ns    0.898 ns
BM_MakeHeap_float_Descending_16                                       0.750 ns    0.688 ns
BM_MakeHeap_float_Descending_64                                       0.690 ns    0.598 ns
BM_MakeHeap_float_Descending_256                                      0.718 ns    0.585 ns
BM_MakeHeap_float_Descending_1024                                     0.696 ns    0.568 ns
BM_MakeHeap_float_Descending_16384                                    0.673 ns    0.531 ns
BM_MakeHeap_float_Descending_262144                                   0.674 ns    0.536 ns
BM_MakeHeap_float_SingleElement_1                                      1.67 ns     1.66 ns
BM_MakeHeap_float_SingleElement_4                                      1.00 ns     1.04 ns
BM_MakeHeap_float_SingleElement_16                                     1.49 ns     1.49 ns
BM_MakeHeap_float_SingleElement_64                                     1.63 ns     1.53 ns
BM_MakeHeap_float_SingleElement_256                                    1.76 ns     1.64 ns
BM_MakeHeap_float_SingleElement_1024                                   1.82 ns     1.70 ns
BM_MakeHeap_float_SingleElement_16384                                  1.93 ns     1.82 ns
BM_MakeHeap_float_SingleElement_262144                                 2.11 ns     2.03 ns
BM_MakeHeap_float_PipeOrgan_1                                          1.70 ns     1.62 ns
BM_MakeHeap_float_PipeOrgan_4                                          1.31 ns     1.37 ns
BM_MakeHeap_float_PipeOrgan_16                                         1.66 ns     1.67 ns
BM_MakeHeap_float_PipeOrgan_64                                         1.67 ns     1.58 ns
BM_MakeHeap_float_PipeOrgan_256                                        1.76 ns     1.64 ns
BM_MakeHeap_float_PipeOrgan_1024                                       1.83 ns     1.71 ns
BM_MakeHeap_float_PipeOrgan_16384                                      1.93 ns     1.84 ns
BM_MakeHeap_float_PipeOrgan_262144                                     2.07 ns     1.97 ns
BM_MakeHeap_float_Heap_1                                               1.67 ns     1.65 ns
BM_MakeHeap_float_Heap_4                                              0.836 ns    0.902 ns
BM_MakeHeap_float_Heap_16                                             0.751 ns    0.702 ns
BM_MakeHeap_float_Heap_64                                             0.692 ns    0.598 ns
BM_MakeHeap_float_Heap_256                                            0.718 ns    0.587 ns
BM_MakeHeap_float_Heap_1024                                           0.700 ns    0.578 ns
BM_MakeHeap_float_Heap_16384                                          0.676 ns    0.533 ns
BM_MakeHeap_float_Heap_262144                                         0.675 ns    0.535 ns
BM_MakeHeap_float_QuickSortAdversary_1                                 1.68 ns     1.67 ns
BM_MakeHeap_float_QuickSortAdversary_4                                 1.38 ns     1.44 ns
BM_MakeHeap_float_QuickSortAdversary_16                                1.61 ns     1.68 ns
BM_MakeHeap_float_QuickSortAdversary_64                                1.35 ns     1.27 ns
BM_MakeHeap_float_QuickSortAdversary_256                               1.44 ns     1.34 ns
BM_MakeHeap_float_QuickSortAdversary_1024                              1.46 ns     1.34 ns
BM_MakeHeap_float_QuickSortAdversary_16384                            0.853 ns    0.716 ns
BM_MakeHeap_float_QuickSortAdversary_262144                           0.710 ns    0.571 ns
```

---
Full diff: https://github.com/llvm/llvm-project/pull/159917.diff


7 Files Affected:

- (modified) libcxx/include/__algorithm/make_heap.h (+4-4) 
- (modified) libcxx/include/__algorithm/partial_sort_copy.h (+3-2) 
- (modified) libcxx/include/__algorithm/pop_heap.h (+17-19) 
- (modified) libcxx/include/__algorithm/push_heap.h (+23-25) 
- (modified) libcxx/include/__algorithm/ranges_pop_heap.h (+3-1) 
- (modified) libcxx/include/__algorithm/sift_down.h (+21-16) 
- (modified) libcxx/include/__algorithm/sort_heap.h (+2-2) 


``````````diff
diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index 8cfeda2b59811..9de58fc65ce21 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -36,7 +36,7 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
   using __diff_t     = __iter_diff_t<_RandomAccessIterator>;
   const __diff_t __n = __last - __first;
 
-  static const bool __assume_both_children = is_arithmetic<__iter_value_type<_RandomAccessIterator> >::value;
+  constexpr bool __assume_both_children = is_arithmetic<__iter_value_type<_RandomAccessIterator> >::value;
 
   // While it would be correct to always assume we have both children, in practice we observed this to be a performance
   // improvement only for arithmetic types.
@@ -45,11 +45,11 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
   if (__n > 1) {
     // start from the first parent, there is no need to consider children
 
-    for (__diff_t __start = (__sift_down_n - 2) / 2; __start >= 0; --__start) {
-      std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, __start);
+    for (__diff_t __start = __sift_down_n / 2; __start != 0;) {
+      std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, --__start);
     }
     if _LIBCPP_CONSTEXPR (__assume_both_children)
-      std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n);
+      std::__sift_up<_AlgPolicy>(__first, --__last, __comp);
   }
 }
 
diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 2230dfc9cc4ad..0b8a0fb7e2830 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -52,16 +52,17 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
   _RandomAccessIterator __r = __result_first;
   auto&& __projected_comp   = std::__make_projected(__comp, __proj2);
 
-  if (__r != __result_last) {
+  if (__result_first != __result_last) {
     for (; __first != __last && __r != __result_last; ++__first, (void)++__r)
       *__r = *__first;
     std::__make_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
     typename iterator_traits<_RandomAccessIterator>::difference_type __len = __r - __result_first;
-    for (; __first != __last; ++__first)
+    for (; __first != __last; ++__first) {
       if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
         *__result_first = *__first;
         std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0);
       }
+    }
     std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
   }
 
diff --git a/libcxx/include/__algorithm/pop_heap.h b/libcxx/include/__algorithm/pop_heap.h
index 6d23830097ff9..57f079ace0d6e 100644
--- a/libcxx/include/__algorithm/pop_heap.h
+++ b/libcxx/include/__algorithm/pop_heap.h
@@ -33,28 +33,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __pop_heap(_RandomAccessIterator __first,
-           _RandomAccessIterator __last,
+           _RandomAccessIterator __bottom,
            _Compare& __comp,
            typename iterator_traits<_RandomAccessIterator>::difference_type __len) {
-  // Calling `pop_heap` on an empty range is undefined behavior, but in practice it will be a no-op.
-  _LIBCPP_ASSERT_PEDANTIC(__len > 0, "The heap given to pop_heap must be non-empty");
+  using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
 
-  __comp_ref_type<_Compare> __comp_ref = __comp;
+  value_type __top             = _IterOps<_AlgPolicy>::__iter_move(__first); // create a hole at __first
+  _RandomAccessIterator __hole = std::__floyd_sift_down<_AlgPolicy>(__first, __comp, __len);
 
-  using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
-  if (__len > 1) {
-    value_type __top             = _IterOps<_AlgPolicy>::__iter_move(__first); // create a hole at __first
-    _RandomAccessIterator __hole = std::__floyd_sift_down<_AlgPolicy>(__first, __comp_ref, __len);
-    --__last;
-
-    if (__hole == __last) {
-      *__hole = std::move(__top);
-    } else {
-      *__hole = _IterOps<_AlgPolicy>::__iter_move(__last);
-      ++__hole;
-      *__last = std::move(__top);
-      std::__sift_up<_AlgPolicy>(__first, __hole, __comp_ref, __hole - __first);
-    }
+  if (__hole == __bottom) {
+    *__hole = std::move(__top);
+  } else {
+    *__hole   = _IterOps<_AlgPolicy>::__iter_move(__bottom);
+    *__bottom = std::move(__top);
+    std::__sift_up<_AlgPolicy>(__first, __hole, __comp);
   }
 }
 
@@ -64,8 +56,14 @@ pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare _
   static_assert(std::is_copy_constructible<_RandomAccessIterator>::value, "Iterators must be copy constructible.");
   static_assert(std::is_copy_assignable<_RandomAccessIterator>::value, "Iterators must be copy assignable.");
 
+  // Calling `pop_heap` on an empty range is undefined behavior, but in practice it will be a no-op.
+  _LIBCPP_ASSERT_PEDANTIC(__len > 0, "The heap given to pop_heap must be non-empty");
+
+  __comp_ref_type<_Compare> __comp_ref = __comp;
+
   typename iterator_traits<_RandomAccessIterator>::difference_type __len = __last - __first;
-  std::__pop_heap<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp, __len);
+  if (__len > 1)
+    std::__pop_heap<_ClassicAlgPolicy>(std::move(__first), std::move(--__last), __comp_ref, __len);
 }
 
 template <class _RandomAccessIterator>
diff --git a/libcxx/include/__algorithm/push_heap.h b/libcxx/include/__algorithm/push_heap.h
index ec0b445f2b70f..0de948bcc0226 100644
--- a/libcxx/include/__algorithm/push_heap.h
+++ b/libcxx/include/__algorithm/push_heap.h
@@ -29,37 +29,35 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
-__sift_up(_RandomAccessIterator __first,
-          _RandomAccessIterator __last,
-          _Compare&& __comp,
-          typename iterator_traits<_RandomAccessIterator>::difference_type __len) {
-  using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
-
-  if (__len > 1) {
-    __len                       = (__len - 2) / 2;
-    _RandomAccessIterator __ptr = __first + __len;
-
-    if (__comp(*__ptr, *--__last)) {
-      value_type __t(_IterOps<_AlgPolicy>::__iter_move(__last));
-      do {
-        *__last = _IterOps<_AlgPolicy>::__iter_move(__ptr);
-        __last  = __ptr;
-        if (__len == 0)
-          break;
-        __len = (__len - 1) / 2;
-        __ptr = __first + __len;
-      } while (__comp(*__ptr, __t));
-
-      *__last = std::move(__t);
-    }
+__sift_up(_RandomAccessIterator __first, _RandomAccessIterator __bottom, _Compare&& __comp) {
+  using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
+  using value_type      = typename iterator_traits<_RandomAccessIterator>::value_type;
+
+  difference_type __parent = __bottom - __first;
+  _LIBCPP_ASSERT_INTERNAL(__parent > 0, "shouldn't be called unless __bottom - __first > 0");
+  __parent                         = (__parent - 1) / 2;
+  _RandomAccessIterator __parent_i = __first + __parent;
+
+  if (__comp(*__parent_i, *__bottom)) {
+    value_type __t(_IterOps<_AlgPolicy>::__iter_move(__bottom));
+    do {
+      *__bottom = _IterOps<_AlgPolicy>::__iter_move(__parent_i);
+      __bottom  = __parent_i;
+      if (__parent == 0)
+        break;
+      __parent   = (__parent - 1) / 2;
+      __parent_i = __first + __parent;
+    } while (__comp(*__parent_i, __t));
+
+    *__bottom = std::move(__t);
   }
 }
 
 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) {
-  typename iterator_traits<_RandomAccessIterator>::difference_type __len = __last - __first;
-  std::__sift_up<_AlgPolicy, __comp_ref_type<_Compare> >(std::move(__first), std::move(__last), __comp, __len);
+  if (__first != __last)
+    std::__sift_up<_AlgPolicy, __comp_ref_type<_Compare> >(std::move(__first), std::move(--__last), __comp);
 }
 
 template <class _RandomAccessIterator, class _Compare>
diff --git a/libcxx/include/__algorithm/ranges_pop_heap.h b/libcxx/include/__algorithm/ranges_pop_heap.h
index eccf54c094e3d..b6e994464a1f5 100644
--- a/libcxx/include/__algorithm/ranges_pop_heap.h
+++ b/libcxx/include/__algorithm/ranges_pop_heap.h
@@ -48,7 +48,9 @@ struct __pop_heap {
     auto __len       = __last_iter - __first;
 
     auto&& __projected_comp = std::__make_projected(__comp, __proj);
-    std::__pop_heap<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp, __len);
+
+    if (__len > 1)
+      std::__pop_heap<_RangeAlgPolicy>(std::move(__first), ranges::prev(__last_iter), __projected_comp, __len);
 
     return __last_iter;
   }
diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index e01c9b2b00f86..6f2b59ca59b1a 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -38,7 +38,7 @@ __sift_down(_RandomAccessIterator __first,
   // right-child of __start is at 2 * __start + 2
   difference_type __child = __start;
 
-  if (__len < 2 || (__len - 2) / 2 < __child)
+  if (__len < 2)
     return;
 
   __child = 2 * __child + 1;
@@ -62,7 +62,7 @@ __sift_down(_RandomAccessIterator __first,
     __first[__start] = _Ops::__iter_move(__first + __child);
     __start          = __child;
 
-    if ((__len - 2) / 2 < __child)
+    if (__len / 2 - 1 < __child)
       break;
 
     // recompute the child based off of the updated parent
@@ -85,29 +85,34 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator __floy
     _RandomAccessIterator __first,
     _Compare&& __comp,
     typename iterator_traits<_RandomAccessIterator>::difference_type __len) {
-  using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
-  _LIBCPP_ASSERT_INTERNAL(__len >= 2, "shouldn't be called unless __len >= 2");
+  _LIBCPP_ASSERT_INTERNAL(__len > 1, "shouldn't be called unless __len > 1");
 
-  _RandomAccessIterator __hole    = __first;
-  _RandomAccessIterator __child_i = __first;
-  difference_type __child         = 0;
+  using _Ops = _IterOps<_AlgPolicy>;
 
-  while (true) {
-    __child_i += difference_type(__child + 1);
-    __child = 2 * __child + 1;
+  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
 
-    if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
-      // right-child exists and is greater than left-child
-      ++__child_i;
-      ++__child;
+  difference_type __child      = 1;
+  _RandomAccessIterator __hole = __first, __child_i = __first;
+
+  while (true) {
+    __child_i += __child;
+    __child *= 2;
+
+    if (__child < __len) {
+      _RandomAccessIterator __right_i = _Ops::next(__child_i);
+      if (__comp(*__child_i, *__right_i)) {
+        // right-child exists and is greater than left-child
+        __child_i = __right_i;
+        ++__child;
+      }
     }
 
     // swap __hole with its largest child
-    *__hole = _IterOps<_AlgPolicy>::__iter_move(__child_i);
+    *__hole = _Ops::__iter_move(__child_i);
     __hole  = __child_i;
 
     // if __hole is now a leaf, we're done
-    if (__child > (__len - 2) / 2)
+    if (__child > __len / 2)
       return __hole;
   }
 }
diff --git a/libcxx/include/__algorithm/sort_heap.h b/libcxx/include/__algorithm/sort_heap.h
index f20b110c7fd12..f2d73ab793650 100644
--- a/libcxx/include/__algorithm/sort_heap.h
+++ b/libcxx/include/__algorithm/sort_heap.h
@@ -36,8 +36,8 @@ __sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
   __comp_ref_type<_Compare> __comp_ref = __comp;
 
   using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
-  for (difference_type __n = __last - __first; __n > 1; --__last, (void)--__n)
-    std::__pop_heap<_AlgPolicy>(__first, __last, __comp_ref, __n);
+  for (difference_type __n = __last - __first; __n > 1; --__n)
+    std::__pop_heap<_AlgPolicy>(__first, --__last, __comp_ref, __n);
   std::__check_strict_weak_ordering_sorted(__first, __saved_last, __comp_ref);
 }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/159917


More information about the libcxx-commits mailing list