[libcxx-commits] [libcxx] [libc++] Optimize heap operations (PR #159917)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Sep 20 04:49:52 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: None (nukyan)
<details>
<summary>Changes</summary>
This reopens #<!-- -->121480. These changes are intended to clean up the code and slightly improve performance.
Here is the output of `libcxx/test/benchmarks/algorithms/make_heap.bench.cpp`.
```
------------------------------------------------------------------------------------------
Benchmark Before After
------------------------------------------------------------------------------------------
BM_MakeHeap_uint32_Random_1 1.22 ns 1.68 ns
BM_MakeHeap_uint32_Random_4 3.65 ns 3.80 ns
BM_MakeHeap_uint32_Random_16 4.19 ns 4.18 ns
BM_MakeHeap_uint32_Random_64 4.54 ns 4.50 ns
BM_MakeHeap_uint32_Random_256 4.40 ns 4.27 ns
BM_MakeHeap_uint32_Random_1024 4.31 ns 4.17 ns
BM_MakeHeap_uint32_Random_16384 4.55 ns 4.39 ns
BM_MakeHeap_uint32_Random_262144 4.63 ns 4.43 ns
BM_MakeHeap_uint32_Ascending_1 1.19 ns 1.66 ns
BM_MakeHeap_uint32_Ascending_4 1.10 ns 1.21 ns
BM_MakeHeap_uint32_Ascending_16 1.25 ns 1.30 ns
BM_MakeHeap_uint32_Ascending_64 1.18 ns 1.13 ns
BM_MakeHeap_uint32_Ascending_256 1.24 ns 1.16 ns
BM_MakeHeap_uint32_Ascending_1024 1.28 ns 1.21 ns
BM_MakeHeap_uint32_Ascending_16384 1.45 ns 1.39 ns
BM_MakeHeap_uint32_Ascending_262144 1.61 ns 1.52 ns
BM_MakeHeap_uint32_Descending_1 1.18 ns 1.69 ns
BM_MakeHeap_uint32_Descending_4 0.702 ns 0.831 ns
BM_MakeHeap_uint32_Descending_16 0.638 ns 0.628 ns
BM_MakeHeap_uint32_Descending_64 0.565 ns 0.522 ns
BM_MakeHeap_uint32_Descending_256 0.587 ns 0.521 ns
BM_MakeHeap_uint32_Descending_1024 0.579 ns 0.523 ns
BM_MakeHeap_uint32_Descending_16384 0.536 ns 0.465 ns
BM_MakeHeap_uint32_Descending_262144 0.535 ns 0.472 ns
BM_MakeHeap_uint32_SingleElement_1 1.18 ns 1.70 ns
BM_MakeHeap_uint32_SingleElement_4 0.814 ns 0.926 ns
BM_MakeHeap_uint32_SingleElement_16 1.23 ns 1.27 ns
BM_MakeHeap_uint32_SingleElement_64 1.16 ns 1.08 ns
BM_MakeHeap_uint32_SingleElement_256 1.23 ns 1.15 ns
BM_MakeHeap_uint32_SingleElement_1024 1.27 ns 1.19 ns
BM_MakeHeap_uint32_SingleElement_16384 1.45 ns 1.39 ns
BM_MakeHeap_uint32_SingleElement_262144 1.69 ns 1.80 ns
BM_MakeHeap_uint32_PipeOrgan_1 1.19 ns 1.69 ns
BM_MakeHeap_uint32_PipeOrgan_4 1.09 ns 1.17 ns
BM_MakeHeap_uint32_PipeOrgan_16 1.30 ns 1.27 ns
BM_MakeHeap_uint32_PipeOrgan_64 1.22 ns 1.16 ns
BM_MakeHeap_uint32_PipeOrgan_256 1.28 ns 1.20 ns
BM_MakeHeap_uint32_PipeOrgan_1024 1.34 ns 1.24 ns
BM_MakeHeap_uint32_PipeOrgan_16384 1.47 ns 1.38 ns
BM_MakeHeap_uint32_PipeOrgan_262144 1.69 ns 1.71 ns
BM_MakeHeap_uint32_Heap_1 1.21 ns 1.68 ns
BM_MakeHeap_uint32_Heap_4 0.701 ns 0.831 ns
BM_MakeHeap_uint32_Heap_16 0.638 ns 0.629 ns
BM_MakeHeap_uint32_Heap_64 0.564 ns 0.525 ns
BM_MakeHeap_uint32_Heap_256 0.586 ns 0.522 ns
BM_MakeHeap_uint32_Heap_1024 0.578 ns 0.519 ns
BM_MakeHeap_uint32_Heap_16384 0.536 ns 0.466 ns
BM_MakeHeap_uint32_Heap_262144 0.536 ns 0.472 ns
BM_MakeHeap_uint32_QuickSortAdversary_1 1.18 ns 1.68 ns
BM_MakeHeap_uint32_QuickSortAdversary_4 1.10 ns 1.20 ns
BM_MakeHeap_uint32_QuickSortAdversary_16 1.25 ns 1.30 ns
BM_MakeHeap_uint32_QuickSortAdversary_64 1.01 ns 0.929 ns
BM_MakeHeap_uint32_QuickSortAdversary_256 1.03 ns 0.963 ns
BM_MakeHeap_uint32_QuickSortAdversary_1024 1.06 ns 1.04 ns
BM_MakeHeap_uint32_QuickSortAdversary_16384 0.659 ns 0.610 ns
BM_MakeHeap_uint32_QuickSortAdversary_262144 0.566 ns 0.509 ns
BM_MakeHeap_uint64_Random_1 1.24 ns 1.71 ns
BM_MakeHeap_uint64_Random_4 3.76 ns 3.99 ns
BM_MakeHeap_uint64_Random_16 3.94 ns 3.73 ns
BM_MakeHeap_uint64_Random_64 4.45 ns 4.39 ns
BM_MakeHeap_uint64_Random_256 4.17 ns 4.06 ns
BM_MakeHeap_uint64_Random_1024 3.86 ns 3.81 ns
BM_MakeHeap_uint64_Random_16384 4.07 ns 4.05 ns
BM_MakeHeap_uint64_Random_262144 4.34 ns 4.25 ns
BM_MakeHeap_uint64_Ascending_1 1.21 ns 1.70 ns
BM_MakeHeap_uint64_Ascending_4 1.18 ns 1.28 ns
BM_MakeHeap_uint64_Ascending_16 1.24 ns 1.28 ns
BM_MakeHeap_uint64_Ascending_64 1.32 ns 1.29 ns
BM_MakeHeap_uint64_Ascending_256 1.50 ns 1.47 ns
BM_MakeHeap_uint64_Ascending_1024 1.46 ns 1.41 ns
BM_MakeHeap_uint64_Ascending_16384 1.71 ns 1.67 ns
BM_MakeHeap_uint64_Ascending_262144 2.34 ns 2.29 ns
BM_MakeHeap_uint64_Descending_1 1.20 ns 1.72 ns
BM_MakeHeap_uint64_Descending_4 0.798 ns 0.952 ns
BM_MakeHeap_uint64_Descending_16 0.683 ns 0.703 ns
BM_MakeHeap_uint64_Descending_64 0.662 ns 0.619 ns
BM_MakeHeap_uint64_Descending_256 0.839 ns 0.793 ns
BM_MakeHeap_uint64_Descending_1024 0.688 ns 0.613 ns
BM_MakeHeap_uint64_Descending_16384 0.646 ns 0.573 ns
BM_MakeHeap_uint64_Descending_262144 0.657 ns 0.583 ns
BM_MakeHeap_uint64_SingleElement_1 1.27 ns 1.67 ns
BM_MakeHeap_uint64_SingleElement_4 0.892 ns 0.996 ns
BM_MakeHeap_uint64_SingleElement_16 1.17 ns 1.17 ns
BM_MakeHeap_uint64_SingleElement_64 1.30 ns 1.23 ns
BM_MakeHeap_uint64_SingleElement_256 1.49 ns 1.44 ns
BM_MakeHeap_uint64_SingleElement_1024 1.47 ns 1.40 ns
BM_MakeHeap_uint64_SingleElement_16384 1.77 ns 1.76 ns
BM_MakeHeap_uint64_SingleElement_262144 2.50 ns 2.45 ns
BM_MakeHeap_uint64_PipeOrgan_1 1.28 ns 1.66 ns
BM_MakeHeap_uint64_PipeOrgan_4 1.14 ns 1.25 ns
BM_MakeHeap_uint64_PipeOrgan_16 1.27 ns 1.27 ns
BM_MakeHeap_uint64_PipeOrgan_64 1.33 ns 1.27 ns
BM_MakeHeap_uint64_PipeOrgan_256 1.49 ns 1.44 ns
BM_MakeHeap_uint64_PipeOrgan_1024 1.47 ns 1.39 ns
BM_MakeHeap_uint64_PipeOrgan_16384 1.71 ns 1.66 ns
BM_MakeHeap_uint64_PipeOrgan_262144 2.33 ns 2.28 ns
BM_MakeHeap_uint64_Heap_1 1.27 ns 1.70 ns
BM_MakeHeap_uint64_Heap_4 0.798 ns 0.976 ns
BM_MakeHeap_uint64_Heap_16 0.685 ns 0.706 ns
BM_MakeHeap_uint64_Heap_64 0.663 ns 0.617 ns
BM_MakeHeap_uint64_Heap_256 0.839 ns 0.792 ns
BM_MakeHeap_uint64_Heap_1024 0.688 ns 0.614 ns
BM_MakeHeap_uint64_Heap_16384 0.644 ns 0.573 ns
BM_MakeHeap_uint64_Heap_262144 0.651 ns 0.581 ns
BM_MakeHeap_uint64_QuickSortAdversary_1 1.23 ns 1.73 ns
BM_MakeHeap_uint64_QuickSortAdversary_4 1.18 ns 1.28 ns
BM_MakeHeap_uint64_QuickSortAdversary_16 1.24 ns 1.28 ns
BM_MakeHeap_uint64_QuickSortAdversary_64 1.14 ns 1.06 ns
BM_MakeHeap_uint64_QuickSortAdversary_256 1.31 ns 1.27 ns
BM_MakeHeap_uint64_QuickSortAdversary_1024 1.16 ns 1.09 ns
BM_MakeHeap_uint64_QuickSortAdversary_16384 0.796 ns 0.728 ns
BM_MakeHeap_uint64_QuickSortAdversary_262144 0.697 ns 0.620 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1 1.19 ns 1.71 ns
BM_MakeHeap_pair<uint32, uint32>_Random_4 4.74 ns 4.64 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16 5.77 ns 5.93 ns
BM_MakeHeap_pair<uint32, uint32>_Random_64 6.72 ns 6.63 ns
BM_MakeHeap_pair<uint32, uint32>_Random_256 6.68 ns 6.69 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1024 6.63 ns 6.64 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16384 6.94 ns 7.05 ns
BM_MakeHeap_pair<uint32, uint32>_Random_262144 7.00 ns 7.23 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1 1.18 ns 1.71 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_4 1.98 ns 1.94 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16 2.40 ns 2.31 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_64 2.72 ns 2.61 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_256 2.94 ns 2.86 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1024 2.94 ns 2.84 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16384 2.98 ns 2.86 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_262144 3.42 ns 3.31 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1 1.20 ns 1.67 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_4 1.26 ns 1.27 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16 1.35 ns 1.30 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_64 1.40 ns 1.32 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_256 1.43 ns 1.35 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1024 1.34 ns 1.27 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16384 1.31 ns 1.22 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_262144 1.31 ns 1.22 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1 1.20 ns 1.68 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_4 1.98 ns 1.95 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16 2.56 ns 2.49 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_64 2.83 ns 2.73 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_256 3.04 ns 2.94 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1024 2.99 ns 2.89 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16384 3.02 ns 2.89 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_262144 3.46 ns 3.33 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1 1.18 ns 1.68 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_4 1.65 ns 1.70 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16 2.38 ns 2.31 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_64 2.75 ns 2.66 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_256 3.00 ns 2.88 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1024 2.95 ns 2.88 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16384 2.99 ns 2.88 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_262144 3.42 ns 3.32 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1 1.18 ns 1.68 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_4 1.26 ns 1.26 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16 1.35 ns 1.30 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_64 1.40 ns 1.32 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_256 1.43 ns 1.35 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1024 1.34 ns 1.26 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16384 1.31 ns 1.22 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_262144 1.31 ns 1.22 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1 1.20 ns 1.69 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_4 1.98 ns 1.94 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16 2.47 ns 2.38 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_64 2.73 ns 2.64 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_256 2.93 ns 2.85 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1024 2.91 ns 2.82 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16384 2.96 ns 2.86 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_262144 3.42 ns 3.31 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1 1.30 ns 2.05 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_4 6.23 ns 6.53 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16 9.31 ns 9.32 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_64 10.4 ns 10.6 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_256 9.96 ns 10.3 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1024 9.67 ns 9.93 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16384 10.3 ns 10.3 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_262144 11.0 ns 11.0 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1 1.30 ns 2.04 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_4 2.54 ns 3.03 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16 2.66 ns 2.80 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_64 3.39 ns 3.62 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_256 3.47 ns 3.76 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1024 3.36 ns 3.44 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16384 3.56 ns 3.58 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_262144 4.88 ns 4.96 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1 1.30 ns 1.90 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_4 1.88 ns 2.42 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16 1.73 ns 2.04 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_64 2.11 ns 2.55 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_256 1.87 ns 2.23 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1024 1.65 ns 1.94 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16384 1.59 ns 1.80 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_262144 1.67 ns 1.86 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1 1.29 ns 2.11 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_4 2.88 ns 3.27 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16 3.32 ns 3.43 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_64 4.16 ns 4.46 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_256 4.04 ns 4.36 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1024 3.86 ns 3.81 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16384 4.26 ns 4.21 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_262144 5.50 ns 5.46 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1 1.32 ns 1.75 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_4 2.17 ns 2.35 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16 2.58 ns 2.71 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_64 3.52 ns 3.77 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_256 3.52 ns 3.85 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1024 3.39 ns 3.54 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16384 3.63 ns 3.73 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_262144 4.94 ns 5.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1 1.42 ns 2.03 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_4 2.00 ns 2.44 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16 1.60 ns 1.81 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_64 2.06 ns 2.32 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_256 1.79 ns 2.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1024 1.60 ns 1.81 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16384 1.58 ns 1.74 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_262144 1.68 ns 1.78 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1 1.46 ns 2.00 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_4 2.72 ns 2.96 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16 2.60 ns 2.75 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_64 3.39 ns 3.65 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_256 3.39 ns 3.59 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1024 3.18 ns 3.36 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16384 3.43 ns 3.63 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_262144 4.63 ns 4.81 ns
BM_MakeHeap_string_Random_1 1.90 ns 2.50 ns
BM_MakeHeap_string_Random_4 18.1 ns 18.7 ns
BM_MakeHeap_string_Random_16 26.7 ns 27.6 ns
BM_MakeHeap_string_Random_64 32.5 ns 33.8 ns
BM_MakeHeap_string_Random_256 32.4 ns 32.9 ns
BM_MakeHeap_string_Random_1024 34.1 ns 33.9 ns
BM_MakeHeap_string_Random_16384 61.1 ns 61.2 ns
BM_MakeHeap_string_Random_262144 124 ns 122 ns
BM_MakeHeap_string_Ascending_1 1.90 ns 2.46 ns
BM_MakeHeap_string_Ascending_4 14.6 ns 15.2 ns
BM_MakeHeap_string_Ascending_16 21.8 ns 22.5 ns
BM_MakeHeap_string_Ascending_64 28.8 ns 29.7 ns
BM_MakeHeap_string_Ascending_256 28.6 ns 29.5 ns
BM_MakeHeap_string_Ascending_1024 28.6 ns 29.0 ns
BM_MakeHeap_string_Ascending_16384 56.9 ns 56.5 ns
BM_MakeHeap_string_Ascending_262144 116 ns 112 ns
BM_MakeHeap_string_Descending_1 1.89 ns 2.47 ns
BM_MakeHeap_string_Descending_4 13.0 ns 13.1 ns
BM_MakeHeap_string_Descending_16 18.6 ns 18.6 ns
BM_MakeHeap_string_Descending_64 23.1 ns 22.7 ns
BM_MakeHeap_string_Descending_256 21.6 ns 21.7 ns
BM_MakeHeap_string_Descending_1024 21.2 ns 21.1 ns
BM_MakeHeap_string_Descending_16384 36.2 ns 36.2 ns
BM_MakeHeap_string_Descending_262144 42.6 ns 41.0 ns
BM_MakeHeap_string_SingleElement_1 2.02 ns 2.42 ns
BM_MakeHeap_string_SingleElement_4 13.9 ns 13.9 ns
BM_MakeHeap_string_SingleElement_16 20.9 ns 21.2 ns
BM_MakeHeap_string_SingleElement_64 27.4 ns 26.9 ns
BM_MakeHeap_string_SingleElement_256 23.1 ns 23.1 ns
BM_MakeHeap_string_SingleElement_1024 20.8 ns 20.3 ns
BM_MakeHeap_string_SingleElement_16384 25.6 ns 26.4 ns
BM_MakeHeap_string_SingleElement_262144 44.8 ns 44.3 ns
BM_MakeHeap_string_PipeOrgan_1 2.00 ns 2.43 ns
BM_MakeHeap_string_PipeOrgan_4 14.1 ns 14.0 ns
BM_MakeHeap_string_PipeOrgan_16 22.2 ns 22.0 ns
BM_MakeHeap_string_PipeOrgan_64 29.5 ns 28.9 ns
BM_MakeHeap_string_PipeOrgan_256 29.8 ns 29.2 ns
BM_MakeHeap_string_PipeOrgan_1024 29.3 ns 29.1 ns
BM_MakeHeap_string_PipeOrgan_16384 57.1 ns 56.8 ns
BM_MakeHeap_string_PipeOrgan_262144 114 ns 107 ns
BM_MakeHeap_string_Heap_1 1.94 ns 2.41 ns
BM_MakeHeap_string_Heap_4 12.8 ns 13.0 ns
BM_MakeHeap_string_Heap_16 18.2 ns 18.4 ns
BM_MakeHeap_string_Heap_64 22.2 ns 22.4 ns
BM_MakeHeap_string_Heap_256 18.3 ns 18.3 ns
BM_MakeHeap_string_Heap_1024 14.8 ns 14.4 ns
BM_MakeHeap_string_Heap_16384 17.6 ns 19.3 ns
BM_MakeHeap_string_Heap_262144 23.1 ns 23.2 ns
BM_MakeHeap_string_QuickSortAdversary_1 2.06 ns 2.44 ns
BM_MakeHeap_string_QuickSortAdversary_4 18.9 ns 18.7 ns
BM_MakeHeap_string_QuickSortAdversary_16 28.7 ns 27.4 ns
BM_MakeHeap_string_QuickSortAdversary_64 33.6 ns 32.2 ns
BM_MakeHeap_string_QuickSortAdversary_256 30.9 ns 30.2 ns
BM_MakeHeap_string_QuickSortAdversary_1024 30.0 ns 29.9 ns
BM_MakeHeap_string_QuickSortAdversary_16384 39.5 ns 39.1 ns
BM_MakeHeap_string_QuickSortAdversary_262144 63.3 ns 61.7 ns
BM_MakeHeap_float_Random_1 1.71 ns 1.68 ns
BM_MakeHeap_float_Random_4 4.33 ns 4.43 ns
BM_MakeHeap_float_Random_16 4.59 ns 4.60 ns
BM_MakeHeap_float_Random_64 5.26 ns 5.27 ns
BM_MakeHeap_float_Random_256 4.83 ns 4.96 ns
BM_MakeHeap_float_Random_1024 4.73 ns 4.69 ns
BM_MakeHeap_float_Random_16384 4.74 ns 4.69 ns
BM_MakeHeap_float_Random_262144 4.79 ns 4.72 ns
BM_MakeHeap_float_Ascending_1 1.75 ns 1.69 ns
BM_MakeHeap_float_Ascending_4 1.39 ns 1.44 ns
BM_MakeHeap_float_Ascending_16 1.60 ns 1.69 ns
BM_MakeHeap_float_Ascending_64 1.66 ns 1.59 ns
BM_MakeHeap_float_Ascending_256 1.77 ns 1.67 ns
BM_MakeHeap_float_Ascending_1024 1.83 ns 1.72 ns
BM_MakeHeap_float_Ascending_16384 1.94 ns 1.83 ns
BM_MakeHeap_float_Ascending_262144 2.05 ns 1.96 ns
BM_MakeHeap_float_Descending_1 1.72 ns 1.62 ns
BM_MakeHeap_float_Descending_4 0.837 ns 0.898 ns
BM_MakeHeap_float_Descending_16 0.750 ns 0.688 ns
BM_MakeHeap_float_Descending_64 0.690 ns 0.598 ns
BM_MakeHeap_float_Descending_256 0.718 ns 0.585 ns
BM_MakeHeap_float_Descending_1024 0.696 ns 0.568 ns
BM_MakeHeap_float_Descending_16384 0.673 ns 0.531 ns
BM_MakeHeap_float_Descending_262144 0.674 ns 0.536 ns
BM_MakeHeap_float_SingleElement_1 1.67 ns 1.66 ns
BM_MakeHeap_float_SingleElement_4 1.00 ns 1.04 ns
BM_MakeHeap_float_SingleElement_16 1.49 ns 1.49 ns
BM_MakeHeap_float_SingleElement_64 1.63 ns 1.53 ns
BM_MakeHeap_float_SingleElement_256 1.76 ns 1.64 ns
BM_MakeHeap_float_SingleElement_1024 1.82 ns 1.70 ns
BM_MakeHeap_float_SingleElement_16384 1.93 ns 1.82 ns
BM_MakeHeap_float_SingleElement_262144 2.11 ns 2.03 ns
BM_MakeHeap_float_PipeOrgan_1 1.70 ns 1.62 ns
BM_MakeHeap_float_PipeOrgan_4 1.31 ns 1.37 ns
BM_MakeHeap_float_PipeOrgan_16 1.66 ns 1.67 ns
BM_MakeHeap_float_PipeOrgan_64 1.67 ns 1.58 ns
BM_MakeHeap_float_PipeOrgan_256 1.76 ns 1.64 ns
BM_MakeHeap_float_PipeOrgan_1024 1.83 ns 1.71 ns
BM_MakeHeap_float_PipeOrgan_16384 1.93 ns 1.84 ns
BM_MakeHeap_float_PipeOrgan_262144 2.07 ns 1.97 ns
BM_MakeHeap_float_Heap_1 1.67 ns 1.65 ns
BM_MakeHeap_float_Heap_4 0.836 ns 0.902 ns
BM_MakeHeap_float_Heap_16 0.751 ns 0.702 ns
BM_MakeHeap_float_Heap_64 0.692 ns 0.598 ns
BM_MakeHeap_float_Heap_256 0.718 ns 0.587 ns
BM_MakeHeap_float_Heap_1024 0.700 ns 0.578 ns
BM_MakeHeap_float_Heap_16384 0.676 ns 0.533 ns
BM_MakeHeap_float_Heap_262144 0.675 ns 0.535 ns
BM_MakeHeap_float_QuickSortAdversary_1 1.68 ns 1.67 ns
BM_MakeHeap_float_QuickSortAdversary_4 1.38 ns 1.44 ns
BM_MakeHeap_float_QuickSortAdversary_16 1.61 ns 1.68 ns
BM_MakeHeap_float_QuickSortAdversary_64 1.35 ns 1.27 ns
BM_MakeHeap_float_QuickSortAdversary_256 1.44 ns 1.34 ns
BM_MakeHeap_float_QuickSortAdversary_1024 1.46 ns 1.34 ns
BM_MakeHeap_float_QuickSortAdversary_16384 0.853 ns 0.716 ns
BM_MakeHeap_float_QuickSortAdversary_262144 0.710 ns 0.571 ns
```
---
Full diff: https://github.com/llvm/llvm-project/pull/159917.diff
7 Files Affected:
- (modified) libcxx/include/__algorithm/make_heap.h (+4-4)
- (modified) libcxx/include/__algorithm/partial_sort_copy.h (+3-2)
- (modified) libcxx/include/__algorithm/pop_heap.h (+17-19)
- (modified) libcxx/include/__algorithm/push_heap.h (+23-25)
- (modified) libcxx/include/__algorithm/ranges_pop_heap.h (+3-1)
- (modified) libcxx/include/__algorithm/sift_down.h (+21-16)
- (modified) libcxx/include/__algorithm/sort_heap.h (+2-2)
``````````diff
diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index 8cfeda2b59811..9de58fc65ce21 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -36,7 +36,7 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
using __diff_t = __iter_diff_t<_RandomAccessIterator>;
const __diff_t __n = __last - __first;
- static const bool __assume_both_children = is_arithmetic<__iter_value_type<_RandomAccessIterator> >::value;
+ constexpr bool __assume_both_children = is_arithmetic<__iter_value_type<_RandomAccessIterator> >::value;
// While it would be correct to always assume we have both children, in practice we observed this to be a performance
// improvement only for arithmetic types.
@@ -45,11 +45,11 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
if (__n > 1) {
// start from the first parent, there is no need to consider children
- for (__diff_t __start = (__sift_down_n - 2) / 2; __start >= 0; --__start) {
- std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, __start);
+ for (__diff_t __start = __sift_down_n / 2; __start != 0;) {
+ std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, --__start);
}
if _LIBCPP_CONSTEXPR (__assume_both_children)
- std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n);
+ std::__sift_up<_AlgPolicy>(__first, --__last, __comp);
}
}
diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 2230dfc9cc4ad..0b8a0fb7e2830 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -52,16 +52,17 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
_RandomAccessIterator __r = __result_first;
auto&& __projected_comp = std::__make_projected(__comp, __proj2);
- if (__r != __result_last) {
+ if (__result_first != __result_last) {
for (; __first != __last && __r != __result_last; ++__first, (void)++__r)
*__r = *__first;
std::__make_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
typename iterator_traits<_RandomAccessIterator>::difference_type __len = __r - __result_first;
- for (; __first != __last; ++__first)
+ for (; __first != __last; ++__first) {
if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
*__result_first = *__first;
std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0);
}
+ }
std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
}
diff --git a/libcxx/include/__algorithm/pop_heap.h b/libcxx/include/__algorithm/pop_heap.h
index 6d23830097ff9..57f079ace0d6e 100644
--- a/libcxx/include/__algorithm/pop_heap.h
+++ b/libcxx/include/__algorithm/pop_heap.h
@@ -33,28 +33,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__pop_heap(_RandomAccessIterator __first,
- _RandomAccessIterator __last,
+ _RandomAccessIterator __bottom,
_Compare& __comp,
typename iterator_traits<_RandomAccessIterator>::difference_type __len) {
- // Calling `pop_heap` on an empty range is undefined behavior, but in practice it will be a no-op.
- _LIBCPP_ASSERT_PEDANTIC(__len > 0, "The heap given to pop_heap must be non-empty");
+ using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
- __comp_ref_type<_Compare> __comp_ref = __comp;
+ value_type __top = _IterOps<_AlgPolicy>::__iter_move(__first); // create a hole at __first
+ _RandomAccessIterator __hole = std::__floyd_sift_down<_AlgPolicy>(__first, __comp, __len);
- using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
- if (__len > 1) {
- value_type __top = _IterOps<_AlgPolicy>::__iter_move(__first); // create a hole at __first
- _RandomAccessIterator __hole = std::__floyd_sift_down<_AlgPolicy>(__first, __comp_ref, __len);
- --__last;
-
- if (__hole == __last) {
- *__hole = std::move(__top);
- } else {
- *__hole = _IterOps<_AlgPolicy>::__iter_move(__last);
- ++__hole;
- *__last = std::move(__top);
- std::__sift_up<_AlgPolicy>(__first, __hole, __comp_ref, __hole - __first);
- }
+ if (__hole == __bottom) {
+ *__hole = std::move(__top);
+ } else {
+ *__hole = _IterOps<_AlgPolicy>::__iter_move(__bottom);
+ *__bottom = std::move(__top);
+ std::__sift_up<_AlgPolicy>(__first, __hole, __comp);
}
}
@@ -64,8 +56,14 @@ pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare _
static_assert(std::is_copy_constructible<_RandomAccessIterator>::value, "Iterators must be copy constructible.");
static_assert(std::is_copy_assignable<_RandomAccessIterator>::value, "Iterators must be copy assignable.");
+ // Calling `pop_heap` on an empty range is undefined behavior, but in practice it will be a no-op.
+ _LIBCPP_ASSERT_PEDANTIC(__len > 0, "The heap given to pop_heap must be non-empty");
+
+ __comp_ref_type<_Compare> __comp_ref = __comp;
+
typename iterator_traits<_RandomAccessIterator>::difference_type __len = __last - __first;
- std::__pop_heap<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp, __len);
+ if (__len > 1)
+ std::__pop_heap<_ClassicAlgPolicy>(std::move(__first), std::move(--__last), __comp_ref, __len);
}
template <class _RandomAccessIterator>
diff --git a/libcxx/include/__algorithm/push_heap.h b/libcxx/include/__algorithm/push_heap.h
index ec0b445f2b70f..0de948bcc0226 100644
--- a/libcxx/include/__algorithm/push_heap.h
+++ b/libcxx/include/__algorithm/push_heap.h
@@ -29,37 +29,35 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
-__sift_up(_RandomAccessIterator __first,
- _RandomAccessIterator __last,
- _Compare&& __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __len) {
- using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
-
- if (__len > 1) {
- __len = (__len - 2) / 2;
- _RandomAccessIterator __ptr = __first + __len;
-
- if (__comp(*__ptr, *--__last)) {
- value_type __t(_IterOps<_AlgPolicy>::__iter_move(__last));
- do {
- *__last = _IterOps<_AlgPolicy>::__iter_move(__ptr);
- __last = __ptr;
- if (__len == 0)
- break;
- __len = (__len - 1) / 2;
- __ptr = __first + __len;
- } while (__comp(*__ptr, __t));
-
- *__last = std::move(__t);
- }
+__sift_up(_RandomAccessIterator __first, _RandomAccessIterator __bottom, _Compare&& __comp) {
+ using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
+ using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
+
+ difference_type __parent = __bottom - __first;
+ _LIBCPP_ASSERT_INTERNAL(__parent > 0, "shouldn't be called unless __bottom - __first > 0");
+ __parent = (__parent - 1) / 2;
+ _RandomAccessIterator __parent_i = __first + __parent;
+
+ if (__comp(*__parent_i, *__bottom)) {
+ value_type __t(_IterOps<_AlgPolicy>::__iter_move(__bottom));
+ do {
+ *__bottom = _IterOps<_AlgPolicy>::__iter_move(__parent_i);
+ __bottom = __parent_i;
+ if (__parent == 0)
+ break;
+ __parent = (__parent - 1) / 2;
+ __parent_i = __first + __parent;
+ } while (__comp(*__parent_i, __t));
+
+ *__bottom = std::move(__t);
}
}
template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) {
- typename iterator_traits<_RandomAccessIterator>::difference_type __len = __last - __first;
- std::__sift_up<_AlgPolicy, __comp_ref_type<_Compare> >(std::move(__first), std::move(__last), __comp, __len);
+ if (__first != __last)
+ std::__sift_up<_AlgPolicy, __comp_ref_type<_Compare> >(std::move(__first), std::move(--__last), __comp);
}
template <class _RandomAccessIterator, class _Compare>
diff --git a/libcxx/include/__algorithm/ranges_pop_heap.h b/libcxx/include/__algorithm/ranges_pop_heap.h
index eccf54c094e3d..b6e994464a1f5 100644
--- a/libcxx/include/__algorithm/ranges_pop_heap.h
+++ b/libcxx/include/__algorithm/ranges_pop_heap.h
@@ -48,7 +48,9 @@ struct __pop_heap {
auto __len = __last_iter - __first;
auto&& __projected_comp = std::__make_projected(__comp, __proj);
- std::__pop_heap<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp, __len);
+
+ if (__len > 1)
+ std::__pop_heap<_RangeAlgPolicy>(std::move(__first), ranges::prev(__last_iter), __projected_comp, __len);
return __last_iter;
}
diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index e01c9b2b00f86..6f2b59ca59b1a 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -38,7 +38,7 @@ __sift_down(_RandomAccessIterator __first,
// right-child of __start is at 2 * __start + 2
difference_type __child = __start;
- if (__len < 2 || (__len - 2) / 2 < __child)
+ if (__len < 2)
return;
__child = 2 * __child + 1;
@@ -62,7 +62,7 @@ __sift_down(_RandomAccessIterator __first,
__first[__start] = _Ops::__iter_move(__first + __child);
__start = __child;
- if ((__len - 2) / 2 < __child)
+ if (__len / 2 - 1 < __child)
break;
// recompute the child based off of the updated parent
@@ -85,29 +85,34 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator __floy
_RandomAccessIterator __first,
_Compare&& __comp,
typename iterator_traits<_RandomAccessIterator>::difference_type __len) {
- using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
- _LIBCPP_ASSERT_INTERNAL(__len >= 2, "shouldn't be called unless __len >= 2");
+ _LIBCPP_ASSERT_INTERNAL(__len > 1, "shouldn't be called unless __len > 1");
- _RandomAccessIterator __hole = __first;
- _RandomAccessIterator __child_i = __first;
- difference_type __child = 0;
+ using _Ops = _IterOps<_AlgPolicy>;
- while (true) {
- __child_i += difference_type(__child + 1);
- __child = 2 * __child + 1;
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
- // right-child exists and is greater than left-child
- ++__child_i;
- ++__child;
+ difference_type __child = 1;
+ _RandomAccessIterator __hole = __first, __child_i = __first;
+
+ while (true) {
+ __child_i += __child;
+ __child *= 2;
+
+ if (__child < __len) {
+ _RandomAccessIterator __right_i = _Ops::next(__child_i);
+ if (__comp(*__child_i, *__right_i)) {
+ // right-child exists and is greater than left-child
+ __child_i = __right_i;
+ ++__child;
+ }
}
// swap __hole with its largest child
- *__hole = _IterOps<_AlgPolicy>::__iter_move(__child_i);
+ *__hole = _Ops::__iter_move(__child_i);
__hole = __child_i;
// if __hole is now a leaf, we're done
- if (__child > (__len - 2) / 2)
+ if (__child > __len / 2)
return __hole;
}
}
diff --git a/libcxx/include/__algorithm/sort_heap.h b/libcxx/include/__algorithm/sort_heap.h
index f20b110c7fd12..f2d73ab793650 100644
--- a/libcxx/include/__algorithm/sort_heap.h
+++ b/libcxx/include/__algorithm/sort_heap.h
@@ -36,8 +36,8 @@ __sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
__comp_ref_type<_Compare> __comp_ref = __comp;
using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
- for (difference_type __n = __last - __first; __n > 1; --__last, (void)--__n)
- std::__pop_heap<_AlgPolicy>(__first, __last, __comp_ref, __n);
+ for (difference_type __n = __last - __first; __n > 1; --__n)
+ std::__pop_heap<_AlgPolicy>(__first, --__last, __comp_ref, __n);
std::__check_strict_weak_ordering_sorted(__first, __saved_last, __comp_ref);
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/159917
More information about the libcxx-commits
mailing list