[libcxx-commits] [libcxx] [libc++] Improve the performance of std::make_heap a bit (PR #154092)
via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Aug 25 09:49:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Nikolas Klauser (philnik777)
<details>
<summary>Changes</summary>
```
----------------------------------------------------------------------------------------------------
Benchmark old new
----------------------------------------------------------------------------------------------------
BM_MakeHeap_uint32_Random_1 1.02 ns 1.07 ns
BM_MakeHeap_uint32_Random_4 3.35 ns 2.87 ns
BM_MakeHeap_uint32_Random_16 3.11 ns 2.68 ns
BM_MakeHeap_uint32_Random_64 3.24 ns 2.85 ns
BM_MakeHeap_uint32_Random_256 3.10 ns 2.77 ns
BM_MakeHeap_uint32_Random_1024 3.03 ns 2.63 ns
BM_MakeHeap_uint32_Random_16384 3.04 ns 2.63 ns
BM_MakeHeap_uint32_Random_262144 3.06 ns 2.65 ns
BM_MakeHeap_uint32_Ascending_1 1.04 ns 1.07 ns
BM_MakeHeap_uint32_Ascending_4 1.30 ns 0.879 ns
BM_MakeHeap_uint32_Ascending_16 1.23 ns 0.958 ns
BM_MakeHeap_uint32_Ascending_64 1.38 ns 0.879 ns
BM_MakeHeap_uint32_Ascending_256 1.59 ns 0.933 ns
BM_MakeHeap_uint32_Ascending_1024 1.54 ns 0.968 ns
BM_MakeHeap_uint32_Ascending_16384 1.58 ns 1.01 ns
BM_MakeHeap_uint32_Ascending_262144 1.69 ns 1.11 ns
BM_MakeHeap_uint32_Descending_1 1.07 ns 1.05 ns
BM_MakeHeap_uint32_Descending_4 0.809 ns 0.549 ns
BM_MakeHeap_uint32_Descending_16 0.705 ns 0.520 ns
BM_MakeHeap_uint32_Descending_64 0.692 ns 0.405 ns
BM_MakeHeap_uint32_Descending_256 0.729 ns 0.429 ns
BM_MakeHeap_uint32_Descending_1024 0.682 ns 0.378 ns
BM_MakeHeap_uint32_Descending_16384 0.666 ns 0.362 ns
BM_MakeHeap_uint32_Descending_262144 0.667 ns 0.361 ns
BM_MakeHeap_uint32_SingleElement_1 1.07 ns 1.10 ns
BM_MakeHeap_uint32_SingleElement_4 1.30 ns 0.666 ns
BM_MakeHeap_uint32_SingleElement_16 1.35 ns 0.861 ns
BM_MakeHeap_uint32_SingleElement_64 1.46 ns 0.827 ns
BM_MakeHeap_uint32_SingleElement_256 1.60 ns 0.896 ns
BM_MakeHeap_uint32_SingleElement_1024 1.55 ns 0.942 ns
BM_MakeHeap_uint32_SingleElement_16384 1.59 ns 0.986 ns
BM_MakeHeap_uint32_SingleElement_262144 1.64 ns 1.07 ns
BM_MakeHeap_uint32_PipeOrgan_1 1.04 ns 1.09 ns
BM_MakeHeap_uint32_PipeOrgan_4 0.989 ns 0.836 ns
BM_MakeHeap_uint32_PipeOrgan_16 1.24 ns 0.958 ns
BM_MakeHeap_uint32_PipeOrgan_64 1.41 ns 0.885 ns
BM_MakeHeap_uint32_PipeOrgan_256 1.57 ns 0.912 ns
BM_MakeHeap_uint32_PipeOrgan_1024 1.55 ns 0.951 ns
BM_MakeHeap_uint32_PipeOrgan_16384 1.58 ns 0.985 ns
BM_MakeHeap_uint32_PipeOrgan_262144 1.66 ns 1.12 ns
BM_MakeHeap_uint32_Heap_1 1.08 ns 1.09 ns
BM_MakeHeap_uint32_Heap_4 0.800 ns 0.572 ns
BM_MakeHeap_uint32_Heap_16 0.704 ns 0.532 ns
BM_MakeHeap_uint32_Heap_64 0.694 ns 0.405 ns
BM_MakeHeap_uint32_Heap_256 0.736 ns 0.427 ns
BM_MakeHeap_uint32_Heap_1024 0.682 ns 0.381 ns
BM_MakeHeap_uint32_Heap_16384 0.665 ns 0.363 ns
BM_MakeHeap_uint32_Heap_262144 0.666 ns 0.372 ns
BM_MakeHeap_uint32_QuickSortAdversary_1 1.05 ns 1.09 ns
BM_MakeHeap_uint32_QuickSortAdversary_4 1.30 ns 0.843 ns
BM_MakeHeap_uint32_QuickSortAdversary_16 1.23 ns 0.946 ns
BM_MakeHeap_uint32_QuickSortAdversary_64 1.25 ns 0.725 ns
BM_MakeHeap_uint32_QuickSortAdversary_256 1.30 ns 0.757 ns
BM_MakeHeap_uint32_QuickSortAdversary_1024 1.26 ns 0.749 ns
BM_MakeHeap_uint32_QuickSortAdversary_16384 0.809 ns 0.455 ns
BM_MakeHeap_uint32_QuickSortAdversary_262144 0.721 ns 0.376 ns
BM_MakeHeap_uint64_Random_1 1.06 ns 1.04 ns
BM_MakeHeap_uint64_Random_4 3.41 ns 2.86 ns
BM_MakeHeap_uint64_Random_16 3.21 ns 2.79 ns
BM_MakeHeap_uint64_Random_64 3.40 ns 3.09 ns
BM_MakeHeap_uint64_Random_256 3.22 ns 2.98 ns
BM_MakeHeap_uint64_Random_1024 3.10 ns 2.82 ns
BM_MakeHeap_uint64_Random_16384 3.10 ns 2.81 ns
BM_MakeHeap_uint64_Random_262144 3.18 ns 2.93 ns
BM_MakeHeap_uint64_Ascending_1 1.09 ns 1.08 ns
BM_MakeHeap_uint64_Ascending_4 1.35 ns 0.893 ns
BM_MakeHeap_uint64_Ascending_16 1.32 ns 1.01 ns
BM_MakeHeap_uint64_Ascending_64 1.50 ns 0.951 ns
BM_MakeHeap_uint64_Ascending_256 1.68 ns 1.01 ns
BM_MakeHeap_uint64_Ascending_1024 1.65 ns 1.00 ns
BM_MakeHeap_uint64_Ascending_16384 1.76 ns 1.09 ns
BM_MakeHeap_uint64_Ascending_262144 2.05 ns 1.32 ns
BM_MakeHeap_uint64_Descending_1 1.13 ns 1.07 ns
BM_MakeHeap_uint64_Descending_4 0.825 ns 0.589 ns
BM_MakeHeap_uint64_Descending_16 0.742 ns 0.531 ns
BM_MakeHeap_uint64_Descending_64 0.757 ns 0.502 ns
BM_MakeHeap_uint64_Descending_256 0.769 ns 0.463 ns
BM_MakeHeap_uint64_Descending_1024 0.711 ns 0.414 ns
BM_MakeHeap_uint64_Descending_16384 0.692 ns 0.385 ns
BM_MakeHeap_uint64_Descending_262144 0.692 ns 0.397 ns
BM_MakeHeap_uint64_SingleElement_1 1.09 ns 1.10 ns
BM_MakeHeap_uint64_SingleElement_4 1.33 ns 0.745 ns
BM_MakeHeap_uint64_SingleElement_16 1.41 ns 0.952 ns
BM_MakeHeap_uint64_SingleElement_64 1.58 ns 0.944 ns
BM_MakeHeap_uint64_SingleElement_256 1.71 ns 0.975 ns
BM_MakeHeap_uint64_SingleElement_1024 1.59 ns 1.01 ns
BM_MakeHeap_uint64_SingleElement_16384 1.72 ns 1.10 ns
BM_MakeHeap_uint64_SingleElement_262144 2.01 ns 1.42 ns
BM_MakeHeap_uint64_PipeOrgan_1 1.10 ns 1.15 ns
BM_MakeHeap_uint64_PipeOrgan_4 1.04 ns 0.889 ns
BM_MakeHeap_uint64_PipeOrgan_16 1.32 ns 1.04 ns
BM_MakeHeap_uint64_PipeOrgan_64 1.49 ns 0.978 ns
BM_MakeHeap_uint64_PipeOrgan_256 1.62 ns 0.995 ns
BM_MakeHeap_uint64_PipeOrgan_1024 1.58 ns 0.994 ns
BM_MakeHeap_uint64_PipeOrgan_16384 1.66 ns 1.10 ns
BM_MakeHeap_uint64_PipeOrgan_262144 1.84 ns 1.36 ns
BM_MakeHeap_uint64_Heap_1 1.06 ns 1.08 ns
BM_MakeHeap_uint64_Heap_4 0.793 ns 0.606 ns
BM_MakeHeap_uint64_Heap_16 0.732 ns 0.525 ns
BM_MakeHeap_uint64_Heap_64 0.747 ns 0.493 ns
BM_MakeHeap_uint64_Heap_256 0.759 ns 0.446 ns
BM_MakeHeap_uint64_Heap_1024 0.706 ns 0.402 ns
BM_MakeHeap_uint64_Heap_16384 0.662 ns 0.384 ns
BM_MakeHeap_uint64_Heap_262144 0.676 ns 0.382 ns
BM_MakeHeap_uint64_QuickSortAdversary_1 1.05 ns 1.03 ns
BM_MakeHeap_uint64_QuickSortAdversary_4 1.31 ns 0.885 ns
BM_MakeHeap_uint64_QuickSortAdversary_16 1.25 ns 0.964 ns
BM_MakeHeap_uint64_QuickSortAdversary_64 1.28 ns 0.804 ns
BM_MakeHeap_uint64_QuickSortAdversary_256 1.32 ns 0.825 ns
BM_MakeHeap_uint64_QuickSortAdversary_1024 1.28 ns 0.768 ns
BM_MakeHeap_uint64_QuickSortAdversary_16384 0.831 ns 0.490 ns
BM_MakeHeap_uint64_QuickSortAdversary_262144 0.709 ns 0.398 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1 1.02 ns 1.07 ns
BM_MakeHeap_pair<uint32, uint32>_Random_4 3.94 ns 3.72 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16 4.58 ns 3.85 ns
BM_MakeHeap_pair<uint32, uint32>_Random_64 5.32 ns 4.20 ns
BM_MakeHeap_pair<uint32, uint32>_Random_256 5.46 ns 4.27 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1024 5.53 ns 4.21 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16384 5.58 ns 4.29 ns
BM_MakeHeap_pair<uint32, uint32>_Random_262144 5.56 ns 4.44 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1 1.07 ns 1.03 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_4 1.96 ns 1.61 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16 2.10 ns 2.06 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_64 2.35 ns 2.28 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_256 2.58 ns 2.41 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1024 2.57 ns 2.45 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16384 2.61 ns 2.56 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_262144 2.76 ns 2.79 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1 1.07 ns 1.05 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_4 1.21 ns 1.05 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16 1.14 ns 0.980 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_64 1.16 ns 1.01 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_256 1.18 ns 0.973 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1024 1.13 ns 0.945 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16384 1.11 ns 0.960 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_262144 1.11 ns 0.929 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1 1.08 ns 1.09 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_4 1.94 ns 1.36 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16 2.22 ns 1.90 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_64 2.46 ns 2.19 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_256 2.55 ns 2.38 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1024 2.51 ns 2.45 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16384 2.57 ns 2.56 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_262144 2.74 ns 2.83 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1 1.07 ns 1.11 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_4 1.42 ns 1.54 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16 2.01 ns 1.99 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_64 2.33 ns 2.27 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_256 2.48 ns 2.40 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1024 2.48 ns 2.46 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16384 2.54 ns 2.57 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_262144 2.68 ns 2.76 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1 1.08 ns 1.08 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_4 1.24 ns 1.02 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16 1.16 ns 1.02 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_64 1.17 ns 1.00 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_256 1.20 ns 0.992 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1024 1.13 ns 0.942 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16384 1.12 ns 0.936 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_262144 1.13 ns 0.945 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1 1.08 ns 1.06 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_4 1.92 ns 1.93 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16 2.13 ns 2.07 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_64 2.36 ns 2.29 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_256 2.56 ns 2.41 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1024 2.50 ns 2.46 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16384 2.57 ns 2.57 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_262144 2.67 ns 2.89 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1 0.979 ns 1.05 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_4 5.14 ns 4.88 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16 7.93 ns 5.95 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_64 9.08 ns 6.63 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_256 9.10 ns 6.47 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1024 9.03 ns 6.43 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16384 9.09 ns 6.60 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_262144 9.19 ns 6.86 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1 1.04 ns 1.02 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_4 2.13 ns 2.55 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16 1.87 ns 2.67 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_64 2.27 ns 3.06 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_256 2.41 ns 3.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1024 2.27 ns 3.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16384 2.19 ns 3.40 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_262144 2.37 ns 3.78 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1 0.981 ns 1.04 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_4 1.38 ns 1.61 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16 1.18 ns 1.35 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_64 1.29 ns 1.35 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_256 1.20 ns 1.28 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1024 1.06 ns 1.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16384 1.01 ns 1.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_262144 0.982 ns 1.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1 0.967 ns 1.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_4 2.34 ns 2.22 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16 2.46 ns 2.86 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_64 2.74 ns 3.61 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_256 2.75 ns 3.74 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1024 2.71 ns 3.69 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16384 2.71 ns 4.03 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_262144 2.92 ns 4.58 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1 0.968 ns 1.01 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_4 1.95 ns 2.15 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16 1.89 ns 2.41 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_64 2.45 ns 3.00 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_256 2.61 ns 3.09 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1024 2.49 ns 3.14 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16384 2.38 ns 3.38 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_262144 2.48 ns 3.90 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1 1.04 ns 0.964 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_4 1.43 ns 1.43 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16 1.16 ns 1.29 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_64 1.26 ns 1.28 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_256 1.27 ns 1.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1024 1.28 ns 1.13 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16384 1.19 ns 1.17 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_262144 1.19 ns 1.14 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1 1.05 ns 0.988 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_4 2.22 ns 2.77 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16 1.92 ns 2.54 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_64 2.27 ns 2.83 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_256 2.43 ns 2.94 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1024 2.25 ns 2.89 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16384 2.18 ns 3.06 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_262144 2.35 ns 3.39 ns
BM_MakeHeap_string_Random_1 1.35 ns 1.25 ns
BM_MakeHeap_string_Random_4 10.9 ns 10.2 ns
BM_MakeHeap_string_Random_16 23.3 ns 20.8 ns
BM_MakeHeap_string_Random_64 27.4 ns 24.6 ns
BM_MakeHeap_string_Random_256 28.8 ns 25.6 ns
BM_MakeHeap_string_Random_1024 30.1 ns 27.9 ns
BM_MakeHeap_string_Random_16384 35.1 ns 35.5 ns
BM_MakeHeap_string_Random_262144 68.9 ns 82.1 ns
BM_MakeHeap_string_Ascending_1 1.32 ns 1.25 ns
BM_MakeHeap_string_Ascending_4 8.25 ns 8.83 ns
BM_MakeHeap_string_Ascending_16 18.7 ns 18.6 ns
BM_MakeHeap_string_Ascending_64 20.9 ns 23.3 ns
BM_MakeHeap_string_Ascending_256 21.8 ns 23.7 ns
BM_MakeHeap_string_Ascending_1024 24.4 ns 26.8 ns
BM_MakeHeap_string_Ascending_16384 35.3 ns 35.6 ns
BM_MakeHeap_string_Ascending_262144 55.2 ns 70.2 ns
BM_MakeHeap_string_Descending_1 1.25 ns 1.25 ns
BM_MakeHeap_string_Descending_4 7.69 ns 8.02 ns
BM_MakeHeap_string_Descending_16 19.5 ns 18.0 ns
BM_MakeHeap_string_Descending_64 18.2 ns 18.1 ns
BM_MakeHeap_string_Descending_256 18.0 ns 18.0 ns
BM_MakeHeap_string_Descending_1024 20.9 ns 20.1 ns
BM_MakeHeap_string_Descending_16384 27.3 ns 25.4 ns
BM_MakeHeap_string_Descending_262144 30.7 ns 33.2 ns
BM_MakeHeap_string_SingleElement_1 1.32 ns 1.29 ns
BM_MakeHeap_string_SingleElement_4 8.21 ns 7.52 ns
BM_MakeHeap_string_SingleElement_16 15.6 ns 14.1 ns
BM_MakeHeap_string_SingleElement_64 19.4 ns 18.9 ns
BM_MakeHeap_string_SingleElement_256 14.4 ns 14.3 ns
BM_MakeHeap_string_SingleElement_1024 11.3 ns 11.4 ns
BM_MakeHeap_string_SingleElement_16384 12.3 ns 11.4 ns
BM_MakeHeap_string_SingleElement_262144 29.3 ns 29.5 ns
BM_MakeHeap_string_PipeOrgan_1 1.30 ns 1.34 ns
BM_MakeHeap_string_PipeOrgan_4 8.01 ns 8.54 ns
BM_MakeHeap_string_PipeOrgan_16 18.7 ns 18.4 ns
BM_MakeHeap_string_PipeOrgan_64 21.7 ns 22.9 ns
BM_MakeHeap_string_PipeOrgan_256 21.9 ns 23.6 ns
BM_MakeHeap_string_PipeOrgan_1024 24.5 ns 26.2 ns
BM_MakeHeap_string_PipeOrgan_16384 35.6 ns 35.1 ns
BM_MakeHeap_string_PipeOrgan_262144 55.0 ns 69.0 ns
BM_MakeHeap_string_Heap_1 1.26 ns 1.24 ns
BM_MakeHeap_string_Heap_4 8.07 ns 7.95 ns
BM_MakeHeap_string_Heap_16 22.0 ns 18.8 ns
BM_MakeHeap_string_Heap_64 19.4 ns 16.9 ns
BM_MakeHeap_string_Heap_256 14.7 ns 12.3 ns
BM_MakeHeap_string_Heap_1024 10.5 ns 8.55 ns
BM_MakeHeap_string_Heap_16384 10.2 ns 8.72 ns
BM_MakeHeap_string_Heap_262144 18.2 ns 17.2 ns
BM_MakeHeap_string_QuickSortAdversary_1 1.25 ns 1.35 ns
BM_MakeHeap_string_QuickSortAdversary_4 10.6 ns 10.3 ns
BM_MakeHeap_string_QuickSortAdversary_16 25.7 ns 23.2 ns
BM_MakeHeap_string_QuickSortAdversary_64 25.9 ns 24.0 ns
BM_MakeHeap_string_QuickSortAdversary_256 23.5 ns 21.4 ns
BM_MakeHeap_string_QuickSortAdversary_1024 22.3 ns 19.9 ns
BM_MakeHeap_string_QuickSortAdversary_16384 23.4 ns 21.7 ns
BM_MakeHeap_string_QuickSortAdversary_262144 49.0 ns 53.2 ns
BM_MakeHeap_float_Random_1 1.09 ns 1.05 ns
BM_MakeHeap_float_Random_4 4.07 ns 4.40 ns
BM_MakeHeap_float_Random_16 5.49 ns 4.59 ns
BM_MakeHeap_float_Random_64 6.45 ns 4.84 ns
BM_MakeHeap_float_Random_256 6.67 ns 4.81 ns
BM_MakeHeap_float_Random_1024 6.69 ns 4.74 ns
BM_MakeHeap_float_Random_16384 6.67 ns 4.73 ns
BM_MakeHeap_float_Random_262144 6.58 ns 4.65 ns
BM_MakeHeap_float_Ascending_1 1.12 ns 1.04 ns
BM_MakeHeap_float_Ascending_4 1.10 ns 0.937 ns
BM_MakeHeap_float_Ascending_16 1.26 ns 1.37 ns
BM_MakeHeap_float_Ascending_64 1.38 ns 1.28 ns
BM_MakeHeap_float_Ascending_256 1.39 ns 1.30 ns
BM_MakeHeap_float_Ascending_1024 1.32 ns 1.39 ns
BM_MakeHeap_float_Ascending_16384 1.24 ns 1.39 ns
BM_MakeHeap_float_Ascending_262144 1.31 ns 1.71 ns
BM_MakeHeap_float_Descending_1 1.05 ns 1.05 ns
BM_MakeHeap_float_Descending_4 0.835 ns 0.655 ns
BM_MakeHeap_float_Descending_16 0.728 ns 0.605 ns
BM_MakeHeap_float_Descending_64 0.628 ns 0.516 ns
BM_MakeHeap_float_Descending_256 0.656 ns 0.554 ns
BM_MakeHeap_float_Descending_1024 0.611 ns 0.515 ns
BM_MakeHeap_float_Descending_16384 0.597 ns 0.500 ns
BM_MakeHeap_float_Descending_262144 0.592 ns 0.502 ns
BM_MakeHeap_float_SingleElement_1 1.06 ns 1.03 ns
BM_MakeHeap_float_SingleElement_4 1.08 ns 0.968 ns
BM_MakeHeap_float_SingleElement_16 1.08 ns 1.24 ns
BM_MakeHeap_float_SingleElement_64 1.13 ns 1.26 ns
BM_MakeHeap_float_SingleElement_256 1.20 ns 1.29 ns
BM_MakeHeap_float_SingleElement_1024 1.16 ns 1.37 ns
BM_MakeHeap_float_SingleElement_16384 1.11 ns 1.39 ns
BM_MakeHeap_float_SingleElement_262144 1.15 ns 1.62 ns
BM_MakeHeap_float_PipeOrgan_1 1.08 ns 1.07 ns
BM_MakeHeap_float_PipeOrgan_4 0.966 ns 1.02 ns
BM_MakeHeap_float_PipeOrgan_16 1.12 ns 1.31 ns
BM_MakeHeap_float_PipeOrgan_64 1.06 ns 1.27 ns
BM_MakeHeap_float_PipeOrgan_256 1.18 ns 1.28 ns
BM_MakeHeap_float_PipeOrgan_1024 1.14 ns 1.39 ns
BM_MakeHeap_float_PipeOrgan_16384 1.09 ns 1.39 ns
BM_MakeHeap_float_PipeOrgan_262144 1.15 ns 1.68 ns
BM_MakeHeap_float_Heap_1 1.04 ns 1.03 ns
BM_MakeHeap_float_Heap_4 0.821 ns 0.656 ns
BM_MakeHeap_float_Heap_16 0.771 ns 0.618 ns
BM_MakeHeap_float_Heap_64 0.683 ns 0.519 ns
BM_MakeHeap_float_Heap_256 0.658 ns 0.529 ns
BM_MakeHeap_float_Heap_1024 0.668 ns 0.478 ns
BM_MakeHeap_float_Heap_16384 0.717 ns 0.451 ns
BM_MakeHeap_float_Heap_262144 0.683 ns 0.428 ns
BM_MakeHeap_float_QuickSortAdversary_1 1.03 ns 1.08 ns
BM_MakeHeap_float_QuickSortAdversary_4 1.07 ns 1.13 ns
BM_MakeHeap_float_QuickSortAdversary_16 1.04 ns 1.36 ns
BM_MakeHeap_float_QuickSortAdversary_64 0.987 ns 1.10 ns
BM_MakeHeap_float_QuickSortAdversary_256 1.13 ns 1.11 ns
BM_MakeHeap_float_QuickSortAdversary_1024 1.01 ns 1.23 ns
BM_MakeHeap_float_QuickSortAdversary_16384 0.643 ns 0.633 ns
BM_MakeHeap_float_QuickSortAdversary_262144 0.579 ns 0.695 ns
```
Fixes #120752
---
Full diff: https://github.com/llvm/llvm-project/pull/154092.diff
4 Files Affected:
- (modified) libcxx/include/__algorithm/make_heap.h (+10-4)
- (modified) libcxx/include/__algorithm/partial_sort.h (+1-1)
- (modified) libcxx/include/__algorithm/partial_sort_copy.h (+1-1)
- (modified) libcxx/include/__algorithm/sift_down.h (+19-18)
``````````diff
diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index e8f0cdb27333a..7eb5d6b89492d 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -12,6 +12,7 @@
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/push_heap.h>
#include <__algorithm/sift_down.h>
#include <__config>
#include <__iterator/iterator_traits.h>
@@ -31,13 +32,18 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare&& __comp) {
__comp_ref_type<_Compare> __comp_ref = __comp;
- using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
- difference_type __n = __last - __first;
+ using __diff_t = __iter_diff_t<_RandomAccessIterator>;
+ const __diff_t __n = __last - __first;
+
+ const __diff_t __odd_n = (__n & 1) ? __n : __n - 1;
+
if (__n > 1) {
// start from the first parent, there is no need to consider children
- for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) {
- std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
+
+ for (__diff_t __start = (__odd_n - 2) / 2; __start >= 0; --__start) {
+ std::__sift_down<_AlgPolicy, true>(__first, __comp_ref, __odd_n, __start);
}
+ std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n);
}
}
diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h
index 7f8d0c49147e3..4b39ae0cf2dfc 100644
--- a/libcxx/include/__algorithm/partial_sort.h
+++ b/libcxx/include/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
for (; __i != __last; ++__i) {
if (__comp(*__i, *__first)) {
_IterOps<_AlgPolicy>::iter_swap(__i, __first);
- std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+ std::__sift_down<_AlgPolicy, false>(__first, __comp, __len, 0);
}
}
std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);
diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 172f53b290d54..2230dfc9cc4ad 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
for (; __first != __last; ++__first)
if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
*__result_first = *__first;
- std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+ std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0);
}
std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
}
diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index 42803e30631fb..e01c9b2b00f86 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -24,59 +24,60 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
+template <class _AlgPolicy, bool __assume_both_children, class _Compare, class _RandomAccessIterator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__sift_down(_RandomAccessIterator __first,
_Compare&& __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- _RandomAccessIterator __start) {
+ __iter_diff_t<_RandomAccessIterator> __len,
+ __iter_diff_t<_RandomAccessIterator> __start) {
using _Ops = _IterOps<_AlgPolicy>;
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
// left-child of __start is at 2 * __start + 1
// right-child of __start is at 2 * __start + 2
- difference_type __child = __start - __first;
+ difference_type __child = __start;
if (__len < 2 || (__len - 2) / 2 < __child)
return;
- __child = 2 * __child + 1;
- _RandomAccessIterator __child_i = __first + __child;
+ __child = 2 * __child + 1;
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
+ if _LIBCPP_CONSTEXPR (__assume_both_children) {
+ // right-child exists and is greater than left-child
+ __child += __comp(__first[__child], __first[__child + 1]);
+ } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
// right-child exists and is greater than left-child
- ++__child_i;
++__child;
}
// check if we are in heap-order
- if (__comp(*__child_i, *__start))
+ if (__comp(__first[__child], __first[__start]))
// we are, __start is larger than its largest child
return;
- value_type __top(_Ops::__iter_move(__start));
+ value_type __top(_Ops::__iter_move(__first + __start));
do {
// we are not in heap-order, swap the parent with its largest child
- *__start = _Ops::__iter_move(__child_i);
- __start = __child_i;
+ __first[__start] = _Ops::__iter_move(__first + __child);
+ __start = __child;
if ((__len - 2) / 2 < __child)
break;
// recompute the child based off of the updated parent
- __child = 2 * __child + 1;
- __child_i = __first + __child;
+ __child = 2 * __child + 1;
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
+ if _LIBCPP_CONSTEXPR (__assume_both_children) {
+ __child += __comp(__first[__child], __first[__child + 1]);
+ } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
// right-child exists and is greater than left-child
- ++__child_i;
++__child;
}
// check if we are in heap-order
- } while (!__comp(*__child_i, __top));
- *__start = std::move(__top);
+ } while (!__comp(__first[__child], __top));
+ __first[__start] = std::move(__top);
}
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
``````````
</details>
https://github.com/llvm/llvm-project/pull/154092
More information about the libcxx-commits mailing list