[libcxx-commits] [libcxx] 04518e7 - [libc++] Improve the performance of std::make_heap a bit (#154092)

via libcxx-commits libcxx-commits at lists.llvm.org
Mon Sep 8 03:00:08 PDT 2025


Author: Nikolas Klauser
Date: 2025-09-08T12:00:04+02:00
New Revision: 04518e7357b17e0802445b724377f844511a661f

URL: https://github.com/llvm/llvm-project/commit/04518e7357b17e0802445b724377f844511a661f
DIFF: https://github.com/llvm/llvm-project/commit/04518e7357b17e0802445b724377f844511a661f.diff

LOG: [libc++] Improve the performance of std::make_heap a bit (#154092)

```
Apple M4:
-----------------------------------------------------------------------------------------------------
Benchmark                                                                         old             new
-----------------------------------------------------------------------------------------------------
BM_MakeHeap_uint32_Random_1                                                  0.285 ns        0.271 ns
BM_MakeHeap_uint32_Random_4                                                   2.09 ns         1.80 ns
BM_MakeHeap_uint32_Random_16                                                  1.85 ns         1.83 ns
BM_MakeHeap_uint32_Random_64                                                  1.92 ns         1.50 ns
BM_MakeHeap_uint32_Random_256                                                 2.10 ns         1.87 ns
BM_MakeHeap_uint32_Random_1024                                                1.73 ns         1.86 ns
BM_MakeHeap_uint32_Random_16384                                               2.17 ns         2.05 ns
BM_MakeHeap_uint32_Random_262144                                              1.77 ns         1.77 ns
BM_MakeHeap_uint32_Ascending_1                                               0.288 ns        0.277 ns
BM_MakeHeap_uint32_Ascending_4                                               0.658 ns        0.481 ns
BM_MakeHeap_uint32_Ascending_16                                              0.636 ns        0.637 ns
BM_MakeHeap_uint32_Ascending_64                                              0.643 ns        0.601 ns
BM_MakeHeap_uint32_Ascending_256                                             0.710 ns        0.636 ns
BM_MakeHeap_uint32_Ascending_1024                                            0.747 ns        0.660 ns
BM_MakeHeap_uint32_Ascending_16384                                           0.713 ns        0.633 ns
BM_MakeHeap_uint32_Ascending_262144                                          0.769 ns        0.731 ns
BM_MakeHeap_uint32_Descending_1                                              0.294 ns        0.280 ns
BM_MakeHeap_uint32_Descending_4                                              0.379 ns        0.305 ns
BM_MakeHeap_uint32_Descending_16                                             0.376 ns        0.268 ns
BM_MakeHeap_uint32_Descending_64                                             0.358 ns        0.271 ns
BM_MakeHeap_uint32_Descending_256                                            0.377 ns        0.284 ns
BM_MakeHeap_uint32_Descending_1024                                           0.355 ns        0.267 ns
BM_MakeHeap_uint32_Descending_16384                                          0.348 ns        0.248 ns
BM_MakeHeap_uint32_Descending_262144                                         0.349 ns        0.247 ns
BM_MakeHeap_uint32_SingleElement_1                                           0.292 ns        0.280 ns
BM_MakeHeap_uint32_SingleElement_4                                           0.570 ns        0.332 ns
BM_MakeHeap_uint32_SingleElement_16                                          0.635 ns        0.604 ns
BM_MakeHeap_uint32_SingleElement_64                                          0.653 ns        0.567 ns
BM_MakeHeap_uint32_SingleElement_256                                         0.703 ns        0.609 ns
BM_MakeHeap_uint32_SingleElement_1024                                        0.737 ns        0.604 ns
BM_MakeHeap_uint32_SingleElement_16384                                       0.699 ns        0.574 ns
BM_MakeHeap_uint32_SingleElement_262144                                      0.803 ns        0.684 ns
BM_MakeHeap_uint32_PipeOrgan_1                                               0.291 ns        0.284 ns
BM_MakeHeap_uint32_PipeOrgan_4                                               0.588 ns        0.399 ns
BM_MakeHeap_uint32_PipeOrgan_16                                              0.648 ns         1.12 ns
BM_MakeHeap_uint32_PipeOrgan_64                                              0.662 ns        0.771 ns
BM_MakeHeap_uint32_PipeOrgan_256                                             0.723 ns        0.672 ns
BM_MakeHeap_uint32_PipeOrgan_1024                                            0.749 ns        0.674 ns
BM_MakeHeap_uint32_PipeOrgan_16384                                           0.708 ns        0.638 ns
BM_MakeHeap_uint32_PipeOrgan_262144                                          0.786 ns        0.743 ns
BM_MakeHeap_uint32_Heap_1                                                    0.298 ns        0.282 ns
BM_MakeHeap_uint32_Heap_4                                                    0.396 ns        0.308 ns
BM_MakeHeap_uint32_Heap_16                                                   0.377 ns        0.268 ns
BM_MakeHeap_uint32_Heap_64                                                   0.356 ns        0.271 ns
BM_MakeHeap_uint32_Heap_256                                                  0.378 ns        0.290 ns
BM_MakeHeap_uint32_Heap_1024                                                 0.356 ns        0.275 ns
BM_MakeHeap_uint32_Heap_16384                                                0.348 ns        0.252 ns
BM_MakeHeap_uint32_Heap_262144                                               0.347 ns        0.250 ns
BM_MakeHeap_uint32_QuickSortAdversary_1                                      0.290 ns        0.284 ns
BM_MakeHeap_uint32_QuickSortAdversary_4                                      0.627 ns        0.409 ns
BM_MakeHeap_uint32_QuickSortAdversary_16                                     0.640 ns        0.653 ns
BM_MakeHeap_uint32_QuickSortAdversary_64                                     0.577 ns        0.484 ns
BM_MakeHeap_uint32_QuickSortAdversary_256                                    0.613 ns        0.521 ns
BM_MakeHeap_uint32_QuickSortAdversary_1024                                   0.652 ns        0.514 ns
BM_MakeHeap_uint32_QuickSortAdversary_16384                                  0.428 ns        0.308 ns
BM_MakeHeap_uint32_QuickSortAdversary_262144                                 0.373 ns        0.261 ns
BM_MakeHeap_uint64_Random_1                                                  0.291 ns        0.281 ns
BM_MakeHeap_uint64_Random_4                                                   2.20 ns         1.97 ns
BM_MakeHeap_uint64_Random_16                                                  1.93 ns         1.70 ns
BM_MakeHeap_uint64_Random_64                                                  1.89 ns         1.48 ns
BM_MakeHeap_uint64_Random_256                                                 2.10 ns         1.99 ns
BM_MakeHeap_uint64_Random_1024                                                1.41 ns         2.04 ns
BM_MakeHeap_uint64_Random_16384                                               2.12 ns         1.83 ns
BM_MakeHeap_uint64_Random_262144                                              1.83 ns         1.63 ns
BM_MakeHeap_uint64_Ascending_1                                               0.288 ns        0.283 ns
BM_MakeHeap_uint64_Ascending_4                                               0.624 ns        0.493 ns
BM_MakeHeap_uint64_Ascending_16                                              0.648 ns        0.688 ns
BM_MakeHeap_uint64_Ascending_64                                              0.671 ns        0.634 ns
BM_MakeHeap_uint64_Ascending_256                                             0.739 ns        0.680 ns
BM_MakeHeap_uint64_Ascending_1024                                            0.761 ns        0.698 ns
BM_MakeHeap_uint64_Ascending_16384                                           0.740 ns        0.685 ns
BM_MakeHeap_uint64_Ascending_262144                                          0.849 ns        0.837 ns
BM_MakeHeap_uint64_Descending_1                                              0.304 ns        0.287 ns
BM_MakeHeap_uint64_Descending_4                                              0.395 ns        0.312 ns
BM_MakeHeap_uint64_Descending_16                                             0.374 ns        0.276 ns
BM_MakeHeap_uint64_Descending_64                                             0.358 ns        0.272 ns
BM_MakeHeap_uint64_Descending_256                                            0.389 ns        0.303 ns
BM_MakeHeap_uint64_Descending_1024                                           0.361 ns        0.278 ns
BM_MakeHeap_uint64_Descending_16384                                          0.352 ns        0.253 ns
BM_MakeHeap_uint64_Descending_262144                                         0.350 ns        0.251 ns
BM_MakeHeap_uint64_SingleElement_1                                           0.295 ns        0.285 ns
BM_MakeHeap_uint64_SingleElement_4                                           0.569 ns        0.358 ns
BM_MakeHeap_uint64_SingleElement_16                                          0.649 ns        0.652 ns
BM_MakeHeap_uint64_SingleElement_64                                          0.673 ns        0.565 ns
BM_MakeHeap_uint64_SingleElement_256                                         0.732 ns        0.651 ns
BM_MakeHeap_uint64_SingleElement_1024                                        0.759 ns        0.632 ns
BM_MakeHeap_uint64_SingleElement_16384                                       0.748 ns        0.614 ns
BM_MakeHeap_uint64_SingleElement_262144                                      0.947 ns        0.797 ns
BM_MakeHeap_uint64_PipeOrgan_1                                               0.295 ns        0.284 ns
BM_MakeHeap_uint64_PipeOrgan_4                                               0.601 ns        0.496 ns
BM_MakeHeap_uint64_PipeOrgan_16                                              0.655 ns         1.18 ns
BM_MakeHeap_uint64_PipeOrgan_64                                              0.682 ns        0.803 ns
BM_MakeHeap_uint64_PipeOrgan_256                                             0.759 ns        0.710 ns
BM_MakeHeap_uint64_PipeOrgan_1024                                            0.759 ns        0.713 ns
BM_MakeHeap_uint64_PipeOrgan_16384                                           0.739 ns        0.696 ns
BM_MakeHeap_uint64_PipeOrgan_262144                                          0.870 ns        0.849 ns
BM_MakeHeap_uint64_Heap_1                                                    0.290 ns        0.284 ns
BM_MakeHeap_uint64_Heap_4                                                    0.413 ns        0.314 ns
BM_MakeHeap_uint64_Heap_16                                                   0.378 ns        0.277 ns
BM_MakeHeap_uint64_Heap_64                                                   0.362 ns        0.272 ns
BM_MakeHeap_uint64_Heap_256                                                  0.389 ns        0.303 ns
BM_MakeHeap_uint64_Heap_1024                                                 0.362 ns        0.283 ns
BM_MakeHeap_uint64_Heap_16384                                                0.352 ns        0.253 ns
BM_MakeHeap_uint64_Heap_262144                                               0.350 ns        0.251 ns
BM_MakeHeap_uint64_QuickSortAdversary_1                                      0.293 ns        0.284 ns
BM_MakeHeap_uint64_QuickSortAdversary_4                                      0.606 ns        0.494 ns
BM_MakeHeap_uint64_QuickSortAdversary_16                                     0.645 ns        0.691 ns
BM_MakeHeap_uint64_QuickSortAdversary_64                                     0.607 ns        0.511 ns
BM_MakeHeap_uint64_QuickSortAdversary_256                                    0.641 ns        0.537 ns
BM_MakeHeap_uint64_QuickSortAdversary_1024                                   0.657 ns        0.529 ns
BM_MakeHeap_uint64_QuickSortAdversary_16384                                  0.435 ns        0.316 ns
BM_MakeHeap_uint64_QuickSortAdversary_262144                                 0.382 ns        0.266 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1                                    0.297 ns        0.378 ns
BM_MakeHeap_pair<uint32, uint32>_Random_4                                     2.80 ns        0.765 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16                                    2.92 ns         2.20 ns
BM_MakeHeap_pair<uint32, uint32>_Random_64                                    3.17 ns         3.64 ns
BM_MakeHeap_pair<uint32, uint32>_Random_256                                   3.44 ns         3.20 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1024                                  3.35 ns         3.64 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16384                                 3.22 ns         3.50 ns
BM_MakeHeap_pair<uint32, uint32>_Random_262144                                3.61 ns         3.46 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1                                 0.291 ns        0.379 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_4                                 0.779 ns        0.436 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16                                0.943 ns         1.01 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_64                                 1.17 ns         1.26 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_256                                1.38 ns         1.44 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1024                               1.37 ns         1.43 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16384                              1.29 ns         1.31 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_262144                             1.37 ns         1.40 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1                                0.292 ns        0.382 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_4                                0.440 ns        0.347 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16                               0.529 ns        0.520 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_64                               0.540 ns        0.527 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_256                              0.637 ns        0.583 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1024                             0.552 ns        0.513 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16384                            0.522 ns        0.488 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_262144                           0.515 ns        0.492 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1                             0.299 ns        0.377 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_4                             0.787 ns        0.474 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16                             1.07 ns        0.921 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_64                             1.20 ns         1.15 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_256                            1.30 ns         1.27 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1024                           1.30 ns         1.31 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16384                          1.29 ns         1.28 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_262144                         1.37 ns         1.38 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1                                 0.293 ns        0.385 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_4                                 0.677 ns        0.438 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16                                 1.04 ns         1.00 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_64                                 1.20 ns         1.27 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_256                                1.40 ns         1.43 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1024                               1.36 ns         1.43 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16384                              1.27 ns         1.31 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_262144                             1.37 ns         1.41 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1                                      0.292 ns        0.378 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_4                                      0.440 ns        0.380 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16                                     0.560 ns        0.606 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_64                                     0.588 ns        0.573 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_256                                    0.632 ns        0.607 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1024                                   0.598 ns        0.580 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16384                                  0.576 ns        0.563 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_262144                                 0.572 ns        0.561 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1                        0.304 ns        0.379 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_4                        0.823 ns        0.430 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16                        1.08 ns         1.03 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_64                        1.18 ns         1.23 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_256                       1.39 ns         1.43 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1024                      1.36 ns         1.42 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16384                     1.28 ns         1.32 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_262144                    1.34 ns         1.37 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1                           0.276 ns        0.511 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_4                            4.25 ns         1.96 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16                           4.84 ns         3.77 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_64                           5.53 ns         4.93 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_256                          5.30 ns         5.06 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1024                         5.29 ns         5.02 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16384                        5.53 ns         5.31 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_262144                       5.49 ns         5.29 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1                        0.275 ns        0.443 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_4                         1.22 ns        0.764 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16                        1.39 ns         1.49 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_64                        1.66 ns         1.76 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_256                       1.85 ns         1.99 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1024                      1.90 ns         2.02 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16384                     2.15 ns         2.27 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_262144                    2.25 ns         2.37 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1                       0.274 ns        0.445 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_4                       0.939 ns        0.520 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16                       1.02 ns        0.811 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_64                       1.01 ns        0.941 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_256                      1.03 ns         1.01 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1024                    0.969 ns        0.947 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16384                   0.882 ns        0.876 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_262144                  0.893 ns        0.871 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1                    0.276 ns        0.443 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_4                     1.34 ns        0.870 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16                    1.60 ns         1.59 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_64                    1.91 ns         2.00 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_256                   1.91 ns         2.08 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1024                  1.91 ns         2.10 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16384                 2.13 ns         2.35 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_262144                2.25 ns         2.48 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1                        0.275 ns        0.446 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_4                         1.07 ns        0.671 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16                        1.36 ns         1.44 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_64                        1.70 ns         1.80 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_256                       1.88 ns         2.05 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1024                      1.92 ns         2.06 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16384                     2.11 ns         2.26 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_262144                    2.18 ns         2.34 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1                             0.274 ns        0.441 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_4                             0.938 ns        0.587 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16                             1.01 ns        0.873 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_64                             1.08 ns         1.00 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_256                            1.21 ns         1.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1024                           1.37 ns         1.29 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16384                          1.31 ns         1.26 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_262144                         1.29 ns         1.25 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1               0.275 ns        0.447 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_4                1.22 ns        0.764 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16               1.35 ns         1.46 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_64               1.63 ns         1.73 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_256              1.83 ns         1.87 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1024             1.81 ns         1.94 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16384            2.06 ns         2.19 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_262144           2.14 ns         2.31 ns
BM_MakeHeap_string_Random_1                                                  0.289 ns        0.446 ns
BM_MakeHeap_string_Random_4                                                   7.43 ns         4.84 ns
BM_MakeHeap_string_Random_16                                                  9.73 ns         8.92 ns
BM_MakeHeap_string_Random_64                                                  11.5 ns         11.5 ns
BM_MakeHeap_string_Random_256                                                 12.6 ns         12.4 ns
BM_MakeHeap_string_Random_1024                                                13.2 ns         13.2 ns
BM_MakeHeap_string_Random_16384                                               15.4 ns         15.4 ns
BM_MakeHeap_string_Random_262144                                              21.4 ns         21.5 ns
BM_MakeHeap_string_Ascending_1                                               0.287 ns        0.447 ns
BM_MakeHeap_string_Ascending_4                                                3.22 ns         2.44 ns
BM_MakeHeap_string_Ascending_16                                               3.77 ns         3.62 ns
BM_MakeHeap_string_Ascending_64                                               4.84 ns         5.17 ns
BM_MakeHeap_string_Ascending_256                                              5.79 ns         6.04 ns
BM_MakeHeap_string_Ascending_1024                                             5.93 ns         6.60 ns
BM_MakeHeap_string_Ascending_16384                                            6.84 ns         7.25 ns
BM_MakeHeap_string_Ascending_262144                                           13.5 ns         14.3 ns
BM_MakeHeap_string_Descending_1                                              0.293 ns        0.447 ns
BM_MakeHeap_string_Descending_4                                               2.61 ns         1.83 ns
BM_MakeHeap_string_Descending_16                                              2.64 ns         2.60 ns
BM_MakeHeap_string_Descending_64                                              2.75 ns         2.74 ns
BM_MakeHeap_string_Descending_256                                             3.78 ns         3.57 ns
BM_MakeHeap_string_Descending_1024                                            3.20 ns         3.51 ns
BM_MakeHeap_string_Descending_16384                                           3.57 ns         3.85 ns
BM_MakeHeap_string_Descending_262144                                          6.27 ns         6.39 ns
BM_MakeHeap_string_SingleElement_1                                           0.291 ns        0.448 ns
BM_MakeHeap_string_SingleElement_4                                            3.88 ns         2.71 ns
BM_MakeHeap_string_SingleElement_16                                           5.08 ns         4.96 ns
BM_MakeHeap_string_SingleElement_64                                           6.14 ns         6.29 ns
BM_MakeHeap_string_SingleElement_256                                          6.13 ns         6.46 ns
BM_MakeHeap_string_SingleElement_1024                                         5.98 ns         6.60 ns
BM_MakeHeap_string_SingleElement_16384                                        5.88 ns         6.39 ns
BM_MakeHeap_string_SingleElement_262144                                       9.95 ns         10.2 ns
BM_MakeHeap_string_PipeOrgan_1                                               0.292 ns        0.447 ns
BM_MakeHeap_string_PipeOrgan_4                                                2.97 ns         2.51 ns
BM_MakeHeap_string_PipeOrgan_16                                               3.76 ns         3.91 ns
BM_MakeHeap_string_PipeOrgan_64                                               4.89 ns         5.20 ns
BM_MakeHeap_string_PipeOrgan_256                                              5.77 ns         6.09 ns
BM_MakeHeap_string_PipeOrgan_1024                                             6.14 ns         6.40 ns
BM_MakeHeap_string_PipeOrgan_16384                                            6.83 ns         7.32 ns
BM_MakeHeap_string_PipeOrgan_262144                                           13.8 ns         14.6 ns
BM_MakeHeap_string_Heap_1                                                    0.288 ns        0.515 ns
BM_MakeHeap_string_Heap_4                                                     3.62 ns         4.20 ns
BM_MakeHeap_string_Heap_16                                                    5.36 ns         5.23 ns
BM_MakeHeap_string_Heap_64                                                    5.79 ns         5.38 ns
BM_MakeHeap_string_Heap_256                                                   5.70 ns         5.40 ns
BM_MakeHeap_string_Heap_1024                                                  5.78 ns         5.37 ns
BM_MakeHeap_string_Heap_16384                                                 6.09 ns         5.67 ns
BM_MakeHeap_string_Heap_262144                                                6.37 ns         5.96 ns
BM_MakeHeap_string_QuickSortAdversary_1                                      0.282 ns        0.448 ns
BM_MakeHeap_string_QuickSortAdversary_4                                       7.45 ns         5.60 ns
BM_MakeHeap_string_QuickSortAdversary_16                                      9.76 ns         8.85 ns
BM_MakeHeap_string_QuickSortAdversary_64                                      11.5 ns         11.2 ns
BM_MakeHeap_string_QuickSortAdversary_256                                     12.0 ns         11.8 ns
BM_MakeHeap_string_QuickSortAdversary_1024                                    12.2 ns         12.0 ns
BM_MakeHeap_string_QuickSortAdversary_16384                                   13.7 ns         13.6 ns
BM_MakeHeap_string_QuickSortAdversary_262144                                  14.1 ns         14.8 ns
BM_MakeHeap_float_Random_1                                                   0.287 ns        0.287 ns
BM_MakeHeap_float_Random_4                                                    2.29 ns         2.60 ns
BM_MakeHeap_float_Random_16                                                   4.00 ns         2.48 ns
BM_MakeHeap_float_Random_64                                                   4.41 ns         1.92 ns
BM_MakeHeap_float_Random_256                                                  4.73 ns         2.05 ns
BM_MakeHeap_float_Random_1024                                                 4.90 ns         2.27 ns
BM_MakeHeap_float_Random_16384                                                4.42 ns         2.27 ns
BM_MakeHeap_float_Random_262144                                               4.72 ns         1.39 ns
BM_MakeHeap_float_Ascending_1                                                0.291 ns        0.293 ns
BM_MakeHeap_float_Ascending_4                                                0.633 ns        0.428 ns
BM_MakeHeap_float_Ascending_16                                               0.638 ns        0.874 ns
BM_MakeHeap_float_Ascending_64                                               0.614 ns        0.698 ns
BM_MakeHeap_float_Ascending_256                                              0.663 ns        0.713 ns
BM_MakeHeap_float_Ascending_1024                                             0.660 ns        0.761 ns
BM_MakeHeap_float_Ascending_16384                                            0.628 ns        0.725 ns
BM_MakeHeap_float_Ascending_262144                                           0.629 ns        0.814 ns
BM_MakeHeap_float_Descending_1                                               0.290 ns        0.290 ns
BM_MakeHeap_float_Descending_4                                               0.421 ns        0.316 ns
BM_MakeHeap_float_Descending_16                                              0.302 ns        0.225 ns
BM_MakeHeap_float_Descending_64                                              0.293 ns        0.212 ns
BM_MakeHeap_float_Descending_256                                             0.314 ns        0.246 ns
BM_MakeHeap_float_Descending_1024                                            0.300 ns        0.231 ns
BM_MakeHeap_float_Descending_16384                                           0.308 ns        0.205 ns
BM_MakeHeap_float_Descending_262144                                          0.309 ns        0.203 ns
BM_MakeHeap_float_SingleElement_1                                            0.289 ns        0.292 ns
BM_MakeHeap_float_SingleElement_4                                            0.569 ns        0.347 ns
BM_MakeHeap_float_SingleElement_16                                           0.538 ns        0.825 ns
BM_MakeHeap_float_SingleElement_64                                           0.585 ns        0.727 ns
BM_MakeHeap_float_SingleElement_256                                          0.603 ns        0.708 ns
BM_MakeHeap_float_SingleElement_1024                                         0.618 ns        0.760 ns
BM_MakeHeap_float_SingleElement_16384                                        0.599 ns        0.726 ns
BM_MakeHeap_float_SingleElement_262144                                       0.723 ns        0.820 ns
BM_MakeHeap_float_PipeOrgan_1                                                0.289 ns        0.291 ns
BM_MakeHeap_float_PipeOrgan_4                                                0.457 ns        0.420 ns
BM_MakeHeap_float_PipeOrgan_16                                               0.670 ns         1.32 ns
BM_MakeHeap_float_PipeOrgan_64                                               0.764 ns        0.889 ns
BM_MakeHeap_float_PipeOrgan_256                                              0.793 ns        0.757 ns
BM_MakeHeap_float_PipeOrgan_1024                                             0.755 ns        0.764 ns
BM_MakeHeap_float_PipeOrgan_16384                                            0.723 ns        0.723 ns
BM_MakeHeap_float_PipeOrgan_262144                                           0.654 ns        0.817 ns
BM_MakeHeap_float_Heap_1                                                     0.291 ns        0.289 ns
BM_MakeHeap_float_Heap_4                                                     0.388 ns        0.316 ns
BM_MakeHeap_float_Heap_16                                                    0.317 ns        0.225 ns
BM_MakeHeap_float_Heap_64                                                    0.353 ns        0.213 ns
BM_MakeHeap_float_Heap_256                                                   0.361 ns        0.246 ns
BM_MakeHeap_float_Heap_1024                                                  0.381 ns        0.233 ns
BM_MakeHeap_float_Heap_16384                                                 0.390 ns        0.205 ns
BM_MakeHeap_float_Heap_262144                                                0.379 ns        0.202 ns
BM_MakeHeap_float_QuickSortAdversary_1                                       0.295 ns        0.289 ns
BM_MakeHeap_float_QuickSortAdversary_4                                       0.640 ns        0.422 ns
BM_MakeHeap_float_QuickSortAdversary_16                                      0.658 ns        0.871 ns
BM_MakeHeap_float_QuickSortAdversary_64                                      0.574 ns        0.659 ns
BM_MakeHeap_float_QuickSortAdversary_256                                     0.631 ns        0.550 ns
BM_MakeHeap_float_QuickSortAdversary_1024                                    0.617 ns        0.552 ns
BM_MakeHeap_float_QuickSortAdversary_16384                                   0.424 ns        0.283 ns
BM_MakeHeap_float_QuickSortAdversary_262144                                  0.386 ns        0.219 ns
```

Fixes #120752

Added: 
    

Modified: 
    libcxx/include/__algorithm/make_heap.h
    libcxx/include/__algorithm/partial_sort.h
    libcxx/include/__algorithm/partial_sort_copy.h
    libcxx/include/__algorithm/sift_down.h
    libcxx/test/libcxx/algorithms/bad_iterator_traits.verify.cpp

Removed: 
    


################################################################################
diff  --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index e8f0cdb27333a..8cfeda2b59811 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -12,9 +12,11 @@
 #include <__algorithm/comp.h>
 #include <__algorithm/comp_ref_type.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/push_heap.h>
 #include <__algorithm/sift_down.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
+#include <__type_traits/is_arithmetic.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -31,13 +33,23 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare&& __comp) {
   __comp_ref_type<_Compare> __comp_ref = __comp;
 
-  using 
diff erence_type = typename iterator_traits<_RandomAccessIterator>::
diff erence_type;
-  
diff erence_type __n   = __last - __first;
+  using __
diff _t     = __iter_
diff _t<_RandomAccessIterator>;
+  const __
diff _t __n = __last - __first;
+
+  static const bool __assume_both_children = is_arithmetic<__iter_value_type<_RandomAccessIterator> >::value;
+
+  // While it would be correct to always assume we have both children, in practice we observed this to be a performance
+  // improvement only for arithmetic types.
+  const __
diff _t __sift_down_n = __assume_both_children ? ((__n & 1) ? __n : __n - 1) : __n;
+
   if (__n > 1) {
     // start from the first parent, there is no need to consider children
-    for (
diff erence_type __start = (__n - 2) / 2; __start >= 0; --__start) {
-      std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
+
+    for (__
diff _t __start = (__sift_down_n - 2) / 2; __start >= 0; --__start) {
+      std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, __start);
     }
+    if _LIBCPP_CONSTEXPR (__assume_both_children)
+      std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n);
   }
 }
 

diff  --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h
index 7f8d0c49147e3..4b39ae0cf2dfc 100644
--- a/libcxx/include/__algorithm/partial_sort.h
+++ b/libcxx/include/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
   for (; __i != __last; ++__i) {
     if (__comp(*__i, *__first)) {
       _IterOps<_AlgPolicy>::iter_swap(__i, __first);
-      std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+      std::__sift_down<_AlgPolicy, false>(__first, __comp, __len, 0);
     }
   }
   std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);

diff  --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 172f53b290d54..2230dfc9cc4ad 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
     for (; __first != __last; ++__first)
       if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
         *__result_first = *__first;
-        std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+        std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0);
       }
     std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
   }

diff  --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index 42803e30631fb..e01c9b2b00f86 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -24,59 +24,60 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
+template <class _AlgPolicy, bool __assume_both_children, class _Compare, class _RandomAccessIterator>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __sift_down(_RandomAccessIterator __first,
             _Compare&& __comp,
-            typename iterator_traits<_RandomAccessIterator>::
diff erence_type __len,
-            _RandomAccessIterator __start) {
+            __iter_
diff _t<_RandomAccessIterator> __len,
+            __iter_
diff _t<_RandomAccessIterator> __start) {
   using _Ops = _IterOps<_AlgPolicy>;
 
   typedef typename iterator_traits<_RandomAccessIterator>::
diff erence_type 
diff erence_type;
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
   // left-child of __start is at 2 * __start + 1
   // right-child of __start is at 2 * __start + 2
-  
diff erence_type __child = __start - __first;
+  
diff erence_type __child = __start;
 
   if (__len < 2 || (__len - 2) / 2 < __child)
     return;
 
-  __child                         = 2 * __child + 1;
-  _RandomAccessIterator __child_i = __first + __child;
+  __child = 2 * __child + 1;
 
-  if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + 
diff erence_type(1)))) {
+  if _LIBCPP_CONSTEXPR (__assume_both_children) {
+    // right-child exists and is greater than left-child
+    __child += __comp(__first[__child], __first[__child + 1]);
+  } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
     // right-child exists and is greater than left-child
-    ++__child_i;
     ++__child;
   }
 
   // check if we are in heap-order
-  if (__comp(*__child_i, *__start))
+  if (__comp(__first[__child], __first[__start]))
     // we are, __start is larger than its largest child
     return;
 
-  value_type __top(_Ops::__iter_move(__start));
+  value_type __top(_Ops::__iter_move(__first + __start));
   do {
     // we are not in heap-order, swap the parent with its largest child
-    *__start = _Ops::__iter_move(__child_i);
-    __start  = __child_i;
+    __first[__start] = _Ops::__iter_move(__first + __child);
+    __start          = __child;
 
     if ((__len - 2) / 2 < __child)
       break;
 
     // recompute the child based off of the updated parent
-    __child   = 2 * __child + 1;
-    __child_i = __first + __child;
+    __child = 2 * __child + 1;
 
-    if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + 
diff erence_type(1)))) {
+    if _LIBCPP_CONSTEXPR (__assume_both_children) {
+      __child += __comp(__first[__child], __first[__child + 1]);
+    } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
       // right-child exists and is greater than left-child
-      ++__child_i;
       ++__child;
     }
 
     // check if we are in heap-order
-  } while (!__comp(*__child_i, __top));
-  *__start = std::move(__top);
+  } while (!__comp(__first[__child], __top));
+  __first[__start] = std::move(__top);
 }
 
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>

diff  --git a/libcxx/test/libcxx/algorithms/bad_iterator_traits.verify.cpp b/libcxx/test/libcxx/algorithms/bad_iterator_traits.verify.cpp
index a0b5b88bb9f0e..63824171a25c0 100644
--- a/libcxx/test/libcxx/algorithms/bad_iterator_traits.verify.cpp
+++ b/libcxx/test/libcxx/algorithms/bad_iterator_traits.verify.cpp
@@ -57,5 +57,5 @@ struct BadIter {
 // behavior when these iterators are passed to standard algorithms.
 void test() {
   std::sort(BadIter(), BadIter());
-  //expected-error-re@*:* {{static assertion failed {{.*}}It looks like your iterator's `iterator_traits<It>::reference` does not match the return type of dereferencing the iterator}}
+  //expected-error-re@*:* 2 {{static assertion failed {{.*}}It looks like your iterator's `iterator_traits<It>::reference` does not match the return type of dereferencing the iterator}}
 }


        


More information about the libcxx-commits mailing list