[libcxx-commits] [libcxx] [libcxx][algorithm] Optimize std::stable_sort via radix sort algorithm (PR #104683)
Дмитрий Изволов via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Aug 17 12:25:31 PDT 2024
https://github.com/izvolov created https://github.com/llvm/llvm-project/pull/104683
The radix sort (MSD) algorithm makes it possible to speed up std::stable_sort dramatically when sorting integers.
The speedup varies from relatively small to as much as 10x, depending on the type of the sorted elements and the initial state of the array being sorted.
```
Running ./libcxx/test/benchmarks/stable_sort.bench.out
Run on (12 X 2600 MHz CPU s)
CPU Caches:
L1 Data 32 KiB
L1 Instruction 32 KiB
L2 Unified 256 KiB (x6)
L3 Unified 12288 KiB
Load Average: 3.48, 3.38, 3.08
---------------------------------------------------------------------------
Benchmark After Before
---------------------------------------------------------------------------
BM_StableSort_int8_Random_1 3.39 ns 3.58 ns
BM_StableSort_int8_Random_4 21.1 ns 21.9 ns
BM_StableSort_int8_Random_16 142 ns 147 ns
BM_StableSort_int8_Random_64 893 ns 903 ns
BM_StableSort_int8_Random_256 409 ns 5810 ns
BM_StableSort_int8_Random_1024 1235 ns 29973 ns
BM_StableSort_int8_Random_4096 4410 ns 141880 ns
BM_StableSort_int8_Random_16384 18044 ns 620540 ns
BM_StableSort_int8_Random_65536 144030 ns 2592013 ns
BM_StableSort_int8_Random_262144 858350 ns 10935814 ns
BM_StableSort_int8_Random_524288 2929988 ns 27060729 ns
BM_StableSort_int8_Random_1048576 6058292 ns 49622720 ns
BM_StableSort_int8_Ascending_1 3.42 ns 3.92 ns
BM_StableSort_int8_Ascending_4 5.86 ns 8.08 ns
BM_StableSort_int8_Ascending_16 10.6 ns 12.0 ns
BM_StableSort_int8_Ascending_64 28.9 ns 30.6 ns
BM_StableSort_int8_Ascending_256 415 ns 391 ns
BM_StableSort_int8_Ascending_1024 1666 ns 2309 ns
BM_StableSort_int8_Ascending_4096 7748 ns 12269 ns
BM_StableSort_int8_Ascending_16384 40588 ns 60181 ns
BM_StableSort_int8_Ascending_65536 178843 ns 298221 ns
BM_StableSort_int8_Ascending_262144 919959 ns 1402692 ns
BM_StableSort_int8_Ascending_524288 2397397 ns 3036984 ns
BM_StableSort_int8_Ascending_1048576 5080043 ns 7218581 ns
BM_StableSort_int8_Descending_1 3.44 ns 3.53 ns
BM_StableSort_int8_Descending_4 7.94 ns 8.29 ns
BM_StableSort_int8_Descending_16 59.6 ns 57.7 ns
BM_StableSort_int8_Descending_64 1051 ns 1027 ns
BM_StableSort_int8_Descending_256 422 ns 4718 ns
BM_StableSort_int8_Descending_1024 1676 ns 21044 ns
BM_StableSort_int8_Descending_4096 7766 ns 64827 ns
BM_StableSort_int8_Descending_16384 40230 ns 93981 ns
BM_StableSort_int8_Descending_65536 190978 ns 421151 ns
BM_StableSort_int8_Descending_262144 1055141 ns 1918927 ns
BM_StableSort_int8_Descending_524288 2875115 ns 3809153 ns
BM_StableSort_int8_Descending_1048576 5854135 ns 8713690 ns
BM_StableSort_int8_SingleElement_1 3.52 ns 3.46 ns
BM_StableSort_int8_SingleElement_4 6.25 ns 5.79 ns
BM_StableSort_int8_SingleElement_16 10.7 ns 11.4 ns
BM_StableSort_int8_SingleElement_64 29.3 ns 30.3 ns
BM_StableSort_int8_SingleElement_256 858 ns 380 ns
BM_StableSort_int8_SingleElement_1024 3036 ns 2231 ns
BM_StableSort_int8_SingleElement_4096 11580 ns 11866 ns
BM_StableSort_int8_SingleElement_16384 44956 ns 59621 ns
BM_StableSort_int8_SingleElement_65536 182006 ns 297853 ns
BM_StableSort_int8_SingleElement_262144 962181 ns 1432857 ns
BM_StableSort_int8_SingleElement_524288 2256687 ns 2975707 ns
BM_StableSort_int8_SingleElement_1048576 4522556 ns 6949948 ns
BM_StableSort_int8_PipeOrgan_1 3.26 ns 3.64 ns
BM_StableSort_int8_PipeOrgan_4 6.21 ns 6.58 ns
BM_StableSort_int8_PipeOrgan_16 23.7 ns 25.4 ns
BM_StableSort_int8_PipeOrgan_64 250 ns 248 ns
BM_StableSort_int8_PipeOrgan_256 414 ns 2498 ns
BM_StableSort_int8_PipeOrgan_1024 1697 ns 10946 ns
BM_StableSort_int8_PipeOrgan_4096 7840 ns 37238 ns
BM_StableSort_int8_PipeOrgan_16384 41402 ns 74805 ns
BM_StableSort_int8_PipeOrgan_65536 180107 ns 357891 ns
BM_StableSort_int8_PipeOrgan_262144 988273 ns 1647296 ns
BM_StableSort_int8_PipeOrgan_524288 2547374 ns 3245991 ns
BM_StableSort_int8_PipeOrgan_1048576 5128783 ns 7342444 ns
BM_StableSort_int8_QuickSortAdversary_1 3.14 ns 4.01 ns
BM_StableSort_int8_QuickSortAdversary_4 6.05 ns 7.02 ns
BM_StableSort_int8_QuickSortAdversary_16 10.5 ns 11.9 ns
BM_StableSort_int8_QuickSortAdversary_64 520 ns 516 ns
BM_StableSort_int8_QuickSortAdversary_256 920 ns 386 ns
BM_StableSort_int8_QuickSortAdversary_1024 3083 ns 2299 ns
BM_StableSort_int8_QuickSortAdversary_4096 11659 ns 12295 ns
BM_StableSort_int8_QuickSortAdversary_16384 45721 ns 60931 ns
BM_StableSort_int8_QuickSortAdversary_65536 186334 ns 295423 ns
BM_StableSort_int8_QuickSortAdversary_262144 946262 ns 1399973 ns
BM_StableSort_int8_QuickSortAdversary_524288 2282004 ns 2832266 ns
BM_StableSort_int8_QuickSortAdversary_1048576 4691123 ns 6963253 ns
BM_StableSort_uint8_Random_1 3.11 ns 3.44 ns
BM_StableSort_uint8_Random_4 21.9 ns 23.1 ns
BM_StableSort_uint8_Random_16 154 ns 171 ns
BM_StableSort_uint8_Random_64 1000 ns 1051 ns
BM_StableSort_uint8_Random_256 402 ns 6498 ns
BM_StableSort_uint8_Random_1024 1176 ns 35310 ns
BM_StableSort_uint8_Random_4096 4415 ns 164087 ns
BM_StableSort_uint8_Random_16384 17849 ns 686769 ns
BM_StableSort_uint8_Random_65536 146109 ns 2932051 ns
BM_StableSort_uint8_Random_262144 876710 ns 12163988 ns
BM_StableSort_uint8_Random_524288 2858089 ns 26458830 ns
BM_StableSort_uint8_Random_1048576 5766942 ns 54836214 ns
BM_StableSort_uint8_Ascending_1 3.11 ns 3.43 ns
BM_StableSort_uint8_Ascending_4 6.18 ns 7.24 ns
BM_StableSort_uint8_Ascending_16 14.5 ns 17.0 ns
BM_StableSort_uint8_Ascending_64 50.7 ns 59.2 ns
BM_StableSort_uint8_Ascending_256 395 ns 536 ns
BM_StableSort_uint8_Ascending_1024 1752 ns 2956 ns
BM_StableSort_uint8_Ascending_4096 7785 ns 15146 ns
BM_StableSort_uint8_Ascending_16384 41442 ns 74136 ns
BM_StableSort_uint8_Ascending_65536 180879 ns 354261 ns
BM_StableSort_uint8_Ascending_262144 945880 ns 1674256 ns
BM_StableSort_uint8_Ascending_524288 2287832 ns 3138581 ns
BM_StableSort_uint8_Ascending_1048576 4630290 ns 7296278 ns
BM_StableSort_uint8_Descending_1 3.19 ns 3.63 ns
BM_StableSort_uint8_Descending_4 9.60 ns 11.5 ns
BM_StableSort_uint8_Descending_16 78.3 ns 86.0 ns
BM_StableSort_uint8_Descending_64 1265 ns 1308 ns
BM_StableSort_uint8_Descending_256 395 ns 6556 ns
BM_StableSort_uint8_Descending_1024 1712 ns 24669 ns
BM_StableSort_uint8_Descending_4096 7748 ns 83407 ns
BM_StableSort_uint8_Descending_16384 40779 ns 104043 ns
BM_StableSort_uint8_Descending_65536 181560 ns 467680 ns
BM_StableSort_uint8_Descending_262144 1146627 ns 2102769 ns
BM_StableSort_uint8_Descending_524288 2874096 ns 4572229 ns
BM_StableSort_uint8_Descending_1048576 5873195 ns 10170663 ns
BM_StableSort_uint8_SingleElement_1 3.28 ns 3.58 ns
BM_StableSort_uint8_SingleElement_4 6.44 ns 7.40 ns
BM_StableSort_uint8_SingleElement_16 14.9 ns 16.4 ns
BM_StableSort_uint8_SingleElement_64 51.2 ns 52.9 ns
BM_StableSort_uint8_SingleElement_256 876 ns 490 ns
BM_StableSort_uint8_SingleElement_1024 3041 ns 2750 ns
BM_StableSort_uint8_SingleElement_4096 11947 ns 14326 ns
BM_StableSort_uint8_SingleElement_16384 46669 ns 69984 ns
BM_StableSort_uint8_SingleElement_65536 197903 ns 328961 ns
BM_StableSort_uint8_SingleElement_262144 1031466 ns 1551436 ns
BM_StableSort_uint8_SingleElement_524288 2447672 ns 3049553 ns
BM_StableSort_uint8_SingleElement_1048576 4793087 ns 7615245 ns
BM_StableSort_uint8_PipeOrgan_1 3.38 ns 3.56 ns
BM_StableSort_uint8_PipeOrgan_4 7.16 ns 8.70 ns
BM_StableSort_uint8_PipeOrgan_16 31.7 ns 35.3 ns
BM_StableSort_uint8_PipeOrgan_64 326 ns 366 ns
BM_StableSort_uint8_PipeOrgan_256 409 ns 2942 ns
BM_StableSort_uint8_PipeOrgan_1024 1994 ns 12571 ns
BM_StableSort_uint8_PipeOrgan_4096 8086 ns 46278 ns
BM_StableSort_uint8_PipeOrgan_16384 41749 ns 79813 ns
BM_StableSort_uint8_PipeOrgan_65536 180697 ns 375120 ns
BM_StableSort_uint8_PipeOrgan_262144 1004899 ns 1676143 ns
BM_StableSort_uint8_PipeOrgan_524288 2456081 ns 3333949 ns
BM_StableSort_uint8_PipeOrgan_1048576 5030857 ns 7591303 ns
BM_StableSort_uint8_QuickSortAdversary_1 3.12 ns 3.46 ns
BM_StableSort_uint8_QuickSortAdversary_4 7.25 ns 6.83 ns
BM_StableSort_uint8_QuickSortAdversary_16 14.6 ns 16.2 ns
BM_StableSort_uint8_QuickSortAdversary_64 650 ns 665 ns
BM_StableSort_uint8_QuickSortAdversary_256 395 ns 2982 ns
BM_StableSort_uint8_QuickSortAdversary_1024 3125 ns 2583 ns
BM_StableSort_uint8_QuickSortAdversary_4096 11797 ns 13929 ns
BM_StableSort_uint8_QuickSortAdversary_16384 45803 ns 66513 ns
BM_StableSort_uint8_QuickSortAdversary_65536 190745 ns 313467 ns
BM_StableSort_uint8_QuickSortAdversary_262144 974646 ns 1469014 ns
BM_StableSort_uint8_QuickSortAdversary_524288 2317553 ns 3022065 ns
BM_StableSort_uint8_QuickSortAdversary_1048576 4898703 ns 6854079 ns
BM_StableSort_int16_Random_1 3.94 ns 3.49 ns
BM_StableSort_int16_Random_4 20.8 ns 23.2 ns
BM_StableSort_int16_Random_16 133 ns 163 ns
BM_StableSort_int16_Random_64 903 ns 953 ns
BM_StableSort_int16_Random_256 5638 ns 6258 ns
BM_StableSort_int16_Random_1024 3056 ns 34587 ns
BM_StableSort_int16_Random_4096 10596 ns 168397 ns
BM_StableSort_int16_Random_16384 49908 ns 753031 ns
BM_StableSort_int16_Random_65536 444605 ns 3838368 ns
BM_StableSort_int16_Random_262144 2419345 ns 15657285 ns
BM_StableSort_int16_Random_524288 7984040 ns 32726933 ns
BM_StableSort_int16_Random_1048576 16092424 ns 67999766 ns
BM_StableSort_int16_Ascending_1 3.40 ns 3.43 ns
BM_StableSort_int16_Ascending_4 5.45 ns 5.79 ns
BM_StableSort_int16_Ascending_16 12.0 ns 15.3 ns
BM_StableSort_int16_Ascending_64 39.6 ns 52.6 ns
BM_StableSort_int16_Ascending_256 470 ns 550 ns
BM_StableSort_int16_Ascending_1024 1686 ns 2707 ns
BM_StableSort_int16_Ascending_4096 5676 ns 14165 ns
BM_StableSort_int16_Ascending_16384 21413 ns 69483 ns
BM_StableSort_int16_Ascending_65536 88010 ns 334466 ns
BM_StableSort_int16_Ascending_262144 567239 ns 1570620 ns
BM_StableSort_int16_Ascending_524288 1553063 ns 3424666 ns
BM_StableSort_int16_Ascending_1048576 3145577 ns 8499649 ns
BM_StableSort_int16_Descending_1 3.22 ns 3.54 ns
BM_StableSort_int16_Descending_4 6.85 ns 10.2 ns
BM_StableSort_int16_Descending_16 62.7 ns 62.2 ns
BM_StableSort_int16_Descending_64 1138 ns 1036 ns
BM_StableSort_int16_Descending_256 5541 ns 4696 ns
BM_StableSort_int16_Descending_1024 3046 ns 19577 ns
BM_StableSort_int16_Descending_4096 10962 ns 79149 ns
BM_StableSort_int16_Descending_16384 58182 ns 327709 ns
BM_StableSort_int16_Descending_65536 447025 ns 1424896 ns
BM_StableSort_int16_Descending_262144 1104973 ns 5921903 ns
BM_StableSort_int16_Descending_524288 2547840 ns 17956789 ns
BM_StableSort_int16_Descending_1048576 5093555 ns 17044318 ns
BM_StableSort_int16_SingleElement_1 3.56 ns 3.96 ns
BM_StableSort_int16_SingleElement_4 5.75 ns 6.72 ns
BM_StableSort_int16_SingleElement_16 12.4 ns 16.1 ns
BM_StableSort_int16_SingleElement_64 36.9 ns 54.4 ns
BM_StableSort_int16_SingleElement_256 473 ns 557 ns
BM_StableSort_int16_SingleElement_1024 1828 ns 2826 ns
BM_StableSort_int16_SingleElement_4096 6239 ns 14252 ns
BM_StableSort_int16_SingleElement_16384 23695 ns 70369 ns
BM_StableSort_int16_SingleElement_65536 93281 ns 361641 ns
BM_StableSort_int16_SingleElement_262144 599078 ns 1640216 ns
BM_StableSort_int16_SingleElement_524288 1659678 ns 3343087 ns
BM_StableSort_int16_SingleElement_1048576 3184033 ns 7770271 ns
BM_StableSort_int16_PipeOrgan_1 3.75 ns 3.76 ns
BM_StableSort_int16_PipeOrgan_4 5.94 ns 7.74 ns
BM_StableSort_int16_PipeOrgan_16 26.7 ns 25.9 ns
BM_StableSort_int16_PipeOrgan_64 300 ns 263 ns
BM_StableSort_int16_PipeOrgan_256 2769 ns 2760 ns
BM_StableSort_int16_PipeOrgan_1024 2996 ns 10544 ns
BM_StableSort_int16_PipeOrgan_4096 11641 ns 44750 ns
BM_StableSort_int16_PipeOrgan_16384 57224 ns 200464 ns
BM_StableSort_int16_PipeOrgan_65536 416873 ns 887631 ns
BM_StableSort_int16_PipeOrgan_262144 843264 ns 3588669 ns
BM_StableSort_int16_PipeOrgan_524288 2027741 ns 11056924 ns
BM_StableSort_int16_PipeOrgan_1048576 4223773 ns 13261276 ns
BM_StableSort_int16_QuickSortAdversary_1 3.83 ns 3.68 ns
BM_StableSort_int16_QuickSortAdversary_4 5.55 ns 6.93 ns
BM_StableSort_int16_QuickSortAdversary_16 12.3 ns 15.2 ns
BM_StableSort_int16_QuickSortAdversary_64 646 ns 632 ns
BM_StableSort_int16_QuickSortAdversary_256 2751 ns 2542 ns
BM_StableSort_int16_QuickSortAdversary_1024 3028 ns 16901 ns
BM_StableSort_int16_QuickSortAdversary_4096 10862 ns 80222 ns
BM_StableSort_int16_QuickSortAdversary_16384 57753 ns 317281 ns
BM_StableSort_int16_QuickSortAdversary_65536 94064 ns 328502 ns
BM_StableSort_int16_QuickSortAdversary_262144 557796 ns 1613208 ns
BM_StableSort_int16_QuickSortAdversary_524288 1518451 ns 3479740 ns
BM_StableSort_int16_QuickSortAdversary_1048576 3165129 ns 7655880 ns
BM_StableSort_uint16_Random_1 3.26 ns 3.44 ns
BM_StableSort_uint16_Random_4 21.1 ns 22.2 ns
BM_StableSort_uint16_Random_16 157 ns 156 ns
BM_StableSort_uint16_Random_64 955 ns 947 ns
BM_StableSort_uint16_Random_256 5886 ns 6097 ns
BM_StableSort_uint16_Random_1024 2787 ns 30776 ns
BM_StableSort_uint16_Random_4096 9973 ns 155652 ns
BM_StableSort_uint16_Random_16384 48628 ns 741072 ns
BM_StableSort_uint16_Random_65536 439609 ns 3478966 ns
BM_StableSort_uint16_Random_262144 2336983 ns 15197642 ns
BM_StableSort_uint16_Random_524288 7888701 ns 34234254 ns
BM_StableSort_uint16_Random_1048576 14865180 ns 68516386 ns
BM_StableSort_uint16_Ascending_1 3.33 ns 4.00 ns
BM_StableSort_uint16_Ascending_4 5.79 ns 6.64 ns
BM_StableSort_uint16_Ascending_16 14.9 ns 15.5 ns
BM_StableSort_uint16_Ascending_64 50.2 ns 52.5 ns
BM_StableSort_uint16_Ascending_256 538 ns 546 ns
BM_StableSort_uint16_Ascending_1024 1645 ns 2652 ns
BM_StableSort_uint16_Ascending_4096 5559 ns 14517 ns
BM_StableSort_uint16_Ascending_16384 22803 ns 70275 ns
BM_StableSort_uint16_Ascending_65536 83109 ns 333446 ns
BM_StableSort_uint16_Ascending_262144 562667 ns 1568670 ns
BM_StableSort_uint16_Ascending_524288 1564646 ns 3059839 ns
BM_StableSort_uint16_Ascending_1048576 3178826 ns 7048327 ns
BM_StableSort_uint16_Descending_1 3.34 ns 3.93 ns
BM_StableSort_uint16_Descending_4 8.75 ns 9.73 ns
BM_StableSort_uint16_Descending_16 55.9 ns 55.5 ns
BM_StableSort_uint16_Descending_64 1021 ns 1035 ns
BM_StableSort_uint16_Descending_256 4752 ns 4931 ns
BM_StableSort_uint16_Descending_1024 2982 ns 19727 ns
BM_StableSort_uint16_Descending_4096 10432 ns 83165 ns
BM_StableSort_uint16_Descending_16384 56593 ns 326131 ns
BM_StableSort_uint16_Descending_65536 439134 ns 1371346 ns
BM_StableSort_uint16_Descending_262144 1220925 ns 5735665 ns
BM_StableSort_uint16_Descending_524288 2767234 ns 16758330 ns
BM_StableSort_uint16_Descending_1048576 5673769 ns 17541715 ns
BM_StableSort_uint16_SingleElement_1 3.53 ns 3.73 ns
BM_StableSort_uint16_SingleElement_4 6.27 ns 5.81 ns
BM_StableSort_uint16_SingleElement_16 14.8 ns 15.1 ns
BM_StableSort_uint16_SingleElement_64 51.5 ns 50.9 ns
BM_StableSort_uint16_SingleElement_256 536 ns 540 ns
BM_StableSort_uint16_SingleElement_1024 1669 ns 2690 ns
BM_StableSort_uint16_SingleElement_4096 5840 ns 14230 ns
BM_StableSort_uint16_SingleElement_16384 22468 ns 68524 ns
BM_StableSort_uint16_SingleElement_65536 89845 ns 332187 ns
BM_StableSort_uint16_SingleElement_262144 590736 ns 1550868 ns
BM_StableSort_uint16_SingleElement_524288 1573677 ns 3095703 ns
BM_StableSort_uint16_SingleElement_1048576 3183421 ns 8251180 ns
BM_StableSort_uint16_PipeOrgan_1 3.70 ns 3.64 ns
BM_StableSort_uint16_PipeOrgan_4 7.01 ns 6.81 ns
BM_StableSort_uint16_PipeOrgan_16 25.7 ns 26.4 ns
BM_StableSort_uint16_PipeOrgan_64 283 ns 277 ns
BM_StableSort_uint16_PipeOrgan_256 2562 ns 2852 ns
BM_StableSort_uint16_PipeOrgan_1024 2863 ns 10892 ns
BM_StableSort_uint16_PipeOrgan_4096 10585 ns 45668 ns
BM_StableSort_uint16_PipeOrgan_16384 59151 ns 194358 ns
BM_StableSort_uint16_PipeOrgan_65536 508579 ns 854692 ns
BM_StableSort_uint16_PipeOrgan_262144 901294 ns 3606346 ns
BM_StableSort_uint16_PipeOrgan_524288 2192498 ns 10449279 ns
BM_StableSort_uint16_PipeOrgan_1048576 4204368 ns 11956606 ns
BM_StableSort_uint16_QuickSortAdversary_1 3.20 ns 3.63 ns
BM_StableSort_uint16_QuickSortAdversary_4 5.30 ns 6.38 ns
BM_StableSort_uint16_QuickSortAdversary_16 14.5 ns 15.3 ns
BM_StableSort_uint16_QuickSortAdversary_64 575 ns 611 ns
BM_StableSort_uint16_QuickSortAdversary_256 2423 ns 2577 ns
BM_StableSort_uint16_QuickSortAdversary_1024 2794 ns 16854 ns
BM_StableSort_uint16_QuickSortAdversary_4096 10511 ns 75952 ns
BM_StableSort_uint16_QuickSortAdversary_16384 56214 ns 333824 ns
BM_StableSort_uint16_QuickSortAdversary_65536 422512 ns 1354867 ns
BM_StableSort_uint16_QuickSortAdversary_262144 583301 ns 1564443 ns
BM_StableSort_uint16_QuickSortAdversary_524288 1584319 ns 3265575 ns
BM_StableSort_uint16_QuickSortAdversary_1048576 3197732 ns 7945245 ns
BM_StableSort_int32_Random_1 3.81 ns 3.70 ns
BM_StableSort_int32_Random_4 20.8 ns 23.4 ns
BM_StableSort_int32_Random_16 134 ns 161 ns
BM_StableSort_int32_Random_64 895 ns 984 ns
BM_StableSort_int32_Random_256 5640 ns 5897 ns
BM_StableSort_int32_Random_1024 6994 ns 32118 ns
BM_StableSort_int32_Random_4096 27367 ns 168960 ns
BM_StableSort_int32_Random_16384 183261 ns 843240 ns
BM_StableSort_int32_Random_65536 950914 ns 3953588 ns
BM_StableSort_int32_Random_262144 3673311 ns 16790171 ns
BM_StableSort_int32_Random_524288 11515700 ns 36023098 ns
BM_StableSort_int32_Random_1048576 24492515 ns 78116028 ns
BM_StableSort_int32_Ascending_1 3.31 ns 4.48 ns
BM_StableSort_int32_Ascending_4 5.96 ns 6.99 ns
BM_StableSort_int32_Ascending_16 13.0 ns 16.0 ns
BM_StableSort_int32_Ascending_64 36.7 ns 53.0 ns
BM_StableSort_int32_Ascending_256 391 ns 471 ns
BM_StableSort_int32_Ascending_1024 2705 ns 2682 ns
BM_StableSort_int32_Ascending_4096 8773 ns 14231 ns
BM_StableSort_int32_Ascending_16384 34709 ns 70625 ns
BM_StableSort_int32_Ascending_65536 142907 ns 344482 ns
BM_StableSort_int32_Ascending_262144 745483 ns 1591418 ns
BM_StableSort_int32_Ascending_524288 1873701 ns 3190305 ns
BM_StableSort_int32_Ascending_1048576 3851590 ns 7570095 ns
BM_StableSort_int32_Descending_1 3.22 ns 4.23 ns
BM_StableSort_int32_Descending_4 7.58 ns 11.2 ns
BM_StableSort_int32_Descending_16 63.9 ns 58.6 ns
BM_StableSort_int32_Descending_64 1133 ns 1017 ns
BM_StableSort_int32_Descending_256 4850 ns 4464 ns
BM_StableSort_int32_Descending_1024 7023 ns 18954 ns
BM_StableSort_int32_Descending_4096 28550 ns 75163 ns
BM_StableSort_int32_Descending_16384 200880 ns 341104 ns
BM_StableSort_int32_Descending_65536 1095910 ns 1398021 ns
BM_StableSort_int32_Descending_262144 3818864 ns 5695486 ns
BM_StableSort_int32_Descending_524288 5606779 ns 17593982 ns
BM_StableSort_int32_Descending_1048576 16416366 ns 26649503 ns
BM_StableSort_int32_SingleElement_1 3.81 ns 3.71 ns
BM_StableSort_int32_SingleElement_4 6.57 ns 6.61 ns
BM_StableSort_int32_SingleElement_16 14.0 ns 15.8 ns
BM_StableSort_int32_SingleElement_64 38.7 ns 53.5 ns
BM_StableSort_int32_SingleElement_256 386 ns 554 ns
BM_StableSort_int32_SingleElement_1024 2761 ns 3046 ns
BM_StableSort_int32_SingleElement_4096 9179 ns 15188 ns
BM_StableSort_int32_SingleElement_16384 34794 ns 70119 ns
BM_StableSort_int32_SingleElement_65536 135190 ns 354755 ns
BM_StableSort_int32_SingleElement_262144 760995 ns 1644072 ns
BM_StableSort_int32_SingleElement_524288 1969575 ns 3343419 ns
BM_StableSort_int32_SingleElement_1048576 4423816 ns 8346971 ns
BM_StableSort_int32_PipeOrgan_1 3.79 ns 3.63 ns
BM_StableSort_int32_PipeOrgan_4 6.21 ns 6.73 ns
BM_StableSort_int32_PipeOrgan_16 27.5 ns 26.0 ns
BM_StableSort_int32_PipeOrgan_64 291 ns 265 ns
BM_StableSort_int32_PipeOrgan_256 2557 ns 2518 ns
BM_StableSort_int32_PipeOrgan_1024 6765 ns 10976 ns
BM_StableSort_int32_PipeOrgan_4096 26373 ns 44537 ns
BM_StableSort_int32_PipeOrgan_16384 201466 ns 188582 ns
BM_StableSort_int32_PipeOrgan_65536 1148533 ns 802368 ns
BM_StableSort_int32_PipeOrgan_262144 2255177 ns 3477829 ns
BM_StableSort_int32_PipeOrgan_524288 3947015 ns 10356637 ns
BM_StableSort_int32_PipeOrgan_1048576 10274312 ns 16405366 ns
BM_StableSort_int32_QuickSortAdversary_1 3.32 ns 4.36 ns
BM_StableSort_int32_QuickSortAdversary_4 5.98 ns 7.44 ns
BM_StableSort_int32_QuickSortAdversary_16 13.0 ns 16.3 ns
BM_StableSort_int32_QuickSortAdversary_64 657 ns 616 ns
BM_StableSort_int32_QuickSortAdversary_256 2569 ns 2483 ns
BM_StableSort_int32_QuickSortAdversary_1024 6898 ns 19635 ns
BM_StableSort_int32_QuickSortAdversary_4096 27092 ns 75108 ns
BM_StableSort_int32_QuickSortAdversary_16384 190379 ns 316463 ns
BM_StableSort_int32_QuickSortAdversary_65536 1109040 ns 1319018 ns
BM_StableSort_int32_QuickSortAdversary_262144 4361925 ns 5472779 ns
BM_StableSort_int32_QuickSortAdversary_524288 6528215 ns 17538983 ns
BM_StableSort_int32_QuickSortAdversary_1048576 18345325 ns 27223926 ns
BM_StableSort_uint32_Random_1 3.67 ns 3.82 ns
BM_StableSort_uint32_Random_4 22.3 ns 21.8 ns
BM_StableSort_uint32_Random_16 155 ns 153 ns
BM_StableSort_uint32_Random_64 946 ns 976 ns
BM_StableSort_uint32_Random_256 5824 ns 6019 ns
BM_StableSort_uint32_Random_1024 4525 ns 32764 ns
BM_StableSort_uint32_Random_4096 17223 ns 158608 ns
BM_StableSort_uint32_Random_16384 134821 ns 748525 ns
BM_StableSort_uint32_Random_65536 716644 ns 3453325 ns
BM_StableSort_uint32_Random_262144 3628062 ns 16065414 ns
BM_StableSort_uint32_Random_524288 10971334 ns 36567712 ns
BM_StableSort_uint32_Random_1048576 22688377 ns 77533497 ns
BM_StableSort_uint32_Ascending_1 3.57 ns 3.44 ns
BM_StableSort_uint32_Ascending_4 5.73 ns 5.33 ns
BM_StableSort_uint32_Ascending_16 14.5 ns 14.0 ns
BM_StableSort_uint32_Ascending_64 50.3 ns 51.3 ns
BM_StableSort_uint32_Ascending_256 465 ns 467 ns
BM_StableSort_uint32_Ascending_1024 3042 ns 2530 ns
BM_StableSort_uint32_Ascending_4096 9842 ns 12207 ns
BM_StableSort_uint32_Ascending_16384 37994 ns 61726 ns
BM_StableSort_uint32_Ascending_65536 148890 ns 294385 ns
BM_StableSort_uint32_Ascending_262144 855080 ns 1422167 ns
BM_StableSort_uint32_Ascending_524288 2154903 ns 3203018 ns
BM_StableSort_uint32_Ascending_1048576 5002518 ns 7563817 ns
BM_StableSort_uint32_Descending_1 3.51 ns 3.40 ns
BM_StableSort_uint32_Descending_4 9.09 ns 7.95 ns
BM_StableSort_uint32_Descending_16 54.8 ns 74.4 ns
BM_StableSort_uint32_Descending_64 1003 ns 1305 ns
BM_StableSort_uint32_Descending_256 4545 ns 5300 ns
BM_StableSort_uint32_Descending_1024 4361 ns 21884 ns
BM_StableSort_uint32_Descending_4096 16018 ns 90534 ns
BM_StableSort_uint32_Descending_16384 146274 ns 381943 ns
BM_StableSort_uint32_Descending_65536 938248 ns 1536806 ns
BM_StableSort_uint32_Descending_262144 3899300 ns 6387843 ns
BM_StableSort_uint32_Descending_524288 5808157 ns 21959858 ns
BM_StableSort_uint32_Descending_1048576 17520047 ns 26351912 ns
BM_StableSort_uint32_SingleElement_1 4.03 ns 3.97 ns
BM_StableSort_uint32_SingleElement_4 6.55 ns 6.41 ns
BM_StableSort_uint32_SingleElement_16 15.6 ns 15.8 ns
BM_StableSort_uint32_SingleElement_64 52.3 ns 58.7 ns
BM_StableSort_uint32_SingleElement_256 473 ns 485 ns
BM_StableSort_uint32_SingleElement_1024 3020 ns 2407 ns
BM_StableSort_uint32_SingleElement_4096 9998 ns 12527 ns
BM_StableSort_uint32_SingleElement_16384 38072 ns 62228 ns
BM_StableSort_uint32_SingleElement_65536 153706 ns 295662 ns
BM_StableSort_uint32_SingleElement_262144 836532 ns 1477099 ns
BM_StableSort_uint32_SingleElement_524288 2144900 ns 3157204 ns
BM_StableSort_uint32_SingleElement_1048576 4995525 ns 7617233 ns
BM_StableSort_uint32_PipeOrgan_1 4.02 ns 3.99 ns
BM_StableSort_uint32_PipeOrgan_4 6.97 ns 6.84 ns
BM_StableSort_uint32_PipeOrgan_16 26.1 ns 29.7 ns
BM_StableSort_uint32_PipeOrgan_64 266 ns 333 ns
BM_StableSort_uint32_PipeOrgan_256 2462 ns 2892 ns
BM_StableSort_uint32_PipeOrgan_1024 4291 ns 12431 ns
BM_StableSort_uint32_PipeOrgan_4096 15638 ns 51449 ns
BM_StableSort_uint32_PipeOrgan_16384 154563 ns 217460 ns
BM_StableSort_uint32_PipeOrgan_65536 907724 ns 925873 ns
BM_StableSort_uint32_PipeOrgan_262144 2394580 ns 4103575 ns
BM_StableSort_uint32_PipeOrgan_524288 4177145 ns 13947158 ns
BM_StableSort_uint32_PipeOrgan_1048576 11848224 ns 18807297 ns
BM_StableSort_uint32_QuickSortAdversary_1 3.50 ns 3.43 ns
BM_StableSort_uint32_QuickSortAdversary_4 5.88 ns 4.96 ns
BM_StableSort_uint32_QuickSortAdversary_16 14.6 ns 14.0 ns
BM_StableSort_uint32_QuickSortAdversary_64 576 ns 715 ns
BM_StableSort_uint32_QuickSortAdversary_256 2353 ns 2797 ns
BM_StableSort_uint32_QuickSortAdversary_1024 4176 ns 21775 ns
BM_StableSort_uint32_QuickSortAdversary_4096 15565 ns 96188 ns
BM_StableSort_uint32_QuickSortAdversary_16384 149092 ns 398332 ns
BM_StableSort_uint32_QuickSortAdversary_65536 902488 ns 1552393 ns
BM_StableSort_uint32_QuickSortAdversary_262144 3946517 ns 6560414 ns
BM_StableSort_uint32_QuickSortAdversary_524288 6247114 ns 22420977 ns
BM_StableSort_uint32_QuickSortAdversary_1048576 19892446 ns 26529576 ns
BM_StableSort_int64_Random_1 3.83 ns 3.98 ns
BM_StableSort_int64_Random_4 21.1 ns 24.0 ns
BM_StableSort_int64_Random_16 129 ns 136 ns
BM_StableSort_int64_Random_64 890 ns 906 ns
BM_StableSort_int64_Random_256 5542 ns 5901 ns
BM_StableSort_int64_Random_1024 16085 ns 33112 ns
BM_StableSort_int64_Random_4096 63895 ns 162181 ns
BM_StableSort_int64_Random_16384 348827 ns 790045 ns
BM_StableSort_int64_Random_65536 1488237 ns 3557506 ns
BM_StableSort_int64_Random_262144 8195713 ns 16315808 ns
BM_StableSort_int64_Random_524288 16586833 ns 38274075 ns
BM_StableSort_int64_Random_1048576 40346644 ns 79182089 ns
BM_StableSort_int64_Ascending_1 3.76 ns 3.55 ns
BM_StableSort_int64_Ascending_4 5.82 ns 6.19 ns
BM_StableSort_int64_Ascending_16 11.7 ns 11.8 ns
BM_StableSort_int64_Ascending_64 32.9 ns 36.8 ns
BM_StableSort_int64_Ascending_256 415 ns 550 ns
BM_StableSort_int64_Ascending_1024 5352 ns 3347 ns
BM_StableSort_int64_Ascending_4096 17516 ns 19134 ns
BM_StableSort_int64_Ascending_16384 64147 ns 91099 ns
BM_StableSort_int64_Ascending_65536 322126 ns 434009 ns
BM_StableSort_int64_Ascending_262144 1554669 ns 2057056 ns
BM_StableSort_int64_Ascending_524288 3656527 ns 5016650 ns
BM_StableSort_int64_Ascending_1048576 10469979 ns 12908613 ns
BM_StableSort_int64_Descending_1 4.09 ns 3.35 ns
BM_StableSort_int64_Descending_4 9.13 ns 8.01 ns
BM_StableSort_int64_Descending_16 76.8 ns 92.9 ns
BM_StableSort_int64_Descending_64 1336 ns 1417 ns
BM_StableSort_int64_Descending_256 5525 ns 5674 ns
BM_StableSort_int64_Descending_1024 17461 ns 22558 ns
BM_StableSort_int64_Descending_4096 64285 ns 102360 ns
BM_StableSort_int64_Descending_16384 336946 ns 388940 ns
BM_StableSort_int64_Descending_65536 837912 ns 1662169 ns
BM_StableSort_int64_Descending_262144 3680806 ns 7494323 ns
BM_StableSort_int64_Descending_524288 11023784 ns 24935033 ns
BM_StableSort_int64_Descending_1048576 20023568 ns 33220712 ns
BM_StableSort_int64_SingleElement_1 3.37 ns 3.98 ns
BM_StableSort_int64_SingleElement_4 5.32 ns 6.92 ns
BM_StableSort_int64_SingleElement_16 10.9 ns 13.3 ns
BM_StableSort_int64_SingleElement_64 32.1 ns 43.8 ns
BM_StableSort_int64_SingleElement_256 420 ns 541 ns
BM_StableSort_int64_SingleElement_1024 5689 ns 3381 ns
BM_StableSort_int64_SingleElement_4096 19199 ns 17989 ns
BM_StableSort_int64_SingleElement_16384 75754 ns 91963 ns
BM_StableSort_int64_SingleElement_65536 357106 ns 500326 ns
BM_StableSort_int64_SingleElement_262144 1672975 ns 2417734 ns
BM_StableSort_int64_SingleElement_524288 3642891 ns 5200878 ns
BM_StableSort_int64_SingleElement_1048576 11172007 ns 13729511 ns
BM_StableSort_int64_PipeOrgan_1 3.38 ns 3.94 ns
BM_StableSort_int64_PipeOrgan_4 5.73 ns 6.44 ns
BM_StableSort_int64_PipeOrgan_16 27.5 ns 29.0 ns
BM_StableSort_int64_PipeOrgan_64 310 ns 321 ns
BM_StableSort_int64_PipeOrgan_256 2761 ns 2918 ns
BM_StableSort_int64_PipeOrgan_1024 16105 ns 12525 ns
BM_StableSort_int64_PipeOrgan_4096 65289 ns 59990 ns
BM_StableSort_int64_PipeOrgan_16384 341757 ns 270636 ns
BM_StableSort_int64_PipeOrgan_65536 587452 ns 1126132 ns
BM_StableSort_int64_PipeOrgan_262144 2837955 ns 5034180 ns
BM_StableSort_int64_PipeOrgan_524288 6617313 ns 15267354 ns
BM_StableSort_int64_PipeOrgan_1048576 15208796 ns 23162989 ns
BM_StableSort_int64_QuickSortAdversary_1 3.77 ns 3.45 ns
BM_StableSort_int64_QuickSortAdversary_4 5.55 ns 5.20 ns
BM_StableSort_int64_QuickSortAdversary_16 12.5 ns 11.5 ns
BM_StableSort_int64_QuickSortAdversary_64 646 ns 750 ns
BM_StableSort_int64_QuickSortAdversary_256 2655 ns 3539 ns
BM_StableSort_int64_QuickSortAdversary_1024 16373 ns 22349 ns
BM_StableSort_int64_QuickSortAdversary_4096 62306 ns 97248 ns
BM_StableSort_int64_QuickSortAdversary_16384 321755 ns 388084 ns
BM_StableSort_int64_QuickSortAdversary_65536 1374694 ns 1596091 ns
BM_StableSort_int64_QuickSortAdversary_262144 4374661 ns 6894139 ns
BM_StableSort_int64_QuickSortAdversary_524288 12736074 ns 23932229 ns
BM_StableSort_int64_QuickSortAdversary_1048576 22615219 ns 33355629 ns
BM_StableSort_uint64_Random_1 3.82 ns 3.49 ns
BM_StableSort_uint64_Random_4 22.4 ns 23.4 ns
BM_StableSort_uint64_Random_16 154 ns 146 ns
BM_StableSort_uint64_Random_64 924 ns 926 ns
BM_StableSort_uint64_Random_256 5864 ns 5913 ns
BM_StableSort_uint64_Random_1024 7168 ns 31746 ns
BM_StableSort_uint64_Random_4096 27668 ns 154224 ns
BM_StableSort_uint64_Random_16384 219526 ns 755205 ns
BM_StableSort_uint64_Random_65536 965251 ns 3490165 ns
BM_StableSort_uint64_Random_262144 6262162 ns 15889589 ns
BM_StableSort_uint64_Random_524288 12530078 ns 36458581 ns
BM_StableSort_uint64_Random_1048576 38462191 ns 75168445 ns
BM_StableSort_uint64_Ascending_1 3.30 ns 3.35 ns
BM_StableSort_uint64_Ascending_4 5.65 ns 5.84 ns
BM_StableSort_uint64_Ascending_16 14.7 ns 12.6 ns
BM_StableSort_uint64_Ascending_64 55.3 ns 34.6 ns
BM_StableSort_uint64_Ascending_256 513 ns 533 ns
BM_StableSort_uint64_Ascending_1024 5541 ns 3189 ns
BM_StableSort_uint64_Ascending_4096 17706 ns 20326 ns
BM_StableSort_uint64_Ascending_16384 66420 ns 93757 ns
BM_StableSort_uint64_Ascending_65536 341425 ns 435016 ns
BM_StableSort_uint64_Ascending_262144 1595691 ns 2088317 ns
BM_StableSort_uint64_Ascending_524288 3808703 ns 5092832 ns
BM_StableSort_uint64_Ascending_1048576 11060417 ns 13023250 ns
BM_StableSort_uint64_Descending_1 3.29 ns 3.35 ns
BM_StableSort_uint64_Descending_4 8.65 ns 7.92 ns
BM_StableSort_uint64_Descending_16 54.7 ns 80.2 ns
BM_StableSort_uint64_Descending_64 1028 ns 1307 ns
BM_StableSort_uint64_Descending_256 4521 ns 5635 ns
BM_StableSort_uint64_Descending_1024 7122 ns 23323 ns
BM_StableSort_uint64_Descending_4096 30538 ns 95892 ns
BM_StableSort_uint64_Descending_16384 195565 ns 392721 ns
BM_StableSort_uint64_Descending_65536 852002 ns 1720358 ns
BM_StableSort_uint64_Descending_262144 3737884 ns 7484130 ns
BM_StableSort_uint64_Descending_524288 11159345 ns 25690770 ns
BM_StableSort_uint64_Descending_1048576 20648864 ns 33057383 ns
BM_StableSort_uint64_SingleElement_1 3.62 ns 4.10 ns
BM_StableSort_uint64_SingleElement_4 6.73 ns 6.64 ns
BM_StableSort_uint64_SingleElement_16 14.9 ns 11.3 ns
BM_StableSort_uint64_SingleElement_64 52.0 ns 33.0 ns
BM_StableSort_uint64_SingleElement_256 511 ns 582 ns
BM_StableSort_uint64_SingleElement_1024 6499 ns 3287 ns
BM_StableSort_uint64_SingleElement_4096 22190 ns 17616 ns
BM_StableSort_uint64_SingleElement_16384 84378 ns 86885 ns
BM_StableSort_uint64_SingleElement_65536 466257 ns 457144 ns
BM_StableSort_uint64_SingleElement_262144 1993687 ns 2361999 ns
BM_StableSort_uint64_SingleElement_524288 4759565 ns 5096771 ns
BM_StableSort_uint64_SingleElement_1048576 12426111 ns 13468453 ns
BM_StableSort_uint64_PipeOrgan_1 3.73 ns 3.94 ns
BM_StableSort_uint64_PipeOrgan_4 7.18 ns 7.54 ns
BM_StableSort_uint64_PipeOrgan_16 25.2 ns 29.1 ns
BM_StableSort_uint64_PipeOrgan_64 260 ns 321 ns
BM_StableSort_uint64_PipeOrgan_256 2468 ns 2970 ns
BM_StableSort_uint64_PipeOrgan_1024 7025 ns 12912 ns
BM_StableSort_uint64_PipeOrgan_4096 28968 ns 53379 ns
BM_StableSort_uint64_PipeOrgan_16384 194156 ns 239790 ns
BM_StableSort_uint64_PipeOrgan_65536 599491 ns 993800 ns
BM_StableSort_uint64_PipeOrgan_262144 2648585 ns 4689680 ns
BM_StableSort_uint64_PipeOrgan_524288 7621109 ns 15401808 ns
BM_StableSort_uint64_PipeOrgan_1048576 15608814 ns 23484821 ns
BM_StableSort_uint64_QuickSortAdversary_1 3.38 ns 3.54 ns
BM_StableSort_uint64_QuickSortAdversary_4 5.50 ns 6.03 ns
BM_StableSort_uint64_QuickSortAdversary_16 14.2 ns 11.0 ns
BM_StableSort_uint64_QuickSortAdversary_64 597 ns 688 ns
BM_StableSort_uint64_QuickSortAdversary_256 2446 ns 2818 ns
BM_StableSort_uint64_QuickSortAdversary_1024 7266 ns 20319 ns
BM_StableSort_uint64_QuickSortAdversary_4096 31155 ns 89112 ns
BM_StableSort_uint64_QuickSortAdversary_16384 201033 ns 390574 ns
BM_StableSort_uint64_QuickSortAdversary_65536 871014 ns 1685639 ns
BM_StableSort_uint64_QuickSortAdversary_262144 3978535 ns 7265830 ns
BM_StableSort_uint64_QuickSortAdversary_524288 10279721 ns 25350004 ns
BM_StableSort_uint64_QuickSortAdversary_1048576 20256585 ns 33054393 ns
```
>From 5717ad80b633d5a2005b98bc35001c3cd0a47887 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?=
=?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= <dmitriy at izvolov.ru>
Date: Sun, 5 May 2024 11:08:26 +0300
Subject: [PATCH 1/2] radix-sort
---
libcxx/include/CMakeLists.txt | 1 +
libcxx/include/__algorithm/radix_sort.h | 410 +++++++++++++++++++++++
libcxx/include/__algorithm/stable_sort.h | 111 ++++--
libcxx/include/module.modulemap | 1 +
4 files changed, 491 insertions(+), 32 deletions(-)
create mode 100644 libcxx/include/__algorithm/radix_sort.h
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 32579272858a8e..95e4e3faf88671 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -74,6 +74,7 @@ set(files
__algorithm/prev_permutation.h
__algorithm/pstl.h
__algorithm/push_heap.h
+ __algorithm/radix_sort.h
__algorithm/ranges_adjacent_find.h
__algorithm/ranges_all_of.h
__algorithm/ranges_any_of.h
diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
new file mode 100644
index 00000000000000..5e14dec9df0918
--- /dev/null
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -0,0 +1,410 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RADIX_SORT_H
+#define _LIBCPP___ALGORITHM_RADIX_SORT_H
+
+#include <__algorithm/copy.h>
+#include <__algorithm/for_each.h>
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/move_iterator.h>
+#include <__iterator/next.h>
+#include <__numeric/partial_sum.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_assignable.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_unsigned.h>
+#include <__type_traits/make_unsigned.h>
+#include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
+#include <climits>
+#include <cstdint>
+#include <initializer_list>
+#include <limits>
+#include <stdexcept>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+// No-op sink for EXPAND_VARIADIC: the elements of the braced initializer list passed here are evaluated in order, which
+// is what sequences the expanded pack expressions. The argument itself is ignored.
+inline _LIBCPP_HIDE_FROM_ABI void __variadic_expansion_dummy(initializer_list<int>) {}
+
+// Evaluates `expression` once per element of the parameter pack it mentions, by expanding `(expression, 0)` inside a
+// braced initializer_list<int>; the value of each evaluation is discarded by the comma operator.
+// NOTE(review): EXPAND_VARIADIC is not a reserved identifier. It is #undef'd at the end of this section, but a user
+// macro of the same name defined before inclusion would be clobbered -- consider a _LIBCPP_-prefixed name.
+# define EXPAND_VARIADIC(expression) __variadic_expansion_dummy({(expression, 0)...})
+
+// Wraps `__i` in a move_iterator when the iterator's value_type is move-assignable, so that algorithms reading through
+// the returned iterator move elements out of the source instead of copying them.
+template <typename _Iterator>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __move_assign_please(_Iterator __i)
+    -> enable_if_t<is_move_assignable<typename iterator_traits<_Iterator>::value_type>::value,
+                   move_iterator<_Iterator> > {
+  return std::make_move_iterator(std::move(__i));
+}
+
+// Overload selected when the value_type is not move-assignable: the iterator is returned unchanged.
+template <typename _Iterator>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __move_assign_please(_Iterator __i)
+    -> enable_if_t<!is_move_assignable<typename iterator_traits<_Iterator>::value_type>::value, _Iterator> {
+  return __i;
+}
+
+// Counts how many times `__integer` can be shifted right by one bit while the shifted value stays positive. For a
+// positive argument this is floor(log2(__integer)); for 0 and 1 the result is 0.
+template <typename _Integer>
+_LIBCPP_HIDE_FROM_ABI constexpr _Integer __intlog2_impl(_Integer __integer) {
+  auto __degree = _Integer{0};
+
+  while ((__integer >>= 1) > 0) {
+    ++__degree;
+  }
+
+  return __degree;
+}
+
+// Integer binary logarithm (rounded down) of a positive integer.
+// Reports a domain_error for zero or negative arguments.
+template <typename _Integer>
+_LIBCPP_HIDE_FROM_ABI constexpr _Integer __intlog2(_Integer __integer) {
+  static_assert(is_integral<_Integer>::value, "Must be an integral type");
+
+  // A bare `throw` expression does not compile with -fno-exceptions. __throw_domain_error throws domain_error when
+  // exceptions are enabled and aborts otherwise. It is not constexpr, so passing a non-positive value in a constant
+  // expression is still rejected at compile time, exactly as with the throw expression.
+  if (!(__integer > 0))
+    std::__throw_domain_error("The binary logarithm is not defined on non-positive numbers");
+
+  return std::__intlog2_impl(__integer);
+}
+
+// Writes the inclusive prefix sums of [__first, __last) to `__result` and tracks the largest input element in the same
+// pass. Returns the end of the written output paired with that maximum. For an empty input nothing is written and the
+// reported maximum is 0.
+template <typename _InputIterator, typename _OutputIterator>
+_LIBCPP_HIDE_FROM_ABI pair<_OutputIterator, typename iterator_traits<_InputIterator>::value_type>
+__partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) {
+  using __sum_type = typename iterator_traits<_InputIterator>::value_type;
+
+  if (__first == __last)
+    return {__result, 0};
+
+  auto __max       = *__first;
+  __sum_type __sum = *__first;
+  *__result        = __sum;
+
+  while (++__first != __last) {
+    if (__max < *__first) {
+      __max = *__first;
+    }
+    __sum       = std::move(__sum) + *__first;
+    *++__result = __sum;
+  }
+  return {++__result, __max};
+}
+
+// Compile-time parameters of a radix sort over `_Value` elements, where `_Map` produces the sort key of an element and
+// `_Radix` extracts one digit from a key.
+template <typename _Value, typename _Map, typename _Radix>
+struct __radix_sort_traits {
+ // Decayed result of invoking _Map on a _Value; must be an unsigned integral type.
+ using image_type = decay_t<invoke_result_t<_Map, _Value> >;
+ static_assert(is_integral<image_type>::value, "");
+ static_assert(is_unsigned<image_type>::value, "");
+
+ // Decayed result of invoking _Radix on an image; must be an integral type.
+ using radix_type = decay_t<invoke_result_t<_Radix, image_type> >;
+ static_assert(is_integral<radix_type>::value, "");
+
+ // One more than the largest radix_type value, i.e. the size of a per-digit counter table.
+ constexpr static auto radix_value_range = numeric_limits<radix_type>::max() + 1;
+ // Binary logarithm of radix_value_range: the number of bits covered by one digit.
+ constexpr static auto radix_size = __intlog2<uint64_t>(radix_value_range);
+ // Number of digits needed to cover every bit of image_type.
+ constexpr static auto radix_count = sizeof(image_type) * CHAR_BIT / radix_size;
+};
+
+// Compile-time parameters of a counting sort over `_Value` elements keyed by `_Map`.
+template <typename _Value, typename _Map>
+struct __counting_sort_traits {
+ // Decayed result of invoking _Map on a _Value; must be an unsigned integral type.
+ using image_type = decay_t<invoke_result_t<_Map, _Value> >;
+ static_assert(is_integral<image_type>::value, "");
+ static_assert(is_unsigned<image_type>::value, "");
+
+ // One more than the largest image_type value, i.e. the number of distinct keys.
+ // NOTE(review): when image_type is not promoted to a wider type (unsigned int and wider), max() + 1 wraps to 0 and
+ // the __intlog2 call below is then not a constant expression, so such instantiations fail to compile.
+ constexpr static const auto value_range = numeric_limits<image_type>::max() + 1;
+ // Binary logarithm of value_range: the number of bits in a key.
+ constexpr static auto radix_size = __intlog2<uint64_t>(value_range);
+};
+
+// Returns a callable that extracts digit number `__radix_number` from an unsigned integer: the argument is shifted
+// right by `__radix_number` digit widths (the width is derived from the result type of `_Radix`) and the shifted value
+// is passed to `__radix`.
+template <typename _Radix>
+_LIBCPP_HIDE_FROM_ABI auto __nth_radix(size_t __radix_number, _Radix __radix) {
+  return [__radix_number, __radix = std::move(__radix)](auto __n) {
+    using __value_type = decltype(__n);
+    static_assert(is_integral<__value_type>::value, "");
+    static_assert(is_unsigned<__value_type>::value, "");
+    using __traits = __counting_sort_traits<__value_type, _Radix>;
+
+    // `*` already binds tighter than `>>`; the parentheses only make the shift amount explicit.
+    return __radix(static_cast<__value_type>(__n >> (__traits::radix_size * __radix_number)));
+  };
+}
+
+// Builds a histogram: for every element of [__first, __last), increments `__counters[__map(element)]`.
+template <typename _ForwardIterator, typename _Map, typename _RandomAccessIterator>
+_LIBCPP_HIDE_FROM_ABI void
+__count(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+  std::for_each(__first, __last, [&__counters, &__map](const auto& __preimage) { ++__counters[__map(__preimage)]; });
+}
+
+// Counts the keys of [__first, __last) into `__counters` and then replaces the counts with their inclusive prefix sums
+// over the whole key range, so that `__counters[k]` becomes the number of elements whose key is <= k.
+template <typename _ForwardIterator, typename _Map, typename _RandomAccessIterator>
+_LIBCPP_HIDE_FROM_ABI void
+__collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+  using __value_type = typename iterator_traits<_ForwardIterator>::value_type;
+  using __traits     = __counting_sort_traits<__value_type, _Map>;
+
+  std::__count(__first, __last, __map, __counters);
+
+  const auto __counters_end = __counters + __traits::value_range;
+  std::partial_sum(__counters, __counters_end, __counters);
+}
+
+// Scatters [__first, __last) into `__result` front to back: each element is written to the slot currently stored in
+// `__counters[__map(element)]`, and that slot is then advanced. Elements are forwarded, so a range of move iterators is
+// moved from rather than copied.
+template <typename _ForwardIterator, typename _RandomAccessIterator1, typename _Map, typename _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI void
+__dispose(_ForwardIterator __first,
+          _ForwardIterator __last,
+          _RandomAccessIterator1 __result,
+          _Map __map,
+          _RandomAccessIterator2 __counters) {
+  std::for_each(__first, __last, [&__result, &__counters, &__map](auto&& __preimage) {
+    auto __index      = __counters[__map(__preimage)]++;
+    __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+  });
+}
+
+// Scatters [__first, __last) into `__result`, visiting the input back to front: for each element the counter selected
+// by `__map(element)` is decremented first and the element is then forwarded into `__result` at the decremented index.
+// NOTE(review): this name is not reserved (no leading double underscore), so it is injected into namespace std as a
+// user-visible identifier. The body is the same as `__dispose_backward` defined later in this header, and no call to
+// this spelling appears in this header -- it looks like a leftover that should be removed.
+template <typename _BidirectionalIterator,
+ typename _RandomAccessIterator1,
+ typename _Map,
+ typename _RandomAccessIterator2>
+void dispose_backward(_BidirectionalIterator __first,
+ _BidirectionalIterator __last,
+ _RandomAccessIterator1 __result,
+ _Map __map,
+ _RandomAccessIterator2 __counters) {
+ std::for_each(make_reverse_iterator(__last),
+ make_reverse_iterator(__first),
+ [&__result, &__counters, &__map](auto&& __preimage) {
+ auto __index = --__counters[__map(__preimage)];
+ __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+ });
+}
+
+// Single pass over [__first, __last) that
+//   * fills one histogram per digit index in `_Radices` (`__counters[digit][digit_value]`), and
+//   * checks whether the mapped keys are already in non-decreasing order.
+// Afterwards every histogram is replaced by its inclusive prefix sums and the largest bucket count of each digit is
+// stored in `__maximums[digit]`. Returns true when the keys were already in non-decreasing order.
+template <typename _ForwardIterator,
+          typename _Map,
+          typename _Radix,
+          typename _RandomAccessIterator1,
+          typename _RandomAccessIterator2,
+          size_t... _Radices>
+_LIBCPP_HIDE_FROM_ABI bool __collect_impl(
+    _ForwardIterator __first,
+    _ForwardIterator __last,
+    _Map __map,
+    _Radix __radix,
+    _RandomAccessIterator1 __counters,
+    _RandomAccessIterator2 __maximums,
+    index_sequence<_Radices...>) {
+  using __value_type                 = typename iterator_traits<_ForwardIterator>::value_type;
+  constexpr auto __radix_value_range = __radix_sort_traits<__value_type, _Map, _Radix>::radix_value_range;
+
+  // Start from the smallest possible key so that the first comparison always succeeds.
+  auto __previous  = numeric_limits<invoke_result_t<_Map, __value_type> >::min();
+  auto __is_sorted = true;
+  std::for_each(__first, __last, [&__counters, &__map, &__radix, &__previous, &__is_sorted](const auto& __value) {
+    auto __current = __map(__value);
+    __is_sorted &= (__current >= __previous);
+    __previous = __current;
+
+    EXPAND_VARIADIC(++__counters[_Radices][std::__nth_radix(_Radices, __radix)(__current)]);
+  });
+
+  EXPAND_VARIADIC(
+      __maximums[_Radices] =
+          std::__partial_sum_max(
+              __counters[_Radices], __counters[_Radices] + __radix_value_range, __counters[_Radices])
+              .second);
+
+  return __is_sorted;
+}
+
+// Radix-sort flavour of __collect: forwards to __collect_impl with one index per digit of the key type. Returns what
+// __collect_impl returns (true when the keys are already in non-decreasing order).
+template <typename _ForwardIterator,
+          typename _Map,
+          typename _Radix,
+          typename _RandomAccessIterator1,
+          typename _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI bool
+__collect(_ForwardIterator __first,
+          _ForwardIterator __last,
+          _Map __map,
+          _Radix __radix,
+          _RandomAccessIterator1 __counters,
+          _RandomAccessIterator2 __maximums) {
+  using __value_type           = typename iterator_traits<_ForwardIterator>::value_type;
+  constexpr auto __radix_count = __radix_sort_traits<__value_type, _Map, _Radix>::radix_count;
+  return std::__collect_impl(
+      __first, __last, __map, __radix, __counters, __maximums, make_index_sequence<__radix_count>());
+}
+
+// Scatters [__first, __last) into `__result`, visiting the input back to front: for each element the counter selected
+// by `__map(element)` is decremented first and the element is then forwarded into `__result` at the decremented index.
+// With inclusive prefix sums in `__counters`, equal keys therefore keep their relative order.
+template <typename _BidirectionalIterator,
+          typename _RandomAccessIterator1,
+          typename _Map,
+          typename _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI void
+__dispose_backward(_BidirectionalIterator __first,
+                   _BidirectionalIterator __last,
+                   _RandomAccessIterator1 __result,
+                   _Map __map,
+                   _RandomAccessIterator2 __counters) {
+  std::for_each(std::make_reverse_iterator(__last),
+                std::make_reverse_iterator(__first),
+                [&__result, &__counters, &__map](auto&& __preimage) {
+                  auto __index      = --__counters[__map(__preimage)];
+                  __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+                });
+}
+
+// Stable counting sort of [__first, __last) into `__result`, keyed by `__map`. Returns the end of the output range.
+template <typename _ForwardIterator, typename _RandomAccessIterator, typename _Map>
+_LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
+__counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) {
+  using __value_type = typename iterator_traits<_ForwardIterator>::value_type;
+  using __traits     = __counting_sort_traits<__value_type, _Map>;
+
+  using __difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
+  // One extra leading slot: the prefix sums are written starting at index 1, so that after __collect `__counters[k]`
+  // is the output position of the first element with key k and the last slot holds the total element count.
+  __difference_type __counters[__traits::value_range + 1] = {0};
+
+  std::__collect(__first, __last, __map, std::next(std::begin(__counters)));
+  std::__dispose(__first, __last, __result, __map, std::begin(__counters));
+
+  return __result + __counters[__traits::value_range];
+}
+
+// Radix sort for keys that consist of a single digit: one counting sort from [__first, __last) into `__buffer`,
+// followed by a copy (a move, when the value type is move-assignable) of the sorted buffer back into the input range.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+_LIBCPP_HIDE_FROM_ABI typename enable_if<
+    __radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::radix_count == 1,
+    void>::type
+__radix_sort_impl(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer,
+                  _Map __map,
+                  _Radix __radix) {
+  auto __digit      = [&__map, &__radix](const auto& __value) { return __radix(__map(__value)); };
+  auto __buffer_end = std::__counting_sort_impl(
+      std::__move_assign_please(__first), std::__move_assign_please(__last), __buffer, __digit);
+
+  std::copy(std::__move_assign_please(__buffer), std::__move_assign_please(__buffer_end), __first);
+}
+
+// Radix sort for keys with an even number of digits. Digits are processed in pairs: the first digit of a pair is
+// scattered from the input range into the buffer and the second one from the buffer back into the input range, so the
+// data is back in [__first, __last) after every pair.
+//
+// Shortcuts:
+//   * if the keys are already in non-decreasing order, nothing is moved at all;
+//   * if every element has the same value in a digit (its largest bucket holds the whole range), that pass degenerates
+//     to a plain copy, and when this holds for both digits of a pair the pair is skipped entirely.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+_LIBCPP_HIDE_FROM_ABI typename enable_if<
+    __radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::radix_count % 2 ==
+        0,
+    void>::type
+__radix_sort_impl(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer_begin,
+                  _Map __map,
+                  _Radix __radix) {
+  using __value_type = typename iterator_traits<_RandomAccessIterator1>::value_type;
+  using __traits     = __radix_sort_traits<__value_type, _Map, _Radix>;
+
+  using __difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
+  __difference_type __counters[__traits::radix_count][__traits::radix_value_range] = {{0}};
+  __difference_type __maximums[__traits::radix_count]                              = {0};
+
+  const auto __is_sorted = std::__collect(__first, __last, __map, __radix, __counters, __maximums);
+  if (__is_sorted)
+    return;
+
+  const auto __range_size = std::distance(__first, __last);
+  auto __buffer_end       = __buffer_begin + __range_size;
+  for (size_t __radix_number = 0; __radix_number < __traits::radix_count; __radix_number += 2) {
+    const auto __n0th_is_single = __maximums[__radix_number] == __range_size;
+    const auto __n1th_is_single = __maximums[__radix_number + 1] == __range_size;
+
+    if (__n0th_is_single && __n1th_is_single) {
+      continue;
+    }
+
+    // First digit of the pair: input range -> buffer.
+    if (__n0th_is_single) {
+      std::copy(std::__move_assign_please(__first), std::__move_assign_please(__last), __buffer_begin);
+    } else {
+      auto __n0th = [__radix_number, &__map, &__radix](const auto& __v) {
+        return std::__nth_radix(__radix_number, __radix)(__map(__v));
+      };
+      std::__dispose_backward(
+          std::__move_assign_please(__first),
+          std::__move_assign_please(__last),
+          __buffer_begin,
+          __n0th,
+          __counters[__radix_number]);
+    }
+
+    // Second digit of the pair: buffer -> input range.
+    if (__n1th_is_single) {
+      std::copy(std::__move_assign_please(__buffer_begin), std::__move_assign_please(__buffer_end), __first);
+    } else {
+      auto __n1th = [__radix_number, &__map, &__radix](const auto& __v) {
+        return std::__nth_radix(__radix_number + 1, __radix)(__map(__v));
+      };
+      std::__dispose_backward(
+          std::__move_assign_please(__buffer_begin),
+          std::__move_assign_please(__buffer_end),
+          __first,
+          __n1th,
+          __counters[__radix_number + 1]);
+    }
+  }
+}
+
+// Maps an integer to an unsigned integer of the same width such that the unsigned order of the results matches the
+// natural order of the arguments. bool is returned unchanged.
+_LIBCPP_HIDE_FROM_ABI constexpr auto __to_unsigned(bool __b) { return __b; }
+
+// XOR with numeric_limits<_Ip>::min() flips the sign bit of signed types (so negative values sort before non-negative
+// ones once reinterpreted as unsigned) and is a no-op for unsigned types, whose minimum is 0.
+template <typename _Ip>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __to_unsigned(_Ip __n) {
+  constexpr const auto __min_value = numeric_limits<_Ip>::min();
+  return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value);
+}
+
+// Function object that perfectly forwards its argument back to the caller. Used as the default key map.
+struct __identity_fn {
+  template <typename _Tp>
+  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator()(_Tp&& __value) const {
+    return std::forward<_Tp>(__value);
+  }
+};
+
+// Function object that returns the least significant 8 bits of an unsigned integer. Used as the default digit
+// extractor, which makes the radix sort process keys one byte at a time.
+struct __low_byte_fn {
+  template <typename _Ip>
+  _LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const {
+    static_assert(is_integral<_Ip>::value, "");
+    static_assert(is_unsigned<_Ip>::value, "");
+
+    return static_cast<uint8_t>(__integer & 0xff);
+  }
+};
+
+// Sorts [__first, __last) by the integer key `__map(element)`, using `__buffer` as scratch space of at least the same
+// length. The key is converted with __to_unsigned first so that signed keys are ordered correctly; `__radix` extracts
+// one digit from the converted key.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+_LIBCPP_HIDE_FROM_ABI void
+__radix_sort(_RandomAccessIterator1 __first,
+             _RandomAccessIterator1 __last,
+             _RandomAccessIterator2 __buffer,
+             _Map __map,
+             _Radix __radix) {
+  auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__to_unsigned(__map(__x)); };
+  std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix);
+}
+
+// Convenience overload: sorts integers by their own value (identity key map), one byte per pass.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI void
+__radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer) {
+  std::__radix_sort(__first, __last, __buffer, __identity_fn{}, __low_byte_fn{});
+}
+
+// Tag-dispatched entry point, "radix sort enabled" case: sorts the range by value, one byte per pass, and returns true
+// to report that the range has been sorted.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI bool __radix_sort(
+    _RandomAccessIterator1 __first,
+    _RandomAccessIterator1 __last,
+    _RandomAccessIterator2 __buffer,
+    _BoolConstant<true>) {
+  std::__radix_sort(__first, __last, __buffer, __identity_fn{}, __low_byte_fn{});
+  return true;
+}
+
+// Tag-dispatched entry point, "radix sort disabled" case: leaves the range untouched and returns false to report that
+// nothing has been sorted.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI bool
+__radix_sort(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BoolConstant<false>) {
+  return false;
+}
+
+# undef EXPAND_VARIADIC
+
+#else // _LIBCPP_STD_VER > 14
+
+// Fallback for language modes in which the radix sort implementation above is not compiled: leaves the range
+// untouched and returns false for either tag value.
+// The template parameter is not called `_B` because some platforms' <ctype.h> define a macro with that name.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, bool _EnableRadixSort>
+_LIBCPP_HIDE_FROM_ABI bool
+__radix_sort(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BoolConstant<_EnableRadixSort>) {
+  return false;
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___ALGORITHM_RADIX_SORT_H
diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index 726e7e16b3564a..f8624726a4e323 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -13,6 +13,7 @@
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/inplace_merge.h>
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/radix_sort.h>
#include <__algorithm/sort.h>
#include <__config>
#include <__debug_utils/strict_weak_ordering_check.h>
@@ -20,6 +21,9 @@
#include <__memory/destruct_n.h>
#include <__memory/temporary_buffer.h>
#include <__memory/unique_ptr.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -133,20 +137,24 @@ _LIBCPP_HIDE_FROM_ABI void __merge_move_assign(
*__result = _Ops::__iter_move(__first2);
}
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort(_RandomAccessIterator __first,
- _RandomAccessIterator __last,
- _Compare __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
- ptrdiff_t __buff_size);
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _EnableRadixSort>
+void __stable_sort(
+ _RandomAccessIterator __first,
+ _RandomAccessIterator __last,
+ _Compare __comp,
+ typename iterator_traits<_RandomAccessIterator>::difference_type __len,
+ typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
+ ptrdiff_t __buff_size,
+ _BoolConstant<_EnableRadixSort>);
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort_move(_RandomAccessIterator __first1,
- _RandomAccessIterator __last1,
- _Compare __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- typename iterator_traits<_RandomAccessIterator>::value_type* __first2) {
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _EnableRadixSort>
+void __stable_sort_move(
+ _RandomAccessIterator __first1,
+ _RandomAccessIterator __last1,
+ _Compare __comp,
+ typename iterator_traits<_RandomAccessIterator>::difference_type __len,
+ typename iterator_traits<_RandomAccessIterator>::value_type* __first2,
+ _BoolConstant<_EnableRadixSort> __rs) {
using _Ops = _IterOps<_AlgPolicy>;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
@@ -179,8 +187,8 @@ void __stable_sort_move(_RandomAccessIterator __first1,
}
typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2;
_RandomAccessIterator __m = __first1 + __l2;
- std::__stable_sort<_AlgPolicy, _Compare>(__first1, __m, __comp, __l2, __first2, __l2);
- std::__stable_sort<_AlgPolicy, _Compare>(__m, __last1, __comp, __len - __l2, __first2 + __l2, __len - __l2);
+ std::__stable_sort<_AlgPolicy, _Compare>(__first1, __m, __comp, __l2, __first2, __l2, __rs);
+ std::__stable_sort<_AlgPolicy, _Compare>(__m, __last1, __comp, __len - __l2, __first2 + __l2, __len - __l2, __rs);
std::__merge_move_construct<_AlgPolicy, _Compare>(__first1, __m, __m, __last1, __first2, __comp);
}
@@ -189,13 +197,35 @@ struct __stable_sort_switch {
static const unsigned value = 128 * is_trivially_copy_assignable<_Tp>::value;
};
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort(_RandomAccessIterator __first,
- _RandomAccessIterator __last,
- _Compare __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
- ptrdiff_t __buff_size) {
+// Smallest range length for which __stable_sort attempts radix sort.
+template <class _Tp, class = void>
+struct __radix_sort_min_switch {
+  static const unsigned value = (1 << 10);
+};
+
+// One-byte integers use a lower threshold. Written with __enable_if_t and is_integral<>::value because this header
+// also has to compile before C++17, where enable_if_t / is_integral_v are not available.
+template <class _Int8>
+struct __radix_sort_min_switch<_Int8, __enable_if_t<is_integral<_Int8>::value && sizeof(_Int8) == 1> > {
+  static const unsigned value = (1 << 8);
+};
+
+// Largest range length for which __stable_sort attempts radix sort.
+template <class _Tp, class = void>
+struct __radix_sort_max_switch {
+  static const unsigned value = (1 << 16);
+};
+
+// Eight-byte integers use a lower threshold. Written with __enable_if_t and is_integral<>::value because this header
+// also has to compile before C++17, where enable_if_t / is_integral_v are not available.
+template <class _Int64>
+struct __radix_sort_max_switch<_Int64, __enable_if_t<is_integral<_Int64>::value && sizeof(_Int64) == 8> > {
+  static const unsigned value = (1 << 15);
+};
+
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _EnableRadixSort>
+void __stable_sort(
+ _RandomAccessIterator __first,
+ _RandomAccessIterator __last,
+ _Compare __comp,
+ typename iterator_traits<_RandomAccessIterator>::difference_type __len,
+ typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
+ ptrdiff_t __buff_size,
+ _BoolConstant<_EnableRadixSort> __rs) {
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
switch (__len) {
@@ -211,14 +241,20 @@ void __stable_sort(_RandomAccessIterator __first,
std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
return;
}
+ if (__len <= __buff_size && __len >= static_cast<difference_type>(__radix_sort_min_switch<value_type>::value) &&
+ __len <= static_cast<difference_type>(__radix_sort_max_switch<value_type>::value)) {
+ if (std::__radix_sort(__first, __last, __buff, __rs)) {
+ return;
+ }
+ }
typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2;
_RandomAccessIterator __m = __first + __l2;
if (__len <= __buff_size) {
__destruct_n __d(0);
unique_ptr<value_type, __destruct_n&> __h2(__buff, __d);
- std::__stable_sort_move<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff);
+ std::__stable_sort_move<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff, __rs);
__d.__set(__l2, (value_type*)nullptr);
- std::__stable_sort_move<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff + __l2);
+ std::__stable_sort_move<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff + __l2, __rs);
__d.__set(__len, (value_type*)nullptr);
std::__merge_move_assign<_AlgPolicy, _Compare>(
__buff, __buff + __l2, __buff + __l2, __buff + __len, __first, __comp);
@@ -229,14 +265,17 @@ void __stable_sort(_RandomAccessIterator __first,
// __first, __comp);
return;
}
- std::__stable_sort<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff, __buff_size);
- std::__stable_sort<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff, __buff_size);
+ std::__stable_sort<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff, __buff_size, __rs);
+ std::__stable_sort<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff, __buff_size, __rs);
std::__inplace_merge<_AlgPolicy>(__first, __m, __last, __comp, __l2, __len - __l2, __buff, __buff_size);
}
-template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
-inline _LIBCPP_HIDE_FROM_ABI void
-__stable_sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) {
+template <class _AlgPolicy, class _RandomAccessIterator, class _Compare, bool _EnableRadixSort = false>
+inline _LIBCPP_HIDE_FROM_ABI void __stable_sort_impl(
+ _RandomAccessIterator __first,
+ _RandomAccessIterator __last,
+ _Compare& __comp,
+ _BoolConstant<_EnableRadixSort> __rs = _BoolConstant<false>{}) {
using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
@@ -251,19 +290,27 @@ __stable_sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last,
__h.reset(__buf.first);
}
- std::__stable_sort<_AlgPolicy, __comp_ref_type<_Compare> >(__first, __last, __comp, __len, __buf.first, __buf.second);
+ std::__stable_sort<_AlgPolicy, __comp_ref_type<_Compare> >(
+ __first, __last, __comp, __len, __buf.first, __buf.second, __rs);
std::__check_strict_weak_ordering_sorted(__first, __last, __comp);
}
template <class _RandomAccessIterator, class _Compare>
inline _LIBCPP_HIDE_FROM_ABI void
stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
- std::__stable_sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
+ std::__stable_sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp, _BoolConstant<false>());
}
template <class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI void stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
- std::stable_sort(__first, __last, __less<>());
+ using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
+ using reference_type = typename iterator_traits<_RandomAccessIterator>::reference;
+ auto __comp = __less<>();
+ std::__stable_sort_impl<_ClassicAlgPolicy>(
+ std::move(__first),
+ std::move(__last),
+ __comp,
+ _BoolConstant < is_integral<value_type>::value && is_same<value_type&, reference_type>::value > ());
}
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 13d0dce34d97e3..b6125add8acf2d 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -720,6 +720,7 @@ module std_private_algorithm_pstl [system
export *
}
module std_private_algorithm_push_heap [system] { header "__algorithm/push_heap.h" }
+module std_private_algorithm_radix_sort [system] { header "__algorithm/radix_sort.h" }
module std_private_algorithm_ranges_adjacent_find [system] { header "__algorithm/ranges_adjacent_find.h" }
module std_private_algorithm_ranges_all_of [system] { header "__algorithm/ranges_all_of.h" }
module std_private_algorithm_ranges_any_of [system] { header "__algorithm/ranges_any_of.h" }
>From edaab46ed6286d4170daaad78af6bfad8791b6c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?=
=?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= <dmitriy at izvolov.ru>
Date: Sat, 17 Aug 2024 17:21:06 +0300
Subject: [PATCH 2/2] ranged
---
libcxx/include/__algorithm/ranges_stable_sort.h | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__algorithm/ranges_stable_sort.h b/libcxx/include/__algorithm/ranges_stable_sort.h
index 9c7df80ae98722..96d84b208687fc 100644
--- a/libcxx/include/__algorithm/ranges_stable_sort.h
+++ b/libcxx/include/__algorithm/ranges_stable_sort.h
@@ -24,6 +24,8 @@
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/dangling.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
#include <__utility/forward.h>
#include <__utility/move.h>
@@ -45,7 +47,18 @@ struct __stable_sort {
auto __last_iter = ranges::next(__first, __last);
auto&& __projected_comp = std::__make_projected(__comp, __proj);
- std::__stable_sort_impl<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp);
+ constexpr auto __default_comp = is_same_v<_Comp, ranges::less>;
+ constexpr auto __default_proj = is_same_v<_Proj, identity>;
+ constexpr auto __integral_value = is_integral_v<iter_value_t<_Iter>>;
+ constexpr auto __integral_projection = __default_proj && __integral_value;
+ // constexpr auto __integral_projection = is_integral_v<remove_reference_t<invoke_result_t<_Proj&,
+ // iter_value_t<_Iter>>>>;
+ // TODO: Support projection in stable_sort
+ std::__stable_sort_impl<_RangeAlgPolicy>(
+ std::move(__first),
+ __last_iter,
+ __projected_comp,
+ _BoolConstant < __default_comp && __integral_projection > {});
return __last_iter;
}
More information about the libcxx-commits
mailing list