[libcxx-commits] [libcxx] [libc++] Optimize ranges::{for_each, for_each_n} for segmented iterators (PR #132896)

Peng Liu via libcxx-commits libcxx-commits at lists.llvm.org
Thu Mar 27 20:09:26 PDT 2025


================
@@ -40,11 +43,18 @@ struct __for_each_n {
   template <input_iterator _Iter, class _Proj = identity, indirectly_unary_invocable<projected<_Iter, _Proj>> _Func>
   _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func>
   operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const {
-    while (__count-- > 0) {
-      std::invoke(__func, std::invoke(__proj, *__first));
-      ++__first;
+    if constexpr (forward_iterator<_Iter>) {
+      auto __last = std::ranges::next(__first, __count);
----------------
winner245 wrote:

The complete benchmarks, including `std::list`, are provided below. For `std::list`, I don't see a significant slowdown caused by the additional computation of `std::next(__first, __n)`. I think this is because the subsequent call to `std::__for_each(__first, __last, __f)` dominates the total time: the `__f` used in the benchmark is `std::clamp(x, 10, 100)`, which is much more computationally intensive than a simple `list<int>::iterator::operator++` operation.

```
-------------------------------------------------------------------------------
Benchmark                                     Before      After    Speedup
-------------------------------------------------------------------------------
std::for_each_n(vector<int>)/8              2.86 ns     2.88 ns      1.0x
std::for_each_n(vector<int>)/32             5.35 ns     5.32 ns      1.0x
std::for_each_n(vector<int>)/1024            163 ns      160 ns      1.0x
std::for_each_n(vector<int>)/8192           1299 ns     1264 ns      1.0x
std::for_each_n(vector<int>)/16384          2577 ns     2517 ns      1.0x
std::for_each_n(vector<int>)/65536         10234 ns    10328 ns      1.0x
std::for_each_n(vector<int>)/262144        42484 ns    43390 ns      1.0x
std::for_each_n(vector<int>)/1048576      188839 ns   174979 ns      1.1x
std::for_each_n(deque<int>)/8               5.20 ns     3.28 ns      1.6x
std::for_each_n(deque<int>)/32              19.4 ns     6.59 ns      2.9x
std::for_each_n(deque<int>)/1024             604 ns      163 ns      3.7x
std::for_each_n(deque<int>)/8192            4855 ns     1298 ns      3.7x
std::for_each_n(deque<int>)/16384           9626 ns     2630 ns      3.7x
std::for_each_n(deque<int>)/65536          38413 ns    10692 ns      3.6x
std::for_each_n(deque<int>)/262144        156156 ns    42146 ns      3.7x
std::for_each_n(deque<int>)/1048576       615028 ns   173606 ns      3.5x
std::for_each_n(list<int>)/8                5.00 ns     5.98 ns      0.8x
std::for_each_n(list<int>)/32               21.6 ns     24.7 ns      0.9x
std::for_each_n(list<int>)/1024             1062 ns     1185 ns      0.9x
std::for_each_n(list<int>)/8192            20489 ns    20802 ns      1.0x
std::for_each_n(list<int>)/16384           42544 ns    43098 ns      1.0x
std::for_each_n(list<int>)/65536          130331 ns   132124 ns      1.0x
std::for_each_n(list<int>)/262144         750621 ns   769889 ns      1.0x
std::for_each_n(list<int>)/1048576       4025899 ns  3956599 ns      1.0x
rng::for_each_n(vector<int>)/8              2.95 ns     3.01 ns      1.0x
rng::for_each_n(vector<int>)/32             5.66 ns     5.75 ns      1.0x
rng::for_each_n(vector<int>)/1024            164 ns      178 ns      0.9x
rng::for_each_n(vector<int>)/8192           1292 ns     1403 ns      0.9x
rng::for_each_n(vector<int>)/16384          2610 ns     2782 ns      0.9x
rng::for_each_n(vector<int>)/65536         10440 ns    11203 ns      0.9x
rng::for_each_n(vector<int>)/262144        42628 ns    45387 ns      0.9x
rng::for_each_n(vector<int>)/1048576      176957 ns   178294 ns      1.0x
rng::for_each_n(deque<int>)/8               4.79 ns     4.77 ns      1.0x
rng::for_each_n(deque<int>)/32              17.8 ns     8.32 ns      2.1x
rng::for_each_n(deque<int>)/1024             616 ns      172 ns      3.6x
rng::for_each_n(deque<int>)/8192            4812 ns     1350 ns      3.6x
rng::for_each_n(deque<int>)/16384           9909 ns     2710 ns      3.7x
rng::for_each_n(deque<int>)/65536          38938 ns    11029 ns      3.5x
rng::for_each_n(deque<int>)/262144        157589 ns    43788 ns      3.6x
rng::for_each_n(deque<int>)/1048576       623825 ns   175731 ns      3.6x
rng::for_each_n(list<int>)/8                6.45 ns     6.66 ns      1.0x
rng::for_each_n(list<int>)/32               22.9 ns     25.7 ns      0.9x
rng::for_each_n(list<int>)/1024             1053 ns     1082 ns      1.0x
rng::for_each_n(list<int>)/8192            20285 ns    20631 ns      1.0x
rng::for_each_n(list<int>)/16384           42034 ns    42725 ns      1.0x
rng::for_each_n(list<int>)/65536          128350 ns   129819 ns      1.0x
rng::for_each_n(list<int>)/262144         734181 ns   747416 ns      1.0x
rng::for_each_n(list<int>)/1048576       3901483 ns  4101418 ns      1.0x
-------------------------------------------------------------------------------
```

https://github.com/llvm/llvm-project/pull/132896


More information about the libcxx-commits mailing list