[libcxx-commits] [libcxx] [libc++] Optimize std::for_each_n for segmented iterators (PR #135468)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Fri Apr 11 20:15:18 PDT 2025
https://github.com/winner245 created https://github.com/llvm/llvm-project/pull/135468
This patch optimizes `std::for_each_n` for segmented iterators. The resulting speedups are summarized in the tables below.
#### `std::deque` iterators
| Benchmark | deque<char> | deque<short> | deque<int> |
|--------------------|-------------|--------------|------------|
| std::for_each_n | 13.1x | 17.7x | 3.7x |
#### `std::join_view` iterators
| Benchmark | `vector<vector<char>>` | `vector<vector<short>>` | `vector<vector<int>>` |
|--------------------|-----------------------|-----------------------|---------------------|
| std::for_each_n | 11.8x | 13.9x | 3.1x |
Overall, this results in performance improvements of up to 17.7x for `std::deque<short>` iterators, and up to 13.9x for `join_view<vector<vector<short>>>` iterators.
### Detailed Benchmarks
#### `std::deque` iterators (with `std::vector` and `std::list` for comparison)
```
--------------------------------------------------------------------------
Benchmark Before After Speedup
--------------------------------------------------------------------------
std::for_each_n(vector<char>)/8 4.26 ns 4.23 ns 1.0x
std::for_each_n(vector<char>)/32 2.68 ns 2.67 ns 1.0x
std::for_each_n(vector<char>)/50 9.49 ns 9.36 ns 1.0x
std::for_each_n(vector<char>)/1024 42.3 ns 40.1 ns 1.1x
std::for_each_n(vector<char>)/4096 163 ns 151 ns 1.1x
std::for_each_n(vector<char>)/8192 308 ns 294 ns 1.0x
std::for_each_n(vector<char>)/16384 608 ns 593 ns 1.0x
std::for_each_n(vector<char>)/65536 2435 ns 2464 ns 1.0x
std::for_each_n(vector<char>)/262144 10029 ns 10190 ns 1.0x
std::for_each_n(deque<char>)/8 6.57 ns 2.43 ns 2.7x
std::for_each_n(deque<char>)/32 24.0 ns 2.73 ns 8.8x
std::for_each_n(deque<char>)/50 33.2 ns 4.53 ns 7.3x
std::for_each_n(deque<char>)/1024 541 ns 44.9 ns 12.0x
std::for_each_n(deque<char>)/4096 2067 ns 169 ns 12.2x
std::for_each_n(deque<char>)/8192 4005 ns 305 ns 13.1x
std::for_each_n(deque<char>)/16384 7831 ns 639 ns 12.3x
std::for_each_n(deque<char>)/65536 31819 ns 2717 ns 11.7x
std::for_each_n(deque<char>)/262144 120801 ns 10674 ns 11.3x
std::for_each_n(list<char>)/8 4.97 ns 5.16 ns 1.0x
std::for_each_n(list<char>)/32 19.9 ns 20.6 ns 1.0x
std::for_each_n(list<char>)/50 40.6 ns 42.7 ns 1.0x
std::for_each_n(list<char>)/1024 996 ns 1038 ns 1.0x
std::for_each_n(list<char>)/4096 6186 ns 6341 ns 1.0x
std::for_each_n(list<char>)/8192 12522 ns 12391 ns 1.0x
std::for_each_n(list<char>)/16384 26158 ns 25739 ns 1.0x
std::for_each_n(list<char>)/65536 106410 ns 105299 ns 1.0x
std::for_each_n(list<char>)/262144 621473 ns 625741 ns 1.0x
std::for_each_n(vector<short>)/8 4.42 ns 3.92 ns 1.1x
std::for_each_n(vector<short>)/32 1.62 ns 1.64 ns 1.0x
std::for_each_n(vector<short>)/50 2.74 ns 2.75 ns 1.0x
std::for_each_n(vector<short>)/1024 34.0 ns 33.6 ns 1.0x
std::for_each_n(vector<short>)/4096 120 ns 117 ns 1.0x
std::for_each_n(vector<short>)/8192 229 ns 267 ns 0.9x
std::for_each_n(vector<short>)/16384 452 ns 469 ns 1.0x
std::for_each_n(vector<short>)/65536 2262 ns 2265 ns 1.0x
std::for_each_n(vector<short>)/262144 9129 ns 9140 ns 1.0x
std::for_each_n(deque<short>)/8 5.28 ns 1.78 ns 3.0x
std::for_each_n(deque<short>)/32 22.8 ns 2.08 ns 11.0x
std::for_each_n(deque<short>)/50 32.3 ns 4.46 ns 7.2x
std::for_each_n(deque<short>)/1024 545 ns 35.2 ns 15.5x
std::for_each_n(deque<short>)/4096 2158 ns 128 ns 16.9x
std::for_each_n(deque<short>)/8192 4303 ns 243 ns 17.7x
std::for_each_n(deque<short>)/16384 8624 ns 516 ns 16.7x
std::for_each_n(deque<short>)/65536 34569 ns 2336 ns 14.8x
std::for_each_n(deque<short>)/262144 137820 ns 9319 ns 14.8x
std::for_each_n(list<short>)/8 4.66 ns 4.95 ns 0.9x
std::for_each_n(list<short>)/32 19.9 ns 20.4 ns 1.0x
std::for_each_n(list<short>)/50 41.3 ns 41.1 ns 1.0x
std::for_each_n(list<short>)/1024 1018 ns 1021 ns 1.0x
std::for_each_n(list<short>)/4096 6110 ns 6294 ns 1.0x
std::for_each_n(list<short>)/8192 12433 ns 12692 ns 1.0x
std::for_each_n(list<short>)/16384 24739 ns 24820 ns 1.0x
std::for_each_n(list<short>)/65536 103376 ns 102812 ns 1.0x
std::for_each_n(list<short>)/262144 538314 ns 555664 ns 1.0x
std::for_each_n(vector<int>)/8 2.78 ns 2.73 ns 1.0x
std::for_each_n(vector<int>)/32 5.22 ns 5.26 ns 1.0x
std::for_each_n(vector<int>)/50 8.20 ns 8.65 ns 0.9x
std::for_each_n(vector<int>)/1024 156 ns 175 ns 0.9x
std::for_each_n(vector<int>)/4096 602 ns 758 ns 0.8x
std::for_each_n(vector<int>)/8192 1214 ns 1393 ns 0.9x
std::for_each_n(vector<int>)/16384 2417 ns 2690 ns 0.9x
std::for_each_n(vector<int>)/65536 9989 ns 10703 ns 0.9x
std::for_each_n(vector<int>)/262144 41512 ns 43798 ns 0.9x
std::for_each_n(deque<int>)/8 5.04 ns 2.75 ns 1.8x
std::for_each_n(deque<int>)/32 19.1 ns 5.56 ns 3.4x
std::for_each_n(deque<int>)/50 30.6 ns 8.55 ns 3.6x
std::for_each_n(deque<int>)/1024 567 ns 152 ns 3.7x
std::for_each_n(deque<int>)/4096 2241 ns 657 ns 3.4x
std::for_each_n(deque<int>)/8192 4512 ns 1334 ns 3.4x
std::for_each_n(deque<int>)/16384 9066 ns 2701 ns 3.4x
std::for_each_n(deque<int>)/65536 35955 ns 10887 ns 3.3x
std::for_each_n(deque<int>)/262144 146489 ns 44361 ns 3.3x
std::for_each_n(list<int>)/8 4.68 ns 6.05 ns 0.8x
std::for_each_n(list<int>)/32 21.0 ns 21.9 ns 1.0x
std::for_each_n(list<int>)/50 43.0 ns 42.2 ns 1.0x
std::for_each_n(list<int>)/1024 1015 ns 1035 ns 1.0x
std::for_each_n(list<int>)/4096 6373 ns 6331 ns 1.0x
std::for_each_n(list<int>)/8192 12757 ns 12836 ns 1.0x
std::for_each_n(list<int>)/16384 24879 ns 25035 ns 1.0x
std::for_each_n(list<int>)/65536 103931 ns 103773 ns 1.0x
std::for_each_n(list<int>)/262144 536841 ns 555330 ns 1.0x
--------------------------------------------------------------------------
```
#### `std::join_view` iterators
```
-----------------------------------------------------------------------------------------------------
Benchmark Before After Speedup
-----------------------------------------------------------------------------------------------------
std::for_each_n(join_view(vector<vector<char>>))/8 5.83 ns 2.56 ns 2.3x
std::for_each_n(join_view(vector<vector<char>>))/32 22.8 ns 3.12 ns 7.3x
std::for_each_n(join_view(vector<vector<char>>))/50 32.3 ns 5.37 ns 6.0x
std::for_each_n(join_view(vector<vector<char>>))/1024 477 ns 45.1 ns 10.6x
std::for_each_n(join_view(vector<vector<char>>))/4096 1898 ns 161 ns 11.8x
std::for_each_n(join_view(vector<vector<char>>))/8192 3785 ns 332 ns 11.4x
std::for_each_n(join_view(vector<vector<char>>))/16384 7530 ns 646 ns 11.7x
std::for_each_n(join_view(vector<vector<char>>))/65536 30685 ns 2670 ns 11.5x
std::for_each_n(join_view(vector<vector<char>>))/262144 122600 ns 10539 ns 11.6x
std::for_each_n(join_view(vector<vector<short>>))/8 6.14 ns 2.83 ns 2.2x
std::for_each_n(join_view(vector<vector<short>>))/32 25.2 ns 3.28 ns 7.7x
std::for_each_n(join_view(vector<vector<short>>))/50 33.7 ns 5.02 ns 6.7x
std::for_each_n(join_view(vector<vector<short>>))/1024 487 ns 38.6 ns 12.6x
std::for_each_n(join_view(vector<vector<short>>))/4096 1925 ns 150 ns 12.8x
std::for_each_n(join_view(vector<vector<short>>))/8192 3863 ns 291 ns 13.3x
std::for_each_n(join_view(vector<vector<short>>))/16384 7779 ns 558 ns 13.9x
std::for_each_n(join_view(vector<vector<short>>))/65536 30656 ns 2634 ns 11.6x
std::for_each_n(join_view(vector<vector<short>>))/262144 124937 ns 10667 ns 11.7x
std::for_each_n(join_view(vector<vector<int>>))/8 5.97 ns 3.45 ns 1.7x
std::for_each_n(join_view(vector<vector<int>>))/32 22.5 ns 7.20 ns 3.1x
std::for_each_n(join_view(vector<vector<int>>))/50 30.7 ns 10.7 ns 2.9x
std::for_each_n(join_view(vector<vector<int>>))/1024 491 ns 191 ns 2.6x
std::for_each_n(join_view(vector<vector<int>>))/4096 1928 ns 731 ns 2.6x
std::for_each_n(join_view(vector<vector<int>>))/8192 3874 ns 1402 ns 2.8x
std::for_each_n(join_view(vector<vector<int>>))/16384 7818 ns 2852 ns 2.7x
std::for_each_n(join_view(vector<vector<int>>))/65536 31101 ns 11439 ns 2.7x
std::for_each_n(join_view(vector<vector<int>>))/262144 126378 ns 45348 ns 2.8x
-----------------------------------------------------------------------------------------------------
```
From 9b95e81844423c7717de00fa63de438ea247c98c Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Fri, 11 Apr 2025 17:37:10 -0400
Subject: [PATCH] Optimize std::for_each_n for segmented iterators
---
libcxx/docs/ReleaseNotes/21.rst | 3 +
libcxx/include/CMakeLists.txt | 1 +
libcxx/include/__algorithm/for_each.h | 45 +++---
libcxx/include/__algorithm/for_each_n.h | 64 ++++++++-
.../include/__algorithm/for_each_n_segment.h | 63 +++++++++
libcxx/include/module.modulemap | 1 +
.../nonmodifying/for_each_n.bench.cpp | 98 +++++++++++++
.../alg.foreach/for_each_n.pass.cpp | 129 ++++++++++++------
8 files changed, 332 insertions(+), 72 deletions(-)
create mode 100644 libcxx/include/__algorithm/for_each_n_segment.h
create mode 100644 libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 06e017850b88c..8361e60a40842 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -63,6 +63,9 @@ Improvements and New Features
- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
+- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
+ up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index c225131101ce2..026ac036c5122 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -25,6 +25,7 @@ set(files
__algorithm/find_segment_if.h
__algorithm/for_each.h
__algorithm/for_each_n.h
+ __algorithm/for_each_n_segment.h
__algorithm/for_each_segment.h
__algorithm/generate.h
__algorithm/generate_n.h
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index e08f583504c01..0b14d8c219931 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -9,48 +9,43 @@
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_H
#define _LIBCPP___ALGORITHM_FOR_EACH_H
-
#include <__algorithm/for_each_segment.h>
#include <__config>
#include <__iterator/segmented_iterator.h>
-#include <__ranges/movable_box.h>
-#include <__utility/in_place.h>
-#include <__utility/move.h>
+#include <__type_traits/enable_if.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _InputIterator, class _Function>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function
-for_each(_InputIterator __first, _InputIterator __last, _Function __f) {
+template <class _InputIterator, class _Sent, class _Func>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __for_each(_InputIterator __first, _Sent __last, _Func& __f) {
for (; __first != __last; ++__first)
__f(*__first);
- return __f;
}
-// __movable_box is available in C++20, but is actually a copyable-box, so optimization is only correct in C++23
-#if _LIBCPP_STD_VER >= 23
-template <class _SegmentedIterator, class _Function>
- requires __is_segmented_iterator<_SegmentedIterator>::value
-_LIBCPP_HIDE_FROM_ABI constexpr _Function
-for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function __func) {
- ranges::__movable_box<_Function> __wrapped_func(in_place, std::move(__func));
- std::__for_each_segment(__first, __last, [&](auto __lfirst, auto __llast) {
- __wrapped_func =
- ranges::__movable_box<_Function>(in_place, std::for_each(__lfirst, __llast, std::move(*__wrapped_func)));
+#ifndef _LIBCPP_CXX03_LANG
+template <class _SegmentedIterator,
+ class _Function,
+ __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func) {
+ using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
+ std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
+ std::__for_each(__lfirst, __llast, __func);
});
- return std::move(*__wrapped_func);
}
-#endif // _LIBCPP_STD_VER >= 23
+#endif // !_LIBCPP_CXX03_LANG
-_LIBCPP_END_NAMESPACE_STD
+template <class _InputIterator, class _Function>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function
+for_each(_InputIterator __first, _InputIterator __last, _Function __f) {
+ std::__for_each(__first, __last, __f);
+ return __f;
+}
-_LIBCPP_POP_MACROS
+_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ALGORITHM_FOR_EACH_H
diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h
index fce380b49df3e..047115abde8ca 100644
--- a/libcxx/include/__algorithm/for_each_n.h
+++ b/libcxx/include/__algorithm/for_each_n.h
@@ -10,7 +10,12 @@
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_H
#define _LIBCPP___ALGORITHM_FOR_EACH_N_H
+#include <__algorithm/for_each.h>
+#include <__algorithm/for_each_n_segment.h>
#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
+#include <__type_traits/enable_if.h>
#include <__utility/convert_to_integral.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -19,11 +24,18 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-#if _LIBCPP_STD_VER >= 17
-
-template <class _InputIterator, class _Size, class _Function>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
-for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
+template <class _InputIterator,
+ class _Size,
+ class _Func,
+ __enable_if_t<!__has_random_access_iterator_category<_InputIterator>::value &&
+ (!__is_segmented_iterator<_InputIterator>::value
+ // || !__has_random_access_iterator_category<
+ // typename __segmented_iterator_traits<_InputIterator>::__local_iterator>::value
+ ), // TODO: __segmented_iterator_traits<_InputIterator> results in template instantiation
+ // during SFINAE, which is a hard error to be fixed. Once fixed, we should uncomment.
+ int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
+__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) {
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
_IntegralSize __n = __orig_n;
while (__n > 0) {
@@ -31,10 +43,48 @@ for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
++__first;
--__n;
}
- return __first;
+ return std::move(__first);
}
-#endif
+template <class _RandIter,
+ class _Size,
+ class _Func,
+ __enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
+__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
+ typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
+ auto __last = __first + __n;
+ std::__for_each(__first, __last, __f);
+ return std::move(__last);
+}
+
+#ifndef _LIBCPP_CXX03_LANG
+template <class _SegmentedIterator,
+ class _Size,
+ class _Func,
+ __enable_if_t<!__has_random_access_iterator_category<_SegmentedIterator>::value &&
+ __is_segmented_iterator<_SegmentedIterator>::value &&
+ __has_random_access_iterator_category<
+ typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
+ int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
+__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) {
+ using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
+ return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
+ std::__for_each(__lfirst, __llast, __f);
+ });
+}
+#endif // !_LIBCPP_CXX03_LANG
+
+#if _LIBCPP_STD_VER >= 17
+
+template <class _InputIterator, class _Size, class _Function>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
+for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
+ return std::__for_each_n(__first, __orig_n, __f);
+}
+
+#endif // _LIBCPP_STD_VER >= 17
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__algorithm/for_each_n_segment.h b/libcxx/include/__algorithm/for_each_n_segment.h
new file mode 100644
index 0000000000000..1b522fb373eee
--- /dev/null
+++ b/libcxx/include/__algorithm/for_each_n_segment.h
@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
+#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
+
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// __for_each_n_segment optimizes linear iteration over segmented iterators. It splits the segmented
+// input range [__first, __first + __n) into per-segment subranges and calls __func(__lfirst, __llast)
+// once for each subrange. The return value of __func is ignored, and the function returns an iterator
+// pointing to one past the last processed element in the input range.
+
+template <class _SegmentedIterator, class _Size, class _Functor>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
+__for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) {
+ static_assert(__is_segmented_iterator<_SegmentedIterator>::value &&
+ __has_random_access_iterator_category<
+ typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
+ "__for_each_n_segment only works with segmented iterators with random-access local iterators");
+ if (__orig_n <= 0)
+ return __first;
+
+ using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
+ using __local_iter_t = typename _Traits::__local_iterator;
+ using __difference_t = typename std::iterator_traits<__local_iter_t>::difference_type;
+ __difference_t __n = __orig_n;
+ auto __seg = _Traits::__segment(__first);
+ auto __local_first = _Traits::__local(__first);
+ __local_iter_t __local_last;
+
+ while (__n > 0) {
+ __local_last = _Traits::__end(__seg);
+ auto __seg_size = __local_last - __local_first;
+ if (__n <= __seg_size) {
+ __local_last = __local_first + __n;
+ __func(__local_first, __local_last);
+ break;
+ }
+ __func(__local_first, __local_last);
+ __n -= __seg_size;
+ __local_first = _Traits::__begin(++__seg);
+ }
+
+ return _Traits::__compose(__seg, __local_last);
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 65123eb268150..a41d33b3a7ec6 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -436,6 +436,7 @@ module std [system] {
module find_segment_if { header "__algorithm/find_segment_if.h" }
module find { header "__algorithm/find.h" }
module for_each_n { header "__algorithm/for_each_n.h" }
+ module for_each_n_segment { header "__algorithm/for_each_n_segment.h" }
module for_each_segment { header "__algorithm/for_each_segment.h" }
module for_each { header "__algorithm/for_each.h" }
module generate_n { header "__algorithm/generate_n.h" }
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
new file mode 100644
index 0000000000000..784708c7e01eb
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
@@ -0,0 +1,98 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cstddef>
+#include <deque>
+#include <list>
+#include <ranges>
+#include <string>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+int main(int argc, char** argv) {
+ auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
+
+ // std::for_each_n
+ {
+ auto bm = []<class Container>(std::string name, auto for_each_n) {
+ using ElemType = typename Container::value_type;
+ benchmark::RegisterBenchmark(
+ name,
+ [for_each_n](auto& st) {
+ std::size_t const n = st.range(0);
+ Container c(n, 1);
+ auto first = c.begin();
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = for_each_n(first, n, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(8)
+ ->Arg(32)
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(4096)
+ ->Arg(8192)
+ ->Arg(1 << 14)
+ ->Arg(1 << 16)
+ ->Arg(1 << 18);
+ };
+ bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
+ bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
+ bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
+ }
+
+ // std::for_each_n for join_view
+ {
+ auto bm = []<class Container>(std::string name, auto for_each_n) {
+ using C1 = typename Container::value_type;
+ using ElemType = typename C1::value_type;
+ benchmark::RegisterBenchmark(
+ name,
+ [for_each_n](auto& st) {
+ std::size_t const size = st.range(0);
+ std::size_t const seg_size = 256;
+ std::size_t const segments = (size + seg_size - 1) / seg_size;
+ Container c(segments);
+ for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
+ c[i].resize(std::min(seg_size, n), ElemType(1));
+ }
+
+ auto view = c | std::views::join;
+ auto first = view.begin();
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(8)
+ ->Arg(32)
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(4096)
+ ->Arg(8192)
+ ->Arg(1 << 14)
+ ->Arg(1 << 16)
+ ->Arg(1 << 18);
+ };
+ bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
+ }
+
+ benchmark::Initialize(&argc, argv);
+ benchmark::RunSpecifiedBenchmarks();
+ benchmark::Shutdown();
+ return 0;
+}
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp
index 371f6c92f1ed1..39c1174dcec8b 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp
@@ -13,69 +13,118 @@
// constexpr InputIterator // constexpr after C++17
// for_each_n(InputIterator first, Size n, Function f);
-
#include <algorithm>
#include <cassert>
+#include <deque>
#include <functional>
+#include <iterator>
+#include <ranges>
+#include <vector>
#include "test_macros.h"
#include "test_iterators.h"
-#if TEST_STD_VER > 17
-TEST_CONSTEXPR bool test_constexpr() {
- int ia[] = {1, 3, 6, 7};
- int expected[] = {3, 5, 8, 9};
- const std::size_t N = 4;
+struct for_each_test {
+ TEST_CONSTEXPR for_each_test(int c) : count(c) {}
+ int count;
+ TEST_CONSTEXPR_CXX14 void operator()(int& i) {
+ ++i;
+ ++count;
+ }
+};
- auto it = std::for_each_n(std::begin(ia), N, [](int &a) { a += 2; });
- return it == (std::begin(ia) + N)
- && std::equal(std::begin(ia), std::end(ia), std::begin(expected))
- ;
- }
-#endif
+struct deque_test {
+ std::deque<int>* d_;
+ int* i_;
+
+ deque_test(std::deque<int>& d, int& i) : d_(&d), i_(&i) {}
-struct for_each_test
-{
- for_each_test(int c) : count(c) {}
- int count;
- void operator()(int& i) {++i; ++count;}
+ void operator()(int& v) {
+ assert(&(*d_)[*i_] == &v);
+ ++*i_;
+ }
};
-int main(int, char**)
-{
+/*TEST_CONSTEXPR_CXX26*/
+void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
+ // check that segmented deque iterators work properly
+ int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
+ for (const int size : sizes) {
+ std::deque<int> d(size);
+ int index = 0;
+
+ std::for_each_n(d.begin(), d.size(), deque_test(d, index));
+ }
+}
+
+TEST_CONSTEXPR_CXX20 bool test() {
+ {
typedef cpp17_input_iterator<int*> Iter;
- int ia[] = {0, 1, 2, 3, 4, 5};
- const unsigned s = sizeof(ia)/sizeof(ia[0]);
+ int ia[] = {0, 1, 2, 3, 4, 5};
+ const unsigned s = sizeof(ia) / sizeof(ia[0]);
{
- auto f = for_each_test(0);
- Iter it = std::for_each_n(Iter(ia), 0, std::ref(f));
- assert(it == Iter(ia));
- assert(f.count == 0);
+ unsigned count = 0;
+ Iter it = std::for_each_n(Iter(ia), 0, [&count](int& i) {
+ ++i;
+ ++count;
+ });
+ assert(it == Iter(ia));
+ assert(count == 0);
}
{
- auto f = for_each_test(0);
- Iter it = std::for_each_n(Iter(ia), s, std::ref(f));
-
- assert(it == Iter(ia+s));
- assert(f.count == s);
- for (unsigned i = 0; i < s; ++i)
- assert(ia[i] == static_cast<int>(i+1));
+ unsigned count = 0;
+ Iter it = std::for_each_n(Iter(ia), s, [&count](int& i) {
+ ++i;
+ ++count;
+ });
+ assert(it == Iter(ia + s));
+ assert(count == s);
+ for (unsigned i = 0; i < s; ++i)
+ assert(ia[i] == static_cast<int>(i + 1));
}
{
- auto f = for_each_test(0);
- Iter it = std::for_each_n(Iter(ia), 1, std::ref(f));
-
- assert(it == Iter(ia+1));
- assert(f.count == 1);
- for (unsigned i = 0; i < 1; ++i)
- assert(ia[i] == static_cast<int>(i+2));
+ unsigned count = 0;
+ Iter it = std::for_each_n(Iter(ia), 1, [&count](int& i) {
+ ++i;
+ ++count;
+ });
+ assert(it == Iter(ia + 1));
+ assert(count == 1);
+ for (unsigned i = 0; i < 1; ++i)
+ assert(ia[i] == static_cast<int>(i + 2));
}
+ }
+
+ {
+ int ia[] = {1, 3, 6, 7};
+ int expected[] = {3, 5, 8, 9};
+ const std::size_t N = 4;
+
+ auto it = std::for_each_n(std::begin(ia), N, [](int& a) { a += 2; });
+ assert(it == (std::begin(ia) + N) && std::equal(std::begin(ia), std::end(ia), std::begin(expected)));
+ }
+
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ test_segmented_deque_iterator();
+
+#if TEST_STD_VER >= 20
+ {
+ std::vector<std::vector<int>> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}};
+ auto v = vec | std::views::join;
+ std::for_each_n(v.begin(), std::ranges::distance(v), [i = 0](int& a) mutable { assert(a == i++); });
+ }
+#endif
+
+ return true;
+}
+int main(int, char**) {
+ assert(test());
#if TEST_STD_VER > 17
- static_assert(test_constexpr());
+ static_assert(test());
#endif
return 0;
More information about the libcxx-commits
mailing list