[libcxx-commits] [libcxx] [libc++] Optimize ranges::{for_each, for_each_n} for segmented iterators (PR #132896)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Jun 7 11:39:22 PDT 2025
https://github.com/winner245 updated https://github.com/llvm/llvm-project/pull/132896
>From a5148ec3c08fa5b06f769b170af7bb8543f2b9b6 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Tue, 25 Mar 2025 03:44:59 -0400
Subject: [PATCH 01/12] Optimize ranges::{for_each, for_each_n} for segmented
iterators
---
libcxx/include/__algorithm/ranges_for_each.h | 14 ++++--
.../include/__algorithm/ranges_for_each_n.h | 15 ++++--
.../nonmodifying/for_each_n.bench.cpp | 2 +-
.../alg.foreach/ranges.for_each.pass.cpp | 46 +++++++++++++++++--
.../alg.foreach/ranges.for_each_n.pass.cpp | 46 ++++++++++++++++++-
5 files changed, 108 insertions(+), 15 deletions(-)
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index de39bc5522753..475f85366188e 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -9,6 +9,7 @@
#ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H
#define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H
+#include <__algorithm/for_each.h>
#include <__algorithm/in_fun_result.h>
#include <__config>
#include <__functional/identity.h>
@@ -41,9 +42,16 @@ struct __for_each {
template <class _Iter, class _Sent, class _Proj, class _Func>
_LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func>
__for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) {
- for (; __first != __last; ++__first)
- std::invoke(__func, std::invoke(__proj, *__first));
- return {std::move(__first), std::move(__func)};
+ if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) {
+ auto __n = __last - __first;
+ auto __end = __first + __n;
+ std::for_each(__first, __end, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); });
+ return {std::move(__end), std::move(__func)};
+ } else {
+ for (; __first != __last; ++__first)
+ std::invoke(__func, std::invoke(__proj, *__first));
+ return {std::move(__first), std::move(__func)};
+ }
}
public:
diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h
index 603cb723233c8..3108d66001295 100644
--- a/libcxx/include/__algorithm/ranges_for_each_n.h
+++ b/libcxx/include/__algorithm/ranges_for_each_n.h
@@ -9,6 +9,7 @@
#ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
#define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
+#include <__algorithm/for_each.h>
#include <__algorithm/in_fun_result.h>
#include <__config>
#include <__functional/identity.h>
@@ -40,11 +41,17 @@ struct __for_each_n {
template <input_iterator _Iter, class _Proj = identity, indirectly_unary_invocable<projected<_Iter, _Proj>> _Func>
_LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func>
operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const {
- while (__count-- > 0) {
- std::invoke(__func, std::invoke(__proj, *__first));
- ++__first;
+ if constexpr (random_access_iterator<_Iter>) {
+ auto __last = __first + __count;
+ std::for_each(__first, __last, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); });
+ return {std::move(__last), std::move(__func)};
+ } else {
+ while (__count-- > 0) {
+ std::invoke(__func, std::invoke(__proj, *__first));
+ ++__first;
+ }
+ return {std::move(__first), std::move(__func)};
}
- return {std::move(__first), std::move(__func)};
}
};
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
index 784708c7e01eb..0de291395463a 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
@@ -21,7 +21,7 @@
int main(int argc, char** argv) {
auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
- // std::for_each_n
+ // {std,ranges}::for_each_n
{
auto bm = []<class Container>(std::string name, auto for_each_n) {
using ElemType = typename Container::value_type;
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
index 8b9b6e82cbcb2..2f4bfb9db6dba 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
@@ -20,7 +20,10 @@
#include <algorithm>
#include <array>
+#include <cassert>
+#include <deque>
#include <ranges>
+#include <vector>
#include "almost_satisfies_types.h"
#include "test_iterators.h"
@@ -30,7 +33,7 @@ struct Callable {
};
template <class Iter, class Sent = Iter>
-concept HasForEachIt = requires (Iter iter, Sent sent) { std::ranges::for_each(iter, sent, Callable{}); };
+concept HasForEachIt = requires(Iter iter, Sent sent) { std::ranges::for_each(iter, sent, Callable{}); };
static_assert(HasForEachIt<int*>);
static_assert(!HasForEachIt<InputIteratorNotDerivedFrom>);
@@ -47,7 +50,7 @@ static_assert(!HasForEachItFunc<IndirectUnaryPredicateNotPredicate>);
static_assert(!HasForEachItFunc<IndirectUnaryPredicateNotCopyConstructible>);
template <class Range>
-concept HasForEachR = requires (Range range) { std::ranges::for_each(range, Callable{}); };
+concept HasForEachR = requires(Range range) { std::ranges::for_each(range, Callable{}); };
static_assert(HasForEachR<UncheckedRange<int*>>);
static_assert(!HasForEachR<InputRangeNotDerivedFrom>);
@@ -68,7 +71,7 @@ constexpr void test_iterator() {
{ // simple test
{
auto func = [i = 0](int& a) mutable { a += i++; };
- int a[] = {1, 6, 3, 4};
+ int a[] = {1, 6, 3, 4};
std::same_as<std::ranges::for_each_result<Iter, decltype(func)>> decltype(auto) ret =
std::ranges::for_each(Iter(a), Sent(Iter(a + 4)), func);
assert(a[0] == 1);
@@ -81,8 +84,8 @@ constexpr void test_iterator() {
assert(i == 4);
}
{
- auto func = [i = 0](int& a) mutable { a += i++; };
- int a[] = {1, 6, 3, 4};
+ auto func = [i = 0](int& a) mutable { a += i++; };
+ int a[] = {1, 6, 3, 4};
auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 4)));
std::same_as<std::ranges::for_each_result<Iter, decltype(func)>> decltype(auto) ret =
std::ranges::for_each(range, func);
@@ -110,6 +113,30 @@ constexpr void test_iterator() {
}
}
+struct deque_test {
+ std::deque<int>* d_;
+ int* i_;
+
+ deque_test(std::deque<int>& d, int& i) : d_(&d), i_(&i) {}
+
+ void operator()(int& v) {
+ assert(&(*d_)[*i_] == &v);
+ ++*i_;
+ }
+};
+
+/*TEST_CONSTEXPR_CXX23*/
+void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr
+ // check that segmented iterators work properly
+ int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
+ for (const int size : sizes) {
+ std::deque<int> d(size);
+ int index = 0;
+
+ std::ranges::for_each(d, deque_test(d, index));
+ }
+}
+
constexpr bool test() {
test_iterator<cpp17_input_iterator<int*>, sentinel_wrapper<cpp17_input_iterator<int*>>>();
test_iterator<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
@@ -146,6 +173,15 @@ constexpr bool test() {
}
}
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ test_segmented_deque_iterator();
+
+ {
+ std::vector<std::vector<int>> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}};
+ auto v = vec | std::views::join;
+ std::ranges::for_each(v, [i = 0](int x) mutable { assert(x == 2 * i++); }, [](int x) { return 2 * x; });
+ }
+
return true;
}
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
index d4b2d053d08ce..ad1447b7348f5 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
@@ -17,7 +17,12 @@
#include <algorithm>
#include <array>
+#include <cassert>
+#include <deque>
+#include <iterator>
#include <ranges>
+#include <ranges>
+#include <vector>
#include "almost_satisfies_types.h"
#include "test_iterators.h"
@@ -27,7 +32,7 @@ struct Callable {
};
template <class Iter>
-concept HasForEachN = requires (Iter iter) { std::ranges::for_each_n(iter, 0, Callable{}); };
+concept HasForEachN = requires(Iter iter) { std::ranges::for_each_n(iter, 0, Callable{}); };
static_assert(HasForEachN<int*>);
static_assert(!HasForEachN<InputIteratorNotDerivedFrom>);
@@ -45,7 +50,7 @@ template <class Iter>
constexpr void test_iterator() {
{ // simple test
auto func = [i = 0](int& a) mutable { a += i++; };
- int a[] = {1, 6, 3, 4};
+ int a[] = {1, 6, 3, 4};
std::same_as<std::ranges::for_each_result<Iter, decltype(func)>> auto ret =
std::ranges::for_each_n(Iter(a), 4, func);
assert(a[0] == 1);
@@ -64,6 +69,30 @@ constexpr void test_iterator() {
}
}
+struct deque_test {
+ std::deque<int>* d_;
+ int* i_;
+
+ deque_test(std::deque<int>& d, int& i) : d_(&d), i_(&i) {}
+
+ void operator()(int& v) {
+ assert(&(*d_)[*i_] == &v);
+ ++*i_;
+ }
+};
+
+/*TEST_CONSTEXPR_CXX23*/
+void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr
+ // check that segmented iterators work properly
+ int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
+ for (const int size : sizes) {
+ std::deque<int> d(size);
+ int index = 0;
+
+ std::ranges::for_each_n(d.begin(), d.size(), deque_test(d, index));
+ }
+}
+
constexpr bool test() {
test_iterator<cpp17_input_iterator<int*>>();
test_iterator<cpp20_input_iterator<int*>>();
@@ -89,6 +118,19 @@ constexpr bool test() {
assert(a[2].other == 6);
}
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ test_segmented_deque_iterator();
+
+ {
+ std::vector<std::vector<int>> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}};
+ auto v = vec | std::views::join;
+ std::ranges::for_each_n(
+ v.begin(),
+ std::ranges::distance(v),
+ [i = 0](int x) mutable { assert(x == 2 * i++); },
+ [](int x) { return 2 * x; });
+ }
+
return true;
}
>From 90c826b3dd2a4a1bcf36bc486e61da8468a90d56 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Tue, 25 Mar 2025 21:29:27 -0400
Subject: [PATCH 02/12] Address ldionne's review comments
---
libcxx/include/__algorithm/for_each.h | 1 +
libcxx/include/__algorithm/ranges_for_each.h | 4 +++-
libcxx/include/__algorithm/ranges_for_each_n.h | 4 +++-
.../alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp | 2 +-
.../alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp | 2 +-
5 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index b6c2c7c056edd..2a44c1dc60704 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -11,6 +11,7 @@
#define _LIBCPP___ALGORITHM_FOR_EACH_H
#include <__algorithm/for_each_segment.h>
+#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/enable_if.h>
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index 475f85366188e..5d27befd9619f 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -11,6 +11,7 @@
#include <__algorithm/for_each.h>
#include <__algorithm/in_fun_result.h>
+#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
@@ -45,7 +46,8 @@ struct __for_each {
if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) {
auto __n = __last - __first;
auto __end = __first + __n;
- std::for_each(__first, __end, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); });
+ auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
+ std::__for_each<_RangeAlgPolicy>(__first, __end, __f);
return {std::move(__end), std::move(__func)};
} else {
for (; __first != __last; ++__first)
diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h
index 3108d66001295..8384ba3bb14e6 100644
--- a/libcxx/include/__algorithm/ranges_for_each_n.h
+++ b/libcxx/include/__algorithm/ranges_for_each_n.h
@@ -11,6 +11,7 @@
#include <__algorithm/for_each.h>
#include <__algorithm/in_fun_result.h>
+#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
@@ -43,7 +44,8 @@ struct __for_each_n {
operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const {
if constexpr (random_access_iterator<_Iter>) {
auto __last = __first + __count;
- std::for_each(__first, __last, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); });
+ auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
+ std::__for_each<_RangeAlgPolicy>(__first, __last, __f);
return {std::move(__last), std::move(__func)};
} else {
while (__count-- > 0) {
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
index 2f4bfb9db6dba..14be4a42f667c 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
@@ -127,7 +127,7 @@ struct deque_test {
/*TEST_CONSTEXPR_CXX23*/
void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr
- // check that segmented iterators work properly
+ // check that segmented deque iterators work properly
int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
for (const int size : sizes) {
std::deque<int> d(size);
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
index ad1447b7348f5..ac073d3052170 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
@@ -83,7 +83,7 @@ struct deque_test {
/*TEST_CONSTEXPR_CXX23*/
void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr
- // check that segmented iterators work properly
+ // check that segmented deque iterators work properly
int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
for (const int size : sizes) {
std::deque<int> d(size);
>From fae4de0486d87661286337560d4af55fc2b0dbca Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Tue, 25 Mar 2025 23:11:34 -0400
Subject: [PATCH 03/12] Fix test and ADL call
---
.../alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp | 6 +++---
.../alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
index 14be4a42f667c..a6d0afde3186a 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
@@ -125,8 +125,8 @@ struct deque_test {
}
};
-/*TEST_CONSTEXPR_CXX23*/
-void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr
+/*TEST_CONSTEXPR_CXX26*/
+void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
// check that segmented deque iterators work properly
int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
for (const int size : sizes) {
@@ -173,7 +173,7 @@ constexpr bool test() {
}
}
- if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
test_segmented_deque_iterator();
{
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
index ac073d3052170..1578763694231 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
@@ -81,8 +81,8 @@ struct deque_test {
}
};
-/*TEST_CONSTEXPR_CXX23*/
-void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr
+/*TEST_CONSTEXPR_CXX26*/
+void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
// check that segmented deque iterators work properly
int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
for (const int size : sizes) {
@@ -118,7 +118,7 @@ constexpr bool test() {
assert(a[2].other == 6);
}
- if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
test_segmented_deque_iterator();
{
>From 37d68a31684a7717738d41d2523c667f1acda610 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Wed, 26 Mar 2025 11:10:37 -0400
Subject: [PATCH 04/12] Make for_each segmented iterator optimization valid for
C++03
---
libcxx/include/__algorithm/for_each_n.h | 1 +
libcxx/include/__algorithm/ranges_for_each_n.h | 5 +++--
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h
index 29351ec39f4e7..169de84b4d95f 100644
--- a/libcxx/include/__algorithm/for_each_n.h
+++ b/libcxx/include/__algorithm/for_each_n.h
@@ -14,6 +14,7 @@
#include <__algorithm/for_each_n_segment.h>
#include <__config>
#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/disjunction.h>
#include <__type_traits/enable_if.h>
diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h
index 8384ba3bb14e6..a5c81868c2062 100644
--- a/libcxx/include/__algorithm/ranges_for_each_n.h
+++ b/libcxx/include/__algorithm/ranges_for_each_n.h
@@ -18,6 +18,7 @@
#include <__iterator/concepts.h>
#include <__iterator/incrementable_traits.h>
#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
#include <__iterator/projected.h>
#include <__ranges/concepts.h>
#include <__utility/move.h>
@@ -42,8 +43,8 @@ struct __for_each_n {
template <input_iterator _Iter, class _Proj = identity, indirectly_unary_invocable<projected<_Iter, _Proj>> _Func>
_LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func>
operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const {
- if constexpr (random_access_iterator<_Iter>) {
- auto __last = __first + __count;
+ if constexpr (forward_iterator<_Iter>) {
+ auto __last = std::ranges::next(__first, __count);
auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
std::__for_each<_RangeAlgPolicy>(__first, __last, __f);
return {std::move(__last), std::move(__func)};
>From 2a83548375a78def38a0060b5b2a0dd64c271d16 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Thu, 27 Mar 2025 11:50:12 -0400
Subject: [PATCH 05/12] Allow transitive include of <optional> in affected
headers
---
libcxx/include/experimental/iterator | 1 +
libcxx/include/mutex | 1 +
libcxx/include/shared_mutex | 1 +
3 files changed, 3 insertions(+)
diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator
index d92613845a662..565bb83903ac3 100644
--- a/libcxx/include/experimental/iterator
+++ b/libcxx/include/experimental/iterator
@@ -127,6 +127,7 @@ _LIBCPP_POP_MACROS
# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <cstddef>
# include <iosfwd>
+# include <optional>
# include <type_traits>
# endif
#endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
diff --git a/libcxx/include/mutex b/libcxx/include/mutex
index e058b3113073e..f616bad3ac171 100644
--- a/libcxx/include/mutex
+++ b/libcxx/include/mutex
@@ -504,6 +504,7 @@ _LIBCPP_POP_MACROS
# include <initializer_list>
# include <iosfwd>
# include <new>
+# include <optional>
# include <stdexcept>
# include <system_error>
# include <type_traits>
diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex
index e6759e413dfef..6469c02ca5874 100644
--- a/libcxx/include/shared_mutex
+++ b/libcxx/include/shared_mutex
@@ -457,6 +457,7 @@ _LIBCPP_POP_MACROS
# endif // _LIBCPP_HAS_THREADS
# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
+# include <optional>
# include <system_error>
# endif
#endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
>From 5cc4af82cc65eb8bf00c5bb394e4bb3739503a53 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Thu, 27 Mar 2025 12:00:54 -0400
Subject: [PATCH 06/12] Remove unnecessary _AlgoPolicy template parameter
---
libcxx/include/__algorithm/for_each.h | 1 -
libcxx/include/__algorithm/ranges_for_each.h | 3 +--
libcxx/include/__algorithm/ranges_for_each_n.h | 3 +--
3 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index 2a44c1dc60704..b6c2c7c056edd 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -11,7 +11,6 @@
#define _LIBCPP___ALGORITHM_FOR_EACH_H
#include <__algorithm/for_each_segment.h>
-#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/enable_if.h>
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index 5d27befd9619f..096e60683e39d 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -11,7 +11,6 @@
#include <__algorithm/for_each.h>
#include <__algorithm/in_fun_result.h>
-#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
@@ -47,7 +46,7 @@ struct __for_each {
auto __n = __last - __first;
auto __end = __first + __n;
auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
- std::__for_each<_RangeAlgPolicy>(__first, __end, __f);
+ std::__for_each(__first, __end, __f);
return {std::move(__end), std::move(__func)};
} else {
for (; __first != __last; ++__first)
diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h
index a5c81868c2062..9c6c2b97a2ad1 100644
--- a/libcxx/include/__algorithm/ranges_for_each_n.h
+++ b/libcxx/include/__algorithm/ranges_for_each_n.h
@@ -11,7 +11,6 @@
#include <__algorithm/for_each.h>
#include <__algorithm/in_fun_result.h>
-#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
@@ -46,7 +45,7 @@ struct __for_each_n {
if constexpr (forward_iterator<_Iter>) {
auto __last = std::ranges::next(__first, __count);
auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
- std::__for_each<_RangeAlgPolicy>(__first, __last, __f);
+ std::__for_each(__first, __last, __f);
return {std::move(__last), std::move(__func)};
} else {
while (__count-- > 0) {
>From b74e1881ca0ea9d3b6b01474046a85ed60fbc987 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Fri, 28 Mar 2025 20:26:31 -0400
Subject: [PATCH 07/12] Apply optimization for join_view segmented iterators
---
libcxx/docs/ReleaseNotes/21.rst | 6 +
.../include/__algorithm/ranges_for_each_n.h | 5 +-
.../nonmodifying/for_each.bench.cpp | 23 +++-
.../nonmodifying/for_each_join_view.bench.cpp | 122 ++++++++++++++++++
.../nonmodifying/for_each_n.bench.cpp | 14 ++
5 files changed, 165 insertions(+), 5 deletions(-)
create mode 100644 libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 6cbc0baf29487..b652ed2f4eb1e 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -64,11 +64,17 @@ Improvements and New Features
- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
+<<<<<<< HEAD
- The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance
up to 10x, depending on type of sorted elements and the initial state of the sorted array.
- The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available
in C++23 and later.
+=======
+- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
+ resulting in performance improvements of up to 21.2x for ``std::deque::iterator`` segmented inputs and 17.9x for
+ ``join_view`` of ``vector<vector<T>>``.
+>>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators)
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h
index 9c6c2b97a2ad1..b92eeb6fa8d7c 100644
--- a/libcxx/include/__algorithm/ranges_for_each_n.h
+++ b/libcxx/include/__algorithm/ranges_for_each_n.h
@@ -9,7 +9,7 @@
#ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
#define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
-#include <__algorithm/for_each.h>
+#include <__algorithm/for_each_n.h>
#include <__algorithm/in_fun_result.h>
#include <__config>
#include <__functional/identity.h>
@@ -43,9 +43,8 @@ struct __for_each_n {
_LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func>
operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const {
if constexpr (forward_iterator<_Iter>) {
- auto __last = std::ranges::next(__first, __count);
auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
- std::__for_each(__first, __last, __f);
+ auto __last = std::for_each_n(__first, __count, __f);
return {std::move(__last), std::move(__func)};
} else {
while (__count-- > 0) {
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
index 760accbe4d929..1e33cf70f8487 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
@@ -23,6 +23,7 @@ int main(int argc, char** argv) {
// {std,ranges}::for_each
{
auto bm = []<class Container>(std::string name, auto for_each) {
+ using ElemType = typename Container::value_type;
benchmark::RegisterBenchmark(
name,
[for_each](auto& st) {
@@ -33,16 +34,34 @@ int main(int argc, char** argv) {
for ([[maybe_unused]] auto _ : st) {
benchmark::DoNotOptimize(c);
- auto result = for_each(first, last, [](int& x) { x = std::clamp(x, 10, 100); });
+ auto result = for_each(first, last, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
benchmark::DoNotOptimize(result);
}
})
->Arg(8)
->Arg(32)
->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(4096)
->Arg(8192)
- ->Arg(1 << 20);
+ ->Arg(1 << 14)
+ ->Arg(1 << 16)
+ ->Arg(1 << 18);
};
+ bm.operator()<std::vector<char>>("std::for_each(vector<char>)", std_for_each);
+ bm.operator()<std::deque<char>>("std::for_each(deque<char>)", std_for_each);
+ bm.operator()<std::list<char>>("std::for_each(list<char>)", std_for_each);
+ bm.operator()<std::vector<char>>("rng::for_each(vector<char>)", std::ranges::for_each);
+ bm.operator()<std::deque<char>>("rng::for_each(deque<char>)", std::ranges::for_each);
+ bm.operator()<std::list<char>>("rng::for_each(list<char>)", std::ranges::for_each);
+
+ bm.operator()<std::vector<short>>("std::for_each(vector<short>)", std_for_each);
+ bm.operator()<std::deque<short>>("std::for_each(deque<short>)", std_for_each);
+ bm.operator()<std::list<short>>("std::for_each(list<short>)", std_for_each);
+ bm.operator()<std::vector<short>>("rng::for_each(vector<short>)", std::ranges::for_each);
+ bm.operator()<std::deque<short>>("rng::for_each(deque<short>)", std::ranges::for_each);
+ bm.operator()<std::list<short>>("rng::for_each(list<short>)", std::ranges::for_each);
+
bm.operator()<std::vector<int>>("std::for_each(vector<int>)", std_for_each);
bm.operator()<std::deque<int>>("std::for_each(deque<int>)", std_for_each);
bm.operator()<std::list<int>>("std::for_each(list<int>)", std_for_each);
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp
new file mode 100644
index 0000000000000..28398ac988bf7
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp
@@ -0,0 +1,122 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cstddef>
+#include <deque>
+#include <list>
+#include <ranges>
+#include <string>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+int main(int argc, char** argv) {
+ auto std_for_each = [](auto first, auto last, auto f) { return std::for_each(first, last, f); };
+ auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
+
+ // {std,ranges}::for_each
+ {
+ auto bm = []<class Container>(std::string name, auto for_each) {
+ using C1 = typename Container::value_type;
+ using ElemType = typename C1::value_type;
+
+ benchmark::RegisterBenchmark(
+ name,
+ [for_each](auto& st) {
+ std::size_t const size = st.range(0);
+ std::size_t const seg_size = 256;
+ std::size_t const segments = (size + seg_size - 1) / seg_size;
+ Container c(segments);
+ for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
+ c[i].resize(std::min(seg_size, n), ElemType(1));
+ }
+
+ auto view = c | std::views::join;
+ auto first = view.begin();
+ auto last = view.end();
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = for_each(first, last, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(8)
+ ->Arg(32)
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(4096)
+ ->Arg(8192)
+ ->Arg(1 << 14)
+ ->Arg(1 << 16)
+ ->Arg(1 << 18);
+ };
+ bm.operator()<std::vector<std::vector<char>>>("std::for_each(join_view(vector<vector<char>>))", std_for_each);
+ bm.operator()<std::vector<std::vector<short>>>("std::for_each(join_view(vector<vector<short>>))", std_for_each);
+ bm.operator()<std::vector<std::vector<int>>>("std::for_each(join_view(vector<vector<int>>))", std_for_each);
+ bm.operator()<std::vector<std::vector<char>>>(
+ "rng::for_each(join_view(vector<vector<char>>))", std::ranges::for_each);
+ bm.operator()<std::vector<std::vector<short>>>(
+ "rng::for_each(join_view(vector<vector<short>>))", std::ranges::for_each);
+ bm.operator()<std::vector<std::vector<int>>>("rng::for_each(join_view(vector<vector<int>>))", std::ranges::for_each);
+ }
+
+ // {std,ranges}::for_each_n
+ {
+ auto bm = []<class Container>(std::string name, auto for_each_n) {
+ using C1 = typename Container::value_type;
+ using ElemType = typename C1::value_type;
+ benchmark::RegisterBenchmark(
+ name,
+ [for_each_n](auto& st) {
+ std::size_t const size = st.range(0);
+ std::size_t const seg_size = 256;
+ std::size_t const segments = (size + seg_size - 1) / seg_size;
+ Container c(segments);
+ for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
+ c[i].resize(std::min(seg_size, n), ElemType(1));
+ }
+
+ auto view = c | std::views::join;
+ auto first = view.begin();
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(8)
+ ->Arg(32)
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(4096)
+ ->Arg(8192)
+ ->Arg(1 << 14)
+ ->Arg(1 << 16)
+ ->Arg(1 << 18);
+ };
+ bm.operator()<std::vector<std::vector<char>>>("std::for_each_n(join_view(vector<vector<char>>))", std_for_each_n);
+ bm.operator()<std::vector<std::vector<short>>>("std::for_each_n(join_view(vector<vector<short>>))", std_for_each_n);
+ bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
+ bm.operator()<std::vector<std::vector<char>>>(
+ "rng::for_each_n(join_view(vector<vector<char>>))", std::ranges::for_each_n);
+ bm.operator()<std::vector<std::vector<short>>>(
+ "rng::for_each_n(join_view(vector<vector<short>>))", std::ranges::for_each_n);
+ bm.operator()<std::vector<std::vector<int>>>(
+ "rng::for_each_n(join_view(vector<vector<int>>))", std::ranges::for_each_n);
+ }
+
+ benchmark::Initialize(&argc, argv);
+ benchmark::RunSpecifiedBenchmarks();
+ benchmark::Shutdown();
+ return 0;
+}
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
index 0de291395463a..f0dcc30a39e14 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
@@ -48,6 +48,20 @@ int main(int argc, char** argv) {
->Arg(1 << 16)
->Arg(1 << 18);
};
+ bm.operator()<std::vector<char>>("std::for_each_n(vector<char>)", std_for_each_n);
+ bm.operator()<std::deque<char>>("std::for_each_n(deque<char>)", std_for_each_n);
+ bm.operator()<std::list<char>>("std::for_each_n(list<char>)", std_for_each_n);
+ bm.operator()<std::vector<char>>("rng::for_each_n(vector<char>)", std::ranges::for_each_n);
+ bm.operator()<std::deque<char>>("rng::for_each_n(deque<char>)", std::ranges::for_each_n);
+ bm.operator()<std::list<char>>("rng::for_each_n(list<char>)", std::ranges::for_each_n);
+
+ bm.operator()<std::vector<short>>("std::for_each_n(vector<short>)", std_for_each_n);
+ bm.operator()<std::deque<short>>("std::for_each_n(deque<short>)", std_for_each_n);
+ bm.operator()<std::list<short>>("std::for_each_n(list<short>)", std_for_each_n);
+ bm.operator()<std::vector<short>>("rng::for_each_n(vector<short>)", std::ranges::for_each_n);
+ bm.operator()<std::deque<short>>("rng::for_each_n(deque<short>)", std::ranges::for_each_n);
+ bm.operator()<std::list<short>>("rng::for_each_n(list<short>)", std::ranges::for_each_n);
+
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
>From 1f7ad3453b2b390019575a52d10fb237593d5d70 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Sat, 29 Mar 2025 11:21:07 -0400
Subject: [PATCH 08/12] Consistently extend segmented iterator optimization to
ranges::for_each
---
libcxx/docs/ReleaseNotes/21.rst | 2 +-
libcxx/include/__algorithm/ranges_for_each.h | 15 ++++++++++-----
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index b652ed2f4eb1e..a3bbc59b9bd2b 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -72,7 +72,7 @@ Improvements and New Features
in C++23 and later.
=======
- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
- resulting in performance improvements of up to 21.2x for ``std::deque::iterator`` segmented inputs and 17.9x for
+ resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` segmented inputs and 24.9x for
``join_view`` of ``vector<vector<T>>``.
>>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators)
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index 096e60683e39d..961f7558149a3 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -10,7 +10,9 @@
#define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H
#include <__algorithm/for_each.h>
+#include <__algorithm/for_each_n.h>
#include <__algorithm/in_fun_result.h>
+#include <__concepts/assignable.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
@@ -42,11 +44,14 @@ struct __for_each {
template <class _Iter, class _Sent, class _Proj, class _Func>
_LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func>
__for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) {
- if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) {
- auto __n = __last - __first;
- auto __end = __first + __n;
- auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
- std::__for_each(__first, __end, __f);
+ if constexpr (std::assignable_from<_Iter&, _Sent>) {
+ _Iter __end = std::move(__last);
+ std::for_each(__first, __end, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); });
+ return {std::move(__end), std::move(__func)};
+ } else if constexpr (sized_sentinel_for<_Sent, _Iter>) {
+ auto __end = std::for_each_n(__first, __last - __first, [&](auto&& __val) {
+ std::invoke(__func, std::invoke(__proj, __val));
+ });
return {std::move(__end), std::move(__func)};
} else {
for (; __first != __last; ++__first)
>From ca54b95bcd525c6c26f2bb264f0bca1d157edb4f Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Wed, 2 Apr 2025 23:15:57 -0400
Subject: [PATCH 09/12] Fix review comments
---
libcxx/docs/ReleaseNotes/21.rst | 5 +
libcxx/include/__algorithm/for_each.h | 18 ++-
libcxx/include/__algorithm/for_each_n.h | 19 ++-
.../include/__algorithm/for_each_n_segment.h | 6 +
libcxx/include/__algorithm/ranges_for_each.h | 16 +--
.../include/__algorithm/ranges_for_each_n.h | 14 +-
.../nonmodifying/for_each.bench.cpp | 56 ++++++--
.../nonmodifying/for_each_join_view.bench.cpp | 122 ------------------
.../nonmodifying/for_each_n.bench.cpp | 54 ++++++--
9 files changed, 124 insertions(+), 186 deletions(-)
delete mode 100644 libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index a3bbc59b9bd2b..49c188ebac420 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -72,9 +72,14 @@ Improvements and New Features
in C++23 and later.
=======
- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
+<<<<<<< HEAD
resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` segmented inputs and 24.9x for
``join_view`` of ``vector<vector<T>>``.
>>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators)
+=======
+ resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of
+ ``vector<vector<char>>``.
+>>>>>>> 590136ba0d9f (Fix review comments)
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index b6c2c7c056edd..01ddad761bb57 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -12,6 +12,8 @@
#include <__algorithm/for_each_segment.h>
#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/enable_if.h>
@@ -21,21 +23,24 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _InputIterator, class _Sent, class _Func>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __for_each(_InputIterator __first, _Sent __last, _Func& __f) {
+template <class _InputIterator, class _Sent, class _Func, class _Proj>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
+__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) {
for (; __first != __last; ++__first)
- __f(*__first);
+ std::invoke(__f, std::invoke(__proj, *__first));
+ return __first;
}
#ifndef _LIBCPP_CXX03_LANG
template <class _SegmentedIterator,
class _Function,
+ class _Proj,
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func) {
+__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func, _Proj& __proj) {
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
- std::__for_each(__lfirst, __llast, __func);
+ std::__for_each(__lfirst, __llast, __func, __proj);
});
}
#endif // !_LIBCPP_CXX03_LANG
@@ -43,7 +48,8 @@ __for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __f
template <class _InputIterator, class _Function>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function
for_each(_InputIterator __first, _InputIterator __last, _Function __f) {
- std::__for_each(__first, __last, __f);
+ __identity __proj;
+ std::__for_each(__first, __last, __f, __proj);
return __f;
}
diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h
index 169de84b4d95f..953662afd6310 100644
--- a/libcxx/include/__algorithm/for_each_n.h
+++ b/libcxx/include/__algorithm/for_each_n.h
@@ -13,8 +13,9 @@
#include <__algorithm/for_each.h>
#include <__algorithm/for_each_n_segment.h>
#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
#include <__iterator/iterator_traits.h>
-#include <__iterator/next.h>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/disjunction.h>
#include <__type_traits/enable_if.h>
@@ -34,16 +35,17 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator,
class _Size,
class _Func,
+ class _Proj,
__enable_if_t<!__has_random_access_iterator_category<_InputIterator>::value &&
_Or< _Not<__is_segmented_iterator<_InputIterator> >,
_Not<__has_random_access_local_iterator<_InputIterator> > >::value,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
-__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) {
+__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
_IntegralSize __n = __orig_n;
while (__n > 0) {
- __f(*__first);
+ std::invoke(__f, std::invoke(__proj, *__first));
++__first;
--__n;
}
@@ -53,12 +55,13 @@ __for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) {
template <class _RandIter,
class _Size,
class _Func,
+ class _Proj,
__enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
auto __last = __first + __n;
- std::__for_each(__first, __last, __f);
+ std::__for_each(__first, __last, __f, __proj);
return std::move(__last);
}
@@ -66,16 +69,17 @@ __for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
template <class _SegmentedIterator,
class _Size,
class _Func,
+ class _Proj,
__enable_if_t<!__has_random_access_iterator_category<_SegmentedIterator>::value &&
__is_segmented_iterator<_SegmentedIterator>::value &&
__has_random_access_iterator_category<
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
-__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) {
+__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
- std::__for_each(__lfirst, __llast, __f);
+ std::__for_each(__lfirst, __llast, __f, __proj);
});
}
#endif // !_LIBCPP_CXX03_LANG
@@ -85,7 +89,8 @@ __for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) {
template <class _InputIterator, class _Size, class _Function>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
- return std::__for_each_n(__first, __orig_n, __f);
+ __identity __proj;
+ return std::__for_each_n(__first, __orig_n, __f, __proj);
}
#endif // _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__algorithm/for_each_n_segment.h b/libcxx/include/__algorithm/for_each_n_segment.h
index 1b522fb373eee..6c257dbcdc3ea 100644
--- a/libcxx/include/__algorithm/for_each_n_segment.h
+++ b/libcxx/include/__algorithm/for_each_n_segment.h
@@ -10,7 +10,13 @@
#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
#include <__config>
+<<<<<<< HEAD
#include <__iterator/iterator_traits.h>
+=======
+#include <__iterator/distance.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
+>>>>>>> 4a86118918e8 (Fix review comments)
#include <__iterator/segmented_iterator.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index 961f7558149a3..ed0dcde688406 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -44,19 +44,13 @@ struct __for_each {
template <class _Iter, class _Sent, class _Proj, class _Func>
_LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func>
__for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) {
- if constexpr (std::assignable_from<_Iter&, _Sent>) {
- _Iter __end = std::move(__last);
- std::for_each(__first, __end, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); });
- return {std::move(__end), std::move(__func)};
- } else if constexpr (sized_sentinel_for<_Sent, _Iter>) {
- auto __end = std::for_each_n(__first, __last - __first, [&](auto&& __val) {
- std::invoke(__func, std::invoke(__proj, __val));
- });
+ if constexpr (!std::assignable_from<_Iter&, _Sent> && sized_sentinel_for<_Sent, _Iter>) {
+ auto __n = __last - __first;
+ auto __end = std::__for_each_n(std::move(__first), __n, __func, __proj);
return {std::move(__end), std::move(__func)};
} else {
- for (; __first != __last; ++__first)
- std::invoke(__func, std::invoke(__proj, *__first));
- return {std::move(__first), std::move(__func)};
+ auto __end = std::__for_each(std::move(__first), std::move(__last), __func, __proj);
+ return {std::move(__end), std::move(__func)};
}
}
diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h
index b92eeb6fa8d7c..ebcd38a8eef6f 100644
--- a/libcxx/include/__algorithm/ranges_for_each_n.h
+++ b/libcxx/include/__algorithm/ranges_for_each_n.h
@@ -17,7 +17,6 @@
#include <__iterator/concepts.h>
#include <__iterator/incrementable_traits.h>
#include <__iterator/iterator_traits.h>
-#include <__iterator/next.h>
#include <__iterator/projected.h>
#include <__ranges/concepts.h>
#include <__utility/move.h>
@@ -42,17 +41,8 @@ struct __for_each_n {
template <input_iterator _Iter, class _Proj = identity, indirectly_unary_invocable<projected<_Iter, _Proj>> _Func>
_LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func>
operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const {
- if constexpr (forward_iterator<_Iter>) {
- auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); };
- auto __last = std::for_each_n(__first, __count, __f);
- return {std::move(__last), std::move(__func)};
- } else {
- while (__count-- > 0) {
- std::invoke(__func, std::invoke(__proj, *__first));
- ++__first;
- }
- return {std::move(__first), std::move(__func)};
- }
+ auto __last = std::__for_each_n(std::move(__first), __count, __func, __proj);
+ return {std::move(__last), std::move(__func)};
}
};
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
index 1e33cf70f8487..9151ca19c7862 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
@@ -12,6 +12,7 @@
#include <cstddef>
#include <deque>
#include <list>
+#include <ranges>
#include <string>
#include <vector>
@@ -48,20 +49,6 @@ int main(int argc, char** argv) {
->Arg(1 << 16)
->Arg(1 << 18);
};
- bm.operator()<std::vector<char>>("std::for_each(vector<char>)", std_for_each);
- bm.operator()<std::deque<char>>("std::for_each(deque<char>)", std_for_each);
- bm.operator()<std::list<char>>("std::for_each(list<char>)", std_for_each);
- bm.operator()<std::vector<char>>("rng::for_each(vector<char>)", std::ranges::for_each);
- bm.operator()<std::deque<char>>("rng::for_each(deque<char>)", std::ranges::for_each);
- bm.operator()<std::list<char>>("rng::for_each(list<char>)", std::ranges::for_each);
-
- bm.operator()<std::vector<short>>("std::for_each(vector<short>)", std_for_each);
- bm.operator()<std::deque<short>>("std::for_each(deque<short>)", std_for_each);
- bm.operator()<std::list<short>>("std::for_each(list<short>)", std_for_each);
- bm.operator()<std::vector<short>>("rng::for_each(vector<short>)", std::ranges::for_each);
- bm.operator()<std::deque<short>>("rng::for_each(deque<short>)", std::ranges::for_each);
- bm.operator()<std::list<short>>("rng::for_each(list<short>)", std::ranges::for_each);
-
bm.operator()<std::vector<int>>("std::for_each(vector<int>)", std_for_each);
bm.operator()<std::deque<int>>("std::for_each(deque<int>)", std_for_each);
bm.operator()<std::list<int>>("std::for_each(list<int>)", std_for_each);
@@ -70,6 +57,47 @@ int main(int argc, char** argv) {
bm.operator()<std::list<int>>("rng::for_each(list<int>)", std::ranges::for_each);
}
+ // {std,ranges}::for_each for join_view
+ {
+ auto bm = []<class Container>(std::string name, auto for_each) {
+ using C1 = typename Container::value_type;
+ using ElemType = typename C1::value_type;
+
+ benchmark::RegisterBenchmark(
+ name,
+ [for_each](auto& st) {
+ std::size_t const size = st.range(0);
+ std::size_t const seg_size = 256;
+ std::size_t const segments = (size + seg_size - 1) / seg_size;
+ Container c(segments);
+ for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
+ c[i].resize(std::min(seg_size, n), ElemType(1));
+ }
+
+ auto view = c | std::views::join;
+ auto first = view.begin();
+ auto last = view.end();
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = for_each(first, last, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(8)
+ ->Arg(32)
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(4096)
+ ->Arg(8192)
+ ->Arg(1 << 14)
+ ->Arg(1 << 16)
+ ->Arg(1 << 18);
+ };
+ bm.operator()<std::vector<std::vector<int>>>("std::for_each(join_view(vector<vector<int>>))", std_for_each);
+ bm.operator()<std::vector<std::vector<int>>>("rng::for_each(join_view(vector<vector<int>>)", std::ranges::for_each);
+ }
+
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp
deleted file mode 100644
index 28398ac988bf7..0000000000000
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-
-#include <algorithm>
-#include <cstddef>
-#include <deque>
-#include <list>
-#include <ranges>
-#include <string>
-#include <vector>
-
-#include <benchmark/benchmark.h>
-
-int main(int argc, char** argv) {
- auto std_for_each = [](auto first, auto last, auto f) { return std::for_each(first, last, f); };
- auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
-
- // {std,ranges}::for_each
- {
- auto bm = []<class Container>(std::string name, auto for_each) {
- using C1 = typename Container::value_type;
- using ElemType = typename C1::value_type;
-
- benchmark::RegisterBenchmark(
- name,
- [for_each](auto& st) {
- std::size_t const size = st.range(0);
- std::size_t const seg_size = 256;
- std::size_t const segments = (size + seg_size - 1) / seg_size;
- Container c(segments);
- for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
- c[i].resize(std::min(seg_size, n), ElemType(1));
- }
-
- auto view = c | std::views::join;
- auto first = view.begin();
- auto last = view.end();
-
- for ([[maybe_unused]] auto _ : st) {
- benchmark::DoNotOptimize(c);
- auto result = for_each(first, last, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
- benchmark::DoNotOptimize(result);
- }
- })
- ->Arg(8)
- ->Arg(32)
- ->Arg(50) // non power-of-two
- ->Arg(1024)
- ->Arg(4096)
- ->Arg(8192)
- ->Arg(1 << 14)
- ->Arg(1 << 16)
- ->Arg(1 << 18);
- };
- bm.operator()<std::vector<std::vector<char>>>("std::for_each(join_view(vector<vector<char>>))", std_for_each);
- bm.operator()<std::vector<std::vector<short>>>("std::for_each(join_view(vector<vector<short>>))", std_for_each);
- bm.operator()<std::vector<std::vector<int>>>("std::for_each(join_view(vector<vector<int>>))", std_for_each);
- bm.operator()<std::vector<std::vector<char>>>(
- "rng::for_each(join_view(vector<vector<char>>)", std::ranges::for_each);
- bm.operator()<std::vector<std::vector<short>>>(
- "rng::for_each(join_view(vector<vector<short>>)", std::ranges::for_each);
- bm.operator()<std::vector<std::vector<int>>>("rng::for_each(join_view(vector<vector<int>>)", std::ranges::for_each);
- }
-
- // {std,ranges}::for_each_n
- {
- auto bm = []<class Container>(std::string name, auto for_each_n) {
- using C1 = typename Container::value_type;
- using ElemType = typename C1::value_type;
- benchmark::RegisterBenchmark(
- name,
- [for_each_n](auto& st) {
- std::size_t const size = st.range(0);
- std::size_t const seg_size = 256;
- std::size_t const segments = (size + seg_size - 1) / seg_size;
- Container c(segments);
- for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
- c[i].resize(std::min(seg_size, n), ElemType(1));
- }
-
- auto view = c | std::views::join;
- auto first = view.begin();
-
- for ([[maybe_unused]] auto _ : st) {
- benchmark::DoNotOptimize(c);
- auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
- benchmark::DoNotOptimize(result);
- }
- })
- ->Arg(8)
- ->Arg(32)
- ->Arg(50) // non power-of-two
- ->Arg(1024)
- ->Arg(4096)
- ->Arg(8192)
- ->Arg(1 << 14)
- ->Arg(1 << 16)
- ->Arg(1 << 18);
- };
- bm.operator()<std::vector<std::vector<char>>>("std::for_each_n(join_view(vector<vector<char>>))", std_for_each_n);
- bm.operator()<std::vector<std::vector<short>>>("std::for_each_n(join_view(vector<vector<short>>))", std_for_each_n);
- bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
- bm.operator()<std::vector<std::vector<char>>>(
- "rng::for_each_n(join_view(vector<vector<char>>)", std::ranges::for_each_n);
- bm.operator()<std::vector<std::vector<short>>>(
- "rng::for_each_n(join_view(vector<vector<short>>)", std::ranges::for_each_n);
- bm.operator()<std::vector<std::vector<int>>>(
- "rng::for_each_n(join_view(vector<vector<int>>)", std::ranges::for_each_n);
- }
-
- benchmark::Initialize(&argc, argv);
- benchmark::RunSpecifiedBenchmarks();
- benchmark::Shutdown();
- return 0;
-}
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
index f0dcc30a39e14..3ace25a6052b6 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
@@ -48,20 +48,6 @@ int main(int argc, char** argv) {
->Arg(1 << 16)
->Arg(1 << 18);
};
- bm.operator()<std::vector<char>>("std::for_each_n(vector<char>)", std_for_each_n);
- bm.operator()<std::deque<char>>("std::for_each_n(deque<char>)", std_for_each_n);
- bm.operator()<std::list<char>>("std::for_each_n(list<char>)", std_for_each_n);
- bm.operator()<std::vector<char>>("rng::for_each_n(vector<char>)", std::ranges::for_each_n);
- bm.operator()<std::deque<char>>("rng::for_each_n(deque<char>)", std::ranges::for_each_n);
- bm.operator()<std::list<char>>("rng::for_each_n(list<char>)", std::ranges::for_each_n);
-
- bm.operator()<std::vector<short>>("std::for_each_n(vector<short>)", std_for_each_n);
- bm.operator()<std::deque<short>>("std::for_each_n(deque<short>)", std_for_each_n);
- bm.operator()<std::list<short>>("std::for_each_n(list<short>)", std_for_each_n);
- bm.operator()<std::vector<short>>("rng::for_each_n(vector<short>)", std::ranges::for_each_n);
- bm.operator()<std::deque<short>>("rng::for_each_n(deque<short>)", std::ranges::for_each_n);
- bm.operator()<std::list<short>>("rng::for_each_n(list<short>)", std::ranges::for_each_n);
-
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
@@ -105,6 +91,46 @@ int main(int argc, char** argv) {
bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
}
+ // {std,ranges}::for_each_n for join_view
+ {
+ auto bm = []<class Container>(std::string name, auto for_each_n) {
+ using C1 = typename Container::value_type;
+ using ElemType = typename C1::value_type;
+ benchmark::RegisterBenchmark(
+ name,
+ [for_each_n](auto& st) {
+ std::size_t const size = st.range(0);
+ std::size_t const seg_size = 256;
+ std::size_t const segments = (size + seg_size - 1) / seg_size;
+ Container c(segments);
+ for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
+ c[i].resize(std::min(seg_size, n), ElemType(1));
+ }
+
+ auto view = c | std::views::join;
+ auto first = view.begin();
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(8)
+ ->Arg(32)
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(4096)
+ ->Arg(8192)
+ ->Arg(1 << 14)
+ ->Arg(1 << 16)
+ ->Arg(1 << 18);
+ };
+ bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
+ bm.operator()<std::vector<std::vector<int>>>(
+ "rng::for_each_n(join_view(vector<vector<int>>)", std::ranges::for_each_n);
+ }
+
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
>From 100521b7bade2343e292eb0c5434eea9c2fb9de9 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Fri, 4 Apr 2025 21:17:22 -0400
Subject: [PATCH 10/12] Fix invoke call by using std::__invoke
---
libcxx/include/__algorithm/for_each.h | 12 ++++++------
libcxx/include/__algorithm/for_each_n.h | 9 +++++----
libcxx/include/__algorithm/ranges_for_each.h | 1 -
libcxx/include/__algorithm/ranges_for_each_n.h | 1 -
4 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index 01ddad761bb57..a3cee6783154f 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -13,9 +13,9 @@
#include <__algorithm/for_each_segment.h>
#include <__config>
#include <__functional/identity.h>
-#include <__functional/invoke.h>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/enable_if.h>
+#include <__type_traits/invoke.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -27,13 +27,13 @@ template <class _InputIterator, class _Sent, class _Func, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) {
for (; __first != __last; ++__first)
- std::invoke(__f, std::invoke(__proj, *__first));
+ std::__invoke(__f, std::__invoke(__proj, *__first));
return __first;
}
#ifndef _LIBCPP_CXX03_LANG
template <class _SegmentedIterator,
- class _Function,
+ class _Func,
class _Proj,
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
@@ -45,9 +45,9 @@ __for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __f
}
#endif // !_LIBCPP_CXX03_LANG
-template <class _InputIterator, class _Function>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function
-for_each(_InputIterator __first, _InputIterator __last, _Function __f) {
+template <class _InputIterator, class _Func>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Func
+for_each(_InputIterator __first, _InputIterator __last, _Func __f) {
__identity __proj;
std::__for_each(__first, __last, __f, __proj);
return __f;
diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h
index 953662afd6310..fb0f14fae49ae 100644
--- a/libcxx/include/__algorithm/for_each_n.h
+++ b/libcxx/include/__algorithm/for_each_n.h
@@ -14,12 +14,13 @@
#include <__algorithm/for_each_n_segment.h>
#include <__config>
#include <__functional/identity.h>
-#include <__functional/invoke.h>
#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/disjunction.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/negation.h>
+#include <__type_traits/invoke.h>
#include <__utility/convert_to_integral.h>
#include <__utility/move.h>
@@ -45,7 +46,7 @@ __for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj)
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
_IntegralSize __n = __orig_n;
while (__n > 0) {
- std::invoke(__f, std::invoke(__proj, *__first));
+ std::__invoke(__f, std::__invoke(__proj, *__first));
++__first;
--__n;
}
@@ -86,9 +87,9 @@ __for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __pr
#if _LIBCPP_STD_VER >= 17
-template <class _InputIterator, class _Size, class _Function>
+template <class _InputIterator, class _Size, class _Func>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
-for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
+for_each_n(_InputIterator __first, _Size __orig_n, _Func __f) {
__identity __proj;
return std::__for_each_n(__first, __orig_n, __f, __proj);
}
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index ed0dcde688406..1b11b52798dd6 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -15,7 +15,6 @@
#include <__concepts/assignable.h>
#include <__config>
#include <__functional/identity.h>
-#include <__functional/invoke.h>
#include <__iterator/concepts.h>
#include <__iterator/projected.h>
#include <__ranges/access.h>
diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h
index ebcd38a8eef6f..3aab1b79c10a1 100644
--- a/libcxx/include/__algorithm/ranges_for_each_n.h
+++ b/libcxx/include/__algorithm/ranges_for_each_n.h
@@ -13,7 +13,6 @@
#include <__algorithm/in_fun_result.h>
#include <__config>
#include <__functional/identity.h>
-#include <__functional/invoke.h>
#include <__iterator/concepts.h>
#include <__iterator/incrementable_traits.h>
#include <__iterator/iterator_traits.h>
>From 05161a1637eee204ad275f75c4356a2559730e5a Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Sat, 5 Apr 2025 16:45:17 -0400
Subject: [PATCH 11/12] Refactor to simplify logic of for_each_n_segment.h
---
libcxx/docs/ReleaseNotes/21.rst | 15 ++-----
libcxx/include/__algorithm/for_each.h | 11 ++++-
libcxx/include/__algorithm/for_each_n.h | 7 ++--
.../include/__algorithm/for_each_n_segment.h | 6 ---
.../nonmodifying/for_each_n.bench.cpp | 41 ++-----------------
5 files changed, 19 insertions(+), 61 deletions(-)
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 49c188ebac420..9f1a32a222f0d 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -64,22 +64,11 @@ Improvements and New Features
- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
-<<<<<<< HEAD
- The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance
up to 10x, depending on type of sorted elements and the initial state of the sorted array.
- The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available
in C++23 and later.
-=======
-- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
-<<<<<<< HEAD
- resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` segmented inputs and 24.9x for
- ``join_view`` of ``vector<vector<T>>``.
->>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators)
-=======
- resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of
- ``vector<vector<char>>``.
->>>>>>> 590136ba0d9f (Fix review comments)
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
@@ -87,6 +76,10 @@ Improvements and New Features
- The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets
with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively.
+- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
+ resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of
+ ``vector<vector<char>>``.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index a3cee6783154f..4167eec3506e4 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -16,11 +16,15 @@
#include <__iterator/segmented_iterator.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
+#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Sent, class _Func, class _Proj>
@@ -36,12 +40,13 @@ template <class _SegmentedIterator,
class _Func,
class _Proj,
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func, _Proj& __proj) {
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
+__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) {
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
std::__for_each(__lfirst, __llast, __func, __proj);
});
+ return __last;
}
#endif // !_LIBCPP_CXX03_LANG
@@ -55,4 +60,6 @@ for_each(_InputIterator __first, _InputIterator __last, _Func __f) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_FOR_EACH_H
diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h
index fb0f14fae49ae..9a6c6bb5175d6 100644
--- a/libcxx/include/__algorithm/for_each_n.h
+++ b/libcxx/include/__algorithm/for_each_n.h
@@ -15,12 +15,11 @@
#include <__config>
#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
-#include <__iterator/next.h>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/disjunction.h>
#include <__type_traits/enable_if.h>
-#include <__type_traits/negation.h>
#include <__type_traits/invoke.h>
+#include <__type_traits/negation.h>
#include <__utility/convert_to_integral.h>
#include <__utility/move.h>
@@ -59,11 +58,11 @@ template <class _RandIter,
class _Proj,
__enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
-__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
+__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
auto __last = __first + __n;
std::__for_each(__first, __last, __f, __proj);
- return std::move(__last);
+ return __last;
}
#ifndef _LIBCPP_CXX03_LANG
diff --git a/libcxx/include/__algorithm/for_each_n_segment.h b/libcxx/include/__algorithm/for_each_n_segment.h
index 6c257dbcdc3ea..1b522fb373eee 100644
--- a/libcxx/include/__algorithm/for_each_n_segment.h
+++ b/libcxx/include/__algorithm/for_each_n_segment.h
@@ -10,13 +10,7 @@
#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
#include <__config>
-<<<<<<< HEAD
#include <__iterator/iterator_traits.h>
-=======
-#include <__iterator/distance.h>
-#include <__iterator/iterator_traits.h>
-#include <__iterator/next.h>
->>>>>>> 4a86118918e8 (Fix review comments)
#include <__iterator/segmented_iterator.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
index 3ace25a6052b6..e6624bd304447 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp
@@ -51,44 +51,9 @@ int main(int argc, char** argv) {
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
- }
-
- // std::for_each_n for join_view
- {
- auto bm = []<class Container>(std::string name, auto for_each_n) {
- using C1 = typename Container::value_type;
- using ElemType = typename C1::value_type;
- benchmark::RegisterBenchmark(
- name,
- [for_each_n](auto& st) {
- std::size_t const size = st.range(0);
- std::size_t const seg_size = 256;
- std::size_t const segments = (size + seg_size - 1) / seg_size;
- Container c(segments);
- for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
- c[i].resize(std::min(seg_size, n), ElemType(1));
- }
-
- auto view = c | std::views::join;
- auto first = view.begin();
-
- for ([[maybe_unused]] auto _ : st) {
- benchmark::DoNotOptimize(c);
- auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
- benchmark::DoNotOptimize(result);
- }
- })
- ->Arg(8)
- ->Arg(32)
- ->Arg(50) // non power-of-two
- ->Arg(1024)
- ->Arg(4096)
- ->Arg(8192)
- ->Arg(1 << 14)
- ->Arg(1 << 16)
- ->Arg(1 << 18);
- };
- bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
+ bm.operator()<std::vector<int>>("rng::for_each_n(vector<int>)", std::ranges::for_each_n);
+ bm.operator()<std::deque<int>>("rng::for_each_n(deque<int>)", std::ranges::for_each_n);
+ bm.operator()<std::list<int>>("rng::for_each_n(list<int>)", std::ranges::for_each_n);
}
// {std,ranges}::for_each_n for join_view
>From b525b74c179e309ea1e259d016561c71cab2b57a Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Sat, 7 Jun 2025 07:56:55 -0400
Subject: [PATCH 12/12] Address ldionne's comments
---
libcxx/docs/ReleaseNotes/21.rst | 9 +++------
libcxx/include/__algorithm/ranges_for_each.h | 5 ++++-
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 9f1a32a222f0d..349c37ae625f1 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -70,16 +70,13 @@ Improvements and New Features
- The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available
in C++23 and later.
-- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
- up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
+- The ``std::for_each_n``, ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for
+ segmented iterators, resulting in a performance improvement of up to 17.7x for ``std::deque<short>`` iterators, and up
+ to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
- The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets
with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively.
-- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators,
- resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of
- ``vector<vector<char>>``.
-
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index 1b11b52798dd6..e9c84e8583f87 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -43,7 +43,10 @@ struct __for_each {
template <class _Iter, class _Sent, class _Proj, class _Func>
_LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func>
__for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) {
- if constexpr (!std::assignable_from<_Iter&, _Sent> && sized_sentinel_for<_Sent, _Iter>) {
+ // In the case where we have different iterator and sentinel types, the segmented iterator optimization
+ // in std::for_each will not kick in. Therefore, we prefer std::for_each_n in that case (whenever we can
+ // obtain the `n`).
+ if constexpr (!std::assignable_from<_Iter&, _Sent> && std::sized_sentinel_for<_Sent, _Iter>) {
auto __n = __last - __first;
auto __end = std::__for_each_n(std::move(__first), __n, __func, __proj);
return {std::move(__end), std::move(__func)};
More information about the libcxx-commits
mailing list