[libcxx-commits] [libcxx] [libc++] Optimize ranges::for_each for iterating over __trees (PR #164405)
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Oct 21 05:07:49 PDT 2025
https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/164405
None
>From cb8f2799dcd85cb9845b2e522f13216a2b4d5e65 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Tue, 21 Oct 2025 14:07:25 +0200
Subject: [PATCH] [libc++] Optimize ranges::for_each for iterating over __trees
---
libcxx/include/CMakeLists.txt | 1 +
libcxx/include/__algorithm/ranges_for_each.h | 10 ++++-
.../__algorithm/specialized_algorithms.h | 31 +++++++++++++++
libcxx/include/__tree | 31 +++++++++++++++
libcxx/include/map | 39 +++++++++++++++++++
libcxx/include/module.modulemap.in | 1 +
libcxx/include/set | 37 ++++++++++++++++++
.../nonmodifying/for_each.bench.cpp | 34 ++++++++++++----
8 files changed, 176 insertions(+), 8 deletions(-)
create mode 100644 libcxx/include/__algorithm/specialized_algorithms.h
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index ddace8bf8c728..8005fda2fd485 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -192,6 +192,7 @@ set(files
__algorithm/shuffle.h
__algorithm/sift_down.h
__algorithm/simd_utils.h
__algorithm/sort.h
__algorithm/sort_heap.h
+ __algorithm/specialized_algorithms.h
__algorithm/stable_partition.h
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index e9c84e8583f87..574d11f6205e0 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -12,6 +12,7 @@
#include <__algorithm/for_each.h>
#include <__algorithm/for_each_n.h>
#include <__algorithm/in_fun_result.h>
+#include <__algorithm/specialized_algorithms.h>
#include <__concepts/assignable.h>
#include <__config>
#include <__functional/identity.h>
@@ -20,6 +21,7 @@
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/dangling.h>
+#include <__type_traits/is_same.h>
+#include <__type_traits/remove_cvref.h>
#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -71,7 +73,13 @@ struct __for_each {
indirectly_unary_invocable<projected<iterator_t<_Range>, _Proj>> _Func>
_LIBCPP_HIDE_FROM_ABI constexpr for_each_result<borrowed_iterator_t<_Range>, _Func>
operator()(_Range&& __range, _Func __func, _Proj __proj = {}) const {
-    return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj);
+    using _SpecialAlg = __specialized_algorithm<_Algorithm::__for_each, remove_cvref_t<_Range>>;
+    // The specialized algorithms do not take a projection, so only dispatch to them when the
+    // projection is the identity; every other projection goes through the generic implementation.
+    if constexpr (_SpecialAlg::__has_algorithm && is_same_v<_Proj, identity>) {
+      auto [__iter, __func2] = _SpecialAlg()(__range, std::move(__func));
+      // __func has been moved from above; hand back the functor the algorithm returned instead.
+      return {std::move(__iter), std::move(__func2)};
+    } else {
+      return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj);
+    }
}
};
diff --git a/libcxx/include/__algorithm/specialized_algorithms.h b/libcxx/include/__algorithm/specialized_algorithms.h
new file mode 100644
index 0000000000000..049ae79223abc
--- /dev/null
+++ b/libcxx/include/__algorithm/specialized_algorithms.h
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
+#define _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// Identifies the algorithms for which a range type can supply a specialized implementation
+// through __specialized_algorithm.
+enum class _Algorithm {
+ __for_each,
+};
+
+// Primary template: by default a range type provides no specialized implementation of the given
+// algorithm. Specializations report support through __has_algorithm and implement the algorithm
+// in their call operator.
+template <_Algorithm, class _Range>
+struct __specialized_algorithm {
+ static const bool __has_algorithm = false;
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
diff --git a/libcxx/include/__tree b/libcxx/include/__tree
index 0738c8c6a5e2b..b027cef99ba63 100644
--- a/libcxx/include/__tree
+++ b/libcxx/include/__tree
@@ -11,6 +11,7 @@
#define _LIBCPP___TREE
#include <__algorithm/min.h>
+#include <__algorithm/specialized_algorithms.h>
#include <__assert>
#include <__config>
#include <__fwd/pair.h>
@@ -1466,8 +1467,38 @@ private:
return __dest;
}
+
+ friend struct __specialized_algorithm<_Algorithm::__for_each, __tree>;
};
+#if _LIBCPP_STD_VER >= 14
+// Specialized for_each for __tree: visits the elements by recursing over the nodes instead of
+// stepping an iterator from begin() to end().
+template <class _Tp, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Algorithm::__for_each, __tree<_Tp, _Compare, _Allocator> > {
+  static const bool __has_algorithm = true;
+
+  using __tree _LIBCPP_NODEBUG         = __tree<_Tp, _Compare, _Allocator>;
+  using __node_pointer _LIBCPP_NODEBUG = typename __tree::__node_pointer;
+
+  // Calls __func on the value of every node in the subtree rooted at __root: left subtree first,
+  // then __root itself, then the right subtree.
+  // Precondition: __root is not null (it is dereferenced unconditionally).
+  // NOTE(review): _LIBCPP_HIDE_FROM_ABI is left off for GCC, presumably because GCC does not
+  // accept it on this recursive function - confirm.
+  template <class _Func>
+#ifndef _LIBCPP_COMPILER_GCC
+  _LIBCPP_HIDE_FROM_ABI
+#endif
+  static void __impl(__node_pointer __root, _Func& __func) {
+    if (__root->__left_)
+      __impl(static_cast<__node_pointer>(__root->__left_), __func);
+    __func(__root->__get_value());
+    if (__root->__right_)
+      __impl(static_cast<__node_pointer>(__root->__right_), __func);
+  }
+
+  // Applies __func to every element of __range and returns a pair of __range.end() and the
+  // functor after all calls have been made.
+  template <class _Tree, class _Func>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Tree&& __range, _Func __func) {
+    // __impl requires a non-null root, so an empty tree (which presumably has no root node) must
+    // not be traversed at all.
+    if (__range.size() != 0)
+      __impl(__range.__root(), __func);
+    return std::make_pair(__range.end(), std::move(__func));
+  }
+};
+#endif
+
// Precondition: __size_ != 0
template <class _Tp, class _Compare, class _Allocator>
typename __tree<_Tp, _Compare, _Allocator>::__node_pointer
diff --git a/libcxx/include/map b/libcxx/include/map
index 3ff849afcde09..99bda570295ae 100644
--- a/libcxx/include/map
+++ b/libcxx/include/map
@@ -577,6 +577,7 @@ erase_if(multimap<Key, T, Compare, Allocator>& c, Predicate pred); // C++20
# include <__algorithm/equal.h>
# include <__algorithm/lexicographical_compare.h>
# include <__algorithm/lexicographical_compare_three_way.h>
+# include <__algorithm/specialized_algorithms.h>
# include <__assert>
# include <__config>
# include <__functional/binary_function.h>
@@ -1375,6 +1376,8 @@ private:
# ifdef _LIBCPP_CXX03_LANG
_LIBCPP_HIDE_FROM_ABI __node_holder __construct_node_with_key(const key_type& __k);
# endif
+
+ friend struct __specialized_algorithm<_Algorithm::__for_each, map>;
};
# if _LIBCPP_STD_VER >= 17
@@ -1427,6 +1430,23 @@ map(initializer_list<pair<_Key, _Tp>>, _Allocator)
-> map<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
# endif
+#  if _LIBCPP_STD_VER >= 14
+// Specialized for_each for map: runs the specialized for_each of the underlying __tree.
+template <class _Key, class _Tp, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Algorithm::__for_each, map<_Key, _Tp, _Compare, _Allocator>> {
+  using __map _LIBCPP_NODEBUG = map<_Key, _Tp, _Compare, _Allocator>;
+
+  static const bool __has_algorithm = true;
+
+  // The map is taken by forwarding reference so that end() is evaluated with the caller's
+  // const-qualification. Returns a pair of that end iterator and the functor after all calls.
+  template <class _Map, class _Func>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Map&& __range, _Func __func) {
+    // Only the functor of the tree-level result is needed; the iterator is replaced by the map's own.
+    auto __result = __specialized_algorithm<_Algorithm::__for_each, typename __map::__base>()(
+        __range.__tree_, std::move(__func));
+    return std::make_pair(__range.end(), std::move(__result.second));
+  }
+};
+#  endif
+
# ifndef _LIBCPP_CXX03_LANG
template <class _Key, class _Tp, class _Compare, class _Allocator>
map<_Key, _Tp, _Compare, _Allocator>::map(map&& __m, const allocator_type& __a)
@@ -1940,6 +1960,8 @@ private:
typedef __map_node_destructor<__node_allocator> _Dp;
typedef unique_ptr<__node, _Dp> __node_holder;
+
+ friend struct __specialized_algorithm<_Algorithm::__for_each, multimap>;
};
# if _LIBCPP_STD_VER >= 17
@@ -1992,6 +2014,23 @@ multimap(initializer_list<pair<_Key, _Tp>>, _Allocator)
-> multimap<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
# endif
+#  if _LIBCPP_STD_VER >= 14
+// Specialized for_each for multimap: runs the specialized for_each of the underlying __tree.
+template <class _Key, class _Tp, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Algorithm::__for_each, multimap<_Key, _Tp, _Compare, _Allocator>> {
+  using __map _LIBCPP_NODEBUG = multimap<_Key, _Tp, _Compare, _Allocator>;
+
+  static const bool __has_algorithm = true;
+
+  // The multimap is taken by forwarding reference so that end() is evaluated with the caller's
+  // const-qualification. Returns a pair of that end iterator and the functor after all calls.
+  template <class _Map, class _Func>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Map&& __range, _Func __func) {
+    // Only the functor of the tree-level result is needed; the iterator is replaced by the multimap's own.
+    auto __result = __specialized_algorithm<_Algorithm::__for_each, typename __map::__base>()(
+        __range.__tree_, std::move(__func));
+    return std::make_pair(__range.end(), std::move(__result.second));
+  }
+};
+#  endif
+
# ifndef _LIBCPP_CXX03_LANG
template <class _Key, class _Tp, class _Compare, class _Allocator>
multimap<_Key, _Tp, _Compare, _Allocator>::multimap(multimap&& __m, const allocator_type& __a)
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 894093b409e11..7977133e7efc8 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -844,6 +844,7 @@ module std [system] {
export std.memory.unique_temporary_buffer // TODO: Workaround for https://llvm.org/PR120108
}
module swap_ranges { header "__algorithm/swap_ranges.h" }
+ module specialized_algorithms { header "__algorithm/specialized_algorithms.h" }
module three_way_comp_ref_type { header "__algorithm/three_way_comp_ref_type.h" }
module transform { header "__algorithm/transform.h" }
module uniform_random_bit_generator_adaptor { header "__algorithm/uniform_random_bit_generator_adaptor.h" }
diff --git a/libcxx/include/set b/libcxx/include/set
index 59ed0155c1def..c9b0579f2c54c 100644
--- a/libcxx/include/set
+++ b/libcxx/include/set
@@ -518,6 +518,7 @@ erase_if(multiset<Key, Compare, Allocator>& c, Predicate pred); // C++20
# include <__algorithm/equal.h>
# include <__algorithm/lexicographical_compare.h>
# include <__algorithm/lexicographical_compare_three_way.h>
+# include <__algorithm/specialized_algorithms.h>
# include <__assert>
# include <__config>
# include <__functional/is_transparent.h>
@@ -902,6 +903,9 @@ public:
return __tree_.__equal_range_multi(__k);
}
# endif
+
+ template <_Algorithm, class>
+ friend struct __specialized_algorithm;
};
# if _LIBCPP_STD_VER >= 17
@@ -948,6 +952,21 @@ template <class _Key, class _Allocator, class = enable_if_t<__is_allocator_v<_Al
set(initializer_list<_Key>, _Allocator) -> set<_Key, less<_Key>, _Allocator>;
# endif
+#  if _LIBCPP_STD_VER >= 14
+// Forwards any specialized algorithm to the one provided for the underlying __tree; the set
+// supports exactly the algorithms the tree supports.
+template <_Algorithm __alg, class _Key, class _Compare, class _Allocator>
+struct __specialized_algorithm<__alg, set<_Key, _Compare, _Allocator>> {
+  using __set _LIBCPP_NODEBUG = set<_Key, _Compare, _Allocator>;
+
+  static const bool __has_algorithm = __specialized_algorithm<__alg, typename __set::__base>::__has_algorithm;
+
+  // The set can be taken by const reference because set's begin() and end() are identical with
+  // and without const qualification.
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(const __set& __range, _Args&&... __args) {
+    using _TreeAlg = __specialized_algorithm<__alg, typename __set::__base>;
+    return _TreeAlg()(__range.__tree_, std::forward<_Args>(__args)...);
+  }
+};
+#  endif
+
# ifndef _LIBCPP_CXX03_LANG
template <class _Key, class _Compare, class _Allocator>
@@ -1362,6 +1381,9 @@ public:
return __tree_.__equal_range_multi(__k);
}
# endif
+
+ template <_Algorithm, class>
+ friend struct __specialized_algorithm;
};
# if _LIBCPP_STD_VER >= 17
@@ -1409,6 +1431,21 @@ template <class _Key, class _Allocator, class = enable_if_t<__is_allocator_v<_Al
multiset(initializer_list<_Key>, _Allocator) -> multiset<_Key, less<_Key>, _Allocator>;
# endif
+#  if _LIBCPP_STD_VER >= 14
+// Forwards any specialized algorithm to the one provided for the underlying __tree; the multiset
+// supports exactly the algorithms the tree supports.
+template <_Algorithm __alg, class _Key, class _Compare, class _Allocator>
+struct __specialized_algorithm<__alg, multiset<_Key, _Compare, _Allocator>> {
+  using __set _LIBCPP_NODEBUG = multiset<_Key, _Compare, _Allocator>;
+
+  static const bool __has_algorithm = __specialized_algorithm<__alg, typename __set::__base>::__has_algorithm;
+
+  // The multiset can be taken by const reference because its begin() and end() are identical
+  // with and without const qualification.
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(const __set& __range, _Args&&... __args) {
+    using _TreeAlg = __specialized_algorithm<__alg, typename __set::__base>;
+    return _TreeAlg()(__range.__tree_, std::forward<_Args>(__args)...);
+  }
+};
+#  endif
+
# ifndef _LIBCPP_CXX03_LANG
template <class _Key, class _Compare, class _Allocator>
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
index f58f336f8b892..0188ccd59acec 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
@@ -23,7 +23,7 @@ int main(int argc, char** argv) {
// {std,ranges}::for_each
{
- auto bm = []<class Container>(std::string name, auto for_each) {
+ auto sequence_bm = []<class Container>(std::string name, auto for_each) {
using ElemType = typename Container::value_type;
benchmark::RegisterBenchmark(
name,
@@ -44,12 +44,32 @@ int main(int argc, char** argv) {
->Arg(50) // non power-of-two
->Arg(8192);
};
- bm.operator()<std::vector<int>>("std::for_each(vector<int>)", std_for_each);
- bm.operator()<std::deque<int>>("std::for_each(deque<int>)", std_for_each);
- bm.operator()<std::list<int>>("std::for_each(list<int>)", std_for_each);
- bm.operator()<std::vector<int>>("rng::for_each(vector<int>)", std::ranges::for_each);
- bm.operator()<std::deque<int>>("rng::for_each(deque<int>)", std::ranges::for_each);
- bm.operator()<std::list<int>>("rng::for_each(list<int>)", std::ranges::for_each);
+ sequence_bm.operator()<std::vector<int>>("std::for_each(vector<int>)", std_for_each);
+ sequence_bm.operator()<std::deque<int>>("std::for_each(deque<int>)", std_for_each);
+ sequence_bm.operator()<std::list<int>>("std::for_each(list<int>)", std_for_each);
+ sequence_bm.operator()<std::vector<int>>("rng::for_each(vector<int>)", std::ranges::for_each);
+ sequence_bm.operator()<std::deque<int>>("rng::for_each(deque<int>)", std::ranges::for_each);
+ sequence_bm.operator()<std::list<int>>("rng::for_each(list<int>)", std::ranges::for_each);
+
+    // Registers a benchmark that fills a Container with the values 0..N-1 (N being the benchmark
+    // argument) and then times the given range-based for_each over it.
+    auto associative_ranges_bm = []<class Container>(std::type_identity<Container>, std::string name, auto for_each) {
+      benchmark::RegisterBenchmark(
+          name,
+          [for_each](auto& st) {
+            const int64_t size = st.range(0);
+            Container c;
+            for (int64_t value = 0; value != size; ++value)
+              c.insert(value);
+
+            for (auto _ : st) {
+              benchmark::DoNotOptimize(c);
+              for_each(c, [](auto elem) { benchmark::DoNotOptimize(elem); });
+            }
+          })
+          ->Arg(8)
+          ->Arg(32)
+          ->Arg(50) // non power-of-two
+          ->Arg(8192);
+    };
+    associative_ranges_bm(std::type_identity<std::set<int>>{}, "rng::for_each(set<int>)", std::ranges::for_each);
}
// {std,ranges}::for_each for join_view
More information about the libcxx-commits
mailing list