[libcxx-commits] [libcxx] [libc++] Optimize ranges::for_each for iterating over __trees (PR #164405)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Tue Oct 21 05:07:49 PDT 2025


https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/164405

None

>From cb8f2799dcd85cb9845b2e522f13216a2b4d5e65 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Tue, 21 Oct 2025 14:07:25 +0200
Subject: [PATCH] [libc++] Optimize ranges::for_each for iterating over __trees

---
 libcxx/include/CMakeLists.txt                 |  1 +
 libcxx/include/__algorithm/ranges_for_each.h  | 10 ++++-
 .../__algorithm/specialized_algorithms.h      | 31 +++++++++++++++
 libcxx/include/__tree                         | 31 +++++++++++++++
 libcxx/include/map                            | 39 +++++++++++++++++++
 libcxx/include/module.modulemap.in            |  1 +
 libcxx/include/set                            | 37 ++++++++++++++++++
 .../nonmodifying/for_each.bench.cpp           | 34 ++++++++++++----
 8 files changed, 176 insertions(+), 8 deletions(-)
 create mode 100644 libcxx/include/__algorithm/specialized_algorithms.h

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index ddace8bf8c728..8005fda2fd485 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -192,6 +192,7 @@ set(files
   __algorithm/shuffle.h
   __algorithm/sift_down.h
   __algorithm/simd_utils.h
   __algorithm/sort.h
   __algorithm/sort_heap.h
+  __algorithm/specialized_algorithms.h
   __algorithm/stable_partition.h
diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h
index e9c84e8583f87..574d11f6205e0 100644
--- a/libcxx/include/__algorithm/ranges_for_each.h
+++ b/libcxx/include/__algorithm/ranges_for_each.h
@@ -12,6 +12,7 @@
 #include <__algorithm/for_each.h>
 #include <__algorithm/for_each_n.h>
 #include <__algorithm/in_fun_result.h>
+#include <__algorithm/specialized_algorithms.h>
 #include <__concepts/assignable.h>
 #include <__config>
 #include <__functional/identity.h>
@@ -20,6 +21,7 @@
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/dangling.h>
+#include <__type_traits/remove_cvref.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -71,7 +73,17 @@ struct __for_each {
             indirectly_unary_invocable<projected<iterator_t<_Range>, _Proj>> _Func>
   _LIBCPP_HIDE_FROM_ABI constexpr for_each_result<borrowed_iterator_t<_Range>, _Func>
   operator()(_Range&& __range, _Func __func, _Proj __proj = {}) const {
-    return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj);
+    using _SpecialAlg = __specialized_algorithm<_Algorithm::__for_each, remove_cvref_t<_Range>>;
+    // A container may provide its own traversal through __specialized_algorithm. That traversal is handed
+    // only the function object, so it is used only when the projection is known to be the identity; any
+    // other projection takes the generic iterator loop below, which applies it to every element.
+    if constexpr (_SpecialAlg::__has_algorithm && __is_identity<_Proj>::value) {
+      auto [__iter, __func2] = _SpecialAlg()(__range, std::move(__func));
+      // __func has been moved into the specialized algorithm; return the function object it handed back.
+      return {std::move(__iter), std::move(__func2)};
+    } else {
+      return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj);
+    }
   }
 };
 
diff --git a/libcxx/include/__algorithm/specialized_algorithms.h b/libcxx/include/__algorithm/specialized_algorithms.h
new file mode 100644
index 0000000000000..049ae79223abc
--- /dev/null
+++ b/libcxx/include/__algorithm/specialized_algorithms.h
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
+#define _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+enum class _Algorithm { // Tags naming the algorithms for which a range type may supply its own implementation.
+  __for_each,           // looked up by the range overload of ranges::for_each
+};
+
+template <_Algorithm, class _Range> // primary template: used when _Range has no specialization for the algorithm
+struct __specialized_algorithm {
+  static const bool __has_algorithm = false; // specializations that implement the algorithm define this as true
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
diff --git a/libcxx/include/__tree b/libcxx/include/__tree
index 0738c8c6a5e2b..b027cef99ba63 100644
--- a/libcxx/include/__tree
+++ b/libcxx/include/__tree
@@ -11,6 +11,7 @@
 #define _LIBCPP___TREE
 
 #include <__algorithm/min.h>
+#include <__algorithm/specialized_algorithms.h>
 #include <__assert>
 #include <__config>
 #include <__fwd/pair.h>
@@ -1466,8 +1467,51 @@ private:
 
     return __dest;
   }
+
+  friend struct __specialized_algorithm<_Algorithm::__for_each, __tree>;
 };
 
+#if _LIBCPP_STD_VER >= 14
+// Specialized for_each for __tree. Instead of stepping an iterator through the container, it descends
+// through the nodes' child pointers: left subtree, then the node itself, then the right subtree.
+template <class _Tp, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Algorithm::__for_each, __tree<_Tp, _Compare, _Allocator> > {
+  static const bool __has_algorithm = true;
+
+  using __tree _LIBCPP_NODEBUG         = __tree<_Tp, _Compare, _Allocator>;
+  using __node_pointer _LIBCPP_NODEBUG = typename __tree::__node_pointer;
+
+  // Calls __func on the value of every node in the subtree rooted at __root: the left subtree first, then
+  // __root itself, then the right subtree.
+  //
+  // Precondition: __root is not null.
+  //
+  // NOTE(review): __func is invoked with plain call syntax, so a callable that needs std::invoke (such as a
+  // pointer to member, which ranges::for_each accepts) will not compile here -- confirm whether that matters.
+  template <class _Func>
+#  ifndef _LIBCPP_COMPILER_GCC // NOTE(review): presumably GCC rejects the attribute on a recursive function
+  _LIBCPP_HIDE_FROM_ABI
+#  endif
+  static void __impl(__node_pointer __root, _Func& __func) {
+    if (__root->__left_)
+      __impl(static_cast<__node_pointer>(__root->__left_), __func);
+    __func(__root->__get_value());
+    if (__root->__right_)
+      __impl(static_cast<__node_pointer>(__root->__right_), __func);
+  }
+
+  // Applies __func to every element of __range and returns a pair holding __range.end() and the function
+  // object.
+  template <class _Tree, class _Func>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Tree&& __range, _Func __func) {
+    // __impl dereferences the node it is given unconditionally, so skip it when the tree has no root node.
+    if (__node_pointer __root = __range.__root())
+      __impl(__root, __func);
+    return std::make_pair(__range.end(), std::move(__func));
+  }
+};
+#endif // _LIBCPP_STD_VER >= 14
+
 // Precondition:  __size_ != 0
 template <class _Tp, class _Compare, class _Allocator>
 typename __tree<_Tp, _Compare, _Allocator>::__node_pointer
diff --git a/libcxx/include/map b/libcxx/include/map
index 3ff849afcde09..99bda570295ae 100644
--- a/libcxx/include/map
+++ b/libcxx/include/map
@@ -577,6 +577,7 @@ erase_if(multimap<Key, T, Compare, Allocator>& c, Predicate pred);  // C++20
 #  include <__algorithm/equal.h>
 #  include <__algorithm/lexicographical_compare.h>
 #  include <__algorithm/lexicographical_compare_three_way.h>
+#  include <__algorithm/specialized_algorithms.h>
 #  include <__assert>
 #  include <__config>
 #  include <__functional/binary_function.h>
@@ -1375,6 +1376,8 @@ private:
 #  ifdef _LIBCPP_CXX03_LANG
   _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node_with_key(const key_type& __k);
 #  endif
+
+  friend struct __specialized_algorithm<_Algorithm::__for_each, map>;
 };
 
 #  if _LIBCPP_STD_VER >= 17
@@ -1427,6 +1430,27 @@ map(initializer_list<pair<_Key, _Tp>>, _Allocator)
     -> map<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
 #  endif
 
+#  if _LIBCPP_STD_VER >= 14
+// Specialized for_each for map: delegates to the specialization for the map's underlying __tree_.
+template <class _Key, class _Tp, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Algorithm::__for_each, map<_Key, _Tp, _Compare, _Allocator>> {
+  using __map _LIBCPP_NODEBUG = map<_Key, _Tp, _Compare, _Allocator>;
+
+  static const bool __has_algorithm = true;
+
+  // Applies __func to every element of __range by delegating to the specialization for the underlying
+  // __tree_, and returns a pair of __range.end() and the function object.
+  // _Map is deduced so that __range keeps the constness of the argument, which decides whether end()
+  // yields an iterator or a const_iterator.
+  template <class _Map, class _Func>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Map&& __range, _Func __func) {
+    auto __result = __specialized_algorithm<_Algorithm::__for_each, typename __map::__base>()(
+        __range.__tree_, std::move(__func));
+    return std::make_pair(__range.end(), std::move(__result.second));
+  }
+};
+#  endif
+
 #  ifndef _LIBCPP_CXX03_LANG
 template <class _Key, class _Tp, class _Compare, class _Allocator>
 map<_Key, _Tp, _Compare, _Allocator>::map(map&& __m, const allocator_type& __a)
@@ -1940,6 +1960,8 @@ private:
 
   typedef __map_node_destructor<__node_allocator> _Dp;
   typedef unique_ptr<__node, _Dp> __node_holder;
+
+  friend struct __specialized_algorithm<_Algorithm::__for_each, multimap>;
 };
 
 #  if _LIBCPP_STD_VER >= 17
@@ -1992,6 +2014,27 @@ multimap(initializer_list<pair<_Key, _Tp>>, _Allocator)
     -> multimap<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
 #  endif
 
+#  if _LIBCPP_STD_VER >= 14
+// Specialized for_each for multimap: delegates to the specialization for the multimap's underlying __tree_.
+template <class _Key, class _Tp, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Algorithm::__for_each, multimap<_Key, _Tp, _Compare, _Allocator>> {
+  using __map _LIBCPP_NODEBUG = multimap<_Key, _Tp, _Compare, _Allocator>;
+
+  static const bool __has_algorithm = true;
+
+  // Applies __func to every element of __range by delegating to the specialization for the underlying
+  // __tree_, and returns a pair of __range.end() and the function object.
+  // _Map is deduced so that __range keeps the constness of the argument, which decides whether end()
+  // yields an iterator or a const_iterator.
+  template <class _Map, class _Func>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Map&& __range, _Func __func) {
+    auto __result = __specialized_algorithm<_Algorithm::__for_each, typename __map::__base>()(
+        __range.__tree_, std::move(__func));
+    return std::make_pair(__range.end(), std::move(__result.second));
+  }
+};
+#  endif
+
 #  ifndef _LIBCPP_CXX03_LANG
 template <class _Key, class _Tp, class _Compare, class _Allocator>
 multimap<_Key, _Tp, _Compare, _Allocator>::multimap(multimap&& __m, const allocator_type& __a)
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 894093b409e11..7977133e7efc8 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -844,6 +844,7 @@ module std [system] {
       export std.memory.unique_temporary_buffer // TODO: Workaround for https://llvm.org/PR120108
     }
     module swap_ranges                            { header "__algorithm/swap_ranges.h" }
+    module specialized_algorithms                 { header "__algorithm/specialized_algorithms.h" }
     module three_way_comp_ref_type                { header "__algorithm/three_way_comp_ref_type.h" }
     module transform                              { header "__algorithm/transform.h" }
     module uniform_random_bit_generator_adaptor   { header "__algorithm/uniform_random_bit_generator_adaptor.h" }
diff --git a/libcxx/include/set b/libcxx/include/set
index 59ed0155c1def..c9b0579f2c54c 100644
--- a/libcxx/include/set
+++ b/libcxx/include/set
@@ -518,6 +518,7 @@ erase_if(multiset<Key, Compare, Allocator>& c, Predicate pred);  // C++20
 #  include <__algorithm/equal.h>
 #  include <__algorithm/lexicographical_compare.h>
 #  include <__algorithm/lexicographical_compare_three_way.h>
+#  include <__algorithm/specialized_algorithms.h>
 #  include <__assert>
 #  include <__config>
 #  include <__functional/is_transparent.h>
@@ -902,6 +903,9 @@ public:
     return __tree_.__equal_range_multi(__k);
   }
 #  endif
+
+  template <_Algorithm, class>
+  friend struct __specialized_algorithm;
 };
 
 #  if _LIBCPP_STD_VER >= 17
@@ -948,6 +952,24 @@ template <class _Key, class _Allocator, class = enable_if_t<__is_allocator_v<_Al
 set(initializer_list<_Key>, _Allocator) -> set<_Key, less<_Key>, _Allocator>;
 #  endif
 
+#  if _LIBCPP_STD_VER >= 14
+// Makes every algorithm that is specialized for set's __base type available for set itself by forwarding to it.
+template <_Algorithm __alg, class _Key, class _Compare, class _Allocator>
+struct __specialized_algorithm<__alg, set<_Key, _Compare, _Allocator>> {
+  using __set _LIBCPP_NODEBUG      = set<_Key, _Compare, _Allocator>;
+  using __base_alg _LIBCPP_NODEBUG = __specialized_algorithm<__alg, typename __set::__base>;
+
+  static const bool __has_algorithm = __base_alg::__has_algorithm;
+
+  // The container is taken by const reference for every value category: set's begin() and end() are identical
+  // with and without const qualification.
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(const __set& __container, _Args&&... __args) {
+    return __base_alg()(__container.__tree_, std::forward<_Args>(__args)...);
+  }
+};
+#  endif
+
 #  ifndef _LIBCPP_CXX03_LANG
 
 template <class _Key, class _Compare, class _Allocator>
@@ -1362,6 +1381,9 @@ public:
     return __tree_.__equal_range_multi(__k);
   }
 #  endif
+
+  template <_Algorithm, class>
+  friend struct __specialized_algorithm;
 };
 
 #  if _LIBCPP_STD_VER >= 17
@@ -1409,6 +1431,24 @@ template <class _Key, class _Allocator, class = enable_if_t<__is_allocator_v<_Al
 multiset(initializer_list<_Key>, _Allocator) -> multiset<_Key, less<_Key>, _Allocator>;
 #  endif
 
+#  if _LIBCPP_STD_VER >= 14
+// Makes every algorithm that is specialized for multiset's __base type available for multiset by forwarding to it.
+template <_Algorithm __alg, class _Key, class _Compare, class _Allocator>
+struct __specialized_algorithm<__alg, multiset<_Key, _Compare, _Allocator>> {
+  using __set _LIBCPP_NODEBUG      = multiset<_Key, _Compare, _Allocator>;
+  using __base_alg _LIBCPP_NODEBUG = __specialized_algorithm<__alg, typename __set::__base>;
+
+  static const bool __has_algorithm = __base_alg::__has_algorithm;
+
+  // The container is taken by const reference for every value category: multiset's begin() and end() are
+  // identical with and without const qualification.
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(const __set& __container, _Args&&... __args) {
+    return __base_alg()(__container.__tree_, std::forward<_Args>(__args)...);
+  }
+};
+#  endif
+
 #  ifndef _LIBCPP_CXX03_LANG
 
 template <class _Key, class _Compare, class _Allocator>
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
index f58f336f8b892..0188ccd59acec 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp
@@ -23,7 +23,7 @@ int main(int argc, char** argv) {
 
   // {std,ranges}::for_each
   {
-    auto bm = []<class Container>(std::string name, auto for_each) {
+    auto sequence_bm = []<class Container>(std::string name, auto for_each) {
       using ElemType = typename Container::value_type;
       benchmark::RegisterBenchmark(
           name,
@@ -44,12 +44,32 @@ int main(int argc, char** argv) {
           ->Arg(50) // non power-of-two
           ->Arg(8192);
     };
-    bm.operator()<std::vector<int>>("std::for_each(vector<int>)", std_for_each);
-    bm.operator()<std::deque<int>>("std::for_each(deque<int>)", std_for_each);
-    bm.operator()<std::list<int>>("std::for_each(list<int>)", std_for_each);
-    bm.operator()<std::vector<int>>("rng::for_each(vector<int>)", std::ranges::for_each);
-    bm.operator()<std::deque<int>>("rng::for_each(deque<int>)", std::ranges::for_each);
-    bm.operator()<std::list<int>>("rng::for_each(list<int>)", std::ranges::for_each);
+    sequence_bm.operator()<std::vector<int>>("std::for_each(vector<int>)", std_for_each);
+    sequence_bm.operator()<std::deque<int>>("std::for_each(deque<int>)", std_for_each);
+    sequence_bm.operator()<std::list<int>>("std::for_each(list<int>)", std_for_each);
+    sequence_bm.operator()<std::vector<int>>("rng::for_each(vector<int>)", std::ranges::for_each);
+    sequence_bm.operator()<std::deque<int>>("rng::for_each(deque<int>)", std::ranges::for_each);
+    sequence_bm.operator()<std::list<int>>("rng::for_each(list<int>)", std::ranges::for_each);
+
+    auto associative_ranges_bm = []<class Container>(std::type_identity<Container>, std::string name, auto for_each) {
+      benchmark::RegisterBenchmark(
+          name,
+          [for_each](auto& state) {
+            Container container;
+            for (int64_t key = 0; key != state.range(0); ++key)
+              container.insert(key);
+            // The container is filled once up front; each pass of the loop below traverses all of it.
+            for (auto _ : state) {
+              benchmark::DoNotOptimize(container);
+              for_each(container, [](auto elem) { benchmark::DoNotOptimize(elem); });
+            }
+          })
+          ->Arg(8)
+          ->Arg(32)
+          ->Arg(50) // non power-of-two
+          ->Arg(8192);
+    };
+    associative_ranges_bm(std::type_identity<std::set<int>>{}, "rng::for_each(set<int>)", std::ranges::for_each);
   }
 
   // {std,ranges}::for_each for join_view



More information about the libcxx-commits mailing list