[libcxx-commits] [libcxx] c81bfc6 - [libc++] Optimize for_each for segmented iterators

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Tue Nov 14 14:55:46 PST 2023


Author: Nikolas Klauser
Date: 2023-11-14T23:55:24+01:00
New Revision: c81bfc61da7e3a17a0ecd42ad049c9cd74384192

URL: https://github.com/llvm/llvm-project/commit/c81bfc61da7e3a17a0ecd42ad049c9cd74384192
DIFF: https://github.com/llvm/llvm-project/commit/c81bfc61da7e3a17a0ecd42ad049c9cd74384192.diff

LOG: [libc++] Optimize for_each for segmented iterators

```
---------------------------------------------------
Benchmark                       old             new
---------------------------------------------------
bm_for_each/1               3.00 ns         2.98 ns
bm_for_each/2               4.53 ns         4.57 ns
bm_for_each/3               5.82 ns         5.82 ns
bm_for_each/4               6.94 ns         6.91 ns
bm_for_each/5               7.55 ns         7.75 ns
bm_for_each/6               7.06 ns         7.45 ns
bm_for_each/7               6.69 ns         7.14 ns
bm_for_each/8               6.86 ns         4.06 ns
bm_for_each/16              11.5 ns         5.73 ns
bm_for_each/64              43.7 ns         4.06 ns
bm_for_each/512              356 ns         7.98 ns
bm_for_each/4096            2787 ns         53.6 ns
bm_for_each/32768          20836 ns          438 ns
bm_for_each/262144        195362 ns         4945 ns
bm_for_each/1048576       685482 ns        19822 ns
```

Reviewed By: ldionne, Mordante, #libc

Spies: bgraur, sberg, arichardson, libcxx-commits

Differential Revision: https://reviews.llvm.org/D151274

Added: 
    libcxx/benchmarks/algorithms/for_each.bench.cpp
    libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.pass.cpp

Modified: 
    libcxx/benchmarks/CMakeLists.txt
    libcxx/docs/ReleaseNotes/18.rst
    libcxx/include/__algorithm/for_each.h
    libcxx/utils/data/ignore_format.txt

Removed: 
    libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp


################################################################################
diff  --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 7591f34d938bf86..4307f6b57831f29 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -176,6 +176,7 @@ set(BENCHMARK_TESTS
     algorithms/count.bench.cpp
     algorithms/equal.bench.cpp
     algorithms/find.bench.cpp
+    algorithms/for_each.bench.cpp
     algorithms/lower_bound.bench.cpp
     algorithms/make_heap.bench.cpp
     algorithms/make_heap_then_sort_heap.bench.cpp

diff  --git a/libcxx/benchmarks/algorithms/for_each.bench.cpp b/libcxx/benchmarks/algorithms/for_each.bench.cpp
new file mode 100644
index 000000000000000..7019dc13ca601a7
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/for_each.bench.cpp
@@ -0,0 +1,23 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <deque>
+
+static void bm_deque_for_each(benchmark::State& state) {
+  std::deque<char> vec1(state.range(), '1');
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(vec1);
+    benchmark::DoNotOptimize(
+        std::for_each(vec1.begin(), vec1.end(), [](char& v) { v = std::clamp(v, (char)10, (char)100); }));
+  }
+}
+BENCHMARK(bm_deque_for_each)->DenseRange(1, 8)->Range(16, 1 << 20);
+
+BENCHMARK_MAIN();

diff  --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst
index 27a1758fcc799df..d5df7fde5be91af 100644
--- a/libcxx/docs/ReleaseNotes/18.rst
+++ b/libcxx/docs/ReleaseNotes/18.rst
@@ -56,6 +56,9 @@ Improvements and New Features
 - ``std::ranges::count`` is now optimized for ``vector<bool>::iterator``, which
   can lead up to 350x performance improvements.
 
+- ``std::for_each`` has been optimized for segmented iterators like ``std::deque::iterator`` in C++23 and
+  later, which can lead to up to 40x performance improvements.
+
 - The library now provides several hardening modes under which common cases of library undefined behavior will be turned
   into a reliable program termination. The ``fast`` hardening mode enables a set of security-critical checks with
   minimal runtime overhead; the ``extensive`` hardening mode additionally enables relatively cheap checks that catch

diff  --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index 6564f31cd0e7941..259e527f87f9156 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -10,23 +10,48 @@
 #ifndef _LIBCPP___ALGORITHM_FOR_EACH_H
 #define _LIBCPP___ALGORITHM_FOR_EACH_H
 
+#include <__algorithm/for_each_segment.h>
 #include <__config>
+#include <__iterator/segmented_iterator.h>
+#include <__ranges/movable_box.h>
+#include <__type_traits/enable_if.h>
+#include <__utility/in_place.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _InputIterator, class _Function>
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function for_each(_InputIterator __first,
-                                                                                  _InputIterator __last,
-                                                                                  _Function __f) {
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function
+for_each(_InputIterator __first, _InputIterator __last, _Function __f) {
   for (; __first != __last; ++__first)
     __f(*__first);
   return __f;
 }
 
+// In C++20 __movable_box is actually a copyable-box, so this optimization is only correct in C++23 and later
+#if _LIBCPP_STD_VER >= 23
+template <class _SegmentedIterator, class _Function>
+  requires __is_segmented_iterator<_SegmentedIterator>::value
+_LIBCPP_HIDE_FROM_ABI constexpr _Function
+for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function __func) {
+  ranges::__movable_box<_Function> __wrapped_func(in_place, std::move(__func));
+  std::__for_each_segment(__first, __last, [&](auto __lfirst, auto __llast) {
+    __wrapped_func =
+        ranges::__movable_box<_Function>(in_place, std::for_each(__lfirst, __llast, std::move(*__wrapped_func)));
+  });
+  return std::move(*__wrapped_func);
+}
+#endif // _LIBCPP_STD_VER >= 23
+
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_FOR_EACH_H

diff  --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.pass.cpp
new file mode 100644
index 000000000000000..94a1ec6a1d29454
--- /dev/null
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.pass.cpp
@@ -0,0 +1,100 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <algorithm>
+
+// template<InputIterator Iter, Callable<auto, Iter::reference> Function>
+//   constexpr Function   // constexpr since C++20
+//   for_each(Iter first, Iter last, Function f);
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#if __has_include(<ranges>)
+#  include <ranges>
+#endif
+#include <vector>
+
+#include "test_macros.h"
+#include "test_iterators.h"
+
+struct for_each_test {
+  TEST_CONSTEXPR for_each_test(int c) : count(c) {}
+
+  // for_each functors only have to be move constructible
+  for_each_test(const for_each_test&)            = delete;
+  for_each_test(for_each_test&&)                 = default;
+  for_each_test& operator=(const for_each_test&) = delete;
+  for_each_test& operator=(for_each_test&&)      = delete;
+
+  int count;
+  TEST_CONSTEXPR_CXX14 void operator()(int& i) {
+    ++i;
+    ++count;
+  }
+};
+
+struct Test {
+  template <class Iter>
+  TEST_CONSTEXPR_CXX20 void operator()() {
+    int sizes[] = {0, 1, 6};
+    for (const int size : sizes) {
+      int ia[]        = {0, 1, 2, 3, 4, 5};
+      for_each_test f = std::for_each(Iter(ia), Iter(ia + size), for_each_test(0));
+      assert(f.count == size);
+      for (int i = 0; i < size; ++i)
+        assert(ia[i] == static_cast<int>(i + 1));
+    }
+  }
+};
+
+TEST_CONSTEXPR_CXX20 bool test() {
+  types::for_each(types::cpp17_input_iterator_list<int*>(), Test());
+
+  // TODO: Remove the `_LIBCPP_ENABLE_EXPERIMENTAL` check once the feature-test macro is guarded or
+  // `views::join` is no longer experimental.
+#if TEST_STD_VER >= 20 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
+  { // Make sure that the segmented iterator optimization works during constant evaluation
+    std::vector<std::vector<int>> vecs = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
+    auto v                             = std::views::join(vecs);
+    std::for_each(v.begin(), v.end(), [i = 0](int& a) mutable { assert(a == ++i); });
+  }
+#endif
+
+  return true;
+}
+
+struct deque_test {
+  std::deque<int>* d_;
+  int* i_;
+
+  deque_test(std::deque<int>& d, int& i) : d_(&d), i_(&i) {}
+
+  void operator()(int& v) {
+    assert(&(*d_)[*i_] == &v);
+    ++*i_;
+  }
+};
+
+int main(int, char**) {
+  test();
+#if TEST_STD_VER >= 20
+  static_assert(test());
+#endif
+
+  // check that segmented iterators work properly
+  int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
+  for (const int size : sizes) {
+    std::deque<int> d(size);
+    int index = 0;
+
+    std::for_each(d.begin(), d.end(), deque_test(d, index));
+  }
+
+  return 0;
+}

diff  --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp
deleted file mode 100644
index d6b46554e95ecbb..000000000000000
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <algorithm>
-
-// template<InputIterator Iter, Callable<auto, Iter::reference> Function>
-//   requires CopyConstructible<Function>
-//   constexpr Function   // constexpr after C++17
-//   for_each(Iter first, Iter last, Function f);
-
-#include <algorithm>
-#include <cassert>
-
-#include "test_macros.h"
-#include "test_iterators.h"
-
-#if TEST_STD_VER > 17
-TEST_CONSTEXPR bool test_constexpr() {
-    int ia[] = {1, 3, 6, 7};
-    int expected[] = {3, 5, 8, 9};
-
-    std::for_each(std::begin(ia), std::end(ia), [](int &a) { a += 2; });
-    return std::equal(std::begin(ia), std::end(ia), std::begin(expected))
-        ;
-    }
-#endif
-
-struct for_each_test
-{
-    for_each_test(int c) : count(c) {}
-    int count;
-    void operator()(int& i) {++i; ++count;}
-};
-
-int main(int, char**)
-{
-    int ia[] = {0, 1, 2, 3, 4, 5};
-    const unsigned s = sizeof(ia)/sizeof(ia[0]);
-    for_each_test f = std::for_each(cpp17_input_iterator<int*>(ia),
-                                    cpp17_input_iterator<int*>(ia+s),
-                                    for_each_test(0));
-    assert(f.count == s);
-    for (unsigned i = 0; i < s; ++i)
-        assert(ia[i] == static_cast<int>(i+1));
-
-#if TEST_STD_VER > 17
-    static_assert(test_constexpr());
-#endif
-
-  return 0;
-}

diff  --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt
index 48b3c04adb54238..0a0db837c450019 100644
--- a/libcxx/utils/data/ignore_format.txt
+++ b/libcxx/utils/data/ignore_format.txt
@@ -10,7 +10,6 @@ libcxx/include/__algorithm/fill.h
 libcxx/include/__algorithm/fill_n.h
 libcxx/include/__algorithm/find_end.h
 libcxx/include/__algorithm/find_first_of.h
-libcxx/include/__algorithm/for_each.h
 libcxx/include/__algorithm/for_each_n.h
 libcxx/include/__algorithm/generate.h
 libcxx/include/__algorithm/generate_n.h
@@ -1139,7 +1138,6 @@ libcxx/test/std/algorithms/alg.nonmodifying/alg.find/ranges.find.pass.cpp
 libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp
 libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp
 libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp
-libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp
 libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/is_permutation.pass.cpp
 libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/is_permutation_pred.pass.cpp
 libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/ranges.is_permutation.pass.cpp


        


More information about the libcxx-commits mailing list