[libcxx-commits] [libcxx] [libc++] Optimize std::find for segmented iterators (PR #67224)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Thu Dec 14 04:43:18 PST 2023


https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/67224

>From 3855f9f21a54a82bcdc6b74004038ad78e023fa3 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sat, 23 Sep 2023 10:07:31 +0200
Subject: [PATCH] [libc++] Optimize std::find for segmented iterators

Spies: libcxx-commits

Differential Revision: https://reviews.llvm.org/D157809
---
 libcxx/benchmarks/algorithms/find.bench.cpp   | 31 +++++++---
 libcxx/include/CMakeLists.txt                 |  1 +
 libcxx/include/__algorithm/find.h             | 31 ++++++++++
 libcxx/include/__algorithm/find_segment_if.h  | 62 +++++++++++++++++++
 libcxx/include/deque                          |  2 +-
 libcxx/include/module.modulemap.in            |  1 +
 ...obust_against_copying_comparators.pass.cpp |  1 +
 ...obust_against_copying_projections.pass.cpp | 11 ++++
 .../alg.nonmodifying/alg.find/find.pass.cpp   | 56 +++++++++++++++++
 .../alg.find/ranges.find.pass.cpp             | 60 ++++++++++++++++++
 10 files changed, 245 insertions(+), 11 deletions(-)
 create mode 100644 libcxx/include/__algorithm/find_segment_if.h

diff --git a/libcxx/benchmarks/algorithms/find.bench.cpp b/libcxx/benchmarks/algorithms/find.bench.cpp
index b87c575a16b4dc..6ff2d95ab43533 100644
--- a/libcxx/benchmarks/algorithms/find.bench.cpp
+++ b/libcxx/benchmarks/algorithms/find.bench.cpp
@@ -9,12 +9,15 @@
 #include <algorithm>
 #include <benchmark/benchmark.h>
 #include <cstring>
+#include <deque>
 #include <random>
 #include <vector>
 
-template <class T>
+template <class Container>
 static void bm_find(benchmark::State& state) {
-  std::vector<T> vec1(state.range(), '1');
+  using T = Container::value_type;
+
+  Container vec1(state.range(), '1');
   std::mt19937_64 rng(std::random_device{}());
 
   for (auto _ : state) {
@@ -25,13 +28,18 @@ static void bm_find(benchmark::State& state) {
     vec1[idx] = '1';
   }
 }
-BENCHMARK(bm_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
 
-template <class T>
+template <class Container>
 static void bm_ranges_find(benchmark::State& state) {
-  std::vector<T> vec1(state.range(), '1');
+  using T = Container::value_type;
+
+  Container vec1(state.range(), '1');
   std::mt19937_64 rng(std::random_device{}());
 
   for (auto _ : state) {
@@ -42,9 +50,12 @@ static void bm_ranges_find(benchmark::State& state) {
     vec1[idx] = '1';
   }
 }
-BENCHMARK(bm_ranges_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_ranges_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_ranges_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
 
 static void bm_vector_bool_find(benchmark::State& state) {
   std::vector<bool> vec1(state.range(), false);
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index d8faf6467b79ae..f2845061d5f1ad 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -22,6 +22,7 @@ set(files
   __algorithm/find_first_of.h
   __algorithm/find_if.h
   __algorithm/find_if_not.h
+  __algorithm/find_segment_if.h
   __algorithm/for_each.h
   __algorithm/for_each_n.h
   __algorithm/for_each_segment.h
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 0118489d94699d..282286c42cfc87 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -10,6 +10,7 @@
 #ifndef _LIBCPP___ALGORITHM_FIND_H
 #define _LIBCPP___ALGORITHM_FIND_H
 
+#include <__algorithm/find_segment_if.h>
 #include <__algorithm/min.h>
 #include <__algorithm/unwrap_iter.h>
 #include <__bit/countr.h>
@@ -18,8 +19,10 @@
 #include <__functional/identity.h>
 #include <__functional/invoke.h>
 #include <__fwd/bit_reference.h>
+#include <__iterator/segmented_iterator.h>
 #include <__string/constexpr_c_functions.h>
 #include <__type_traits/is_same.h>
+#include <__utility/move.h>
 
 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
 #  include <cwchar>
@@ -118,6 +121,34 @@ __find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst>
   return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
 }
 
+// segmented iterator implementation
+
+template <class>
+struct __find_segment;
+
+template <class _SegmentedIterator,
+          class _Tp,
+          class _Proj,
+          __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
+__find_impl(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
+  return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj);
+}
+
+template <class _Tp>
+struct __find_segment {
+  const _Tp& __value_;
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __find_segment(const _Tp& __value) : __value_(__value) {}
+
+  template <class _InputIterator, class _Proj>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
+  operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) {
+    return std::__find_impl(__first, __last, __value_, __proj);
+  }
+};
+
+// public API
 template <class _InputIterator, class _Tp>
 _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
 find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
diff --git a/libcxx/include/__algorithm/find_segment_if.h b/libcxx/include/__algorithm/find_segment_if.h
new file mode 100644
index 00000000000000..9d6064f3e283a6
--- /dev/null
+++ b/libcxx/include/__algorithm/find_segment_if.h
@@ -0,0 +1,62 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
+#define _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
+
+#include <__config>
+#include <__iterator/segmented_iterator.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// __find_segment_if is a utility function for optimizing linear searches over segmented iterators.
+// [__first, __last) has to be a segmented range. __pred is expected to take a range of local iterators and the __proj.
+// __pred returns a local iterator to the first element that satisfies the predicate, or the one-past-the-end iterator
+// of that local range if there was no match. __proj may be anything that should be passed to __pred, but is expected
+// to be a projection to support ranges algorithms, or __identity for classic algorithms.
+
+template <class _SegmentedIterator, class _Pred, class _Proj>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
+__find_segment_if(_SegmentedIterator __first, _SegmentedIterator __last, _Pred __pred, _Proj& __proj) {
+  using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
+
+  auto __sfirst = _Traits::__segment(__first);
+  auto __slast  = _Traits::__segment(__last);
+
+  // The whole range lies in a single segment, so it might not start or stop at a segment boundary
+  if (__sfirst == __slast)
+    return _Traits::__compose(__sfirst, __pred(_Traits::__local(__first), _Traits::__local(__last), __proj));
+
+  { // We have more than one segment. Iterate over the first segment, since we might not start at the beginning
+    auto __llast = _Traits::__end(__sfirst);
+    auto __liter = __pred(_Traits::__local(__first), __llast, __proj);
+    if (__liter != __llast)
+      return _Traits::__compose(__sfirst, __liter);
+  }
+  ++__sfirst;
+
+  // Iterate over the segments which are guaranteed to be completely in the range
+  while (__sfirst != __slast) {
+    auto __llast = _Traits::__end(__sfirst);
+    auto __liter = __pred(_Traits::__begin(__sfirst), _Traits::__end(__sfirst), __proj);
+    if (__liter != __llast)
+      return _Traits::__compose(__sfirst, __liter);
+    ++__sfirst;
+  }
+
+  // Iterate over the last segment
+  return _Traits::__compose(__sfirst, __pred(_Traits::__begin(__sfirst), _Traits::__local(__last), __proj));
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
diff --git a/libcxx/include/deque b/libcxx/include/deque
index b5d094dc415ddf..5ee7c300844e4b 100644
--- a/libcxx/include/deque
+++ b/libcxx/include/deque
@@ -440,7 +440,7 @@ public:
   }
 
   static _LIBCPP_HIDE_FROM_ABI _Iterator __compose(__segment_iterator __segment, __local_iterator __local) {
-        if (__local == __end(__segment)) {
+        if (__segment && __local == __end(__segment)) {
             ++__segment;
             return _Iterator(__segment, *__segment);
         }
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 90ee7fbb2157c2..60d41ea7d0232d 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -665,6 +665,7 @@ module std_private_algorithm_find_end                                    [system
 module std_private_algorithm_find_first_of                               [system] { header "__algorithm/find_first_of.h" }
 module std_private_algorithm_find_if                                     [system] { header "__algorithm/find_if.h" }
 module std_private_algorithm_find_if_not                                 [system] { header "__algorithm/find_if_not.h" }
+module std_private_algorithm_find_segment_if                             [system] { header "__algorithm/find_segment_if.h" }
 module std_private_algorithm_for_each                                    [system] { header "__algorithm/for_each.h" }
 module std_private_algorithm_for_each_n                                  [system] { header "__algorithm/for_each_n.h" }
 module std_private_algorithm_for_each_segment                            [system] { header "__algorithm/for_each_segment.h" }
diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp
index eece9833f8a635..1a6c0d11460c5a 100644
--- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp
+++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_comparators.pass.cpp
@@ -15,6 +15,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
+#include <deque>
 
 #include "test_macros.h"
 
diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp
index afbbc224ea8644..64019273d5c72b 100644
--- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp
+++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp
@@ -15,6 +15,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
+#include <deque>
 
 #include "test_macros.h"
 
@@ -224,8 +225,18 @@ constexpr bool all_the_algorithms()
     return true;
 }
 
+void test_deque() {
+    std::deque<T> d;
+    int copies = 0;
+    void* value = nullptr;
+
+    (void)std::ranges::find(d, value, Proj(&copies));
+    assert(copies == 0);
+}
+
 int main(int, char**)
 {
+    test_deque();
     all_the_algorithms();
     static_assert(all_the_algorithms());
 
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
index b55a852c10cafa..e32c634b1cca24 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <deque>
 #include <vector>
 #include <type_traits>
 
@@ -113,6 +114,52 @@ struct TestTypes {
   }
 };
 
+void test_deque() {
+  { // empty deque
+    std::deque<int> data;
+    assert(std::find(data.begin(), data.end(), 4) == data.end());
+  }
+
+  { // single element - match
+    std::deque<int> data;
+    data.push_back(4);
+    assert(std::find(data.begin(), data.end(), 4) == data.begin());
+  }
+
+  { // single element - no match
+    std::deque<int> data;
+    data.push_back(3);
+    assert(std::find(data.begin(), data.end(), 4) == data.end());
+  }
+
+  // many elements
+  int sizes[] = {2, 3, 1023, 1024, 1025, 2047, 2048, 2049};
+  for (auto size : sizes) {
+    { // last element match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      data[size - 1] = 4;
+      assert(std::find(data.begin(), data.end(), 4) == data.end() - 1);
+    }
+
+    { // second-last element match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      data[size - 2] = 4;
+      assert(std::find(data.begin(), data.end(), 4) == data.end() - 2);
+    }
+
+    { // no match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      assert(std::find(data.begin(), data.end(), 4) == data.end());
+    }
+  }
+}
+
 TEST_CONSTEXPR_CXX20 bool test() {
   types::for_each(types::integer_types(), TestTypes<char>());
   types::for_each(types::integer_types(), TestTypes<int>());
@@ -122,10 +169,19 @@ TEST_CONSTEXPR_CXX20 bool test() {
   Test<TriviallyComparable<wchar_t>, TriviallyComparable<wchar_t>>().operator()<TriviallyComparable<wchar_t>*>();
 #endif
 
+#if TEST_STD_VER >= 20
+  {
+    std::vector<std::vector<int>> vec = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
+    auto view                         = vec | std::views::join;
+    assert(std::find(view.begin(), view.end(), 4) == std::next(view.begin(), 3));
+  }
+#endif
+
   return true;
 }
 
 int main(int, char**) {
+  test_deque();
   test();
 #if TEST_STD_VER >= 20
   static_assert(test());
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/ranges.find.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/ranges.find.pass.cpp
index 22f938f73ae078..7e8f29ae6288f9 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/ranges.find.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/ranges.find.pass.cpp
@@ -23,6 +23,7 @@
 #include <algorithm>
 #include <array>
 #include <cassert>
+#include <deque>
 #include <ranges>
 #include <vector>
 
@@ -127,6 +128,15 @@ constexpr bool test() {
                     });
                   });
 
+#if TEST_STD_VER >= 20
+  {
+    std::vector<std::vector<int>> vec = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
+    auto view                         = vec | std::views::join;
+    assert(std::ranges::find(view.begin(), view.end(), 4) == std::next(view.begin(), 3));
+    assert(std::ranges::find(view, 4) == std::next(view.begin(), 3));
+  }
+#endif
+
   { // check that the first element is returned
     {
       struct S {
@@ -202,7 +212,57 @@ class Comparable {
   friend bool operator==(const Comparable& lhs, long long rhs) { return comparable_data[lhs.index_] == rhs; }
 };
 
+void test_deque() {
+  { // empty deque
+    std::deque<int> data;
+    assert(std::ranges::find(data, 4) == data.end());
+    assert(std::ranges::find(data.begin(), data.end(), 4) == data.end());
+  }
+
+  { // single element - match
+    std::deque<int> data = {4};
+    assert(std::ranges::find(data, 4) == data.begin());
+    assert(std::ranges::find(data.begin(), data.end(), 4) == data.begin());
+  }
+
+  { // single element - no match
+    std::deque<int> data = {3};
+    assert(std::ranges::find(data, 4) == data.end());
+    assert(std::ranges::find(data.begin(), data.end(), 4) == data.end());
+  }
+
+  // many elements
+  for (auto size : {2, 3, 1023, 1024, 1025, 2047, 2048, 2049}) {
+    { // last element match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      data[size - 1] = 4;
+      assert(std::ranges::find(data, 4) == data.end() - 1);
+      assert(std::ranges::find(data.begin(), data.end(), 4) == data.end() - 1);
+    }
+
+    { // second-last element match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      data[size - 2] = 4;
+      assert(std::ranges::find(data, 4) == data.end() - 2);
+      assert(std::ranges::find(data.begin(), data.end(), 4) == data.end() - 2);
+    }
+
+    { // no match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      assert(std::ranges::find(data, 4) == data.end());
+      assert(std::ranges::find(data.begin(), data.end(), 4) == data.end());
+    }
+  }
+}
+
 int main(int, char**) {
+  test_deque();
   test();
   static_assert(test());
 



More information about the libcxx-commits mailing list