[libcxx-commits] [libcxx] [libc++] Optimize std::find for segmented iterators (PR #67224)

via libcxx-commits libcxx-commits at lists.llvm.org
Sat Sep 23 01:19:11 PDT 2023


https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/67224

```
--------------------------------------------------------------------------
Benchmark                                              old             new
--------------------------------------------------------------------------
bm_find<std::deque<char>>/1                        6.06 ns         10.6 ns
bm_find<std::deque<char>>/2                        15.5 ns         10.6 ns
bm_find<std::deque<char>>/3                        19.0 ns         10.6 ns
bm_find<std::deque<char>>/4                        20.8 ns         10.6 ns
bm_find<std::deque<char>>/5                        22.0 ns         10.6 ns
bm_find<std::deque<char>>/6                        23.0 ns         10.5 ns
bm_find<std::deque<char>>/7                        24.8 ns         10.7 ns
bm_find<std::deque<char>>/8                        25.7 ns         10.6 ns
bm_find<std::deque<char>>/16                       28.3 ns         10.6 ns
bm_find<std::deque<char>>/64                       44.2 ns         27.0 ns
bm_find<std::deque<char>>/512                       133 ns         37.6 ns
bm_find<std::deque<char>>/4096                      867 ns         53.1 ns
bm_find<std::deque<char>>/32768                    6838 ns          160 ns
bm_find<std::deque<char>>/262144                  52897 ns         1495 ns
bm_find<std::deque<char>>/1048576                215621 ns         6077 ns
bm_find<std::deque<short>>/1                       6.03 ns         6.28 ns
bm_find<std::deque<short>>/2                       15.8 ns         15.8 ns
bm_find<std::deque<short>>/3                       20.5 ns         20.3 ns
bm_find<std::deque<short>>/4                       21.0 ns         21.0 ns
bm_find<std::deque<short>>/5                       23.0 ns         22.1 ns
bm_find<std::deque<short>>/6                       22.6 ns         23.0 ns
bm_find<std::deque<short>>/7                       23.4 ns         23.7 ns
bm_find<std::deque<short>>/8                       24.4 ns         24.9 ns
bm_find<std::deque<short>>/16                      26.6 ns         27.2 ns
bm_find<std::deque<short>>/64                      43.2 ns         40.9 ns
bm_find<std::deque<short>>/512                      124 ns         90.7 ns
bm_find<std::deque<short>>/4096                     845 ns          525 ns
bm_find<std::deque<short>>/32768                   7273 ns         3194 ns
bm_find<std::deque<short>>/262144                 53710 ns        24385 ns
bm_find<std::deque<short>>/1048576               216086 ns        96195 ns
bm_find<std::deque<int>>/1                         6.03 ns         10.3 ns
bm_find<std::deque<int>>/2                         15.6 ns         10.3 ns
bm_find<std::deque<int>>/3                         19.1 ns         10.3 ns
bm_find<std::deque<int>>/4                         22.3 ns         10.3 ns
bm_find<std::deque<int>>/5                         23.5 ns         10.4 ns
bm_find<std::deque<int>>/6                         23.1 ns         10.3 ns
bm_find<std::deque<int>>/7                         23.7 ns         10.2 ns
bm_find<std::deque<int>>/8                         24.5 ns         10.2 ns
bm_find<std::deque<int>>/16                        27.9 ns         26.6 ns
bm_find<std::deque<int>>/64                        42.6 ns         32.2 ns
bm_find<std::deque<int>>/512                        123 ns         43.0 ns
bm_find<std::deque<int>>/4096                       874 ns         93.5 ns
bm_find<std::deque<int>>/32768                     7031 ns          751 ns
bm_find<std::deque<int>>/262144                   57723 ns         6169 ns
bm_find<std::deque<int>>/1048576                 230867 ns        35851 ns
bm_ranges_find<std::deque<char>>/1                 5.97 ns         10.6 ns
bm_ranges_find<std::deque<char>>/2                 16.0 ns         10.5 ns
bm_ranges_find<std::deque<char>>/3                 19.5 ns         10.5 ns
bm_ranges_find<std::deque<char>>/4                 21.1 ns         10.6 ns
bm_ranges_find<std::deque<char>>/5                 22.8 ns         10.5 ns
bm_ranges_find<std::deque<char>>/6                 22.8 ns         10.6 ns
bm_ranges_find<std::deque<char>>/7                 23.4 ns         10.8 ns
bm_ranges_find<std::deque<char>>/8                 24.1 ns         10.5 ns
bm_ranges_find<std::deque<char>>/16                26.9 ns         10.6 ns
bm_ranges_find<std::deque<char>>/64                50.2 ns         27.2 ns
bm_ranges_find<std::deque<char>>/512                126 ns         38.3 ns
bm_ranges_find<std::deque<char>>/4096               868 ns         53.8 ns
bm_ranges_find<std::deque<char>>/32768             6695 ns          161 ns
bm_ranges_find<std::deque<char>>/262144           54411 ns         1497 ns
bm_ranges_find<std::deque<char>>/1048576         241699 ns         6042 ns
bm_ranges_find<std::deque<short>>/1                6.39 ns         6.31 ns
bm_ranges_find<std::deque<short>>/2                15.8 ns         15.9 ns
bm_ranges_find<std::deque<short>>/3                19.0 ns         19.8 ns
bm_ranges_find<std::deque<short>>/4                20.8 ns         20.9 ns
bm_ranges_find<std::deque<short>>/5                21.8 ns         22.1 ns
bm_ranges_find<std::deque<short>>/6                23.0 ns         23.0 ns
bm_ranges_find<std::deque<short>>/7                23.2 ns         23.9 ns
bm_ranges_find<std::deque<short>>/8                23.7 ns         24.4 ns
bm_ranges_find<std::deque<short>>/16               26.6 ns         26.8 ns
bm_ranges_find<std::deque<short>>/64               43.4 ns         39.7 ns
bm_ranges_find<std::deque<short>>/512               131 ns         90.5 ns
bm_ranges_find<std::deque<short>>/4096              851 ns          523 ns
bm_ranges_find<std::deque<short>>/32768            7370 ns         3166 ns
bm_ranges_find<std::deque<short>>/262144          60778 ns        24814 ns
bm_ranges_find<std::deque<short>>/1048576        229288 ns        99273 ns
bm_ranges_find<std::deque<int>>/1                  6.43 ns         10.2 ns
bm_ranges_find<std::deque<int>>/2                  16.6 ns         10.2 ns
bm_ranges_find<std::deque<int>>/3                  19.6 ns         10.2 ns
bm_ranges_find<std::deque<int>>/4                  21.0 ns         10.2 ns
bm_ranges_find<std::deque<int>>/5                  21.9 ns         10.4 ns
bm_ranges_find<std::deque<int>>/6                  22.7 ns         10.2 ns
bm_ranges_find<std::deque<int>>/7                  23.9 ns         10.2 ns
bm_ranges_find<std::deque<int>>/8                  23.8 ns         10.2 ns
bm_ranges_find<std::deque<int>>/16                 27.2 ns         27.1 ns
bm_ranges_find<std::deque<int>>/64                 42.4 ns         32.4 ns
bm_ranges_find<std::deque<int>>/512                 122 ns         43.0 ns
bm_ranges_find<std::deque<int>>/4096                895 ns         93.7 ns
bm_ranges_find<std::deque<int>>/32768              6890 ns          756 ns
bm_ranges_find<std::deque<int>>/262144            54025 ns         6102 ns
bm_ranges_find<std::deque<int>>/1048576          221558 ns        32783 ns
```

From d2575aa2645e491ab1417603b3fd71c618a4042e Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sat, 23 Sep 2023 10:07:31 +0200
Subject: [PATCH] [libc++] Optimize std::find for segmented iterators

Spies: libcxx-commits

Differential Revision: https://reviews.llvm.org/D157809
---
 libcxx/benchmarks/algorithms/find.bench.cpp   | 31 ++++++----
 libcxx/include/CMakeLists.txt                 |  1 +
 libcxx/include/__algorithm/find.h             | 30 +++++++++-
 libcxx/include/__algorithm/find_segment_if.h  | 56 +++++++++++++++++++
 .../alg.nonmodifying/alg.find/find.pass.cpp   | 45 +++++++++++++++
 5 files changed, 152 insertions(+), 11 deletions(-)
 create mode 100644 libcxx/include/__algorithm/find_segment_if.h

diff --git a/libcxx/benchmarks/algorithms/find.bench.cpp b/libcxx/benchmarks/algorithms/find.bench.cpp
index b87c575a16b4dcd..6ff2d95ab435333 100644
--- a/libcxx/benchmarks/algorithms/find.bench.cpp
+++ b/libcxx/benchmarks/algorithms/find.bench.cpp
@@ -9,12 +9,15 @@
 #include <algorithm>
 #include <benchmark/benchmark.h>
 #include <cstring>
+#include <deque>
 #include <random>
 #include <vector>
 
-template <class T>
+template <class Container>
 static void bm_find(benchmark::State& state) {
-  std::vector<T> vec1(state.range(), '1');
+  using T = Container::value_type;
+
+  Container vec1(state.range(), '1');
   std::mt19937_64 rng(std::random_device{}());
 
   for (auto _ : state) {
@@ -25,13 +28,18 @@ static void bm_find(benchmark::State& state) {
     vec1[idx] = '1';
   }
 }
-BENCHMARK(bm_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
 
-template <class T>
+template <class Container>
 static void bm_ranges_find(benchmark::State& state) {
-  std::vector<T> vec1(state.range(), '1');
+  using T = Container::value_type;
+
+  Container vec1(state.range(), '1');
   std::mt19937_64 rng(std::random_device{}());
 
   for (auto _ : state) {
@@ -42,9 +50,12 @@ static void bm_ranges_find(benchmark::State& state) {
     vec1[idx] = '1';
   }
 }
-BENCHMARK(bm_ranges_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_ranges_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_ranges_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
 
 static void bm_vector_bool_find(benchmark::State& state) {
   std::vector<bool> vec1(state.range(), false);
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 28028955606118e..69ee33b41debd53 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -22,6 +22,7 @@ set(files
   __algorithm/find_first_of.h
   __algorithm/find_if.h
   __algorithm/find_if_not.h
+  __algorithm/find_segment_if.h
   __algorithm/for_each.h
   __algorithm/for_each_n.h
   __algorithm/for_each_segment.h
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index d7c268bc6b338b0..69597f9ed107674 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -10,6 +10,7 @@
 #ifndef _LIBCPP___ALGORITHM_FIND_H
 #define _LIBCPP___ALGORITHM_FIND_H
 
+#include <__algorithm/find_segment_if.h>
 #include <__algorithm/min.h>
 #include <__algorithm/unwrap_iter.h>
 #include <__bit/countr.h>
@@ -118,8 +119,35 @@ __find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst>
   return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
 }
 
+// segmented iterator implementation
+
+template <class>
+struct __find_segment;
+
+template <class _SegmentedIterator,
+          class _Tp,
+          class _Proj,
+          __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
+__find_impl(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
+  return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj);
+}
+
+template <class _Tp>
+struct __find_segment {
+  const _Tp& __value_;
+
+  __find_segment(const _Tp& __value) : __value_(__value) {}
+
+  template <class _InputIterator, class _Proj>
+  _InputIterator operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) {
+    return std::__find_impl(__first, __last, __value_, __proj);
+  }
+};
+
+// public API
 template <class _InputIterator, class _Tp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
 find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
   __identity __proj;
   return std::__rewrap_iter(
diff --git a/libcxx/include/__algorithm/find_segment_if.h b/libcxx/include/__algorithm/find_segment_if.h
new file mode 100644
index 000000000000000..56ab802ea49afde
--- /dev/null
+++ b/libcxx/include/__algorithm/find_segment_if.h
@@ -0,0 +1,56 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
+#define _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
+
+#include <__config>
+#include <__iterator/segmented_iterator.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _SegmentedIterator, class _Pred, class _Proj>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
+__find_segment_if(_SegmentedIterator __first, _SegmentedIterator __last, _Pred __pred, _Proj __proj) {
+  using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
+
+  auto __sfirst = _Traits::__segment(__first);
+  auto __slast  = _Traits::__segment(__last);
+
+  // We are in a single segment, so we might not be at the beginning or end
+  if (__sfirst == __slast)
+    return _Traits::__compose(__sfirst, __pred(_Traits::__local(__first), _Traits::__local(__last), __proj));
+
+  { // We have more than one segment. Iterate over the first segment, since we might not start at the beginning
+    auto __llast = _Traits::__end(__sfirst);
+    auto __liter = __pred(_Traits::__local(__first), __llast, __proj);
+    if (__liter != __llast)
+      return _Traits::__compose(__sfirst, __liter);
+  }
+  ++__sfirst;
+
+  // Iterate over the segments which are guaranteed to be completely in the range
+  while (__sfirst != __slast) {
+    auto __llast = _Traits::__end(__sfirst);
+    auto __liter = __pred(_Traits::__begin(__sfirst), _Traits::__end(__sfirst), __proj);
+    if (__liter != __llast)
+      return _Traits::__compose(__sfirst, __liter);
+    ++__sfirst;
+  }
+
+  // Iterate over the last segment
+  return _Traits::__compose(__sfirst, __pred(_Traits::__begin(__sfirst), _Traits::__local(__last), __proj));
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
index b55a852c10cafac..040398391269733 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <deque>
 #include <vector>
 #include <type_traits>
 
@@ -113,6 +114,49 @@ struct TestTypes {
   }
 };
 
+void test_deque() {
+  { // empty deque
+    std::deque<int> data;
+    assert(std::find(data.begin(), data.end(), 4) == data.end());
+  }
+
+  { // single element - match
+    std::deque<int> data = {4};
+    assert(std::find(data.begin(), data.end(), 4) == data.begin());
+  }
+
+  { // single element - no match
+    std::deque<int> data = {3};
+    assert(std::find(data.begin(), data.end(), 4) == data.end());
+  }
+
+  // many elements
+  for (auto size : {2, 3, 1023, 1024, 1025, 2047, 2048, 2049}) {
+    { // last element match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      data[size - 1] = 4;
+      assert(std::find(data.begin(), data.end(), 4) == data.end() - 1);
+    }
+
+    { // second-last element match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      data[size - 2] = 4;
+      assert(std::find(data.begin(), data.end(), 4) == data.end() - 2);
+    }
+
+    { // no match
+      std::deque<int> data;
+      data.resize(size);
+      std::fill(data.begin(), data.end(), 3);
+      assert(std::find(data.begin(), data.end(), 4) == data.end());
+    }
+  }
+}
+
 TEST_CONSTEXPR_CXX20 bool test() {
   types::for_each(types::integer_types(), TestTypes<char>());
   types::for_each(types::integer_types(), TestTypes<int>());
@@ -126,6 +170,7 @@ TEST_CONSTEXPR_CXX20 bool test() {
 }
 
 int main(int, char**) {
+  test_deque();
   test();
 #if TEST_STD_VER >= 20
   static_assert(test());



More information about the libcxx-commits mailing list