[libcxx-commits] [libcxx] [libc++] Optimize std::find for segmented iterators (PR #67224)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Sep 23 01:19:11 PDT 2023
https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/67224
```
--------------------------------------------------------------------------
Benchmark old new
--------------------------------------------------------------------------
bm_find<std::deque<char>>/1 6.06 ns 10.6 ns
bm_find<std::deque<char>>/2 15.5 ns 10.6 ns
bm_find<std::deque<char>>/3 19.0 ns 10.6 ns
bm_find<std::deque<char>>/4 20.8 ns 10.6 ns
bm_find<std::deque<char>>/5 22.0 ns 10.6 ns
bm_find<std::deque<char>>/6 23.0 ns 10.5 ns
bm_find<std::deque<char>>/7 24.8 ns 10.7 ns
bm_find<std::deque<char>>/8 25.7 ns 10.6 ns
bm_find<std::deque<char>>/16 28.3 ns 10.6 ns
bm_find<std::deque<char>>/64 44.2 ns 27.0 ns
bm_find<std::deque<char>>/512 133 ns 37.6 ns
bm_find<std::deque<char>>/4096 867 ns 53.1 ns
bm_find<std::deque<char>>/32768 6838 ns 160 ns
bm_find<std::deque<char>>/262144 52897 ns 1495 ns
bm_find<std::deque<char>>/1048576 215621 ns 6077 ns
bm_find<std::deque<short>>/1 6.03 ns 6.28 ns
bm_find<std::deque<short>>/2 15.8 ns 15.8 ns
bm_find<std::deque<short>>/3 20.5 ns 20.3 ns
bm_find<std::deque<short>>/4 21.0 ns 21.0 ns
bm_find<std::deque<short>>/5 23.0 ns 22.1 ns
bm_find<std::deque<short>>/6 22.6 ns 23.0 ns
bm_find<std::deque<short>>/7 23.4 ns 23.7 ns
bm_find<std::deque<short>>/8 24.4 ns 24.9 ns
bm_find<std::deque<short>>/16 26.6 ns 27.2 ns
bm_find<std::deque<short>>/64 43.2 ns 40.9 ns
bm_find<std::deque<short>>/512 124 ns 90.7 ns
bm_find<std::deque<short>>/4096 845 ns 525 ns
bm_find<std::deque<short>>/32768 7273 ns 3194 ns
bm_find<std::deque<short>>/262144 53710 ns 24385 ns
bm_find<std::deque<short>>/1048576 216086 ns 96195 ns
bm_find<std::deque<int>>/1 6.03 ns 10.3 ns
bm_find<std::deque<int>>/2 15.6 ns 10.3 ns
bm_find<std::deque<int>>/3 19.1 ns 10.3 ns
bm_find<std::deque<int>>/4 22.3 ns 10.3 ns
bm_find<std::deque<int>>/5 23.5 ns 10.4 ns
bm_find<std::deque<int>>/6 23.1 ns 10.3 ns
bm_find<std::deque<int>>/7 23.7 ns 10.2 ns
bm_find<std::deque<int>>/8 24.5 ns 10.2 ns
bm_find<std::deque<int>>/16 27.9 ns 26.6 ns
bm_find<std::deque<int>>/64 42.6 ns 32.2 ns
bm_find<std::deque<int>>/512 123 ns 43.0 ns
bm_find<std::deque<int>>/4096 874 ns 93.5 ns
bm_find<std::deque<int>>/32768 7031 ns 751 ns
bm_find<std::deque<int>>/262144 57723 ns 6169 ns
bm_find<std::deque<int>>/1048576 230867 ns 35851 ns
bm_ranges_find<std::deque<char>>/1 5.97 ns 10.6 ns
bm_ranges_find<std::deque<char>>/2 16.0 ns 10.5 ns
bm_ranges_find<std::deque<char>>/3 19.5 ns 10.5 ns
bm_ranges_find<std::deque<char>>/4 21.1 ns 10.6 ns
bm_ranges_find<std::deque<char>>/5 22.8 ns 10.5 ns
bm_ranges_find<std::deque<char>>/6 22.8 ns 10.6 ns
bm_ranges_find<std::deque<char>>/7 23.4 ns 10.8 ns
bm_ranges_find<std::deque<char>>/8 24.1 ns 10.5 ns
bm_ranges_find<std::deque<char>>/16 26.9 ns 10.6 ns
bm_ranges_find<std::deque<char>>/64 50.2 ns 27.2 ns
bm_ranges_find<std::deque<char>>/512 126 ns 38.3 ns
bm_ranges_find<std::deque<char>>/4096 868 ns 53.8 ns
bm_ranges_find<std::deque<char>>/32768 6695 ns 161 ns
bm_ranges_find<std::deque<char>>/262144 54411 ns 1497 ns
bm_ranges_find<std::deque<char>>/1048576 241699 ns 6042 ns
bm_ranges_find<std::deque<short>>/1 6.39 ns 6.31 ns
bm_ranges_find<std::deque<short>>/2 15.8 ns 15.9 ns
bm_ranges_find<std::deque<short>>/3 19.0 ns 19.8 ns
bm_ranges_find<std::deque<short>>/4 20.8 ns 20.9 ns
bm_ranges_find<std::deque<short>>/5 21.8 ns 22.1 ns
bm_ranges_find<std::deque<short>>/6 23.0 ns 23.0 ns
bm_ranges_find<std::deque<short>>/7 23.2 ns 23.9 ns
bm_ranges_find<std::deque<short>>/8 23.7 ns 24.4 ns
bm_ranges_find<std::deque<short>>/16 26.6 ns 26.8 ns
bm_ranges_find<std::deque<short>>/64 43.4 ns 39.7 ns
bm_ranges_find<std::deque<short>>/512 131 ns 90.5 ns
bm_ranges_find<std::deque<short>>/4096 851 ns 523 ns
bm_ranges_find<std::deque<short>>/32768 7370 ns 3166 ns
bm_ranges_find<std::deque<short>>/262144 60778 ns 24814 ns
bm_ranges_find<std::deque<short>>/1048576 229288 ns 99273 ns
bm_ranges_find<std::deque<int>>/1 6.43 ns 10.2 ns
bm_ranges_find<std::deque<int>>/2 16.6 ns 10.2 ns
bm_ranges_find<std::deque<int>>/3 19.6 ns 10.2 ns
bm_ranges_find<std::deque<int>>/4 21.0 ns 10.2 ns
bm_ranges_find<std::deque<int>>/5 21.9 ns 10.4 ns
bm_ranges_find<std::deque<int>>/6 22.7 ns 10.2 ns
bm_ranges_find<std::deque<int>>/7 23.9 ns 10.2 ns
bm_ranges_find<std::deque<int>>/8 23.8 ns 10.2 ns
bm_ranges_find<std::deque<int>>/16 27.2 ns 27.1 ns
bm_ranges_find<std::deque<int>>/64 42.4 ns 32.4 ns
bm_ranges_find<std::deque<int>>/512 122 ns 43.0 ns
bm_ranges_find<std::deque<int>>/4096 895 ns 93.7 ns
bm_ranges_find<std::deque<int>>/32768 6890 ns 756 ns
bm_ranges_find<std::deque<int>>/262144 54025 ns 6102 ns
bm_ranges_find<std::deque<int>>/1048576 221558 ns 32783 ns
```
From d2575aa2645e491ab1417603b3fd71c618a4042e Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sat, 23 Sep 2023 10:07:31 +0200
Subject: [PATCH] [libc++] Optimize std::find for segmented iterators
Spies: libcxx-commits
Differential Revision: https://reviews.llvm.org/D157809
---
libcxx/benchmarks/algorithms/find.bench.cpp | 31 ++++++----
libcxx/include/CMakeLists.txt | 1 +
libcxx/include/__algorithm/find.h | 30 +++++++++-
libcxx/include/__algorithm/find_segment_if.h | 56 +++++++++++++++++++
.../alg.nonmodifying/alg.find/find.pass.cpp | 45 +++++++++++++++
5 files changed, 152 insertions(+), 11 deletions(-)
create mode 100644 libcxx/include/__algorithm/find_segment_if.h
diff --git a/libcxx/benchmarks/algorithms/find.bench.cpp b/libcxx/benchmarks/algorithms/find.bench.cpp
index b87c575a16b4dcd..6ff2d95ab435333 100644
--- a/libcxx/benchmarks/algorithms/find.bench.cpp
+++ b/libcxx/benchmarks/algorithms/find.bench.cpp
@@ -9,12 +9,15 @@
#include <algorithm>
#include <benchmark/benchmark.h>
#include <cstring>
+#include <deque>
#include <random>
#include <vector>
-template <class T>
+template <class Container>
static void bm_find(benchmark::State& state) {
- std::vector<T> vec1(state.range(), '1');
+ using T = Container::value_type;
+
+ Container vec1(state.range(), '1');
std::mt19937_64 rng(std::random_device{}());
for (auto _ : state) {
@@ -25,13 +28,18 @@ static void bm_find(benchmark::State& state) {
vec1[idx] = '1';
}
}
-BENCHMARK(bm_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
-template <class T>
+template <class Container>
static void bm_ranges_find(benchmark::State& state) {
- std::vector<T> vec1(state.range(), '1');
+ using T = Container::value_type;
+
+ Container vec1(state.range(), '1');
std::mt19937_64 rng(std::random_device{}());
for (auto _ : state) {
@@ -42,9 +50,12 @@ static void bm_ranges_find(benchmark::State& state) {
vec1[idx] = '1';
}
}
-BENCHMARK(bm_ranges_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_ranges_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
-BENCHMARK(bm_ranges_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
static void bm_vector_bool_find(benchmark::State& state) {
std::vector<bool> vec1(state.range(), false);
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 28028955606118e..69ee33b41debd53 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -22,6 +22,7 @@ set(files
__algorithm/find_first_of.h
__algorithm/find_if.h
__algorithm/find_if_not.h
+ __algorithm/find_segment_if.h
__algorithm/for_each.h
__algorithm/for_each_n.h
__algorithm/for_each_segment.h
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index d7c268bc6b338b0..69597f9ed107674 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -10,6 +10,7 @@
#ifndef _LIBCPP___ALGORITHM_FIND_H
#define _LIBCPP___ALGORITHM_FIND_H
+#include <__algorithm/find_segment_if.h>
#include <__algorithm/min.h>
#include <__algorithm/unwrap_iter.h>
#include <__bit/countr.h>
@@ -118,8 +119,35 @@ __find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst>
return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
}
+// segmented iterator implementation
+
+template <class>
+struct __find_segment;
+
+template <class _SegmentedIterator,
+ class _Tp,
+ class _Proj,
+ __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
+__find_impl(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
+ return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj);
+}
+
+template <class _Tp>
+struct __find_segment {
+ const _Tp& __value_;
+
+ __find_segment(const _Tp& __value) : __value_(__value) {}
+
+ template <class _InputIterator, class _Proj>
+ _InputIterator operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) {
+ return std::__find_impl(__first, __last, __value_, __proj);
+ }
+};
+
+// public API
template <class _InputIterator, class _Tp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
__identity __proj;
return std::__rewrap_iter(
diff --git a/libcxx/include/__algorithm/find_segment_if.h b/libcxx/include/__algorithm/find_segment_if.h
new file mode 100644
index 000000000000000..56ab802ea49afde
--- /dev/null
+++ b/libcxx/include/__algorithm/find_segment_if.h
@@ -0,0 +1,56 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
+#define _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
+
+#include <__config>
+#include <__iterator/segmented_iterator.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _SegmentedIterator, class _Pred, class _Proj>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
+__find_segment_if(_SegmentedIterator __first, _SegmentedIterator __last, _Pred __pred, _Proj __proj) {
+ using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
+
+ auto __sfirst = _Traits::__segment(__first);
+ auto __slast = _Traits::__segment(__last);
+
+ // We are in a single segment, so we might not be at the beginning or end
+ if (__sfirst == __slast)
+ return _Traits::__compose(__sfirst, __pred(_Traits::__local(__first), _Traits::__local(__last), __proj));
+
+ { // We have more than one segment. Iterate over the first segment, since we might not start at the beginning
+ auto __llast = _Traits::__end(__sfirst);
+ auto __liter = __pred(_Traits::__local(__first), __llast, __proj);
+ if (__liter != __llast)
+ return _Traits::__compose(__sfirst, __liter);
+ }
+ ++__sfirst;
+
+ // Iterate over the segments which are guaranteed to be completely in the range
+ while (__sfirst != __slast) {
+ auto __llast = _Traits::__end(__sfirst);
+ auto __liter = __pred(_Traits::__begin(__sfirst), _Traits::__end(__sfirst), __proj);
+ if (__liter != __llast)
+ return _Traits::__compose(__sfirst, __liter);
+ ++__sfirst;
+ }
+
+ // Iterate over the last segment
+ return _Traits::__compose(__sfirst, __pred(_Traits::__begin(__sfirst), _Traits::__local(__last), __proj));
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ALGORITHM_FIND_SEGMENT_IF_H
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
index b55a852c10cafac..040398391269733 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp
@@ -17,6 +17,7 @@
#include <algorithm>
#include <cassert>
+#include <deque>
#include <vector>
#include <type_traits>
@@ -113,6 +114,49 @@ struct TestTypes {
}
};
+void test_deque() {
+ { // empty deque
+ std::deque<int> data;
+ assert(std::find(data.begin(), data.end(), 4) == data.end());
+ }
+
+ { // single element - match
+ std::deque<int> data = {4};
+ assert(std::find(data.begin(), data.end(), 4) == data.begin());
+ }
+
+ { // single element - no match
+ std::deque<int> data = {3};
+ assert(std::find(data.begin(), data.end(), 4) == data.end());
+ }
+
+ // many elements
+ for (auto size : {2, 3, 1023, 1024, 1025, 2047, 2048, 2049}) {
+ { // last element match
+ std::deque<int> data;
+ data.resize(size);
+ std::fill(data.begin(), data.end(), 3);
+ data[size - 1] = 4;
+ assert(std::find(data.begin(), data.end(), 4) == data.end() - 1);
+ }
+
+ { // second-last element match
+ std::deque<int> data;
+ data.resize(size);
+ std::fill(data.begin(), data.end(), 3);
+ data[size - 2] = 4;
+ assert(std::find(data.begin(), data.end(), 4) == data.end() - 2);
+ }
+
+ { // no match
+ std::deque<int> data;
+ data.resize(size);
+ std::fill(data.begin(), data.end(), 3);
+ assert(std::find(data.begin(), data.end(), 4) == data.end());
+ }
+ }
+}
+
TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::integer_types(), TestTypes<char>());
types::for_each(types::integer_types(), TestTypes<int>());
@@ -126,6 +170,7 @@ TEST_CONSTEXPR_CXX20 bool test() {
}
int main(int, char**) {
+ test_deque();
test();
#if TEST_STD_VER >= 20
static_assert(test());
More information about the libcxx-commits mailing list