[libcxx-commits] [libcxx] [libcxx] Unwrap iterators in __find_segment (PR #161274)
via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Sep 29 13:49:28 PDT 2025
https://github.com/lbonn created https://github.com/llvm/llvm-project/pull/161274
The segmented iterator optimized implementation of find now unwraps iterators when processing each segments.
As a result, it is able to take better advantage to some find
specializations: calling memchr/wmemchr for vector<vector<{char,int}>>
```
Benchmark Baseline Candidate Difference % Difference
-------------------------------------------------------------- ---------- ----------- ------------ --------------
rng::find(join_view(deque<deque<int>>))_(process_all)/1024 71.13 61.19 -9.94 -13.97
rng::find(join_view(deque<deque<int>>))_(process_all)/32768 2359.19 2237.02 -122.17 -5.18
rng::find(join_view(deque<deque<int>>))_(process_all)/50 16.88 17.59 0.71 4.20
rng::find(join_view(deque<deque<int>>))_(process_all)/8 15.59 16.10 0.51 3.27
rng::find(join_view(deque<deque<int>>))_(process_all)/8192 647.01 532.75 -114.26 -17.66
rng::find(join_view(list<vector<int>>))_(process_all)/1024 689.76 680.74 -9.02 -1.31
rng::find(join_view(list<vector<int>>))_(process_all)/32768 22284.95 21500.26 -784.69 -3.52
rng::find(join_view(list<vector<int>>))_(process_all)/50 32.77 32.12 -0.65 -1.98
rng::find(join_view(list<vector<int>>))_(process_all)/8 6.11 5.92 -0.19 -3.11
rng::find(join_view(list<vector<int>>))_(process_all)/8192 5527.88 5373.43 -154.45 -2.79
rng::find(join_view(vector<list<int>>))_(process_all)/1024 1305.59 1264.04 -41.55 -3.18
rng::find(join_view(vector<list<int>>))_(process_all)/32768 42840.88 43322.64 481.76 1.12
rng::find(join_view(vector<list<int>>))_(process_all)/50 57.52 62.35 4.82 8.38
rng::find(join_view(vector<list<int>>))_(process_all)/8 6.06 5.98 -0.07 -1.18
rng::find(join_view(vector<list<int>>))_(process_all)/8192 20700.53 21431.66 731.12 3.53
rng::find(join_view(vector<vector<char>>))_(process_all)/1024 310.64 18.34 -292.30 -94.09
rng::find(join_view(vector<vector<char>>))_(process_all)/32768 9424.96 531.99 -8892.97 -94.36
rng::find(join_view(vector<vector<char>>))_(process_all)/50 18.58 3.25 -15.32 -82.49
rng::find(join_view(vector<vector<char>>))_(process_all)/8 4.81 2.98 -1.84 -38.13
rng::find(join_view(vector<vector<char>>))_(process_all)/8192 2437.50 126.88 -2310.62 -94.79
rng::find(join_view(vector<vector<int>>))_(process_all)/1024 297.10 41.70 -255.39 -85.96
rng::find(join_view(vector<vector<int>>))_(process_all)/32768 9662.42 1822.05 -7840.36 -81.14
rng::find(join_view(vector<vector<int>>))_(process_all)/50 22.29 5.10 -17.19 -77.11
rng::find(join_view(vector<vector<int>>))_(process_all)/8 3.73 3.13 -0.60 -16.05
rng::find(join_view(vector<vector<int>>))_(process_all)/8192 2399.68 356.10 -2043.58 -85.16
```
>From d426559752344901a354505f0b992a7241a262fe Mon Sep 17 00:00:00 2001
From: Laurent Bonnans <github at lbonnans.net>
Date: Sun, 28 Sep 2025 23:31:17 +0200
Subject: [PATCH] [libcxx] Unwrap iterators in __find_segment
The segmented iterator optimized implementation of find now unwraps
iterators when processing each segments.
As a result, it is able to take better advantage to some find
specializations: calling memchr/wmemchr for vector<vector<{char,int}>>
```
Benchmark Baseline Candidate Difference % Difference
-------------------------------------------------------------- ---------- ----------- ------------ --------------
rng::find(join_view(deque<deque<int>>))_(process_all)/1024 71.13 61.19 -9.94 -13.97
rng::find(join_view(deque<deque<int>>))_(process_all)/32768 2359.19 2237.02 -122.17 -5.18
rng::find(join_view(deque<deque<int>>))_(process_all)/50 16.88 17.59 0.71 4.20
rng::find(join_view(deque<deque<int>>))_(process_all)/8 15.59 16.10 0.51 3.27
rng::find(join_view(deque<deque<int>>))_(process_all)/8192 647.01 532.75 -114.26 -17.66
rng::find(join_view(list<vector<int>>))_(process_all)/1024 689.76 680.74 -9.02 -1.31
rng::find(join_view(list<vector<int>>))_(process_all)/32768 22284.95 21500.26 -784.69 -3.52
rng::find(join_view(list<vector<int>>))_(process_all)/50 32.77 32.12 -0.65 -1.98
rng::find(join_view(list<vector<int>>))_(process_all)/8 6.11 5.92 -0.19 -3.11
rng::find(join_view(list<vector<int>>))_(process_all)/8192 5527.88 5373.43 -154.45 -2.79
rng::find(join_view(vector<list<int>>))_(process_all)/1024 1305.59 1264.04 -41.55 -3.18
rng::find(join_view(vector<list<int>>))_(process_all)/32768 42840.88 43322.64 481.76 1.12
rng::find(join_view(vector<list<int>>))_(process_all)/50 57.52 62.35 4.82 8.38
rng::find(join_view(vector<list<int>>))_(process_all)/8 6.06 5.98 -0.07 -1.18
rng::find(join_view(vector<list<int>>))_(process_all)/8192 20700.53 21431.66 731.12 3.53
rng::find(join_view(vector<vector<char>>))_(process_all)/1024 310.64 18.34 -292.30 -94.09
rng::find(join_view(vector<vector<char>>))_(process_all)/32768 9424.96 531.99 -8892.97 -94.36
rng::find(join_view(vector<vector<char>>))_(process_all)/50 18.58 3.25 -15.32 -82.49
rng::find(join_view(vector<vector<char>>))_(process_all)/8 4.81 2.98 -1.84 -38.13
rng::find(join_view(vector<vector<char>>))_(process_all)/8192 2437.50 126.88 -2310.62 -94.79
rng::find(join_view(vector<vector<int>>))_(process_all)/1024 297.10 41.70 -255.39 -85.96
rng::find(join_view(vector<vector<int>>))_(process_all)/32768 9662.42 1822.05 -7840.36 -81.14
rng::find(join_view(vector<vector<int>>))_(process_all)/50 22.29 5.10 -17.19 -77.11
rng::find(join_view(vector<vector<int>>))_(process_all)/8 3.73 3.13 -0.60 -16.05
rng::find(join_view(vector<vector<int>>))_(process_all)/8192 2399.68 356.10 -2043.58 -85.16
```
---
libcxx/include/__algorithm/find.h | 3 +-
.../algorithms/nonmodifying/find.bench.cpp | 53 +++++++++++++++++++
2 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 5f32ae8fc9524..91c6a4e744a71 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -228,7 +228,8 @@ struct __find_segment {
template <class _InputIterator, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
- return std::__find(__first, __last, __value_, __proj);
+ return std::__rewrap_iter(
+ __first, std::__find(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value_, __proj));
}
};
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
index afea31fb59e95..7780b5a92a6c4 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
@@ -12,6 +12,7 @@
#include <cstddef>
#include <deque>
#include <list>
+#include <ranges>
#include <string>
#include <vector>
@@ -83,6 +84,20 @@ int main(int argc, char** argv) {
bm.template operator()<std::list<int>>("rng::find_if_not(list<int>) (" + comment + ")", ranges_find_if_not);
};
+ auto register_nested_container_benchmarks = [&](auto bm, std::string comment) {
+ // ranges_find
+ bm.template operator()<std::vector<std::vector<char>>>(
+ "rng::find(join_view(vector<vector<char>>)) (" + comment + ")", ranges_find);
+ bm.template operator()<std::vector<std::vector<int>>>(
+ "rng::find(join_view(vector<vector<int>>)) (" + comment + ")", ranges_find);
+ bm.template operator()<std::list<std::vector<int>>>(
+ "rng::find(join_view(list<vector<int>>)) (" + comment + ")", ranges_find);
+ bm.template operator()<std::vector<std::list<int>>>(
+ "rng::find(join_view(vector<list<int>>)) (" + comment + ")", ranges_find);
+ bm.template operator()<std::deque<std::deque<int>>>(
+ "rng::find(join_view(deque<deque<int>>)) (" + comment + ")", ranges_find);
+ };
+
// Benchmark {std,ranges}::{find,find_if,find_if_not}(normal container) where we
// bail out after 25% of elements
{
@@ -142,6 +157,44 @@ int main(int argc, char** argv) {
register_benchmarks(bm, "process all");
}
+ // Benchmark {std,ranges}::{find,find_if,find_if_not}(join(normal container)) where we process the whole sequence
+ {
+ auto bm = []<class Container>(std::string name, auto find) {
+ benchmark::RegisterBenchmark(
+ name,
+ [find](auto& st) {
+ std::size_t const size = st.range(0);
+ std::size_t const seg_size = 256;
+ std::size_t const segments = (size + seg_size - 1) / seg_size;
+ using C1 = typename Container::value_type;
+ using ValueType = typename C1::value_type;
+ ValueType x = Generate<ValueType>::random();
+ ValueType y = random_different_from({x});
+ Container c(segments);
+ auto n = size;
+ for (auto it = c.begin(); it != c.end(); it++) {
+ it->resize(std::min(seg_size, n), x);
+ n -= it->size();
+ }
+
+ auto view = c | std::views::join;
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ benchmark::DoNotOptimize(y);
+ auto result = find(view.begin(), view.end(), y);
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(8)
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(8192)
+ ->Arg(1 << 15);
+ };
+ register_nested_container_benchmarks(bm, "process all");
+ }
+
// Benchmark {std,ranges}::{find,find_if,find_if_not}(vector<bool>) where we process the whole sequence
{
auto bm = [](std::string name, auto find) {
More information about the libcxx-commits
mailing list