[libcxx-commits] [libcxx] [libcxx] Unwrap iterators in __find_segment (PR #161274)

via libcxx-commits libcxx-commits at lists.llvm.org
Mon Sep 29 13:49:28 PDT 2025


https://github.com/lbonn created https://github.com/llvm/llvm-project/pull/161274

The segmented-iterator-optimized implementation of find now unwraps iterators when processing each segment.

As a result, it is able to take better advantage of some find
specializations: calling memchr/wmemchr for vector<vector<{char,int}>>

```
Benchmark                                                         Baseline    Candidate    Difference    % Difference
--------------------------------------------------------------  ----------  -----------  ------------  --------------
rng::find(join_view(deque<deque<int>>))_(process_all)/1024           71.13        61.19         -9.94          -13.97
rng::find(join_view(deque<deque<int>>))_(process_all)/32768        2359.19      2237.02       -122.17           -5.18
rng::find(join_view(deque<deque<int>>))_(process_all)/50             16.88        17.59          0.71            4.20
rng::find(join_view(deque<deque<int>>))_(process_all)/8              15.59        16.10          0.51            3.27
rng::find(join_view(deque<deque<int>>))_(process_all)/8192          647.01       532.75       -114.26          -17.66
rng::find(join_view(list<vector<int>>))_(process_all)/1024          689.76       680.74         -9.02           -1.31
rng::find(join_view(list<vector<int>>))_(process_all)/32768       22284.95     21500.26       -784.69           -3.52
rng::find(join_view(list<vector<int>>))_(process_all)/50             32.77        32.12         -0.65           -1.98
rng::find(join_view(list<vector<int>>))_(process_all)/8               6.11         5.92         -0.19           -3.11
rng::find(join_view(list<vector<int>>))_(process_all)/8192         5527.88      5373.43       -154.45           -2.79
rng::find(join_view(vector<list<int>>))_(process_all)/1024         1305.59      1264.04        -41.55           -3.18
rng::find(join_view(vector<list<int>>))_(process_all)/32768       42840.88     43322.64        481.76            1.12
rng::find(join_view(vector<list<int>>))_(process_all)/50             57.52        62.35          4.82            8.38
rng::find(join_view(vector<list<int>>))_(process_all)/8               6.06         5.98         -0.07           -1.18
rng::find(join_view(vector<list<int>>))_(process_all)/8192        20700.53     21431.66        731.12            3.53
rng::find(join_view(vector<vector<char>>))_(process_all)/1024       310.64        18.34       -292.30          -94.09
rng::find(join_view(vector<vector<char>>))_(process_all)/32768     9424.96       531.99      -8892.97          -94.36
rng::find(join_view(vector<vector<char>>))_(process_all)/50          18.58         3.25        -15.32          -82.49
rng::find(join_view(vector<vector<char>>))_(process_all)/8            4.81         2.98         -1.84          -38.13
rng::find(join_view(vector<vector<char>>))_(process_all)/8192      2437.50       126.88      -2310.62          -94.79
rng::find(join_view(vector<vector<int>>))_(process_all)/1024        297.10        41.70       -255.39          -85.96
rng::find(join_view(vector<vector<int>>))_(process_all)/32768      9662.42      1822.05      -7840.36          -81.14
rng::find(join_view(vector<vector<int>>))_(process_all)/50           22.29         5.10        -17.19          -77.11
rng::find(join_view(vector<vector<int>>))_(process_all)/8             3.73         3.13         -0.60          -16.05
rng::find(join_view(vector<vector<int>>))_(process_all)/8192       2399.68       356.10      -2043.58          -85.16
```

>From d426559752344901a354505f0b992a7241a262fe Mon Sep 17 00:00:00 2001
From: Laurent Bonnans <github at lbonnans.net>
Date: Sun, 28 Sep 2025 23:31:17 +0200
Subject: [PATCH] [libcxx] Unwrap iterators in __find_segment

The segmented-iterator-optimized implementation of find now unwraps
iterators when processing each segment.

As a result, it is able to take better advantage of some find
specializations: calling memchr/wmemchr for vector<vector<{char,int}>>

```
Benchmark                                                         Baseline    Candidate    Difference    % Difference
--------------------------------------------------------------  ----------  -----------  ------------  --------------
rng::find(join_view(deque<deque<int>>))_(process_all)/1024           71.13        61.19         -9.94          -13.97
rng::find(join_view(deque<deque<int>>))_(process_all)/32768        2359.19      2237.02       -122.17           -5.18
rng::find(join_view(deque<deque<int>>))_(process_all)/50             16.88        17.59          0.71            4.20
rng::find(join_view(deque<deque<int>>))_(process_all)/8              15.59        16.10          0.51            3.27
rng::find(join_view(deque<deque<int>>))_(process_all)/8192          647.01       532.75       -114.26          -17.66
rng::find(join_view(list<vector<int>>))_(process_all)/1024          689.76       680.74         -9.02           -1.31
rng::find(join_view(list<vector<int>>))_(process_all)/32768       22284.95     21500.26       -784.69           -3.52
rng::find(join_view(list<vector<int>>))_(process_all)/50             32.77        32.12         -0.65           -1.98
rng::find(join_view(list<vector<int>>))_(process_all)/8               6.11         5.92         -0.19           -3.11
rng::find(join_view(list<vector<int>>))_(process_all)/8192         5527.88      5373.43       -154.45           -2.79
rng::find(join_view(vector<list<int>>))_(process_all)/1024         1305.59      1264.04        -41.55           -3.18
rng::find(join_view(vector<list<int>>))_(process_all)/32768       42840.88     43322.64        481.76            1.12
rng::find(join_view(vector<list<int>>))_(process_all)/50             57.52        62.35          4.82            8.38
rng::find(join_view(vector<list<int>>))_(process_all)/8               6.06         5.98         -0.07           -1.18
rng::find(join_view(vector<list<int>>))_(process_all)/8192        20700.53     21431.66        731.12            3.53
rng::find(join_view(vector<vector<char>>))_(process_all)/1024       310.64        18.34       -292.30          -94.09
rng::find(join_view(vector<vector<char>>))_(process_all)/32768     9424.96       531.99      -8892.97          -94.36
rng::find(join_view(vector<vector<char>>))_(process_all)/50          18.58         3.25        -15.32          -82.49
rng::find(join_view(vector<vector<char>>))_(process_all)/8            4.81         2.98         -1.84          -38.13
rng::find(join_view(vector<vector<char>>))_(process_all)/8192      2437.50       126.88      -2310.62          -94.79
rng::find(join_view(vector<vector<int>>))_(process_all)/1024        297.10        41.70       -255.39          -85.96
rng::find(join_view(vector<vector<int>>))_(process_all)/32768      9662.42      1822.05      -7840.36          -81.14
rng::find(join_view(vector<vector<int>>))_(process_all)/50           22.29         5.10        -17.19          -77.11
rng::find(join_view(vector<vector<int>>))_(process_all)/8             3.73         3.13         -0.60          -16.05
rng::find(join_view(vector<vector<int>>))_(process_all)/8192       2399.68       356.10      -2043.58          -85.16
```
---
 libcxx/include/__algorithm/find.h             |  3 +-
 .../algorithms/nonmodifying/find.bench.cpp    | 53 +++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 5f32ae8fc9524..91c6a4e744a71 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -228,7 +228,8 @@ struct __find_segment {
   template <class _InputIterator, class _Proj>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
   operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
-    return std::__find(__first, __last, __value_, __proj);
+    return std::__rewrap_iter(
+        __first, std::__find(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value_, __proj));
   }
 };
 
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
index afea31fb59e95..7780b5a92a6c4 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
@@ -12,6 +12,7 @@
 #include <cstddef>
 #include <deque>
 #include <list>
+#include <ranges>
 #include <string>
 #include <vector>
 
@@ -83,6 +84,20 @@ int main(int argc, char** argv) {
     bm.template operator()<std::list<int>>("rng::find_if_not(list<int>) (" + comment + ")", ranges_find_if_not);
   };
 
+  auto register_nested_container_benchmarks = [&](auto bm, std::string comment) {
+    // ranges_find
+    bm.template operator()<std::vector<std::vector<char>>>(
+        "rng::find(join_view(vector<vector<char>>)) (" + comment + ")", ranges_find);
+    bm.template operator()<std::vector<std::vector<int>>>(
+        "rng::find(join_view(vector<vector<int>>)) (" + comment + ")", ranges_find);
+    bm.template operator()<std::list<std::vector<int>>>(
+        "rng::find(join_view(list<vector<int>>)) (" + comment + ")", ranges_find);
+    bm.template operator()<std::vector<std::list<int>>>(
+        "rng::find(join_view(vector<list<int>>)) (" + comment + ")", ranges_find);
+    bm.template operator()<std::deque<std::deque<int>>>(
+        "rng::find(join_view(deque<deque<int>>)) (" + comment + ")", ranges_find);
+  };
+
   // Benchmark {std,ranges}::{find,find_if,find_if_not}(normal container) where we
   // bail out after 25% of elements
   {
@@ -142,6 +157,44 @@ int main(int argc, char** argv) {
     register_benchmarks(bm, "process all");
   }
 
+  // Benchmark ranges::find(join_view(nested container)) where we process the whole sequence
+  {
+    auto bm = []<class Container>(std::string name, auto find) {
+      benchmark::RegisterBenchmark(
+          name,
+          [find](auto& st) {
+            std::size_t const size     = st.range(0);
+            std::size_t const seg_size = 256;
+            std::size_t const segments = (size + seg_size - 1) / seg_size;
+            using C1                   = typename Container::value_type;
+            using ValueType            = typename C1::value_type;
+            ValueType x                = Generate<ValueType>::random();
+            ValueType y                = random_different_from({x});
+            Container c(segments);
+            auto n = size;
+            for (auto it = c.begin(); it != c.end(); it++) {
+              it->resize(std::min(seg_size, n), x);
+              n -= it->size();
+            }
+
+            auto view = c | std::views::join;
+
+            for ([[maybe_unused]] auto _ : st) {
+              benchmark::DoNotOptimize(c);
+              benchmark::DoNotOptimize(y);
+              auto result = find(view.begin(), view.end(), y);
+              benchmark::DoNotOptimize(result);
+            }
+          })
+          ->Arg(8)
+          ->Arg(50) // non power-of-two
+          ->Arg(1024)
+          ->Arg(8192)
+          ->Arg(1 << 15);
+    };
+    register_nested_container_benchmarks(bm, "process all");
+  }
+
   // Benchmark {std,ranges}::{find,find_if,find_if_not}(vector<bool>) where we process the whole sequence
   {
     auto bm = [](std::string name, auto find) {



More information about the libcxx-commits mailing list