[libcxx-commits] [libcxx] [libc++] Speed up set_intersection() by fast-forwarding over ranges of non-matching elements with one-sided binary search. (PR #75230)

Iuri Chaer via libcxx-commits libcxx-commits at lists.llvm.org
Tue Jul 16 08:48:12 PDT 2024


https://github.com/ichaer updated https://github.com/llvm/llvm-project/pull/75230

>From b65415f5b70591eae965cae1316054145d399158 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:52:13 +0100
Subject: [PATCH 01/56] [libc++][test] Add lower_bound complexity validation
 tests prior to introducing one-sided binary search for non-random iterators.

---
 .../lower.bound/lower_bound.pass.cpp          | 19 +++++--
 .../lower.bound/lower_bound_comp.pass.cpp     | 28 ++++++++--
 libcxx/test/support/test_iterators.h          | 55 ++++++++++++++-----
 3 files changed, 79 insertions(+), 23 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
index a2d8ab632303c..5c11962d13777 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
@@ -39,11 +39,20 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-    Iter i = std::lower_bound(first, last, value);
-    for (Iter j = first; j != i; ++j)
-        assert(*j < value);
-    for (Iter j = i; j != last; ++j)
-        assert(!(*j < value));
+  std::size_t strides{};
+  std::size_t displacement{};
+  stride_counting_iterator f(first, &strides, &displacement);
+  stride_counting_iterator l(last, &strides, &displacement);
+
+  auto i = std::lower_bound(f, l, value);
+  for (auto j = f; j != i; ++j)
+    assert(*j < value);
+  for (auto j = i; j != l; ++j)
+    assert(!(*j < value));
+
+  auto len = std::distance(first, last);
+  assert(strides <= 2.5 * len + 1);
+  assert(displacement <= 2.5 * len + 1);
 }
 
 template <class Iter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
index b9133028d9ade..05fd43eada461 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
@@ -17,6 +17,7 @@
 #include <vector>
 #include <cassert>
 #include <cstddef>
+#include <cmath>
 
 #include "test_macros.h"
 #include "test_iterators.h"
@@ -38,11 +39,28 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-    Iter i = std::lower_bound(first, last, value, std::greater<int>());
-    for (Iter j = first; j != i; ++j)
-        assert(std::greater<int>()(*j, value));
-    for (Iter j = i; j != last; ++j)
-        assert(!std::greater<int>()(*j, value));
+  std::size_t strides{};
+  std::size_t displacement{};
+  stride_counting_iterator f(first, &strides, &displacement);
+  stride_counting_iterator l(last, &strides, &displacement);
+
+  std::size_t comparisons{};
+  auto cmp = [&comparisons](int rhs, int lhs) {
+    ++comparisons;
+    return std::greater<int>()(rhs, lhs);
+  };
+
+  auto i = std::lower_bound(f, l, value, cmp);
+
+  for (auto j = f; j != i; ++j)
+    assert(std::greater<int>()(*j, value));
+  for (auto j = i; j != l; ++j)
+    assert(!std::greater<int>()(*j, value));
+
+  auto len = std::distance(first, last);
+  assert(strides <= 2.5 * len + 1);
+  assert(displacement <= 2.5 * len + 1);
+  assert(comparisons <= 2 * ceil(log(len + 1) + 2));
 }
 
 template <class Iter>
diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 1133b9597d09c..3b86a93564e4b 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -730,7 +730,9 @@ struct common_input_iterator {
 // * `stride_displacement`, which records the displacement of the calls. This means that both
 //   op++/op+= will increase the displacement counter by 1, and op--/op-= will decrease the
 //   displacement counter by 1.
-template <class It>
+template <class It,
+          class StrideCountType        = std::iter_difference_t<It>,
+          class StrideDisplacementType = std::iter_difference_t<It>>
 class stride_counting_iterator {
 public:
     using value_type = typename iter_value_or_void<It>::type;
@@ -743,16 +745,40 @@ class stride_counting_iterator {
         std::conditional_t<std::input_iterator<It>,         std::input_iterator_tag,
         /* else */                                          std::output_iterator_tag
     >>>>>;
+    using iterator_category = iterator_concept;
 
     stride_counting_iterator() requires std::default_initializable<It> = default;
 
     constexpr explicit stride_counting_iterator(It const& it) : base_(base(it)) { }
 
+    constexpr explicit stride_counting_iterator(
+        It const& it, StrideCountType* stride_count, StrideDisplacementType* stride_displacement)
+        : base_(base(it)), stride_count_(stride_count), stride_displacement_(stride_displacement) {}
+
+    constexpr stride_counting_iterator(const stride_counting_iterator& o) { *this = o; }
+    constexpr stride_counting_iterator(stride_counting_iterator&& o) { *this = o; }
+
+    constexpr stride_counting_iterator& operator=(const stride_counting_iterator& o) {
+      base_ = o.base_;
+      // if memory backing count is owned by the object, copy values
+      if (o.stride_count_ == &o.stride_count_default_) {
+        assert(o.stride_displacement_ == &o.stride_displacement_default_);
+        *stride_count_        = *o.stride_count_;
+        *stride_displacement_ = *o.stride_displacement_;
+        return *this;
+      }
+      // otherwise share the same externally-owned variables
+      stride_count_        = o.stride_count_;
+      stride_displacement_ = o.stride_displacement_;
+      return *this;
+    }
+    constexpr stride_counting_iterator& operator=(stride_counting_iterator&& o) { return *this = o; }
+
     friend constexpr It base(stride_counting_iterator const& it) { return It(it.base_); }
 
-    constexpr difference_type stride_count() const { return stride_count_; }
+    constexpr StrideCountType stride_count() const { return *stride_count_; }
 
-    constexpr difference_type stride_displacement() const { return stride_displacement_; }
+    constexpr StrideDisplacementType stride_displacement() const { return *stride_displacement_; }
 
     constexpr decltype(auto) operator*() const { return *It(base_); }
 
@@ -761,8 +787,8 @@ class stride_counting_iterator {
     constexpr stride_counting_iterator& operator++() {
         It tmp(base_);
         base_ = base(++tmp);
-        ++stride_count_;
-        ++stride_displacement_;
+        ++*stride_count_;
+        ++*stride_displacement_;
         return *this;
     }
 
@@ -781,8 +807,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(--tmp);
-        ++stride_count_;
-        --stride_displacement_;
+        ++*stride_count_;
+        --*stride_displacement_;
         return *this;
     }
 
@@ -799,8 +825,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(tmp += n);
-        ++stride_count_;
-        ++stride_displacement_;
+        ++*stride_count_;
+        ++*stride_displacement_;
         return *this;
     }
 
@@ -809,8 +835,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(tmp -= n);
-        ++stride_count_;
-        --stride_displacement_;
+        ++*stride_count_;
+        --*stride_displacement_;
         return *this;
     }
 
@@ -873,8 +899,11 @@ class stride_counting_iterator {
 
 private:
     decltype(base(std::declval<It>())) base_;
-    difference_type stride_count_ = 0;
-    difference_type stride_displacement_ = 0;
+    StrideCountType stride_count_default_               = 0;
+    StrideDisplacementType stride_displacement_default_ = 0;
+
+    StrideCountType* stride_count_               = &stride_count_default_;
+    StrideDisplacementType* stride_displacement_ = &stride_displacement_default_;
 };
 template <class It>
 stride_counting_iterator(It) -> stride_counting_iterator<It>;

>From f6bcf2743080ced55d9d589daed611c5e9696ac5 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:52:37 +0100
Subject: [PATCH 02/56] [libc++] Introduce one-sided binary search for
 lower_bound on non-random iterators.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the
container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
would yield Ω(n*log(n)) comparisons and, for non-random-access iterators, Ω(n^2) iterator increments, whereas the
one-sided version will yield O(n) operations on both counts, with a Ω(log(n)) bound on the number of comparisons.
---
 .../include/__algorithm/iterator_operations.h | 47 +++++++++++++
 libcxx/include/__algorithm/lower_bound.h      | 69 +++++++++++++++++--
 2 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index e6176da4f5606..d73573747087e 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -87,6 +87,53 @@ struct _IterOps<_ClassicAlgPolicy> {
     std::advance(__iter, __count);
   }
 
+  // advance with sentinel, a la std::ranges::advance
+  // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
+  // use the incoming type for returning and steer clear of negative overflows
+  template <class _Iter, class _Distance>
+  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+    return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
+  }
+
+  // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
+  template <class _InputIter, class _Distance>
+  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
+    _Distance __dist{};
+    for (; __dist < __count && __iter != __sentinel; ++__dist)
+      ++__iter;
+    return __count - __dist;
+  }
+
+  // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
+  template <class _BiDirIter, class _Distance>
+  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
+    _Distance __dist{};
+    if (__count >= 0)
+      for (; __dist < __count && __iter != __sentinel; ++__dist)
+        ++__iter;
+    else
+      for (__count = -__count; __dist < __count && __iter != __sentinel; ++__dist)
+        --__iter;
+    return __count - __dist;
+  }
+
+  // advance with sentinel, a la std::ranges::advance -- RandomAccessIterator specialization
+  template <class _RandIter, class _Distance>
+  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
+    auto __dist = _IterOps::distance(__iter, __sentinel);
+    _LIBCPP_ASSERT_UNCATEGORIZED(
+        __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count<0");
+    if (__count < 0)
+      __dist = __dist > __count ? __dist : __count;
+    else
+      __dist = __dist < __count ? __dist : __count;
+    __iter += __dist;
+    return __count - __dist;
+  }
+
   // distance
   template <class _Iter>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 91c3bdaafd0cf..b432829667fa9 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -27,11 +27,13 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
-_Iter __lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
-  auto __len = _IterOps<_AlgPolicy>::distance(__first, __last);
-
+template <class _AlgPolicy, class _Iter, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting(
+    _Iter __first,
+    const _Type& __value,
+    typename iterator_traits<_Iter>::difference_type __len,
+    _Comp& __comp,
+    _Proj& __proj) {
   while (__len != 0) {
     auto __l2 = std::__half_positive(__len);
     _Iter __m = __first;
@@ -46,13 +48,68 @@ _Iter __lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp& __
   return __first;
 }
 
+// One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
+// advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
+// 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
+// the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the
+// container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
+// traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
+// would yield Ω(n*log(n)) comparisons and, for non-random-access iterators, Ω(n^2) iterator increments, whereas the
+// one-sided version will yield O(n) operations on both counts, with a Ω(log(n)) bound on the number of comparisons.
+template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
+__lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
+  // static_assert(std::is_base_of<std::forward_iterator_tag, typename _IterOps<_AlgPolicy>::template
+  // __iterator_category<_Iter>>::value,
+  //       "lower_bound() is a multipass algorithm and requires forward iterator or better");
+
+  using _Distance = typename iterator_traits<_Iter>::difference_type;
+  for (_Distance __step = 1; __first != __last; __step <<= 1) {
+    auto __it   = __first;
+    auto __dist = __step - _IterOps<_AlgPolicy>::advance(__it, __step, __last);
+    // once we reach the last range where needle can be we must start
+    // looking inwards, bisecting that range
+    if (__it == __last || !std::__invoke(__comp, std::__invoke(__proj, *__it), __value)) {
+      return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
+    }
+    // range not found, move forward!
+    __first = std::move(__it);
+  }
+  return __first;
+}
+
+template <class _AlgPolicy, class _InputIter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIter __lower_bound(
+    _InputIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj, std::input_iterator_tag) {
+  return std::__lower_bound_onesided<_AlgPolicy>(__first, __last, __value, __comp, __proj);
+}
+
+template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter __lower_bound(
+    _RandIter __first,
+    _Sent __last,
+    const _Type& __value,
+    _Comp& __comp,
+    _Proj& __proj,
+    std::random_access_iterator_tag) {
+  const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
+  return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
+}
+
+template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
+__lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp&& __comp, _Proj&& __proj) {
+  return std::__lower_bound<_AlgPolicy>(
+      __first, __last, __value, __comp, __proj, typename _IterOps<_AlgPolicy>::template __iterator_category<_Iter>());
+}
+
 template <class _ForwardIterator, class _Tp, class _Compare>
 _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
   static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value,
                 "The comparator has to be callable");
   auto __proj = std::__identity();
-  return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, __comp, __proj);
+  return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, std::move(__comp), std::move(__proj));
 }
 
 template <class _ForwardIterator, class _Tp>

>From 36bb63e36b56f98da2b808ab55410bec5c1d0bb5 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:53:09 +0100
Subject: [PATCH 03/56] [libc++][test] Add set_intersection complexity
 validation tests prior to introducing use of one-sided binary search to
 fast-forward over ranges of elements.

---
 .../ranges_set_intersection.pass.cpp          | 240 +++++++++++++++++-
 1 file changed, 234 insertions(+), 6 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index 0ee89e0131a07..30cedd19038d7 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -28,6 +28,9 @@
 #include <algorithm>
 #include <array>
 #include <concepts>
+#include <cstddef>
+#include <iterator>
+#include <type_traits>
 
 #include "almost_satisfies_types.h"
 #include "MoveOnly.h"
@@ -93,14 +96,17 @@ static_assert(!HasSetIntersectionRange<UncheckedRange<MoveOnly*>, UncheckedRange
 
 using std::ranges::set_intersection_result;
 
+// TODO: std::ranges::set_intersection calls std::ranges::copy
+// std::ranges::copy(contiguous_iterator<int*>, sentinel_wrapper<contiguous_iterator<int*>>, contiguous_iterator<int*>) doesn't seem to work.
+// It seems that std::ranges::copy calls std::copy, which unwraps contiguous_iterator<int*> into int*,
+// and then it failed because there is no == between int* and sentinel_wrapper<contiguous_iterator<int*>>
+template <typename Iter>
+using SentinelWorkaround = std::conditional_t<std::contiguous_iterator<Iter>, Iter, sentinel_wrapper<Iter>>;
+
 template <class In1, class In2, class Out, std::size_t N1, std::size_t N2, std::size_t N3>
 constexpr void testSetIntersectionImpl(std::array<int, N1> in1, std::array<int, N2> in2, std::array<int, N3> expected) {
-  // TODO: std::ranges::set_intersection calls std::ranges::copy
-  // std::ranges::copy(contiguous_iterator<int*>, sentinel_wrapper<contiguous_iterator<int*>>, contiguous_iterator<int*>) doesn't seem to work.
-  // It seems that std::ranges::copy calls std::copy, which unwraps contiguous_iterator<int*> into int*,
-  // and then it failed because there is no == between int* and sentinel_wrapper<contiguous_iterator<int*>>
-  using Sent1 = std::conditional_t<std::contiguous_iterator<In1>, In1, sentinel_wrapper<In1>>;
-  using Sent2 = std::conditional_t<std::contiguous_iterator<In2>, In2, sentinel_wrapper<In2>>;
+  using Sent1 = SentinelWorkaround<In1>;
+  using Sent2 = SentinelWorkaround<In2>;
 
   // iterator overload
   {
@@ -272,6 +278,225 @@ constexpr void runAllIteratorPermutationsTests() {
   static_assert(withAllPermutationsOfInIter1AndInIter2<contiguous_iterator<int*>>());
 }
 
+namespace {
+struct [[nodiscard]] OperationCounts {
+  std::size_t comparisons{};
+  struct PerInput {
+    std::size_t proj{};
+    std::size_t iterator_strides{};
+    std::ptrdiff_t iterator_displacement{};
+
+    // IGNORES proj!
+    [[nodiscard]] constexpr bool operator==(const PerInput& o) const {
+      return iterator_strides == o.iterator_strides && iterator_displacement == o.iterator_displacement;
+    }
+
+    [[nodiscard]] constexpr bool matchesExpectation(const PerInput& expect) {
+      return proj <= expect.proj && iterator_strides <= expect.iterator_strides &&
+             iterator_displacement <= expect.iterator_displacement;
+    }
+  };
+  std::array<PerInput, 2> in;
+
+  [[nodiscard]] constexpr bool matchesExpectation(const OperationCounts& expect) {
+    return comparisons <= expect.comparisons && in[0].matchesExpectation(expect.in[0]) &&
+           in[1].matchesExpectation(expect.in[1]);
+  }
+
+  [[nodiscard]] constexpr bool operator==(const OperationCounts& o) const {
+    return comparisons == o.comparisons && std::ranges::equal(in, o.in);
+  }
+};
+} // namespace
+
+#include <iostream>
+template <template <class...> class In1,
+          template <class...>
+          class In2,
+          class Out,
+          std::size_t N1,
+          std::size_t N2,
+          std::size_t N3>
+constexpr void testSetIntersectionAndReturnOpCounts(
+    std::array<int, N1> in1,
+    std::array<int, N2> in2,
+    std::array<int, N3> expected,
+    const OperationCounts& expectedOpCounts) {
+  OperationCounts ops;
+
+  const auto comp = [&ops](int x, int y) {
+    ++ops.comparisons;
+    return x < y;
+  };
+
+  std::array<int, N3> out;
+
+  stride_counting_iterator b1(
+      In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+  stride_counting_iterator e1(
+      In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+  stride_counting_iterator b2(
+      In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+  stride_counting_iterator e2(
+      In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+
+  std::set_intersection(b1, e1, b2, e2, Out(out.data()), comp);
+
+  assert(std::ranges::equal(out, expected));
+  assert(ops.matchesExpectation(expectedOpCounts));
+}
+
+template <template <class...> class In1,
+          template <class...>
+          class In2,
+          class Out,
+          std::size_t N1,
+          std::size_t N2,
+          std::size_t N3>
+constexpr void testRangesSetIntersectionAndReturnOpCounts(
+    std::array<int, N1> in1,
+    std::array<int, N2> in2,
+    std::array<int, N3> expected,
+    const OperationCounts& expectedOpCounts) {
+  OperationCounts ops;
+
+  const auto comp = [&ops](int x, int y) {
+    ++ops.comparisons;
+    return x < y;
+  };
+
+  const auto proj1 = [&ops](const int& i) {
+    ++ops.in[0].proj;
+    return i;
+  };
+
+  const auto proj2 = [&ops](const int& i) {
+    ++ops.in[1].proj;
+    return i;
+  };
+
+  std::array<int, N3> out;
+
+  stride_counting_iterator b1(
+      In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+  stride_counting_iterator e1(
+      In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+  stride_counting_iterator b2(
+      In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+  stride_counting_iterator e2(
+      In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+
+  std::ranges::subrange r1{b1, SentinelWorkaround<decltype(e1)>{e1}};
+  std::ranges::subrange r2{b2, SentinelWorkaround<decltype(e2)>{e2}};
+  std::same_as<set_intersection_result<decltype(e1), decltype(e2), Out>> decltype(auto) result =
+      std::ranges::set_intersection(r1, r2, Out{out.data()}, comp, proj1, proj2);
+  assert(std::ranges::equal(out, expected));
+  assert(base(result.in1) == base(e1));
+  assert(base(result.in2) == base(e2));
+  assert(base(result.out) == out.data() + out.size());
+  assert(ops.matchesExpectation(expectedOpCounts));
+}
+
+template <template <typename...> class In1, template <typename...> class In2, class Out>
+constexpr void testComplexityParameterizedIter() {
+  // Worst-case complexity:
+  // Let N=(last1 - first1) and M=(last2 - first2)
+  // At most 2*(N+M) - 1 comparisons and applications of each projection.
+  // At most 2*(N+M) iterator mutations.
+  {
+    std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+    std::array r2{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+    std::array<int, 0> expected{};
+
+    OperationCounts expectedCounts;
+    expectedCounts.comparisons                 = 37;
+    expectedCounts.in[0].proj                  = 37;
+    expectedCounts.in[0].iterator_strides      = 30;
+    expectedCounts.in[0].iterator_displacement = 30;
+    expectedCounts.in[1]                       = expectedCounts.in[0];
+
+    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+  }
+
+  {
+    std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+    std::array r2{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+    std::array expected{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+    OperationCounts expectedCounts;
+    expectedCounts.comparisons                 = 38;
+    expectedCounts.in[0].proj                  = 38;
+    expectedCounts.in[0].iterator_strides      = 30;
+    expectedCounts.in[0].iterator_displacement = 30;
+    expectedCounts.in[1]                       = expectedCounts.in[0];
+
+    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+  }
+
+  // Lower complexity when there is low overlap between ranges: we can make 2*log(X) comparisons when one range
+  // has X elements that can be skipped over.
+  {
+    std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    std::array r2{15};
+    std::array expected{15};
+
+    OperationCounts expectedCounts;
+    expectedCounts.comparisons                 = 8;
+    expectedCounts.in[0].proj                  = 8;
+    expectedCounts.in[0].iterator_strides      = 24;
+    expectedCounts.in[0].iterator_displacement = 24;
+    expectedCounts.in[1].proj                  = 8;
+    expectedCounts.in[1].iterator_strides      = 3;
+    expectedCounts.in[1].iterator_displacement = 3;
+
+    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+  }
+
+  {
+    std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    std::array r2{0, 16};
+    std::array<int, 0> expected{};
+
+    OperationCounts expectedCounts;
+    expectedCounts.comparisons                 = 10;
+    expectedCounts.in[0].proj                  = 10;
+    expectedCounts.in[0].iterator_strides      = 24;
+    expectedCounts.in[0].iterator_displacement = 24;
+    expectedCounts.in[1].proj                  = 10;
+    expectedCounts.in[1].iterator_strides      = 4;
+    expectedCounts.in[1].iterator_displacement = 4;
+
+    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+  }
+}
+
+template <template <typename...> class In2, class Out>
+constexpr void testComplexityParameterizedIterPermutateIn1() {
+  //common_input_iterator
+  testComplexityParameterizedIter<forward_iterator, In2, Out>();
+  testComplexityParameterizedIter<bidirectional_iterator, In2, Out>();
+  testComplexityParameterizedIter<random_access_iterator, In2, Out>();
+}
+
+template <class Out>
+constexpr void testComplexityParameterizedIterPermutateIn1In2() {
+  testComplexityParameterizedIterPermutateIn1<forward_iterator, Out>();
+  testComplexityParameterizedIterPermutateIn1<bidirectional_iterator, Out>();
+  testComplexityParameterizedIterPermutateIn1<random_access_iterator, Out>();
+}
+
+constexpr bool testComplexityMultipleTypes() {
+  //testComplexityParameterizedIter<cpp20_input_iterator, random_access_iterator, OutIter>();
+  testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
+  testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
+  testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
+  return true;
+}
+
 constexpr bool test() {
   // check that every element is copied exactly once
   {
@@ -572,5 +797,8 @@ int main(int, char**) {
   // than the step limit.
   runAllIteratorPermutationsTests();
 
+  testComplexityMultipleTypes();
+  static_assert(testComplexityMultipleTypes());
+
   return 0;
 }

>From c23272c389329d3af83c0f58f896ee6ea47260ed Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:53:31 +0100
Subject: [PATCH 04/56] [libc++] Introduce use of __lower_bound_onesided to
 improve average complexity of set_intersection.

---
 libcxx/include/__algorithm/set_intersection.h | 154 +++++++++++++++++-
 1 file changed, 150 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index f2603fe1365ac..556738022f485 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -12,9 +12,13 @@
 #include <__algorithm/comp.h>
 #include <__algorithm/comp_ref_type.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/lower_bound.h>
 #include <__config>
+#include <__functional/identity.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/next.h>
+#include <__type_traits/is_same.h>
+#include <__utility/exchange.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -36,9 +40,122 @@ struct __set_intersection_result {
 };
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-__set_intersection(
-    _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+struct _LIBCPP_NODISCARD_EXT __set_intersector {
+  _InIter1& __first1_;
+  const _Sent1& __last1_;
+  _InIter2& __first2_;
+  const _Sent2& __last2_;
+  _OutIter& __result_;
+  _Compare& __comp_;
+  static constexpr auto __proj_ = std::__identity();
+  bool __prev_advanced_         = true;
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersector(
+      _InIter1& __first1, _Sent1& __last1, _InIter2& __first2, _Sent2& __last2, _OutIter& __result, _Compare& __comp)
+      : __first1_(__first1),
+        __last1_(__last1),
+        __first2_(__first2),
+        __last2_(__last2),
+        __result_(__result),
+        __comp_(__comp) {}
+
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+      _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+      operator()() && {
+    while (__first2_ != __last2_) {
+      __advance1_and_maybe_add_result();
+      if (__first1_ == __last1_)
+        break;
+      __advance2_and_maybe_add_result();
+    }
+    return __set_intersection_result<_InIter1, _InIter2, _OutIter>(
+        _IterOps<_AlgPolicy>::next(std::move(__first1_), std::move(__last1_)),
+        _IterOps<_AlgPolicy>::next(std::move(__first2_), std::move(__last2_)),
+        std::move(__result_));
+  }
+
+private:
+  // advance __iter to the first element in the range where !__comp_(__iter, __value)
+  // add result if this is the second consecutive call without advancing
+  // this method only works if you alternate calls between __advance1_and_maybe_add_result() and
+  // __advance2_and_maybe_add_result()
+  template <class _Iter, class _Sent, class _Value>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+  __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
+    // use one-sided lower bound for improved algorithmic complexity bounds
+    const auto __tmp =
+        std::exchange(__iter, std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_));
+    __add_output_unless(__tmp != __iter);
+  }
+
+  // advance __first1_ to the first element in the range where !__comp_(*__first1_, *__first2_)
+  // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance1_and_maybe_add_result() {
+    __advance_and_maybe_add_result(__first1_, __last1_, *__first2_);
+  }
+
+  // advance __first2_ to the first element in the range where !__comp_(*__first2_, *__first1_)
+  // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance2_and_maybe_add_result() {
+    __advance_and_maybe_add_result(__first2_, __last2_, *__first1_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __add_output_unless(bool __advanced) {
+    if (__advanced | __prev_advanced_) {
+      __prev_advanced_ = __advanced;
+    } else {
+      *__result_ = *__first1_;
+      ++__result_;
+      ++__first1_;
+      ++__first2_;
+      __prev_advanced_ = true;
+    }
+  }
+};
+
+// with forward iterators we can use binary search to skip over entries
+template <class _AlgPolicy,
+          class _Compare,
+          class _InForwardIter1,
+          class _Sent1,
+          class _InForwardIter2,
+          class _Sent2,
+          class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
+    __set_intersection(
+        _InForwardIter1 __first1,
+        _Sent1 __last1,
+        _InForwardIter2 __first2,
+        _Sent2 __last2,
+        _OutIter __result,
+        _Compare&& __comp,
+        std::forward_iterator_tag,
+        std::forward_iterator_tag) {
+  std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
+      __intersector(__first1, __last1, __first2, __last2, __result, __comp);
+  return std::move(__intersector)();
+}
+
+// input iterators are not suitable for multipass algorithms, so we stick to the classic single-pass version
+template <class _AlgPolicy,
+          class _Compare,
+          class _InInputIter1,
+          class _Sent1,
+          class _InInputIter2,
+          class _Sent2,
+          class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
+    __set_intersection(
+        _InInputIter1 __first1,
+        _Sent1 __last1,
+        _InInputIter2 __first2,
+        _Sent2 __last2,
+        _OutIter __result,
+        _Compare&& __comp,
+        std::input_iterator_tag,
+        std::input_iterator_tag) {
   while (__first1 != __last1 && __first2 != __last2) {
     if (__comp(*__first1, *__first2))
       ++__first1;
@@ -52,12 +169,41 @@ __set_intersection(
     }
   }
 
-  return __set_intersection_result<_InIter1, _InIter2, _OutIter>(
+  return std::__set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>(
       _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)),
       _IterOps<_AlgPolicy>::next(std::move(__first2), std::move(__last2)),
       std::move(__result));
 }
 
+template <class _AlgPolicy, class _Iter>
+class __set_intersection_iter_category {
+  template <class _It>
+  using __cat = typename std::_IterOps<_AlgPolicy>::template __iterator_category<_It>;
+  template <class _It>
+  static auto test(__cat<_It>*) -> __cat<_It>;
+  template <class>
+  static std::input_iterator_tag test(...);
+
+public:
+  using __type = decltype(test<_Iter>(nullptr));
+};
+
+template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+    __set_intersection(
+        _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+  return std::__set_intersection<_AlgPolicy>(
+      std::move(__first1),
+      std::move(__last1),
+      std::move(__first2),
+      std::move(__last2),
+      std::move(__result),
+      std::forward<_Compare>(__comp),
+      typename std::__set_intersection_iter_category<_AlgPolicy, _InIter1>::__type(),
+      typename std::__set_intersection_iter_category<_AlgPolicy, _InIter2>::__type());
+}
+
 template <class _InputIterator1, class _InputIterator2, class _OutputIterator, class _Compare>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_intersection(
     _InputIterator1 __first1,

>From 0b57ea00b44dbe69bc5125a08691a72b0dea42ce Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 2 Jan 2024 17:18:21 +0000
Subject: [PATCH 05/56] Fix `constexpr` annotations.

---
 libcxx/include/__algorithm/iterator_operations.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index d73573747087e..21117e6b7d760 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -91,13 +91,13 @@ struct _IterOps<_ClassicAlgPolicy> {
   // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
   // use the incoming type for returning and steer clear of negative overflows
   template <class _Iter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
     return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
   }
 
   // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
   template <class _InputIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
     _Distance __dist{};
     for (; __dist < __count && __iter != __sentinel; ++__dist)
@@ -107,7 +107,7 @@ struct _IterOps<_ClassicAlgPolicy> {
 
   // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
   template <class _BiDirIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
     _Distance __dist{};
     if (__count >= 0)

>From 08af54897cd8e39a25a1e97b0174b68beb408cd0 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 2 Jan 2024 17:18:59 +0000
Subject: [PATCH 06/56] Remove std::exchange dependency from
 std::set_intersection so it works before C++14

---
 libcxx/include/__algorithm/set_intersection.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 556738022f485..46f6fbe4d3dd2 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -83,8 +83,8 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
   __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
     // use one-sided lower bound for improved algorithmic complexity bounds
-    const auto __tmp =
-        std::exchange(__iter, std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_));
+    const auto __tmp = std::move(__iter);
+    __iter = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_);
     __add_output_unless(__tmp != __iter);
   }
 

>From 7aa3927064083b6a96bfcc4e00d1b4fc24d9c96e Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 2 Jan 2024 17:20:06 +0000
Subject: [PATCH 07/56] Review feedback: don't use one-sided lower bound in
 lower_bound() itself since that violates the complexity guarantees from the
 standard.

---
 libcxx/include/__algorithm/lower_bound.h       | 18 ++----------------
 .../lower.bound/lower_bound.pass.cpp           | 10 +++++-----
 .../lower.bound/lower_bound_comp.pass.cpp      | 13 ++++++-------
 3 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index b432829667fa9..3febcb411268f 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -78,38 +78,24 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
   return __first;
 }
 
-template <class _AlgPolicy, class _InputIter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIter __lower_bound(
-    _InputIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj, std::input_iterator_tag) {
-  return std::__lower_bound_onesided<_AlgPolicy>(__first, __last, __value, __comp, __proj);
-}
-
 template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp>
 _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter __lower_bound(
     _RandIter __first,
     _Sent __last,
     const _Type& __value,
     _Comp& __comp,
-    _Proj& __proj,
-    std::random_access_iterator_tag) {
+    _Proj& __proj) {
   const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
   return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
 }
 
-template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
-__lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp&& __comp, _Proj&& __proj) {
-  return std::__lower_bound<_AlgPolicy>(
-      __first, __last, __value, __comp, __proj, typename _IterOps<_AlgPolicy>::template __iterator_category<_Iter>());
-}
-
 template <class _ForwardIterator, class _Tp, class _Compare>
 _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
   static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value,
                 "The comparator has to be callable");
   auto __proj = std::__identity();
-  return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, std::move(__comp), std::move(__proj));
+  return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, __comp, __proj);
 }
 
 template <class _ForwardIterator, class _Tp>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
index 5c11962d13777..dd2916338e8f6 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
@@ -45,14 +45,14 @@ test(Iter first, Iter last, const T& value)
   stride_counting_iterator l(last, &strides, &displacement);
 
   auto i = std::lower_bound(f, l, value);
-  for (auto j = f; j != i; ++j)
+  for (auto j = base(f); j != base(i); ++j)
     assert(*j < value);
-  for (auto j = i; j != l; ++j)
+  for (auto j = base(i); j != base(l); ++j)
     assert(!(*j < value));
 
-  auto len = std::distance(first, last);
-  assert(strides <= 2.5 * len + 1);
-  assert(displacement <= 2.5 * len + 1);
+  auto len = static_cast<std::size_t>(std::distance(first, last));
+  assert(strides <= 2 * len);
+  assert(displacement <= 2 * len);
 }
 
 template <class Iter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
index 05fd43eada461..ff928e23b9006 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
@@ -51,16 +51,15 @@ test(Iter first, Iter last, const T& value)
   };
 
   auto i = std::lower_bound(f, l, value, cmp);
-
-  for (auto j = f; j != i; ++j)
+  for (auto j = base(f); j != base(i); ++j)
     assert(std::greater<int>()(*j, value));
-  for (auto j = i; j != l; ++j)
+  for (auto j = base(i); j != base(l); ++j)
     assert(!std::greater<int>()(*j, value));
 
-  auto len = std::distance(first, last);
-  assert(strides <= 2.5 * len + 1);
-  assert(displacement <= 2.5 * len + 1);
-  assert(comparisons <= 2 * ceil(log(len + 1) + 2));
+  auto len = static_cast<std::size_t>(std::distance(first, last));
+  assert(strides <= 2 * len);
+  assert(displacement <= 2 * len);
+  assert(comparisons <= std::ceil(std::log2(len + 1)));
 }
 
 template <class Iter>

>From c44c2a2b8ea818287b859c5ce318d195c59e9d65 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 2 Jan 2024 17:21:33 +0000
Subject: [PATCH 08/56] Create new benchmark for set_intersection().

---
 libcxx/benchmarks/CMakeLists.txt              |   1 +
 .../algorithms/set_intersection.bench.cpp     | 224 ++++++++++++++++++
 2 files changed, 225 insertions(+)
 create mode 100644 libcxx/benchmarks/algorithms/set_intersection.bench.cpp

diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 7591f34d938bf..da2ea6fd4c3d1 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -192,6 +192,7 @@ set(BENCHMARK_TESTS
     algorithms/ranges_sort.bench.cpp
     algorithms/ranges_sort_heap.bench.cpp
     algorithms/ranges_stable_sort.bench.cpp
+    algorithms/set_intersection.bench.cpp
     algorithms/sort.bench.cpp
     algorithms/sort_heap.bench.cpp
     algorithms/stable_sort.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
new file mode 100644
index 0000000000000..c6a01707d6531
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -0,0 +1,224 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <forward_list>
+#include <iterator>
+#include <set>
+#include <vector>
+
+#include "common.h"
+
+namespace {
+
+// types of containers we'll want to test, covering interesting iterator types
+struct VectorContainer {
+  template <typename... Args>
+  using type = std::vector<Args...>;
+
+  static constexpr const char* Name = "Vector";
+};
+
+struct SetContainer {
+  template <typename... Args>
+  using type = std::set<Args...>;
+
+  static constexpr const char* Name = "Set";
+};
+
+struct ForwardListContainer {
+  template <typename... Args>
+  using type = std::forward_list<Args...>;
+
+  static constexpr const char* Name = "ForwardList";
+};
+
+using AllContainerTypes = std::tuple<VectorContainer, SetContainer, ForwardListContainer>;
+
+// set_intersection performance may depend on where matching values lie
+enum class OverlapPosition {
+    Nowhere,
+    Front,
+    Back,
+    Interlaced,
+};
+
+struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 4> {
+  static constexpr const char* Names[] = {
+      "Nowhere", "Front", "Back", "Interlaced"};
+};
+
+// functor that moves elements from an iterator range into a new Container instance
+template <typename Container>
+struct MoveInto {};
+
+template <typename T>
+struct MoveInto<std::vector<T>> {
+    template <class It>
+    [[nodiscard]] static std::vector<T> operator()(It first, It last) {
+        std::vector<T> out;
+        std::move(first, last, std::back_inserter(out));
+        return out;
+    }
+};
+
+template <typename T>
+struct MoveInto<std::forward_list<T>> {
+    template <class It>
+    [[nodiscard]] static std::forward_list<T> operator()(It first, It last) {
+        std::forward_list<T> out;
+        std::move(first, last, std::front_inserter(out));
+        out.reverse();
+        return out;
+    }
+};
+
+template <typename T>
+struct MoveInto<std::set<T>> {
+    template <class It>
+    [[nodiscard]] static std::set<T> operator()(It first, It last) {
+        std::set<T> out;
+        std::move(first, last, std::inserter(out, out.begin()));
+        return out;
+    }
+};
+
+// lightweight wrapping around fillValues() which puts a little effort into
+// making that would be contiguous when sorted non-contiguous in memory
+template <typename T>
+std::vector<T> getVectorOfRandom(size_t N) {
+  std::vector<T> V;
+  fillValues(V, N, Order::Random);
+  sortValues(V, Order::Random);
+  return std::vector<T>(V);
+}
+
+// forward_iterator wrapping which, for each increment, moves the underlying iterator forward Stride elements
+template <typename Wrapped>
+struct StridedFwdIt {
+  Wrapped Base;
+  unsigned Stride;
+
+  using iterator_category = std::forward_iterator_tag;
+  using difference_type = typename Wrapped::difference_type;
+  using value_type = typename Wrapped::value_type;
+  using pointer = typename Wrapped::pointer;
+  using reference = typename Wrapped::reference;
+
+  StridedFwdIt(Wrapped B, unsigned Stride_) : Base(B), Stride(Stride_) { assert(Stride != 0); }
+
+  StridedFwdIt operator++() { for (unsigned I=0; I<Stride; ++I) ++Base; return *this; }
+  StridedFwdIt operator++(int) { auto Tmp = *this; ++*this; return Tmp; }
+  value_type& operator*() { return *Base; }
+  const value_type& operator*() const { return *Base; }
+  value_type& operator->() { return *Base; }
+  const value_type& operator->() const { return *Base; }
+  bool operator==(const StridedFwdIt& o) const { return Base==o.Base; }
+  bool operator!=(const StridedFwdIt& o) const { return !operator==(o); }
+};
+template <typename Wrapped> StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
+
+
+// realistically, data won't all be nicely contiguous in a container
+// we'll go through some effort to ensure that it's shuffled through memory
+template <class Container>
+std::pair<Container, Container> genCacheUnfriendlyData(size_t Size1, size_t Size2, OverlapPosition Pos) {
+  using ValueType = typename Container::value_type;
+  const MoveInto<Container> moveInto;
+  const auto SrcSize = Pos == OverlapPosition::Nowhere ? Size1 + Size2 : std::max(Size1, Size2);
+  std::vector<ValueType> Src = getVectorOfRandom<ValueType>(SrcSize);
+
+  if (Pos == OverlapPosition::Nowhere) {
+    std::sort(Src.begin(), Src.end());
+    return std::make_pair(
+        moveInto(Src.begin(), Src.begin() + Size1),
+        moveInto(Src.begin() + Size1, Src.end()));
+  }
+
+  // all other overlap types will have to copy some part of the data, but if
+  // we copy after sorting it will likely have high cache locality, so we sort
+  // each copy separately
+  auto Copy = Src;
+  std::sort(Src.begin(), Src.end());
+  std::sort(Copy.begin(), Copy.end());
+
+  switch(Pos) {
+    case OverlapPosition::Nowhere:
+      break;
+
+    case OverlapPosition::Front:
+      return std::make_pair(
+          moveInto(Src.begin(), Src.begin() + Size1),
+          moveInto(Copy.begin(), Copy.begin() + Size2));
+
+    case OverlapPosition::Back:
+      return std::make_pair(
+          moveInto(Src.begin() + (Src.size() - Size1), Src.end()),
+          moveInto(Copy.begin() + (Copy.size() - Size2), Copy.end()));
+
+    case OverlapPosition::Interlaced:
+      const auto Stride1 = Size1 < Size2 ? Size2/Size1 : 1;
+      const auto Stride2 = Size2 < Size1 ? Size1/Size2 : 1;
+      return std::make_pair(
+          moveInto(StridedFwdIt(Src.begin(), Stride1), StridedFwdIt(Src.end(), Stride1)),
+          moveInto(StridedFwdIt(Copy.begin(), Stride2), StridedFwdIt(Copy.end(), Stride2)));
+  }
+  abort();
+  return std::pair<Container, Container>();
+}
+
+
+template <class ValueType, class Container, class Overlap>
+struct SetIntersection {
+  using ContainerType = typename Container::template type<Value<ValueType>>;
+  size_t Size1;
+  size_t Size2;
+
+  SetIntersection(size_t M, size_t N) : Size1(M), Size2(N) {}
+
+  void run(benchmark::State& state) const {
+    state.PauseTiming();
+    auto Input = genCacheUnfriendlyData<ContainerType>(Size1, Size2, Overlap());
+    std::vector<Value<ValueType>> out(std::min(Size1, Size2));
+
+    size_t cmp;
+    auto trackingLess = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
+        ++cmp;
+        return std::less<Value<ValueType>>{}(lhs, rhs);
+    };
+
+    const auto BatchSize =  std::max(size_t{16}, (2*TestSetElements) / (Size1+Size2));
+    state.ResumeTiming();
+
+    for (const auto& _ : state) {
+      while (state.KeepRunningBatch(BatchSize)) {
+        for (unsigned i=0; i<BatchSize; ++i) {
+          const auto& [C1, C2] = Input;
+          auto outIter = std::set_intersection(C1.begin(), C1.end(), C2.begin(), C2.end(), out.begin(), trackingLess);
+          benchmark::DoNotOptimize(outIter);
+          state.counters["Comparisons"] = cmp;
+        }
+      }
+    }
+  }
+
+  std::string name() const {
+    return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name +
+        ValueType::name() + '_' + std::to_string(Size1) + '_' + std::to_string(Size2);
+  }
+};
+
+} // namespace
+
+int main(int argc, char** argv) {/**/
+  benchmark::Initialize(&argc, argv);
+  if (benchmark::ReportUnrecognizedArguments(argc, argv))
+    return 1;
+  makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(Quantities, Quantities);
+  benchmark::RunSpecifiedBenchmarks();
+}

>From 46cc95f71742e32d8131a5b08fa271b122a919c3 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Fri, 5 Jan 2024 23:04:19 +0000
Subject: [PATCH 09/56] Formatting fixups.

---
 .../algorithms/set_intersection.bench.cpp     | 201 +++++++++---------
 .../include/__algorithm/iterator_operations.h |   3 +-
 libcxx/include/__algorithm/lower_bound.h      |   8 +-
 libcxx/include/__algorithm/set_intersection.h |  52 ++---
 4 files changed, 131 insertions(+), 133 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index c6a01707d6531..4fa411bba4354 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -42,15 +42,14 @@ using AllContainerTypes = std::tuple<VectorContainer, SetContainer, ForwardListC
 
 // set_intersection performance may depend on where matching values lie
 enum class OverlapPosition {
-    Nowhere,
-    Front,
-    Back,
-    Interlaced,
+  None,
+  Front,
+  Back,
+  Interlaced,
 };
 
 struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 4> {
-  static constexpr const char* Names[] = {
-      "Nowhere", "Front", "Back", "Interlaced"};
+  static constexpr const char* Names[] = {"None", "Front", "Back", "Interlaced"};
 };
 
 // functor that moves elements from an iterator range into a new Container instance
@@ -59,148 +58,149 @@ struct MoveInto {};
 
 template <typename T>
 struct MoveInto<std::vector<T>> {
-    template <class It>
-    [[nodiscard]] static std::vector<T> operator()(It first, It last) {
-        std::vector<T> out;
-        std::move(first, last, std::back_inserter(out));
-        return out;
-    }
+  template <class It>
+  [[nodiscard]] static std::vector<T> operator()(It first, It last) {
+    std::vector<T> out;
+    std::move(first, last, std::back_inserter(out));
+    return out;
+  }
 };
 
 template <typename T>
 struct MoveInto<std::forward_list<T>> {
-    template <class It>
-    [[nodiscard]] static std::forward_list<T> operator()(It first, It last) {
-        std::forward_list<T> out;
-        std::move(first, last, std::front_inserter(out));
-        out.reverse();
-        return out;
-    }
+  template <class It>
+  [[nodiscard]] static std::forward_list<T> operator()(It first, It last) {
+    std::forward_list<T> out;
+    std::move(first, last, std::front_inserter(out));
+    out.reverse();
+    return out;
+  }
 };
 
 template <typename T>
 struct MoveInto<std::set<T>> {
-    template <class It>
-    [[nodiscard]] static std::set<T> operator()(It first, It last) {
-        std::set<T> out;
-        std::move(first, last, std::inserter(out, out.begin()));
-        return out;
-    }
+  template <class It>
+  [[nodiscard]] static std::set<T> operator()(It first, It last) {
+    std::set<T> out;
+    std::move(first, last, std::inserter(out, out.begin()));
+    return out;
+  }
 };
 
 // lightweight wrapping around fillValues() which puts a little effort into
 // making that would be contiguous when sorted non-contiguous in memory
 template <typename T>
 std::vector<T> getVectorOfRandom(size_t N) {
-  std::vector<T> V;
-  fillValues(V, N, Order::Random);
-  sortValues(V, Order::Random);
-  return std::vector<T>(V);
+  std::vector<T> v;
+  fillValues(v, N, Order::Random);
+  sortValues(v, Order::Random);
+  return std::vector<T>(v);
 }
 
 // forward_iterator wrapping which, for each increment, moves the underlying iterator forward Stride elements
 template <typename Wrapped>
 struct StridedFwdIt {
-  Wrapped Base;
-  unsigned Stride;
+  Wrapped base_;
+  unsigned stride_;
 
   using iterator_category = std::forward_iterator_tag;
-  using difference_type = typename Wrapped::difference_type;
-  using value_type = typename Wrapped::value_type;
-  using pointer = typename Wrapped::pointer;
-  using reference = typename Wrapped::reference;
-
-  StridedFwdIt(Wrapped B, unsigned Stride_) : Base(B), Stride(Stride_) { assert(Stride != 0); }
-
-  StridedFwdIt operator++() { for (unsigned I=0; I<Stride; ++I) ++Base; return *this; }
-  StridedFwdIt operator++(int) { auto Tmp = *this; ++*this; return Tmp; }
-  value_type& operator*() { return *Base; }
-  const value_type& operator*() const { return *Base; }
-  value_type& operator->() { return *Base; }
-  const value_type& operator->() const { return *Base; }
-  bool operator==(const StridedFwdIt& o) const { return Base==o.Base; }
+  using difference_type   = typename Wrapped::difference_type;
+  using value_type        = typename Wrapped::value_type;
+  using pointer           = typename Wrapped::pointer;
+  using reference         = typename Wrapped::reference;
+
+  StridedFwdIt(Wrapped base, unsigned stride) : base_(base), stride_(stride) { assert(stride_ != 0); }
+
+  StridedFwdIt operator++() {
+    for (unsigned i = 0; i < stride_; ++i)
+      ++base_;
+    return *this;
+  }
+  StridedFwdIt operator++(int) {
+    auto tmp = *this;
+    ++*this;
+    return tmp;
+  }
+  value_type& operator*() { return *base_; }
+  const value_type& operator*() const { return *base_; }
+  value_type& operator->() { return *base_; }
+  const value_type& operator->() const { return *base_; }
+  bool operator==(const StridedFwdIt& o) const { return base_ == o.base_; }
   bool operator!=(const StridedFwdIt& o) const { return !operator==(o); }
 };
-template <typename Wrapped> StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
-
+template <typename Wrapped>
+StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
 
 // realistically, data won't all be nicely contiguous in a container
 // we'll go through some effort to ensure that it's shuffled through memory
 template <class Container>
-std::pair<Container, Container> genCacheUnfriendlyData(size_t Size1, size_t Size2, OverlapPosition Pos) {
+std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) {
   using ValueType = typename Container::value_type;
-  const MoveInto<Container> moveInto;
-  const auto SrcSize = Pos == OverlapPosition::Nowhere ? Size1 + Size2 : std::max(Size1, Size2);
-  std::vector<ValueType> Src = getVectorOfRandom<ValueType>(SrcSize);
-
-  if (Pos == OverlapPosition::Nowhere) {
-    std::sort(Src.begin(), Src.end());
-    return std::make_pair(
-        moveInto(Src.begin(), Src.begin() + Size1),
-        moveInto(Src.begin() + Size1, Src.end()));
+  const MoveInto<Container> move_into;
+  const auto src_size = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2);
+  std::vector<ValueType> src = getVectorOfRandom<ValueType>(src_size);
+
+  if (pos == OverlapPosition::None) {
+    std::sort(src.begin(), src.end());
+    return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(src.begin() + size1, src.end()));
   }
 
   // all other overlap types will have to copy some part of the data, but if
   // we copy after sorting it will likely have high cache locality, so we sort
   // each copy separately
-  auto Copy = Src;
-  std::sort(Src.begin(), Src.end());
-  std::sort(Copy.begin(), Copy.end());
-
-  switch(Pos) {
-    case OverlapPosition::Nowhere:
-      break;
-
-    case OverlapPosition::Front:
-      return std::make_pair(
-          moveInto(Src.begin(), Src.begin() + Size1),
-          moveInto(Copy.begin(), Copy.begin() + Size2));
-
-    case OverlapPosition::Back:
-      return std::make_pair(
-          moveInto(Src.begin() + (Src.size() - Size1), Src.end()),
-          moveInto(Copy.begin() + (Copy.size() - Size2), Copy.end()));
-
-    case OverlapPosition::Interlaced:
-      const auto Stride1 = Size1 < Size2 ? Size2/Size1 : 1;
-      const auto Stride2 = Size2 < Size1 ? Size1/Size2 : 1;
-      return std::make_pair(
-          moveInto(StridedFwdIt(Src.begin(), Stride1), StridedFwdIt(Src.end(), Stride1)),
-          moveInto(StridedFwdIt(Copy.begin(), Stride2), StridedFwdIt(Copy.end(), Stride2)));
+  auto copy = src;
+  std::sort(src.begin(), src.end());
+  std::sort(copy.begin(), copy.end());
+
+  switch (pos) {
+  case OverlapPosition::None:
+    break;
+
+  case OverlapPosition::Front:
+    return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(copy.begin(), copy.begin() + size2));
+
+  case OverlapPosition::Back:
+    return std::make_pair(move_into(src.begin() + (src.size() - size1), src.end()),
+                          move_into(copy.begin() + (copy.size() - size2), copy.end()));
+
+  case OverlapPosition::Interlaced:
+    const auto stride1 = size1 < size2 ? size2 / size1 : 1;
+    const auto stride2 = size2 < size1 ? size1 / size2 : 1;
+    return std::make_pair(move_into(StridedFwdIt(src.begin(), stride1), StridedFwdIt(src.end(), stride1)),
+                          move_into(StridedFwdIt(copy.begin(), stride2), StridedFwdIt(copy.end(), stride2)));
   }
   abort();
   return std::pair<Container, Container>();
 }
 
-
 template <class ValueType, class Container, class Overlap>
 struct SetIntersection {
   using ContainerType = typename Container::template type<Value<ValueType>>;
-  size_t Size1;
-  size_t Size2;
+  size_t size1_;
+  size_t size2_;
 
-  SetIntersection(size_t M, size_t N) : Size1(M), Size2(N) {}
+  SetIntersection(size_t size1, size_t size2) : size1_(size1), size2_(size2) {}
 
   void run(benchmark::State& state) const {
     state.PauseTiming();
-    auto Input = genCacheUnfriendlyData<ContainerType>(Size1, Size2, Overlap());
-    std::vector<Value<ValueType>> out(std::min(Size1, Size2));
+    auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap());
+    std::vector<Value<ValueType>> out(std::min(size1_, size2_));
 
     size_t cmp;
-    auto trackingLess = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
-        ++cmp;
-        return std::less<Value<ValueType>>{}(lhs, rhs);
+    auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
+      ++cmp;
+      return std::less<Value<ValueType>>{}(lhs, rhs);
     };
 
-    const auto BatchSize =  std::max(size_t{16}, (2*TestSetElements) / (Size1+Size2));
+    const auto BATCH_SIZE = std::max(size_t{16}, (2 * TestSetElements) / (size1_ + size2_));
     state.ResumeTiming();
 
     for (const auto& _ : state) {
-      while (state.KeepRunningBatch(BatchSize)) {
-        for (unsigned i=0; i<BatchSize; ++i) {
-          const auto& [C1, C2] = Input;
-          auto outIter = std::set_intersection(C1.begin(), C1.end(), C2.begin(), C2.end(), out.begin(), trackingLess);
-          benchmark::DoNotOptimize(outIter);
+      while (state.KeepRunningBatch(BATCH_SIZE)) {
+        for (unsigned i = 0; i < BATCH_SIZE; ++i) {
+          const auto& [c1, c2] = input;
+          auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin(), tracking_less);
+          benchmark::DoNotOptimize(res);
           state.counters["Comparisons"] = cmp;
         }
       }
@@ -208,17 +208,18 @@ struct SetIntersection {
   }
 
   std::string name() const {
-    return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name +
-        ValueType::name() + '_' + std::to_string(Size1) + '_' + std::to_string(Size2);
+    return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name + ValueType::name() + '_' +
+           std::to_string(size1_) + '_' + std::to_string(size2_);
   }
 };
 
 } // namespace
 
-int main(int argc, char** argv) {/**/
+int main(int argc, char** argv) { /**/
   benchmark::Initialize(&argc, argv);
   if (benchmark::ReportUnrecognizedArguments(argc, argv))
     return 1;
-  makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(Quantities, Quantities);
+  makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(
+      Quantities, Quantities);
   benchmark::RunSpecifiedBenchmarks();
 }
diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index 21117e6b7d760..6ce9895f545a5 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -91,7 +91,8 @@ struct _IterOps<_ClassicAlgPolicy> {
   // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
   // use the incoming type for returning and steer clear of negative overflows
   template <class _Iter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
+  advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
     return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
   }
 
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 3febcb411268f..b1ecd1ae0d569 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -79,12 +79,8 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
 }
 
 template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter __lower_bound(
-    _RandIter __first,
-    _Sent __last,
-    const _Type& __value,
-    _Comp& __comp,
-    _Proj& __proj) {
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
+__lower_bound(_RandIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
   const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
   return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
 }
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 46f6fbe4d3dd2..a18bb6ff947b7 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -60,8 +60,8 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
         __comp_(__comp) {}
 
   _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-      _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-      operator()() && {
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+  operator()() && {
     while (__first2_ != __last2_) {
       __advance1_and_maybe_add_result();
       if (__first1_ == __last1_)
@@ -84,7 +84,7 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
     // use one-sided lower bound for improved algorithmic complexity bounds
     const auto __tmp = std::move(__iter);
-    __iter = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_);
+    __iter           = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_);
     __add_output_unless(__tmp != __iter);
   }
 
@@ -122,16 +122,16 @@ template <class _AlgPolicy,
           class _Sent2,
           class _OutIter>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
-    __set_intersection(
-        _InForwardIter1 __first1,
-        _Sent1 __last1,
-        _InForwardIter2 __first2,
-        _Sent2 __last2,
-        _OutIter __result,
-        _Compare&& __comp,
-        std::forward_iterator_tag,
-        std::forward_iterator_tag) {
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
+__set_intersection(
+    _InForwardIter1 __first1,
+    _Sent1 __last1,
+    _InForwardIter2 __first2,
+    _Sent2 __last2,
+    _OutIter __result,
+    _Compare&& __comp,
+    std::forward_iterator_tag,
+    std::forward_iterator_tag) {
   std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
       __intersector(__first1, __last1, __first2, __last2, __result, __comp);
   return std::move(__intersector)();
@@ -146,16 +146,16 @@ template <class _AlgPolicy,
           class _Sent2,
           class _OutIter>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
-    __set_intersection(
-        _InInputIter1 __first1,
-        _Sent1 __last1,
-        _InInputIter2 __first2,
-        _Sent2 __last2,
-        _OutIter __result,
-        _Compare&& __comp,
-        std::input_iterator_tag,
-        std::input_iterator_tag) {
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
+__set_intersection(
+    _InInputIter1 __first1,
+    _Sent1 __last1,
+    _InInputIter2 __first2,
+    _Sent2 __last2,
+    _OutIter __result,
+    _Compare&& __comp,
+    std::input_iterator_tag,
+    std::input_iterator_tag) {
   while (__first1 != __last1 && __first2 != __last2) {
     if (__comp(*__first1, *__first2))
       ++__first1;
@@ -190,9 +190,9 @@ class __set_intersection_iter_category {
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-    __set_intersection(
-        _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+__set_intersection(
+    _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
   return std::__set_intersection<_AlgPolicy>(
       std::move(__first1),
       std::move(__last1),

>From 450f5cebd41e425133fd221bf23b40bb20922eef Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 8 Jan 2024 21:51:27 +0000
Subject: [PATCH 10/56] General improvements to benchmark, including
 simplifying and slimming it down for faster runs, and including comparison
 counter.

---
 .../algorithms/set_intersection.bench.cpp     | 72 +++++++------------
 1 file changed, 27 insertions(+), 45 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 4fa411bba4354..baa5a7cdf0507 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include <algorithm>
-#include <forward_list>
 #include <iterator>
 #include <set>
 #include <vector>
@@ -31,57 +30,26 @@ struct SetContainer {
   static constexpr const char* Name = "Set";
 };
 
-struct ForwardListContainer {
-  template <typename... Args>
-  using type = std::forward_list<Args...>;
-
-  static constexpr const char* Name = "ForwardList";
-};
-
-using AllContainerTypes = std::tuple<VectorContainer, SetContainer, ForwardListContainer>;
+using AllContainerTypes = std::tuple<VectorContainer, SetContainer>;
 
 // set_intersection performance may depend on where matching values lie
 enum class OverlapPosition {
   None,
   Front,
-  Back,
+  // performance-wise, matches at the back are identical to ones at the front
   Interlaced,
 };
 
-struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 4> {
-  static constexpr const char* Names[] = {"None", "Front", "Back", "Interlaced"};
+struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 3> {
+  static constexpr const char* Names[] = {"None", "Front", "Interlaced"};
 };
 
 // functor that moves elements from an iterator range into a new Container instance
 template <typename Container>
-struct MoveInto {};
-
-template <typename T>
-struct MoveInto<std::vector<T>> {
-  template <class It>
-  [[nodiscard]] static std::vector<T> operator()(It first, It last) {
-    std::vector<T> out;
-    std::move(first, last, std::back_inserter(out));
-    return out;
-  }
-};
-
-template <typename T>
-struct MoveInto<std::forward_list<T>> {
+struct MoveInto {
   template <class It>
-  [[nodiscard]] static std::forward_list<T> operator()(It first, It last) {
-    std::forward_list<T> out;
-    std::move(first, last, std::front_inserter(out));
-    out.reverse();
-    return out;
-  }
-};
-
-template <typename T>
-struct MoveInto<std::set<T>> {
-  template <class It>
-  [[nodiscard]] static std::set<T> operator()(It first, It last) {
-    std::set<T> out;
+  [[nodiscard]] static Container operator()(It first, It last) {
+    Container out;
     std::move(first, last, std::inserter(out, out.begin()));
     return out;
   }
@@ -137,7 +105,7 @@ template <class Container>
 std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) {
   using ValueType = typename Container::value_type;
   const MoveInto<Container> move_into;
-  const auto src_size = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2);
+  const auto src_size        = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2);
   std::vector<ValueType> src = getVectorOfRandom<ValueType>(src_size);
 
   if (pos == OverlapPosition::None) {
@@ -159,10 +127,6 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
   case OverlapPosition::Front:
     return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(copy.begin(), copy.begin() + size2));
 
-  case OverlapPosition::Back:
-    return std::make_pair(move_into(src.begin() + (src.size() - size1), src.end()),
-                          move_into(copy.begin() + (copy.size() - size2), copy.end()));
-
   case OverlapPosition::Interlaced:
     const auto stride1 = size1 < size2 ? size2 / size1 : 1;
     const auto stride2 = size2 < size1 ? size1 / size2 : 1;
@@ -181,6 +145,11 @@ struct SetIntersection {
 
   SetIntersection(size_t size1, size_t size2) : size1_(size1), size2_(size2) {}
 
+  bool skip() const noexcept {
+    // let's save some time and skip simmetrical runs
+    return size1_ <= size2_;
+  }
+
   void run(benchmark::State& state) const {
     state.PauseTiming();
     auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap());
@@ -192,12 +161,13 @@ struct SetIntersection {
       return std::less<Value<ValueType>>{}(lhs, rhs);
     };
 
-    const auto BATCH_SIZE = std::max(size_t{16}, (2 * TestSetElements) / (size1_ + size2_));
+    const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_));
     state.ResumeTiming();
 
     for (const auto& _ : state) {
       while (state.KeepRunningBatch(BATCH_SIZE)) {
         for (unsigned i = 0; i < BATCH_SIZE; ++i) {
+          cmp                  = 0;
           const auto& [c1, c2] = input;
           auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin(), tracking_less);
           benchmark::DoNotOptimize(res);
@@ -219,6 +189,18 @@ int main(int argc, char** argv) { /**/
   benchmark::Initialize(&argc, argv);
   if (benchmark::ReportUnrecognizedArguments(argc, argv))
     return 1;
+  const std::vector<size_t> Quantities = {
+      1 << 0,
+      1 << 4,
+      1 << 8,
+      1 << 14,
+// Running each benchmark in parallel consumes too much memory with MSAN
+// and can lead to the test process being killed.
+#if !TEST_HAS_FEATURE(memory_sanitizer)
+      1 << 18
+#endif
+  };
+
   makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(
       Quantities, Quantities);
   benchmark::RunSpecifiedBenchmarks();

>From d0c5f2b8d23c76db2ba325aa0fb6172d1b6eb1da Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 15 Jan 2024 16:19:29 +0000
Subject: [PATCH 11/56] Huh, I wonder how I got `git clang-format` to miss
 those changes =/

---
 .../algorithms/set_intersection.bench.cpp     | 10 ++--
 libcxx/include/__algorithm/set_intersection.h | 58 +++++++++----------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index baa5a7cdf0507..38010170508a8 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -190,14 +190,14 @@ int main(int argc, char** argv) { /**/
   if (benchmark::ReportUnrecognizedArguments(argc, argv))
     return 1;
   const std::vector<size_t> Quantities = {
-      1 << 0,
-      1 << 4,
-      1 << 8,
-      1 << 14,
+    1 << 0,
+    1 << 4,
+    1 << 8,
+    1 << 14,
 // Running each benchmark in parallel consumes too much memory with MSAN
 // and can lead to the test process being killed.
 #if !TEST_HAS_FEATURE(memory_sanitizer)
-      1 << 18
+    1 << 18
 #endif
   };
 
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index a18bb6ff947b7..504350d10779e 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -59,9 +59,9 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
         __result_(__result),
         __comp_(__comp) {}
 
-  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-  operator()() && {
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+      __set_intersection_result<_InIter1, _InIter2, _OutIter>
+      operator()() && {
     while (__first2_ != __last2_) {
       __advance1_and_maybe_add_result();
       if (__first1_ == __last1_)
@@ -121,17 +121,17 @@ template <class _AlgPolicy,
           class _InForwardIter2,
           class _Sent2,
           class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
-__set_intersection(
-    _InForwardIter1 __first1,
-    _Sent1 __last1,
-    _InForwardIter2 __first2,
-    _Sent2 __last2,
-    _OutIter __result,
-    _Compare&& __comp,
-    std::forward_iterator_tag,
-    std::forward_iterator_tag) {
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+    __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
+    __set_intersection(
+        _InForwardIter1 __first1,
+        _Sent1 __last1,
+        _InForwardIter2 __first2,
+        _Sent2 __last2,
+        _OutIter __result,
+        _Compare&& __comp,
+        std::forward_iterator_tag,
+        std::forward_iterator_tag) {
   std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
       __intersector(__first1, __last1, __first2, __last2, __result, __comp);
   return std::move(__intersector)();
@@ -145,17 +145,17 @@ template <class _AlgPolicy,
           class _InInputIter2,
           class _Sent2,
           class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
-__set_intersection(
-    _InInputIter1 __first1,
-    _Sent1 __last1,
-    _InInputIter2 __first2,
-    _Sent2 __last2,
-    _OutIter __result,
-    _Compare&& __comp,
-    std::input_iterator_tag,
-    std::input_iterator_tag) {
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+    __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
+    __set_intersection(
+        _InInputIter1 __first1,
+        _Sent1 __last1,
+        _InInputIter2 __first2,
+        _Sent2 __last2,
+        _OutIter __result,
+        _Compare&& __comp,
+        std::input_iterator_tag,
+        std::input_iterator_tag) {
   while (__first1 != __last1 && __first2 != __last2) {
     if (__comp(*__first1, *__first2))
       ++__first1;
@@ -189,10 +189,10 @@ class __set_intersection_iter_category {
 };
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-__set_intersection(
-    _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+    __set_intersection_result<_InIter1, _InIter2, _OutIter>
+    __set_intersection(
+        _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
   return std::__set_intersection<_AlgPolicy>(
       std::move(__first1),
       std::move(__last1),

>From faa31150e13902941cfa0c9ef87bff265b12d898 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Jan 2024 16:25:56 +0000
Subject: [PATCH 12/56] Oops, bad mistake while porting into libc++!
 `__lower_bound_onesided()` must start with `__step==0`, otherwise we can't
 match the complexity of linear search when continually matching (like a
 std::set_intersection() of matching containers will).

---
 libcxx/include/__algorithm/lower_bound.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index b1ecd1ae0d569..dc86e2fa5c81d 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -63,6 +63,12 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
   // __iterator_category<_Iter>>::value,
   //       "lower_bound() is a multipass algorithm and requires forward iterator or better");
 
+  // split the step 0 scenario: this allows us to match worst-case complexity
+  // when replacing linear search
+  if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value))
+    return __first;
+  ++__first;
+
   using _Distance = typename iterator_traits<_Iter>::difference_type;
   for (_Distance __step = 1; __first != __last; __step <<= 1) {
     auto __it   = __first;

>From 995d04b872c8552633c36e38d382897e8329d1e2 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Jan 2024 22:31:41 +0000
Subject: [PATCH 13/56] Oops, bad tracking of displacement on
 `stride_counting_iterator`

---
 libcxx/test/support/test_iterators.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 3b86a93564e4b..d1e077e1b2655 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -826,7 +826,7 @@ class stride_counting_iterator {
         It tmp(base_);
         base_ = base(tmp += n);
         ++*stride_count_;
-        ++*stride_displacement_;
+        *stride_displacement_ += n;
         return *this;
     }
 
@@ -836,7 +836,7 @@ class stride_counting_iterator {
         It tmp(base_);
         base_ = base(tmp -= n);
         ++*stride_count_;
-        --*stride_displacement_;
+        *stride_displacement_ -= n;
         return *this;
     }
 

>From d568d491cef941e2cb03d85bcce9b7d2ec7314c4 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Jan 2024 22:33:13 +0000
Subject: [PATCH 14/56] Add more counters to the set_intersection benchmark,
 guard them behind an environment variable so we can choose to either measure
 time more accurately or obtain more information.

This led me down an interesting road of validating benchmark results, where I found a significant discrepancy in timings between running all test cases at once and selecting them individually with `--benchmark_filter`.
---
 .../algorithms/set_intersection.bench.cpp     | 38 +++++++++++++------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 38010170508a8..b2de0c3223b00 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -9,9 +9,11 @@
 #include <algorithm>
 #include <iterator>
 #include <set>
+#include <stdlib.h>
 #include <vector>
 
 #include "common.h"
+#include "test_iterators.h"
 
 namespace {
 
@@ -137,6 +139,10 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
   return std::pair<Container, Container>();
 }
 
+// use environment variable to enable additional counters: instrumentation will
+// impact CPU utilisation, let's give the user the option
+static const bool TRACK_COUNTERS = getenv("TRACK_COUNTERS") != nullptr;
+
 template <class ValueType, class Container, class Overlap>
 struct SetIntersection {
   using ContainerType = typename Container::template type<Value<ValueType>>;
@@ -147,7 +153,7 @@ struct SetIntersection {
 
   bool skip() const noexcept {
     // let's save some time and skip simmetrical runs
-    return size1_ <= size2_;
+    return size1_ < size2_;
   }
 
   void run(benchmark::State& state) const {
@@ -155,23 +161,33 @@ struct SetIntersection {
     auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap());
     std::vector<Value<ValueType>> out(std::min(size1_, size2_));
 
-    size_t cmp;
-    auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
-      ++cmp;
-      return std::less<Value<ValueType>>{}(lhs, rhs);
-    };
-
     const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_));
     state.ResumeTiming();
 
     for (const auto& _ : state) {
       while (state.KeepRunningBatch(BATCH_SIZE)) {
         for (unsigned i = 0; i < BATCH_SIZE; ++i) {
-          cmp                  = 0;
           const auto& [c1, c2] = input;
-          auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin(), tracking_less);
-          benchmark::DoNotOptimize(res);
-          state.counters["Comparisons"] = cmp;
+          if (TRACK_COUNTERS) {
+            size_t cmp{}, strides{}, displacement{};
+            auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
+              ++cmp;
+              return std::less<Value<ValueType>>{}(lhs, rhs);
+            };
+            stride_counting_iterator b1(c1.begin(), &strides, &displacement);
+            stride_counting_iterator e1(c1.end(), &strides, &displacement);
+            stride_counting_iterator b2(c2.begin(), &strides, &displacement);
+            stride_counting_iterator e2(c2.end(), &strides, &displacement);
+            auto res = std::set_intersection(b1, e1, b2, e2, out.begin(), tracking_less);
+            benchmark::DoNotOptimize(res);
+            state.counters["comparisons"]       = cmp;
+            state.counters["iter_strides"]      = strides;
+            state.counters["iter_displacement"] = displacement;
+
+          } else {
+            auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin());
+            benchmark::DoNotOptimize(res);
+          }
         }
       }
     }

>From bb872e0b1d19a77450b8455c348d3f4669adcefb Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Thu, 1 Feb 2024 13:41:07 +0000
Subject: [PATCH 15/56] Revert "Oops, bad tracking of displacement on
 `stride_counting_iterator`"

This reverts commit 995d04b872c8552633c36e38d382897e8329d1e2.
---
 libcxx/test/support/test_iterators.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 189684022d1a3..191de7f3c8a36 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -826,7 +826,7 @@ class stride_counting_iterator {
         It tmp(base_);
         base_ = base(tmp += n);
         ++*stride_count_;
-        *stride_displacement_ += n;
+        ++*stride_displacement_;
         return *this;
     }
 
@@ -836,7 +836,7 @@ class stride_counting_iterator {
         It tmp(base_);
         base_ = base(tmp -= n);
         ++*stride_count_;
-        *stride_displacement_ -= n;
+        --*stride_displacement_;
         return *this;
     }
 

>From a1cd8ffc82fe6021dbafb7b543e5472eda3ef87a Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Thu, 1 Feb 2024 17:39:08 +0000
Subject: [PATCH 16/56] * Fix C++03 compatibility issues. * Fix tests I had
 broken. * More tweaks and better comments.

---
 .../include/__algorithm/iterator_operations.h |  6 ++--
 libcxx/include/__algorithm/lower_bound.h      | 12 ++++---
 libcxx/include/__algorithm/set_intersection.h | 36 ++++++++++++++-----
 .../lower.bound/lower_bound.pass.cpp          | 11 ++++--
 .../lower.bound/lower_bound_comp.pass.cpp     | 25 +++++++++----
 .../ranges_set_intersection.pass.cpp          | 22 ++++++------
 6 files changed, 76 insertions(+), 36 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index d9a6e7f35df72..449d03d52e324 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -98,7 +98,7 @@ struct _IterOps<_ClassicAlgPolicy> {
   template <class _InputIter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
-    _Distance __dist{};
+    _Distance __dist = _Distance();
     for (; __dist < __count && __iter != __sentinel; ++__dist)
       ++__iter;
     return __count - __dist;
@@ -108,7 +108,7 @@ struct _IterOps<_ClassicAlgPolicy> {
   template <class _BiDirIter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
-    _Distance __dist{};
+    _Distance __dist = _Distance();
     if (__count >= 0)
       for (; __dist < __count && __iter != __sentinel; ++__dist)
         ++__iter;
@@ -120,7 +120,7 @@ struct _IterOps<_ClassicAlgPolicy> {
 
   // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization
   template <class _RandIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR static _Distance
   __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
     auto __dist = _IterOps::distance(__iter, __sentinel);
     _LIBCPP_ASSERT_UNCATEGORIZED(
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 12a9c4850460b..e22700fa80269 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -50,7 +50,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
 
 // One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
 // advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
-// 2*(log(n)-1) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
+// 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
 // the first one is when operating over non-random iterators, because the classic algorithm requires knowing the
 // container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
 // traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
@@ -63,11 +63,9 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
   // __iterator_category<_Iter>>::value,
   //       "lower_bound() is a multipass algorithm and requires forward iterator or better");
 
-  // split the step 0 scenario: this allows us to match worst-case complexity
-  // when replacing linear search
+  // step = 0, ensuring we can always short-circuit when distance is 1 later on
   if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value))
     return __first;
-  ++__first;
 
   using _Distance = typename iterator_traits<_Iter>::difference_type;
   for (_Distance __step = 1; __first != __last; __step <<= 1) {
@@ -76,10 +74,14 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
     // once we reach the last range where needle can be we must start
     // looking inwards, bisecting that range
     if (__it == __last || !std::__invoke(__comp, std::__invoke(__proj, *__it), __value)) {
+      // we've already checked the previous value and it was less, we can save
+      // one comparison by skipping bisection
+      if (__dist == 1)
+        return __it;
       return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
     }
     // range not found, move forward!
-    __first = std::move(__it);
+    __first = __it;
   }
   return __first;
 }
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index e3aa99d004eee..00fedec3701d6 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -20,6 +20,7 @@
 #include <__type_traits/is_same.h>
 #include <__utility/exchange.h>
 #include <__utility/move.h>
+#include <__utility/swap.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -50,8 +51,7 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   const _Sent2& __last2_;
   _OutIter& __result_;
   _Compare& __comp_;
-  static constexpr auto __proj_ = std::__identity();
-  bool __prev_advanced_         = true;
+  bool __prev_advanced_ = true;
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersector(
       _InIter1& __first1, _Sent1& __last1, _InIter2& __first2, _Sent2& __last2, _OutIter& __result, _Compare& __comp)
@@ -64,7 +64,7 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
 
   _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
       __set_intersection_result<_InIter1, _InIter2, _OutIter>
-      operator()() && {
+      operator()() {
     while (__first2_ != __last2_) {
       __advance1_and_maybe_add_result();
       if (__first1_ == __last1_)
@@ -85,9 +85,27 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   template <class _Iter, class _Sent, class _Value>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
   __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
-    // use one-sided lower bound for improved algorithmic complexity bounds
-    const auto __tmp = std::move(__iter);
-    __iter           = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_);
+    static _LIBCPP_CONSTEXPR std::__identity __proj;
+    // use one-sided binary search for improved algorithmic complexity bounds
+    // understanding how we can use binary search and still respect complexity
+    // guarantees is _not_ straightforward, so let me explain: the guarantee
+    // is "at most 2*(N+M)-1 comparisons", and one-sided binary search will
+    // necessarily overshoot depending on the position of the needle in the
+    // haystack -- for instance, if we're searching for 3 in (1, 2, 3, 4),
+    // we'll check if 3<1, then 3<2, then 3<4, and, finally, 3<3, for a total of
+    // 4 comparisons, when linear search would have yielded 3. However,
+    // because we won't need to perform the intervening reciprocal comparisons
+    // (ie 1<3, 2<3, 4<3), that extra comparison doesn't run afoul of the
+    // guarantee. Additionally, this type of scenario can only happen for match
+    // distances of up to 5 elements, because 2*log2(8) is 6, and we'll still
+    // be worse-off at position 5 of an 8-element set. From then onwards
+    // these scenarios can't happen.
+    // TL;DR: we'll be 1 comparison worse-off compared to the classic linear-
+    // searching algorithm if matching position 3 of a set with 4 elements,
+    // or position 5 if the set has 7 or 8 elements, but we'll never exceed
+    // the complexity guarantees from the standard.
+    _Iter __tmp = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj);
+    std::swap(__tmp, __iter);
     __add_output_unless(__tmp != __iter);
   }
 
@@ -137,7 +155,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
         std::forward_iterator_tag) {
   std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
       __intersector(__first1, __last1, __first2, __last2, __result, __comp);
-  return std::move(__intersector)();
+  return __intersector();
 }
 
 // input iterators are not suitable for multipass algorithms, so we stick to the classic single-pass version
@@ -183,7 +201,7 @@ class __set_intersection_iter_category {
   template <class _It>
   using __cat = typename std::_IterOps<_AlgPolicy>::template __iterator_category<_It>;
   template <class _It>
-  static auto test(__cat<_It>*) -> __cat<_It>;
+  static __cat<_It> test(__cat<_It>*);
   template <class>
   static std::input_iterator_tag test(...);
 
@@ -202,7 +220,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
       std::move(__first2),
       std::move(__last2),
       std::move(__result),
-      std::forward<_Compare>(__comp),
+      __comp,
       typename std::__set_intersection_iter_category<_AlgPolicy, _InIter1>::__type(),
       typename std::__set_intersection_iter_category<_AlgPolicy, _InIter2>::__type());
 }
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
index dd2916338e8f6..196af84b69222 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
@@ -39,10 +39,15 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-  std::size_t strides{};
-  std::size_t displacement{};
+#if TEST_STD_VER > 17
+  std::size_t strides      = 0;
+  std::size_t displacement = 0;
   stride_counting_iterator f(first, &strides, &displacement);
   stride_counting_iterator l(last, &strides, &displacement);
+#else
+  Iter& f = first;
+  Iter& l = last;
+#endif
 
   auto i = std::lower_bound(f, l, value);
   for (auto j = base(f); j != base(i); ++j)
@@ -50,9 +55,11 @@ test(Iter first, Iter last, const T& value)
   for (auto j = base(i); j != base(l); ++j)
     assert(!(*j < value));
 
+#if TEST_STD_VER > 17
   auto len = static_cast<std::size_t>(std::distance(first, last));
   assert(strides <= 2 * len);
   assert(displacement <= 2 * len);
+#endif
 }
 
 template <class Iter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
index ff928e23b9006..643fd0052e479 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
@@ -39,16 +39,27 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-  std::size_t strides{};
-  std::size_t displacement{};
+#if TEST_STD_VER > 17
+  std::size_t strides      = 0;
+  std::size_t displacement = 0;
   stride_counting_iterator f(first, &strides, &displacement);
   stride_counting_iterator l(last, &strides, &displacement);
+#else
+  Iter& f = first;
+  Iter& l = last;
+#endif
+
+  std::size_t comparisons = 0;
+  struct InstrumentedGreater {
+    explicit InstrumentedGreater(std::size_t* cmp) : comparisons_(cmp) {}
+    bool operator()(int rhs, int lhs) const {
+      ++*comparisons_;
+      return std::greater<int>()(rhs, lhs);
+    }
 
-  std::size_t comparisons{};
-  auto cmp = [&comparisons](int rhs, int lhs) {
-    ++comparisons;
-    return std::greater<int>()(rhs, lhs);
+    std::size_t* comparisons_;
   };
+  InstrumentedGreater cmp(&comparisons);
 
   auto i = std::lower_bound(f, l, value, cmp);
   for (auto j = base(f); j != base(i); ++j)
@@ -57,8 +68,10 @@ test(Iter first, Iter last, const T& value)
     assert(!std::greater<int>()(*j, value));
 
   auto len = static_cast<std::size_t>(std::distance(first, last));
+#if TEST_STD_VER > 17
   assert(strides <= 2 * len);
   assert(displacement <= 2 * len);
+#endif
   assert(comparisons <= std::ceil(std::log2(len + 1)));
 }
 
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index f658a95778c85..2f3b0df9cda7c 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -436,20 +436,20 @@ constexpr void testComplexityParameterizedIter() {
   }
 
   // Lower complexity when there is low overlap between ranges: we can make 2*log(X) comparisons when one range
-  // has X elements that can be skipped over.
+  // has X elements that can be skipped over (and then 1 more to confirm that the value we found is equal).
   {
     std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
     std::array r2{15};
     std::array expected{15};
 
     OperationCounts expectedCounts;
-    expectedCounts.comparisons                 = 8;
-    expectedCounts.in[0].proj                  = 8;
-    expectedCounts.in[0].iterator_strides      = 24;
-    expectedCounts.in[0].iterator_displacement = 24;
-    expectedCounts.in[1].proj                  = 8;
-    expectedCounts.in[1].iterator_strides      = 3;
-    expectedCounts.in[1].iterator_displacement = 3;
+    expectedCounts.comparisons                 = 9;
+    expectedCounts.in[0].proj                  = 9;
+    expectedCounts.in[0].iterator_strides      = 23;
+    expectedCounts.in[0].iterator_displacement = 23;
+    expectedCounts.in[1].proj                  = 9;
+    expectedCounts.in[1].iterator_strides      = 1;
+    expectedCounts.in[1].iterator_displacement = 1;
 
     testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
     testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
@@ -721,9 +721,9 @@ constexpr bool test() {
       std::ranges::set_intersection(r1.begin(), r1.end(), r2.begin(), r2.end(), out.data(), comp, proj1, proj2);
 
       assert(std::ranges::equal(out, expected, {}, &Data::data));
-      assert(numberOfComp < maxOperation);
-      assert(numberOfProj1 < maxOperation);
-      assert(numberOfProj2 < maxOperation);
+      assert(numberOfComp <= maxOperation);
+      assert(numberOfProj1 <= maxOperation);
+      assert(numberOfProj2 <= maxOperation);
     }
 
     // range overload

>From 24d1d5b9a9d93f567be004e4a36e5b9147898b06 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Thu, 1 Feb 2024 18:04:17 +0000
Subject: [PATCH 17/56] Remove non-ascii characters, CI doesn't like them.

---
 libcxx/include/__algorithm/lower_bound.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index e22700fa80269..6016502404002 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -49,13 +49,14 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
 }
 
 // One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
-// advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
-// 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
+// advantage of being \Omega(1) rather than the classic algorithm's \Omega(log(n)), with the downside of executing at
+// most 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
 // the first one is when operating over non-random iterators, because the classic algorithm requires knowing the
-// container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
+// container's size upfront, which adds \Omega(n) iterator increments to the complexity. The second one is when you're
 // traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
-// would yield Ω(n*log(n)) comparisons and, for non-random iterators, Ω(n^2) iterator increments, whereas the one-sided
-// version will yield O(n) operations on both counts, with a Ω(log(n)) bound on the number of comparisons.
+// would yield \Omega(n*log(n)) comparisons and, for non-random iterators, \Omega(n^2) iterator increments, whereas the
+// one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of
+// comparisons.
 template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
 __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {

>From 4b7377367ed671f9a23e81f2211ff54d50a84d17 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Fri, 2 Feb 2024 19:02:19 +0000
Subject: [PATCH 18/56] Oops, missed an #include

---
 libcxx/include/__algorithm/iterator_operations.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index 449d03d52e324..5797e1d7e78d8 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -11,6 +11,7 @@
 
 #include <__algorithm/iter_swap.h>
 #include <__algorithm/ranges_iterator_concept.h>
+#include <__assert>
 #include <__config>
 #include <__iterator/advance.h>
 #include <__iterator/distance.h>

>From d0facc560c7d8ed38b27ee0a3bbbe367d134a096 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 5 Feb 2024 22:14:42 +0000
Subject: [PATCH 19/56] set_intersection.h: remove `static` from the local
 `constexpr` projection, it breaks constexprness of the method.

---
 libcxx/include/__algorithm/set_intersection.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 00fedec3701d6..e3e93f9e75562 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -85,7 +85,7 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   template <class _Iter, class _Sent, class _Value>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
   __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
-    static _LIBCPP_CONSTEXPR std::__identity __proj;
+    _LIBCPP_CONSTEXPR std::__identity __proj;
     // use one-sided binary search for improved algorithmic complexity bounds
     // understanding how we can use binary search and still respect complexity
     // guarantees is _not_ straightforward, so let me explain: the guarantee

>From a12aa376ec6eff642c6c0ed8b448215c4f8a89c9 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 5 Feb 2024 22:16:24 +0000
Subject: [PATCH 20/56] Fix constexpr shenanigans with gcc and
 `stride_counting_iterator`

---
 libcxx/test/support/test_iterators.h | 56 ++++++++++++++--------------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 191de7f3c8a36..d9eb149be38e3 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -734,6 +734,20 @@ template <class It,
           class StrideCountType        = std::iter_difference_t<It>,
           class StrideDisplacementType = std::iter_difference_t<It>>
 class stride_counting_iterator {
+  template <typename UnderlyingType>
+  struct concrete_or_ref {
+    using value_type            = std::remove_cv_t<std::remove_reference_t<UnderlyingType>>;
+    constexpr concrete_or_ref() = default;
+    explicit constexpr concrete_or_ref(UnderlyingType* c) noexcept : ptr_{c} {}
+
+    constexpr operator value_type&() noexcept { return ptr_ ? *ptr_ : val_; }
+    constexpr operator const value_type&() const noexcept { return ptr_ ? *ptr_ : val_; }
+
+  private:
+    value_type val_{};
+    value_type* ptr_{nullptr};
+  };
+
 public:
     using value_type = typename iter_value_or_void<It>::type;
     using difference_type = std::iter_difference_t<It>;
@@ -758,27 +772,14 @@ class stride_counting_iterator {
     constexpr stride_counting_iterator(const stride_counting_iterator& o) { *this = o; }
     constexpr stride_counting_iterator(stride_counting_iterator&& o) { *this = o; }
 
-    constexpr stride_counting_iterator& operator=(const stride_counting_iterator& o) {
-      base_ = o.base_;
-      // if memory backing count is owned by the object, copy values
-      if (o.stride_count_ == &o.stride_count_default_) {
-        assert(o.stride_displacement_ == &o.stride_displacement_default_);
-        *stride_count_        = *o.stride_count_;
-        *stride_displacement_ = *o.stride_displacement_;
-        return *this;
-      }
-      // otherwise share the same externally-owned variables
-      stride_count_        = o.stride_count_;
-      stride_displacement_ = o.stride_displacement_;
-      return *this;
-    }
+    constexpr stride_counting_iterator& operator=(const stride_counting_iterator& o) = default;
     constexpr stride_counting_iterator& operator=(stride_counting_iterator&& o) { return *this = o; }
 
     friend constexpr It base(stride_counting_iterator const& it) { return It(it.base_); }
 
-    constexpr StrideCountType stride_count() const { return *stride_count_; }
+    constexpr StrideCountType stride_count() const { return stride_count_; }
 
-    constexpr StrideDisplacementType stride_displacement() const { return *stride_displacement_; }
+    constexpr StrideDisplacementType stride_displacement() const { return stride_displacement_; }
 
     constexpr decltype(auto) operator*() const { return *It(base_); }
 
@@ -787,8 +788,8 @@ class stride_counting_iterator {
     constexpr stride_counting_iterator& operator++() {
         It tmp(base_);
         base_ = base(++tmp);
-        ++*stride_count_;
-        ++*stride_displacement_;
+        ++stride_count_;
+        ++stride_displacement_;
         return *this;
     }
 
@@ -807,8 +808,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(--tmp);
-        ++*stride_count_;
-        --*stride_displacement_;
+        ++stride_count_;
+        --stride_displacement_;
         return *this;
     }
 
@@ -825,8 +826,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(tmp += n);
-        ++*stride_count_;
-        ++*stride_displacement_;
+        ++stride_count_;
+        ++stride_displacement_;
         return *this;
     }
 
@@ -835,8 +836,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(tmp -= n);
-        ++*stride_count_;
-        --*stride_displacement_;
+        ++stride_count_;
+        --stride_displacement_;
         return *this;
     }
 
@@ -899,11 +900,8 @@ class stride_counting_iterator {
 
 private:
     decltype(base(std::declval<It>())) base_;
-    StrideCountType stride_count_default_               = 0;
-    StrideDisplacementType stride_displacement_default_ = 0;
-
-    StrideCountType* stride_count_               = &stride_count_default_;
-    StrideDisplacementType* stride_displacement_ = &stride_displacement_default_;
+    concrete_or_ref<StrideCountType> stride_count_;
+    concrete_or_ref<StrideDisplacementType> stride_displacement_;
 };
 template <class It>
 stride_counting_iterator(It) -> stride_counting_iterator<It>;

>From 69dba78ed467990aa6a8a8cbb032706cfc551a20 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 6 Feb 2024 21:31:30 +0000
Subject: [PATCH 21/56] Restrict number of constexpr steps so
 `ranges_set_intersection.pass.cpp` is ok on gcc.

---
 .../ranges_set_intersection.pass.cpp              | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index 2f3b0df9cda7c..162f6ca8b7f35 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -483,18 +483,21 @@ constexpr void testComplexityParameterizedIterPermutateIn1() {
 }
 
 template <class Out>
-constexpr void testComplexityParameterizedIterPermutateIn1In2() {
+constexpr bool testComplexityParameterizedIterPermutateIn1In2() {
   testComplexityParameterizedIterPermutateIn1<forward_iterator, Out>();
   testComplexityParameterizedIterPermutateIn1<bidirectional_iterator, Out>();
   testComplexityParameterizedIterPermutateIn1<random_access_iterator, Out>();
+  return true;
 }
 
-constexpr bool testComplexityMultipleTypes() {
-  //testComplexityParameterizedIter<cpp20_input_iterator, random_access_iterator, OutIter>();
+constexpr void runAllComplexityTests() {
   testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
   testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
   testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
-  return true;
+
+  static_assert(testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>());
+  static_assert(testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>());
+  static_assert(testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>());
 }
 
 constexpr bool test() {
@@ -797,8 +800,8 @@ int main(int, char**) {
   // than the step limit.
   runAllIteratorPermutationsTests();
 
-  testComplexityMultipleTypes();
-  static_assert(testComplexityMultipleTypes());
+  // similar for complexity tests
+  runAllComplexityTests();
 
   return 0;
 }

>From fe1fe8c4607044ad14d42a6e6d713b7f58f4ef11 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 12 Feb 2024 08:16:56 +0000
Subject: [PATCH 22/56] Fix constexpr annotation and make internal methods
 private in _IterOps

---
 libcxx/include/__algorithm/iterator_operations.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index 5797e1d7e78d8..c9fd4376595bd 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -95,6 +95,7 @@ struct _IterOps<_ClassicAlgPolicy> {
     return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
   }
 
+private:
   // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
   template <class _InputIter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
@@ -121,7 +122,7 @@ struct _IterOps<_ClassicAlgPolicy> {
 
   // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization
   template <class _RandIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR static _Distance
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
     auto __dist = _IterOps::distance(__iter, __sentinel);
     _LIBCPP_ASSERT_UNCATEGORIZED(
@@ -134,6 +135,7 @@ struct _IterOps<_ClassicAlgPolicy> {
     return __count - __dist;
   }
 
+public:
   // distance
   template <class _Iter>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static typename iterator_traits<_Iter>::difference_type

>From bb2c7588947e92451a46f310fe63cd137303fcff Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 12 Feb 2024 08:18:15 +0000
Subject: [PATCH 23/56] Allow for assertions in comparison count when in
 hardened mode for complexity validation.

---
 .../set.intersection/ranges_set_intersection.pass.cpp  | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index 162f6ca8b7f35..4858493145af9 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -299,7 +299,14 @@ struct [[nodiscard]] OperationCounts {
   std::array<PerInput, 2> in;
 
   [[nodiscard]] constexpr bool matchesExpectation(const OperationCounts& expect) {
-    return comparisons <= expect.comparisons && in[0].matchesExpectation(expect.in[0]) &&
+    // __debug_less will perform an additional comparison in an assertion
+    constexpr unsigned comparison_multiplier =
+#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
+        2;
+#else
+        1;
+#endif
+    return comparisons <= comparison_multiplier * expect.comparisons && in[0].matchesExpectation(expect.in[0]) &&
            in[1].matchesExpectation(expect.in[1]);
   }
 
@@ -309,7 +316,6 @@ struct [[nodiscard]] OperationCounts {
 };
 } // namespace
 
-#include <iostream>
 template <template <class...> class In1,
           template <class...>
           class In2,

>From c6b895c46b5f11d988506a4bb66a655dfad275bb Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 13 Feb 2024 20:12:24 +0000
Subject: [PATCH 24/56] Revert lower_bound.pass.cpp changes, will move into a
 new PR.

---
 .../lower.bound/lower_bound.pass.cpp          | 26 ++++---------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
index 196af84b69222..a2d8ab632303c 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
@@ -39,27 +39,11 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-#if TEST_STD_VER > 17
-  std::size_t strides      = 0;
-  std::size_t displacement = 0;
-  stride_counting_iterator f(first, &strides, &displacement);
-  stride_counting_iterator l(last, &strides, &displacement);
-#else
-  Iter& f = first;
-  Iter& l = last;
-#endif
-
-  auto i = std::lower_bound(f, l, value);
-  for (auto j = base(f); j != base(i); ++j)
-    assert(*j < value);
-  for (auto j = base(i); j != base(l); ++j)
-    assert(!(*j < value));
-
-#if TEST_STD_VER > 17
-  auto len = static_cast<std::size_t>(std::distance(first, last));
-  assert(strides <= 2 * len);
-  assert(displacement <= 2 * len);
-#endif
+    Iter i = std::lower_bound(first, last, value);
+    for (Iter j = first; j != i; ++j)
+        assert(*j < value);
+    for (Iter j = i; j != last; ++j)
+        assert(!(*j < value));
 }
 
 template <class Iter>

>From 31321b943b9a5c9358844725b2e0bc0910055062 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 13 Feb 2024 20:14:34 +0000
Subject: [PATCH 25/56] Oops, forgot to revert this one too.

---
 .../lower.bound/lower_bound_comp.pass.cpp     | 40 +++----------------
 1 file changed, 5 insertions(+), 35 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
index 643fd0052e479..b9133028d9ade 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
@@ -17,7 +17,6 @@
 #include <vector>
 #include <cassert>
 #include <cstddef>
-#include <cmath>
 
 #include "test_macros.h"
 #include "test_iterators.h"
@@ -39,40 +38,11 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-#if TEST_STD_VER > 17
-  std::size_t strides      = 0;
-  std::size_t displacement = 0;
-  stride_counting_iterator f(first, &strides, &displacement);
-  stride_counting_iterator l(last, &strides, &displacement);
-#else
-  Iter& f = first;
-  Iter& l = last;
-#endif
-
-  std::size_t comparisons = 0;
-  struct InstrumentedGreater {
-    explicit InstrumentedGreater(std::size_t* cmp) : comparisons_(cmp) {}
-    bool operator()(int rhs, int lhs) const {
-      ++*comparisons_;
-      return std::greater<int>()(rhs, lhs);
-    }
-
-    std::size_t* comparisons_;
-  };
-  InstrumentedGreater cmp(&comparisons);
-
-  auto i = std::lower_bound(f, l, value, cmp);
-  for (auto j = base(f); j != base(i); ++j)
-    assert(std::greater<int>()(*j, value));
-  for (auto j = base(i); j != base(l); ++j)
-    assert(!std::greater<int>()(*j, value));
-
-  auto len = static_cast<std::size_t>(std::distance(first, last));
-#if TEST_STD_VER > 17
-  assert(strides <= 2 * len);
-  assert(displacement <= 2 * len);
-#endif
-  assert(comparisons <= std::ceil(std::log2(len + 1)));
+    Iter i = std::lower_bound(first, last, value, std::greater<int>());
+    for (Iter j = first; j != i; ++j)
+        assert(std::greater<int>()(*j, value));
+    for (Iter j = i; j != last; ++j)
+        assert(!std::greater<int>()(*j, value));
 }
 
 template <class Iter>

>From 3805e95cf63137cef87f796436abcf2038923e29 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 16:29:06 +0100
Subject: [PATCH 26/56] s/_LIBCPP_NODISCARD_EXT/_LIBCPP_NODISCARD/ after
 merging #87094

---
 libcxx/include/__algorithm/lower_bound.h      |  6 +++---
 libcxx/include/__algorithm/set_intersection.h | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 56084215e37ca..9424a50373fad 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -28,7 +28,7 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _AlgPolicy, class _Iter, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting(
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting(
     _Iter __first,
     const _Type& __value,
     typename iterator_traits<_Iter>::difference_type __len,
@@ -58,7 +58,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
 // one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of
 // comparisons.
 template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
 __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
   // static_assert(std::is_base_of<std::forward_iterator_tag, typename _IterOps<_AlgPolicy>::template
   // __iterator_category<_Iter>>::value,
@@ -88,7 +88,7 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
 }
 
 template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
+_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
 __lower_bound(_RandIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
   const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
   return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index e3e93f9e75562..340dd7ec8b552 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -44,7 +44,7 @@ struct __set_intersection_result {
 };
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-struct _LIBCPP_NODISCARD_EXT __set_intersector {
+struct _LIBCPP_NODISCARD __set_intersector {
   _InIter1& __first1_;
   const _Sent1& __last1_;
   _InIter2& __first2_;
@@ -62,7 +62,7 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
         __result_(__result),
         __comp_(__comp) {}
 
-  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+  _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
       __set_intersection_result<_InIter1, _InIter2, _OutIter>
       operator()() {
     while (__first2_ != __last2_) {
@@ -142,7 +142,7 @@ template <class _AlgPolicy,
           class _InForwardIter2,
           class _Sent2,
           class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
     __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
     __set_intersection(
         _InForwardIter1 __first1,
@@ -166,7 +166,7 @@ template <class _AlgPolicy,
           class _InInputIter2,
           class _Sent2,
           class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
     __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
     __set_intersection(
         _InInputIter1 __first1,
@@ -210,7 +210,7 @@ class __set_intersection_iter_category {
 };
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
     __set_intersection_result<_InIter1, _InIter2, _OutIter>
     __set_intersection(
         _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {

>From 090df863302f92ade54e54dbb3b34ba0d9c58c63 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 11:52:10 +0100
Subject: [PATCH 27/56] Address feedback about qualifying abort(); add a
 comment to clarify the choice of not having a `default` case in `switch`.

---
 libcxx/benchmarks/algorithms/set_intersection.bench.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index b2de0c3223b00..396adb5067a40 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -124,6 +124,7 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
 
   switch (pos) {
   case OverlapPosition::None:
+    // we like -Wswitch :)
     break;
 
   case OverlapPosition::Front:
@@ -135,7 +136,7 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
     return std::make_pair(move_into(StridedFwdIt(src.begin(), stride1), StridedFwdIt(src.end(), stride1)),
                           move_into(StridedFwdIt(copy.begin(), stride2), StridedFwdIt(copy.end(), stride2)));
   }
-  abort();
+  std::abort(); // would be std::unreachable() if it could
   return std::pair<Container, Container>();
 }
 

>From cb92d3cd7621b2360d1736c7aa53962c9226118f Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 11:53:20 +0100
Subject: [PATCH 28/56] Address comment about broken comment for
 `getVectorOfRandom()`: move the function closer to its point of usage and
 document what `genCacheUnfriendlyData()` is trying to do in its own comment.
 `getVectorOfRandom()` has, IMHO, a good name which describes all it's meant to
 achieve; it's `genCacheUnfriendlyData()` that needs explaining.

---
 .../algorithms/set_intersection.bench.cpp     | 48 ++++++++++---------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 396adb5067a40..521e184f81a12 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -46,27 +46,6 @@ struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosit
   static constexpr const char* Names[] = {"None", "Front", "Interlaced"};
 };
 
-// functor that moves elements from an iterator range into a new Container instance
-template <typename Container>
-struct MoveInto {
-  template <class It>
-  [[nodiscard]] static Container operator()(It first, It last) {
-    Container out;
-    std::move(first, last, std::inserter(out, out.begin()));
-    return out;
-  }
-};
-
-// lightweight wrapping around fillValues() which puts a little effort into
-// making that would be contiguous when sorted non-contiguous in memory
-template <typename T>
-std::vector<T> getVectorOfRandom(size_t N) {
-  std::vector<T> v;
-  fillValues(v, N, Order::Random);
-  sortValues(v, Order::Random);
-  return std::vector<T>(v);
-}
-
 // forward_iterator wrapping which, for each increment, moves the underlying iterator forward Stride elements
 template <typename Wrapped>
 struct StridedFwdIt {
@@ -101,8 +80,31 @@ struct StridedFwdIt {
 template <typename Wrapped>
 StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
 
-// realistically, data won't all be nicely contiguous in a container
+// functor that moves elements from an iterator range into a new Container instance
+template <typename Container>
+struct MoveInto {
+  template <class It>
+  [[nodiscard]] static Container operator()(It first, It last) {
+    Container out;
+    std::move(first, last, std::inserter(out, out.begin()));
+    return out;
+  }
+};
+
+template <typename T>
+std::vector<T> getVectorOfRandom(size_t N) {
+  std::vector<T> v;
+  fillValues(v, N, Order::Random);
+  sortValues(v, Order::Random);
+  return std::vector<T>(v);
+}
+
+// realistically, data won't all be nicely contiguous in a container,
 // we'll go through some effort to ensure that it's shuffled through memory
+// this is especially important for containers with non-contiguous element
+// storage, but it will affect even a std::vector, because when you copy a
+// std::vector<std::string> the underlying data storage position for the char
+// arrays of the copy are likely to have high locality
 template <class Container>
 std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) {
   using ValueType = typename Container::value_type;
@@ -116,7 +118,7 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
   }
 
   // all other overlap types will have to copy some part of the data, but if
-  // we copy after sorting it will likely have high cache locality, so we sort
+  // we copy after sorting it will likely have high locality, so we sort
   // each copy separately
   auto copy = src;
   std::sort(src.begin(), src.end());

>From f4a6f3630cc5fba532dae97cd2cc97b1d929a9cd Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 17:49:11 +0100
Subject: [PATCH 29/56] Oops, forgot to format =/. Still working on the
 remaining feedback, but it would be good to be sure that we have a good
 baseline after this big merge from main.

---
 libcxx/include/__algorithm/set_intersection.h | 58 +++++++++----------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 340dd7ec8b552..a5a86baa345cb 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -62,9 +62,9 @@ struct _LIBCPP_NODISCARD __set_intersector {
         __result_(__result),
         __comp_(__comp) {}
 
-  _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
-      __set_intersection_result<_InIter1, _InIter2, _OutIter>
-      operator()() {
+  _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+  operator()() {
     while (__first2_ != __last2_) {
       __advance1_and_maybe_add_result();
       if (__first1_ == __last1_)
@@ -142,17 +142,17 @@ template <class _AlgPolicy,
           class _InForwardIter2,
           class _Sent2,
           class _OutIter>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
-    __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
-    __set_intersection(
-        _InForwardIter1 __first1,
-        _Sent1 __last1,
-        _InForwardIter2 __first2,
-        _Sent2 __last2,
-        _OutIter __result,
-        _Compare&& __comp,
-        std::forward_iterator_tag,
-        std::forward_iterator_tag) {
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
+__set_intersection(
+    _InForwardIter1 __first1,
+    _Sent1 __last1,
+    _InForwardIter2 __first2,
+    _Sent2 __last2,
+    _OutIter __result,
+    _Compare&& __comp,
+    std::forward_iterator_tag,
+    std::forward_iterator_tag) {
   std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
       __intersector(__first1, __last1, __first2, __last2, __result, __comp);
   return __intersector();
@@ -166,17 +166,17 @@ template <class _AlgPolicy,
           class _InInputIter2,
           class _Sent2,
           class _OutIter>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
-    __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
-    __set_intersection(
-        _InInputIter1 __first1,
-        _Sent1 __last1,
-        _InInputIter2 __first2,
-        _Sent2 __last2,
-        _OutIter __result,
-        _Compare&& __comp,
-        std::input_iterator_tag,
-        std::input_iterator_tag) {
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
+__set_intersection(
+    _InInputIter1 __first1,
+    _Sent1 __last1,
+    _InInputIter2 __first2,
+    _Sent2 __last2,
+    _OutIter __result,
+    _Compare&& __comp,
+    std::input_iterator_tag,
+    std::input_iterator_tag) {
   while (__first1 != __last1 && __first2 != __last2) {
     if (__comp(*__first1, *__first2))
       ++__first1;
@@ -210,10 +210,10 @@ class __set_intersection_iter_category {
 };
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
-    __set_intersection_result<_InIter1, _InIter2, _OutIter>
-    __set_intersection(
-        _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+__set_intersection(
+    _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
   return std::__set_intersection<_AlgPolicy>(
       std::move(__first1),
       std::move(__last1),

>From 3f9cfec32224b97692f6c6119c00b3abd309c508 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 20:35:27 +0100
Subject: [PATCH 30/56] Address comment about making the benchmark's `struct
 MoveInto` into a function -- make it a lambda, to avoid the explicit template
 parameter a freestanding function would require.

---
 .../algorithms/set_intersection.bench.cpp       | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 521e184f81a12..74ba9e8a4ad1a 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -80,17 +80,6 @@ struct StridedFwdIt {
 template <typename Wrapped>
 StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
 
-// functor that moves elements from an iterator range into a new Container instance
-template <typename Container>
-struct MoveInto {
-  template <class It>
-  [[nodiscard]] static Container operator()(It first, It last) {
-    Container out;
-    std::move(first, last, std::inserter(out, out.begin()));
-    return out;
-  }
-};
-
 template <typename T>
 std::vector<T> getVectorOfRandom(size_t N) {
   std::vector<T> v;
@@ -108,7 +97,11 @@ std::vector<T> getVectorOfRandom(size_t N) {
 template <class Container>
 std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) {
   using ValueType = typename Container::value_type;
-  const MoveInto<Container> move_into;
+  auto move_into = [](auto first, auto last) {
+      Container out;
+      std::move(first, last, std::inserter(out, out.begin()));
+      return out;
+  };
   const auto src_size        = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2);
   std::vector<ValueType> src = getVectorOfRandom<ValueType>(src_size);
 

>From 1afb99d14541f2388464fa43c5cf6cbb5ec701a6 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 20:37:19 +0100
Subject: [PATCH 31/56] Address comment about using `common.h`'s `Quantities`
 constant in the benchmark.

---
 .../benchmarks/algorithms/set_intersection.bench.cpp  | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 74ba9e8a4ad1a..30e580d4813d4 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -201,17 +201,6 @@ int main(int argc, char** argv) { /**/
   benchmark::Initialize(&argc, argv);
   if (benchmark::ReportUnrecognizedArguments(argc, argv))
     return 1;
-  const std::vector<size_t> Quantities = {
-    1 << 0,
-    1 << 4,
-    1 << 8,
-    1 << 14,
-// Running each benchmark in parallel consumes too much memory with MSAN
-// and can lead to the test process being killed.
-#if !TEST_HAS_FEATURE(memory_sanitizer)
-    1 << 18
-#endif
-  };
 
   makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(
       Quantities, Quantities);

>From 613e64af77a9a48240ae21bc515271e246061c1e Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 22:20:16 +0100
Subject: [PATCH 32/56] Address feedback to improve assertion in
 _IterOps::__advance()

---
 libcxx/include/__algorithm/iterator_operations.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index c9fd4376595bd..4e1ff4bb83f15 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -125,8 +125,8 @@ struct _IterOps<_ClassicAlgPolicy> {
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
     auto __dist = _IterOps::distance(__iter, __sentinel);
-    _LIBCPP_ASSERT_UNCATEGORIZED(
-        __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count<0");
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(
+        __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count < 0");
     if (__count < 0)
       __dist = __dist > __count ? __dist : __count;
     else

>From 4588447fd009eb95aa8fc7f9d7e4e3837df92ea3 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 22:28:08 +0100
Subject: [PATCH 33/56] Rename new sentinel-based `_IterOps::advance()` to
 `_IterOps::__advance_to` -- no reason IMO to have a second overload if
 `__advance_to = ranges::advance` in C++20...

---
 libcxx/include/__algorithm/iterator_operations.h | 10 +++++-----
 libcxx/include/__algorithm/lower_bound.h         |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index 4e1ff4bb83f15..12a76cdfbeab2 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -91,15 +91,15 @@ struct _IterOps<_ClassicAlgPolicy> {
   // use the incoming type for returning and steer clear of negative overflows
   template <class _Iter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
-  advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
-    return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
+  __advance_to(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+    return _IterOps::__advance_to(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
   }
 
 private:
   // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
   template <class _InputIter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
-  __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
+  __advance_to(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
     _Distance __dist = _Distance();
     for (; __dist < __count && __iter != __sentinel; ++__dist)
       ++__iter;
@@ -109,7 +109,7 @@ struct _IterOps<_ClassicAlgPolicy> {
   // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
   template <class _BiDirIter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
-  __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
+  __advance_to(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
     _Distance __dist = _Distance();
     if (__count >= 0)
       for (; __dist < __count && __iter != __sentinel; ++__dist)
@@ -123,7 +123,7 @@ struct _IterOps<_ClassicAlgPolicy> {
   // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization
   template <class _RandIter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
-  __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
+  __advance_to(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
     auto __dist = _IterOps::distance(__iter, __sentinel);
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(
         __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count < 0");
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 9424a50373fad..f92befc97e1c0 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -71,7 +71,7 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
   using _Distance = typename iterator_traits<_Iter>::difference_type;
   for (_Distance __step = 1; __first != __last; __step <<= 1) {
     auto __it   = __first;
-    auto __dist = __step - _IterOps<_AlgPolicy>::advance(__it, __step, __last);
+    auto __dist = __step - _IterOps<_AlgPolicy>::__advance_to(__it, __step, __last);
     // once we reach the last range where needle can be we must start
     // looking inwards, bisecting that range
     if (__it == __last || !std::__invoke(__comp, std::__invoke(__proj, *__it), __value)) {

>From 2af9a6fb935c7cfe75f2ea9b546786120c4178ba Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 22:42:20 +0100
Subject: [PATCH 34/56] Address feedback about using
 `iterator_traits<_Iter>::difference_type` instead of a templated `_Distance`
 in `_IterOps::__advance_to()`

---
 .../include/__algorithm/iterator_operations.h | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index 12a76cdfbeab2..571bd13b0e240 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -89,28 +89,28 @@ struct _IterOps<_ClassicAlgPolicy> {
   // advance with sentinel, a la std::ranges::advance
   // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
   // use the incoming type for returning and steer clear of negative overflows
-  template <class _Iter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
-  __advance_to(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+  template <class _Iter>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_Iter>
+  __advance_to(_Iter& __iter, __difference_type<_Iter> __count, const _Iter& __sentinel) {
     return _IterOps::__advance_to(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
   }
 
 private:
   // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
-  template <class _InputIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
-  __advance_to(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
-    _Distance __dist = _Distance();
+  template <class _InputIter>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_InputIter>
+  __advance_to(_InputIter& __iter, __difference_type<_InputIter> __count, const _InputIter& __sentinel, input_iterator_tag) {
+    __difference_type<_InputIter> __dist = 0;
     for (; __dist < __count && __iter != __sentinel; ++__dist)
       ++__iter;
     return __count - __dist;
   }
 
   // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
-  template <class _BiDirIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
-  __advance_to(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
-    _Distance __dist = _Distance();
+  template <class _BiDirIter>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_BiDirIter>
+  __advance_to(_BiDirIter& __iter, __difference_type<_BiDirIter> __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
+    __difference_type<_BiDirIter> __dist = 0;
     if (__count >= 0)
       for (; __dist < __count && __iter != __sentinel; ++__dist)
         ++__iter;
@@ -121,9 +121,9 @@ struct _IterOps<_ClassicAlgPolicy> {
   }
 
   // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization
-  template <class _RandIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
-  __advance_to(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
+  template <class _RandIter>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_RandIter>
+  __advance_to(_RandIter& __iter, __difference_type<_RandIter> __count, const _RandIter& __sentinel, random_access_iterator_tag) {
     auto __dist = _IterOps::distance(__iter, __sentinel);
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(
         __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count < 0");

>From 4f05ded78a5032f8ed9ff471b65860c97981bd6a Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Apr 2024 22:55:40 +0100
Subject: [PATCH 35/56] git clang-format on the last batch of changes

---
 .../algorithms/set_intersection.bench.cpp          |  8 ++++----
 libcxx/include/__algorithm/iterator_operations.h   | 14 ++++++++++----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 30e580d4813d4..2233b85f1162f 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -97,10 +97,10 @@ std::vector<T> getVectorOfRandom(size_t N) {
 template <class Container>
 std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) {
   using ValueType = typename Container::value_type;
-  auto move_into = [](auto first, auto last) {
-      Container out;
-      std::move(first, last, std::inserter(out, out.begin()));
-      return out;
+  auto move_into  = [](auto first, auto last) {
+    Container out;
+    std::move(first, last, std::inserter(out, out.begin()));
+    return out;
   };
   const auto src_size        = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2);
   std::vector<ValueType> src = getVectorOfRandom<ValueType>(src_size);
diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index 571bd13b0e240..6edafe26fb42b 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -98,8 +98,8 @@ struct _IterOps<_ClassicAlgPolicy> {
 private:
   // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
   template <class _InputIter>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_InputIter>
-  __advance_to(_InputIter& __iter, __difference_type<_InputIter> __count, const _InputIter& __sentinel, input_iterator_tag) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_InputIter> __advance_to(
+      _InputIter& __iter, __difference_type<_InputIter> __count, const _InputIter& __sentinel, input_iterator_tag) {
     __difference_type<_InputIter> __dist = 0;
     for (; __dist < __count && __iter != __sentinel; ++__dist)
       ++__iter;
@@ -109,7 +109,10 @@ struct _IterOps<_ClassicAlgPolicy> {
   // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
   template <class _BiDirIter>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_BiDirIter>
-  __advance_to(_BiDirIter& __iter, __difference_type<_BiDirIter> __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
+  __advance_to(_BiDirIter& __iter,
+               __difference_type<_BiDirIter> __count,
+               const _BiDirIter& __sentinel,
+               bidirectional_iterator_tag) {
     __difference_type<_BiDirIter> __dist = 0;
     if (__count >= 0)
       for (; __dist < __count && __iter != __sentinel; ++__dist)
@@ -123,7 +126,10 @@ struct _IterOps<_ClassicAlgPolicy> {
   // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization
   template <class _RandIter>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_RandIter>
-  __advance_to(_RandIter& __iter, __difference_type<_RandIter> __count, const _RandIter& __sentinel, random_access_iterator_tag) {
+  __advance_to(_RandIter& __iter,
+               __difference_type<_RandIter> __count,
+               const _RandIter& __sentinel,
+               random_access_iterator_tag) {
     auto __dist = _IterOps::distance(__iter, __sentinel);
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(
         __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count < 0");

>From 161d81cfb744fc3cd5abe9717ae93f8e5c23874e Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Wed, 24 Apr 2024 16:14:15 +0100
Subject: [PATCH 36/56] Address review comments about lower_bound.h

---
 libcxx/include/__algorithm/lower_bound.h | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index f92befc97e1c0..c5d549a0d54c6 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -57,18 +57,14 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lo
 // would yield \Omega(n*log(n)) comparisons and, for non-random iterators, \Omega(n^2) iterator increments, whereas the
 // one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of
 // comparisons.
-template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
-__lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
-  // static_assert(std::is_base_of<std::forward_iterator_tag, typename _IterOps<_AlgPolicy>::template
-  // __iterator_category<_Iter>>::value,
-  //       "lower_bound() is a multipass algorithm and requires forward iterator or better");
-
+template <class _AlgPolicy, class _ForwardIterator, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+__lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
   // step = 0, ensuring we can always short-circuit when distance is 1 later on
   if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value))
     return __first;
 
-  using _Distance = typename iterator_traits<_Iter>::difference_type;
+  using _Distance = typename iterator_traits<_ForwardIterator>::difference_type;
   for (_Distance __step = 1; __first != __last; __step <<= 1) {
     auto __it   = __first;
     auto __dist = __step - _IterOps<_AlgPolicy>::__advance_to(__it, __step, __last);
@@ -87,9 +83,9 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
   return __first;
 }
 
-template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
-__lower_bound(_RandIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
+template <class _AlgPolicy, class _RandomAccessIterator, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
+__lower_bound(_RandomAccessIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
   const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
   return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
 }

>From 3c9f8002b1ac954c0830005fd2cad3cad30f649d Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Wed, 24 Apr 2024 19:30:58 +0100
Subject: [PATCH 37/56] Address review comments about set_intersection.h:
 unnecessary namespace qualification, insufficient comments, and direct use of
 iterator traits.

---
 libcxx/include/__algorithm/set_intersection.h | 22 +++++--------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index a5a86baa345cb..cbacb2c39605f 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -134,7 +134,8 @@ struct _LIBCPP_NODISCARD __set_intersector {
   }
 };
 
-// with forward iterators we can use binary search to skip over entries
+// with forward iterators we can make multiple passes over the data, allowing the use of one-sided binary search to reduce best-case
+// complexity to log(N)
 template <class _AlgPolicy,
           class _Compare,
           class _InForwardIter1,
@@ -190,25 +191,12 @@ __set_intersection(
     }
   }
 
-  return std::__set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>(
+  return __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>(
       _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)),
       _IterOps<_AlgPolicy>::next(std::move(__first2), std::move(__last2)),
       std::move(__result));
 }
 
-template <class _AlgPolicy, class _Iter>
-class __set_intersection_iter_category {
-  template <class _It>
-  using __cat = typename std::_IterOps<_AlgPolicy>::template __iterator_category<_It>;
-  template <class _It>
-  static __cat<_It> test(__cat<_It>*);
-  template <class>
-  static std::input_iterator_tag test(...);
-
-public:
-  using __type = decltype(test<_Iter>(nullptr));
-};
-
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
 _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
 _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
@@ -221,8 +209,8 @@ __set_intersection(
       std::move(__last2),
       std::move(__result),
       __comp,
-      typename std::__set_intersection_iter_category<_AlgPolicy, _InIter1>::__type(),
-      typename std::__set_intersection_iter_category<_AlgPolicy, _InIter2>::__type());
+      typename std::_IterOps<_AlgPolicy>::template __iterator_category<_InIter1>(),
+      typename std::_IterOps<_AlgPolicy>::template __iterator_category<_InIter2>());
 }
 
 template <class _InputIterator1, class _InputIterator2, class _OutputIterator, class _Compare>

>From 4aa4a823367cfad153836d0ebdae6f80c15d02dd Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Wed, 24 Apr 2024 20:42:53 +0100
Subject: [PATCH 38/56] Address review comment about replacing `struct
 __set_intersector` with a function. I think I managed to preserve readability
 by keeping `__add_output_unless()` as a lambda.

---
 libcxx/include/__algorithm/set_intersection.h | 140 ++++++------------
 1 file changed, 44 insertions(+), 96 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index cbacb2c39605f..d0aa551037b1e 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -43,99 +43,18 @@ struct __set_intersection_result {
       : __in1_(std::move(__in_iter1)), __in2_(std::move(__in_iter2)), __out_(std::move(__out_iter)) {}
 };
 
-template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-struct _LIBCPP_NODISCARD __set_intersector {
-  _InIter1& __first1_;
-  const _Sent1& __last1_;
-  _InIter2& __first2_;
-  const _Sent2& __last2_;
-  _OutIter& __result_;
-  _Compare& __comp_;
-  bool __prev_advanced_ = true;
-
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersector(
-      _InIter1& __first1, _Sent1& __last1, _InIter2& __first2, _Sent2& __last2, _OutIter& __result, _Compare& __comp)
-      : __first1_(__first1),
-        __last1_(__last1),
-        __first2_(__first2),
-        __last2_(__last2),
-        __result_(__result),
-        __comp_(__comp) {}
-
-  _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-  operator()() {
-    while (__first2_ != __last2_) {
-      __advance1_and_maybe_add_result();
-      if (__first1_ == __last1_)
-        break;
-      __advance2_and_maybe_add_result();
-    }
-    return __set_intersection_result<_InIter1, _InIter2, _OutIter>(
-        _IterOps<_AlgPolicy>::next(std::move(__first1_), std::move(__last1_)),
-        _IterOps<_AlgPolicy>::next(std::move(__first2_), std::move(__last2_)),
-        std::move(__result_));
-  }
-
-private:
-  // advance __iter to the first element in the range where !__comp_(__iter, __value)
-  // add result if this is the second consecutive call without advancing
-  // this method only works if you alternate calls between __advance1_and_maybe_add_result() and
-  // __advance2_and_maybe_add_result()
-  template <class _Iter, class _Sent, class _Value>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-  __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
-    _LIBCPP_CONSTEXPR std::__identity __proj;
-    // use one-sided binary search for improved algorithmic complexity bounds
-    // understanding how we can use binary search and still respect complexity
-    // guarantees is _not_ straightforward, so let me explain: the guarantee
-    // is "at most 2*(N+M)-1 comparisons", and one-sided binary search will
-    // necessarily overshoot depending on the position of the needle in the
-    // haystack -- for instance, if we're searching for 3 in (1, 2, 3, 4),
-    // we'll check if 3<1, then 3<2, then 3<4, and, finally, 3<3, for a total of
-    // 4 comparisons, when linear search would have yielded 3. However,
-    // because we won't need to perform the intervening reciprocal comparisons
-    // (ie 1<3, 2<3, 4<3), that extra comparison doesn't run afoul of the
-    // guarantee. Additionally, this type of scenario can only happen for match
-    // distances of up to 5 elements, because 2*log2(8) is 6, and we'll still
-    // be worse-off at position 5 of an 8-element set. From then onwards
-    // these scenarios can't happen.
-    // TL;DR: we'll be 1 comparison worse-off compared to the classic linear-
-    // searching algorithm if matching position 3 of a set with 4 elements,
-    // or position 5 if the set has 7 or 8 elements, but we'll never exceed
-    // the complexity guarantees from the standard.
-    _Iter __tmp = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj);
-    std::swap(__tmp, __iter);
-    __add_output_unless(__tmp != __iter);
-  }
-
-  // advance __first1_ to the first element in the range where !__comp_(*__first1_, *__first2_)
-  // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance1_and_maybe_add_result() {
-    __advance_and_maybe_add_result(__first1_, __last1_, *__first2_);
-  }
-
-  // advance __first2_ to the first element in the range where !__comp_(*__first2_, *__first1_)
-  // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance2_and_maybe_add_result() {
-    __advance_and_maybe_add_result(__first2_, __last2_, *__first1_);
-  }
-
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __add_output_unless(bool __advanced) {
-    if (__advanced | __prev_advanced_) {
-      __prev_advanced_ = __advanced;
-    } else {
-      *__result_ = *__first1_;
-      ++__result_;
-      ++__first1_;
-      ++__first2_;
-      __prev_advanced_ = true;
-    }
-  }
-};
-
-// with forward iterators we can make multiple passes over the data, allowing the use of one-sided binary search to reduce best-case
-// complexity to log(N)
+// With forward iterators we can make multiple passes over the data, allowing the use of one-sided binary search to
+// reduce best-case complexity to log(N). Understanding how we can use binary search and still respect complexity
+// guarantees is _not_ straightforward: the guarantee is "at most 2*(N+M)-1 comparisons", and one-sided binary search
+// will necessarily overshoot depending on the position of the needle in the haystack -- for instance, if we're
+// searching for 3 in (1, 2, 3, 4), we'll check if 3<1, then 3<2, then 3<4, and, finally, 3<3, for a total of 4
+// comparisons, when linear search would have yielded 3. However, because we won't need to perform the intervening
+// reciprocal comparisons (ie 1<3, 2<3, 4<3), that extra comparison doesn't run afoul of the guarantee. Additionally,
+// this type of scenario can only happen for match distances of up to 5 elements, because 2*log2(8) is 6, and we'll
+// still be worse-off at position 5 of an 8-element set. From then onwards these scenarios can't happen. TL;DR: we'll be
+// 1 comparison worse-off compared to the classic linear- searching algorithm if matching position 3 of a set with 4
+// elements, or position 5 if the set has 7 or 8 elements, but we'll never exceed the complexity guarantees from the
+// standard.
 template <class _AlgPolicy,
           class _Compare,
           class _InForwardIter1,
@@ -154,9 +73,38 @@ __set_intersection(
     _Compare&& __comp,
     std::forward_iterator_tag,
     std::forward_iterator_tag) {
-  std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
-      __intersector(__first1, __last1, __first2, __last2, __result, __comp);
-  return __intersector();
+  _LIBCPP_CONSTEXPR std::__identity __proj;
+  bool __prev_advanced = true;
+
+  auto __add_output_unless = [&](bool __advanced) {
+    if (__advanced | __prev_advanced) {
+      __prev_advanced = __advanced;
+    } else {
+      *__result = *__first1;
+      ++__result;
+      ++__first1;
+      ++__first2;
+      __prev_advanced = true;
+    }
+  };
+
+  while (__first2 != __last2) {
+    _InForwardIter1 __first1_next =
+        std::__lower_bound_onesided<_AlgPolicy>(__first1, __last1, *__first2, __comp, __proj);
+    std::swap(__first1_next, __first1);
+    __add_output_unless(__first1 != __first1_next);
+    if (__first1 == __last1)
+      break;
+
+    _InForwardIter2 __first2_next =
+        std::__lower_bound_onesided<_AlgPolicy>(__first2, __last2, *__first1, __comp, __proj);
+    std::swap(__first2_next, __first2);
+    __add_output_unless(__first2 != __first2_next);
+  }
+  return __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>(
+      _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)),
+      _IterOps<_AlgPolicy>::next(std::move(__first2), std::move(__last2)),
+      std::move(__result));
 }
 
 // input iterators are not suitable for multipass algorithms, so we stick to the classic single-pass version

>From 8307b2db9c238b7d3c2d8648e827860d5be4a899 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Fri, 26 Apr 2024 16:04:55 +0100
Subject: [PATCH 39/56] Make `__add_output_unless()` a freestanding function,
 `__set_intersection_add_output_unless()`, because the lambda [tripped the
 "MacOS with C++03" test
 run](https://buildkite.com/llvm-project/libcxx-ci/builds/35055#018f173c-f155-4fbb-b6d7-a7aba01cec9e).

---
 libcxx/include/__algorithm/set_intersection.h | 34 +++++++++++--------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index d0aa551037b1e..293f500f541ac 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -43,6 +43,24 @@ struct __set_intersection_result {
       : __in1_(std::move(__in_iter1)), __in2_(std::move(__in_iter2)), __out_(std::move(__out_iter)) {}
 };
 
+// Helper for __set_intersection() with one-sided binary search: populate result and advance input iterators if they
+// haven't advanced in the last 2 calls. This function is very intimately related to the way it is used and doesn't
+// attempt to abstract that, it's not appropriate for general usage outside of its context. It would be a lambda of
+// __set_intersection() if that hadn't stumped the compiler in c++03 mode in some platforms.
+template <class _InForwardIter1, class _InForwardIter2, class _OutIter>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_intersection_add_output_unless(
+    bool __advanced, _InForwardIter1& __first1, _InForwardIter2& __first2, _OutIter& __result, bool& __prev_advanced) {
+  if (__advanced | __prev_advanced) {
+    __prev_advanced = __advanced;
+  } else {
+    *__result = *__first1;
+    ++__result;
+    ++__first1;
+    ++__first2;
+    __prev_advanced = true;
+  }
+}
+
 // With forward iterators we can make multiple passes over the data, allowing the use of one-sided binary search to
 // reduce best-case complexity to log(N). Understanding how we can use binary search and still respect complexity
 // guarantees is _not_ straightforward: the guarantee is "at most 2*(N+M)-1 comparisons", and one-sided binary search
@@ -76,30 +94,18 @@ __set_intersection(
   _LIBCPP_CONSTEXPR std::__identity __proj;
   bool __prev_advanced = true;
 
-  auto __add_output_unless = [&](bool __advanced) {
-    if (__advanced | __prev_advanced) {
-      __prev_advanced = __advanced;
-    } else {
-      *__result = *__first1;
-      ++__result;
-      ++__first1;
-      ++__first2;
-      __prev_advanced = true;
-    }
-  };
-
   while (__first2 != __last2) {
     _InForwardIter1 __first1_next =
         std::__lower_bound_onesided<_AlgPolicy>(__first1, __last1, *__first2, __comp, __proj);
     std::swap(__first1_next, __first1);
-    __add_output_unless(__first1 != __first1_next);
+    std::__set_intersection_add_output_unless(__first1 != __first1_next, __first1, __first2, __result, __prev_advanced);
     if (__first1 == __last1)
       break;
 
     _InForwardIter2 __first2_next =
         std::__lower_bound_onesided<_AlgPolicy>(__first2, __last2, *__first1, __comp, __proj);
     std::swap(__first2_next, __first2);
-    __add_output_unless(__first2 != __first2_next);
+    std::__set_intersection_add_output_unless(__first2 != __first2_next, __first1, __first2, __result, __prev_advanced);
   }
   return __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>(
       _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)),

>From be6c5c848d5615adbe44546d9f78df680bd54767 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Sat, 27 Apr 2024 11:22:37 +0100
Subject: [PATCH 40/56] Address comment about using `std::forward<_Compare>()`
 for consistency in `__set_intersection()` base overload.

---
 libcxx/include/__algorithm/set_intersection.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 293f500f541ac..822f6ebcab60f 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -162,7 +162,7 @@ __set_intersection(
       std::move(__first2),
       std::move(__last2),
       std::move(__result),
-      __comp,
+       std::forward<_Compare>(__comp),
       typename std::_IterOps<_AlgPolicy>::template __iterator_category<_InIter1>(),
       typename std::_IterOps<_AlgPolicy>::template __iterator_category<_InIter2>());
 }

>From 62a6010a27dd30b8e0469c4be89d3d597421a995 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Sat, 27 Apr 2024 20:03:22 +0100
Subject: [PATCH 41/56] Address review feedback: remove benchmark counters.

---
 .../algorithms/set_intersection.bench.cpp     | 26 ++-----------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 2233b85f1162f..a9752d9a5a15c 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -135,10 +135,6 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
   return std::pair<Container, Container>();
 }
 
-// use environment variable to enable additional counters: instrumentation will
-// impact CPU utilisation, let's give the user the option
-static const bool TRACK_COUNTERS = getenv("TRACK_COUNTERS") != nullptr;
-
 template <class ValueType, class Container, class Overlap>
 struct SetIntersection {
   using ContainerType = typename Container::template type<Value<ValueType>>;
@@ -164,26 +160,8 @@ struct SetIntersection {
       while (state.KeepRunningBatch(BATCH_SIZE)) {
         for (unsigned i = 0; i < BATCH_SIZE; ++i) {
           const auto& [c1, c2] = input;
-          if (TRACK_COUNTERS) {
-            size_t cmp{}, strides{}, displacement{};
-            auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
-              ++cmp;
-              return std::less<Value<ValueType>>{}(lhs, rhs);
-            };
-            stride_counting_iterator b1(c1.begin(), &strides, &displacement);
-            stride_counting_iterator e1(c1.end(), &strides, &displacement);
-            stride_counting_iterator b2(c2.begin(), &strides, &displacement);
-            stride_counting_iterator e2(c2.end(), &strides, &displacement);
-            auto res = std::set_intersection(b1, e1, b2, e2, out.begin(), tracking_less);
-            benchmark::DoNotOptimize(res);
-            state.counters["comparisons"]       = cmp;
-            state.counters["iter_strides"]      = strides;
-            state.counters["iter_displacement"] = displacement;
-
-          } else {
-            auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin());
-            benchmark::DoNotOptimize(res);
-          }
+          auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin());
+          benchmark::DoNotOptimize(res);
         }
       }
     }

>From e2af5cc3558f9c94b835826fe2927ac88600dce0 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Sat, 27 Apr 2024 20:04:15 +0100
Subject: [PATCH 42/56] clang-format of the last 2 changes

---
 libcxx/benchmarks/algorithms/set_intersection.bench.cpp | 2 +-
 libcxx/include/__algorithm/set_intersection.h           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index a9752d9a5a15c..9cdf6a7859442 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -160,7 +160,7 @@ struct SetIntersection {
       while (state.KeepRunningBatch(BATCH_SIZE)) {
         for (unsigned i = 0; i < BATCH_SIZE; ++i) {
           const auto& [c1, c2] = input;
-          auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin());
+          auto res             = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin());
           benchmark::DoNotOptimize(res);
         }
       }
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 822f6ebcab60f..9c2145731473c 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -162,7 +162,7 @@ __set_intersection(
       std::move(__first2),
       std::move(__last2),
       std::move(__result),
-       std::forward<_Compare>(__comp),
+      std::forward<_Compare>(__comp),
       typename std::_IterOps<_AlgPolicy>::template __iterator_category<_InIter1>(),
       typename std::_IterOps<_AlgPolicy>::template __iterator_category<_InIter2>());
 }

>From 89201ea2ee881bf71574e1bc043bfbbc9afd355a Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Sun, 28 Apr 2024 15:20:47 +0100
Subject: [PATCH 43/56] Oops, leftover template type name!

---
 libcxx/include/__algorithm/lower_bound.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index c5d549a0d54c6..06b58bd6dd2d8 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -83,9 +83,9 @@ __lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __va
   return __first;
 }
 
-template <class _AlgPolicy, class _RandomAccessIterator, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
-__lower_bound(_RandomAccessIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
+template <class _AlgPolicy, class _ForwardIterator, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+__lower_bound(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
   const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
   return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
 }

>From 5f6e7feaa267af9482cc14dbb8b09fc8497ae326 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 29 Apr 2024 20:38:31 +0100
Subject: [PATCH 44/56] Remove unnecessary PauseTiming()/ResumeTiming() in the
 benchmark data generation stage: time won't be measured before we go into the
 benchmark::State loops.

---
 libcxx/benchmarks/algorithms/set_intersection.bench.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 9cdf6a7859442..ba7670e782537 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -149,13 +149,10 @@ struct SetIntersection {
   }
 
   void run(benchmark::State& state) const {
-    state.PauseTiming();
     auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap());
     std::vector<Value<ValueType>> out(std::min(size1_, size2_));
 
     const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_));
-    state.ResumeTiming();
-
     for (const auto& _ : state) {
       while (state.KeepRunningBatch(BATCH_SIZE)) {
         for (unsigned i = 0; i < BATCH_SIZE; ++i) {

>From 109e5a431320e948da00fed3ffcd044927e013f8 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <iuri.chaer at gmail.com>
Date: Fri, 24 May 2024 20:21:30 +0100
Subject: [PATCH 45/56] Apply suggestions from code review

@ldionne's latest inline review suggestions.

Co-authored-by: Louis Dionne <ldionne.2 at gmail.com>
---
 libcxx/benchmarks/algorithms/set_intersection.bench.cpp | 7 ++++---
 libcxx/include/__algorithm/lower_bound.h                | 4 ++--
 libcxx/include/__algorithm/set_intersection.h           | 7 +++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index ba7670e782537..03680ff7ee95e 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -9,7 +9,7 @@
 #include <algorithm>
 #include <iterator>
 #include <set>
-#include <stdlib.h>
+#include <cstdlib>
 #include <vector>
 
 #include "common.h"
@@ -88,7 +88,7 @@ std::vector<T> getVectorOfRandom(size_t N) {
   return std::vector<T>(v);
 }
 
-// realistically, data won't all be nicely contiguous in a container,
+// Realistically, data won't all be nicely contiguous in a container,
 // we'll go through some effort to ensure that it's shuffled through memory
 // this is especially important for containers with non-contiguous element
 // storage, but it will affect even a std::vector, because when you copy a
@@ -110,7 +110,7 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
     return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(src.begin() + size1, src.end()));
   }
 
-  // all other overlap types will have to copy some part of the data, but if
+  // All other overlap types will have to copy some part of the data, but if
   // we copy after sorting it will likely have high locality, so we sort
   // each copy separately
   auto copy = src;
@@ -180,4 +180,5 @@ int main(int argc, char** argv) { /**/
   makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(
       Quantities, Quantities);
   benchmark::RunSpecifiedBenchmarks();
+  return 0;
 }
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 06b58bd6dd2d8..8be2677243431 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -51,10 +51,10 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lo
 // One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
 // advantage of being \Omega(1) rather than the classic algorithm's \Omega(log(n)), with the downside of executing at
 // most 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
-// the first one is when operating over non-random iterators, because the classic algorithm requires knowing the
+// the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the
 // container's size upfront, which adds \Omega(n) iterator increments to the complexity. The second one is when you're
 // traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
-// would yield \Omega(n*log(n)) comparisons and, for non-random iterators, \Omega(n^2) iterator increments, whereas the
+// would yield \Omega(n*log(n)) comparisons and, for non-random-access iterators, \Omega(n^2) iterator increments, whereas the
 // one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of
 // comparisons.
 template <class _AlgPolicy, class _ForwardIterator, class _Sent, class _Type, class _Proj, class _Comp>
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 9c2145731473c..80f1a47a56590 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -45,12 +45,11 @@ struct __set_intersection_result {
 
 // Helper for __set_intersection() with one-sided binary search: populate result and advance input iterators if they
 // haven't advanced in the last 2 calls. This function is very intimately related to the way it is used and doesn't
-// attempt to abstract that, it's not appropriate for general usage outside of its context. It would be a lambda of
-// __set_intersection() if that hadn't stumped the compiler in c++03 mode in some platforms.
+// attempt to abstract that, it's not appropriate for general usage outside of its context.
 template <class _InForwardIter1, class _InForwardIter2, class _OutIter>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_intersection_add_output_unless(
     bool __advanced, _InForwardIter1& __first1, _InForwardIter2& __first2, _OutIter& __result, bool& __prev_advanced) {
-  if (__advanced | __prev_advanced) {
+  if (__advanced || __prev_advanced) {
     __prev_advanced = __advanced;
   } else {
     *__result = *__first1;
@@ -70,7 +69,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_intersection_add_
 // reciprocal comparisons (ie 1<3, 2<3, 4<3), that extra comparison doesn't run afoul of the guarantee. Additionally,
 // this type of scenario can only happen for match distances of up to 5 elements, because 2*log2(8) is 6, and we'll
 // still be worse-off at position 5 of an 8-element set. From then onwards these scenarios can't happen. TL;DR: we'll be
-// 1 comparison worse-off compared to the classic linear- searching algorithm if matching position 3 of a set with 4
+// 1 comparison worse-off compared to the classic linear-searching algorithm if matching position 3 of a set with 4
 // elements, or position 5 if the set has 7 or 8 elements, but we'll never exceed the complexity guarantees from the
 // standard.
 template <class _AlgPolicy,

>From cc95b51ed4cc01820f0bcdc585dd1b05e88e70ae Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Fri, 24 May 2024 21:34:50 +0100
Subject: [PATCH 46/56] clang-format fixups to inline code suggestions

---
 libcxx/benchmarks/algorithms/set_intersection.bench.cpp | 2 +-
 libcxx/include/__algorithm/lower_bound.h                | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 03680ff7ee95e..b3fb15fc77b31 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include <algorithm>
+#include <cstdlib>
 #include <iterator>
 #include <set>
-#include <cstdlib>
 #include <vector>
 
 #include "common.h"
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 8be2677243431..c417d84835497 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -54,8 +54,8 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lo
 // the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the
 // container's size upfront, which adds \Omega(n) iterator increments to the complexity. The second one is when you're
 // traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
-// would yield \Omega(n*log(n)) comparisons and, for non-random-access iterators, \Omega(n^2) iterator increments, whereas the
-// one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of
+// would yield \Omega(n*log(n)) comparisons and, for non-random-access iterators, \Omega(n^2) iterator increments,
+// whereas the one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of
 // comparisons.
 template <class _AlgPolicy, class _ForwardIterator, class _Sent, class _Type, class _Proj, class _Comp>
 _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator

>From 91e4e5151beeddeb8ab85ec0f4003795056c9188 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Fri, 24 May 2024 20:24:23 +0100
Subject: [PATCH 47/56] Move new `_IterOps<_ClassicAlgPolicy>::__advance_to()`
 overloads next to their pre-existing sibling and remove leftover comments.

---
 .../include/__algorithm/iterator_operations.h | 109 +++++++++---------
 1 file changed, 53 insertions(+), 56 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index 6edafe26fb42b..8ced989233bc4 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -86,62 +86,6 @@ struct _IterOps<_ClassicAlgPolicy> {
     std::advance(__iter, __count);
   }
 
-  // advance with sentinel, a la std::ranges::advance
-  // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
-  // use the incoming type for returning and steer clear of negative overflows
-  template <class _Iter>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_Iter>
-  __advance_to(_Iter& __iter, __difference_type<_Iter> __count, const _Iter& __sentinel) {
-    return _IterOps::__advance_to(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
-  }
-
-private:
-  // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
-  template <class _InputIter>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_InputIter> __advance_to(
-      _InputIter& __iter, __difference_type<_InputIter> __count, const _InputIter& __sentinel, input_iterator_tag) {
-    __difference_type<_InputIter> __dist = 0;
-    for (; __dist < __count && __iter != __sentinel; ++__dist)
-      ++__iter;
-    return __count - __dist;
-  }
-
-  // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
-  template <class _BiDirIter>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_BiDirIter>
-  __advance_to(_BiDirIter& __iter,
-               __difference_type<_BiDirIter> __count,
-               const _BiDirIter& __sentinel,
-               bidirectional_iterator_tag) {
-    __difference_type<_BiDirIter> __dist = 0;
-    if (__count >= 0)
-      for (; __dist < __count && __iter != __sentinel; ++__dist)
-        ++__iter;
-    else
-      for (__count = -__count; __dist < __count && __iter != __sentinel; ++__dist)
-        --__iter;
-    return __count - __dist;
-  }
-
-  // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization
-  template <class _RandIter>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_RandIter>
-  __advance_to(_RandIter& __iter,
-               __difference_type<_RandIter> __count,
-               const _RandIter& __sentinel,
-               random_access_iterator_tag) {
-    auto __dist = _IterOps::distance(__iter, __sentinel);
-    _LIBCPP_ASSERT_VALID_INPUT_RANGE(
-        __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count < 0");
-    if (__count < 0)
-      __dist = __dist > __count ? __dist : __count;
-    else
-      __dist = __dist < __count ? __dist : __count;
-    __iter += __dist;
-    return __count - __dist;
-  }
-
-public:
   // distance
   template <class _Iter>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static typename iterator_traits<_Iter>::difference_type
@@ -217,6 +161,59 @@ struct _IterOps<_ClassicAlgPolicy> {
   _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX14 void __advance_to(_Iter& __first, _Iter __last) {
     __first = __last;
   }
+
+  // advance with sentinel, a la std::ranges::advance
+  template <class _Iter>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_Iter>
+  __advance_to(_Iter& __iter, __difference_type<_Iter> __count, const _Iter& __sentinel) {
+    return _IterOps::__advance_to(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
+  }
+
+private:
+  // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
+  template <class _InputIter>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_InputIter> __advance_to(
+      _InputIter& __iter, __difference_type<_InputIter> __count, const _InputIter& __sentinel, input_iterator_tag) {
+    __difference_type<_InputIter> __dist = 0;
+    for (; __dist < __count && __iter != __sentinel; ++__dist)
+      ++__iter;
+    return __count - __dist;
+  }
+
+  // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
+  template <class _BiDirIter>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_BiDirIter>
+  __advance_to(_BiDirIter& __iter,
+               __difference_type<_BiDirIter> __count,
+               const _BiDirIter& __sentinel,
+               bidirectional_iterator_tag) {
+    __difference_type<_BiDirIter> __dist = 0;
+    if (__count >= 0)
+      for (; __dist < __count && __iter != __sentinel; ++__dist)
+        ++__iter;
+    else
+      for (__count = -__count; __dist < __count && __iter != __sentinel; ++__dist)
+        --__iter;
+    return __count - __dist;
+  }
+
+  // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization
+  template <class _RandIter>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_RandIter>
+  __advance_to(_RandIter& __iter,
+               __difference_type<_RandIter> __count,
+               const _RandIter& __sentinel,
+               random_access_iterator_tag) {
+    auto __dist = _IterOps::distance(__iter, __sentinel);
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(
+        __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count < 0");
+    if (__count < 0)
+      __dist = __dist > __count ? __dist : __count;
+    else
+      __dist = __dist < __count ? __dist : __count;
+    __iter += __dist;
+    return __count - __dist;
+  }
 };
 
 _LIBCPP_END_NAMESPACE_STD

>From c977bb7b04bcd00af120cede68fb522dc70bc85f Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Fri, 24 May 2024 21:30:58 +0100
Subject: [PATCH 48/56] Refactor operation counts out of
 `stride_counting_iterator`.

---
 .../ranges_set_intersection.pass.cpp          | 35 +++-----
 .../iterator_count.pass.cpp                   | 11 ++-
 .../iterator_count_sentinel.pass.cpp          | 61 ++++++-------
 .../iterator_sentinel.pass.cpp                | 28 ++++--
 .../range.adaptors/range.drop/begin.pass.cpp  |  8 +-
 .../ranges/range.adaptors/range.drop/types.h  |  8 +-
 .../range.adaptors/range.transform/types.h    |  6 --
 libcxx/test/support/test_iterators.h          | 86 ++++++++-----------
 8 files changed, 118 insertions(+), 125 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index 4858493145af9..b4e7f54a9c877 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -283,17 +283,16 @@ struct [[nodiscard]] OperationCounts {
   std::size_t comparisons{};
   struct PerInput {
     std::size_t proj{};
-    std::size_t iterator_strides{};
-    std::ptrdiff_t iterator_displacement{};
+    IteratorOpCounts iterops;
 
     // IGNORES proj!
     [[nodiscard]] constexpr bool operator==(const PerInput& o) const {
-      return iterator_strides == o.iterator_strides && iterator_displacement == o.iterator_displacement;
+      return iterops.increments == o.iterops.increments && iterops.decrements == o.iterops.decrements;
     }
 
     [[nodiscard]] constexpr bool matchesExpectation(const PerInput& expect) {
-      return proj <= expect.proj && iterator_strides <= expect.iterator_strides &&
-             iterator_displacement <= expect.iterator_displacement;
+      return proj <= expect.proj &&
+             iterops.increments + iterops.decrements <= expect.iterops.increments + expect.iterops.decrements;
     }
   };
   std::array<PerInput, 2> in;
@@ -337,14 +336,11 @@ constexpr void testSetIntersectionAndReturnOpCounts(
 
   std::array<int, N3> out;
 
-  stride_counting_iterator b1(
-      In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
-  stride_counting_iterator e1(
-      In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
-  stride_counting_iterator b2(
-      In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
-  stride_counting_iterator e2(
-      In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+  stride_counting_iterator b1(In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterops);
+  stride_counting_iterator e1(In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterops);
+
+  stride_counting_iterator b2(In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterops);
+  stride_counting_iterator e2(In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterops);
 
   std::set_intersection(b1, e1, b2, e2, Out(out.data()), comp);
 
@@ -383,14 +379,11 @@ constexpr void testRangesSetIntersectionAndReturnOpCounts(
 
   std::array<int, N3> out;
 
-  stride_counting_iterator b1(
-      In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
-  stride_counting_iterator e1(
-      In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
-  stride_counting_iterator b2(
-      In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
-  stride_counting_iterator e2(
-      In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+  stride_counting_iterator b1(In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterops);
+  stride_counting_iterator e1(In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterops);
+
+  stride_counting_iterator b2(In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterops);
+  stride_counting_iterator e2(In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterops);
 
   std::ranges::subrange r1{b1, SentinelWorkaround<decltype(e1)>{e1}};
   std::ranges::subrange r2{b2, SentinelWorkaround<decltype(e2)>{e2}};
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count.pass.cpp
index cda49acad985b..cda1747439cf7 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <iterator>
 
+#include <algorithm>
 #include <cassert>
 
 #include "test_iterators.h"
@@ -32,12 +33,16 @@ constexpr void check(int* first, std::iter_difference_t<It> n, int* expected) {
 
   // Count operations
   if constexpr (Count) {
-    auto it = stride_counting_iterator(It(first));
+    IteratorOpCounts ops;
+    auto it = stride_counting_iterator(It(first), &ops);
     std::ranges::advance(it, n);
     if constexpr (std::random_access_iterator<It>) {
-      assert(it.stride_count() <= 1);
+      assert(ops.increments + ops.decrements <= 1);
     } else {
-      assert(it.stride_count() == abs(M));
+      const auto big   = std::max(ops.increments, ops.decrements);
+      const auto small = std::min(ops.increments, ops.decrements);
+      assert(big == std::size_t(abs(M)));
+      assert(small == 0);
     }
   }
 }
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
index 76439ef93a607..2ab88a62892ae 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
@@ -38,14 +38,16 @@ check_forward(int* first, int* last, std::iter_difference_t<It> n, int* expected
 
   // Count operations
   if constexpr (Count) {
-    auto it = stride_counting_iterator(It(first));
-    auto sent = sentinel_wrapper(stride_counting_iterator(It(last)));
+    IteratorOpCounts ops;
+    auto it   = stride_counting_iterator(It(first), &ops);
+    auto sent = sentinel_wrapper(stride_counting_iterator(It(last), &ops));
     (void)std::ranges::advance(it, n, sent);
     // We don't have a sized sentinel, so we have to increment one-by-one
     // regardless of the iterator category.
-    assert(it.stride_count() == M);
-    assert(it.stride_displacement() == M);
-    assert(it.equals_count() == expected_equals_count);
+    assert(static_cast<Difference>(ops.increments) == M);
+    assert(static_cast<Difference>(ops.decrements) == 0);
+    assert(ops.zero_moves == 0);
+    assert(ops.equal_cmps == static_cast<std::size_t>(expected_equals_count));
   }
 }
 
@@ -65,28 +67,24 @@ constexpr void check_forward_sized_sentinel(int* first, int* last, std::iter_dif
 
   // Count operations
   {
-    auto it = stride_counting_iterator(It(first));
+    IteratorOpCounts ops;
+    auto it   = stride_counting_iterator(It(first), &ops);
     auto sent = distance_apriori_sentinel(size);
     (void)std::ranges::advance(it, n, sent);
     if constexpr (std::random_access_iterator<It>) {
-      assert(it.stride_count() <= 1);
-      assert(it.stride_displacement() <= 1);
+      assert(ops.increments + ops.zero_moves == 1);
+      assert(ops.decrements == 0);
     } else {
-      assert(it.stride_count() == M);
-      assert(it.stride_displacement() == M);
+      assert(static_cast<Difference>(ops.increments) == M);
+      assert(ops.decrements == 0);
+      assert(ops.zero_moves == 0);
     }
   }
 }
 
-struct Expected {
-  int stride_count;
-  int stride_displacement;
-  int equals_count;
-};
-
 template <bool Count, typename It>
 constexpr void
-check_backward(int* first, int* last, std::iter_difference_t<It> n, int* expected, Expected expected_counts) {
+check_backward(int* first, int* last, std::iter_difference_t<It> n, int* expected, IteratorOpCounts expected_counts) {
   // Check preconditions for `advance` when called with negative `n`
   // (see [range.iter.op.advance]). In addition, allow `n == 0`.
   assert(n <= 0);
@@ -105,16 +103,18 @@ check_backward(int* first, int* last, std::iter_difference_t<It> n, int* expecte
 
   // Count operations
   {
-    auto it = stride_counting_iterator(It(last));
-    auto sent = stride_counting_iterator(It(first));
+    IteratorOpCounts ops;
+    auto it   = stride_counting_iterator(It(last), &ops);
+    auto sent = stride_counting_iterator(It(first), &ops);
     static_assert(std::bidirectional_iterator<stride_counting_iterator<It>>);
     static_assert(Count == !std::sized_sentinel_for<It, It>);
 
     (void)std::ranges::advance(it, n, sent);
 
-    assert(it.stride_count() == expected_counts.stride_count);
-    assert(it.stride_displacement() == expected_counts.stride_displacement);
-    assert(it.equals_count() == expected_counts.equals_count);
+    assert(ops.increments == expected_counts.increments);
+    assert(ops.decrements == expected_counts.decrements);
+    assert(ops.zero_moves == expected_counts.zero_moves);
+    assert(ops.equal_cmps == expected_counts.equal_cmps);
   }
 }
 
@@ -217,21 +217,22 @@ constexpr bool test() {
       {
         int* expected = n > size ? range : range + size - n;
         {
-          Expected expected_counts = {
-              .stride_count        = static_cast<int>(range + size - expected),
-              .stride_displacement = -expected_counts.stride_count,
-              .equals_count        = n > size ? size + 1 : n,
+          IteratorOpCounts expected_counts = {
+              .increments = 0,
+              .decrements = static_cast<std::size_t>(range + size - expected),
+              .equal_cmps = static_cast<std::size_t>(n > size ? size + 1 : n),
           };
 
           check_backward<true, bidirectional_iterator<int*>>(range, range + size, -n, expected, expected_counts);
         }
         {
-          Expected expected_counts = {
+          IteratorOpCounts expected_counts = {
               // If `n >= size`, the algorithm can just do `it = std::move(sent);`
               // instead of doing iterator arithmetic.
-              .stride_count        = (n >= size) ? 0 : 1,
-              .stride_displacement = (n >= size) ? 0 : 1,
-              .equals_count        = 0,
+              .increments = 0,
+              .decrements = static_cast<std::size_t>((n == 0 || n >= size) ? 0 : 1),
+              .zero_moves = static_cast<std::size_t>(n == 0 && size != 0 ? 1 : 0),
+              .equal_cmps = 0,
           };
 
           check_backward<false, random_access_iterator<int*>>(range, range + size, -n, expected, expected_counts);
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp
index 2e9a28e4ad395..75f7a51d6e14b 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <iterator>
 
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 
@@ -31,11 +32,12 @@ constexpr void check_assignable(int* first, int* last, int* expected) {
 
   // Count operations
   if constexpr (Count) {
-    auto it = stride_counting_iterator(It(first));
-    auto sent = assignable_sentinel(stride_counting_iterator(It(last)));
+    IteratorOpCounts ops;
+    auto it   = stride_counting_iterator(It(first), &ops);
+    auto sent = assignable_sentinel(stride_counting_iterator(It(last), &ops));
     std::ranges::advance(it, sent);
     assert(base(base(it)) == expected);
-    assert(it.stride_count() == 0); // because we got here by assigning from last, not by incrementing
+    assert(ops.increments + ops.decrements == 0); // because we got here by assigning from last, not by incrementing
   }
 }
 
@@ -53,13 +55,17 @@ constexpr void check_sized_sentinel(int* first, int* last, int* expected) {
 
   // Count operations
   if constexpr (Count) {
-    auto it = stride_counting_iterator(It(first));
+    IteratorOpCounts ops;
+    auto it   = stride_counting_iterator(It(first), &ops);
     auto sent = distance_apriori_sentinel(size);
     std::ranges::advance(it, sent);
     if constexpr (std::random_access_iterator<It>) {
-      assert(it.stride_count() == 1);
+      assert(ops.increments + ops.decrements + ops.zero_moves == 1);
     } else {
-      assert(it.stride_count() == size);
+      const auto big   = std::max(ops.increments, ops.decrements);
+      const auto small = std::min(ops.increments, ops.decrements);
+      assert(big == static_cast<size_t>(size > 0 ? size : -size));
+      assert(small == 0);
     }
   }
 }
@@ -78,10 +84,14 @@ constexpr void check_sentinel(int* first, int* last, int* expected) {
 
   // Count operations
   if constexpr (Count) {
-    auto it = stride_counting_iterator(It(first));
-    auto sent = sentinel_wrapper(stride_counting_iterator(It(last)));
+    IteratorOpCounts ops;
+    auto it   = stride_counting_iterator(It(first), &ops);
+    auto sent = sentinel_wrapper(stride_counting_iterator(It(last), &ops));
     std::ranges::advance(it, sent);
-    assert(it.stride_count() == size);
+    const auto big   = std::max(ops.increments, ops.decrements);
+    const auto small = std::min(ops.increments, ops.decrements);
+    assert(big == static_cast<size_t>(size > 0 ? size : -size));
+    assert(small == 0);
   }
 }
 
diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/begin.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop/begin.pass.cpp
index 28ac53c2445d4..7e01b5884728f 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.drop/begin.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.drop/begin.pass.cpp
@@ -73,12 +73,12 @@ constexpr bool test() {
   std::ranges::drop_view dropView7(MoveOnlyView(), 10);
   assert(dropView7.begin() == globalBuff + 8);
 
-  CountedView view8;
+  IteratorOpCounts opcounts;
+  CountedView view8(&opcounts);
+  ;
   std::ranges::drop_view dropView8(view8, 5);
   assert(base(base(dropView8.begin())) == globalBuff + 5);
-  assert(dropView8.begin().stride_count() == 5);
-  assert(base(base(dropView8.begin())) == globalBuff + 5);
-  assert(dropView8.begin().stride_count() == 5);
+  assert(opcounts.increments == 5);
 
   static_assert(!BeginInvocable<const ForwardView>);
 
diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/types.h b/libcxx/test/std/ranges/range.adaptors/range.drop/types.h
index ae861bce40f1e..990fa4a0c064a 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.drop/types.h
+++ b/libcxx/test/std/ranges/range.adaptors/range.drop/types.h
@@ -122,8 +122,12 @@ struct Range {
 
 using CountedIter = stride_counting_iterator<forward_iterator<int*>>;
 struct CountedView : std::ranges::view_base {
-  constexpr CountedIter begin() const { return CountedIter(ForwardIter(globalBuff)); }
-  constexpr CountedIter end() const { return CountedIter(ForwardIter(globalBuff + 8)); }
+  explicit constexpr CountedView(IteratorOpCounts* opcounts) noexcept : opcounts_(opcounts) {}
+  constexpr CountedIter begin() const { return CountedIter(ForwardIter(globalBuff), opcounts_); }
+  constexpr CountedIter end() const { return CountedIter(ForwardIter(globalBuff + 8), opcounts_); }
+
+private:
+  IteratorOpCounts* opcounts_;
 };
 
 struct View : std::ranges::view_base {
diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h
index 14f85722a8c19..cc5679f229de2 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h
+++ b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h
@@ -119,12 +119,6 @@ struct Range {
   int *end() const;
 };
 
-using CountedIter = stride_counting_iterator<forward_iterator<int*>>;
-struct CountedView : std::ranges::view_base {
-  constexpr CountedIter begin() const { return CountedIter(ForwardIter(globalBuff)); }
-  constexpr CountedIter end() const { return CountedIter(ForwardIter(globalBuff + 8)); }
-};
-
 struct TimesTwo {
   constexpr int operator()(int x) const { return x * 2; }
 };
diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 4aff683e822a1..25ed860bc202f 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -724,33 +724,18 @@ struct common_input_iterator {
 
 #  endif // TEST_STD_VER >= 20
 
-// Iterator adaptor that counts the number of times the iterator has had a successor/predecessor
-// operation or an equality comparison operation called. Has three recorders:
-// * `stride_count`, which records the total number of calls to an op++, op--, op+=, or op-=.
-// * `stride_displacement`, which records the displacement of the calls. This means that both
-//   op++/op+= will increase the displacement counter by 1, and op--/op-= will decrease the
-//   displacement counter by 1.
-// * `equals_count`, which records the total number of calls to an op== or op!=. If compared
-//   against a sentinel object, that sentinel object must call the `record_equality_comparison`
-//   function so that the comparison is counted correctly.
-template <class It,
-          class StrideCountType        = std::iter_difference_t<It>,
-          class StrideDisplacementType = std::iter_difference_t<It>>
-class stride_counting_iterator {
-  template <typename UnderlyingType>
-  struct concrete_or_ref {
-    using value_type            = std::remove_cv_t<std::remove_reference_t<UnderlyingType>>;
-    constexpr concrete_or_ref() = default;
-    explicit constexpr concrete_or_ref(UnderlyingType* c) noexcept : ptr_{c} {}
-
-    constexpr operator value_type&() noexcept { return ptr_ ? *ptr_ : val_; }
-    constexpr operator const value_type&() const noexcept { return ptr_ ? *ptr_ : val_; }
-
-  private:
-    value_type val_{};
-    value_type* ptr_{nullptr};
-  };
+struct IteratorOpCounts {
+  std::size_t increments = 0; ///< Number of times the iterator moved forward (++it, it++, it+=positive, it-=negative).
+  std::size_t decrements = 0; ///< Number of times the iterator moved backward (--it, it--, it-=positive, it+=negative).
+  std::size_t zero_moves = 0; ///< Number of times a call was made to move the iterator by 0 positions (it+=0, it-=0).
+  std::size_t equal_cmps = 0; ///< Total number of calls to op== or op!=. If compared against a sentinel object, that
+                              ///  sentinel object must call the `record_equality_comparison` function so that the
+                              ///  comparison is counted correctly.
+};
 
+// Iterator adaptor that records its operation counts in a IteratorOpCounts
+template <class It>
+class stride_counting_iterator {
 public:
     using value_type = typename iter_value_or_void<It>::type;
     using difference_type = std::iter_difference_t<It>;
@@ -764,13 +749,12 @@ class stride_counting_iterator {
     >>>>>;
     using iterator_category = iterator_concept;
 
-    stride_counting_iterator() requires std::default_initializable<It> = default;
-
-    constexpr explicit stride_counting_iterator(It const& it) : base_(base(it)) { }
+    stride_counting_iterator()
+      requires std::default_initializable<It>
+    = default;
 
-    constexpr explicit stride_counting_iterator(
-        It const& it, StrideCountType* stride_count, StrideDisplacementType* stride_displacement)
-        : base_(base(it)), stride_count_(stride_count), stride_displacement_(stride_displacement) {}
+    constexpr explicit stride_counting_iterator(It const& it, IteratorOpCounts* counts = nullptr)
+        : base_(base(it)), counts_(counts) {}
 
     constexpr stride_counting_iterator(const stride_counting_iterator& o) { *this = o; }
     constexpr stride_counting_iterator(stride_counting_iterator&& o) { *this = o; }
@@ -780,12 +764,6 @@ class stride_counting_iterator {
 
     friend constexpr It base(stride_counting_iterator const& it) { return It(it.base_); }
 
-    constexpr StrideCountType stride_count() const { return stride_count_; }
-
-    constexpr StrideDisplacementType stride_displacement() const { return stride_displacement_; }
-
-    constexpr difference_type equals_count() const { return equals_count_; }
-
     constexpr decltype(auto) operator*() const { return *It(base_); }
 
     constexpr decltype(auto) operator[](difference_type n) const { return It(base_)[n]; }
@@ -793,8 +771,7 @@ class stride_counting_iterator {
     constexpr stride_counting_iterator& operator++() {
         It tmp(base_);
         base_ = base(++tmp);
-        ++stride_count_;
-        ++stride_displacement_;
+        moved_by(1);
         return *this;
     }
 
@@ -813,8 +790,7 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(--tmp);
-        ++stride_count_;
-        --stride_displacement_;
+        moved_by(-1);
         return *this;
     }
 
@@ -831,8 +807,7 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(tmp += n);
-        ++stride_count_;
-        ++stride_displacement_;
+        moved_by(n);
         return *this;
     }
 
@@ -841,8 +816,7 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(tmp -= n);
-        ++stride_count_;
-        --stride_displacement_;
+        moved_by(-n);
         return *this;
     }
 
@@ -870,7 +844,10 @@ class stride_counting_iterator {
         return base(x) - base(y);
     }
 
-    constexpr void record_equality_comparison() const { ++equals_count_; }
+    constexpr void record_equality_comparison() const {
+      if (counts_ != nullptr)
+        ++counts_->equal_cmps;
+    }
 
     constexpr bool operator==(stride_counting_iterator const& other) const
         requires std::sentinel_for<It, It>
@@ -907,10 +884,19 @@ class stride_counting_iterator {
     void operator,(T const &) = delete;
 
 private:
+  constexpr void moved_by(difference_type n) {
+    if (counts_ == nullptr)
+      return;
+    if (n > 0)
+      ++counts_->increments;
+    else if (n < 0)
+      ++counts_->decrements;
+    else
+      ++counts_->zero_moves;
+  }
+
     decltype(base(std::declval<It>())) base_;
-    concrete_or_ref<StrideCountType> stride_count_;
-    concrete_or_ref<StrideDisplacementType> stride_displacement_;
-    mutable difference_type equals_count_ = 0;
+    IteratorOpCounts* counts_ = nullptr;
 };
 template <class It>
 stride_counting_iterator(It) -> stride_counting_iterator<It>;

>From b4fad5bba111e0dd9a6ac82533edd260303e8af1 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 27 May 2024 20:51:33 +0100
Subject: [PATCH 49/56] 
 's/stride_counting_iterator/operation_counting_iterator/'

---
 .../iterator_count.pass.cpp                   |   2 +-
 .../iterator_count_sentinel.pass.cpp          |  12 +-
 .../iterator_sentinel.pass.cpp                |  10 +-
 .../ranges/range.adaptors/range.drop/types.h  |   2 +-
 libcxx/test/support/test_iterators.h          | 137 +++++++++---------
 5 files changed, 82 insertions(+), 81 deletions(-)

diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count.pass.cpp
index cda1747439cf7..18f8d15335ec6 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count.pass.cpp
@@ -34,7 +34,7 @@ constexpr void check(int* first, std::iter_difference_t<It> n, int* expected) {
   // Count operations
   if constexpr (Count) {
     IteratorOpCounts ops;
-    auto it = stride_counting_iterator(It(first), &ops);
+    auto it = operation_counting_iterator(It(first), &ops);
     std::ranges::advance(it, n);
     if constexpr (std::random_access_iterator<It>) {
       assert(ops.increments + ops.decrements <= 1);
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
index 2ab88a62892ae..d613105805dd5 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
@@ -39,8 +39,8 @@ check_forward(int* first, int* last, std::iter_difference_t<It> n, int* expected
   // Count operations
   if constexpr (Count) {
     IteratorOpCounts ops;
-    auto it   = stride_counting_iterator(It(first), &ops);
-    auto sent = sentinel_wrapper(stride_counting_iterator(It(last), &ops));
+    auto it   = operation_counting_iterator(It(first), &ops);
+    auto sent = sentinel_wrapper(operation_counting_iterator(It(last), &ops));
     (void)std::ranges::advance(it, n, sent);
     // We don't have a sized sentinel, so we have to increment one-by-one
     // regardless of the iterator category.
@@ -68,7 +68,7 @@ constexpr void check_forward_sized_sentinel(int* first, int* last, std::iter_dif
   // Count operations
   {
     IteratorOpCounts ops;
-    auto it   = stride_counting_iterator(It(first), &ops);
+    auto it   = operation_counting_iterator(It(first), &ops);
     auto sent = distance_apriori_sentinel(size);
     (void)std::ranges::advance(it, n, sent);
     if constexpr (std::random_access_iterator<It>) {
@@ -104,9 +104,9 @@ check_backward(int* first, int* last, std::iter_difference_t<It> n, int* expecte
   // Count operations
   {
     IteratorOpCounts ops;
-    auto it   = stride_counting_iterator(It(last), &ops);
-    auto sent = stride_counting_iterator(It(first), &ops);
-    static_assert(std::bidirectional_iterator<stride_counting_iterator<It>>);
+    auto it   = operation_counting_iterator(It(last), &ops);
+    auto sent = operation_counting_iterator(It(first), &ops);
+    static_assert(std::bidirectional_iterator<operation_counting_iterator<It>>);
     static_assert(Count == !std::sized_sentinel_for<It, It>);
 
     (void)std::ranges::advance(it, n, sent);
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp
index 75f7a51d6e14b..147c26e429063 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp
@@ -33,8 +33,8 @@ constexpr void check_assignable(int* first, int* last, int* expected) {
   // Count operations
   if constexpr (Count) {
     IteratorOpCounts ops;
-    auto it   = stride_counting_iterator(It(first), &ops);
-    auto sent = assignable_sentinel(stride_counting_iterator(It(last), &ops));
+    auto it   = operation_counting_iterator(It(first), &ops);
+    auto sent = assignable_sentinel(operation_counting_iterator(It(last), &ops));
     std::ranges::advance(it, sent);
     assert(base(base(it)) == expected);
     assert(ops.increments + ops.decrements == 0); // because we got here by assigning from last, not by incrementing
@@ -56,7 +56,7 @@ constexpr void check_sized_sentinel(int* first, int* last, int* expected) {
   // Count operations
   if constexpr (Count) {
     IteratorOpCounts ops;
-    auto it   = stride_counting_iterator(It(first), &ops);
+    auto it   = operation_counting_iterator(It(first), &ops);
     auto sent = distance_apriori_sentinel(size);
     std::ranges::advance(it, sent);
     if constexpr (std::random_access_iterator<It>) {
@@ -85,8 +85,8 @@ constexpr void check_sentinel(int* first, int* last, int* expected) {
   // Count operations
   if constexpr (Count) {
     IteratorOpCounts ops;
-    auto it   = stride_counting_iterator(It(first), &ops);
-    auto sent = sentinel_wrapper(stride_counting_iterator(It(last), &ops));
+    auto it   = operation_counting_iterator(It(first), &ops);
+    auto sent = sentinel_wrapper(operation_counting_iterator(It(last), &ops));
     std::ranges::advance(it, sent);
     const auto big   = std::max(ops.increments, ops.decrements);
     const auto small = std::min(ops.increments, ops.decrements);
diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/types.h b/libcxx/test/std/ranges/range.adaptors/range.drop/types.h
index 990fa4a0c064a..73d1e5045ad22 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.drop/types.h
+++ b/libcxx/test/std/ranges/range.adaptors/range.drop/types.h
@@ -120,7 +120,7 @@ struct Range {
   int *end() const;
 };
 
-using CountedIter = stride_counting_iterator<forward_iterator<int*>>;
+using CountedIter = operation_counting_iterator<forward_iterator<int*>>;
 struct CountedView : std::ranges::view_base {
   explicit constexpr CountedView(IteratorOpCounts* opcounts) noexcept : opcounts_(opcounts) {}
   constexpr CountedIter begin() const { return CountedIter(ForwardIter(globalBuff), opcounts_); }
diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 25ed860bc202f..f4e639aed5dca 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -735,7 +735,7 @@ struct IteratorOpCounts {
 
 // Iterator adaptor that records its operation counts in a IteratorOpCounts
 template <class It>
-class stride_counting_iterator {
+class operation_counting_iterator {
 public:
     using value_type = typename iter_value_or_void<It>::type;
     using difference_type = std::iter_difference_t<It>;
@@ -749,99 +749,100 @@ class stride_counting_iterator {
     >>>>>;
     using iterator_category = iterator_concept;
 
-    stride_counting_iterator()
+    operation_counting_iterator()
       requires std::default_initializable<It>
     = default;
 
-    constexpr explicit stride_counting_iterator(It const& it, IteratorOpCounts* counts = nullptr)
+    constexpr explicit operation_counting_iterator(It const& it, IteratorOpCounts* counts = nullptr)
         : base_(base(it)), counts_(counts) {}
 
-    constexpr stride_counting_iterator(const stride_counting_iterator& o) { *this = o; }
-    constexpr stride_counting_iterator(stride_counting_iterator&& o) { *this = o; }
+    constexpr operation_counting_iterator(const operation_counting_iterator& o) { *this = o; }
+    constexpr operation_counting_iterator(operation_counting_iterator&& o) { *this = o; }
 
-    constexpr stride_counting_iterator& operator=(const stride_counting_iterator& o) = default;
-    constexpr stride_counting_iterator& operator=(stride_counting_iterator&& o) { return *this = o; }
+    constexpr operation_counting_iterator& operator=(const operation_counting_iterator& o) = default;
+    constexpr operation_counting_iterator& operator=(operation_counting_iterator&& o) { return *this = o; }
 
-    friend constexpr It base(stride_counting_iterator const& it) { return It(it.base_); }
+    friend constexpr It base(operation_counting_iterator const& it) { return It(it.base_); }
 
     constexpr decltype(auto) operator*() const { return *It(base_); }
 
     constexpr decltype(auto) operator[](difference_type n) const { return It(base_)[n]; }
 
-    constexpr stride_counting_iterator& operator++() {
-        It tmp(base_);
-        base_ = base(++tmp);
-        moved_by(1);
-        return *this;
+    constexpr operation_counting_iterator& operator++() {
+      It tmp(base_);
+      base_ = base(++tmp);
+      moved_by(1);
+      return *this;
     }
 
     constexpr void operator++(int) { ++*this; }
 
-    constexpr stride_counting_iterator operator++(int)
-        requires std::forward_iterator<It>
+    constexpr operation_counting_iterator operator++(int)
+      requires std::forward_iterator<It>
     {
-        auto temp = *this;
-        ++*this;
-        return temp;
+      auto temp = *this;
+      ++*this;
+      return temp;
     }
 
-    constexpr stride_counting_iterator& operator--()
-        requires std::bidirectional_iterator<It>
+    constexpr operation_counting_iterator& operator--()
+      requires std::bidirectional_iterator<It>
     {
-        It tmp(base_);
-        base_ = base(--tmp);
-        moved_by(-1);
-        return *this;
+      It tmp(base_);
+      base_ = base(--tmp);
+      moved_by(-1);
+      return *this;
     }
 
-    constexpr stride_counting_iterator operator--(int)
-        requires std::bidirectional_iterator<It>
+    constexpr operation_counting_iterator operator--(int)
+      requires std::bidirectional_iterator<It>
     {
-        auto temp = *this;
-        --*this;
-        return temp;
+      auto temp = *this;
+      --*this;
+      return temp;
     }
 
-    constexpr stride_counting_iterator& operator+=(difference_type const n)
-        requires std::random_access_iterator<It>
+    constexpr operation_counting_iterator& operator+=(difference_type const n)
+      requires std::random_access_iterator<It>
     {
-        It tmp(base_);
-        base_ = base(tmp += n);
-        moved_by(n);
-        return *this;
+      It tmp(base_);
+      base_ = base(tmp += n);
+      moved_by(n);
+      return *this;
     }
 
-    constexpr stride_counting_iterator& operator-=(difference_type const n)
-        requires std::random_access_iterator<It>
+    constexpr operation_counting_iterator& operator-=(difference_type const n)
+      requires std::random_access_iterator<It>
     {
-        It tmp(base_);
-        base_ = base(tmp -= n);
-        moved_by(-n);
-        return *this;
+      It tmp(base_);
+      base_ = base(tmp -= n);
+      moved_by(-n);
+      return *this;
     }
 
-    friend constexpr stride_counting_iterator operator+(stride_counting_iterator it, difference_type n)
-        requires std::random_access_iterator<It>
+    friend constexpr operation_counting_iterator operator+(operation_counting_iterator it, difference_type n)
+      requires std::random_access_iterator<It>
     {
-        return it += n;
+      return it += n;
     }
 
-    friend constexpr stride_counting_iterator operator+(difference_type n, stride_counting_iterator it)
-        requires std::random_access_iterator<It>
+    friend constexpr operation_counting_iterator operator+(difference_type n, operation_counting_iterator it)
+      requires std::random_access_iterator<It>
     {
-        return it += n;
+      return it += n;
     }
 
-    friend constexpr stride_counting_iterator operator-(stride_counting_iterator it, difference_type n)
-        requires std::random_access_iterator<It>
+    friend constexpr operation_counting_iterator operator-(operation_counting_iterator it, difference_type n)
+      requires std::random_access_iterator<It>
     {
-        return it -= n;
+      return it -= n;
     }
 
-    friend constexpr difference_type operator-(stride_counting_iterator const& x, stride_counting_iterator const& y)
-        requires std::sized_sentinel_for<It, It>
+    friend constexpr difference_type
+    operator-(operation_counting_iterator const& x, operation_counting_iterator const& y)
+      requires std::sized_sentinel_for<It, It>
     {
-        return base(x) - base(y);
+      return base(x) - base(y);
     }
 
     constexpr void record_equality_comparison() const {
@@ -849,35 +850,35 @@ class stride_counting_iterator {
         ++counts_->equal_cmps;
     }
 
-    constexpr bool operator==(stride_counting_iterator const& other) const
-        requires std::sentinel_for<It, It>
+    constexpr bool operator==(operation_counting_iterator const& other) const
+      requires std::sentinel_for<It, It>
     {
       record_equality_comparison();
       return It(base_) == It(other.base_);
     }
 
-    friend constexpr bool operator<(stride_counting_iterator const& x, stride_counting_iterator const& y)
-        requires std::random_access_iterator<It>
+    friend constexpr bool operator<(operation_counting_iterator const& x, operation_counting_iterator const& y)
+      requires std::random_access_iterator<It>
     {
-        return It(x.base_) < It(y.base_);
+      return It(x.base_) < It(y.base_);
     }
 
-    friend constexpr bool operator>(stride_counting_iterator const& x, stride_counting_iterator const& y)
-        requires std::random_access_iterator<It>
+    friend constexpr bool operator>(operation_counting_iterator const& x, operation_counting_iterator const& y)
+      requires std::random_access_iterator<It>
     {
-        return It(x.base_) > It(y.base_);
+      return It(x.base_) > It(y.base_);
     }
 
-    friend constexpr bool operator<=(stride_counting_iterator const& x, stride_counting_iterator const& y)
-        requires std::random_access_iterator<It>
+    friend constexpr bool operator<=(operation_counting_iterator const& x, operation_counting_iterator const& y)
+      requires std::random_access_iterator<It>
     {
-        return It(x.base_) <= It(y.base_);
+      return It(x.base_) <= It(y.base_);
     }
 
-    friend constexpr bool operator>=(stride_counting_iterator const& x, stride_counting_iterator const& y)
-        requires std::random_access_iterator<It>
+    friend constexpr bool operator>=(operation_counting_iterator const& x, operation_counting_iterator const& y)
+      requires std::random_access_iterator<It>
     {
-        return It(x.base_) >= It(y.base_);
+      return It(x.base_) >= It(y.base_);
     }
 
     template <class T>
@@ -899,7 +900,7 @@ class stride_counting_iterator {
     IteratorOpCounts* counts_ = nullptr;
 };
 template <class It>
-stride_counting_iterator(It) -> stride_counting_iterator<It>;
+operation_counting_iterator(It) -> operation_counting_iterator<It>;
 
 #endif // TEST_STD_VER > 17
 

>From 87f12c2904a671aa49e64c2b15f6c11327f7ca01 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Fri, 24 May 2024 23:17:26 +0100
Subject: [PATCH 50/56] change complexity test interface,
 's/testSetIntersectionAndReturnOpCounts/counted_set_intersection/', and split
 complexity tests into `set_intersection_complexity.pass.cpp`.

---
 .../ranges_set_intersection.pass.cpp          | 242 +---------------
 .../set_intersection_complexity.pass.cpp      | 262 ++++++++++++++++++
 2 files changed, 269 insertions(+), 235 deletions(-)
 create mode 100644 libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index b4e7f54a9c877..0af4a393f1303 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -29,8 +29,7 @@
 #include <array>
 #include <concepts>
 #include <cstddef>
-#include <iterator>
-#include <type_traits>
+#include <ranges>
 
 #include "almost_satisfies_types.h"
 #include "MoveOnly.h"
@@ -96,17 +95,14 @@ static_assert(!HasSetIntersectionRange<UncheckedRange<MoveOnly*>, UncheckedRange
 
 using std::ranges::set_intersection_result;
 
-// TODO: std::ranges::set_intersection calls std::ranges::copy
-// std::ranges::copy(contiguous_iterator<int*>, sentinel_wrapper<contiguous_iterator<int*>>, contiguous_iterator<int*>) doesn't seem to work.
-// It seems that std::ranges::copy calls std::copy, which unwraps contiguous_iterator<int*> into int*,
-// and then it failed because there is no == between int* and sentinel_wrapper<contiguous_iterator<int*>>
-template <typename Iter>
-using SentinelWorkaround = std::conditional_t<std::contiguous_iterator<Iter>, Iter, sentinel_wrapper<Iter>>;
-
 template <class In1, class In2, class Out, std::size_t N1, std::size_t N2, std::size_t N3>
 constexpr void testSetIntersectionImpl(std::array<int, N1> in1, std::array<int, N2> in2, std::array<int, N3> expected) {
-  using Sent1 = SentinelWorkaround<In1>;
-  using Sent2 = SentinelWorkaround<In2>;
+  // TODO: std::ranges::set_intersection calls std::ranges::copy
+  // std::ranges::copy(contiguous_iterator<int*>, sentinel_wrapper<contiguous_iterator<int*>>, contiguous_iterator<int*>) doesn't seem to work.
+  // It seems that std::ranges::copy calls std::copy, which unwraps contiguous_iterator<int*> into int*,
+  // and then it failed because there is no == between int* and sentinel_wrapper<contiguous_iterator<int*>>
+  using Sent1 = std::conditional_t<std::contiguous_iterator<In1>, In1, sentinel_wrapper<In1>>;
+  using Sent2 = std::conditional_t<std::contiguous_iterator<In2>, In2, sentinel_wrapper<In2>>;
 
   // iterator overload
   {
@@ -278,227 +274,6 @@ constexpr void runAllIteratorPermutationsTests() {
   static_assert(withAllPermutationsOfInIter1AndInIter2<contiguous_iterator<int*>>());
 }
 
-namespace {
-struct [[nodiscard]] OperationCounts {
-  std::size_t comparisons{};
-  struct PerInput {
-    std::size_t proj{};
-    IteratorOpCounts iterops;
-
-    // IGNORES proj!
-    [[nodiscard]] constexpr bool operator==(const PerInput& o) const {
-      return iterops.increments == o.iterops.increments && iterops.decrements == o.iterops.decrements;
-    }
-
-    [[nodiscard]] constexpr bool matchesExpectation(const PerInput& expect) {
-      return proj <= expect.proj &&
-             iterops.increments + iterops.decrements <= expect.iterops.increments + expect.iterops.decrements;
-    }
-  };
-  std::array<PerInput, 2> in;
-
-  [[nodiscard]] constexpr bool matchesExpectation(const OperationCounts& expect) {
-    // __debug_less will perform an additional comparison in an assertion
-    constexpr unsigned comparison_multiplier =
-#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
-        2;
-#else
-        1;
-#endif
-    return comparisons <= comparison_multiplier * expect.comparisons && in[0].matchesExpectation(expect.in[0]) &&
-           in[1].matchesExpectation(expect.in[1]);
-  }
-
-  [[nodiscard]] constexpr bool operator==(const OperationCounts& o) const {
-    return comparisons == o.comparisons && std::ranges::equal(in, o.in);
-  }
-};
-} // namespace
-
-template <template <class...> class In1,
-          template <class...>
-          class In2,
-          class Out,
-          std::size_t N1,
-          std::size_t N2,
-          std::size_t N3>
-constexpr void testSetIntersectionAndReturnOpCounts(
-    std::array<int, N1> in1,
-    std::array<int, N2> in2,
-    std::array<int, N3> expected,
-    const OperationCounts& expectedOpCounts) {
-  OperationCounts ops;
-
-  const auto comp = [&ops](int x, int y) {
-    ++ops.comparisons;
-    return x < y;
-  };
-
-  std::array<int, N3> out;
-
-  stride_counting_iterator b1(In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterops);
-  stride_counting_iterator e1(In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterops);
-
-  stride_counting_iterator b2(In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterops);
-  stride_counting_iterator e2(In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterops);
-
-  std::set_intersection(b1, e1, b2, e2, Out(out.data()), comp);
-
-  assert(std::ranges::equal(out, expected));
-  assert(ops.matchesExpectation(expectedOpCounts));
-}
-
-template <template <class...> class In1,
-          template <class...>
-          class In2,
-          class Out,
-          std::size_t N1,
-          std::size_t N2,
-          std::size_t N3>
-constexpr void testRangesSetIntersectionAndReturnOpCounts(
-    std::array<int, N1> in1,
-    std::array<int, N2> in2,
-    std::array<int, N3> expected,
-    const OperationCounts& expectedOpCounts) {
-  OperationCounts ops;
-
-  const auto comp = [&ops](int x, int y) {
-    ++ops.comparisons;
-    return x < y;
-  };
-
-  const auto proj1 = [&ops](const int& i) {
-    ++ops.in[0].proj;
-    return i;
-  };
-
-  const auto proj2 = [&ops](const int& i) {
-    ++ops.in[1].proj;
-    return i;
-  };
-
-  std::array<int, N3> out;
-
-  stride_counting_iterator b1(In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterops);
-  stride_counting_iterator e1(In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterops);
-
-  stride_counting_iterator b2(In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterops);
-  stride_counting_iterator e2(In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterops);
-
-  std::ranges::subrange r1{b1, SentinelWorkaround<decltype(e1)>{e1}};
-  std::ranges::subrange r2{b2, SentinelWorkaround<decltype(e2)>{e2}};
-  std::same_as<set_intersection_result<decltype(e1), decltype(e2), Out>> decltype(auto) result =
-      std::ranges::set_intersection(r1, r2, Out{out.data()}, comp, proj1, proj2);
-  assert(std::ranges::equal(out, expected));
-  assert(base(result.in1) == base(e1));
-  assert(base(result.in2) == base(e2));
-  assert(base(result.out) == out.data() + out.size());
-  assert(ops.matchesExpectation(expectedOpCounts));
-}
-
-template <template <typename...> class In1, template <typename...> class In2, class Out>
-constexpr void testComplexityParameterizedIter() {
-  // Worst-case complexity:
-  // Let N=(last1 - first1) and M=(last2 - first2)
-  // At most 2*(N+M) - 1 comparisons and applications of each projection.
-  // At most 2*(N+M) iterator mutations.
-  {
-    std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
-    std::array r2{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
-    std::array<int, 0> expected{};
-
-    OperationCounts expectedCounts;
-    expectedCounts.comparisons                 = 37;
-    expectedCounts.in[0].proj                  = 37;
-    expectedCounts.in[0].iterator_strides      = 30;
-    expectedCounts.in[0].iterator_displacement = 30;
-    expectedCounts.in[1]                       = expectedCounts.in[0];
-
-    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
-    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
-  }
-
-  {
-    std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
-    std::array r2{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
-    std::array expected{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
-
-    OperationCounts expectedCounts;
-    expectedCounts.comparisons                 = 38;
-    expectedCounts.in[0].proj                  = 38;
-    expectedCounts.in[0].iterator_strides      = 30;
-    expectedCounts.in[0].iterator_displacement = 30;
-    expectedCounts.in[1]                       = expectedCounts.in[0];
-
-    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
-    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
-  }
-
-  // Lower complexity when there is low overlap between ranges: we can make 2*log(X) comparisons when one range
-  // has X elements that can be skipped over (and then 1 more to confirm that the value we found is equal).
-  {
-    std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-    std::array r2{15};
-    std::array expected{15};
-
-    OperationCounts expectedCounts;
-    expectedCounts.comparisons                 = 9;
-    expectedCounts.in[0].proj                  = 9;
-    expectedCounts.in[0].iterator_strides      = 23;
-    expectedCounts.in[0].iterator_displacement = 23;
-    expectedCounts.in[1].proj                  = 9;
-    expectedCounts.in[1].iterator_strides      = 1;
-    expectedCounts.in[1].iterator_displacement = 1;
-
-    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
-    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
-  }
-
-  {
-    std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-    std::array r2{0, 16};
-    std::array<int, 0> expected{};
-
-    OperationCounts expectedCounts;
-    expectedCounts.comparisons                 = 10;
-    expectedCounts.in[0].proj                  = 10;
-    expectedCounts.in[0].iterator_strides      = 24;
-    expectedCounts.in[0].iterator_displacement = 24;
-    expectedCounts.in[1].proj                  = 10;
-    expectedCounts.in[1].iterator_strides      = 4;
-    expectedCounts.in[1].iterator_displacement = 4;
-
-    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
-    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
-  }
-}
-
-template <template <typename...> class In2, class Out>
-constexpr void testComplexityParameterizedIterPermutateIn1() {
-  //common_input_iterator
-  testComplexityParameterizedIter<forward_iterator, In2, Out>();
-  testComplexityParameterizedIter<bidirectional_iterator, In2, Out>();
-  testComplexityParameterizedIter<random_access_iterator, In2, Out>();
-}
-
-template <class Out>
-constexpr bool testComplexityParameterizedIterPermutateIn1In2() {
-  testComplexityParameterizedIterPermutateIn1<forward_iterator, Out>();
-  testComplexityParameterizedIterPermutateIn1<bidirectional_iterator, Out>();
-  testComplexityParameterizedIterPermutateIn1<random_access_iterator, Out>();
-  return true;
-}
-
-constexpr void runAllComplexityTests() {
-  testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
-  testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
-  testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
-
-  static_assert(testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>());
-  static_assert(testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>());
-  static_assert(testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>());
-}
-
 constexpr bool test() {
   // check that every element is copied exactly once
   {
@@ -799,8 +574,5 @@ int main(int, char**) {
   // than the step limit.
   runAllIteratorPermutationsTests();
 
-  // similar for complexity tests
-  runAllComplexityTests();
-
   return 0;
 }
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
new file mode 100644
index 0000000000000..047d62f4a2c99
--- /dev/null
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
@@ -0,0 +1,262 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <algorithm>
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// Algorithmic complexity tests for both std::set_intersection and std::ranges::set_intersection
+
+// template<InputIterator InIter1, InputIterator InIter2, typename OutIter>
+//   requires OutputIterator<OutIter, InIter1::reference>
+//         && OutputIterator<OutIter, InIter2::reference>
+//         && HasLess<InIter2::value_type, InIter1::value_type>
+//         && HasLess<InIter1::value_type, InIter2::value_type>
+//   constexpr OutIter       // constexpr after C++17
+//   set_intersection(InIter1 first1, InIter1 last1, InIter2 first2, InIter2 last2,
+//                    OutIter result);
+//
+// template<input_iterator I1, sentinel_for<I1> S1, input_iterator I2, sentinel_for<I2> S2,
+//          weakly_incrementable O, class Comp = ranges::less,
+//          class Proj1 = identity, class Proj2 = identity>
+//   requires mergeable<I1, I2, O, Comp, Proj1, Proj2>
+//   constexpr set_intersection_result<I1, I2, O>
+//     set_intersection(I1 first1, S1 last1, I2 first2, S2 last2, O result,
+//                      Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {});                         // since C++20
+//
+// template<input_range R1, input_range R2, weakly_incrementable O,
+//          class Comp = ranges::less, class Proj1 = identity, class Proj2 = identity>
+//   requires mergeable<iterator_t<R1>, iterator_t<R2>, O, Comp, Proj1, Proj2>
+//   constexpr set_intersection_result<borrowed_iterator_t<R1>, borrowed_iterator_t<R2>, O>
+//     set_intersection(R1&& r1, R2&& r2, O result,
+//                      Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {});                         // since C++20
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <ranges>
+
+#include "test_iterators.h"
+
+namespace {
+struct [[nodiscard]] OperationCounts {
+  std::size_t comparisons{}; // calls made to the counting comparator
+  struct PerInput {
+    std::size_t proj{};       // applications of this input's projection
+    IteratorOpCounts iterops; // operation counters fed by this input's counting iterators
+    // True when `*this` records at least as many projections and iterator moves (of any kind) as `other`.
+    [[nodiscard]] constexpr bool isNotBetterThan(const PerInput& other) {
+      return proj >= other.proj && iterops.increments + iterops.decrements + iterops.zero_moves >=
+                                       other.iterops.increments + other.iterops.decrements + other.iterops.zero_moves;
+    }
+  };
+  std::array<PerInput, 2> in; // in[0]: first input range, in[1]: second input range
+  // `*this` is the expected upper bound and `other` the measured run: true when `other` stays within the bound.
+  [[nodiscard]] constexpr bool isNotBetterThan(const OperationCounts& other) {
+    // __debug_less will perform an additional comparison in an assertion, so the bound (not the measurement) is scaled
+    constexpr unsigned comparison_multiplier =
+#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
+        2;
+#else
+        1;
+#endif
+    return comparison_multiplier * comparisons >= other.comparisons && in[0].isNotBetterThan(other.in[0]) &&
+           in[1].isNotBetterThan(other.in[1]);
+  }
+};
+
+template <std::size_t ResultSize>
+struct counted_set_intersection_result { // output elements of one set_intersection run plus its operation counts
+  std::array<int, ResultSize> result; // elements written through the output iterator
+  OperationCounts opcounts;           // comparison, projection and iterator-operation tallies
+
+  constexpr counted_set_intersection_result() = default;
+
+  constexpr explicit counted_set_intersection_result(std::array<int, ResultSize>&& contents) : result{contents} {}
+
+  constexpr void assertNotBetterThan(const counted_set_intersection_result& other) {
+    assert(result == other.result);                   // both sides must hold the same output elements
+    assert(opcounts.isNotBetterThan(other.opcounts)); // count comparison is delegated to OperationCounts
+  }
+};
+
+template <std::size_t ResultSize> // CTAD guide: deduce ResultSize from the array handed to the constructor
+counted_set_intersection_result(std::array<int, ResultSize>) -> counted_set_intersection_result<ResultSize>;
+
+template <template <class...> class InIterType1,
+          template <class...>
+          class InIterType2,
+          class OutIterType,
+          std::size_t ResultSize,
+          std::size_t N1,
+          std::size_t N2>
+constexpr counted_set_intersection_result<ResultSize>
+counted_set_intersection(std::array<int, N1> in1, std::array<int, N2> in2) {
+  counted_set_intersection_result<ResultSize> counted;
+  IteratorOpCounts* const ops1 = &counted.opcounts.in[0].iterops;
+  IteratorOpCounts* const ops2 = &counted.opcounts.in[1].iterops;
+  // Runs std::set_intersection over counting wrappers of in1/in2; output elements and tallies land in `counted`.
+
+  // Comparator that bumps the comparison tally before comparing with operator<.
+  const auto counting_less = [&counted](int lhs, int rhs) {
+    ++counted.opcounts.comparisons;
+    return lhs < rhs;
+  };
+
+  operation_counting_iterator first1(InIterType1<decltype(in1.begin())>(in1.begin()), ops1);
+  operation_counting_iterator last1(InIterType1<decltype(in1.end()) >(in1.end()), ops1);
+  operation_counting_iterator first2(InIterType2<decltype(in2.begin())>(in2.begin()), ops2);
+  operation_counting_iterator last2(InIterType2<decltype(in2.end()) >(in2.end()), ops2);
+  std::set_intersection(first1, last1, first2, last2, OutIterType(counted.result.data()), counting_less);
+  return counted;
+}
+
+template <template <class...> class InIterType1,
+          template <class...>
+          class InIterType2,
+          class OutIterType,
+          std::size_t ResultSize,
+          std::size_t N1,
+          std::size_t N2>
+constexpr counted_set_intersection_result<ResultSize>
+counted_ranges_set_intersection(std::array<int, N1> in1, std::array<int, N2> in2) {
+  // Runs std::ranges::set_intersection over counting wrappers of in1/in2 and returns the output plus all tallies.
+  counted_set_intersection_result<ResultSize> counted;
+  auto& tally1 = counted.opcounts.in[0];
+  auto& tally2 = counted.opcounts.in[1];
+  // Comparator and projections behave like `<` and pass-through, except that each call bumps its counter first.
+  const auto counting_less = [&counted](int lhs, int rhs) {
+    ++counted.opcounts.comparisons;
+    return lhs < rhs;
+  };
+  const auto project1 = [&tally1](const int& value) {
+    ++tally1.proj;
+    return value;
+  };
+  const auto project2 = [&tally2](const int& value) {
+    ++tally2.proj;
+    return value;
+  };
+
+  operation_counting_iterator first1(InIterType1<decltype(in1.begin())>(in1.begin()), &tally1.iterops);
+  operation_counting_iterator last1(InIterType1<decltype(in1.end()) >(in1.end()), &tally1.iterops);
+  operation_counting_iterator first2(InIterType2<decltype(in2.begin())>(in2.begin()), &tally2.iterops);
+  operation_counting_iterator last2(InIterType2<decltype(in2.end()) >(in2.end()), &tally2.iterops);
+
+  // Each end iterator is wrapped so the subrange's sentinel type differs from its iterator type.
+  std::ranges::subrange rng1{first1, sentinel_wrapper<decltype(last1)>{last1}};
+  std::ranges::subrange rng2{first2, sentinel_wrapper<decltype(last2)>{last2}};
+  std::same_as<std::ranges::set_intersection_result<decltype(last1), decltype(last2), OutIterType>> decltype(auto) ends =
+      std::ranges::set_intersection(rng1, rng2, OutIterType{counted.result.data()}, counting_less, project1, project2);
+  assert(base(ends.in1) == base(last1));
+  assert(base(ends.in2) == base(last2));
+  assert(base(ends.out) == counted.result.data() + counted.result.size());
+  return counted;
+}
+
+template <template <typename...> class In1, template <typename...> class In2, class Out>
+constexpr void testComplexityParameterizedIter() { // checks four input scenarios against fixed operation-count bounds
+  // Worst-case complexity:
+  // Let N=(last1 - first1) and M=(last2 - first2)
+  // At most 2*(N+M) - 1 comparisons and applications of each projection.
+  // At most 2*(N+M) iterator mutations.
+  {
+    std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; // odd values only
+    std::array r2{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}; // even values only: interleaved with r1, nothing in common
+
+    counted_set_intersection_result<0> expected; // empty intersection expected
+    expected.opcounts.comparisons              = 37;
+    expected.opcounts.in[0].proj               = 37;
+    expected.opcounts.in[0].iterops.increments = 30;
+    expected.opcounts.in[0].iterops.decrements = 0;
+    expected.opcounts.in[1]                    = expected.opcounts.in[0]; // same bounds for the second input
+
+    expected.assertNotBetterThan(counted_set_intersection<In1, In2, Out, expected.result.size()>(r1, r2));
+    expected.assertNotBetterThan(counted_ranges_set_intersection<In1, In2, Out, expected.result.size()>(r1, r2));
+  }
+
+  {
+    std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+    std::array r2{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; // identical to r1: every element is part of the intersection
+
+    counted_set_intersection_result expected(std::array{1, 3, 5, 7, 9, 11, 13, 15, 17, 19});
+    expected.opcounts.comparisons              = 38;
+    expected.opcounts.in[0].proj               = 38;
+    expected.opcounts.in[0].iterops.increments = 30;
+    expected.opcounts.in[0].iterops.decrements = 0;
+    expected.opcounts.in[1]                    = expected.opcounts.in[0]; // same bounds for the second input
+
+    expected.assertNotBetterThan(counted_set_intersection<In1, In2, Out, expected.result.size()>(r1, r2));
+    expected.assertNotBetterThan(counted_ranges_set_intersection<In1, In2, Out, expected.result.size()>(r1, r2));
+  }
+
+  // Lower complexity when there is low overlap between ranges: we can make 2*log(X) comparisons when one range
+  // has X elements that can be skipped over (and then 1 more to confirm that the value we found is equal).
+  {
+    std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    std::array r2{15}; // single element, equal to the last element of r1
+
+    counted_set_intersection_result expected(std::array{15});
+    expected.opcounts.comparisons              = 9;
+    expected.opcounts.in[0].proj               = 9;
+    expected.opcounts.in[0].iterops.increments = 23;
+    expected.opcounts.in[0].iterops.decrements = 0;
+    expected.opcounts.in[1].proj               = 9;
+    expected.opcounts.in[1].iterops.increments = 1;
+    expected.opcounts.in[1].iterops.decrements = 0;
+
+    expected.assertNotBetterThan(counted_set_intersection<In1, In2, Out, expected.result.size()>(r1, r2));
+    expected.assertNotBetterThan(counted_ranges_set_intersection<In1, In2, Out, expected.result.size()>(r1, r2));
+  }
+
+  {
+    std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    std::array r2{0, 16}; // one value below and one above everything in r1: no common element
+    counted_set_intersection_result<0> expected; // empty intersection expected
+
+    expected.opcounts.comparisons              = 10;
+    expected.opcounts.in[0].proj               = 10;
+    expected.opcounts.in[0].iterops.increments = 24;
+    expected.opcounts.in[0].iterops.decrements = 0;
+    expected.opcounts.in[1].proj               = 10;
+    expected.opcounts.in[1].iterops.increments = 4;
+    expected.opcounts.in[1].iterops.decrements = 0;
+
+    expected.assertNotBetterThan(counted_set_intersection<In1, In2, Out, expected.result.size()>(r1, r2));
+    expected.assertNotBetterThan(counted_ranges_set_intersection<In1, In2, Out, expected.result.size()>(r1, r2));
+  }
+}
+
+template <template <typename...> class In2, class Out>
+constexpr void testComplexityParameterizedIterPermutateIn1() { // fixes In2/Out, varies the first input's iterator kind
+  // NOTE(review): common_input_iterator is only named here, never instantiated; confirm whether that is intentional
+  testComplexityParameterizedIter<forward_iterator, In2, Out>();
+  testComplexityParameterizedIter<bidirectional_iterator, In2, Out>();
+  testComplexityParameterizedIter<random_access_iterator, In2, Out>();
+}
+
+template <class Out>
+constexpr bool testComplexityParameterizedIterPermutateIn1In2() { // fixes Out, varies the second input's iterator kind
+  testComplexityParameterizedIterPermutateIn1<forward_iterator, Out>();
+  testComplexityParameterizedIterPermutateIn1<bidirectional_iterator, Out>();
+  testComplexityParameterizedIterPermutateIn1<random_access_iterator, Out>();
+  return true; // a bool result lets main() also evaluate the whole sweep inside static_assert
+}
+
+} // unnamed namespace
+
+int main(int, char**) {
+  testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>(); // run-time pass, one call per output iterator
+  testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
+  testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
+
+  static_assert(testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>()); // same sweep, constant-evaluated
+  static_assert(testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>());
+  static_assert(testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>());
+
+  return 0;
+}

>From 505c00414e2340b6d2538af7e83674668f577a27 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 27 May 2024 21:34:30 +0100
Subject: [PATCH 51/56] Move last complexity test to the new file, and add a
 matching one for `std::set_intersection()`

---
 .../ranges_set_intersection.pass.cpp          | 69 ---------------
 .../set_intersection_complexity.pass.cpp      | 87 +++++++++++++++++++
 2 files changed, 87 insertions(+), 69 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index 0af4a393f1303..f7870485cfefc 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -465,75 +465,6 @@ constexpr bool test() {
     }
   }
 
-  // Complexity: At most 2 * ((last1 - first1) + (last2 - first2)) - 1 comparisons and applications of each projection.
-  {
-    std::array<Data, 5> r1{{{1}, {3}, {5}, {7}, {9}}};
-    std::array<Data, 5> r2{{{2}, {4}, {6}, {8}, {10}}};
-    std::array<int, 0> expected{};
-
-    const std::size_t maxOperation = 2 * (r1.size() + r2.size()) - 1;
-
-    // iterator overload
-    {
-      std::array<Data, 0> out{};
-      std::size_t numberOfComp  = 0;
-      std::size_t numberOfProj1 = 0;
-      std::size_t numberOfProj2 = 0;
-
-      const auto comp = [&numberOfComp](int x, int y) {
-        ++numberOfComp;
-        return x < y;
-      };
-
-      const auto proj1 = [&numberOfProj1](const Data& d) {
-        ++numberOfProj1;
-        return d.data;
-      };
-
-      const auto proj2 = [&numberOfProj2](const Data& d) {
-        ++numberOfProj2;
-        return d.data;
-      };
-
-      std::ranges::set_intersection(r1.begin(), r1.end(), r2.begin(), r2.end(), out.data(), comp, proj1, proj2);
-
-      assert(std::ranges::equal(out, expected, {}, &Data::data));
-      assert(numberOfComp <= maxOperation);
-      assert(numberOfProj1 <= maxOperation);
-      assert(numberOfProj2 <= maxOperation);
-    }
-
-    // range overload
-    {
-      std::array<Data, 0> out{};
-      std::size_t numberOfComp  = 0;
-      std::size_t numberOfProj1 = 0;
-      std::size_t numberOfProj2 = 0;
-
-      const auto comp = [&numberOfComp](int x, int y) {
-        ++numberOfComp;
-        return x < y;
-      };
-
-      const auto proj1 = [&numberOfProj1](const Data& d) {
-        ++numberOfProj1;
-        return d.data;
-      };
-
-      const auto proj2 = [&numberOfProj2](const Data& d) {
-        ++numberOfProj2;
-        return d.data;
-      };
-
-      std::ranges::set_intersection(r1, r2, out.data(), comp, proj1, proj2);
-
-      assert(std::ranges::equal(out, expected, {}, &Data::data));
-      assert(numberOfComp < maxOperation);
-      assert(numberOfProj1 < maxOperation);
-      assert(numberOfProj2 < maxOperation);
-    }
-  }
-
   // Comparator convertible to bool
   {
     struct ConvertibleToBool {
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
index 047d62f4a2c99..ba6470828197e 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
@@ -247,13 +247,100 @@ constexpr bool testComplexityParameterizedIterPermutateIn1In2() {
   return true;
 }
 
+constexpr bool testComplexityBasic() {
+  // Complexity: At most 2 * ((last1 - first1) + (last2 - first2)) - 1 comparisons and applications of each projection.
+  std::array<Data, 5> r1{{{1}, {3}, {5}, {7}, {9}}};
+  std::array<Data, 5> r2{{{2}, {4}, {6}, {8}, {10}}};
+  std::array<int, 0> expected{};
+
+  const std::size_t maxOperation = 2 * (r1.size() + r2.size()) - 1;
+
+  // std::set_intersection
+  {
+    std::array<Data, 0> out{};
+    std::size_t numberOfComp = 0;
+
+    const auto comp = [&numberOfComp](int x, int y) {
+      ++numberOfComp;
+      return x < y;
+    };
+
+    std::set_intersection(r1.begin(), r1.end(), r2.begin(), r2.end(), out.data(), comp);
+
+    assert(std::ranges::equal(out, expected, {}, &Data::data));
+    assert(numberOfComp <= maxOperation);
+  }
+
+  // ranges::set_intersection iterator overload
+  {
+    std::array<Data, 0> out{};
+    std::size_t numberOfComp  = 0;
+    std::size_t numberOfProj1 = 0;
+    std::size_t numberOfProj2 = 0;
+
+    const auto comp = [&numberOfComp](int x, int y) {
+      ++numberOfComp;
+      return x < y;
+    };
+
+    const auto proj1 = [&numberOfProj1](const Data& d) {
+      ++numberOfProj1;
+      return d.data;
+    };
+
+    const auto proj2 = [&numberOfProj2](const Data& d) {
+      ++numberOfProj2;
+      return d.data;
+    };
+
+    std::ranges::set_intersection(r1.begin(), r1.end(), r2.begin(), r2.end(), out.data(), comp, proj1, proj2);
+
+    assert(std::ranges::equal(out, expected, {}, &Data::data));
+    assert(numberOfComp <= maxOperation);
+    assert(numberOfProj1 <= maxOperation);
+    assert(numberOfProj2 <= maxOperation);
+  }
+
+  // ranges::set_intersection range overload
+  {
+    std::array<Data, 0> out{};
+    std::size_t numberOfComp  = 0;
+    std::size_t numberOfProj1 = 0;
+    std::size_t numberOfProj2 = 0;
+
+    const auto comp = [&numberOfComp](int x, int y) {
+      ++numberOfComp;
+      return x < y;
+    };
+
+    const auto proj1 = [&numberOfProj1](const Data& d) {
+      ++numberOfProj1;
+      return d.data;
+    };
+
+    const auto proj2 = [&numberOfProj2](const Data& d) {
+      ++numberOfProj2;
+      return d.data;
+    };
+
+    std::ranges::set_intersection(r1, r2, out.data(), comp, proj1, proj2);
+
+    assert(std::ranges::equal(out, expected, {}, &Data::data));
+    assert(numberOfComp < maxOperation);
+    assert(numberOfProj1 < maxOperation);
+    assert(numberOfProj2 < maxOperation);
+  }
+}
+
 } // unnamed namespace
 
 int main(int, char**) {
+  testComplexityBasic();
   testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
   testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
   testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
 
+  static_assert(testComplexityBasic());
   static_assert(testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>());
   static_assert(testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>());
   static_assert(testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>());

>From 95b118a01927d596894844c2bfa2261b9a60a5be Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 27 May 2024 22:35:54 +0100
Subject: [PATCH 52/56] Add yet another complexity test, this one validating
 the standard guarantees for a single match over an input of 1 to 20 elements.

---
 .../set_intersection_complexity.pass.cpp      | 114 +++++++++++++-----
 1 file changed, 83 insertions(+), 31 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
index ba6470828197e..d4464dc649987 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
@@ -93,10 +93,9 @@ template <template <class...> class InIterType1,
           class InIterType2,
           class OutIterType,
           std::size_t ResultSize,
-          std::size_t N1,
-          std::size_t N2>
-constexpr counted_set_intersection_result<ResultSize>
-counted_set_intersection(std::array<int, N1> in1, std::array<int, N2> in2) {
+          std::ranges::input_range R1,
+          std::ranges::input_range R2>
+constexpr counted_set_intersection_result<ResultSize> counted_set_intersection(const R1& in1, const R2& in2) {
   counted_set_intersection_result<ResultSize> out;
 
   const auto comp = [&out](int x, int y) {
@@ -120,10 +119,9 @@ template <template <class...> class InIterType1,
           class InIterType2,
           class OutIterType,
           std::size_t ResultSize,
-          std::size_t N1,
-          std::size_t N2>
-constexpr counted_set_intersection_result<ResultSize>
-counted_ranges_set_intersection(std::array<int, N1> in1, std::array<int, N2> in2) {
+          std::ranges::input_range R1,
+          std::ranges::input_range R2>
+constexpr counted_set_intersection_result<ResultSize> counted_ranges_set_intersection(const R1& in1, const R2& in2) {
   counted_set_intersection_result<ResultSize> out;
 
   const auto comp = [&out](int x, int y) {
@@ -240,24 +238,76 @@ constexpr void testComplexityParameterizedIterPermutateIn1() {
 }
 
 template <class Out>
-constexpr bool testComplexityParameterizedIterPermutateIn1In2() {
+constexpr void testComplexityParameterizedIterPermutateIn1In2() {
   testComplexityParameterizedIterPermutateIn1<forward_iterator, Out>();
   testComplexityParameterizedIterPermutateIn1<bidirectional_iterator, Out>();
   testComplexityParameterizedIterPermutateIn1<random_access_iterator, Out>();
+}
+
+constexpr bool testComplexity() {
+  testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
+  testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
+  testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
+  return true;
+}
+
+template <template <typename...> class In1, template <typename...> class In2, class Out>
+constexpr void testComplexityGuaranteesParameterizedIter() {
+  // now a more generic validation of the complexity guarantees when searching for a single value
+  for (unsigned range_size = 1; range_size < 20; ++range_size) {
+    std::ranges::iota_view<int, int> r1(0, range_size);
+    for (int i : r1) {
+      // At most 2 * ((last1 - first1) + (last2 - first2)) - 1 comparisons
+      counted_set_intersection_result<1> expected(std::array{i});
+      expected.opcounts.comparisons              = 2 * (r1.size() + 1) - 1;
+      expected.opcounts.in[0].proj               = expected.opcounts.comparisons;
+      expected.opcounts.in[1].proj               = expected.opcounts.comparisons;
+      expected.opcounts.in[0].iterops.increments = 2 * r1.size();
+      expected.opcounts.in[1].iterops.increments = 2;
+      expected.opcounts.in[0].iterops.decrements = expected.opcounts.in[0].iterops.increments;
+      expected.opcounts.in[1].iterops.decrements = expected.opcounts.in[1].iterops.increments;
+
+      expected.assertNotBetterThan(
+          counted_set_intersection<In1, In2, Out, expected.result.size()>(r1, expected.result));
+      expected.assertNotBetterThan(
+          counted_ranges_set_intersection<In1, In2, Out, expected.result.size()>(r1, expected.result));
+    }
+  }
+}
+
+template <template <typename...> class In2, class Out>
+constexpr void testComplexityGuaranteesParameterizedIterPermutateIn1() {
+  //common_input_iterator
+  testComplexityGuaranteesParameterizedIter<forward_iterator, In2, Out>();
+  testComplexityGuaranteesParameterizedIter<bidirectional_iterator, In2, Out>();
+  testComplexityGuaranteesParameterizedIter<random_access_iterator, In2, Out>();
+}
+
+template <class Out>
+constexpr void testComplexityGuaranteesParameterizedIterPermutateIn1In2() {
+  testComplexityGuaranteesParameterizedIterPermutateIn1<forward_iterator, Out>();
+  testComplexityGuaranteesParameterizedIterPermutateIn1<bidirectional_iterator, Out>();
+  testComplexityGuaranteesParameterizedIterPermutateIn1<random_access_iterator, Out>();
+}
+
+constexpr bool testComplexityGuarantees() {
+  testComplexityGuaranteesParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
+  testComplexityGuaranteesParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
+  testComplexityGuaranteesParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
   return true;
 }
 
 constexpr bool testComplexityBasic() {
   // Complexity: At most 2 * ((last1 - first1) + (last2 - first2)) - 1 comparisons and applications of each projection.
-  std::array<Data, 5> r1{{{1}, {3}, {5}, {7}, {9}}};
-  std::array<Data, 5> r2{{{2}, {4}, {6}, {8}, {10}}};
+  std::array<int, 5> r1{1, 3, 5, 7, 9};
+  std::array<int, 5> r2{2, 4, 6, 8, 10};
   std::array<int, 0> expected{};
 
   const std::size_t maxOperation = 2 * (r1.size() + r2.size()) - 1;
 
   // std::set_intersection
   {
-    std::array<Data, 0> out{};
+    std::array<int, 0> out{};
     std::size_t numberOfComp = 0;
 
     const auto comp = [&numberOfComp](int x, int y) {
@@ -267,13 +317,13 @@ constexpr bool testComplexityBasic() {
 
     std::set_intersection(r1.begin(), r1.end(), r2.begin(), r2.end(), out.data(), comp);
 
-    assert(std::ranges::equal(out, expected, {}, &Data::data));
+    assert(std::ranges::equal(out, expected));
     assert(numberOfComp <= maxOperation);
   }
 
   // ranges::set_intersection iterator overload
   {
-    std::array<Data, 0> out{};
+    std::array<int, 0> out{};
     std::size_t numberOfComp  = 0;
     std::size_t numberOfProj1 = 0;
     std::size_t numberOfProj2 = 0;
@@ -283,19 +333,19 @@ constexpr bool testComplexityBasic() {
       return x < y;
     };
 
-    const auto proj1 = [&numberOfProj1](const Data& d) {
+    const auto proj1 = [&numberOfProj1](int d) {
       ++numberOfProj1;
-      return d.data;
+      return d;
     };
 
-    const auto proj2 = [&numberOfProj2](const Data& d) {
+    const auto proj2 = [&numberOfProj2](int d) {
       ++numberOfProj2;
-      return d.data;
+      return d;
     };
 
     std::ranges::set_intersection(r1.begin(), r1.end(), r2.begin(), r2.end(), out.data(), comp, proj1, proj2);
 
-    assert(std::ranges::equal(out, expected, {}, &Data::data));
+    assert(std::ranges::equal(out, expected));
     assert(numberOfComp <= maxOperation);
     assert(numberOfProj1 <= maxOperation);
     assert(numberOfProj2 <= maxOperation);
@@ -303,7 +353,7 @@ constexpr bool testComplexityBasic() {
 
   // ranges::set_intersection range overload
   {
-    std::array<Data, 0> out{};
+    std::array<int, 0> out{};
     std::size_t numberOfComp  = 0;
     std::size_t numberOfProj1 = 0;
     std::size_t numberOfProj2 = 0;
@@ -313,37 +363,39 @@ constexpr bool testComplexityBasic() {
       return x < y;
     };
 
-    const auto proj1 = [&numberOfProj1](const Data& d) {
+    const auto proj1 = [&numberOfProj1](int d) {
       ++numberOfProj1;
-      return d.data;
+      return d;
     };
 
-    const auto proj2 = [&numberOfProj2](const Data& d) {
+    const auto proj2 = [&numberOfProj2](int d) {
       ++numberOfProj2;
-      return d.data;
+      return d;
     };
 
     std::ranges::set_intersection(r1, r2, out.data(), comp, proj1, proj2);
 
-    assert(std::ranges::equal(out, expected, {}, &Data::data));
+    assert(std::ranges::equal(out, expected));
     assert(numberOfComp < maxOperation);
     assert(numberOfProj1 < maxOperation);
     assert(numberOfProj2 < maxOperation);
   }
+  return true;
 }
 
 } // unnamed namespace
 
 int main(int, char**) {
   testComplexityBasic();
-  testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
-  testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
-  testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
+  testComplexity();
+  testComplexityGuarantees();
 
   static_assert(testComplexityBasic());
-  static_assert(testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>());
-  static_assert(testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>());
-  static_assert(testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>());
+  static_assert(testComplexity());
+
+  // We hit the maximum constexpr evaluation step limit even if we split this into
+  // three separate checks (one per iterator type of the outermost layer), so skip the constexpr validation:
+  // static_assert(testComplexityGuarantees());
 
   return 0;
 }

>From b1bfa0fb2d4d2448116f5f3270fce37dcf7d18bb Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 28 May 2024 22:07:17 +0100
Subject: [PATCH 53/56] Take into account additional comparison performed in
 assertion in hardening mode inside `testComplexityBasic()` as well.

---
 .../set_intersection_complexity.pass.cpp      | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
index d4464dc649987..ddf4087ddd6cd 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp
@@ -44,6 +44,16 @@
 #include "test_iterators.h"
 
 namespace {
+
+// __debug_less will perform an additional comparison in an assertion
+static constexpr unsigned std_less_comparison_count_multiplier() noexcept {
+#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
+  return 2;
+#else
+  return 1;
+#endif
+}
+
 struct [[nodiscard]] OperationCounts {
   std::size_t comparisons{};
   struct PerInput {
@@ -58,15 +68,8 @@ struct [[nodiscard]] OperationCounts {
   std::array<PerInput, 2> in;
 
   [[nodiscard]] constexpr bool isNotBetterThan(const OperationCounts& expect) {
-    // __debug_less will perform an additional comparison in an assertion
-    constexpr unsigned comparison_multiplier =
-#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
-        2;
-#else
-        1;
-#endif
-    return comparisons >= comparison_multiplier * expect.comparisons && in[0].isNotBetterThan(expect.in[0]) &&
-           in[1].isNotBetterThan(expect.in[1]);
+    return std_less_comparison_count_multiplier() * comparisons >= expect.comparisons &&
+           in[0].isNotBetterThan(expect.in[0]) && in[1].isNotBetterThan(expect.in[1]);
   }
 };
 
@@ -303,7 +306,7 @@ constexpr bool testComplexityBasic() {
   std::array<int, 5> r2{2, 4, 6, 8, 10};
   std::array<int, 0> expected{};
 
-  const std::size_t maxOperation = 2 * (r1.size() + r2.size()) - 1;
+  const std::size_t maxOperation = std_less_comparison_count_multiplier() * (2 * (r1.size() + r2.size()) - 1);
 
   // std::set_intersection
   {

>From f501bdcd37f8f4a213fed18989929a9f3357f8c0 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 28 May 2024 22:46:13 +0100
Subject: [PATCH 54/56] Add release note. It reads a bit awkward, maybe I'll
 come up with something better after some more thinking...

---
 libcxx/docs/ReleaseNotes/19.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index b466b4cd8140c..0373df004f5eb 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -62,6 +62,10 @@ Improvements and New Features
 - The ``std::ranges::minmax`` algorithm has been optimized for integral types, resulting in a performance increase of
   up to 100x.
 
+- The ``std::set_intersection`` and ``std::ranges::set_intersection`` algorithms have been optimized to fast-forward over
+  contiguous ranges of non-matching values, reducing the number of comparisons from linear to
+  logarithmic growth with the number of elements in best-case scenarios.
+
 - The ``_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM`` macro has been added to make the declarations in ``<strstream>`` available.
 
 - The ``_LIBCPP_ENABLE_CXX26_REMOVED_SHARED_PTR_ATOMICS`` macro has been added to make the declarations in ``<memory>``

>From c5df570eb19d9a2fd296916b55960df621d9609f Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 16 Jul 2024 16:40:14 +0100
Subject: [PATCH 55/56] * s/__(prev_)?advanced/__$1_may_be_equal/g *
 s/__set_intersection_add_output_unless/__set_intersection_add_output_if_equal/
 * Add comments to explain the logic for equality comparison

---
 libcxx/include/__algorithm/set_intersection.h | 31 ++++++++++++-------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 80f1a47a56590..89668734de274 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -44,19 +44,24 @@ struct __set_intersection_result {
 };
 
 // Helper for __set_intersection() with one-sided binary search: populate result and advance input iterators if they
-// haven't advanced in the last 2 calls. This function is very intimately related to the way it is used and doesn't
-// attempt to abstract that, it's not appropriate for general usage outside of its context.
+// are found to potentially contain the same value in two consecutive calls. This function is very intimately related to
+// the way it is used and doesn't attempt to abstract that, it's not appropriate for general usage outside of its
+// context.
 template <class _InForwardIter1, class _InForwardIter2, class _OutIter>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_intersection_add_output_unless(
-    bool __advanced, _InForwardIter1& __first1, _InForwardIter2& __first2, _OutIter& __result, bool& __prev_advanced) {
-  if (__advanced || __prev_advanced) {
-    __prev_advanced = __advanced;
-  } else {
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_intersection_add_output_if_equal(
+    bool __may_be_equal,
+    _InForwardIter1& __first1,
+    _InForwardIter2& __first2,
+    _OutIter& __result,
+    bool& __prev_maybe_equal) {
+  if (__may_be_equal && __prev_maybe_equal) {
     *__result = *__first1;
     ++__result;
     ++__first1;
     ++__first2;
-    __prev_advanced = true;
+    __prev_maybe_equal = false;
+  } else {
+    __prev_maybe_equal = __may_be_equal;
   }
 }
 
@@ -91,20 +96,24 @@ __set_intersection(
     std::forward_iterator_tag,
     std::forward_iterator_tag) {
   _LIBCPP_CONSTEXPR std::__identity __proj;
-  bool __prev_advanced = true;
+  bool __prev_maybe_equal = false;
 
   while (__first2 != __last2) {
     _InForwardIter1 __first1_next =
         std::__lower_bound_onesided<_AlgPolicy>(__first1, __last1, *__first2, __comp, __proj);
     std::swap(__first1_next, __first1);
-    std::__set_intersection_add_output_unless(__first1 != __first1_next, __first1, __first2, __result, __prev_advanced);
+    // keeping in mind that a==b iff !(a<b) && !(b<a):
+    // if we can't advance __first1, that means !(*__first1 < *_first2), therefore __may_be_equal==true
+    std::__set_intersection_add_output_if_equal(
+        __first1 == __first1_next, __first1, __first2, __result, __prev_maybe_equal);
     if (__first1 == __last1)
       break;
 
     _InForwardIter2 __first2_next =
         std::__lower_bound_onesided<_AlgPolicy>(__first2, __last2, *__first1, __comp, __proj);
     std::swap(__first2_next, __first2);
-    std::__set_intersection_add_output_unless(__first2 != __first2_next, __first1, __first2, __result, __prev_advanced);
+    std::__set_intersection_add_output_if_equal(
+        __first2 == __first2_next, __first1, __first2, __result, __prev_maybe_equal);
   }
   return __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>(
       _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)),

>From 6189e9503ef41e7eb49ae7acc0ff0ecf482e2c1d Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 16 Jul 2024 16:47:07 +0100
Subject: [PATCH 56/56] Oops

---
 libcxx/include/__algorithm/set_intersection.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 89668734de274..bb0d86cd0f58d 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -53,15 +53,15 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_intersection_add_
     _InForwardIter1& __first1,
     _InForwardIter2& __first2,
     _OutIter& __result,
-    bool& __prev_maybe_equal) {
-  if (__may_be_equal && __prev_maybe_equal) {
+    bool& __prev_may_be_equal) {
+  if (__may_be_equal && __prev_may_be_equal) {
     *__result = *__first1;
     ++__result;
     ++__first1;
     ++__first2;
-    __prev_maybe_equal = false;
+    __prev_may_be_equal = false;
   } else {
-    __prev_maybe_equal = __may_be_equal;
+    __prev_may_be_equal = __may_be_equal;
   }
 }
 
@@ -96,7 +96,7 @@ __set_intersection(
     std::forward_iterator_tag,
     std::forward_iterator_tag) {
   _LIBCPP_CONSTEXPR std::__identity __proj;
-  bool __prev_maybe_equal = false;
+  bool __prev_may_be_equal = false;
 
   while (__first2 != __last2) {
     _InForwardIter1 __first1_next =
@@ -105,7 +105,7 @@ __set_intersection(
     // keeping in mind that a==b iff !(a<b) && !(b<a):
     // if we can't advance __first1, that means !(*__first1 < *_first2), therefore __may_be_equal==true
     std::__set_intersection_add_output_if_equal(
-        __first1 == __first1_next, __first1, __first2, __result, __prev_maybe_equal);
+        __first1 == __first1_next, __first1, __first2, __result, __prev_may_be_equal);
     if (__first1 == __last1)
       break;
 
@@ -113,7 +113,7 @@ __set_intersection(
         std::__lower_bound_onesided<_AlgPolicy>(__first2, __last2, *__first1, __comp, __proj);
     std::swap(__first2_next, __first2);
     std::__set_intersection_add_output_if_equal(
-        __first2 == __first2_next, __first1, __first2, __result, __prev_maybe_equal);
+        __first2 == __first2_next, __first1, __first2, __result, __prev_may_be_equal);
   }
   return __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>(
       _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)),



More information about the libcxx-commits mailing list