[llvm] [clang-tools-extra] [libcxx] [libc++] Introduce one-sided binary search for lower_bound on non-random iterators, and use that to improve the average complexity of set_intersection. (PR #75230)

Iuri Chaer via cfe-commits cfe-commits at lists.llvm.org
Thu Feb 1 10:06:27 PST 2024


https://github.com/ichaer updated https://github.com/llvm/llvm-project/pull/75230

>From b65415f5b70591eae965cae1316054145d399158 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:52:13 +0100
Subject: [PATCH 01/17] [libc++][test] Add lower_bound complexity validation
 tests prior to introducing one-sided binary search for non-random iterators.

---
 .../lower.bound/lower_bound.pass.cpp          | 19 +++++--
 .../lower.bound/lower_bound_comp.pass.cpp     | 28 ++++++++--
 libcxx/test/support/test_iterators.h          | 55 ++++++++++++++-----
 3 files changed, 79 insertions(+), 23 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
index a2d8ab632303c..5c11962d13777 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
@@ -39,11 +39,20 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-    Iter i = std::lower_bound(first, last, value);
-    for (Iter j = first; j != i; ++j)
-        assert(*j < value);
-    for (Iter j = i; j != last; ++j)
-        assert(!(*j < value));
+  std::size_t strides{};
+  std::size_t displacement{};
+  stride_counting_iterator f(first, &strides, &displacement);
+  stride_counting_iterator l(last, &strides, &displacement);
+
+  auto i = std::lower_bound(f, l, value);
+  for (auto j = f; j != i; ++j)
+    assert(*j < value);
+  for (auto j = i; j != l; ++j)
+    assert(!(*j < value));
+
+  auto len = std::distance(first, last);
+  assert(strides <= 2.5 * len + 1);
+  assert(displacement <= 2.5 * len + 1);
 }
 
 template <class Iter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
index b9133028d9ade..05fd43eada461 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
@@ -17,6 +17,7 @@
 #include <vector>
 #include <cassert>
 #include <cstddef>
+#include <cmath>
 
 #include "test_macros.h"
 #include "test_iterators.h"
@@ -38,11 +39,28 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-    Iter i = std::lower_bound(first, last, value, std::greater<int>());
-    for (Iter j = first; j != i; ++j)
-        assert(std::greater<int>()(*j, value));
-    for (Iter j = i; j != last; ++j)
-        assert(!std::greater<int>()(*j, value));
+  std::size_t strides{};
+  std::size_t displacement{};
+  stride_counting_iterator f(first, &strides, &displacement);
+  stride_counting_iterator l(last, &strides, &displacement);
+
+  std::size_t comparisons{};
+  auto cmp = [&comparisons](int rhs, int lhs) {
+    ++comparisons;
+    return std::greater<int>()(rhs, lhs);
+  };
+
+  auto i = std::lower_bound(f, l, value, cmp);
+
+  for (auto j = f; j != i; ++j)
+    assert(std::greater<int>()(*j, value));
+  for (auto j = i; j != l; ++j)
+    assert(!std::greater<int>()(*j, value));
+
+  auto len = std::distance(first, last);
+  assert(strides <= 2.5 * len + 1);
+  assert(displacement <= 2.5 * len + 1);
+  assert(comparisons <= 2 * ceil(log(len + 1) + 2));
 }
 
 template <class Iter>
diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 1133b9597d09c..3b86a93564e4b 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -730,7 +730,9 @@ struct common_input_iterator {
 // * `stride_displacement`, which records the displacement of the calls. This means that both
 //   op++/op+= will increase the displacement counter by 1, and op--/op-= will decrease the
 //   displacement counter by 1.
-template <class It>
+template <class It,
+          class StrideCountType        = std::iter_difference_t<It>,
+          class StrideDisplacementType = std::iter_difference_t<It>>
 class stride_counting_iterator {
 public:
     using value_type = typename iter_value_or_void<It>::type;
@@ -743,16 +745,40 @@ class stride_counting_iterator {
         std::conditional_t<std::input_iterator<It>,         std::input_iterator_tag,
         /* else */                                          std::output_iterator_tag
     >>>>>;
+    using iterator_category = iterator_concept;
 
     stride_counting_iterator() requires std::default_initializable<It> = default;
 
     constexpr explicit stride_counting_iterator(It const& it) : base_(base(it)) { }
 
+    constexpr explicit stride_counting_iterator(
+        It const& it, StrideCountType* stride_count, StrideDisplacementType* stride_displacement)
+        : base_(base(it)), stride_count_(stride_count), stride_displacement_(stride_displacement) {}
+
+    constexpr stride_counting_iterator(const stride_counting_iterator& o) { *this = o; }
+    constexpr stride_counting_iterator(stride_counting_iterator&& o) { *this = o; }
+
+    constexpr stride_counting_iterator& operator=(const stride_counting_iterator& o) {
+      base_ = o.base_;
+      // if memory backing count is owned by the object, copy values
+      if (o.stride_count_ == &o.stride_count_default_) {
+        assert(o.stride_displacement_ == &o.stride_displacement_default_);
+        *stride_count_        = *o.stride_count_;
+        *stride_displacement_ = *o.stride_displacement_;
+        return *this;
+      }
+      // otherwise share the same externally-owned variables
+      stride_count_        = o.stride_count_;
+      stride_displacement_ = o.stride_displacement_;
+      return *this;
+    }
+    constexpr stride_counting_iterator& operator=(stride_counting_iterator&& o) { return *this = o; }
+
     friend constexpr It base(stride_counting_iterator const& it) { return It(it.base_); }
 
-    constexpr difference_type stride_count() const { return stride_count_; }
+    constexpr StrideCountType stride_count() const { return *stride_count_; }
 
-    constexpr difference_type stride_displacement() const { return stride_displacement_; }
+    constexpr StrideDisplacementType stride_displacement() const { return *stride_displacement_; }
 
     constexpr decltype(auto) operator*() const { return *It(base_); }
 
@@ -761,8 +787,8 @@ class stride_counting_iterator {
     constexpr stride_counting_iterator& operator++() {
         It tmp(base_);
         base_ = base(++tmp);
-        ++stride_count_;
-        ++stride_displacement_;
+        ++*stride_count_;
+        ++*stride_displacement_;
         return *this;
     }
 
@@ -781,8 +807,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(--tmp);
-        ++stride_count_;
-        --stride_displacement_;
+        ++*stride_count_;
+        --*stride_displacement_;
         return *this;
     }
 
@@ -799,8 +825,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(tmp += n);
-        ++stride_count_;
-        ++stride_displacement_;
+        ++*stride_count_;
+        ++*stride_displacement_;
         return *this;
     }
 
@@ -809,8 +835,8 @@ class stride_counting_iterator {
     {
         It tmp(base_);
         base_ = base(tmp -= n);
-        ++stride_count_;
-        --stride_displacement_;
+        ++*stride_count_;
+        --*stride_displacement_;
         return *this;
     }
 
@@ -873,8 +899,11 @@ class stride_counting_iterator {
 
 private:
     decltype(base(std::declval<It>())) base_;
-    difference_type stride_count_ = 0;
-    difference_type stride_displacement_ = 0;
+    StrideCountType stride_count_default_               = 0;
+    StrideDisplacementType stride_displacement_default_ = 0;
+
+    StrideCountType* stride_count_               = &stride_count_default_;
+    StrideDisplacementType* stride_displacement_ = &stride_displacement_default_;
 };
 template <class It>
 stride_counting_iterator(It) -> stride_counting_iterator<It>;

>From f6bcf2743080ced55d9d589daed611c5e9696ac5 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:52:37 +0100
Subject: [PATCH 02/17] [libc++] Introduce one-sided binary search for
 lower_bound on non-random iterators.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
the first one is when operating over non-random iterators, because the classic algorithm requires knowing the
container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
would yield Ω(n*log(n)) comparisons and, for non-random iterators, Ω(n^2) iterator increments, whereas the one-sided
version will yield O(n) operations on both counts, with an Ω(log(n)) bound on the number of comparisons.
---
 .../include/__algorithm/iterator_operations.h | 47 +++++++++++++
 libcxx/include/__algorithm/lower_bound.h      | 69 +++++++++++++++++--
 2 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index e6176da4f5606..d73573747087e 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -87,6 +87,53 @@ struct _IterOps<_ClassicAlgPolicy> {
     std::advance(__iter, __count);
   }
 
+  // advance with sentinel, a la std::ranges::advance
+  // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
+  // use the incoming type for returning and steer clear of negative overflows
+  template <class _Iter, class _Distance>
+  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+    return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
+  }
+
+  // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
+  template <class _InputIter, class _Distance>
+  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
+    _Distance __dist{};
+    for (; __dist < __count && __iter != __sentinel; ++__dist)
+      ++__iter;
+    return __count - __dist;
+  }
+
+  // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
+  template <class _BiDirIter, class _Distance>
+  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
+    _Distance __dist{};
+    if (__count >= 0)
+      for (; __dist < __count && __iter != __sentinel; ++__dist)
+        ++__iter;
+    else
+      for (__count = -__count; __dist < __count && __iter != __sentinel; ++__dist)
+        --__iter;
+    return __count - __dist;
+  }
+
+  // advance with sentinel, a la std::ranges::advance -- RandomAccessIterator specialization
+  template <class _RandIter, class _Distance>
+  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
+    auto __dist = _IterOps::distance(__iter, __sentinel);
+    _LIBCPP_ASSERT_UNCATEGORIZED(
+        __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count<0");
+    if (__count < 0)
+      __dist = __dist > __count ? __dist : __count;
+    else
+      __dist = __dist < __count ? __dist : __count;
+    __iter += __dist;
+    return __count - __dist;
+  }
+
   // distance
   template <class _Iter>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 91c3bdaafd0cf..b432829667fa9 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -27,11 +27,13 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
-_Iter __lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
-  auto __len = _IterOps<_AlgPolicy>::distance(__first, __last);
-
+template <class _AlgPolicy, class _Iter, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting(
+    _Iter __first,
+    const _Type& __value,
+    typename iterator_traits<_Iter>::difference_type __len,
+    _Comp& __comp,
+    _Proj& __proj) {
   while (__len != 0) {
     auto __l2 = std::__half_positive(__len);
     _Iter __m = __first;
@@ -46,13 +48,68 @@ _Iter __lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp& __
   return __first;
 }
 
+// One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
+// advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
+// 2*(log(n)-1) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
+// the first one is when operating over non-random iterators, because the classic algorithm requires knowing the
+// container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
+// traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
+// would yield Ω(n*log(n)) comparisons and, for non-random iterators, Ω(n^2) iterator increments, whereas the one-sided
+// version will yield O(n) operations on both counts, with an Ω(log(n)) bound on the number of comparisons.
+template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
+__lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
+  // static_assert(std::is_base_of<std::forward_iterator_tag, typename _IterOps<_AlgPolicy>::template
+  // __iterator_category<_Iter>>::value,
+  //       "lower_bound() is a multipass algorithm and requires forward iterator or better");
+
+  using _Distance = typename iterator_traits<_Iter>::difference_type;
+  for (_Distance __step = 1; __first != __last; __step <<= 1) {
+    auto __it   = __first;
+    auto __dist = __step - _IterOps<_AlgPolicy>::advance(__it, __step, __last);
+    // once we reach the last range where needle can be we must start
+    // looking inwards, bisecting that range
+    if (__it == __last || !std::__invoke(__comp, std::__invoke(__proj, *__it), __value)) {
+      return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
+    }
+    // range not found, move forward!
+    __first = std::move(__it);
+  }
+  return __first;
+}
+
+template <class _AlgPolicy, class _InputIter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIter __lower_bound(
+    _InputIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj, std::input_iterator_tag) {
+  return std::__lower_bound_onesided<_AlgPolicy>(__first, __last, __value, __comp, __proj);
+}
+
+template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter __lower_bound(
+    _RandIter __first,
+    _Sent __last,
+    const _Type& __value,
+    _Comp& __comp,
+    _Proj& __proj,
+    std::random_access_iterator_tag) {
+  const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
+  return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
+}
+
+template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
+__lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp&& __comp, _Proj&& __proj) {
+  return std::__lower_bound<_AlgPolicy>(
+      __first, __last, __value, __comp, __proj, typename _IterOps<_AlgPolicy>::template __iterator_category<_Iter>());
+}
+
 template <class _ForwardIterator, class _Tp, class _Compare>
 _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
   static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value,
                 "The comparator has to be callable");
   auto __proj = std::__identity();
-  return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, __comp, __proj);
+  return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, std::move(__comp), std::move(__proj));
 }
 
 template <class _ForwardIterator, class _Tp>

>From 36bb63e36b56f98da2b808ab55410bec5c1d0bb5 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:53:09 +0100
Subject: [PATCH 03/17] [libc++][test] Add set_intersection complexity
 validation tests prior to introducing use of one-sided binary search to
 fast-forward over ranges of elements.

---
 .../ranges_set_intersection.pass.cpp          | 240 +++++++++++++++++-
 1 file changed, 234 insertions(+), 6 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index 0ee89e0131a07..30cedd19038d7 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -28,6 +28,9 @@
 #include <algorithm>
 #include <array>
 #include <concepts>
+#include <cstddef>
+#include <iterator>
+#include <type_traits>
 
 #include "almost_satisfies_types.h"
 #include "MoveOnly.h"
@@ -93,14 +96,17 @@ static_assert(!HasSetIntersectionRange<UncheckedRange<MoveOnly*>, UncheckedRange
 
 using std::ranges::set_intersection_result;
 
+// TODO: std::ranges::set_intersection calls std::ranges::copy
+// std::ranges::copy(contiguous_iterator<int*>, sentinel_wrapper<contiguous_iterator<int*>>, contiguous_iterator<int*>) doesn't seem to work.
+// It seems that std::ranges::copy calls std::copy, which unwraps contiguous_iterator<int*> into int*,
+// and then it failed because there is no == between int* and sentinel_wrapper<contiguous_iterator<int*>>
+template <typename Iter>
+using SentinelWorkaround = std::conditional_t<std::contiguous_iterator<Iter>, Iter, sentinel_wrapper<Iter>>;
+
 template <class In1, class In2, class Out, std::size_t N1, std::size_t N2, std::size_t N3>
 constexpr void testSetIntersectionImpl(std::array<int, N1> in1, std::array<int, N2> in2, std::array<int, N3> expected) {
-  // TODO: std::ranges::set_intersection calls std::ranges::copy
-  // std::ranges::copy(contiguous_iterator<int*>, sentinel_wrapper<contiguous_iterator<int*>>, contiguous_iterator<int*>) doesn't seem to work.
-  // It seems that std::ranges::copy calls std::copy, which unwraps contiguous_iterator<int*> into int*,
-  // and then it failed because there is no == between int* and sentinel_wrapper<contiguous_iterator<int*>>
-  using Sent1 = std::conditional_t<std::contiguous_iterator<In1>, In1, sentinel_wrapper<In1>>;
-  using Sent2 = std::conditional_t<std::contiguous_iterator<In2>, In2, sentinel_wrapper<In2>>;
+  using Sent1 = SentinelWorkaround<In1>;
+  using Sent2 = SentinelWorkaround<In2>;
 
   // iterator overload
   {
@@ -272,6 +278,225 @@ constexpr void runAllIteratorPermutationsTests() {
   static_assert(withAllPermutationsOfInIter1AndInIter2<contiguous_iterator<int*>>());
 }
 
+namespace {
+struct [[nodiscard]] OperationCounts {
+  std::size_t comparisons{};
+  struct PerInput {
+    std::size_t proj{};
+    std::size_t iterator_strides{};
+    std::ptrdiff_t iterator_displacement{};
+
+    // IGNORES proj!
+    [[nodiscard]] constexpr bool operator==(const PerInput& o) const {
+      return iterator_strides == o.iterator_strides && iterator_displacement == o.iterator_displacement;
+    }
+
+    [[nodiscard]] constexpr bool matchesExpectation(const PerInput& expect) {
+      return proj <= expect.proj && iterator_strides <= expect.iterator_strides &&
+             iterator_displacement <= expect.iterator_displacement;
+    }
+  };
+  std::array<PerInput, 2> in;
+
+  [[nodiscard]] constexpr bool matchesExpectation(const OperationCounts& expect) {
+    return comparisons <= expect.comparisons && in[0].matchesExpectation(expect.in[0]) &&
+           in[1].matchesExpectation(expect.in[1]);
+  }
+
+  [[nodiscard]] constexpr bool operator==(const OperationCounts& o) const {
+    return comparisons == o.comparisons && std::ranges::equal(in, o.in);
+  }
+};
+} // namespace
+
+#include <iostream>
+template <template <class...> class In1,
+          template <class...>
+          class In2,
+          class Out,
+          std::size_t N1,
+          std::size_t N2,
+          std::size_t N3>
+constexpr void testSetIntersectionAndReturnOpCounts(
+    std::array<int, N1> in1,
+    std::array<int, N2> in2,
+    std::array<int, N3> expected,
+    const OperationCounts& expectedOpCounts) {
+  OperationCounts ops;
+
+  const auto comp = [&ops](int x, int y) {
+    ++ops.comparisons;
+    return x < y;
+  };
+
+  std::array<int, N3> out;
+
+  stride_counting_iterator b1(
+      In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+  stride_counting_iterator e1(
+      In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+  stride_counting_iterator b2(
+      In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+  stride_counting_iterator e2(
+      In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+
+  std::set_intersection(b1, e1, b2, e2, Out(out.data()), comp);
+
+  assert(std::ranges::equal(out, expected));
+  assert(ops.matchesExpectation(expectedOpCounts));
+}
+
+template <template <class...> class In1,
+          template <class...>
+          class In2,
+          class Out,
+          std::size_t N1,
+          std::size_t N2,
+          std::size_t N3>
+constexpr void testRangesSetIntersectionAndReturnOpCounts(
+    std::array<int, N1> in1,
+    std::array<int, N2> in2,
+    std::array<int, N3> expected,
+    const OperationCounts& expectedOpCounts) {
+  OperationCounts ops;
+
+  const auto comp = [&ops](int x, int y) {
+    ++ops.comparisons;
+    return x < y;
+  };
+
+  const auto proj1 = [&ops](const int& i) {
+    ++ops.in[0].proj;
+    return i;
+  };
+
+  const auto proj2 = [&ops](const int& i) {
+    ++ops.in[1].proj;
+    return i;
+  };
+
+  std::array<int, N3> out;
+
+  stride_counting_iterator b1(
+      In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+  stride_counting_iterator e1(
+      In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+  stride_counting_iterator b2(
+      In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+  stride_counting_iterator e2(
+      In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+
+  std::ranges::subrange r1{b1, SentinelWorkaround<decltype(e1)>{e1}};
+  std::ranges::subrange r2{b2, SentinelWorkaround<decltype(e2)>{e2}};
+  std::same_as<set_intersection_result<decltype(e1), decltype(e2), Out>> decltype(auto) result =
+      std::ranges::set_intersection(r1, r2, Out{out.data()}, comp, proj1, proj2);
+  assert(std::ranges::equal(out, expected));
+  assert(base(result.in1) == base(e1));
+  assert(base(result.in2) == base(e2));
+  assert(base(result.out) == out.data() + out.size());
+  assert(ops.matchesExpectation(expectedOpCounts));
+}
+
+template <template <typename...> class In1, template <typename...> class In2, class Out>
+constexpr void testComplexityParameterizedIter() {
+  // Worst-case complexity:
+  // Let N=(last1 - first1) and M=(last2 - first2)
+  // At most 2*(N+M) - 1 comparisons and applications of each projection.
+  // At most 2*(N+M) iterator mutations.
+  {
+    std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+    std::array r2{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+    std::array<int, 0> expected{};
+
+    OperationCounts expectedCounts;
+    expectedCounts.comparisons                 = 37;
+    expectedCounts.in[0].proj                  = 37;
+    expectedCounts.in[0].iterator_strides      = 30;
+    expectedCounts.in[0].iterator_displacement = 30;
+    expectedCounts.in[1]                       = expectedCounts.in[0];
+
+    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+  }
+
+  {
+    std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+    std::array r2{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+    std::array expected{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+    OperationCounts expectedCounts;
+    expectedCounts.comparisons                 = 38;
+    expectedCounts.in[0].proj                  = 38;
+    expectedCounts.in[0].iterator_strides      = 30;
+    expectedCounts.in[0].iterator_displacement = 30;
+    expectedCounts.in[1]                       = expectedCounts.in[0];
+
+    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+  }
+
+  // Lower complexity when there is low overlap between ranges: we can make 2*log(X) comparisons when one range
+  // has X elements that can be skipped over.
+  {
+    std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    std::array r2{15};
+    std::array expected{15};
+
+    OperationCounts expectedCounts;
+    expectedCounts.comparisons                 = 8;
+    expectedCounts.in[0].proj                  = 8;
+    expectedCounts.in[0].iterator_strides      = 24;
+    expectedCounts.in[0].iterator_displacement = 24;
+    expectedCounts.in[1].proj                  = 8;
+    expectedCounts.in[1].iterator_strides      = 3;
+    expectedCounts.in[1].iterator_displacement = 3;
+
+    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+  }
+
+  {
+    std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    std::array r2{0, 16};
+    std::array<int, 0> expected{};
+
+    OperationCounts expectedCounts;
+    expectedCounts.comparisons                 = 10;
+    expectedCounts.in[0].proj                  = 10;
+    expectedCounts.in[0].iterator_strides      = 24;
+    expectedCounts.in[0].iterator_displacement = 24;
+    expectedCounts.in[1].proj                  = 10;
+    expectedCounts.in[1].iterator_strides      = 4;
+    expectedCounts.in[1].iterator_displacement = 4;
+
+    testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+    testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+  }
+}
+
+template <template <typename...> class In2, class Out>
+constexpr void testComplexityParameterizedIterPermutateIn1() {
+  //common_input_iterator
+  testComplexityParameterizedIter<forward_iterator, In2, Out>();
+  testComplexityParameterizedIter<bidirectional_iterator, In2, Out>();
+  testComplexityParameterizedIter<random_access_iterator, In2, Out>();
+}
+
+template <class Out>
+constexpr void testComplexityParameterizedIterPermutateIn1In2() {
+  testComplexityParameterizedIterPermutateIn1<forward_iterator, Out>();
+  testComplexityParameterizedIterPermutateIn1<bidirectional_iterator, Out>();
+  testComplexityParameterizedIterPermutateIn1<random_access_iterator, Out>();
+}
+
+constexpr bool testComplexityMultipleTypes() {
+  //testComplexityParameterizedIter<cpp20_input_iterator, random_access_iterator, OutIter>();
+  testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
+  testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
+  testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
+  return true;
+}
+
 constexpr bool test() {
   // check that every element is copied exactly once
   {
@@ -572,5 +797,8 @@ int main(int, char**) {
   // than the step limit.
   runAllIteratorPermutationsTests();
 
+  testComplexityMultipleTypes();
+  static_assert(testComplexityMultipleTypes());
+
   return 0;
 }

>From c23272c389329d3af83c0f58f896ee6ea47260ed Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:53:31 +0100
Subject: [PATCH 04/17] [libc++] Introduce use of __lower_bound_onesided to
 improve average complexity of set_intersection.

---
 libcxx/include/__algorithm/set_intersection.h | 154 +++++++++++++++++-
 1 file changed, 150 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index f2603fe1365ac..556738022f485 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -12,9 +12,13 @@
 #include <__algorithm/comp.h>
 #include <__algorithm/comp_ref_type.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/lower_bound.h>
 #include <__config>
+#include <__functional/identity.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/next.h>
+#include <__type_traits/is_same.h>
+#include <__utility/exchange.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -36,9 +40,122 @@ struct __set_intersection_result {
 };
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-__set_intersection(
-    _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+struct _LIBCPP_NODISCARD_EXT __set_intersector {
+  _InIter1& __first1_;
+  const _Sent1& __last1_;
+  _InIter2& __first2_;
+  const _Sent2& __last2_;
+  _OutIter& __result_;
+  _Compare& __comp_;
+  static constexpr auto __proj_ = std::__identity();
+  bool __prev_advanced_         = true;
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersector(
+      _InIter1& __first1, _Sent1& __last1, _InIter2& __first2, _Sent2& __last2, _OutIter& __result, _Compare& __comp)
+      : __first1_(__first1),
+        __last1_(__last1),
+        __first2_(__first2),
+        __last2_(__last2),
+        __result_(__result),
+        __comp_(__comp) {}
+
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+      _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+      operator()() && {
+    while (__first2_ != __last2_) {
+      __advance1_and_maybe_add_result();
+      if (__first1_ == __last1_)
+        break;
+      __advance2_and_maybe_add_result();
+    }
+    return __set_intersection_result<_InIter1, _InIter2, _OutIter>(
+        _IterOps<_AlgPolicy>::next(std::move(__first1_), std::move(__last1_)),
+        _IterOps<_AlgPolicy>::next(std::move(__first2_), std::move(__last2_)),
+        std::move(__result_));
+  }
+
+private:
+  // advance __iter to the first element in the range where !__comp_(*__iter, __value)
+  // add result if this is the second consecutive call without advancing
+  // this method only works if you alternate calls between __advance1_and_maybe_add_result() and
+  // __advance2_and_maybe_add_result()
+  template <class _Iter, class _Sent, class _Value>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+  __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
+    // use one-sided lower bound for improved algorithmic complexity bounds
+    const auto __tmp =
+        std::exchange(__iter, std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_));
+    __add_output_unless(__tmp != __iter);
+  }
+
+  // advance __first1_ to the first element in the range where !__comp_(*__first1_, *__first2_)
+  // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance1_and_maybe_add_result() {
+    __advance_and_maybe_add_result(__first1_, __last1_, *__first2_);
+  }
+
+  // advance __first2_ to the first element in the range where !__comp_(*__first2_, *__first1_)
+  // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance2_and_maybe_add_result() {
+    __advance_and_maybe_add_result(__first2_, __last2_, *__first1_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __add_output_unless(bool __advanced) {
+    if (__advanced | __prev_advanced_) {
+      __prev_advanced_ = __advanced;
+    } else {
+      *__result_ = *__first1_;
+      ++__result_;
+      ++__first1_;
+      ++__first2_;
+      __prev_advanced_ = true;
+    }
+  }
+};
+
+// with forward iterators we can use binary search to skip over entries
+template <class _AlgPolicy,
+          class _Compare,
+          class _InForwardIter1,
+          class _Sent1,
+          class _InForwardIter2,
+          class _Sent2,
+          class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
+    __set_intersection(
+        _InForwardIter1 __first1,
+        _Sent1 __last1,
+        _InForwardIter2 __first2,
+        _Sent2 __last2,
+        _OutIter __result,
+        _Compare&& __comp,
+        std::forward_iterator_tag,
+        std::forward_iterator_tag) {
+  std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
+      __intersector(__first1, __last1, __first2, __last2, __result, __comp);
+  return std::move(__intersector)();
+}
+
+// input iterators are not suitable for multipass algorithms, so we stick to the classic single-pass version
+template <class _AlgPolicy,
+          class _Compare,
+          class _InInputIter1,
+          class _Sent1,
+          class _InInputIter2,
+          class _Sent2,
+          class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
+    __set_intersection(
+        _InInputIter1 __first1,
+        _Sent1 __last1,
+        _InInputIter2 __first2,
+        _Sent2 __last2,
+        _OutIter __result,
+        _Compare&& __comp,
+        std::input_iterator_tag,
+        std::input_iterator_tag) {
   while (__first1 != __last1 && __first2 != __last2) {
     if (__comp(*__first1, *__first2))
       ++__first1;
@@ -52,12 +169,41 @@ __set_intersection(
     }
   }
 
-  return __set_intersection_result<_InIter1, _InIter2, _OutIter>(
+  return std::__set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>(
       _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)),
       _IterOps<_AlgPolicy>::next(std::move(__first2), std::move(__last2)),
       std::move(__result));
 }
 
+template <class _AlgPolicy, class _Iter>
+class __set_intersection_iter_category {
+  template <class _It>
+  using __cat = typename std::_IterOps<_AlgPolicy>::template __iterator_category<_It>;
+  template <class _It>
+  static auto test(__cat<_It>*) -> __cat<_It>;
+  template <class>
+  static std::input_iterator_tag test(...);
+
+public:
+  using __type = decltype(test<_Iter>(nullptr));
+};
+
+template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+    __set_intersection(
+        _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+  return std::__set_intersection<_AlgPolicy>(
+      std::move(__first1),
+      std::move(__last1),
+      std::move(__first2),
+      std::move(__last2),
+      std::move(__result),
+      std::forward<_Compare>(__comp),
+      typename std::__set_intersection_iter_category<_AlgPolicy, _InIter1>::__type(),
+      typename std::__set_intersection_iter_category<_AlgPolicy, _InIter2>::__type());
+}
+
 template <class _InputIterator1, class _InputIterator2, class _OutputIterator, class _Compare>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_intersection(
     _InputIterator1 __first1,

>From 0b57ea00b44dbe69bc5125a08691a72b0dea42ce Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 2 Jan 2024 17:18:21 +0000
Subject: [PATCH 05/17] Fix `constexpr` annotations.

---
 libcxx/include/__algorithm/iterator_operations.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index d73573747087e..21117e6b7d760 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -91,13 +91,13 @@ struct _IterOps<_ClassicAlgPolicy> {
   // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
   // use the incoming type for returning and steer clear of negative overflows
   template <class _Iter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
     return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
   }
 
   // advance with sentinel, a la std::ranges::advance -- InputIterator specialization
   template <class _InputIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
     _Distance __dist{};
     for (; __dist < __count && __iter != __sentinel; ++__dist)
@@ -107,7 +107,7 @@ struct _IterOps<_ClassicAlgPolicy> {
 
   // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization
   template <class _BiDirIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
     _Distance __dist{};
     if (__count >= 0)

>From 08af54897cd8e39a25a1e97b0174b68beb408cd0 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 2 Jan 2024 17:18:59 +0000
Subject: [PATCH 06/17] Remove std::exchange dependency from
 std::set_intersection so it works before C++14

---
 libcxx/include/__algorithm/set_intersection.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 556738022f485..46f6fbe4d3dd2 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -83,8 +83,8 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
   __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
     // use one-sided lower bound for improved algorithmic complexity bounds
-    const auto __tmp =
-        std::exchange(__iter, std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_));
+    const auto __tmp = std::move(__iter);
+    __iter = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_);
     __add_output_unless(__tmp != __iter);
   }
 

>From 7aa3927064083b6a96bfcc4e00d1b4fc24d9c96e Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 2 Jan 2024 17:20:06 +0000
Subject: [PATCH 07/17] Review feedback: don't use one-sided lower bound in
 lower_bound() itself since that violates the complexity guarantees from the
 standard.

---
 libcxx/include/__algorithm/lower_bound.h       | 18 ++----------------
 .../lower.bound/lower_bound.pass.cpp           | 10 +++++-----
 .../lower.bound/lower_bound_comp.pass.cpp      | 13 ++++++-------
 3 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index b432829667fa9..3febcb411268f 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -78,38 +78,24 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
   return __first;
 }
 
-template <class _AlgPolicy, class _InputIter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIter __lower_bound(
-    _InputIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj, std::input_iterator_tag) {
-  return std::__lower_bound_onesided<_AlgPolicy>(__first, __last, __value, __comp, __proj);
-}
-
 template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp>
 _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter __lower_bound(
     _RandIter __first,
     _Sent __last,
     const _Type& __value,
     _Comp& __comp,
-    _Proj& __proj,
-    std::random_access_iterator_tag) {
+    _Proj& __proj) {
   const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
   return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
 }
 
-template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
-__lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp&& __comp, _Proj&& __proj) {
-  return std::__lower_bound<_AlgPolicy>(
-      __first, __last, __value, __comp, __proj, typename _IterOps<_AlgPolicy>::template __iterator_category<_Iter>());
-}
-
 template <class _ForwardIterator, class _Tp, class _Compare>
 _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
   static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value,
                 "The comparator has to be callable");
   auto __proj = std::__identity();
-  return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, std::move(__comp), std::move(__proj));
+  return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, __comp, __proj);
 }
 
 template <class _ForwardIterator, class _Tp>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
index 5c11962d13777..dd2916338e8f6 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
@@ -45,14 +45,14 @@ test(Iter first, Iter last, const T& value)
   stride_counting_iterator l(last, &strides, &displacement);
 
   auto i = std::lower_bound(f, l, value);
-  for (auto j = f; j != i; ++j)
+  for (auto j = base(f); j != base(i); ++j)
     assert(*j < value);
-  for (auto j = i; j != l; ++j)
+  for (auto j = base(i); j != base(l); ++j)
     assert(!(*j < value));
 
-  auto len = std::distance(first, last);
-  assert(strides <= 2.5 * len + 1);
-  assert(displacement <= 2.5 * len + 1);
+  auto len = static_cast<std::size_t>(std::distance(first, last));
+  assert(strides <= 2 * len);
+  assert(displacement <= 2 * len);
 }
 
 template <class Iter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
index 05fd43eada461..ff928e23b9006 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
@@ -51,16 +51,15 @@ test(Iter first, Iter last, const T& value)
   };
 
   auto i = std::lower_bound(f, l, value, cmp);
-
-  for (auto j = f; j != i; ++j)
+  for (auto j = base(f); j != base(i); ++j)
     assert(std::greater<int>()(*j, value));
-  for (auto j = i; j != l; ++j)
+  for (auto j = base(i); j != base(l); ++j)
     assert(!std::greater<int>()(*j, value));
 
-  auto len = std::distance(first, last);
-  assert(strides <= 2.5 * len + 1);
-  assert(displacement <= 2.5 * len + 1);
-  assert(comparisons <= 2 * ceil(log(len + 1) + 2));
+  auto len = static_cast<std::size_t>(std::distance(first, last));
+  assert(strides <= 2 * len);
+  assert(displacement <= 2 * len);
+  assert(comparisons <= std::ceil(std::log2(len + 1)));
 }
 
 template <class Iter>

>From c44c2a2b8ea818287b859c5ce318d195c59e9d65 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 2 Jan 2024 17:21:33 +0000
Subject: [PATCH 08/17] Create new benchmark for set_intersection().

---
 libcxx/benchmarks/CMakeLists.txt              |   1 +
 .../algorithms/set_intersection.bench.cpp     | 224 ++++++++++++++++++
 2 files changed, 225 insertions(+)
 create mode 100644 libcxx/benchmarks/algorithms/set_intersection.bench.cpp

diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 7591f34d938bf..da2ea6fd4c3d1 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -192,6 +192,7 @@ set(BENCHMARK_TESTS
     algorithms/ranges_sort.bench.cpp
     algorithms/ranges_sort_heap.bench.cpp
     algorithms/ranges_stable_sort.bench.cpp
+    algorithms/set_intersection.bench.cpp
     algorithms/sort.bench.cpp
     algorithms/sort_heap.bench.cpp
     algorithms/stable_sort.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
new file mode 100644
index 0000000000000..c6a01707d6531
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -0,0 +1,224 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <forward_list>
+#include <iterator>
+#include <set>
+#include <vector>
+
+#include "common.h"
+
+namespace {
+
+// types of containers we'll want to test, covering interesting iterator types
+struct VectorContainer {
+  template <typename... Args>
+  using type = std::vector<Args...>;
+
+  static constexpr const char* Name = "Vector";
+};
+
+struct SetContainer {
+  template <typename... Args>
+  using type = std::set<Args...>;
+
+  static constexpr const char* Name = "Set";
+};
+
+struct ForwardListContainer {
+  template <typename... Args>
+  using type = std::forward_list<Args...>;
+
+  static constexpr const char* Name = "ForwardList";
+};
+
+using AllContainerTypes = std::tuple<VectorContainer, SetContainer, ForwardListContainer>;
+
+// set_intersection performance may depend on where matching values lie
+enum class OverlapPosition {
+    Nowhere,
+    Front,
+    Back,
+    Interlaced,
+};
+
+struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 4> {
+  static constexpr const char* Names[] = {
+      "Nowhere", "Front", "Back", "Interlaced"};
+};
+
+// functor that moves elements from an iterator range into a new Container instance
+template <typename Container>
+struct MoveInto {};
+
+template <typename T>
+struct MoveInto<std::vector<T>> {
+    template <class It>
+    [[nodiscard]] static std::vector<T> operator()(It first, It last) {
+        std::vector<T> out;
+        std::move(first, last, std::back_inserter(out));
+        return out;
+    }
+};
+
+template <typename T>
+struct MoveInto<std::forward_list<T>> {
+    template <class It>
+    [[nodiscard]] static std::forward_list<T> operator()(It first, It last) {
+        std::forward_list<T> out;
+        std::move(first, last, std::front_inserter(out));
+        out.reverse();
+        return out;
+    }
+};
+
+template <typename T>
+struct MoveInto<std::set<T>> {
+    template <class It>
+    [[nodiscard]] static std::set<T> operator()(It first, It last) {
+        std::set<T> out;
+        std::move(first, last, std::inserter(out, out.begin()));
+        return out;
+    }
+};
+
+// lightweight wrapping around fillValues() which puts a little effort into
+// making what would be contiguous when sorted non-contiguous in memory
+template <typename T>
+std::vector<T> getVectorOfRandom(size_t N) {
+  std::vector<T> V;
+  fillValues(V, N, Order::Random);
+  sortValues(V, Order::Random);
+  return std::vector<T>(V);
+}
+
+// forward_iterator wrapping which, for each increment, moves the underlying iterator forward Stride elements
+template <typename Wrapped>
+struct StridedFwdIt {
+  Wrapped Base;
+  unsigned Stride;
+
+  using iterator_category = std::forward_iterator_tag;
+  using difference_type = typename Wrapped::difference_type;
+  using value_type = typename Wrapped::value_type;
+  using pointer = typename Wrapped::pointer;
+  using reference = typename Wrapped::reference;
+
+  StridedFwdIt(Wrapped B, unsigned Stride_) : Base(B), Stride(Stride_) { assert(Stride != 0); }
+
+  StridedFwdIt operator++() { for (unsigned I=0; I<Stride; ++I) ++Base; return *this; }
+  StridedFwdIt operator++(int) { auto Tmp = *this; ++*this; return Tmp; }
+  value_type& operator*() { return *Base; }
+  const value_type& operator*() const { return *Base; }
+  value_type& operator->() { return *Base; }
+  const value_type& operator->() const { return *Base; }
+  bool operator==(const StridedFwdIt& o) const { return Base==o.Base; }
+  bool operator!=(const StridedFwdIt& o) const { return !operator==(o); }
+};
+template <typename Wrapped> StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
+
+
+// realistically, data won't all be nicely contiguous in a container
+// we'll go through some effort to ensure that it's shuffled through memory
+template <class Container>
+std::pair<Container, Container> genCacheUnfriendlyData(size_t Size1, size_t Size2, OverlapPosition Pos) {
+  using ValueType = typename Container::value_type;
+  const MoveInto<Container> moveInto;
+  const auto SrcSize = Pos == OverlapPosition::Nowhere ? Size1 + Size2 : std::max(Size1, Size2);
+  std::vector<ValueType> Src = getVectorOfRandom<ValueType>(SrcSize);
+
+  if (Pos == OverlapPosition::Nowhere) {
+    std::sort(Src.begin(), Src.end());
+    return std::make_pair(
+        moveInto(Src.begin(), Src.begin() + Size1),
+        moveInto(Src.begin() + Size1, Src.end()));
+  }
+
+  // all other overlap types will have to copy some part of the data, but if
+  // we copy after sorting it will likely have high cache locality, so we sort
+  // each copy separately
+  auto Copy = Src;
+  std::sort(Src.begin(), Src.end());
+  std::sort(Copy.begin(), Copy.end());
+
+  switch(Pos) {
+    case OverlapPosition::Nowhere:
+      break;
+
+    case OverlapPosition::Front:
+      return std::make_pair(
+          moveInto(Src.begin(), Src.begin() + Size1),
+          moveInto(Copy.begin(), Copy.begin() + Size2));
+
+    case OverlapPosition::Back:
+      return std::make_pair(
+          moveInto(Src.begin() + (Src.size() - Size1), Src.end()),
+          moveInto(Copy.begin() + (Copy.size() - Size2), Copy.end()));
+
+    case OverlapPosition::Interlaced:
+      const auto Stride1 = Size1 < Size2 ? Size2/Size1 : 1;
+      const auto Stride2 = Size2 < Size1 ? Size1/Size2 : 1;
+      return std::make_pair(
+          moveInto(StridedFwdIt(Src.begin(), Stride1), StridedFwdIt(Src.end(), Stride1)),
+          moveInto(StridedFwdIt(Copy.begin(), Stride2), StridedFwdIt(Copy.end(), Stride2)));
+  }
+  abort();
+  return std::pair<Container, Container>();
+}
+
+
+template <class ValueType, class Container, class Overlap>
+struct SetIntersection {
+  using ContainerType = typename Container::template type<Value<ValueType>>;
+  size_t Size1;
+  size_t Size2;
+
+  SetIntersection(size_t M, size_t N) : Size1(M), Size2(N) {}
+
+  void run(benchmark::State& state) const {
+    state.PauseTiming();
+    auto Input = genCacheUnfriendlyData<ContainerType>(Size1, Size2, Overlap());
+    std::vector<Value<ValueType>> out(std::min(Size1, Size2));
+
+    size_t cmp;
+    auto trackingLess = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
+        ++cmp;
+        return std::less<Value<ValueType>>{}(lhs, rhs);
+    };
+
+    const auto BatchSize =  std::max(size_t{16}, (2*TestSetElements) / (Size1+Size2));
+    state.ResumeTiming();
+
+    for (const auto& _ : state) {
+      while (state.KeepRunningBatch(BatchSize)) {
+        for (unsigned i=0; i<BatchSize; ++i) {
+          const auto& [C1, C2] = Input;
+          auto outIter = std::set_intersection(C1.begin(), C1.end(), C2.begin(), C2.end(), out.begin(), trackingLess);
+          benchmark::DoNotOptimize(outIter);
+          state.counters["Comparisons"] = cmp;
+        }
+      }
+    }
+  }
+
+  std::string name() const {
+    return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name +
+        ValueType::name() + '_' + std::to_string(Size1) + '_' + std::to_string(Size2);
+  }
+};
+
+} // namespace
+
+int main(int argc, char** argv) {/**/
+  benchmark::Initialize(&argc, argv);
+  if (benchmark::ReportUnrecognizedArguments(argc, argv))
+    return 1;
+  makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(Quantities, Quantities);
+  benchmark::RunSpecifiedBenchmarks();
+}

>From 46cc95f71742e32d8131a5b08fa271b122a919c3 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Fri, 5 Jan 2024 23:04:19 +0000
Subject: [PATCH 09/17] Formatting fixups.

---
 .../algorithms/set_intersection.bench.cpp     | 201 +++++++++---------
 .../include/__algorithm/iterator_operations.h |   3 +-
 libcxx/include/__algorithm/lower_bound.h      |   8 +-
 libcxx/include/__algorithm/set_intersection.h |  52 ++---
 4 files changed, 131 insertions(+), 133 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index c6a01707d6531..4fa411bba4354 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -42,15 +42,14 @@ using AllContainerTypes = std::tuple<VectorContainer, SetContainer, ForwardListC
 
 // set_intersection performance may depend on where matching values lie
 enum class OverlapPosition {
-    Nowhere,
-    Front,
-    Back,
-    Interlaced,
+  None,
+  Front,
+  Back,
+  Interlaced,
 };
 
 struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 4> {
-  static constexpr const char* Names[] = {
-      "Nowhere", "Front", "Back", "Interlaced"};
+  static constexpr const char* Names[] = {"None", "Front", "Back", "Interlaced"};
 };
 
 // functor that moves elements from an iterator range into a new Container instance
@@ -59,148 +58,149 @@ struct MoveInto {};
 
 template <typename T>
 struct MoveInto<std::vector<T>> {
-    template <class It>
-    [[nodiscard]] static std::vector<T> operator()(It first, It last) {
-        std::vector<T> out;
-        std::move(first, last, std::back_inserter(out));
-        return out;
-    }
+  template <class It>
+  [[nodiscard]] static std::vector<T> operator()(It first, It last) {
+    std::vector<T> out;
+    std::move(first, last, std::back_inserter(out));
+    return out;
+  }
 };
 
 template <typename T>
 struct MoveInto<std::forward_list<T>> {
-    template <class It>
-    [[nodiscard]] static std::forward_list<T> operator()(It first, It last) {
-        std::forward_list<T> out;
-        std::move(first, last, std::front_inserter(out));
-        out.reverse();
-        return out;
-    }
+  template <class It>
+  [[nodiscard]] static std::forward_list<T> operator()(It first, It last) {
+    std::forward_list<T> out;
+    std::move(first, last, std::front_inserter(out));
+    out.reverse();
+    return out;
+  }
 };
 
 template <typename T>
 struct MoveInto<std::set<T>> {
-    template <class It>
-    [[nodiscard]] static std::set<T> operator()(It first, It last) {
-        std::set<T> out;
-        std::move(first, last, std::inserter(out, out.begin()));
-        return out;
-    }
+  template <class It>
+  [[nodiscard]] static std::set<T> operator()(It first, It last) {
+    std::set<T> out;
+    std::move(first, last, std::inserter(out, out.begin()));
+    return out;
+  }
 };
 
 // lightweight wrapping around fillValues() which puts a little effort into
 // making what would be contiguous when sorted non-contiguous in memory
 template <typename T>
 std::vector<T> getVectorOfRandom(size_t N) {
-  std::vector<T> V;
-  fillValues(V, N, Order::Random);
-  sortValues(V, Order::Random);
-  return std::vector<T>(V);
+  std::vector<T> v;
+  fillValues(v, N, Order::Random);
+  sortValues(v, Order::Random);
+  return std::vector<T>(v);
 }
 
 // forward_iterator wrapping which, for each increment, moves the underlying iterator forward Stride elements
 template <typename Wrapped>
 struct StridedFwdIt {
-  Wrapped Base;
-  unsigned Stride;
+  Wrapped base_;
+  unsigned stride_;
 
   using iterator_category = std::forward_iterator_tag;
-  using difference_type = typename Wrapped::difference_type;
-  using value_type = typename Wrapped::value_type;
-  using pointer = typename Wrapped::pointer;
-  using reference = typename Wrapped::reference;
-
-  StridedFwdIt(Wrapped B, unsigned Stride_) : Base(B), Stride(Stride_) { assert(Stride != 0); }
-
-  StridedFwdIt operator++() { for (unsigned I=0; I<Stride; ++I) ++Base; return *this; }
-  StridedFwdIt operator++(int) { auto Tmp = *this; ++*this; return Tmp; }
-  value_type& operator*() { return *Base; }
-  const value_type& operator*() const { return *Base; }
-  value_type& operator->() { return *Base; }
-  const value_type& operator->() const { return *Base; }
-  bool operator==(const StridedFwdIt& o) const { return Base==o.Base; }
+  using difference_type   = typename Wrapped::difference_type;
+  using value_type        = typename Wrapped::value_type;
+  using pointer           = typename Wrapped::pointer;
+  using reference         = typename Wrapped::reference;
+
+  StridedFwdIt(Wrapped base, unsigned stride) : base_(base), stride_(stride) { assert(stride_ != 0); }
+
+  StridedFwdIt operator++() {
+    for (unsigned i = 0; i < stride_; ++i)
+      ++base_;
+    return *this;
+  }
+  StridedFwdIt operator++(int) {
+    auto tmp = *this;
+    ++*this;
+    return tmp;
+  }
+  value_type& operator*() { return *base_; }
+  const value_type& operator*() const { return *base_; }
+  value_type& operator->() { return *base_; }
+  const value_type& operator->() const { return *base_; }
+  bool operator==(const StridedFwdIt& o) const { return base_ == o.base_; }
   bool operator!=(const StridedFwdIt& o) const { return !operator==(o); }
 };
-template <typename Wrapped> StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
-
+template <typename Wrapped>
+StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
 
 // realistically, data won't all be nicely contiguous in a container
 // we'll go through some effort to ensure that it's shuffled through memory
 template <class Container>
-std::pair<Container, Container> genCacheUnfriendlyData(size_t Size1, size_t Size2, OverlapPosition Pos) {
+std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) {
   using ValueType = typename Container::value_type;
-  const MoveInto<Container> moveInto;
-  const auto SrcSize = Pos == OverlapPosition::Nowhere ? Size1 + Size2 : std::max(Size1, Size2);
-  std::vector<ValueType> Src = getVectorOfRandom<ValueType>(SrcSize);
-
-  if (Pos == OverlapPosition::Nowhere) {
-    std::sort(Src.begin(), Src.end());
-    return std::make_pair(
-        moveInto(Src.begin(), Src.begin() + Size1),
-        moveInto(Src.begin() + Size1, Src.end()));
+  const MoveInto<Container> move_into;
+  const auto src_size = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2);
+  std::vector<ValueType> src = getVectorOfRandom<ValueType>(src_size);
+
+  if (pos == OverlapPosition::None) {
+    std::sort(src.begin(), src.end());
+    return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(src.begin() + size1, src.end()));
   }
 
   // all other overlap types will have to copy some part of the data, but if
   // we copy after sorting it will likely have high cache locality, so we sort
   // each copy separately
-  auto Copy = Src;
-  std::sort(Src.begin(), Src.end());
-  std::sort(Copy.begin(), Copy.end());
-
-  switch(Pos) {
-    case OverlapPosition::Nowhere:
-      break;
-
-    case OverlapPosition::Front:
-      return std::make_pair(
-          moveInto(Src.begin(), Src.begin() + Size1),
-          moveInto(Copy.begin(), Copy.begin() + Size2));
-
-    case OverlapPosition::Back:
-      return std::make_pair(
-          moveInto(Src.begin() + (Src.size() - Size1), Src.end()),
-          moveInto(Copy.begin() + (Copy.size() - Size2), Copy.end()));
-
-    case OverlapPosition::Interlaced:
-      const auto Stride1 = Size1 < Size2 ? Size2/Size1 : 1;
-      const auto Stride2 = Size2 < Size1 ? Size1/Size2 : 1;
-      return std::make_pair(
-          moveInto(StridedFwdIt(Src.begin(), Stride1), StridedFwdIt(Src.end(), Stride1)),
-          moveInto(StridedFwdIt(Copy.begin(), Stride2), StridedFwdIt(Copy.end(), Stride2)));
+  auto copy = src;
+  std::sort(src.begin(), src.end());
+  std::sort(copy.begin(), copy.end());
+
+  switch (pos) {
+  case OverlapPosition::None:
+    break;
+
+  case OverlapPosition::Front:
+    return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(copy.begin(), copy.begin() + size2));
+
+  case OverlapPosition::Back:
+    return std::make_pair(move_into(src.begin() + (src.size() - size1), src.end()),
+                          move_into(copy.begin() + (copy.size() - size2), copy.end()));
+
+  case OverlapPosition::Interlaced:
+    const auto stride1 = size1 < size2 ? size2 / size1 : 1;
+    const auto stride2 = size2 < size1 ? size1 / size2 : 1;
+    return std::make_pair(move_into(StridedFwdIt(src.begin(), stride1), StridedFwdIt(src.end(), stride1)),
+                          move_into(StridedFwdIt(copy.begin(), stride2), StridedFwdIt(copy.end(), stride2)));
   }
   abort();
   return std::pair<Container, Container>();
 }
 
-
 template <class ValueType, class Container, class Overlap>
 struct SetIntersection {
   using ContainerType = typename Container::template type<Value<ValueType>>;
-  size_t Size1;
-  size_t Size2;
+  size_t size1_;
+  size_t size2_;
 
-  SetIntersection(size_t M, size_t N) : Size1(M), Size2(N) {}
+  SetIntersection(size_t size1, size_t size2) : size1_(size1), size2_(size2) {}
 
   void run(benchmark::State& state) const {
     state.PauseTiming();
-    auto Input = genCacheUnfriendlyData<ContainerType>(Size1, Size2, Overlap());
-    std::vector<Value<ValueType>> out(std::min(Size1, Size2));
+    auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap());
+    std::vector<Value<ValueType>> out(std::min(size1_, size2_));
 
     size_t cmp;
-    auto trackingLess = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
-        ++cmp;
-        return std::less<Value<ValueType>>{}(lhs, rhs);
+    auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
+      ++cmp;
+      return std::less<Value<ValueType>>{}(lhs, rhs);
     };
 
-    const auto BatchSize =  std::max(size_t{16}, (2*TestSetElements) / (Size1+Size2));
+    const auto BATCH_SIZE = std::max(size_t{16}, (2 * TestSetElements) / (size1_ + size2_));
     state.ResumeTiming();
 
     for (const auto& _ : state) {
-      while (state.KeepRunningBatch(BatchSize)) {
-        for (unsigned i=0; i<BatchSize; ++i) {
-          const auto& [C1, C2] = Input;
-          auto outIter = std::set_intersection(C1.begin(), C1.end(), C2.begin(), C2.end(), out.begin(), trackingLess);
-          benchmark::DoNotOptimize(outIter);
+      while (state.KeepRunningBatch(BATCH_SIZE)) {
+        for (unsigned i = 0; i < BATCH_SIZE; ++i) {
+          const auto& [c1, c2] = input;
+          auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin(), tracking_less);
+          benchmark::DoNotOptimize(res);
           state.counters["Comparisons"] = cmp;
         }
       }
@@ -208,17 +208,18 @@ struct SetIntersection {
   }
 
   std::string name() const {
-    return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name +
-        ValueType::name() + '_' + std::to_string(Size1) + '_' + std::to_string(Size2);
+    return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name + ValueType::name() + '_' +
+           std::to_string(size1_) + '_' + std::to_string(size2_);
   }
 };
 
 } // namespace
 
-int main(int argc, char** argv) {/**/
+int main(int argc, char** argv) { /**/
   benchmark::Initialize(&argc, argv);
   if (benchmark::ReportUnrecognizedArguments(argc, argv))
     return 1;
-  makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(Quantities, Quantities);
+  makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(
+      Quantities, Quantities);
   benchmark::RunSpecifiedBenchmarks();
 }
diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index 21117e6b7d760..6ce9895f545a5 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -91,7 +91,8 @@ struct _IterOps<_ClassicAlgPolicy> {
   // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
   // use the incoming type for returning and steer clear of negative overflows
   template <class _Iter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
+  advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
     return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category());
   }
 
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 3febcb411268f..b1ecd1ae0d569 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -79,12 +79,8 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
 }
 
 template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter __lower_bound(
-    _RandIter __first,
-    _Sent __last,
-    const _Type& __value,
-    _Comp& __comp,
-    _Proj& __proj) {
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
+__lower_bound(_RandIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
   const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last);
   return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
 }
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 46f6fbe4d3dd2..a18bb6ff947b7 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -60,8 +60,8 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
         __comp_(__comp) {}
 
   _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-      _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-      operator()() && {
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+  operator()() && {
     while (__first2_ != __last2_) {
       __advance1_and_maybe_add_result();
       if (__first1_ == __last1_)
@@ -84,7 +84,7 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
     // use one-sided lower bound for improved algorithmic complexity bounds
     const auto __tmp = std::move(__iter);
-    __iter = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_);
+    __iter           = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_);
     __add_output_unless(__tmp != __iter);
   }
 
@@ -122,16 +122,16 @@ template <class _AlgPolicy,
           class _Sent2,
           class _OutIter>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
-    __set_intersection(
-        _InForwardIter1 __first1,
-        _Sent1 __last1,
-        _InForwardIter2 __first2,
-        _Sent2 __last2,
-        _OutIter __result,
-        _Compare&& __comp,
-        std::forward_iterator_tag,
-        std::forward_iterator_tag) {
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
+__set_intersection(
+    _InForwardIter1 __first1,
+    _Sent1 __last1,
+    _InForwardIter2 __first2,
+    _Sent2 __last2,
+    _OutIter __result,
+    _Compare&& __comp,
+    std::forward_iterator_tag,
+    std::forward_iterator_tag) {
   std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
       __intersector(__first1, __last1, __first2, __last2, __result, __comp);
   return std::move(__intersector)();
@@ -146,16 +146,16 @@ template <class _AlgPolicy,
           class _Sent2,
           class _OutIter>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
-    __set_intersection(
-        _InInputIter1 __first1,
-        _Sent1 __last1,
-        _InInputIter2 __first2,
-        _Sent2 __last2,
-        _OutIter __result,
-        _Compare&& __comp,
-        std::input_iterator_tag,
-        std::input_iterator_tag) {
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
+__set_intersection(
+    _InInputIter1 __first1,
+    _Sent1 __last1,
+    _InInputIter2 __first2,
+    _Sent2 __last2,
+    _OutIter __result,
+    _Compare&& __comp,
+    std::input_iterator_tag,
+    std::input_iterator_tag) {
   while (__first1 != __last1 && __first2 != __last2) {
     if (__comp(*__first1, *__first2))
       ++__first1;
@@ -190,9 +190,9 @@ class __set_intersection_iter_category {
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-    __set_intersection(
-        _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+__set_intersection(
+    _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
   return std::__set_intersection<_AlgPolicy>(
       std::move(__first1),
       std::move(__last1),

>From 450f5cebd41e425133fd221bf23b40bb20922eef Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 8 Jan 2024 21:51:27 +0000
Subject: [PATCH 10/17] General improvements to benchmark, including
 simplifying and slimming it down for faster runs, and including comparison
 counter.

---
 .../algorithms/set_intersection.bench.cpp     | 72 +++++++------------
 1 file changed, 27 insertions(+), 45 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 4fa411bba4354..baa5a7cdf0507 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include <algorithm>
-#include <forward_list>
 #include <iterator>
 #include <set>
 #include <vector>
@@ -31,57 +30,26 @@ struct SetContainer {
   static constexpr const char* Name = "Set";
 };
 
-struct ForwardListContainer {
-  template <typename... Args>
-  using type = std::forward_list<Args...>;
-
-  static constexpr const char* Name = "ForwardList";
-};
-
-using AllContainerTypes = std::tuple<VectorContainer, SetContainer, ForwardListContainer>;
+using AllContainerTypes = std::tuple<VectorContainer, SetContainer>;
 
 // set_intersection performance may depend on where matching values lie
 enum class OverlapPosition {
   None,
   Front,
-  Back,
+  // performance-wise, matches at the back are identical to ones at the front
   Interlaced,
 };
 
-struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 4> {
-  static constexpr const char* Names[] = {"None", "Front", "Back", "Interlaced"};
+struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 3> {
+  static constexpr const char* Names[] = {"None", "Front", "Interlaced"};
 };
 
 // functor that moves elements from an iterator range into a new Container instance
 template <typename Container>
-struct MoveInto {};
-
-template <typename T>
-struct MoveInto<std::vector<T>> {
-  template <class It>
-  [[nodiscard]] static std::vector<T> operator()(It first, It last) {
-    std::vector<T> out;
-    std::move(first, last, std::back_inserter(out));
-    return out;
-  }
-};
-
-template <typename T>
-struct MoveInto<std::forward_list<T>> {
+struct MoveInto {
   template <class It>
-  [[nodiscard]] static std::forward_list<T> operator()(It first, It last) {
-    std::forward_list<T> out;
-    std::move(first, last, std::front_inserter(out));
-    out.reverse();
-    return out;
-  }
-};
-
-template <typename T>
-struct MoveInto<std::set<T>> {
-  template <class It>
-  [[nodiscard]] static std::set<T> operator()(It first, It last) {
-    std::set<T> out;
+  [[nodiscard]] static Container operator()(It first, It last) {
+    Container out;
     std::move(first, last, std::inserter(out, out.begin()));
     return out;
   }
@@ -137,7 +105,7 @@ template <class Container>
 std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) {
   using ValueType = typename Container::value_type;
   const MoveInto<Container> move_into;
-  const auto src_size = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2);
+  const auto src_size        = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2);
   std::vector<ValueType> src = getVectorOfRandom<ValueType>(src_size);
 
   if (pos == OverlapPosition::None) {
@@ -159,10 +127,6 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
   case OverlapPosition::Front:
     return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(copy.begin(), copy.begin() + size2));
 
-  case OverlapPosition::Back:
-    return std::make_pair(move_into(src.begin() + (src.size() - size1), src.end()),
-                          move_into(copy.begin() + (copy.size() - size2), copy.end()));
-
   case OverlapPosition::Interlaced:
     const auto stride1 = size1 < size2 ? size2 / size1 : 1;
     const auto stride2 = size2 < size1 ? size1 / size2 : 1;
@@ -181,6 +145,11 @@ struct SetIntersection {
 
   SetIntersection(size_t size1, size_t size2) : size1_(size1), size2_(size2) {}
 
+  bool skip() const noexcept {
+    // let's save some time and skip simmetrical runs
+    return size1_ <= size2_;
+  }
+
   void run(benchmark::State& state) const {
     state.PauseTiming();
     auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap());
@@ -192,12 +161,13 @@ struct SetIntersection {
       return std::less<Value<ValueType>>{}(lhs, rhs);
     };
 
-    const auto BATCH_SIZE = std::max(size_t{16}, (2 * TestSetElements) / (size1_ + size2_));
+    const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_));
     state.ResumeTiming();
 
     for (const auto& _ : state) {
       while (state.KeepRunningBatch(BATCH_SIZE)) {
         for (unsigned i = 0; i < BATCH_SIZE; ++i) {
+          cmp                  = 0;
           const auto& [c1, c2] = input;
           auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin(), tracking_less);
           benchmark::DoNotOptimize(res);
@@ -219,6 +189,18 @@ int main(int argc, char** argv) { /**/
   benchmark::Initialize(&argc, argv);
   if (benchmark::ReportUnrecognizedArguments(argc, argv))
     return 1;
+  const std::vector<size_t> Quantities = {
+      1 << 0,
+      1 << 4,
+      1 << 8,
+      1 << 14,
+// Running each benchmark in parallel consumes too much memory with MSAN
+// and can lead to the test process being killed.
+#if !TEST_HAS_FEATURE(memory_sanitizer)
+      1 << 18
+#endif
+  };
+
   makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>(
       Quantities, Quantities);
   benchmark::RunSpecifiedBenchmarks();

>From d0c5f2b8d23c76db2ba325aa0fb6172d1b6eb1da Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Mon, 15 Jan 2024 16:19:29 +0000
Subject: [PATCH 11/17] Huh, I wonder how I got `git clang-format` to miss
 those changes =/

---
 .../algorithms/set_intersection.bench.cpp     | 10 ++--
 libcxx/include/__algorithm/set_intersection.h | 58 +++++++++----------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index baa5a7cdf0507..38010170508a8 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -190,14 +190,14 @@ int main(int argc, char** argv) { /**/
   if (benchmark::ReportUnrecognizedArguments(argc, argv))
     return 1;
   const std::vector<size_t> Quantities = {
-      1 << 0,
-      1 << 4,
-      1 << 8,
-      1 << 14,
+    1 << 0,
+    1 << 4,
+    1 << 8,
+    1 << 14,
 // Running each benchmark in parallel consumes too much memory with MSAN
 // and can lead to the test process being killed.
 #if !TEST_HAS_FEATURE(memory_sanitizer)
-      1 << 18
+    1 << 18
 #endif
   };
 
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index a18bb6ff947b7..504350d10779e 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -59,9 +59,9 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
         __result_(__result),
         __comp_(__comp) {}
 
-  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-  operator()() && {
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+      __set_intersection_result<_InIter1, _InIter2, _OutIter>
+      operator()() && {
     while (__first2_ != __last2_) {
       __advance1_and_maybe_add_result();
       if (__first1_ == __last1_)
@@ -121,17 +121,17 @@ template <class _AlgPolicy,
           class _InForwardIter2,
           class _Sent2,
           class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
-__set_intersection(
-    _InForwardIter1 __first1,
-    _Sent1 __last1,
-    _InForwardIter2 __first2,
-    _Sent2 __last2,
-    _OutIter __result,
-    _Compare&& __comp,
-    std::forward_iterator_tag,
-    std::forward_iterator_tag) {
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+    __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
+    __set_intersection(
+        _InForwardIter1 __first1,
+        _Sent1 __last1,
+        _InForwardIter2 __first2,
+        _Sent2 __last2,
+        _OutIter __result,
+        _Compare&& __comp,
+        std::forward_iterator_tag,
+        std::forward_iterator_tag) {
   std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
       __intersector(__first1, __last1, __first2, __last2, __result, __comp);
   return std::move(__intersector)();
@@ -145,17 +145,17 @@ template <class _AlgPolicy,
           class _InInputIter2,
           class _Sent2,
           class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
-__set_intersection(
-    _InInputIter1 __first1,
-    _Sent1 __last1,
-    _InInputIter2 __first2,
-    _Sent2 __last2,
-    _OutIter __result,
-    _Compare&& __comp,
-    std::input_iterator_tag,
-    std::input_iterator_tag) {
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+    __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
+    __set_intersection(
+        _InInputIter1 __first1,
+        _Sent1 __last1,
+        _InInputIter2 __first2,
+        _Sent2 __last2,
+        _OutIter __result,
+        _Compare&& __comp,
+        std::input_iterator_tag,
+        std::input_iterator_tag) {
   while (__first1 != __last1 && __first2 != __last2) {
     if (__comp(*__first1, *__first2))
       ++__first1;
@@ -189,10 +189,10 @@ class __set_intersection_iter_category {
 };
 
 template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-__set_intersection(
-    _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+    __set_intersection_result<_InIter1, _InIter2, _OutIter>
+    __set_intersection(
+        _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
   return std::__set_intersection<_AlgPolicy>(
       std::move(__first1),
       std::move(__last1),

>From faa31150e13902941cfa0c9ef87bff265b12d898 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Jan 2024 16:25:56 +0000
Subject: [PATCH 12/17] Oops, bad mistake while porting into libc++!
 `__lower_bound_onesided()` must start with `__step==0`, otherwise we can't
 match the complexity of linear search when continually matching (like a
 std::set_intersection() of matching containers will).

---
 libcxx/include/__algorithm/lower_bound.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index b1ecd1ae0d569..dc86e2fa5c81d 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -63,6 +63,12 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
   // __iterator_category<_Iter>>::value,
   //       "lower_bound() is a multipass algorithm and requires forward iterator or better");
 
+  // split the step 0 scenario: this allows us to match worst-case complexity
+  // when replacing linear search
+  if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value))
+    return __first;
+  ++__first;
+
   using _Distance = typename iterator_traits<_Iter>::difference_type;
   for (_Distance __step = 1; __first != __last; __step <<= 1) {
     auto __it   = __first;

>From 995d04b872c8552633c36e38d382897e8329d1e2 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Jan 2024 22:31:41 +0000
Subject: [PATCH 13/17] Oops, bad tracking of displacement on
 `stride_counting_iterator`

---
 libcxx/test/support/test_iterators.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 3b86a93564e4b..d1e077e1b2655 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -826,7 +826,7 @@ class stride_counting_iterator {
         It tmp(base_);
         base_ = base(tmp += n);
         ++*stride_count_;
-        ++*stride_displacement_;
+        *stride_displacement_ += n;
         return *this;
     }
 
@@ -836,7 +836,7 @@ class stride_counting_iterator {
         It tmp(base_);
         base_ = base(tmp -= n);
         ++*stride_count_;
-        --*stride_displacement_;
+        *stride_displacement_ -= n;
         return *this;
     }
 

>From d568d491cef941e2cb03d85bcce9b7d2ec7314c4 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 23 Jan 2024 22:33:13 +0000
Subject: [PATCH 14/17] Add more counters to the set_intersection benchmark,
 guard them behind an environment variable so we can choose to either measure
 time more accurately or obtain more information.

This led me down an interesting road of validating benchmark results and finding a significant discrepancy in timings between running all test cases at once and running them individually with `--benchmark_filter`.
---
 .../algorithms/set_intersection.bench.cpp     | 38 +++++++++++++------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
index 38010170508a8..b2de0c3223b00 100644
--- a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
+++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp
@@ -9,9 +9,11 @@
 #include <algorithm>
 #include <iterator>
 #include <set>
+#include <stdlib.h>
 #include <vector>
 
 #include "common.h"
+#include "test_iterators.h"
 
 namespace {
 
@@ -137,6 +139,10 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
   return std::pair<Container, Container>();
 }
 
+// use environment variable to enable additional counters: instrumentation will
+// impact CPU utilisation, let's give the user the option
+static const bool TRACK_COUNTERS = getenv("TRACK_COUNTERS") != nullptr;
+
 template <class ValueType, class Container, class Overlap>
 struct SetIntersection {
   using ContainerType = typename Container::template type<Value<ValueType>>;
@@ -147,7 +153,7 @@ struct SetIntersection {
 
   bool skip() const noexcept {
     // let's save some time and skip simmetrical runs
-    return size1_ <= size2_;
+    return size1_ < size2_;
   }
 
   void run(benchmark::State& state) const {
@@ -155,23 +161,33 @@ struct SetIntersection {
     auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap());
     std::vector<Value<ValueType>> out(std::min(size1_, size2_));
 
-    size_t cmp;
-    auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
-      ++cmp;
-      return std::less<Value<ValueType>>{}(lhs, rhs);
-    };
-
     const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_));
     state.ResumeTiming();
 
     for (const auto& _ : state) {
       while (state.KeepRunningBatch(BATCH_SIZE)) {
         for (unsigned i = 0; i < BATCH_SIZE; ++i) {
-          cmp                  = 0;
           const auto& [c1, c2] = input;
-          auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin(), tracking_less);
-          benchmark::DoNotOptimize(res);
-          state.counters["Comparisons"] = cmp;
+          if (TRACK_COUNTERS) {
+            size_t cmp{}, strides{}, displacement{};
+            auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
+              ++cmp;
+              return std::less<Value<ValueType>>{}(lhs, rhs);
+            };
+            stride_counting_iterator b1(c1.begin(), &strides, &displacement);
+            stride_counting_iterator e1(c1.end(), &strides, &displacement);
+            stride_counting_iterator b2(c2.begin(), &strides, &displacement);
+            stride_counting_iterator e2(c2.end(), &strides, &displacement);
+            auto res = std::set_intersection(b1, e1, b2, e2, out.begin(), tracking_less);
+            benchmark::DoNotOptimize(res);
+            state.counters["comparisons"]       = cmp;
+            state.counters["iter_strides"]      = strides;
+            state.counters["iter_displacement"] = displacement;
+
+          } else {
+            auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin());
+            benchmark::DoNotOptimize(res);
+          }
         }
       }
     }

>From bb872e0b1d19a77450b8455c348d3f4669adcefb Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Thu, 1 Feb 2024 13:41:07 +0000
Subject: [PATCH 15/17] Revert "Oops, bad tracking of displacement on
 `stride_counting_iterator`"

This reverts commit 995d04b872c8552633c36e38d382897e8329d1e2.
---
 libcxx/test/support/test_iterators.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 189684022d1a3..191de7f3c8a36 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -826,7 +826,7 @@ class stride_counting_iterator {
         It tmp(base_);
         base_ = base(tmp += n);
         ++*stride_count_;
-        *stride_displacement_ += n;
+        ++*stride_displacement_;
         return *this;
     }
 
@@ -836,7 +836,7 @@ class stride_counting_iterator {
         It tmp(base_);
         base_ = base(tmp -= n);
         ++*stride_count_;
-        *stride_displacement_ -= n;
+        --*stride_displacement_;
         return *this;
     }
 

>From a1cd8ffc82fe6021dbafb7b543e5472eda3ef87a Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Thu, 1 Feb 2024 17:39:08 +0000
Subject: [PATCH 16/17] * Fix C++03 compatibility issues. * Fix tests I had
 broken. * More tweaks and better comments.

---
 .../include/__algorithm/iterator_operations.h |  6 ++--
 libcxx/include/__algorithm/lower_bound.h      | 12 ++++---
 libcxx/include/__algorithm/set_intersection.h | 36 ++++++++++++++-----
 .../lower.bound/lower_bound.pass.cpp          | 11 ++++--
 .../lower.bound/lower_bound_comp.pass.cpp     | 25 +++++++++----
 .../ranges_set_intersection.pass.cpp          | 22 ++++++------
 6 files changed, 76 insertions(+), 36 deletions(-)

diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index d9a6e7f35df72..449d03d52e324 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -98,7 +98,7 @@ struct _IterOps<_ClassicAlgPolicy> {
   template <class _InputIter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
-    _Distance __dist{};
+    _Distance __dist = _Distance();
     for (; __dist < __count && __iter != __sentinel; ++__dist)
       ++__iter;
     return __count - __dist;
@@ -108,7 +108,7 @@ struct _IterOps<_ClassicAlgPolicy> {
   template <class _BiDirIter, class _Distance>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance
   __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
-    _Distance __dist{};
+    _Distance __dist = _Distance();
     if (__count >= 0)
       for (; __dist < __count && __iter != __sentinel; ++__dist)
         ++__iter;
@@ -120,7 +120,7 @@ struct _IterOps<_ClassicAlgPolicy> {
 
   // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization
   template <class _RandIter, class _Distance>
-  _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR static _Distance
   __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
     auto __dist = _IterOps::distance(__iter, __sentinel);
     _LIBCPP_ASSERT_UNCATEGORIZED(
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 12a9c4850460b..e22700fa80269 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -50,7 +50,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
 
 // One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
 // advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
-// 2*(log(n)-1) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
+// 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
 // the first one is when operating over non-random iterators, because the classic algorithm requires knowing the
 // container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
 // traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
@@ -63,11 +63,9 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
   // __iterator_category<_Iter>>::value,
   //       "lower_bound() is a multipass algorithm and requires forward iterator or better");
 
-  // split the step 0 scenario: this allows us to match worst-case complexity
-  // when replacing linear search
+  // step = 0, ensuring we can always short-circuit when distance is 1 later on
   if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value))
     return __first;
-  ++__first;
 
   using _Distance = typename iterator_traits<_Iter>::difference_type;
   for (_Distance __step = 1; __first != __last; __step <<= 1) {
@@ -76,10 +74,14 @@ __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp&
     // once we reach the last range where needle can be we must start
     // looking inwards, bisecting that range
     if (__it == __last || !std::__invoke(__comp, std::__invoke(__proj, *__it), __value)) {
+      // we've already checked the previous value and it was less, we can save
+      // one comparison by skipping bisection
+      if (__dist == 1)
+        return __it;
       return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
     }
     // range not found, move forward!
-    __first = std::move(__it);
+    __first = __it;
   }
   return __first;
 }
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index e3aa99d004eee..00fedec3701d6 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -20,6 +20,7 @@
 #include <__type_traits/is_same.h>
 #include <__utility/exchange.h>
 #include <__utility/move.h>
+#include <__utility/swap.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -50,8 +51,7 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   const _Sent2& __last2_;
   _OutIter& __result_;
   _Compare& __comp_;
-  static constexpr auto __proj_ = std::__identity();
-  bool __prev_advanced_         = true;
+  bool __prev_advanced_ = true;
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersector(
       _InIter1& __first1, _Sent1& __last1, _InIter2& __first2, _Sent2& __last2, _OutIter& __result, _Compare& __comp)
@@ -64,7 +64,7 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
 
   _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
       __set_intersection_result<_InIter1, _InIter2, _OutIter>
-      operator()() && {
+      operator()() {
     while (__first2_ != __last2_) {
       __advance1_and_maybe_add_result();
       if (__first1_ == __last1_)
@@ -85,9 +85,27 @@ struct _LIBCPP_NODISCARD_EXT __set_intersector {
   template <class _Iter, class _Sent, class _Value>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
   __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
-    // use one-sided lower bound for improved algorithmic complexity bounds
-    const auto __tmp = std::move(__iter);
-    __iter           = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_);
+    static _LIBCPP_CONSTEXPR std::__identity __proj;
+    // use one-sided binary search for improved algorithmic complexity bounds.
+    // Understanding how we can use binary search and still respect complexity
+    // guarantees is _not_ straightforward, so let me explain: the guarantee
+    // is "at most 2*(N+M)-1 comparisons", and one-sided binary search will
+    // necessarily overshoot depending on the position of the needle in the
+    // haystack -- for instance, if we're searching for 3 in (1, 2, 3, 4),
+    // we'll check if 3<1, then 3<2, then 3<4, and, finally, 3<3, for a total of
+    // 4 comparisons, when linear search would have yielded 3. However,
+    // because we won't need to perform the intervening reciprocal comparisons
+    // (ie 1<3, 2<3, 4<3), that extra comparison doesn't run afoul of the
+    // guarantee. Additionally, this type of scenario can only happen for match
+    // distances of up to 5 elements, because 2*log2(8) is 6, and we'll still
+    // be worse-off at position 5 of an 8-element set. From then onwards
+    // these scenarios can't happen.
+    // TL;DR: we'll be 1 comparison worse-off compared to the classic linear-
+    // searching algorithm if matching position 3 of a set with 4 elements,
+    // or position 5 if the set has 7 or 8 elements, but we'll never exceed
+    // the complexity guarantees from the standard.
+    _Iter __tmp = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj);
+    std::swap(__tmp, __iter);
     __add_output_unless(__tmp != __iter);
   }
 
@@ -137,7 +155,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
         std::forward_iterator_tag) {
   std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
       __intersector(__first1, __last1, __first2, __last2, __result, __comp);
-  return std::move(__intersector)();
+  return __intersector();
 }
 
 // input iterators are not suitable for multipass algorithms, so we stick to the classic single-pass version
@@ -183,7 +201,7 @@ class __set_intersection_iter_category {
   template <class _It>
   using __cat = typename std::_IterOps<_AlgPolicy>::template __iterator_category<_It>;
   template <class _It>
-  static auto test(__cat<_It>*) -> __cat<_It>;
+  static __cat<_It> test(__cat<_It>*);
   template <class>
   static std::input_iterator_tag test(...);
 
@@ -202,7 +220,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
       std::move(__first2),
       std::move(__last2),
       std::move(__result),
-      std::forward<_Compare>(__comp),
+      __comp,
       typename std::__set_intersection_iter_category<_AlgPolicy, _InIter1>::__type(),
       typename std::__set_intersection_iter_category<_AlgPolicy, _InIter2>::__type());
 }
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
index dd2916338e8f6..196af84b69222 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
@@ -39,10 +39,15 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-  std::size_t strides{};
-  std::size_t displacement{};
+#if TEST_STD_VER > 17
+  std::size_t strides      = 0;
+  std::size_t displacement = 0;
   stride_counting_iterator f(first, &strides, &displacement);
   stride_counting_iterator l(last, &strides, &displacement);
+#else
+  Iter& f = first;
+  Iter& l = last;
+#endif
 
   auto i = std::lower_bound(f, l, value);
   for (auto j = base(f); j != base(i); ++j)
@@ -50,9 +55,11 @@ test(Iter first, Iter last, const T& value)
   for (auto j = base(i); j != base(l); ++j)
     assert(!(*j < value));
 
+#if TEST_STD_VER > 17
   auto len = static_cast<std::size_t>(std::distance(first, last));
   assert(strides <= 2 * len);
   assert(displacement <= 2 * len);
+#endif
 }
 
 template <class Iter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
index ff928e23b9006..643fd0052e479 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
@@ -39,16 +39,27 @@ template <class Iter, class T>
 void
 test(Iter first, Iter last, const T& value)
 {
-  std::size_t strides{};
-  std::size_t displacement{};
+#if TEST_STD_VER > 17
+  std::size_t strides      = 0;
+  std::size_t displacement = 0;
   stride_counting_iterator f(first, &strides, &displacement);
   stride_counting_iterator l(last, &strides, &displacement);
+#else
+  Iter& f = first;
+  Iter& l = last;
+#endif
+
+  std::size_t comparisons = 0;
+  struct InstrumentedGreater {
+    explicit InstrumentedGreater(std::size_t* cmp) : comparisons_(cmp) {}
+    bool operator()(int rhs, int lhs) const {
+      ++*comparisons_;
+      return std::greater<int>()(rhs, lhs);
+    }
 
-  std::size_t comparisons{};
-  auto cmp = [&comparisons](int rhs, int lhs) {
-    ++comparisons;
-    return std::greater<int>()(rhs, lhs);
+    std::size_t* comparisons_;
   };
+  InstrumentedGreater cmp(&comparisons);
 
   auto i = std::lower_bound(f, l, value, cmp);
   for (auto j = base(f); j != base(i); ++j)
@@ -57,8 +68,10 @@ test(Iter first, Iter last, const T& value)
     assert(!std::greater<int>()(*j, value));
 
   auto len = static_cast<std::size_t>(std::distance(first, last));
+#if TEST_STD_VER > 17
   assert(strides <= 2 * len);
   assert(displacement <= 2 * len);
+#endif
   assert(comparisons <= std::ceil(std::log2(len + 1)));
 }
 
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index f658a95778c85..2f3b0df9cda7c 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -436,20 +436,20 @@ constexpr void testComplexityParameterizedIter() {
   }
 
   // Lower complexity when there is low overlap between ranges: we can make 2*log(X) comparisons when one range
-  // has X elements that can be skipped over.
+  // has X elements that can be skipped over (and then 1 more to confirm that the value we found is equal).
   {
     std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
     std::array r2{15};
     std::array expected{15};
 
     OperationCounts expectedCounts;
-    expectedCounts.comparisons                 = 8;
-    expectedCounts.in[0].proj                  = 8;
-    expectedCounts.in[0].iterator_strides      = 24;
-    expectedCounts.in[0].iterator_displacement = 24;
-    expectedCounts.in[1].proj                  = 8;
-    expectedCounts.in[1].iterator_strides      = 3;
-    expectedCounts.in[1].iterator_displacement = 3;
+    expectedCounts.comparisons                 = 9;
+    expectedCounts.in[0].proj                  = 9;
+    expectedCounts.in[0].iterator_strides      = 23;
+    expectedCounts.in[0].iterator_displacement = 23;
+    expectedCounts.in[1].proj                  = 9;
+    expectedCounts.in[1].iterator_strides      = 1;
+    expectedCounts.in[1].iterator_displacement = 1;
 
     testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
     testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
@@ -721,9 +721,9 @@ constexpr bool test() {
       std::ranges::set_intersection(r1.begin(), r1.end(), r2.begin(), r2.end(), out.data(), comp, proj1, proj2);
 
       assert(std::ranges::equal(out, expected, {}, &Data::data));
-      assert(numberOfComp < maxOperation);
-      assert(numberOfProj1 < maxOperation);
-      assert(numberOfProj2 < maxOperation);
+      assert(numberOfComp <= maxOperation);
+      assert(numberOfProj1 <= maxOperation);
+      assert(numberOfProj2 <= maxOperation);
     }
 
     // range overload

>From 24d1d5b9a9d93f567be004e4a36e5b9147898b06 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Thu, 1 Feb 2024 18:04:17 +0000
Subject: [PATCH 17/17] Remove non-ascii characters, CI doesn't like them.

---
 libcxx/include/__algorithm/lower_bound.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index e22700fa80269..6016502404002 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -49,13 +49,14 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
 }
 
 // One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
-// advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
-// 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
+// advantage of being \Omega(1) rather than the classic algorithm's \Omega(log(n)), with the downside of executing at
+// most 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
 // the first one is when operating over non-random iterators, because the classic algorithm requires knowing the
-// container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
+// container's size upfront, which adds \Omega(n) iterator increments to the complexity. The second one is when you're
 // traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
-// would yield Ω(n*log(n)) comparisons and, for non-random iterators, Ω(n^2) iterator increments, whereas the one-sided
-// version will yield O(n) operations on both counts, with a Ω(log(n)) bound on the number of comparisons.
+// would yield \Omega(n*log(n)) comparisons and, for non-random iterators, \Omega(n^2) iterator increments, whereas the
+// one-sided version will yield O(n) operations on both counts, with an \Omega(log(n)) bound on the number of
+// comparisons.
 template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
 __lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {



More information about the cfe-commits mailing list