[libcxx-commits] [libcxx] [libc++] Introduce one-sided binary search for lower_bound on non-random iterators, and use that to improve the average complexity of set_intersection. (PR #75230)
via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Dec 12 10:18:18 PST 2023
https://github.com/ichaer created https://github.com/llvm/llvm-project/pull/75230
One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines: the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm would yield Ω(n*log(n)) comparisons and, for non-random-access iterators, Ω(n^2) iterator increments, whereas the one-sided version will yield O(n) operations on both counts, with an Ω(log(n)) bound on the number of comparisons.
One use case that can often fit both scenarios is a std::set_intersection() on std::set instances.
This change is split into 4 separate commits. First we add tests validating the algorithmic complexity of std::lower_bound(), then we introduce the use of one-sided binary search for non-random iterators, then we add complexity validation tests for std::set_intersection(), and, finally, we introduce explicit use of the one-sided binary search version of std::lower_bound() for the instances of std::set_intersection() where there is no question about it improving time complexity.
>From b65415f5b70591eae965cae1316054145d399158 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:52:13 +0100
Subject: [PATCH 1/4] [libc++][test] Add lower_bound complexity validation
tests prior to introducing one-sided binary search for non-random iterators.
---
.../lower.bound/lower_bound.pass.cpp | 19 +++++--
.../lower.bound/lower_bound_comp.pass.cpp | 28 ++++++++--
libcxx/test/support/test_iterators.h | 55 ++++++++++++++-----
3 files changed, 79 insertions(+), 23 deletions(-)
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
index a2d8ab632303c..5c11962d13777 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound.pass.cpp
@@ -39,11 +39,20 @@ template <class Iter, class T>
void
test(Iter first, Iter last, const T& value)
{
- Iter i = std::lower_bound(first, last, value);
- for (Iter j = first; j != i; ++j)
- assert(*j < value);
- for (Iter j = i; j != last; ++j)
- assert(!(*j < value));
+ std::size_t strides{};
+ std::size_t displacement{};
+ stride_counting_iterator f(first, &strides, &displacement);
+ stride_counting_iterator l(last, &strides, &displacement);
+
+ auto i = std::lower_bound(f, l, value);
+ for (auto j = f; j != i; ++j)
+ assert(*j < value);
+ for (auto j = i; j != l; ++j)
+ assert(!(*j < value));
+
+ auto len = std::distance(first, last);
+ assert(strides <= 2.5 * len + 1);
+ assert(displacement <= 2.5 * len + 1);
}
template <class Iter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
index b9133028d9ade..05fd43eada461 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.binary.search/lower.bound/lower_bound_comp.pass.cpp
@@ -17,6 +17,7 @@
#include <vector>
#include <cassert>
#include <cstddef>
+#include <cmath>
#include "test_macros.h"
#include "test_iterators.h"
@@ -38,11 +39,28 @@ template <class Iter, class T>
void
test(Iter first, Iter last, const T& value)
{
- Iter i = std::lower_bound(first, last, value, std::greater<int>());
- for (Iter j = first; j != i; ++j)
- assert(std::greater<int>()(*j, value));
- for (Iter j = i; j != last; ++j)
- assert(!std::greater<int>()(*j, value));
+ std::size_t strides{};
+ std::size_t displacement{};
+ stride_counting_iterator f(first, &strides, &displacement);
+ stride_counting_iterator l(last, &strides, &displacement);
+
+ std::size_t comparisons{};
+ auto cmp = [&comparisons](int rhs, int lhs) {
+ ++comparisons;
+ return std::greater<int>()(rhs, lhs);
+ };
+
+ auto i = std::lower_bound(f, l, value, cmp);
+
+ for (auto j = f; j != i; ++j)
+ assert(std::greater<int>()(*j, value));
+ for (auto j = i; j != l; ++j)
+ assert(!std::greater<int>()(*j, value));
+
+ auto len = std::distance(first, last);
+ assert(strides <= 2.5 * len + 1);
+ assert(displacement <= 2.5 * len + 1);
+ assert(comparisons <= 2 * ceil(log(len + 1) + 2));
}
template <class Iter>
diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 1133b9597d09c..3b86a93564e4b 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -730,7 +730,9 @@ struct common_input_iterator {
// * `stride_displacement`, which records the displacement of the calls. This means that both
// op++/op+= will increase the displacement counter by 1, and op--/op-= will decrease the
// displacement counter by 1.
-template <class It>
+template <class It,
+ class StrideCountType = std::iter_difference_t<It>,
+ class StrideDisplacementType = std::iter_difference_t<It>>
class stride_counting_iterator {
public:
using value_type = typename iter_value_or_void<It>::type;
@@ -743,16 +745,40 @@ class stride_counting_iterator {
std::conditional_t<std::input_iterator<It>, std::input_iterator_tag,
/* else */ std::output_iterator_tag
>>>>>;
+ using iterator_category = iterator_concept;
stride_counting_iterator() requires std::default_initializable<It> = default;
constexpr explicit stride_counting_iterator(It const& it) : base_(base(it)) { }
+ constexpr explicit stride_counting_iterator(
+ It const& it, StrideCountType* stride_count, StrideDisplacementType* stride_displacement)
+ : base_(base(it)), stride_count_(stride_count), stride_displacement_(stride_displacement) {}
+
+ constexpr stride_counting_iterator(const stride_counting_iterator& o) { *this = o; }
+ constexpr stride_counting_iterator(stride_counting_iterator&& o) { *this = o; }
+
+ constexpr stride_counting_iterator& operator=(const stride_counting_iterator& o) { // copy-assign: takes o's position; counters are copied by value or shared depending on who owns o's counters
+ base_ = o.base_;
+ // if the memory backing the counts is owned by `o` itself (its pointers target its own *_default_ members), copy values
+ if (o.stride_count_ == &o.stride_count_default_) {
+ assert(o.stride_displacement_ == &o.stride_displacement_default_); // both counters are expected to be self-owned together
+ *stride_count_ = *o.stride_count_; // written through this object's current pointer, whatever it currently targets
+ *stride_displacement_ = *o.stride_displacement_;
+ return *this;
+ }
+ // otherwise share the same externally-owned variables, so both iterators update the same counters
+ stride_count_ = o.stride_count_;
+ stride_displacement_ = o.stride_displacement_;
+ return *this;
+ }
+ constexpr stride_counting_iterator& operator=(stride_counting_iterator&& o) { return *this = o; }
+
friend constexpr It base(stride_counting_iterator const& it) { return It(it.base_); }
- constexpr difference_type stride_count() const { return stride_count_; }
+ constexpr StrideCountType stride_count() const { return *stride_count_; }
- constexpr difference_type stride_displacement() const { return stride_displacement_; }
+ constexpr StrideDisplacementType stride_displacement() const { return *stride_displacement_; }
constexpr decltype(auto) operator*() const { return *It(base_); }
@@ -761,8 +787,8 @@ class stride_counting_iterator {
constexpr stride_counting_iterator& operator++() {
It tmp(base_);
base_ = base(++tmp);
- ++stride_count_;
- ++stride_displacement_;
+ ++*stride_count_;
+ ++*stride_displacement_;
return *this;
}
@@ -781,8 +807,8 @@ class stride_counting_iterator {
{
It tmp(base_);
base_ = base(--tmp);
- ++stride_count_;
- --stride_displacement_;
+ ++*stride_count_;
+ --*stride_displacement_;
return *this;
}
@@ -799,8 +825,8 @@ class stride_counting_iterator {
{
It tmp(base_);
base_ = base(tmp += n);
- ++stride_count_;
- ++stride_displacement_;
+ ++*stride_count_;
+ ++*stride_displacement_;
return *this;
}
@@ -809,8 +835,8 @@ class stride_counting_iterator {
{
It tmp(base_);
base_ = base(tmp -= n);
- ++stride_count_;
- --stride_displacement_;
+ ++*stride_count_;
+ --*stride_displacement_;
return *this;
}
@@ -873,8 +899,11 @@ class stride_counting_iterator {
private:
decltype(base(std::declval<It>())) base_;
- difference_type stride_count_ = 0;
- difference_type stride_displacement_ = 0;
+ StrideCountType stride_count_default_ = 0;
+ StrideDisplacementType stride_displacement_default_ = 0;
+
+ StrideCountType* stride_count_ = &stride_count_default_;
+ StrideDisplacementType* stride_displacement_ = &stride_displacement_default_;
};
template <class It>
stride_counting_iterator(It) -> stride_counting_iterator<It>;
>From f6bcf2743080ced55d9d589daed611c5e9696ac5 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:52:37 +0100
Subject: [PATCH 2/4] [libc++] Introduce one-sided binary search for
lower_bound on non-random iterators.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the
container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
would yield Ω(n*log(n)) comparisons and, for non-random-access iterators, Ω(n^2) iterator increments, whereas the
one-sided version will yield O(n) operations on both counts, with an Ω(log(n)) bound on the number of comparisons.
---
.../include/__algorithm/iterator_operations.h | 47 +++++++++++++
libcxx/include/__algorithm/lower_bound.h | 69 +++++++++++++++++--
2 files changed, 110 insertions(+), 6 deletions(-)
diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h
index e6176da4f5606..d73573747087e 100644
--- a/libcxx/include/__algorithm/iterator_operations.h
+++ b/libcxx/include/__algorithm/iterator_operations.h
@@ -87,6 +87,53 @@ struct _IterOps<_ClassicAlgPolicy> {
std::advance(__iter, __count);
}
+ // advance with sentinel, a la std::ranges::advance: forwards to the __advance overload selected by _Iter's iterator_category
+ // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe:
+ // use the incoming type (_Distance) for returning and steer clear of negative overflows
+ template <class _Iter, class _Distance>
+ _LIBCPP_HIDE_FROM_ABI constexpr static _Distance advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) {
+ return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category()); // tag dispatch; __iter is modified in place by the callee
+ }
+
+ // advance with sentinel, a la std::ranges::advance -- InputIterator specialization: steps forward one element at a time and returns how many of the requested steps were NOT taken
+ template <class _InputIter, class _Distance>
+ _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+ __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) {
+ _Distance __dist{}; // number of increments performed so far
+ for (; __dist < __count && __iter != __sentinel; ++__dist) // stop after __count steps or on reaching __sentinel; never runs for __count <= 0
+ ++__iter;
+ return __count - __dist; // 0 when all requested steps were taken; equals __count when the loop never ran
+ }
+
+ // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization: steps forward for __count >= 0, backward otherwise, stopping early at __sentinel
+ template <class _BiDirIter, class _Distance>
+ _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+ __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) {
+ _Distance __dist{}; // number of steps performed so far (always counted upwards, in either direction)
+ if (__count >= 0)
+ for (; __dist < __count && __iter != __sentinel; ++__dist)
+ ++__iter;
+ else
+ for (__count = -__count; __dist < __count && __iter != __sentinel; ++__dist) // __count is replaced by its magnitude before walking backwards
+ --__iter;
+ return __count - __dist; // NOTE(review): after the negation above this remainder is non-negative even for a negative request, whereas the random-access overload returns a non-positive remainder -- confirm which sign callers expect
+ }
+
+ // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization: clamps __count to the distance to __sentinel and jumps with a single +=
+ template <class _RandIter, class _Distance>
+ _LIBCPP_HIDE_FROM_ABI constexpr static _Distance
+ __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) {
+ auto __dist = _IterOps::distance(__iter, __sentinel); // signed distance from __iter to __sentinel; NOTE(review): the assertion below also fires when this is 0 and __count < 0 -- confirm that is intended
+ _LIBCPP_ASSERT_UNCATEGORIZED(
+ __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count<0");
+ if (__count < 0)
+ __dist = __dist > __count ? __dist : __count; // moving backwards: take whichever of the two values is larger (closer to zero)
+ else
+ __dist = __dist < __count ? __dist : __count; // moving forwards: take the smaller of the two values
+ __iter += __dist;
+ return __count - __dist; // portion of the request that was not performed; keeps the sign of __count
+ }
+
// distance
template <class _Iter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 91c3bdaafd0cf..b432829667fa9 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -27,11 +27,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
-_Iter __lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
- auto __len = _IterOps<_AlgPolicy>::distance(__first, __last);
-
+template <class _AlgPolicy, class _Iter, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting(
+ _Iter __first,
+ const _Type& __value,
+ typename iterator_traits<_Iter>::difference_type __len,
+ _Comp& __comp,
+ _Proj& __proj) {
while (__len != 0) {
auto __l2 = std::__half_positive(__len);
_Iter __m = __first;
@@ -46,13 +48,68 @@ _Iter __lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp& __
return __first;
}
+// One-sided binary search, aka meta binary search, has been in the public domain for decades, and has the general
+// advantage of being Ω(1) rather than the classic algorithm's Ω(log(n)), with the downside of executing at most
+// 2*(log(n)-1) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines:
+// the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the
+// container's size upfront, which adds Ω(n) iterator increments to the complexity. The second one is when you're
+// traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm
+// would yield Ω(n*log(n)) comparisons and, for non-random-access iterators, Ω(n^2) iterator increments, whereas the
+// one-sided version will yield O(n) operations on both counts, with an Ω(log(n)) bound on the number of comparisons.
+template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
+__lower_bound_onesided(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) {
+ // static_assert(std::is_base_of<std::forward_iterator_tag, typename _IterOps<_AlgPolicy>::template
+ // __iterator_category<_Iter>>::value,
+ // "lower_bound() is a multipass algorithm and requires forward iterator or better");
+
+ using _Distance = typename iterator_traits<_Iter>::difference_type;
+ for (_Distance __step = 1; __first != __last; __step <<= 1) { // the probe distance doubles on every iteration
+ auto __it = __first;
+ auto __dist = __step - _IterOps<_AlgPolicy>::advance(__it, __step, __last); // elements actually stepped over (__step minus the remainder); NOTE(review): the sentinel-taking advance in this patch declares its sentinel as const _Iter&, so this presumably needs _Sent to be _Iter -- confirm
+ // once we reach the last range where needle can be we must start
+ // looking inwards, bisecting that range
+ if (__it == __last || !std::__invoke(__comp, std::__invoke(__proj, *__it), __value)) {
+ return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj); // classic bisection over the __dist elements starting at __first
+ }
+ // range not found, move forward!
+ __first = std::move(__it);
+ }
+ return __first; // only reached when the range is empty on entry: any iteration that reaches __last returns above
+}
+
+template <class _AlgPolicy, class _InputIter, class _Sent, class _Type, class _Proj, class _Comp> // tag-dispatch target for the input_iterator_tag overload: delegates to the one-sided search
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIter __lower_bound(
+ _InputIter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj, std::input_iterator_tag) {
+ return std::__lower_bound_onesided<_AlgPolicy>(__first, __last, __value, __comp, __proj); // never computes the length of the range up front
+}
+
+template <class _AlgPolicy, class _RandIter, class _Sent, class _Type, class _Proj, class _Comp> // tag-dispatch target for random_access_iterator_tag: keeps the classic bisecting search
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter __lower_bound(
+ _RandIter __first,
+ _Sent __last,
+ const _Type& __value,
+ _Comp& __comp,
+ _Proj& __proj,
+ std::random_access_iterator_tag) {
+ const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last); // length of the range, computed once and handed to the bisection
+ return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj);
+}
+
+template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp> // entry point: selects one of the tagged __lower_bound overloads from the iterator category of _Iter
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
+__lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp&& __comp, _Proj&& __proj) {
+ return std::__lower_bound<_AlgPolicy>(
+ __first, __last, __value, __comp, __proj, typename _IterOps<_AlgPolicy>::template __iterator_category<_Iter>()); // __comp and __proj are passed on as lvalues; the tagged overloads take them by non-const reference
+}
+
template <class _ForwardIterator, class _Tp, class _Compare>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
_ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value,
"The comparator has to be callable");
auto __proj = std::__identity();
- return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, __comp, __proj);
+ return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, std::move(__comp), std::move(__proj));
}
template <class _ForwardIterator, class _Tp>
>From 36bb63e36b56f98da2b808ab55410bec5c1d0bb5 Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:53:09 +0100
Subject: [PATCH 3/4] [libc++][test] Add set_intersection complexity validation
tests prior to introducing use of one-sided binary search to fast-forward
over ranges of elements.
---
.../ranges_set_intersection.pass.cpp | 240 +++++++++++++++++-
1 file changed, 234 insertions(+), 6 deletions(-)
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
index 0ee89e0131a07..30cedd19038d7 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp
@@ -28,6 +28,9 @@
#include <algorithm>
#include <array>
#include <concepts>
+#include <cstddef>
+#include <iterator>
+#include <type_traits>
#include "almost_satisfies_types.h"
#include "MoveOnly.h"
@@ -93,14 +96,17 @@ static_assert(!HasSetIntersectionRange<UncheckedRange<MoveOnly*>, UncheckedRange
using std::ranges::set_intersection_result;
+// TODO: std::ranges::set_intersection calls std::ranges::copy
+// std::ranges::copy(contiguous_iterator<int*>, sentinel_wrapper<contiguous_iterator<int*>>, contiguous_iterator<int*>) doesn't seem to work.
+// It seems that std::ranges::copy calls std::copy, which unwraps contiguous_iterator<int*> into int*,
+// and then fails because there is no == between int* and sentinel_wrapper<contiguous_iterator<int*>>
+template <typename Iter>
+using SentinelWorkaround = std::conditional_t<std::contiguous_iterator<Iter>, Iter, sentinel_wrapper<Iter>>; // contiguous iterators act as their own sentinel; everything else gets sentinel_wrapper<Iter>
+
template <class In1, class In2, class Out, std::size_t N1, std::size_t N2, std::size_t N3>
constexpr void testSetIntersectionImpl(std::array<int, N1> in1, std::array<int, N2> in2, std::array<int, N3> expected) {
- // TODO: std::ranges::set_intersection calls std::ranges::copy
- // std::ranges::copy(contiguous_iterator<int*>, sentinel_wrapper<contiguous_iterator<int*>>, contiguous_iterator<int*>) doesn't seem to work.
- // It seems that std::ranges::copy calls std::copy, which unwraps contiguous_iterator<int*> into int*,
- // and then it failed because there is no == between int* and sentinel_wrapper<contiguous_iterator<int*>>
- using Sent1 = std::conditional_t<std::contiguous_iterator<In1>, In1, sentinel_wrapper<In1>>;
- using Sent2 = std::conditional_t<std::contiguous_iterator<In2>, In2, sentinel_wrapper<In2>>;
+ using Sent1 = SentinelWorkaround<In1>;
+ using Sent2 = SentinelWorkaround<In2>;
// iterator overload
{
@@ -272,6 +278,225 @@ constexpr void runAllIteratorPermutationsTests() {
static_assert(withAllPermutationsOfInIter1AndInIter2<contiguous_iterator<int*>>());
}
+namespace {
+struct [[nodiscard]] OperationCounts { // tallies of the work performed during one set_intersection call
+ std::size_t comparisons{}; // number of comparator invocations
+ struct PerInput { // counters kept separately for each of the two input ranges
+ std::size_t proj{}; // number of projection invocations
+ std::size_t iterator_strides{}; // written through the stride-count pointer of stride_counting_iterator
+ std::ptrdiff_t iterator_displacement{}; // written through the stride-displacement pointer of stride_counting_iterator
+
+ // IGNORES proj! Only the two iterator counters take part in equality.
+ [[nodiscard]] constexpr bool operator==(const PerInput& o) const {
+ return iterator_strides == o.iterator_strides && iterator_displacement == o.iterator_displacement;
+ }
+
+ [[nodiscard]] constexpr bool matchesExpectation(const PerInput& expect) { // true when every counter is <= its expected upper bound; NOTE(review): not const-qualified although it only reads members
+ return proj <= expect.proj && iterator_strides <= expect.iterator_strides &&
+ iterator_displacement <= expect.iterator_displacement;
+ }
+ };
+ std::array<PerInput, 2> in; // in[0] is the first input range, in[1] the second
+
+ [[nodiscard]] constexpr bool matchesExpectation(const OperationCounts& expect) { // upper-bound check over the comparison count and both per-input counter sets
+ return comparisons <= expect.comparisons && in[0].matchesExpectation(expect.in[0]) &&
+ in[1].matchesExpectation(expect.in[1]);
+ }
+
+ [[nodiscard]] constexpr bool operator==(const OperationCounts& o) const { // exact equality; per-input proj counts are ignored by PerInput::operator==
+ return comparisons == o.comparisons && std::ranges::equal(in, o.in);
+ }
+};
+} // namespace
+
+#include <iostream>
+template <template <class...> class In1, // runs std::set_intersection over counting iterators and asserts both the output and the operation counts
+ template <class...>
+ class In2,
+ class Out,
+ std::size_t N1,
+ std::size_t N2,
+ std::size_t N3>
+constexpr void testSetIntersectionAndReturnOpCounts( // NOTE(review): despite the name, this returns void and asserts internally
+ std::array<int, N1> in1,
+ std::array<int, N2> in2,
+ std::array<int, N3> expected,
+ const OperationCounts& expectedOpCounts) {
+ OperationCounts ops; // filled in by the comparator and by the counting iterators below
+
+ const auto comp = [&ops](int x, int y) { // counts every comparison before delegating to operator<
+ ++ops.comparisons;
+ return x < y;
+ };
+
+ std::array<int, N3> out;
+
+ stride_counting_iterator b1( // begin/end of each input share that input's counters through the pointers passed here
+ In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+ stride_counting_iterator e1(
+ In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+ stride_counting_iterator b2(
+ In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+ stride_counting_iterator e2(
+ In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+
+ std::set_intersection(b1, e1, b2, e2, Out(out.data()), comp); // classic overload takes no projections, so the proj counters stay at zero here
+
+ assert(std::ranges::equal(out, expected));
+ assert(ops.matchesExpectation(expectedOpCounts)); // expected counts act as upper bounds
+}
+
+template <template <class...> class In1, // runs std::ranges::set_intersection over counting iterators and asserts output, returned positions and operation counts
+ template <class...>
+ class In2,
+ class Out,
+ std::size_t N1,
+ std::size_t N2,
+ std::size_t N3>
+constexpr void testRangesSetIntersectionAndReturnOpCounts( // NOTE(review): despite the name, this returns void and asserts internally
+ std::array<int, N1> in1,
+ std::array<int, N2> in2,
+ std::array<int, N3> expected,
+ const OperationCounts& expectedOpCounts) {
+ OperationCounts ops; // filled in by the comparator, the projections and the counting iterators below
+
+ const auto comp = [&ops](int x, int y) { // counts every comparison before delegating to operator<
+ ++ops.comparisons;
+ return x < y;
+ };
+
+ const auto proj1 = [&ops](const int& i) { // identity projection that counts its uses on the first input
+ ++ops.in[0].proj;
+ return i;
+ };
+
+ const auto proj2 = [&ops](const int& i) { // identity projection that counts its uses on the second input
+ ++ops.in[1].proj;
+ return i;
+ };
+
+ std::array<int, N3> out;
+
+ stride_counting_iterator b1( // begin/end of each input share that input's counters through the pointers passed here
+ In1<decltype(in1.begin())>(in1.begin()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+ stride_counting_iterator e1(
+ In1<decltype(in1.end()) >(in1.end()), &ops.in[0].iterator_strides, &ops.in[0].iterator_displacement);
+ stride_counting_iterator b2(
+ In2<decltype(in2.begin())>(in2.begin()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+ stride_counting_iterator e2(
+ In2<decltype(in2.end()) >(in2.end()), &ops.in[1].iterator_strides, &ops.in[1].iterator_displacement);
+
+ std::ranges::subrange r1{b1, SentinelWorkaround<decltype(e1)>{e1}};
+ std::ranges::subrange r2{b2, SentinelWorkaround<decltype(e2)>{e2}};
+ std::same_as<set_intersection_result<decltype(e1), decltype(e2), Out>> decltype(auto) result = // also pins the exact result type
+ std::ranges::set_intersection(r1, r2, Out{out.data()}, comp, proj1, proj2);
+ assert(std::ranges::equal(out, expected));
+ assert(base(result.in1) == base(e1)); // both returned input iterators must sit at the end of their ranges
+ assert(base(result.in2) == base(e2));
+ assert(base(result.out) == out.data() + out.size());
+ assert(ops.matchesExpectation(expectedOpCounts)); // expected counts act as upper bounds
+}
+
+template <template <typename...> class In1, template <typename...> class In2, class Out> // runs four fixed scenarios through both the classic and the ranges test drivers
+constexpr void testComplexityParameterizedIter() {
+ // Worst-case complexity:
+ // Let N=(last1 - first1) and M=(last2 - first2)
+ // At most 2*(N+M) - 1 comparisons and applications of each projection.
+ // At most 2*(N+M) iterator mutations.
+ { // scenario 1: interleaved inputs with no common element
+ std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+ std::array r2{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+ std::array<int, 0> expected{};
+
+ OperationCounts expectedCounts; // every field is an upper bound (the drivers check with matchesExpectation, which uses <=)
+ expectedCounts.comparisons = 37;
+ expectedCounts.in[0].proj = 37;
+ expectedCounts.in[0].iterator_strides = 30;
+ expectedCounts.in[0].iterator_displacement = 30;
+ expectedCounts.in[1] = expectedCounts.in[0];
+
+ testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+ testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+ }
+
+ { // scenario 2: identical inputs, every element is part of the intersection
+ std::array r1{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+ std::array r2{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+ std::array expected{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ OperationCounts expectedCounts;
+ expectedCounts.comparisons = 38;
+ expectedCounts.in[0].proj = 38;
+ expectedCounts.in[0].iterator_strides = 30;
+ expectedCounts.in[0].iterator_displacement = 30;
+ expectedCounts.in[1] = expectedCounts.in[0];
+
+ testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+ testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+ }
+
+ // Lower complexity when there is low overlap between ranges: we can make 2*log(X) comparisons when one range
+ // has X elements that can be skipped over.
+ { // scenario 3: a single-element second input that matches the last element of the first
+ std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ std::array r2{15};
+ std::array expected{15};
+
+ OperationCounts expectedCounts;
+ expectedCounts.comparisons = 8;
+ expectedCounts.in[0].proj = 8;
+ expectedCounts.in[0].iterator_strides = 24;
+ expectedCounts.in[0].iterator_displacement = 24;
+ expectedCounts.in[1].proj = 8;
+ expectedCounts.in[1].iterator_strides = 3;
+ expectedCounts.in[1].iterator_displacement = 3;
+
+ testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+ testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+ }
+
+ { // scenario 4: second input brackets the first (one value below, one above) with no common element
+ std::array r1{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ std::array r2{0, 16};
+ std::array<int, 0> expected{};
+
+ OperationCounts expectedCounts;
+ expectedCounts.comparisons = 10;
+ expectedCounts.in[0].proj = 10;
+ expectedCounts.in[0].iterator_strides = 24;
+ expectedCounts.in[0].iterator_displacement = 24;
+ expectedCounts.in[1].proj = 10;
+ expectedCounts.in[1].iterator_strides = 4;
+ expectedCounts.in[1].iterator_displacement = 4;
+
+ testSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+ testRangesSetIntersectionAndReturnOpCounts<In1, In2, Out>(r1, r2, expected, expectedCounts);
+ }
+}
+
+template <template <typename...> class In2, class Out> // fixes In2 and Out, and runs the complexity scenarios once per first-input iterator wrapper
+constexpr void testComplexityParameterizedIterPermutateIn1() {
+ //common_input_iterator
+ testComplexityParameterizedIter<forward_iterator, In2, Out>();
+ testComplexityParameterizedIter<bidirectional_iterator, In2, Out>();
+ testComplexityParameterizedIter<random_access_iterator, In2, Out>();
+}
+
+template <class Out> // fixes Out, and runs the In1 permutations once per second-input iterator wrapper
+constexpr void testComplexityParameterizedIterPermutateIn1In2() {
+ testComplexityParameterizedIterPermutateIn1<forward_iterator, Out>();
+ testComplexityParameterizedIterPermutateIn1<bidirectional_iterator, Out>();
+ testComplexityParameterizedIterPermutateIn1<random_access_iterator, Out>();
+}
+
+constexpr bool testComplexityMultipleTypes() { // runs every In1/In2 permutation for three output iterator types; always returns true so it can be used in a static_assert
+ //testComplexityParameterizedIter<cpp20_input_iterator, random_access_iterator, OutIter>();
+ testComplexityParameterizedIterPermutateIn1In2<forward_iterator<int*>>();
+ testComplexityParameterizedIterPermutateIn1In2<bidirectional_iterator<int*>>();
+ testComplexityParameterizedIterPermutateIn1In2<random_access_iterator<int*>>();
+ return true;
+}
+
constexpr bool test() {
// check that every element is copied exactly once
{
@@ -572,5 +797,8 @@ int main(int, char**) {
// than the step limit.
runAllIteratorPermutationsTests();
+ testComplexityMultipleTypes();
+ static_assert(testComplexityMultipleTypes());
+
return 0;
}
>From c23272c389329d3af83c0f58f896ee6ea47260ed Mon Sep 17 00:00:00 2001
From: Iuri Chaer <ichaer at splunk.com>
Date: Tue, 17 Oct 2023 13:53:31 +0100
Subject: [PATCH 4/4] [libc++] Introduce use of __lower_bound_onesided to
improve average complexity of set_intersection.
---
libcxx/include/__algorithm/set_intersection.h | 154 +++++++++++++++++-
1 file changed, 150 insertions(+), 4 deletions(-)
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index f2603fe1365ac..556738022f485 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -12,9 +12,13 @@
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/lower_bound.h>
#include <__config>
+#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/next.h>
+#include <__type_traits/is_same.h>
+#include <__utility/exchange.h>
#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -36,9 +40,122 @@ struct __set_intersection_result {
};
+// Function object that computes the intersection of two ranges by alternating between them: each step
+// fast-forwards one range with __lower_bound_onesided, using the current element of the other range as the
+// search value. The iterator, sentinel, output and comparator members are all references, so the objects handed
+// to the constructor are modified in place and must outlive this object.
template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-__set_intersection(
- _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+struct _LIBCPP_NODISCARD_EXT __set_intersector {
+ _InIter1& __first1_;
+ const _Sent1& __last1_;
+ _InIter2& __first2_;
+ const _Sent2& __last2_;
+ _OutIter& __result_;
+ _Compare& __comp_;
+ // identity projection passed through to __lower_bound_onesided
+ static constexpr auto __proj_ = std::__identity();
+ // true if the previous advance step moved its iterator; also set to true right after an element is written
+ bool __prev_advanced_ = true;
+
+ // binds (does not copy) the caller's iterators, sentinels, output iterator and comparator
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersector(
+ _InIter1& __first1, _Sent1& __last1, _InIter2& __first2, _Sent2& __last2, _OutIter& __result, _Compare& __comp)
+ : __first1_(__first1),
+ __last1_(__last1),
+ __first2_(__first2),
+ __last2_(__last2),
+ __result_(__result),
+ __comp_(__comp) {}
+
+ // Runs the intersection. Rvalue-qualified, so it is invoked as std::move(obj)().
+ // The returned result holds both input iterators after they have been passed to _IterOps::next together with
+ // their sentinels, plus the output iterator.
+ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+ operator()() && {
+ while (__first2_ != __last2_) {
+ // dereferences *__first2_, which the loop condition has just shown to be valid
+ __advance1_and_maybe_add_result();
+ // the next step dereferences *__first1_, so stop once range 1 is exhausted
+ if (__first1_ == __last1_)
+ break;
+ __advance2_and_maybe_add_result();
+ }
+ return __set_intersection_result<_InIter1, _InIter2, _OutIter>(
+ _IterOps<_AlgPolicy>::next(std::move(__first1_), std::move(__last1_)),
+ _IterOps<_AlgPolicy>::next(std::move(__first2_), std::move(__last2_)),
+ std::move(__result_));
+ }
+
+private:
+ // advance __iter to the first element in the range where !__comp_(*__iter, __value)
+ // add result if this is the second consecutive call without advancing
+ // this method only works if you alternate calls between __advance1_and_maybe_add_result() and
+ // __advance2_and_maybe_add_result()
+ template <class _Iter, class _Sent, class _Value>
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+ __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
+ // use one-sided lower bound for improved algorithmic complexity bounds
+ // __tmp keeps the position held before the search, so the comparison below tells whether __iter moved
+ const auto __tmp =
+ std::exchange(__iter, std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj_));
+ __add_output_unless(__tmp != __iter);
+ }
+
+ // advance __first1_ to the first element in the range where !__comp_(*__first1_, *__first2_)
+ // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance1_and_maybe_add_result() {
+ __advance_and_maybe_add_result(__first1_, __last1_, *__first2_);
+ }
+
+ // advance __first2_ to the first element in the range where !__comp_(*__first2_, *__first1_)
+ // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance2_and_maybe_add_result() {
+ __advance_and_maybe_add_result(__first2_, __last2_, *__first1_);
+ }
+
+ // If this step or the previous one moved an iterator, only record whether this step moved.
+ // Otherwise (two consecutive steps without movement) write *__first1_ to the output, step the output and both
+ // input iterators, and reset the flag to true so that the next step cannot immediately write again.
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __add_output_unless(bool __advanced) {
+ // NOTE(review): non-short-circuit `|` on two plain bools -- presumably deliberate; confirm
+ if (__advanced | __prev_advanced_) {
+ __prev_advanced_ = __advanced;
+ } else {
+ *__result_ = *__first1_;
+ ++__result_;
+ ++__first1_;
+ ++__first2_;
+ __prev_advanced_ = true;
+ }
+ }
+};
+
+// with forward iterators we can use binary search to skip over entries
+// Overload selected when both trailing tag arguments are (or convert to) std::forward_iterator_tag.
+// The iterators, sentinels and output iterator are taken by value; __set_intersector binds references to these
+// local copies and the result is produced by invoking it as an rvalue.
+template <class _AlgPolicy,
+ class _Compare,
+ class _InForwardIter1,
+ class _Sent1,
+ class _InForwardIter2,
+ class _Sent2,
+ class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>
+ __set_intersection(
+ _InForwardIter1 __first1,
+ _Sent1 __last1,
+ _InForwardIter2 __first2,
+ _Sent2 __last2,
+ _OutIter __result,
+ _Compare&& __comp,
+ std::forward_iterator_tag,
+ std::forward_iterator_tag) {
+ std::__set_intersector<_AlgPolicy, _Compare, _InForwardIter1, _Sent1, _InForwardIter2, _Sent2, _OutIter>
+ __intersector(__first1, __last1, __first2, __last2, __result, __comp);
+ // operator() is rvalue-qualified, hence the std::move on the named object
+ return std::move(__intersector)();
+}
+
+// input iterators are not suitable for multipass algorithms, so we stick to the classic single-pass version
+template <class _AlgPolicy,
+ class _Compare,
+ class _InInputIter1,
+ class _Sent1,
+ class _InInputIter2,
+ class _Sent2,
+ class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>
+ __set_intersection(
+ _InInputIter1 __first1,
+ _Sent1 __last1,
+ _InInputIter2 __first2,
+ _Sent2 __last2,
+ _OutIter __result,
+ _Compare&& __comp,
+ std::input_iterator_tag,
+ std::input_iterator_tag) {
while (__first1 != __last1 && __first2 != __last2) {
if (__comp(*__first1, *__first2))
++__first1;
@@ -52,12 +169,41 @@ __set_intersection(
}
}
- return __set_intersection_result<_InIter1, _InIter2, _OutIter>(
+ return std::__set_intersection_result<_InInputIter1, _InInputIter2, _OutIter>(
_IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)),
_IterOps<_AlgPolicy>::next(std::move(__first2), std::move(__last2)),
std::move(__result));
}
+// Resolves the iterator category tag used to choose a __set_intersection overload: the category that
+// _IterOps<_AlgPolicy> reports for _Iter when that type is well-formed, otherwise std::input_iterator_tag.
+// Helper names are reserved (__-prefixed) identifiers so that user-defined macros cannot interfere with them.
+template <class _AlgPolicy, class _Iter>
+class __set_intersection_iter_category {
+ template <class _It>
+ using __cat = typename std::_IterOps<_AlgPolicy>::template __iterator_category<_It>;
+ // preferred overload: only viable when __cat<_It> names a type (nullptr converts to __cat<_It>*)
+ template <class _It>
+ static auto __test(__cat<_It>*) -> __cat<_It>;
+ // fallback picked when the overload above is discarded by substitution failure
+ template <class>
+ static std::input_iterator_tag __test(...);
+
+public:
+ using __type = decltype(__test<_Iter>(nullptr));
+};
+
+// Entry point without tag arguments: forwards everything to the tag-dispatched __set_intersection overloads,
+// appending one default-constructed category tag per input iterator as computed by
+// __set_intersection_iter_category.
+// NOTE(review): std::forward is used below, but <__utility/forward.h> is not among the includes visible in this
+// patch -- confirm it is included directly rather than only transitively.
+template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
+ __set_intersection(
+ _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+ return std::__set_intersection<_AlgPolicy>(
+ std::move(__first1),
+ std::move(__last1),
+ std::move(__first2),
+ std::move(__last2),
+ std::move(__result),
+ std::forward<_Compare>(__comp),
+ typename std::__set_intersection_iter_category<_AlgPolicy, _InIter1>::__type(),
+ typename std::__set_intersection_iter_category<_AlgPolicy, _InIter2>::__type());
+}
+
template <class _InputIterator1, class _InputIterator2, class _OutputIterator, class _Compare>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_intersection(
_InputIterator1 __first1,
More information about the libcxx-commits
mailing list