[libcxx-commits] [libcxx] [libc++] Speed up set_intersection() by fast-forwarding over ranges of non-matching elements with one-sided binary search. (PR #75230)
Iuri Chaer via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Apr 28 06:16:00 PDT 2024
================
@@ -39,9 +44,139 @@ struct __set_intersection_result {
};
template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter>
-__set_intersection(
- _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) {
+struct _LIBCPP_NODISCARD_EXT __set_intersector {
+ _InIter1& __first1_;
+ const _Sent1& __last1_;
+ _InIter2& __first2_;
+ const _Sent2& __last2_;
+ _OutIter& __result_;
+ _Compare& __comp_;
+ bool __prev_advanced_ = true;
+
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersector(
+ _InIter1& __first1, _Sent1& __last1, _InIter2& __first2, _Sent2& __last2, _OutIter& __result, _Compare& __comp)
+ : __first1_(__first1),
+ __last1_(__last1),
+ __first2_(__first2),
+ __last2_(__last2),
+ __result_(__result),
+ __comp_(__comp) {}
+
+ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+ __set_intersection_result<_InIter1, _InIter2, _OutIter>
+ operator()() {
+ while (__first2_ != __last2_) {
+ __advance1_and_maybe_add_result();
+ if (__first1_ == __last1_)
+ break;
+ __advance2_and_maybe_add_result();
+ }
+ return __set_intersection_result<_InIter1, _InIter2, _OutIter>(
+ _IterOps<_AlgPolicy>::next(std::move(__first1_), std::move(__last1_)),
+ _IterOps<_AlgPolicy>::next(std::move(__first2_), std::move(__last2_)),
+ std::move(__result_));
+ }
+
+private:
+ // advance __iter to the first element in the range where !__comp_(__iter, __value)
+ // add result if this is the second consecutive call without advancing
+ // this method only works if you alternate calls between __advance1_and_maybe_add_result() and
+ // __advance2_and_maybe_add_result()
+ template <class _Iter, class _Sent, class _Value>
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+ __advance_and_maybe_add_result(_Iter& __iter, const _Sent& __sentinel, const _Value& __value) {
+ _LIBCPP_CONSTEXPR std::__identity __proj;
+ // use one-sided binary search for improved algorithmic complexity bounds
+ // understanding how we can use binary search and still respect complexity
+ // guarantees is _not_ straightforward, so let me explain: the guarantee
+ // is "at most 2*(N+M)-1 comparisons", and one-sided binary search will
+ // necessarily overshoot depending on the position of the needle in the
+ // haystack -- for instance, if we're searching for 3 in (1, 2, 3, 4),
+ // we'll check if 3<1, then 3<2, then 3<4, and, finally, 3<3, for a total of
+ // 4 comparisons, when linear search would have yielded 3. However,
+ // because we won't need to perform the intervening reciprocal comparisons
+ // (ie 1<3, 2<3, 4<3), that extra comparison doesn't run afoul of the
+ // guarantee. Additionally, this type of scenario can only happen for match
+ // distances of up to 5 elements, because 2*log2(8) is 6, and we'll still
+ // be worse-off at position 5 of an 8-element set. From then onwards
+ // these scenarios can't happen.
+ // TL;DR: we'll be 1 comparison worse-off compared to the classic linear-
+ // searching algorithm if matching position 3 of a set with 4 elements,
+ // or position 5 if the set has 7 or 8 elements, but we'll never exceed
+ // the complexity guarantees from the standard.
+ _Iter __tmp = std::__lower_bound_onesided<_AlgPolicy>(__iter, __sentinel, __value, __comp_, __proj);
+ std::swap(__tmp, __iter);
+ __add_output_unless(__tmp != __iter);
+ }
+
+ // advance __first1_ to the first element in the range where !__comp_(*__first1_, *__first2_)
+ // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance1_and_maybe_add_result() {
+ __advance_and_maybe_add_result(__first1_, __last1_, *__first2_);
+ }
+
+ // advance __first2_ to the first element in the range where !__comp_(*__first2_, *__first1_)
+ // add result if neither __first1_ nor __first2_ advanced in the last attempt (meaning they are equal)
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __advance2_and_maybe_add_result() {
+ __advance_and_maybe_add_result(__first2_, __last2_, *__first1_);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __add_output_unless(bool __advanced) {
+ if (__advanced | __prev_advanced_) {
+ __prev_advanced_ = __advanced;
+ } else {
+ *__result_ = *__first1_;
+ ++__result_;
+ ++__first1_;
+ ++__first2_;
+ __prev_advanced_ = true;
+ }
+ }
+};
+
+// with forward iterators we can use binary search to skip over entries
----------------
ichaer wrote:
I've moved comments around and tried to make it clearer, see what you think.
https://github.com/llvm/llvm-project/pull/75230
More information about the libcxx-commits
mailing list