[libcxx-commits] [libcxx] [libc++] Optimize make_heap() and sift_down() (PR #121480)
Yang Kun via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Jan 2 06:34:09 PST 2025
https://github.com/omikrun created https://github.com/llvm/llvm-project/pull/121480
I was reading the source code of libc++ for the purpose of learning, then I found some possible optimizations for heap operations.
Here is a small benchmark to show it (all of these uses `std::vector<int>::iterator`).
| Before | After |
| ---------- | ---------- |
| 47954600ns | 35104600ns |
| 47842000ns | 34409900ns |
| 47859100ns | 39392700ns |
| 48278100ns | 34362900ns |
| 47680500ns | 34356800ns |
I removed the boundary check in `__sift_down()` due to it always evaluates to false I think.
I've also changed the type of `__start` in `__sift_down()` to difference_type to achieve small acceleration in performance for continuous iterator. However, in fact, I'm not sure whether it will affect random access iterators like `std::deque<int>::iterator`. Here is the benchmark result.
| Before | After |
| ---------- | ---------- |
| 37261800ns | 34256200ns |
| 37620500ns | 34649800ns |
| 36458900ns | 34274700ns |
| 37833700ns | 34543900ns |
| 37174000ns | 33924300ns |
I would really appreciate it if you could figure out the wrongs in my changes.
>From d31cea6d9fee2524758426e719bc9802370b663e Mon Sep 17 00:00:00 2001
From: Yang Kun <193369907+omikrun at users.noreply.github.com>
Date: Thu, 2 Jan 2025 22:11:35 +0800
Subject: [PATCH] [libc++] Optimize make_heap() and sift_down()
---
libcxx/include/__algorithm/make_heap.h | 9 +--
libcxx/include/__algorithm/partial_sort.h | 2 +-
.../include/__algorithm/partial_sort_copy.h | 2 +-
libcxx/include/__algorithm/sift_down.h | 61 +++++++------------
.../include/__cxx03/__algorithm/make_heap.h | 9 +--
.../__cxx03/__algorithm/partial_sort.h | 2 +-
.../__cxx03/__algorithm/partial_sort_copy.h | 2 +-
.../include/__cxx03/__algorithm/sift_down.h | 61 +++++++------------
8 files changed, 58 insertions(+), 90 deletions(-)
diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index e8f0cdb27333a4..0d58cd80628d7e 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -34,10 +34,11 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
difference_type __n = __last - __first;
if (__n > 1) {
- // start from the first parent, there is no need to consider children
- for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) {
- std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
- }
+ difference_type __start = __n / 2;
+ do
+ // start from the first parent, there is no need to consider children
+ std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, --__start);
+ while (__start != 0);
}
}
diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h
index 7f8d0c49147e3a..46e48c33f798d5 100644
--- a/libcxx/include/__algorithm/partial_sort.h
+++ b/libcxx/include/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
for (; __i != __last; ++__i) {
if (__comp(*__i, *__first)) {
_IterOps<_AlgPolicy>::iter_swap(__i, __first);
- std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+ std::__sift_down<_AlgPolicy>(__first, __comp, __len, 0);
}
}
std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);
diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 172f53b290d548..a4437e4f932cee 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
for (; __first != __last; ++__first)
if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
*__result_first = *__first;
- std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+ std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, 0);
}
std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
}
diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index 42803e30631fb1..1ee31eed5ba65e 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -29,54 +29,37 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__sift_down(_RandomAccessIterator __first,
_Compare&& __comp,
typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- _RandomAccessIterator __start) {
+ typename iterator_traits<_RandomAccessIterator>::difference_type __start) {
using _Ops = _IterOps<_AlgPolicy>;
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
- // left-child of __start is at 2 * __start + 1
- // right-child of __start is at 2 * __start + 2
- difference_type __child = __start - __first;
-
- if (__len < 2 || (__len - 2) / 2 < __child)
- return;
-
- __child = 2 * __child + 1;
- _RandomAccessIterator __child_i = __first + __child;
-
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
- // right-child exists and is greater than left-child
- ++__child_i;
- ++__child;
- }
-
- // check if we are in heap-order
- if (__comp(*__child_i, *__start))
- // we are, __start is larger than its largest child
- return;
-
- value_type __top(_Ops::__iter_move(__start));
- do {
- // we are not in heap-order, swap the parent with its largest child
- *__start = _Ops::__iter_move(__child_i);
- __start = __child_i;
-
- if ((__len - 2) / 2 < __child)
- break;
- // recompute the child based off of the updated parent
- __child = 2 * __child + 1;
- __child_i = __first + __child;
+ _LIBCPP_ASSERT_INTERNAL(__len >= 2, "shouldn't be called unless __len >= 2");
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
- // right-child exists and is greater than left-child
- ++__child_i;
- ++__child;
+ value_type __top(_Ops::__iter_move(__first + __start));
+ // left-child of __start is at 2 * __start + 1
+ // right-child of __start is at __child_i + 1
+ for (difference_type __child; (__child = 2 * __start + 1) < __len;) {
+ _RandomAccessIterator __child_i = __first + __child;
+ if (__child + 1 < __len) {
+ _RandomAccessIterator __right_child_i = _Ops::next(__child_i);
+ if (__comp(*__child_i, *__right_child_i)) {
+ // right-child exists and is greater than left-child
+ __child_i = __right_child_i;
+ ++__child;
+ }
}
// check if we are in heap-order
- } while (!__comp(*__child_i, __top));
- *__start = std::move(__top);
+ if (!__comp(*__child_i, __top))
+ break;
+
+ // we are not in heap-order, swap the parent with its largest child
+ *(__first + __start) = _Ops::__iter_move(__child_i);
+ __start = __child;
+ }
+ *(__first + __start) = std::move(__top);
}
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
diff --git a/libcxx/include/__cxx03/__algorithm/make_heap.h b/libcxx/include/__cxx03/__algorithm/make_heap.h
index 35a7f7bf9779f4..95103bbf438a67 100644
--- a/libcxx/include/__cxx03/__algorithm/make_heap.h
+++ b/libcxx/include/__cxx03/__algorithm/make_heap.h
@@ -34,10 +34,11 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
difference_type __n = __last - __first;
if (__n > 1) {
- // start from the first parent, there is no need to consider children
- for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) {
- std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
- }
+ difference_type __start = __n / 2;
+ do
+ // start from the first parent, there is no need to consider children
+ std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, --__start);
+ while (__start != 0);
}
}
diff --git a/libcxx/include/__cxx03/__algorithm/partial_sort.h b/libcxx/include/__cxx03/__algorithm/partial_sort.h
index 04597fc32b9a2e..c354ae4de6f5a6 100644
--- a/libcxx/include/__cxx03/__algorithm/partial_sort.h
+++ b/libcxx/include/__cxx03/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
for (; __i != __last; ++__i) {
if (__comp(*__i, *__first)) {
_IterOps<_AlgPolicy>::iter_swap(__i, __first);
- std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+ std::__sift_down<_AlgPolicy>(__first, __comp, __len, 0);
}
}
std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);
diff --git a/libcxx/include/__cxx03/__algorithm/partial_sort_copy.h b/libcxx/include/__cxx03/__algorithm/partial_sort_copy.h
index d4b5fafba96784..5c755dfecddd54 100644
--- a/libcxx/include/__cxx03/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__cxx03/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
for (; __first != __last; ++__first)
if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
*__result_first = *__first;
- std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+ std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, 0);
}
std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
}
diff --git a/libcxx/include/__cxx03/__algorithm/sift_down.h b/libcxx/include/__cxx03/__algorithm/sift_down.h
index 774a6d2450d578..d45ff5dfa674c1 100644
--- a/libcxx/include/__cxx03/__algorithm/sift_down.h
+++ b/libcxx/include/__cxx03/__algorithm/sift_down.h
@@ -29,54 +29,37 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__sift_down(_RandomAccessIterator __first,
_Compare&& __comp,
typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- _RandomAccessIterator __start) {
+ typename iterator_traits<_RandomAccessIterator>::difference_type __start) {
using _Ops = _IterOps<_AlgPolicy>;
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
- // left-child of __start is at 2 * __start + 1
- // right-child of __start is at 2 * __start + 2
- difference_type __child = __start - __first;
-
- if (__len < 2 || (__len - 2) / 2 < __child)
- return;
-
- __child = 2 * __child + 1;
- _RandomAccessIterator __child_i = __first + __child;
-
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
- // right-child exists and is greater than left-child
- ++__child_i;
- ++__child;
- }
-
- // check if we are in heap-order
- if (__comp(*__child_i, *__start))
- // we are, __start is larger than its largest child
- return;
-
- value_type __top(_Ops::__iter_move(__start));
- do {
- // we are not in heap-order, swap the parent with its largest child
- *__start = _Ops::__iter_move(__child_i);
- __start = __child_i;
-
- if ((__len - 2) / 2 < __child)
- break;
- // recompute the child based off of the updated parent
- __child = 2 * __child + 1;
- __child_i = __first + __child;
+ _LIBCPP_ASSERT_INTERNAL(__len >= 2, "shouldn't be called unless __len >= 2");
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
- // right-child exists and is greater than left-child
- ++__child_i;
- ++__child;
+ value_type __top(_Ops::__iter_move(__first + __start));
+ // left-child of __start is at 2 * __start + 1
+ // right-child of __start is at __child_i + 1
+ for (difference_type __child; (__child = 2 * __start + 1) < __len;) {
+ _RandomAccessIterator __child_i = __first + __child;
+ if (__child + 1 < __len) {
+ _RandomAccessIterator __right_child_i = _Ops::next(__child_i);
+ if (__comp(*__child_i, *__right_child_i)) {
+ // right-child exists and is greater than left-child
+ __child_i = __right_child_i;
+ ++__child;
+ }
}
// check if we are in heap-order
- } while (!__comp(*__child_i, __top));
- *__start = std::move(__top);
+ if (!__comp(*__child_i, __top))
+ break;
+
+ // we are not in heap-order, swap the parent with its largest child
+ *(__first + __start) = _Ops::__iter_move(__child_i);
+ __start = __child;
+ }
+ *(__first + __start) = std::move(__top);
}
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
More information about the libcxx-commits
mailing list