[libcxx-commits] [libcxx] [libc++] Optimize make_heap() and sift_down() (PR #121480)

Yang Kun via libcxx-commits libcxx-commits at lists.llvm.org
Thu Jan 2 06:34:09 PST 2025


https://github.com/omikrun created https://github.com/llvm/llvm-project/pull/121480

I was reading the source code of libc++ for the purpose of learning, then I found some possible optimizations for heap operations.

Here is a small benchmark to show it (all of these uses `std::vector<int>::iterator`).

| Before     | After      |
| ---------- | ---------- |
| 47954600ns | 35104600ns |
| 47842000ns | 34409900ns |
| 47859100ns | 39392700ns |
| 48278100ns | 34362900ns |
| 47680500ns | 34356800ns |

I removed the boundary check in `__sift_down()` due to it always evaluates to false I think.

I've also changed the type of `__start` in `__sift_down()` to difference_type to achieve small acceleration in performance for continuous iterator. However, in fact, I'm not sure whether it will affect random access iterators like `std::deque<int>::iterator`. Here is the benchmark result.

| Before     | After      |
| ---------- | ---------- |
| 37261800ns | 34256200ns |
| 37620500ns | 34649800ns |
| 36458900ns | 34274700ns |
| 37833700ns | 34543900ns |
| 37174000ns | 33924300ns |

I would really appreciate it if you could figure out the wrongs in my changes.

>From d31cea6d9fee2524758426e719bc9802370b663e Mon Sep 17 00:00:00 2001
From: Yang Kun <193369907+omikrun at users.noreply.github.com>
Date: Thu, 2 Jan 2025 22:11:35 +0800
Subject: [PATCH] [libc++] Optimize make_heap() and sift_down()

---
 libcxx/include/__algorithm/make_heap.h        |  9 +--
 libcxx/include/__algorithm/partial_sort.h     |  2 +-
 .../include/__algorithm/partial_sort_copy.h   |  2 +-
 libcxx/include/__algorithm/sift_down.h        | 61 +++++++------------
 .../include/__cxx03/__algorithm/make_heap.h   |  9 +--
 .../__cxx03/__algorithm/partial_sort.h        |  2 +-
 .../__cxx03/__algorithm/partial_sort_copy.h   |  2 +-
 .../include/__cxx03/__algorithm/sift_down.h   | 61 +++++++------------
 8 files changed, 58 insertions(+), 90 deletions(-)

diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index e8f0cdb27333a4..0d58cd80628d7e 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -34,10 +34,11 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
   using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
   difference_type __n   = __last - __first;
   if (__n > 1) {
-    // start from the first parent, there is no need to consider children
-    for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) {
-      std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
-    }
+    difference_type __start = __n / 2;
+    do
+      // start from the first parent, there is no need to consider children
+      std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, --__start);
+    while (__start != 0);
   }
 }
 
diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h
index 7f8d0c49147e3a..46e48c33f798d5 100644
--- a/libcxx/include/__algorithm/partial_sort.h
+++ b/libcxx/include/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
   for (; __i != __last; ++__i) {
     if (__comp(*__i, *__first)) {
       _IterOps<_AlgPolicy>::iter_swap(__i, __first);
-      std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+      std::__sift_down<_AlgPolicy>(__first, __comp, __len, 0);
     }
   }
   std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);
diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 172f53b290d548..a4437e4f932cee 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
     for (; __first != __last; ++__first)
       if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
         *__result_first = *__first;
-        std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+        std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, 0);
       }
     std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
   }
diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index 42803e30631fb1..1ee31eed5ba65e 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -29,54 +29,37 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __sift_down(_RandomAccessIterator __first,
             _Compare&& __comp,
             typename iterator_traits<_RandomAccessIterator>::difference_type __len,
-            _RandomAccessIterator __start) {
+            typename iterator_traits<_RandomAccessIterator>::difference_type __start) {
   using _Ops = _IterOps<_AlgPolicy>;
 
   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
-  // left-child of __start is at 2 * __start + 1
-  // right-child of __start is at 2 * __start + 2
-  difference_type __child = __start - __first;
-
-  if (__len < 2 || (__len - 2) / 2 < __child)
-    return;
-
-  __child                         = 2 * __child + 1;
-  _RandomAccessIterator __child_i = __first + __child;
-
-  if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
-    // right-child exists and is greater than left-child
-    ++__child_i;
-    ++__child;
-  }
-
-  // check if we are in heap-order
-  if (__comp(*__child_i, *__start))
-    // we are, __start is larger than its largest child
-    return;
-
-  value_type __top(_Ops::__iter_move(__start));
-  do {
-    // we are not in heap-order, swap the parent with its largest child
-    *__start = _Ops::__iter_move(__child_i);
-    __start  = __child_i;
-
-    if ((__len - 2) / 2 < __child)
-      break;
 
-    // recompute the child based off of the updated parent
-    __child   = 2 * __child + 1;
-    __child_i = __first + __child;
+  _LIBCPP_ASSERT_INTERNAL(__len >= 2, "shouldn't be called unless __len >= 2");
 
-    if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
-      // right-child exists and is greater than left-child
-      ++__child_i;
-      ++__child;
+  value_type __top(_Ops::__iter_move(__first + __start));
+  // left-child of __start is at 2 * __start + 1
+  // right-child of __start is at __child_i + 1
+  for (difference_type __child; (__child = 2 * __start + 1) < __len;) {
+    _RandomAccessIterator __child_i = __first + __child;
+    if (__child + 1 < __len) {
+      _RandomAccessIterator __right_child_i = _Ops::next(__child_i);
+      if (__comp(*__child_i, *__right_child_i)) {
+        // right-child exists and is greater than left-child
+        __child_i = __right_child_i;
+        ++__child;
+      }
     }
 
     // check if we are in heap-order
-  } while (!__comp(*__child_i, __top));
-  *__start = std::move(__top);
+    if (!__comp(*__child_i, __top))
+      break;
+
+    // we are not in heap-order, swap the parent with its largest child
+    *(__first + __start) = _Ops::__iter_move(__child_i);
+    __start              = __child;
+  }
+  *(__first + __start) = std::move(__top);
 }
 
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
diff --git a/libcxx/include/__cxx03/__algorithm/make_heap.h b/libcxx/include/__cxx03/__algorithm/make_heap.h
index 35a7f7bf9779f4..95103bbf438a67 100644
--- a/libcxx/include/__cxx03/__algorithm/make_heap.h
+++ b/libcxx/include/__cxx03/__algorithm/make_heap.h
@@ -34,10 +34,11 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar
   using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
   difference_type __n   = __last - __first;
   if (__n > 1) {
-    // start from the first parent, there is no need to consider children
-    for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) {
-      std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
-    }
+    difference_type __start = __n / 2;
+    do
+      // start from the first parent, there is no need to consider children
+      std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, --__start);
+    while (__start != 0);
   }
 }
 
diff --git a/libcxx/include/__cxx03/__algorithm/partial_sort.h b/libcxx/include/__cxx03/__algorithm/partial_sort.h
index 04597fc32b9a2e..c354ae4de6f5a6 100644
--- a/libcxx/include/__cxx03/__algorithm/partial_sort.h
+++ b/libcxx/include/__cxx03/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
   for (; __i != __last; ++__i) {
     if (__comp(*__i, *__first)) {
       _IterOps<_AlgPolicy>::iter_swap(__i, __first);
-      std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+      std::__sift_down<_AlgPolicy>(__first, __comp, __len, 0);
     }
   }
   std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);
diff --git a/libcxx/include/__cxx03/__algorithm/partial_sort_copy.h b/libcxx/include/__cxx03/__algorithm/partial_sort_copy.h
index d4b5fafba96784..5c755dfecddd54 100644
--- a/libcxx/include/__cxx03/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__cxx03/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
     for (; __first != __last; ++__first)
       if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
         *__result_first = *__first;
-        std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+        std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, 0);
       }
     std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
   }
diff --git a/libcxx/include/__cxx03/__algorithm/sift_down.h b/libcxx/include/__cxx03/__algorithm/sift_down.h
index 774a6d2450d578..d45ff5dfa674c1 100644
--- a/libcxx/include/__cxx03/__algorithm/sift_down.h
+++ b/libcxx/include/__cxx03/__algorithm/sift_down.h
@@ -29,54 +29,37 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __sift_down(_RandomAccessIterator __first,
             _Compare&& __comp,
             typename iterator_traits<_RandomAccessIterator>::difference_type __len,
-            _RandomAccessIterator __start) {
+            typename iterator_traits<_RandomAccessIterator>::difference_type __start) {
   using _Ops = _IterOps<_AlgPolicy>;
 
   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
-  // left-child of __start is at 2 * __start + 1
-  // right-child of __start is at 2 * __start + 2
-  difference_type __child = __start - __first;
-
-  if (__len < 2 || (__len - 2) / 2 < __child)
-    return;
-
-  __child                         = 2 * __child + 1;
-  _RandomAccessIterator __child_i = __first + __child;
-
-  if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
-    // right-child exists and is greater than left-child
-    ++__child_i;
-    ++__child;
-  }
-
-  // check if we are in heap-order
-  if (__comp(*__child_i, *__start))
-    // we are, __start is larger than its largest child
-    return;
-
-  value_type __top(_Ops::__iter_move(__start));
-  do {
-    // we are not in heap-order, swap the parent with its largest child
-    *__start = _Ops::__iter_move(__child_i);
-    __start  = __child_i;
-
-    if ((__len - 2) / 2 < __child)
-      break;
 
-    // recompute the child based off of the updated parent
-    __child   = 2 * __child + 1;
-    __child_i = __first + __child;
+  _LIBCPP_ASSERT_INTERNAL(__len >= 2, "shouldn't be called unless __len >= 2");
 
-    if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
-      // right-child exists and is greater than left-child
-      ++__child_i;
-      ++__child;
+  value_type __top(_Ops::__iter_move(__first + __start));
+  // left-child of __start is at 2 * __start + 1
+  // right-child of __start is at __child_i + 1
+  for (difference_type __child; (__child = 2 * __start + 1) < __len;) {
+    _RandomAccessIterator __child_i = __first + __child;
+    if (__child + 1 < __len) {
+      _RandomAccessIterator __right_child_i = _Ops::next(__child_i);
+      if (__comp(*__child_i, *__right_child_i)) {
+        // right-child exists and is greater than left-child
+        __child_i = __right_child_i;
+        ++__child;
+      }
     }
 
     // check if we are in heap-order
-  } while (!__comp(*__child_i, __top));
-  *__start = std::move(__top);
+    if (!__comp(*__child_i, __top))
+      break;
+
+    // we are not in heap-order, swap the parent with its largest child
+    *(__first + __start) = _Ops::__iter_move(__child_i);
+    __start              = __child;
+  }
+  *(__first + __start) = std::move(__top);
 }
 
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>



More information about the libcxx-commits mailing list