[libcxx-commits] [libcxx] [libc++] Optimize ranges::rotate for vector<bool>::iterator (PR #121168)

Peng Liu via libcxx-commits libcxx-commits at lists.llvm.org
Tue Mar 11 19:06:17 PDT 2025


https://github.com/winner245 updated https://github.com/llvm/llvm-project/pull/121168

>From 19cdccccbe20ca1cf911cc1b605e066a28cc0991 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Thu, 26 Dec 2024 19:36:15 -0500
Subject: [PATCH] Optimize ranges::rotate for vector<bool>::iterator

---
 libcxx/docs/ReleaseNotes/21.rst               |  2 +-
 libcxx/include/__algorithm/rotate.h           | 45 +++++++++++++
 libcxx/include/__bit_reference                | 48 ++------------
 libcxx/include/__fwd/bit_reference.h          |  3 +
 libcxx/include/__vector/vector_bool.h         |  1 +
 .../algorithms/modifying/rotate.bench.cpp     | 64 +++++++++++++++++++
 .../alg.rotate/ranges_rotate.pass.cpp         | 35 ++++++++++
 .../alg.rotate/rotate.pass.cpp                | 23 +++++++
 8 files changed, 176 insertions(+), 45 deletions(-)
 create mode 100644 libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp

diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 633d114693c82..f41cf9b51c292 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -46,7 +46,7 @@ Implemented Papers
 Improvements and New Features
 -----------------------------
 
-- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward}`` algorithms have been optimized for
+- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward, rotate}`` algorithms have been optimized for
   ``std::vector<bool>::iterator``, resulting in a performance improvement of up to 2000x.
 
 - The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h
index df4ca95aac95b..c676980f0c1ca 100644
--- a/libcxx/include/__algorithm/rotate.h
+++ b/libcxx/include/__algorithm/rotate.h
@@ -9,12 +9,19 @@
 #ifndef _LIBCPP___ALGORITHM_ROTATE_H
 #define _LIBCPP___ALGORITHM_ROTATE_H
 
+#include <__algorithm/copy.h>
+#include <__algorithm/copy_backward.h>
 #include <__algorithm/iterator_operations.h>
 #include <__algorithm/move.h>
 #include <__algorithm/move_backward.h>
 #include <__algorithm/swap_ranges.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__fwd/bit_reference.h>
 #include <__iterator/iterator_traits.h>
+#include <__memory/construct_at.h>
+#include <__memory/pointer_traits.h>
+#include <__type_traits/is_constant_evaluated.h>
 #include <__type_traits/is_trivially_assignable.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
@@ -185,6 +192,44 @@ __rotate(_Iterator __first, _Iterator __middle, _Sentinel __last) {
   return _Ret(std::move(__result), std::move(__last_iter));
 }
 
+template <class, class _Cp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, false>, __bit_iterator<_Cp, false> >
+__rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
+  using _I1             = __bit_iterator<_Cp, false>;
+  using difference_type = typename _I1::difference_type;
+  difference_type __d1  = __middle - __first;
+  difference_type __d2  = __last - __middle;
+  _I1 __r               = __first + __d2;
+  while (__d1 != 0 && __d2 != 0) {
+    if (__d1 <= __d2) {
+      if (__d1 <= __bit_array<_Cp>::capacity()) {
+        __bit_array<_Cp> __b(__d1);
+        std::copy(__first, __middle, __b.begin());
+        std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
+        break;
+      } else {
+        __bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
+        __first                         = __middle;
+        __middle                        = __mp;
+        __d2 -= __d1;
+      }
+    } else {
+      if (__d2 <= __bit_array<_Cp>::capacity()) {
+        __bit_array<_Cp> __b(__d2);
+        std::copy(__middle, __last, __b.begin());
+        std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
+        break;
+      } else {
+        __bit_iterator<_Cp, false> __mp = __first + __d2;
+        std::swap_ranges(__first, __mp, __middle);
+        __first = __mp;
+        __d1 -= __d2;
+      }
+    }
+  }
+  return std::make_pair(__r, __last);
+}
+
 template <class _ForwardIterator>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
 rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) {
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index f91250c4a440d..af88c253762b5 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -16,6 +16,7 @@
 #include <__algorithm/copy_n.h>
 #include <__algorithm/equal.h>
 #include <__algorithm/min.h>
+#include <__algorithm/rotate.h>
 #include <__algorithm/swap_ranges.h>
 #include <__assert>
 #include <__bit/countr.h>
@@ -216,8 +217,6 @@ private:
         __mask_(__m) {}
 };
 
-// rotate
-
 template <class _Cp>
 struct __bit_array {
   using difference_type _LIBCPP_NODEBUG   = typename __size_difference_type_traits<_Cp>::difference_type;
@@ -249,45 +248,6 @@ struct __bit_array {
   }
 };
 
-template <class _Cp>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false>
-rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
-  using _I1             = __bit_iterator<_Cp, false>;
-  using difference_type = typename _I1::difference_type;
-
-  difference_type __d1 = __middle - __first;
-  difference_type __d2 = __last - __middle;
-  _I1 __r              = __first + __d2;
-  while (__d1 != 0 && __d2 != 0) {
-    if (__d1 <= __d2) {
-      if (__d1 <= __bit_array<_Cp>::capacity()) {
-        __bit_array<_Cp> __b(__d1);
-        std::copy(__first, __middle, __b.begin());
-        std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
-        break;
-      } else {
-        __bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
-        __first                         = __middle;
-        __middle                        = __mp;
-        __d2 -= __d1;
-      }
-    } else {
-      if (__d2 <= __bit_array<_Cp>::capacity()) {
-        __bit_array<_Cp> __b(__d2);
-        std::copy(__middle, __last, __b.begin());
-        std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
-        break;
-      } else {
-        __bit_iterator<_Cp, false> __mp = __first + __d2;
-        std::swap_ranges(__first, __mp, __middle);
-        __first = __mp;
-        __d1 -= __d2;
-      }
-    }
-  }
-  return __r;
-}
-
 template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
 class __bit_iterator {
 public:
@@ -507,9 +467,9 @@ private:
   template <class, class _Cl, class _Cr>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
       __swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
-  template <class _Dp>
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
-      rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
+  template <class, class _Dp>
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false> >
+      __rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
   template <class _Dp, bool _IsConst1, bool _IsConst2>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
       __equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
diff --git a/libcxx/include/__fwd/bit_reference.h b/libcxx/include/__fwd/bit_reference.h
index d65f043e89ad6..451e76d548dc3 100644
--- a/libcxx/include/__fwd/bit_reference.h
+++ b/libcxx/include/__fwd/bit_reference.h
@@ -23,6 +23,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
 class __bit_iterator;
 
+template <class _Cp>
+struct __bit_array;
+
 template <class, class = void>
 struct __size_difference_type_traits;
 
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 1d3960bd01597..b02b0dc1725f2 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -14,6 +14,7 @@
 #include <__algorithm/fill_n.h>
 #include <__algorithm/iterator_operations.h>
 #include <__algorithm/max.h>
+#include <__algorithm/rotate.h>
 #include <__assert>
 #include <__bit_reference>
 #include <__config>
diff --git a/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp b/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp
new file mode 100644
index 0000000000000..eb84eb5cdf25c
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cstddef>
+#include <deque>
+#include <iterator>
+#include <list>
+#include <ranges>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "../../GenerateInput.h"
+#include "test_macros.h"
+
+int main(int argc, char** argv) {
+  auto std_rotate = [](auto first, auto middle, auto last) { return std::rotate(first, middle, last); };
+
+  // Benchmark {std,ranges}::rotate where we rotate various fractions of the range. It is possible to
+  // special-case some of these fractions to cleverly perform swap_ranges.
+  {
+    auto bm = []<class Container>(std::string name, auto rotate, double fraction) {
+      benchmark::RegisterBenchmark(
+          name,
+          [=](auto& st) {
+            std::size_t const size = st.range(0);
+            using ValueType        = typename Container::value_type;
+            Container c;
+            std::generate_n(std::back_inserter(c), size, [] { return Generate<ValueType>::random(); });
+
+            auto nth = std::next(c.begin(), static_cast<std::size_t>(size * fraction));
+            for ([[maybe_unused]] auto _ : st) {
+              benchmark::DoNotOptimize(c);
+              auto result = rotate(c.begin(), nth, c.end());
+              benchmark::DoNotOptimize(result);
+            }
+          })
+          ->Arg(32)
+          ->Arg(50) // non power-of-two
+          ->RangeMultiplier(2)
+          ->Range(64, 1 << 20);
+    };
+
+    bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 1/4)", std_rotate, 0.25);
+    bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 51%)", std_rotate, 0.51);
+#if TEST_STD_VER >= 23 // vector<bool>::iterator is not std::permutable before C++23
+    bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 1/4)", std::ranges::rotate, 0.25);
+    bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 51%)", std::ranges::rotate, 0.51);
+#endif
+  }
+
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+  return 0;
+}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
index 56d0dfaf9ee27..5f594400e8321 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
@@ -21,8 +21,10 @@
 #include <array>
 #include <cassert>
 #include <ranges>
+#include <vector>
 
 #include "almost_satisfies_types.h"
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "type_algorithms.h"
 
@@ -131,6 +133,29 @@ constexpr void test_iter_sent() {
   test_one<Iter, Sent, 7>({1, 2, 3, 4, 5, 6, 7}, 7, {1, 2, 3, 4, 5, 6, 7});
 }
 
+#if TEST_STD_VER >= 23
+template <std::size_t N>
+TEST_CONSTEXPR_CXX20 bool test_vector_bool() {
+  for (int offset = -4; offset <= 4; ++offset) {
+    std::vector<bool> a(N, false);
+    std::size_t mid = N / 2 + offset;
+    for (std::size_t i = mid; i < N; ++i)
+      a[i] = true;
+
+    // (iterator, sentinel)-overload
+    std::ranges::rotate(std::ranges::begin(a), std::ranges::begin(a) + mid, std::ranges::end(a));
+    for (std::size_t i = 0; i < N; ++i)
+      assert(a[i] == (i < N - mid));
+
+    // range-overload
+    std::ranges::rotate(a, std::ranges::begin(a) + (N - mid));
+    for (std::size_t i = 0; i < N; ++i)
+      assert(a[i] == (i >= mid));
+  }
+  return true;
+}
+#endif
+
 constexpr bool test() {
   types::for_each(types::forward_iterator_list<int*>(), []<class Iter>() {
     test_iter_sent<Iter, Iter>();
@@ -167,6 +192,16 @@ constexpr bool test() {
     }
   }
 
+#if TEST_STD_VER >= 23
+  test_vector_bool<8>();
+  test_vector_bool<19>();
+  test_vector_bool<32>();
+  test_vector_bool<49>();
+  test_vector_bool<64>();
+  test_vector_bool<199>();
+  test_vector_bool<256>();
+#endif
+
   return true;
 }
 
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
index ce45bb1916816..448abdddbb4cb 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <memory>
 #include <type_traits>
+#include <vector>
 
 #include "test_macros.h"
 #include "test_iterators.h"
@@ -420,6 +421,20 @@ struct TestUniquePtr {
 
 #endif // TEST_STD_VER >= 11
 
+template <std::size_t N>
+TEST_CONSTEXPR_CXX20 bool test_vector_bool() {
+  for (int offset = -4; offset <= 4; ++offset) {
+    std::vector<bool> a(N, false);
+    std::size_t mid = N / 2 + offset;
+    for (std::size_t i = mid; i < N; ++i)
+      a[i] = true;
+    std::rotate(a.begin(), a.begin() + mid, a.end());
+    for (std::size_t i = 0; i < N; ++i)
+      assert(a[i] == (i < N - mid));
+  }
+  return true;
+}
+
 TEST_CONSTEXPR_CXX20 bool test() {
   types::for_each(types::forward_iterator_list<int*>(), TestIter());
 
@@ -428,6 +443,14 @@ TEST_CONSTEXPR_CXX20 bool test() {
     types::for_each(types::forward_iterator_list<std::unique_ptr<int>*>(), TestUniquePtr());
 #endif
 
+  test_vector_bool<8>();
+  test_vector_bool<19>();
+  test_vector_bool<32>();
+  test_vector_bool<49>();
+  test_vector_bool<64>();
+  test_vector_bool<199>();
+  test_vector_bool<256>();
+
   return true;
 }
 



More information about the libcxx-commits mailing list