[libcxx-commits] [libcxx] [libc++] Optimize ranges::rotate for vector<bool>::iterator (PR #121168)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Mar 11 19:06:17 PDT 2025
https://github.com/winner245 updated https://github.com/llvm/llvm-project/pull/121168
>From 19cdccccbe20ca1cf911cc1b605e066a28cc0991 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Thu, 26 Dec 2024 19:36:15 -0500
Subject: [PATCH] Optimize ranges::rotate for vector<bool>::iterator
---
libcxx/docs/ReleaseNotes/21.rst | 2 +-
libcxx/include/__algorithm/rotate.h | 45 +++++++++++++
libcxx/include/__bit_reference | 48 ++------------
libcxx/include/__fwd/bit_reference.h | 3 +
libcxx/include/__vector/vector_bool.h | 1 +
.../algorithms/modifying/rotate.bench.cpp | 64 +++++++++++++++++++
.../alg.rotate/ranges_rotate.pass.cpp | 35 ++++++++++
.../alg.rotate/rotate.pass.cpp | 23 +++++++
8 files changed, 176 insertions(+), 45 deletions(-)
create mode 100644 libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 633d114693c82..f41cf9b51c292 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -46,7 +46,7 @@ Implemented Papers
Improvements and New Features
-----------------------------
-- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward}`` algorithms have been optimized for
+- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward, rotate}`` algorithms have been optimized for
``std::vector<bool>::iterator``, resulting in a performance improvement of up to 2000x.
- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h
index df4ca95aac95b..c676980f0c1ca 100644
--- a/libcxx/include/__algorithm/rotate.h
+++ b/libcxx/include/__algorithm/rotate.h
@@ -9,12 +9,19 @@
#ifndef _LIBCPP___ALGORITHM_ROTATE_H
#define _LIBCPP___ALGORITHM_ROTATE_H
+#include <__algorithm/copy.h>
+#include <__algorithm/copy_backward.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/move.h>
#include <__algorithm/move_backward.h>
#include <__algorithm/swap_ranges.h>
#include <__config>
+#include <__cstddef/size_t.h>
+#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
+#include <__memory/construct_at.h>
+#include <__memory/pointer_traits.h>
+#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -185,6 +192,44 @@ __rotate(_Iterator __first, _Iterator __middle, _Sentinel __last) {
return _Ret(std::move(__result), std::move(__last_iter));
}
+template <class, class _Cp> // __rotate overload for non-const bit iterators; the first (unnamed) template parameter is not used in this body
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, false>, __bit_iterator<_Cp, false> >
+__rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
+ using _I1 = __bit_iterator<_Cp, false>;
+ using difference_type = typename _I1::difference_type;
+ difference_type __d1 = __middle - __first; // length of the left part [__first, __middle)
+ difference_type __d2 = __last - __middle; // length of the right part [__middle, __last)
+ _I1 __r = __first + __d2; // computed up front because __first is advanced below; returned as pair.first
+ while (__d1 != 0 && __d2 != 0) { // stop as soon as either remaining part is empty
+ if (__d1 <= __d2) { // left part is the shorter (or equal) one
+ if (__d1 <= __bit_array<_Cp>::capacity()) { // left part fits into the temporary __bit_array buffer
+ __bit_array<_Cp> __b(__d1);
+ std::copy(__first, __middle, __b.begin()); // save the left part
+ std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first)); // copy the right part to the front, then the saved left part right after it
+ break;
+ } else {
+ __bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle); // swap the left part with the equally long prefix of the right part
+ __first = __middle; // the old left part now occupies [__middle, __mp) ...
+ __middle = __mp; // ... and still has to be rotated past the rest of the right part
+ __d2 -= __d1; // that many elements of the right part are already in place
+ }
+ } else { // right part is strictly shorter
+ if (__d2 <= __bit_array<_Cp>::capacity()) { // right part fits into the temporary __bit_array buffer
+ __bit_array<_Cp> __b(__d2);
+ std::copy(__middle, __last, __b.begin()); // save the right part
+ std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last)); // copy the left part to the back, then the saved right part right before it
+ break;
+ } else {
+ __bit_iterator<_Cp, false> __mp = __first + __d2;
+ std::swap_ranges(__first, __mp, __middle); // swap the first __d2 elements of the left part with the whole right part
+ __first = __mp; // the swapped-in right part is in place; continue after it (__middle is unchanged)
+ __d1 -= __d2;
+ }
+ }
+ }
+ return std::make_pair(__r, __last); // (__first + original right-part length, original __last)
+}
+
template <class _ForwardIterator>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) {
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index f91250c4a440d..af88c253762b5 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -16,6 +16,7 @@
#include <__algorithm/copy_n.h>
#include <__algorithm/equal.h>
#include <__algorithm/min.h>
+#include <__algorithm/rotate.h>
#include <__algorithm/swap_ranges.h>
#include <__assert>
#include <__bit/countr.h>
@@ -216,8 +217,6 @@ private:
__mask_(__m) {}
};
-// rotate
-
template <class _Cp>
struct __bit_array {
using difference_type _LIBCPP_NODEBUG = typename __size_difference_type_traits<_Cp>::difference_type;
@@ -249,45 +248,6 @@ struct __bit_array {
}
};
-template <class _Cp>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false>
-rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
- using _I1 = __bit_iterator<_Cp, false>;
- using difference_type = typename _I1::difference_type;
-
- difference_type __d1 = __middle - __first;
- difference_type __d2 = __last - __middle;
- _I1 __r = __first + __d2;
- while (__d1 != 0 && __d2 != 0) {
- if (__d1 <= __d2) {
- if (__d1 <= __bit_array<_Cp>::capacity()) {
- __bit_array<_Cp> __b(__d1);
- std::copy(__first, __middle, __b.begin());
- std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
- break;
- } else {
- __bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
- __first = __middle;
- __middle = __mp;
- __d2 -= __d1;
- }
- } else {
- if (__d2 <= __bit_array<_Cp>::capacity()) {
- __bit_array<_Cp> __b(__d2);
- std::copy(__middle, __last, __b.begin());
- std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
- break;
- } else {
- __bit_iterator<_Cp, false> __mp = __first + __d2;
- std::swap_ranges(__first, __mp, __middle);
- __first = __mp;
- __d1 -= __d2;
- }
- }
- }
- return __r;
-}
-
template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
class __bit_iterator {
public:
@@ -507,9 +467,9 @@ private:
template <class, class _Cl, class _Cr>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
__swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
- template <class _Dp>
- _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
- rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
+ template <class, class _Dp>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false> >
+ __rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
template <class _Dp, bool _IsConst1, bool _IsConst2>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
__equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
diff --git a/libcxx/include/__fwd/bit_reference.h b/libcxx/include/__fwd/bit_reference.h
index d65f043e89ad6..451e76d548dc3 100644
--- a/libcxx/include/__fwd/bit_reference.h
+++ b/libcxx/include/__fwd/bit_reference.h
@@ -23,6 +23,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
class __bit_iterator;
+template <class _Cp>
+struct __bit_array;
+
template <class, class = void>
struct __size_difference_type_traits;
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 1d3960bd01597..b02b0dc1725f2 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -14,6 +14,7 @@
#include <__algorithm/fill_n.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/max.h>
+#include <__algorithm/rotate.h>
#include <__assert>
#include <__bit_reference>
#include <__config>
diff --git a/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp b/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp
new file mode 100644
index 0000000000000..eb84eb5cdf25c
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cstddef>
+#include <deque>
+#include <iterator>
+#include <list>
+#include <ranges>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "../../GenerateInput.h"
+#include "test_macros.h"
+
+int main(int argc, char** argv) {
+ auto std_rotate = [](auto first, auto middle, auto last) { return std::rotate(first, middle, last); }; // callable wrapper so std::rotate can be passed to bm like std::ranges::rotate
+
+ // Benchmark {std,ranges}::rotate where we rotate various fractions of the range. It is possible to
+ // special-case some of these fractions to cleverly perform swap_ranges.
+ {
+ auto bm = []<class Container>(std::string name, auto rotate, double fraction) { // registers one benchmark: Container type, display name, rotate callable, rotation point as a fraction of the size
+ benchmark::RegisterBenchmark(
+ name,
+ [=](auto& st) {
+ std::size_t const size = st.range(0); // element count comes from the registered Arg/Range values below
+ using ValueType = typename Container::value_type;
+ Container c;
+ std::generate_n(std::back_inserter(c), size, [] { return Generate<ValueType>::random(); });
+
+ auto nth = std::next(c.begin(), static_cast<std::size_t>(size * fraction)); // rotation point, truncated toward zero
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = rotate(c.begin(), nth, c.end()); // c is not reset between iterations, so each pass rotates the previous result
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(32)
+ ->Arg(50) // non power-of-two
+ ->RangeMultiplier(2)
+ ->Range(64, 1 << 20);
+ };
+
+ bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 1/4)", std_rotate, 0.25);
+ bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 51%)", std_rotate, 0.51);
+#if TEST_STD_VER >= 23 // vector<bool>::iterator is not std::permutable before C++23
+ bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 1/4)", std::ranges::rotate, 0.25);
+ bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 51%)", std::ranges::rotate, 0.51);
+#endif
+ }
+
+ benchmark::Initialize(&argc, argv);
+ benchmark::RunSpecifiedBenchmarks();
+ benchmark::Shutdown();
+ return 0;
+}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
index 56d0dfaf9ee27..5f594400e8321 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
@@ -21,8 +21,10 @@
#include <array>
#include <cassert>
#include <ranges>
+#include <vector>
#include "almost_satisfies_types.h"
+#include "test_macros.h"
#include "test_iterators.h"
#include "type_algorithms.h"
@@ -131,6 +133,29 @@ constexpr void test_iter_sent() {
test_one<Iter, Sent, 7>({1, 2, 3, 4, 5, 6, 7}, 7, {1, 2, 3, 4, 5, 6, 7});
}
+#if TEST_STD_VER >= 23
+template <std::size_t N> // N: number of bits in the vector<bool> under test
+TEST_CONSTEXPR_CXX20 bool test_vector_bool() { // checks both ranges::rotate overloads on vector<bool>; always returns true, failures surface through assert
+ for (int offset = -4; offset <= 4; ++offset) { // try rotation points in a window around the midpoint
+ std::vector<bool> a(N, false);
+ std::size_t mid = N / 2 + offset;
+ for (std::size_t i = mid; i < N; ++i)
+ a[i] = true; // a = [false x mid, true x (N - mid)]
+
+ // (iterator, sentinel)-overload
+ std::ranges::rotate(std::ranges::begin(a), std::ranges::begin(a) + mid, std::ranges::end(a));
+ for (std::size_t i = 0; i < N; ++i)
+ assert(a[i] == (i < N - mid)); // the true block must now come first
+
+ // range-overload
+ std::ranges::rotate(a, std::ranges::begin(a) + (N - mid)); // rotate back to the initial layout
+ for (std::size_t i = 0; i < N; ++i)
+ assert(a[i] == (i >= mid));
+ }
+ return true;
+}
+#endif
+
constexpr bool test() {
types::for_each(types::forward_iterator_list<int*>(), []<class Iter>() {
test_iter_sent<Iter, Iter>();
@@ -167,6 +192,16 @@ constexpr bool test() {
}
}
+#if TEST_STD_VER >= 23
+ test_vector_bool<8>();
+ test_vector_bool<19>();
+ test_vector_bool<32>();
+ test_vector_bool<49>();
+ test_vector_bool<64>();
+ test_vector_bool<199>();
+ test_vector_bool<256>();
+#endif
+
return true;
}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
index ce45bb1916816..448abdddbb4cb 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
@@ -15,6 +15,7 @@
#include <cassert>
#include <memory>
#include <type_traits>
+#include <vector>
#include "test_macros.h"
#include "test_iterators.h"
@@ -420,6 +421,20 @@ struct TestUniquePtr {
#endif // TEST_STD_VER >= 11
+template <std::size_t N> // N: number of bits in the vector<bool> under test
+TEST_CONSTEXPR_CXX20 bool test_vector_bool() { // checks std::rotate on vector<bool>; always returns true, failures surface through assert
+ for (int offset = -4; offset <= 4; ++offset) { // try rotation points in a window around the midpoint
+ std::vector<bool> a(N, false);
+ std::size_t mid = N / 2 + offset;
+ for (std::size_t i = mid; i < N; ++i)
+ a[i] = true; // a = [false x mid, true x (N - mid)]
+ std::rotate(a.begin(), a.begin() + mid, a.end());
+ for (std::size_t i = 0; i < N; ++i)
+ assert(a[i] == (i < N - mid)); // the true block must now come first
+ }
+ return true;
+}
+
TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<int*>(), TestIter());
@@ -428,6 +443,14 @@ TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<std::unique_ptr<int>*>(), TestUniquePtr());
#endif
+ test_vector_bool<8>();
+ test_vector_bool<19>();
+ test_vector_bool<32>();
+ test_vector_bool<49>();
+ test_vector_bool<64>();
+ test_vector_bool<199>();
+ test_vector_bool<256>();
+
return true;
}
More information about the libcxx-commits
mailing list