[libcxx-commits] [libcxx] [libc++] Optimize ranges::copy_backward for vector<bool>::iterator (PR #121026)
via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Jan 2 13:04:07 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Peng Liu (winner245)
<details>
<summary>Changes</summary>
As a follow-up to #<!-- -->121013, this PR optimizes the performance of `std::ranges::copy_backward` for `vector<bool>::iterator`, addressing a subtask outlined in issue #<!-- -->64038. The optimizations yield performance improvements of up to **2000x** for aligned copies and **60x** for unaligned copies.
- Aligned copy
```
---------------------------------------------------------------------------------
Benchmark Before After Improvement
---------------------------------------------------------------------------------
bm_ranges_copy_backward_aligned/8 10.7 ns 1.74 ns 6x
bm_ranges_copy_backward_aligned/64 78.7 ns 3.07 ns 26x
bm_ranges_copy_backward_aligned/512 617 ns 2.77 ns 223x
bm_ranges_copy_backward_aligned/4096 4902 ns 5.91 ns 829x
bm_ranges_copy_backward_aligned/32768 43111 ns 40.1 ns 1075x
bm_ranges_copy_backward_aligned/65536 80592 ns 71.9 ns 1121x
bm_ranges_copy_backward_aligned/102400 129705 ns 121 ns 1072x
bm_ranges_copy_backward_aligned/106496 134432 ns 85.3 ns 1576x
bm_ranges_copy_backward_aligned/110592 140523 ns 68.5 ns 2051x
bm_ranges_copy_backward_aligned/114688 139851 ns 70.8 ns 1975x
bm_ranges_copy_backward_aligned/118784 146393 ns 73.3 ns 1997x
bm_ranges_copy_backward_aligned/122880 152805 ns 73.5 ns 2079x
bm_ranges_copy_backward_aligned/126976 154205 ns 76.8 ns 2008x
bm_ranges_copy_backward_aligned/131072 160370 ns 136 ns 1179x
bm_ranges_copy_backward_aligned/135168 165428 ns 161 ns 1028x
bm_ranges_copy_backward_aligned/139264 169763 ns 109 ns 1557x
bm_ranges_copy_backward_aligned/143360 175159 ns 86.8 ns 2018x
bm_ranges_copy_backward_aligned/147456 178807 ns 88.1 ns 2030x
bm_ranges_copy_backward_aligned/151552 184829 ns 92.1 ns 2007x
bm_ranges_copy_backward_aligned/155648 193208 ns 90.8 ns 2128x
bm_ranges_copy_backward_aligned/159744 198470 ns 93.4 ns 2125x
bm_ranges_copy_backward_aligned/163840 205261 ns 170 ns 1207x
bm_ranges_copy_backward_aligned/167936 210831 ns 198 ns 1064x
bm_ranges_copy_backward_aligned/172032 215258 ns 132 ns 1630x
bm_ranges_copy_backward_aligned/176128 219453 ns 111 ns 1977x
bm_ranges_copy_backward_aligned/180224 227048 ns 110 ns 2064x
bm_ranges_copy_backward_aligned/184320 230324 ns 115 ns 2002x
bm_ranges_copy_backward_aligned/188416 234305 ns 121 ns 1936x
bm_ranges_copy_backward_aligned/192512 239388 ns 127 ns 1884x
bm_ranges_copy_backward_aligned/196608 246586 ns 214 ns 1152x
bm_ranges_copy_backward_aligned/200704 249973 ns 264 ns 946x
bm_ranges_copy_backward_aligned/204800 259443 ns 221 ns 1173x
```
- Unaligned copy
```
------------------------------------------------------------------------------------
Benchmark Before After Improvement
------------------------------------------------------------------------------------
bm_ranges_copy_backward_unaligned/8 11.0 ns 9.49 ns 1.2x
bm_ranges_copy_backward_unaligned/64 95.9 ns 8.18 ns 12x
bm_ranges_copy_backward_unaligned/512 720 ns 18.8 ns 38x
bm_ranges_copy_backward_unaligned/4096 5871 ns 103 ns 57x
bm_ranges_copy_backward_unaligned/32768 45974 ns 741 ns 62x
bm_ranges_copy_backward_unaligned/262144 374481 ns 6100 ns 61x
bm_ranges_copy_backward_unaligned/1048576 1531217 ns 25406 ns 60x
```
---
Patch is 33.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121026.diff
8 Files Affected:
- (modified) libcxx/docs/ReleaseNotes/20.rst (+3)
- (modified) libcxx/include/__algorithm/copy_backward.h (+131)
- (modified) libcxx/include/__bit_reference (+3-131)
- (modified) libcxx/include/__vector/vector_bool.h (+1)
- (modified) libcxx/include/bitset (+2)
- (added) libcxx/test/benchmarks/algorithms/copy_backward.bench.cpp (+55)
- (modified) libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_backward.pass.cpp (+59-32)
- (modified) libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_backward.pass.cpp (+72-35)
``````````diff
diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index c8a07fb8b73348..9c4e43899226f6 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -73,6 +73,9 @@ Improvements and New Features
optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
+- The ``std::ranges::copy_backward`` algorithm has been optimized for ``std::vector<bool>::iterator``\s, resulting in
+ a performance improvement of up to 2000x.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__algorithm/copy_backward.h b/libcxx/include/__algorithm/copy_backward.h
index 48a768f577f553..02ffc14361e6a9 100644
--- a/libcxx/include/__algorithm/copy_backward.h
+++ b/libcxx/include/__algorithm/copy_backward.h
@@ -10,11 +10,14 @@
#define _LIBCPP___ALGORITHM_COPY_BACKWARD_H
#include <__algorithm/copy_move_common.h>
+#include <__algorithm/copy_n.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/min.h>
#include <__config>
+#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
+#include <__memory/pointer_traits.h>
#include <__type_traits/common_type.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_constructible.h>
@@ -34,6 +37,124 @@ template <class _AlgPolicy, class _InIter, class _Sent, class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter>
__copy_backward(_InIter __first, _Sent __last, _OutIter __result);
+template <class _Cp, bool _IsConst>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_backward_aligned(
+ __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
+ using _In = __bit_iterator<_Cp, _IsConst>;
+ using difference_type = typename _In::difference_type;
+ using __storage_type = typename _In::__storage_type;
+
+ const int __bits_per_word = _In::__bits_per_word;
+ difference_type __n = __last - __first;
+ if (__n > 0) {
+ // do first word
+ if (__last.__ctz_ != 0) {
+ difference_type __dn = std::min(static_cast<difference_type>(__last.__ctz_), __n);
+ __n -= __dn;
+ unsigned __clz = __bits_per_word - __last.__ctz_;
+ __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz);
+ __storage_type __b = *__last.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b;
+ __result.__ctz_ = static_cast<unsigned>(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word);
+ // __last.__ctz_ = 0
+ }
+ // __last.__ctz_ == 0 || __n == 0
+ // __result.__ctz_ == 0 || __n == 0
+ // do middle words
+ __storage_type __nw = __n / __bits_per_word;
+ __result.__seg_ -= __nw;
+ __last.__seg_ -= __nw;
+ std::copy_n(std::__to_address(__last.__seg_), __nw, std::__to_address(__result.__seg_));
+ __n -= __nw * __bits_per_word;
+ // do last word
+ if (__n > 0) {
+ __storage_type __m = ~__storage_type(0) << (__bits_per_word - __n);
+ __storage_type __b = *--__last.__seg_ & __m;
+ *--__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b;
+ __result.__ctz_ = static_cast<unsigned>(-__n & (__bits_per_word - 1));
+ }
+ }
+ return __result;
+}
+
+template <class _Cp, bool _IsConst>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_backward_unaligned(
+ __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
+ using _In = __bit_iterator<_Cp, _IsConst>;
+ using difference_type = typename _In::difference_type;
+ using __storage_type = typename _In::__storage_type;
+
+ const int __bits_per_word = _In::__bits_per_word;
+ difference_type __n = __last - __first;
+ if (__n > 0) {
+ // do first word
+ if (__last.__ctz_ != 0) {
+ difference_type __dn = std::min(static_cast<difference_type>(__last.__ctz_), __n);
+ __n -= __dn;
+ unsigned __clz_l = __bits_per_word - __last.__ctz_;
+ __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_l);
+ __storage_type __b = *__last.__seg_ & __m;
+ unsigned __clz_r = __bits_per_word - __result.__ctz_;
+ __storage_type __ddn = std::min(__dn, static_cast<difference_type>(__result.__ctz_));
+ if (__ddn > 0) {
+ __m = (~__storage_type(0) << (__result.__ctz_ - __ddn)) & (~__storage_type(0) >> __clz_r);
+ *__result.__seg_ &= ~__m;
+ if (__result.__ctz_ > __last.__ctz_)
+ *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_);
+ else
+ *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_);
+ __result.__ctz_ = static_cast<unsigned>(((-__ddn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word);
+ __dn -= __ddn;
+ }
+ if (__dn > 0) {
+ // __result.__ctz_ == 0
+ --__result.__seg_;
+ __result.__ctz_ = static_cast<unsigned>(-__dn & (__bits_per_word - 1));
+ __m = ~__storage_type(0) << __result.__ctz_;
+ *__result.__seg_ &= ~__m;
+ __last.__ctz_ -= __dn + __ddn;
+ *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_);
+ }
+ // __last.__ctz_ = 0
+ }
+ // __last.__ctz_ == 0 || __n == 0
+ // __result.__ctz_ != 0 || __n == 0
+ // do middle words
+ unsigned __clz_r = __bits_per_word - __result.__ctz_;
+ __storage_type __m = ~__storage_type(0) >> __clz_r;
+ for (; __n >= __bits_per_word; __n -= __bits_per_word) {
+ __storage_type __b = *--__last.__seg_;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b >> __clz_r;
+ *--__result.__seg_ &= __m;
+ *__result.__seg_ |= __b << __result.__ctz_;
+ }
+ // do last word
+ if (__n > 0) {
+ __m = ~__storage_type(0) << (__bits_per_word - __n);
+ __storage_type __b = *--__last.__seg_ & __m;
+ __clz_r = __bits_per_word - __result.__ctz_;
+ __storage_type __dn = std::min(__n, static_cast<difference_type>(__result.__ctz_));
+ __m = (~__storage_type(0) << (__result.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_r);
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_);
+ __result.__ctz_ = static_cast<unsigned>(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word);
+ __n -= __dn;
+ if (__n > 0) {
+ // __result.__ctz_ == 0
+ --__result.__seg_;
+ __result.__ctz_ = static_cast<unsigned>(-__n & (__bits_per_word - 1));
+ __m = ~__storage_type(0) << __result.__ctz_;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b << (__result.__ctz_ - (__bits_per_word - __n - __dn));
+ }
+ }
+ }
+ return __result;
+}
+
template <class _AlgPolicy>
struct __copy_backward_impl {
template <class _InIter, class _Sent, class _OutIter>
@@ -107,6 +228,16 @@ struct __copy_backward_impl {
}
}
+ template <class _Cp, bool _IsConst>
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
+ operator()(__bit_iterator<_Cp, _IsConst> __first,
+ __bit_iterator<_Cp, _IsConst> __last,
+ __bit_iterator<_Cp, false> __result) {
+ if (__last.__ctz_ == __result.__ctz_)
+ return std::make_pair(__last, std::__copy_backward_aligned(__first, __last, __result));
+ return std::make_pair(__last, std::__copy_backward_unaligned(__first, __last, __result));
+ }
+
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 9fa24c98d493fd..1d9294993201fd 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -10,6 +10,7 @@
#ifndef _LIBCPP___BIT_REFERENCE
#define _LIBCPP___BIT_REFERENCE
+#include <__algorithm/copy_backward.h>
#include <__algorithm/copy_n.h>
#include <__algorithm/min.h>
#include <__bit/countr.h>
@@ -293,134 +294,6 @@ copy(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last
return std::__copy_unaligned(__first, __last, __result);
}
-// copy_backward
-
-template <class _Cp, bool _IsConst>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_backward_aligned(
- __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
- using _In = __bit_iterator<_Cp, _IsConst>;
- using difference_type = typename _In::difference_type;
- using __storage_type = typename _In::__storage_type;
-
- const int __bits_per_word = _In::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__last.__ctz_ != 0) {
- difference_type __dn = std::min(static_cast<difference_type>(__last.__ctz_), __n);
- __n -= __dn;
- unsigned __clz = __bits_per_word - __last.__ctz_;
- __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz);
- __storage_type __b = *__last.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b;
- __result.__ctz_ = static_cast<unsigned>(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word);
- // __last.__ctz_ = 0
- }
- // __last.__ctz_ == 0 || __n == 0
- // __result.__ctz_ == 0 || __n == 0
- // do middle words
- __storage_type __nw = __n / __bits_per_word;
- __result.__seg_ -= __nw;
- __last.__seg_ -= __nw;
- std::copy_n(std::__to_address(__last.__seg_), __nw, std::__to_address(__result.__seg_));
- __n -= __nw * __bits_per_word;
- // do last word
- if (__n > 0) {
- __storage_type __m = ~__storage_type(0) << (__bits_per_word - __n);
- __storage_type __b = *--__last.__seg_ & __m;
- *--__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b;
- __result.__ctz_ = static_cast<unsigned>(-__n & (__bits_per_word - 1));
- }
- }
- return __result;
-}
-
-template <class _Cp, bool _IsConst>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_backward_unaligned(
- __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
- using _In = __bit_iterator<_Cp, _IsConst>;
- using difference_type = typename _In::difference_type;
- using __storage_type = typename _In::__storage_type;
-
- const int __bits_per_word = _In::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__last.__ctz_ != 0) {
- difference_type __dn = std::min(static_cast<difference_type>(__last.__ctz_), __n);
- __n -= __dn;
- unsigned __clz_l = __bits_per_word - __last.__ctz_;
- __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_l);
- __storage_type __b = *__last.__seg_ & __m;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- __storage_type __ddn = std::min(__dn, static_cast<difference_type>(__result.__ctz_));
- if (__ddn > 0) {
- __m = (~__storage_type(0) << (__result.__ctz_ - __ddn)) & (~__storage_type(0) >> __clz_r);
- *__result.__seg_ &= ~__m;
- if (__result.__ctz_ > __last.__ctz_)
- *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_);
- else
- *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_);
- __result.__ctz_ = static_cast<unsigned>(((-__ddn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word);
- __dn -= __ddn;
- }
- if (__dn > 0) {
- // __result.__ctz_ == 0
- --__result.__seg_;
- __result.__ctz_ = static_cast<unsigned>(-__dn & (__bits_per_word - 1));
- __m = ~__storage_type(0) << __result.__ctz_;
- *__result.__seg_ &= ~__m;
- __last.__ctz_ -= __dn + __ddn;
- *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_);
- }
- // __last.__ctz_ = 0
- }
- // __last.__ctz_ == 0 || __n == 0
- // __result.__ctz_ != 0 || __n == 0
- // do middle words
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- __storage_type __m = ~__storage_type(0) >> __clz_r;
- for (; __n >= __bits_per_word; __n -= __bits_per_word) {
- __storage_type __b = *--__last.__seg_;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b >> __clz_r;
- *--__result.__seg_ &= __m;
- *__result.__seg_ |= __b << __result.__ctz_;
- }
- // do last word
- if (__n > 0) {
- __m = ~__storage_type(0) << (__bits_per_word - __n);
- __storage_type __b = *--__last.__seg_ & __m;
- __clz_r = __bits_per_word - __result.__ctz_;
- __storage_type __dn = std::min(__n, static_cast<difference_type>(__result.__ctz_));
- __m = (~__storage_type(0) << (__result.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_r);
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_);
- __result.__ctz_ = static_cast<unsigned>(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word);
- __n -= __dn;
- if (__n > 0) {
- // __result.__ctz_ == 0
- --__result.__seg_;
- __result.__ctz_ = static_cast<unsigned>(-__n & (__bits_per_word - 1));
- __m = ~__storage_type(0) << __result.__ctz_;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b << (__result.__ctz_ - (__bits_per_word - __n - __dn));
- }
- }
- }
- return __result;
-}
-
-template <class _Cp, bool _IsConst>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> copy_backward(
- __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
- if (__last.__ctz_ == __result.__ctz_)
- return std::__copy_backward_aligned(__first, __last, __result);
- return std::__copy_backward_unaligned(__first, __last, __result);
-}
-
// move
template <class _Cp, bool _IsConst>
@@ -983,9 +856,8 @@ private:
template <class _Dp, bool _IC>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_unaligned(
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
- template <class _Dp, bool _IC>
- _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
- copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
+ template <class _AlgPolicy>
+ friend struct __copy_backward_impl;
template <class _Cl, class _Cr>
friend __bit_iterator<_Cr, false>
__swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 525fc35b26cc9e..b683ccf21e8479 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -10,6 +10,7 @@
#define _LIBCPP___VECTOR_VECTOR_BOOL_H
#include <__algorithm/copy.h>
+#include <__algorithm/copy_backward.h>
#include <__algorithm/fill_n.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/max.h>
diff --git a/libcxx/include/bitset b/libcxx/include/bitset
index 8b361824805571..bb09bc5415fb03 100644
--- a/libcxx/include/bitset
+++ b/libcxx/include/bitset
@@ -129,6 +129,8 @@ template <size_t N> struct hash<std::bitset<N>>;
#if __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
# include <__cxx03/bitset>
#else
+# include <__algorithm/copy.h>
+# include <__algorithm/copy_backward.h>
# include <__algorithm/count.h>
# include <__algorithm/fill.h>
# include <__algorithm/fill_n.h>
diff --git a/libcxx/test/benchmarks/algorithms/copy_backward.bench.cpp b/libcxx/test/benchmarks/algorithms/copy_backward.bench.cpp
new file mode 100644
index 00000000000000..3f797b5555a138
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/copy_backward.bench.cpp
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <vector>
+
+static void bm_ranges_copy_backward(benchmark::State& state, bool aligned) {
+ auto n = state.range();
+ std::vector<bool> in(n, true);
+ std::vector<bool> out(aligned ? n : n + 8);
+ benchmark::DoNotOptimize(&in);
+ auto dst = aligned ? out.end() : out.end() - 4;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::ranges::copy_backward(in, dst));
+ benchmark::DoNotOptimize(&out);
+ }
+}
+
+static void bm_copy_backward(benchmark::State& state, bool aligned) {
+ auto n = state.range();
+ std::vector<bool> in(n, true);
+ std::vector<bool> out(aligned ? n : n + 8);
+ benchmark::DoNotOptimize(&in);
+ auto beg = in.begin();
+ auto end = in.end();
+ auto dst = aligned ? out.end() : out.end() - 4;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::copy_backward(beg, end, dst));
+ benchmark::DoNotOptimize(&out);
+ }
+}
+
+static void bm_ranges_copy_backward_aligned(benchmark::State& state) { bm_ranges_copy_backward(state, true); }
+static void bm_ranges_copy_backward_unaligned(benchmark::State& state) { bm_ranges_copy_backward(state, false); }
+
+static void bm_copy_backward_aligned(benchmark::State& state) { bm_copy_backward(state, true); }
+static void bm_copy_backward_unaligned(benchmark::State& state) { bm_copy_backward(state, false); }
+
+// Test the range version of std::copy_backward for vector<bool>::iterator
+BENCHMARK(bm_ranges_copy_backward_aligned)->Range(8, 1 << 16)->DenseRange(102400, 204800, 4096);
+BENCHMARK(bm_ranges_copy_backward_unaligned)->Range(8, 1 << 20);
+
+// Test the iterator-pair version of std::copy_backward for vector<bool>::iterator
+BENCHMARK(bm_copy_backward_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_copy_backward_unaligned)->Range(8, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_backward.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_backward.pass.cpp
index 928903de1ade2d..c4d064897e9e0b 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_backward.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_backward.pass.cpp
@@ -15,6 +15,7 @@
#include <algorithm>
#include <cassert>
+#include <vector>
#include "test_macros.h"
#include "test_iterators.h"
@@ -36,47 +37,66 @@ class Derived : public PaddedBase {
};
template <class InIter, class OutIter>
-TEST_CONSTEXPR_CXX20 void
-test_copy_backward()
-{
+TEST_CONSTEXPR_CXX20 void test_copy_backward() {
{
const unsigned N = 1000;
- int ia[N] = {};
+ int ia[N] = {};
for (unsigned i = 0; i < N; ++i)
- ia[i] = i;
+ ia[i] = i;
int ib[N] = {0};
- OutIter r = std::copy_backward(InIter(ia), InIter(ia+N), OutIter(ib+N));
+ OutIter r = std::copy_backward(InIter(ia), InIter(ia + N), OutIter(ib + N));
assert(base(r) == ib);
for (unsigned i = 0; i < N; ++i)
- assert(ia[i] == ib[i]);
+ assert(ia[i] == ib[i]);
}
}
-TEST_CONSTEXPR_CXX20 bool
-test()
-{
- test_copy_backward<bidirectional_iterator<const int*>, bidirectional_iterator<int*> >();
- test_copy_backward<bidirectional_iterator<const int*>, random_access_iterator<int...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121026
More information about the libcxx-commits
mailing list