[libcxx-commits] [libcxx] [libc++] Optimize ranges::swap_ranges for vector<bool>::iterator (PR #121150)
via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Jan 22 05:24:42 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Peng Liu (winner245)
<details>
<summary>Changes</summary>
This PR optimizes the performance of `std::ranges::swap_ranges` for `vector<bool>::iterator`, addressing a subtask outlined in issue #<!-- -->64038. The optimizations yield performance improvements of up to **650x** for aligned range swap and **70x** for unaligned range swap comparison.
- Aligned
```
------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------------
bm_ranges_swap_ranges_vb_aligned/8 15.7 ns 3.92 ns 4x
bm_ranges_swap_ranges_vb_aligned/64 117 ns 4.18 ns 28x
bm_ranges_swap_ranges_vb_aligned/512 1106 ns 7.29 ns 152x
bm_ranges_swap_ranges_vb_aligned/4096 8705 ns 15.9 ns 547x
bm_ranges_swap_ranges_vb_aligned/32768 66222 ns 116 ns 571x
bm_ranges_swap_ranges_vb_aligned/65536 131172 ns 230 ns 570x
bm_ranges_swap_ranges_vb_aligned/102400 205342 ns 396 ns 519x
bm_ranges_swap_ranges_vb_aligned/155648 312357 ns 488 ns 640x
bm_ranges_swap_ranges_vb_aligned/159744 325772 ns 500 ns 652x
bm_ranges_swap_ranges_vb_aligned/176128 368038 ns 556 ns 662x
bm_ranges_swap_ranges_vb_aligned/180224 365690 ns 560 ns 653x
bm_ranges_swap_ranges_vb_aligned/188416 395709 ns 608 ns 651x
bm_ranges_swap_ranges_vb_aligned/192512 409670 ns 633 ns 647x
bm_ranges_swap_ranges_vb_aligned/204800 438148 ns 736 ns 595x
bm_ranges_swap_ranges_vb_aligned/262144 544785 ns 1025 ns 531x
bm_ranges_swap_ranges_vb_aligned/1048576 2190270 ns 4211 ns 520x
```
- Unaligned
```
------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------------
bm_ranges_swap_ranges_vb_unaligned/8 15.4 ns 9.31 ns 1.7x
bm_ranges_swap_ranges_vb_unaligned/64 141 ns 11.8 ns 12x
bm_ranges_swap_ranges_vb_unaligned/512 1111 ns 23.1 ns 48x
bm_ranges_swap_ranges_vb_unaligned/4096 8908 ns 124 ns 72x
bm_ranges_swap_ranges_vb_unaligned/32768 72110 ns 971 ns 74x
bm_ranges_swap_ranges_vb_unaligned/262144 579345 ns 7660 ns 76x
bm_ranges_swap_ranges_vb_unaligned/1048576 2286527 ns 31063 ns 74x
```
---
Patch is 35.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121150.diff
6 Files Affected:
- (modified) libcxx/include/__algorithm/swap_ranges.h (+162)
- (modified) libcxx/include/__bit_reference (+7-151)
- (added) libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp (+66)
- (modified) libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp (+12-14)
- (modified) libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp (+40-16)
- (modified) libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp (+129-115)
``````````diff
diff --git a/libcxx/include/__algorithm/swap_ranges.h b/libcxx/include/__algorithm/swap_ranges.h
index 54b453b72360e0..2731d4feaa63d4 100644
--- a/libcxx/include/__algorithm/swap_ranges.h
+++ b/libcxx/include/__algorithm/swap_ranges.h
@@ -10,9 +10,12 @@
#define _LIBCPP___ALGORITHM_SWAP_RANGES_H
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/min.h>
#include <__config>
+#include <__fwd/bit_reference.h>
#include <__utility/move.h>
#include <__utility/pair.h>
+#include <__utility/swap.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -23,6 +26,165 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
+template <class _Cl, class _Cr>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_aligned(
+ __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
+ using _I1 = __bit_iterator<_Cl, false>;
+ using difference_type = typename _I1::difference_type;
+ using __storage_type = typename _I1::__storage_type;
+
+ const int __bits_per_word = _I1::__bits_per_word;
+ difference_type __n = __last - __first;
+ if (__n > 0) {
+ // do first word
+ if (__first.__ctz_ != 0) {
+ unsigned __clz = __bits_per_word - __first.__ctz_;
+ difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
+ __n -= __dn;
+ __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1;
+ *__first.__seg_ |= __b2;
+ __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
+ __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
+ ++__first.__seg_;
+ // __first.__ctz_ = 0;
+ }
+ // __first.__ctz_ == 0;
+ // do middle words
+ for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
+ swap(*__first.__seg_, *__result.__seg_);
+ // do last word
+ if (__n > 0) {
+ __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1;
+ *__first.__seg_ |= __b2;
+ __result.__ctz_ = static_cast<unsigned>(__n);
+ }
+ }
+ return __result;
+}
+
+template <class _Cl, class _Cr>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_unaligned(
+ __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
+ using _I1 = __bit_iterator<_Cl, false>;
+ using difference_type = typename _I1::difference_type;
+ using __storage_type = typename _I1::__storage_type;
+
+ const int __bits_per_word = _I1::__bits_per_word;
+ difference_type __n = __last - __first;
+ if (__n > 0) {
+ // do first word
+ if (__first.__ctz_ != 0) {
+ unsigned __clz_f = __bits_per_word - __first.__ctz_;
+ difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
+ __n -= __dn;
+ __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ unsigned __clz_r = __bits_per_word - __result.__ctz_;
+ __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
+ __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ if (__result.__ctz_ > __first.__ctz_) {
+ unsigned __s = __result.__ctz_ - __first.__ctz_;
+ *__result.__seg_ |= __b1 << __s;
+ *__first.__seg_ |= __b2 >> __s;
+ } else {
+ unsigned __s = __first.__ctz_ - __result.__ctz_;
+ *__result.__seg_ |= __b1 >> __s;
+ *__first.__seg_ |= __b2 << __s;
+ }
+ __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
+ __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
+ __dn -= __ddn;
+ if (__dn > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __dn);
+ __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ unsigned __s = __first.__ctz_ + __ddn;
+ *__result.__seg_ |= __b1 >> __s;
+ *__first.__seg_ |= __b2 << __s;
+ __result.__ctz_ = static_cast<unsigned>(__dn);
+ }
+ ++__first.__seg_;
+ // __first.__ctz_ = 0;
+ }
+ // __first.__ctz_ == 0;
+ // do middle words
+ __storage_type __m = ~__storage_type(0) << __result.__ctz_;
+ unsigned __clz_r = __bits_per_word - __result.__ctz_;
+ for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
+ __storage_type __b1 = *__first.__seg_;
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 << __result.__ctz_;
+ *__first.__seg_ = __b2 >> __result.__ctz_;
+ ++__result.__seg_;
+ __b2 = *__result.__seg_ & ~__m;
+ *__result.__seg_ &= __m;
+ *__result.__seg_ |= __b1 >> __clz_r;
+ *__first.__seg_ |= __b2 << __clz_r;
+ }
+ // do last word
+ if (__n > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __n);
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ __storage_type __dn = std::min<__storage_type>(__n, __clz_r);
+ __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 << __result.__ctz_;
+ *__first.__seg_ |= __b2 >> __result.__ctz_;
+ __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
+ __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
+ __n -= __dn;
+ if (__n > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __n);
+ __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 >> __dn;
+ *__first.__seg_ |= __b2 << __dn;
+ __result.__ctz_ = static_cast<unsigned>(__n);
+ }
+ }
+ }
+ return __result;
+}
+
+// 2+1 iterators: size2 >= size1; used by std::swap_ranges.
+template <class, class _Cl, class _Cr>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+__swap_ranges(__bit_iterator<_Cl, false> __first1,
+ __bit_iterator<_Cl, false> __last1,
+ __bit_iterator<_Cr, false> __first2) {
+ if (__first1.__ctz_ == __first2.__ctz_)
+ return std::make_pair(__last1, std::__swap_ranges_aligned(__first1, __last1, __first2));
+ return std::make_pair(__last1, std::__swap_ranges_unaligned(__first1, __last1, __first2));
+}
+
+// 2+2 iterators: used by std::ranges::swap_ranges.
+template <class _AlgPolicy, class _Cl, class _Cr>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+__swap_ranges(__bit_iterator<_Cl, false> __first1,
+ __bit_iterator<_Cl, false> __last1,
+ __bit_iterator<_Cr, false> __first2,
+ __bit_iterator<_Cr, false> __last2) {
+ if (__last1 - __first1 < __last2 - __first2)
+ return std::make_pair(__last1, std::__swap_ranges<_AlgPolicy>(__first1, __last1, __first2).second);
+ return std::make_pair(std::__swap_ranges<_AlgPolicy>(__first2, __last2, __first1).second, __last2);
+}
+
// 2+2 iterators: the shorter size will be used.
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _Sentinel2>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator1, _ForwardIterator2>
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 67abb023122edf..26dfb330384a36 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -12,6 +12,7 @@
#include <__algorithm/copy_n.h>
#include <__algorithm/min.h>
+#include <__algorithm/swap_ranges.h>
#include <__bit/countr.h>
#include <__compare/ordering.h>
#include <__config>
@@ -24,6 +25,7 @@
#include <__type_traits/conditional.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/void_t.h>
+#include <__utility/pair.h>
#include <__utility/swap.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -451,152 +453,6 @@ inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> move_backward(
return std::copy_backward(__first, __last, __result);
}
-// swap_ranges
-
-template <class _Cl, class _Cr>
-_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_aligned(
- __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
- using _I1 = __bit_iterator<_Cl, false>;
- using difference_type = typename _I1::difference_type;
- using __storage_type = typename _I1::__storage_type;
-
- const int __bits_per_word = _I1::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__first.__ctz_ != 0) {
- unsigned __clz = __bits_per_word - __first.__ctz_;
- difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
- __n -= __dn;
- __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1;
- *__first.__seg_ |= __b2;
- __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
- ++__first.__seg_;
- // __first.__ctz_ = 0;
- }
- // __first.__ctz_ == 0;
- // do middle words
- for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
- swap(*__first.__seg_, *__result.__seg_);
- // do last word
- if (__n > 0) {
- __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1;
- *__first.__seg_ |= __b2;
- __result.__ctz_ = static_cast<unsigned>(__n);
- }
- }
- return __result;
-}
-
-template <class _Cl, class _Cr>
-_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_unaligned(
- __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
- using _I1 = __bit_iterator<_Cl, false>;
- using difference_type = typename _I1::difference_type;
- using __storage_type = typename _I1::__storage_type;
-
- const int __bits_per_word = _I1::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__first.__ctz_ != 0) {
- unsigned __clz_f = __bits_per_word - __first.__ctz_;
- difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
- __n -= __dn;
- __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
- __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- if (__result.__ctz_ > __first.__ctz_) {
- unsigned __s = __result.__ctz_ - __first.__ctz_;
- *__result.__seg_ |= __b1 << __s;
- *__first.__seg_ |= __b2 >> __s;
- } else {
- unsigned __s = __first.__ctz_ - __result.__ctz_;
- *__result.__seg_ |= __b1 >> __s;
- *__first.__seg_ |= __b2 << __s;
- }
- __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
- __dn -= __ddn;
- if (__dn > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __dn);
- __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- unsigned __s = __first.__ctz_ + __ddn;
- *__result.__seg_ |= __b1 >> __s;
- *__first.__seg_ |= __b2 << __s;
- __result.__ctz_ = static_cast<unsigned>(__dn);
- }
- ++__first.__seg_;
- // __first.__ctz_ = 0;
- }
- // __first.__ctz_ == 0;
- // do middle words
- __storage_type __m = ~__storage_type(0) << __result.__ctz_;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
- __storage_type __b1 = *__first.__seg_;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 << __result.__ctz_;
- *__first.__seg_ = __b2 >> __result.__ctz_;
- ++__result.__seg_;
- __b2 = *__result.__seg_ & ~__m;
- *__result.__seg_ &= __m;
- *__result.__seg_ |= __b1 >> __clz_r;
- *__first.__seg_ |= __b2 << __clz_r;
- }
- // do last word
- if (__n > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __dn = std::min<__storage_type>(__n, __clz_r);
- __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 << __result.__ctz_;
- *__first.__seg_ |= __b2 >> __result.__ctz_;
- __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
- __n -= __dn;
- if (__n > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 >> __dn;
- *__first.__seg_ |= __b2 << __dn;
- __result.__ctz_ = static_cast<unsigned>(__n);
- }
- }
- }
- return __result;
-}
-
-template <class _Cl, class _Cr>
-inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges(
- __bit_iterator<_Cl, false> __first1, __bit_iterator<_Cl, false> __last1, __bit_iterator<_Cr, false> __first2) {
- if (__first1.__ctz_ == __first2.__ctz_)
- return std::__swap_ranges_aligned(__first1, __last1, __first2);
- return std::__swap_ranges_unaligned(__first1, __last1, __first2);
-}
-
// rotate
template <class _Cp>
@@ -1001,14 +857,14 @@ private:
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_unaligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
- template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
- swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
+ template <class, class _Cl, class _Cr>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+ __swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
diff --git a/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp b/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
new file mode 100644
index 00000000000000..66629799ac5ee0
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <vector>
+
+static void bm_ranges_swap_ranges_vb_aligned(benchmark::State& state) {
+ auto n = state.range();
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(n, false);
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::ranges::swap_ranges(vec1, vec2));
+ benchmark::DoNotOptimize(&vec1);
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+static void bm_ranges_swap_ranges_vb_unaligned(benchmark::State& state) {
+ auto n = state.range();
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(n + 8, true);
+ auto beg1 = std::ranges::begin(vec1);
+ auto end1 = std::ranges::end(vec1);
+ auto beg2 = std::ranges::begin(vec2) + 4;
+ auto end2 = std::ranges::end(vec2) - 4;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::ranges::swap_ranges(beg1, end1, beg2, end2));
+ benchmark::DoNotOptimize(&vec1);
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+// Test std::ranges::swap_ranges for vector<bool>::iterator
+BENCHMARK(bm_ranges_swap_ranges_vb_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_ranges_swap_ranges_vb_unaligned)->Range(8, 1 << 20);
+
+static void bm_swap_ranges_vb(benchmark::State& state, bool aligned) {
+ auto n = state.range();
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(aligned ? n : n + 8, true);
+ auto beg1 = vec1.begin();
+ auto end1 = vec1.end();
+ auto beg2 = aligned ? vec2.begin() : vec2.begin() + 4;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::swap_ranges(beg1, end1, beg2));
+ benchmark::DoNotOptimize(&vec1);
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+static void bm_swap_ranges_vb_aligned(benchmark::State& state) { bm_swap_ranges_vb(state, true); }
+static void bm_swap_ranges_vb_unaligned(benchmark::State& state) { bm_swap_ranges_vb(state, false); }
+
+// Test std::swap_ranges for vector<bool>::iterator
+BENCHMARK(bm_swap_ranges_vb_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_swap_ranges_vb_unaligned)->Range(8, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
index b3a9f5fc259ef7..0394a48a0bb9a2 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
@@ -19,25 +19,23 @@
#include "test_macros.h"
#if TEST_STD_VER > 17
-constexpr bool test_swap_constexpr()
-{
- int i = 1;
- int j = 2;
- std::iter_swap(&i, &j);
- return i == 2 && j == 1;
+constexpr bool test_swap_constexpr() {
+ int i = 1;
+ int j = 2;
+ std::iter_swap(&i, &j);
+ return i == 2 && j == 1;
}
#endif // TEST_STD_VER > 17
-int main(int, char**)
-{
- int i = 1;
- int j = 2;
- std::iter_swap(&i, &j);
- assert(i == 2);
- assert(j == 1);
+int main(int, char**) {
+ int i = 1;
+...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121150
More information about the libcxx-commits
mailing list