[libcxx-commits] [libcxx] [libc++] Optimize ranges::swap_ranges for vector<bool>::iterator (PR #121150)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Dec 26 05:29:56 PST 2024
https://github.com/winner245 created https://github.com/llvm/llvm-project/pull/121150
This PR optimizes the performance of `std::ranges::swap_ranges` for `vector<bool>::iterator`, addressing a subtask outlined in issue #64038. The optimizations yield performance improvements of up to **x** for aligned range swap and **60x** for unaligned range swap comparison.
- Aligned
- Unaligned
>From faed5b39d558aa26a30536c149629b7303fb4478 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Wed, 25 Dec 2024 16:06:35 -0500
Subject: [PATCH] Optimize ranges::swap_ranges for vector<bool>::iterator
---
libcxx/include/__algorithm/swap_ranges.h | 161 +++++++++++
libcxx/include/__bit_reference | 157 +----------
.../algorithms/swap_ranges.bench.cpp | 66 +++++
.../alg.swap/iter_swap.pass.cpp | 26 +-
.../alg.swap/ranges.swap_ranges.pass.cpp | 58 ++--
.../alg.swap/swap_ranges.pass.cpp | 253 ++++++++++--------
6 files changed, 422 insertions(+), 299 deletions(-)
create mode 100644 libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
diff --git a/libcxx/include/__algorithm/swap_ranges.h b/libcxx/include/__algorithm/swap_ranges.h
index 54b453b72360e0..be151523a9aa01 100644
--- a/libcxx/include/__algorithm/swap_ranges.h
+++ b/libcxx/include/__algorithm/swap_ranges.h
@@ -10,7 +10,9 @@
#define _LIBCPP___ALGORITHM_SWAP_RANGES_H
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/min.h>
#include <__config>
+#include <__fwd/bit_reference.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -23,6 +25,165 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
+template <class _Cl, class _Cr> // _Cl / _Cr: container types of the first and second bit range
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_aligned(
+ __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
+ using _I1 = __bit_iterator<_Cl, false>;
+ using difference_type = typename _I1::difference_type;
+ using __storage_type = typename _I1::__storage_type;
+ // Swaps the bits of [__first, __last) with the same number of bits starting at __result, one storage word at a time. The same mask is applied to both words, so this relies on __first and __result having the same bit offset (__ctz_); the __swap_ranges dispatcher only calls it in that case. Returns the position in the second range just past the last swapped bit.
+ const int __bits_per_word = _I1::__bits_per_word;
+ difference_type __n = __last - __first; // number of bits still to be swapped
+ if (__n > 0) {
+ // do first word (only when the range starts in the middle of a word)
+ if (__first.__ctz_ != 0) {
+ unsigned __clz = __bits_per_word - __first.__ctz_; // bits from __first up to the end of its word
+ difference_type __dn = std::min(static_cast<difference_type>(__clz), __n); // bits of this word that belong to the range
+ __n -= __dn;
+ __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); // mask of bits [__ctz_, __ctz_ + __dn)
+ __storage_type __b1 = *__first.__seg_ & __m; // save the masked bits of the first range ...
+ *__first.__seg_ &= ~__m; // ... and clear them in place
+ __storage_type __b2 = *__result.__seg_ & __m; // same for the second range
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1; // cross-assign the saved bits
+ *__first.__seg_ |= __b2;
+ __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; // advance __result by __dn bits
+ __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
+ ++__first.__seg_;
+ // __first.__ctz_ = 0;
+ }
+ // __first.__ctz_ == 0;
+ // do middle words: every remaining full word is swapped as a whole
+ for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
+ swap(*__first.__seg_, *__result.__seg_);
+ // do last word: fewer than __bits_per_word bits remain
+ if (__n > 0) {
+ __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); // mask of the low __n bits
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1;
+ *__first.__seg_ |= __b2;
+ __result.__ctz_ = static_cast<unsigned>(__n); // the returned iterator points __n bits into this last word
+ }
+ }
+ return __result;
+}
+
+template <class _Cl, class _Cr> // _Cl / _Cr: container types of the first and second bit range
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_unaligned(
+ __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
+ using _I1 = __bit_iterator<_Cl, false>;
+ using difference_type = typename _I1::difference_type;
+ using __storage_type = typename _I1::__storage_type;
+ // Swaps the bits of [__first, __last) with the same number of bits starting at __result when the two ranges sit at different bit offsets: each chunk taken from one word of the first range is shifted into place and may straddle two words of the second range. Returns the position in the second range just past the last swapped bit.
+ const int __bits_per_word = _I1::__bits_per_word;
+ difference_type __n = __last - __first; // number of bits still to be swapped
+ if (__n > 0) {
+ // do first word (only when the first range starts in the middle of a word)
+ if (__first.__ctz_ != 0) {
+ unsigned __clz_f = __bits_per_word - __first.__ctz_; // bits from __first up to the end of its word
+ difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n); // bits of this word that belong to the range
+ __n -= __dn;
+ __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); // mask of bits [__first.__ctz_, __first.__ctz_ + __dn)
+ __storage_type __b1 = *__first.__seg_ & __m; // save those bits of the first range and clear them in place
+ *__first.__seg_ &= ~__m;
+ unsigned __clz_r = __bits_per_word - __result.__ctz_; // room left in the current word of the second range
+ __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); // how many of the __dn bits fit into that word
+ __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); // mask of bits [__result.__ctz_, __result.__ctz_ + __ddn)
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ if (__result.__ctz_ > __first.__ctz_) { // shift by the offset difference so each chunk lands at the other range's bit position
+ unsigned __s = __result.__ctz_ - __first.__ctz_;
+ *__result.__seg_ |= __b1 << __s;
+ *__first.__seg_ |= __b2 >> __s;
+ } else {
+ unsigned __s = __first.__ctz_ - __result.__ctz_;
+ *__result.__seg_ |= __b1 >> __s;
+ *__first.__seg_ |= __b2 << __s;
+ }
+ __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; // advance __result by __ddn bits
+ __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
+ __dn -= __ddn; // bits that did not fit and spill into the next word of the second range
+ if (__dn > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __dn); // mask of the low __dn bits
+ __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ unsigned __s = __first.__ctz_ + __ddn; // position of the spilled bits inside the first range's word
+ *__result.__seg_ |= __b1 >> __s;
+ *__first.__seg_ |= __b2 << __s;
+ __result.__ctz_ = static_cast<unsigned>(__dn);
+ }
+ ++__first.__seg_;
+ // __first.__ctz_ = 0;
+ }
+ // __first.__ctz_ == 0;
+ // do middle words: each full word of the first range is split across two adjacent words of the second range
+ __storage_type __m = ~__storage_type(0) << __result.__ctz_; // bits at or above __result.__ctz_
+ unsigned __clz_r = __bits_per_word - __result.__ctz_; // NOTE(review): the shifts by __clz_r below need __result.__ctz_ != 0 here; this appears to follow from the dispatcher using this path only when the start offsets differ -- confirm
+ for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { // __result.__seg_ is advanced inside the body
+ __storage_type __b1 = *__first.__seg_;
+ __storage_type __b2 = *__result.__seg_ & __m; // upper part of the current second-range word
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 << __result.__ctz_;
+ *__first.__seg_ = __b2 >> __result.__ctz_; // plain assignment: the whole first-range word is replaced
+ ++__result.__seg_;
+ __b2 = *__result.__seg_ & ~__m; // lower part of the next second-range word
+ *__result.__seg_ &= __m;
+ *__result.__seg_ |= __b1 >> __clz_r;
+ *__first.__seg_ |= __b2 << __clz_r;
+ }
+ // do last word: fewer than __bits_per_word bits remain in the first range
+ if (__n > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __n); // mask of the low __n bits
+ __storage_type __b1 = *__first.__seg_ & __m;
+ *__first.__seg_ &= ~__m;
+ __storage_type __dn = std::min<__storage_type>(__n, __clz_r); // bits that fit into the current second-range word
+ __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); // mask of bits [__result.__ctz_, __result.__ctz_ + __dn)
+ __storage_type __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 << __result.__ctz_;
+ *__first.__seg_ |= __b2 >> __result.__ctz_;
+ __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; // advance __result by __dn bits
+ __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
+ __n -= __dn; // bits that spill into the next second-range word
+ if (__n > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __n);
+ __b2 = *__result.__seg_ & __m;
+ *__result.__seg_ &= ~__m;
+ *__result.__seg_ |= __b1 >> __dn;
+ *__first.__seg_ |= __b2 << __dn;
+ __result.__ctz_ = static_cast<unsigned>(__n);
+ }
+ }
+ }
+ return __result;
+}
+
+// 2+1 iterators: size2 >= size1; used by std::swap_ranges. Overload for bit iterators: picks the aligned or unaligned word-wise helper and returns {__last1, end of the swapped part of the second range}.
+template <class, class _Cl, class _Cr>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+__swap_ranges(__bit_iterator<_Cl, false> __first1,
+ __bit_iterator<_Cl, false> __last1,
+ __bit_iterator<_Cr, false> __first2) {
+ if (__first1.__ctz_ == __first2.__ctz_) // both ranges start at the same bit offset within their words
+ return std::make_pair(__last1, std::__swap_ranges_aligned(__first1, __last1, __first2));
+ return std::make_pair(__last1, std::__swap_ranges_unaligned(__first1, __last1, __first2));
+}
+
+// 2+2 iterators: used by std::ranges::swap_ranges. Overload for bit iterators: the shorter range decides how many bits are swapped, and the work is forwarded to the 2+1 overload.
+template <class _AlgPolicy, class _Cl, class _Cr>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+__swap_ranges(__bit_iterator<_Cl, false> __first1,
+ __bit_iterator<_Cl, false> __last1,
+ __bit_iterator<_Cr, false> __first2,
+ __bit_iterator<_Cr, false> __last2) {
+ if (__last1 - __first1 < __last2 - __first2) // first range is strictly shorter: swap all of it
+ return std::make_pair(__last1, std::__swap_ranges<_AlgPolicy>(__first1, __last1, __first2).second);
+ return std::make_pair(std::__swap_ranges<_AlgPolicy>(__first2, __last2, __first1).second, __last2); // otherwise the second range drives the swap; the inner call's .second is the stop position in the first range
+}
+
// 2+2 iterators: the shorter size will be used.
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _Sentinel2>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator1, _ForwardIterator2>
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 9fa24c98d493fd..0b8c31f643fadd 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -12,6 +12,7 @@
#include <__algorithm/copy_n.h>
#include <__algorithm/min.h>
+#include <__algorithm/swap_ranges.h>
#include <__bit/countr.h>
#include <__compare/ordering.h>
#include <__config>
@@ -437,152 +438,6 @@ inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> move_backward(
return std::copy_backward(__first, __last, __result);
}
-// swap_ranges
-
-template <class _Cl, class _Cr>
-_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_aligned(
- __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
- using _I1 = __bit_iterator<_Cl, false>;
- using difference_type = typename _I1::difference_type;
- using __storage_type = typename _I1::__storage_type;
-
- const int __bits_per_word = _I1::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__first.__ctz_ != 0) {
- unsigned __clz = __bits_per_word - __first.__ctz_;
- difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
- __n -= __dn;
- __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1;
- *__first.__seg_ |= __b2;
- __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
- ++__first.__seg_;
- // __first.__ctz_ = 0;
- }
- // __first.__ctz_ == 0;
- // do middle words
- for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
- swap(*__first.__seg_, *__result.__seg_);
- // do last word
- if (__n > 0) {
- __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1;
- *__first.__seg_ |= __b2;
- __result.__ctz_ = static_cast<unsigned>(__n);
- }
- }
- return __result;
-}
-
-template <class _Cl, class _Cr>
-_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_unaligned(
- __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
- using _I1 = __bit_iterator<_Cl, false>;
- using difference_type = typename _I1::difference_type;
- using __storage_type = typename _I1::__storage_type;
-
- const int __bits_per_word = _I1::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__first.__ctz_ != 0) {
- unsigned __clz_f = __bits_per_word - __first.__ctz_;
- difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
- __n -= __dn;
- __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
- __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- if (__result.__ctz_ > __first.__ctz_) {
- unsigned __s = __result.__ctz_ - __first.__ctz_;
- *__result.__seg_ |= __b1 << __s;
- *__first.__seg_ |= __b2 >> __s;
- } else {
- unsigned __s = __first.__ctz_ - __result.__ctz_;
- *__result.__seg_ |= __b1 >> __s;
- *__first.__seg_ |= __b2 << __s;
- }
- __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
- __dn -= __ddn;
- if (__dn > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __dn);
- __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- unsigned __s = __first.__ctz_ + __ddn;
- *__result.__seg_ |= __b1 >> __s;
- *__first.__seg_ |= __b2 << __s;
- __result.__ctz_ = static_cast<unsigned>(__dn);
- }
- ++__first.__seg_;
- // __first.__ctz_ = 0;
- }
- // __first.__ctz_ == 0;
- // do middle words
- __storage_type __m = ~__storage_type(0) << __result.__ctz_;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
- __storage_type __b1 = *__first.__seg_;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 << __result.__ctz_;
- *__first.__seg_ = __b2 >> __result.__ctz_;
- ++__result.__seg_;
- __b2 = *__result.__seg_ & ~__m;
- *__result.__seg_ &= __m;
- *__result.__seg_ |= __b1 >> __clz_r;
- *__first.__seg_ |= __b2 << __clz_r;
- }
- // do last word
- if (__n > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __dn = std::min<__storage_type>(__n, __clz_r);
- __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 << __result.__ctz_;
- *__first.__seg_ |= __b2 >> __result.__ctz_;
- __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
- __n -= __dn;
- if (__n > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 >> __dn;
- *__first.__seg_ |= __b2 << __dn;
- __result.__ctz_ = static_cast<unsigned>(__n);
- }
- }
- }
- return __result;
-}
-
-template <class _Cl, class _Cr>
-inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges(
- __bit_iterator<_Cl, false> __first1, __bit_iterator<_Cl, false> __last1, __bit_iterator<_Cr, false> __first2) {
- if (__first1.__ctz_ == __first2.__ctz_)
- return std::__swap_ranges_aligned(__first1, __last1, __first2);
- return std::__swap_ranges_unaligned(__first1, __last1, __first2);
-}
-
// rotate
template <class _Cp>
@@ -987,14 +842,14 @@ private:
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_unaligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
- template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
- swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
+ template <class, class _Cl, class _Cr>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+ __swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
diff --git a/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp b/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
new file mode 100644
index 00000000000000..a8fcdf8b9c4883
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <vector>
+
+static void bm_ranges_swap_ranges_aligned(benchmark::State& state) { // times std::ranges::swap_ranges on two whole vector<bool> objects of equal size
+ auto n = state.range(); // number of bits, taken from the benchmark argument
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(n, false);
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::ranges::swap_ranges(vec1, vec2));
+ benchmark::DoNotOptimize(&vec1); // presumably keeps the vectors observable so the swap is not optimized away
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+static void bm_ranges_swap_ranges_unaligned(benchmark::State& state) { // times std::ranges::swap_ranges when the second range starts 4 bits into its vector
+ auto n = state.range(); // number of bits swapped, taken from the benchmark argument
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(n + 8, true); // 8 extra bits so an n-bit window can start at offset 4
+ auto beg1 = std::ranges::begin(vec1);
+ auto end1 = std::ranges::end(vec1);
+ auto beg2 = std::ranges::begin(vec2) + 4;
+ auto end2 = std::ranges::end(vec2) - 4; // [beg2, end2) is n bits long
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::ranges::swap_ranges(beg1, end1, beg2, end2));
+ benchmark::DoNotOptimize(&vec1);
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+// Test std::ranges::swap_ranges for vector<bool>::iterator
+BENCHMARK(bm_ranges_swap_ranges_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_ranges_swap_ranges_unaligned)->Range(8, 1 << 20);
+
+static void bm_swap_ranges(benchmark::State& state, bool aligned) { // shared body for the std::swap_ranges benchmarks; `aligned` selects whether the second range starts at bit 0 or bit 4
+ auto n = state.range(); // number of bits swapped, taken from the benchmark argument
+ std::vector<bool> vec1(n, true);
+ std::vector<bool> vec2(aligned ? n : n + 8, true); // the unaligned case gets 8 extra bits so the destination can start at offset 4
+ auto beg1 = vec1.begin();
+ auto end1 = vec1.end();
+ auto beg2 = aligned ? vec2.begin() : vec2.begin() + 4;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(std::swap_ranges(beg1, end1, beg2));
+ benchmark::DoNotOptimize(&vec1);
+ benchmark::DoNotOptimize(&vec2);
+ }
+}
+
+static void bm_swap_ranges_aligned(benchmark::State& state) { bm_swap_ranges(state, true); } // std::swap_ranges, destination starting at bit 0
+static void bm_swap_ranges_unaligned(benchmark::State& state) { bm_swap_ranges(state, false); } // std::swap_ranges, destination starting at bit 4
+
+// Test std::swap_ranges for vector<bool>::iterator
+BENCHMARK(bm_swap_ranges_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_swap_ranges_unaligned)->Range(8, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
index b3a9f5fc259ef7..0394a48a0bb9a2 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
@@ -19,25 +19,23 @@
#include "test_macros.h"
#if TEST_STD_VER > 17
-constexpr bool test_swap_constexpr()
-{
- int i = 1;
- int j = 2;
- std::iter_swap(&i, &j);
- return i == 2 && j == 1;
+constexpr bool test_swap_constexpr() {
+ int i = 1;
+ int j = 2;
+ std::iter_swap(&i, &j);
+ return i == 2 && j == 1;
}
#endif // TEST_STD_VER > 17
-int main(int, char**)
-{
- int i = 1;
- int j = 2;
- std::iter_swap(&i, &j);
- assert(i == 2);
- assert(j == 1);
+int main(int, char**) {
+ int i = 1;
+ int j = 2;
+ std::iter_swap(&i, &j);
+ assert(i == 2);
+ assert(j == 1);
#if TEST_STD_VER > 17
- static_assert(test_swap_constexpr());
+ static_assert(test_swap_constexpr());
#endif // TEST_STD_VER > 17
return 0;
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp
index a8d69b2832b462..a7b62dc4b886d3 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp
@@ -23,13 +23,14 @@
#include <array>
#include <cassert>
#include <ranges>
+#include <vector>
#include "test_iterators.h"
constexpr void test_different_lengths() {
- using Expected = std::ranges::swap_ranges_result<int*, int*>;
- int i[3] = {1, 2, 3};
- int j[1] = {4};
+ using Expected = std::ranges::swap_ranges_result<int*, int*>;
+ int i[3] = {1, 2, 3};
+ int j[1] = {4};
std::same_as<Expected> auto r = std::ranges::swap_ranges(i, i + 3, j, j + 1);
assert(r.in1 == i + 1);
assert(r.in2 == j + 1);
@@ -64,8 +65,8 @@ constexpr void test_range() {
std::array r1 = {1, 2, 3};
std::array r2 = {4, 5, 6};
-
- std::same_as<std::ranges::in_in_result<std::array<int, 3>::iterator, std::array<int, 3>::iterator>> auto r = std::ranges::swap_ranges(r1, r2);
+ std::same_as<std::ranges::in_in_result<std::array<int, 3>::iterator, std::array<int, 3>::iterator>> auto r =
+ std::ranges::swap_ranges(r1, r2);
assert(r.in1 == r1.end());
assert(r.in2 == r2.end());
@@ -110,13 +111,12 @@ constexpr void test_borrowed_input_range() {
}
constexpr void test_sentinel() {
- int i[3] = {1, 2, 3};
- int j[3] = {4, 5, 6};
- using It = cpp17_input_iterator<int*>;
- using Sent = sentinel_wrapper<It>;
- using Expected = std::ranges::swap_ranges_result<It, It>;
- std::same_as<Expected> auto r =
- std::ranges::swap_ranges(It(i), Sent(It(i + 3)), It(j), Sent(It(j + 3)));
+ int i[3] = {1, 2, 3};
+ int j[3] = {4, 5, 6};
+ using It = cpp17_input_iterator<int*>;
+ using Sent = sentinel_wrapper<It>;
+ using Expected = std::ranges::swap_ranges_result<It, It>;
+ std::same_as<Expected> auto r = std::ranges::swap_ranges(It(i), Sent(It(i + 3)), It(j), Sent(It(j + 3)));
assert(base(r.in1) == i + 3);
assert(base(r.in2) == j + 3);
assert(i[0] == 4);
@@ -130,8 +130,8 @@ constexpr void test_sentinel() {
template <class Iter1, class Iter2>
constexpr void test_iterators() {
using Expected = std::ranges::swap_ranges_result<Iter1, Iter2>;
- int i[3] = {1, 2, 3};
- int j[3] = {4, 5, 6};
+ int i[3] = {1, 2, 3};
+ int j[3] = {4, 5, 6};
std::same_as<Expected> auto r =
std::ranges::swap_ranges(Iter1(i), sentinel_wrapper(Iter1(i + 3)), Iter2(j), sentinel_wrapper(Iter2(j + 3)));
assert(base(r.in1) == i + 3);
@@ -146,7 +146,7 @@ constexpr void test_iterators() {
constexpr void test_rval_range() {
{
- using Expected = std::ranges::swap_ranges_result<std::array<int, 3>::iterator, std::ranges::dangling>;
+ using Expected = std::ranges::swap_ranges_result<std::array<int, 3>::iterator, std::ranges::dangling>;
std::array<int, 3> r = {1, 2, 3};
std::same_as<Expected> auto a = std::ranges::swap_ranges(r, std::array{4, 5, 6});
assert((r == std::array{4, 5, 6}));
@@ -154,7 +154,7 @@ constexpr void test_rval_range() {
}
{
std::array<int, 3> r = {1, 2, 3};
- using Expected = std::ranges::swap_ranges_result<std::ranges::dangling, std::array<int, 3>::iterator>;
+ using Expected = std::ranges::swap_ranges_result<std::ranges::dangling, std::array<int, 3>::iterator>;
std::same_as<Expected> auto b = std::ranges::swap_ranges(std::array{4, 5, 6}, r);
assert((r == std::array{4, 5, 6}));
assert(b.in2 == r.begin() + 3);
@@ -170,6 +170,24 @@ constexpr void test_proxy_in_iterators() {
test_iterators<ProxyIterator<contiguous_iterator<int*>>, Out>();
}
+template <std::size_t N> // N: number of bits swapped in each sub-test
+struct TestBitIter { // checks std::ranges::swap_ranges on vector<bool> for both start alignments
+ TEST_CONSTEXPR_CXX20 void operator()() {
+ { // Test swap_ranges() with aligned bytes: both ranges start at bit 0 of their vector
+ std::vector<bool> f(N, false), t(N, true);
+ std::ranges::swap_ranges(f, t);
+ assert(std::all_of(f.begin(), f.end(), [](bool b) { return b; })); // f now holds what t held (all true)
+ assert(std::all_of(t.begin(), t.end(), [](bool b) { return !b; })); // t now holds what f held (all false)
+ }
+ { // Test swap_ranges() with unaligned bytes: the second range is the N-bit window starting at bit 4 of t
+ std::vector<bool> f(N, false), t(N + 8, true);
+ std::ranges::swap_ranges(f.begin(), f.end(), t.begin() + 4, t.end() - 4);
+ assert(std::all_of(f.begin(), f.end(), [](bool b) { return b; }));
+ assert(std::all_of(t.begin() + 4, t.end() - 4, [](bool b) { return !b; })); // only the swapped window of t is checked
+ }
+ }
+};
+
constexpr bool test() {
test_range();
@@ -214,6 +232,14 @@ constexpr bool test() {
test_borrowed_input_range();
test_rval_range();
+ { // Test vector<bool>::iterator optimization
+ TestBitIter<8>()();
+ TestBitIter<16>()();
+ TestBitIter<32>()();
+ TestBitIter<64>()();
+ TestBitIter<1024>()();
+ }
+
return true;
}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp
index 8bfbcd755e39f7..74280053449b1b 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp
@@ -17,159 +17,176 @@
#include <cassert>
#include <memory>
#include <utility>
+#include <vector>
#include "test_macros.h"
#include "test_iterators.h"
-template<class Iter1, class Iter2>
-void
-test()
-{
- int i[3] = {1, 2, 3};
- int j[3] = {4, 5, 6};
- Iter2 r = std::swap_ranges(Iter1(i), Iter1(i+3), Iter2(j));
- assert(base(r) == j+3);
- assert(i[0] == 4);
- assert(i[1] == 5);
- assert(i[2] == 6);
- assert(j[0] == 1);
- assert(j[1] == 2);
- assert(j[2] == 3);
+template <class Iter1, class Iter2>
+void test() {
+ int i[3] = {1, 2, 3};
+ int j[3] = {4, 5, 6};
+ Iter2 r = std::swap_ranges(Iter1(i), Iter1(i + 3), Iter2(j));
+ assert(base(r) == j + 3);
+ assert(i[0] == 4);
+ assert(i[1] == 5);
+ assert(i[2] == 6);
+ assert(j[0] == 1);
+ assert(j[1] == 2);
+ assert(j[2] == 3);
}
#if TEST_STD_VER >= 11
-template<class Iter1, class Iter2>
-void
-test1()
-{
- std::unique_ptr<int> i[3];
- for (int k = 0; k < 3; ++k)
- i[k].reset(new int(k+1));
- std::unique_ptr<int> j[3];
- for (int k = 0; k < 3; ++k)
- j[k].reset(new int(k+4));
- Iter2 r = std::swap_ranges(Iter1(i), Iter1(i+3), Iter2(j));
- assert(base(r) == j+3);
- assert(*i[0] == 4);
- assert(*i[1] == 5);
- assert(*i[2] == 6);
- assert(*j[0] == 1);
- assert(*j[1] == 2);
- assert(*j[2] == 3);
+template <class Iter1, class Iter2>
+void test1() {
+ std::unique_ptr<int> i[3];
+ for (int k = 0; k < 3; ++k)
+ i[k].reset(new int(k + 1));
+ std::unique_ptr<int> j[3];
+ for (int k = 0; k < 3; ++k)
+ j[k].reset(new int(k + 4));
+ Iter2 r = std::swap_ranges(Iter1(i), Iter1(i + 3), Iter2(j));
+ assert(base(r) == j + 3);
+ assert(*i[0] == 4);
+ assert(*i[1] == 5);
+ assert(*i[2] == 6);
+ assert(*j[0] == 1);
+ assert(*j[1] == 2);
+ assert(*j[2] == 3);
}
#endif // TEST_STD_VER >= 11
-void test2()
-{
- {
+void test2() {
+ {
int src[2][2] = {{0, 1}, {2, 3}};
decltype(src) dest = {{9, 8}, {7, 6}};
std::swap(src, dest);
- assert ( src[0][0] == 9 );
- assert ( src[0][1] == 8 );
- assert ( src[1][0] == 7 );
- assert ( src[1][1] == 6 );
+ assert(src[0][0] == 9);
+ assert(src[0][1] == 8);
+ assert(src[1][0] == 7);
+ assert(src[1][1] == 6);
- assert ( dest[0][0] == 0 );
- assert ( dest[0][1] == 1 );
- assert ( dest[1][0] == 2 );
- assert ( dest[1][1] == 3 );
- }
+ assert(dest[0][0] == 0);
+ assert(dest[0][1] == 1);
+ assert(dest[1][0] == 2);
+ assert(dest[1][1] == 3);
+ }
- {
+ {
int src[3][3] = {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}};
decltype(src) dest = {{9, 8, 7}, {6, 5, 4}, {3, 2, 1}};
std::swap(src, dest);
- assert ( src[0][0] == 9 );
- assert ( src[0][1] == 8 );
- assert ( src[0][2] == 7 );
- assert ( src[1][0] == 6 );
- assert ( src[1][1] == 5 );
- assert ( src[1][2] == 4 );
- assert ( src[2][0] == 3 );
- assert ( src[2][1] == 2 );
- assert ( src[2][2] == 1 );
-
- assert ( dest[0][0] == 0 );
- assert ( dest[0][1] == 1 );
- assert ( dest[0][2] == 2 );
- assert ( dest[1][0] == 3 );
- assert ( dest[1][1] == 4 );
- assert ( dest[1][2] == 5 );
- assert ( dest[2][0] == 6 );
- assert ( dest[2][1] == 7 );
- assert ( dest[2][2] == 8 );
- }
+ assert(src[0][0] == 9);
+ assert(src[0][1] == 8);
+ assert(src[0][2] == 7);
+ assert(src[1][0] == 6);
+ assert(src[1][1] == 5);
+ assert(src[1][2] == 4);
+ assert(src[2][0] == 3);
+ assert(src[2][1] == 2);
+ assert(src[2][2] == 1);
+
+ assert(dest[0][0] == 0);
+ assert(dest[0][1] == 1);
+ assert(dest[0][2] == 2);
+ assert(dest[1][0] == 3);
+ assert(dest[1][1] == 4);
+ assert(dest[1][2] == 5);
+ assert(dest[2][0] == 6);
+ assert(dest[2][1] == 7);
+ assert(dest[2][2] == 8);
+ }
}
#if TEST_STD_VER > 17
-constexpr bool test_swap_constexpr()
-{
- int i[3] = {1, 2, 3};
- int j[3] = {4, 5, 6};
- std::swap_ranges(i, i+3, j);
- return i[0] == 4 &&
- i[1] == 5 &&
- i[2] == 6 &&
- j[0] == 1 &&
- j[1] == 2 &&
- j[2] == 3;
+constexpr bool test_swap_constexpr() {
+ int i[3] = {1, 2, 3};
+ int j[3] = {4, 5, 6};
+ std::swap_ranges(i, i + 3, j);
+ return i[0] == 4 && i[1] == 5 && i[2] == 6 && j[0] == 1 && j[1] == 2 && j[2] == 3;
}
#endif // TEST_STD_VER > 17
-int main(int, char**)
-{
- test<forward_iterator<int*>, forward_iterator<int*> >();
- test<forward_iterator<int*>, bidirectional_iterator<int*> >();
- test<forward_iterator<int*>, random_access_iterator<int*> >();
- test<forward_iterator<int*>, int*>();
-
- test<bidirectional_iterator<int*>, forward_iterator<int*> >();
- test<bidirectional_iterator<int*>, bidirectional_iterator<int*> >();
- test<bidirectional_iterator<int*>, random_access_iterator<int*> >();
- test<bidirectional_iterator<int*>, int*>();
-
- test<random_access_iterator<int*>, forward_iterator<int*> >();
- test<random_access_iterator<int*>, bidirectional_iterator<int*> >();
- test<random_access_iterator<int*>, random_access_iterator<int*> >();
- test<random_access_iterator<int*>, int*>();
-
- test<int*, forward_iterator<int*> >();
- test<int*, bidirectional_iterator<int*> >();
- test<int*, random_access_iterator<int*> >();
- test<int*, int*>();
+template <std::size_t N> // N: number of bits swapped in each sub-test
+struct TestBitIter { // checks std::swap_ranges on vector<bool> for both start alignments
+ std::vector<bool> f, t; // reference patterns: f is N false bits, t is N true bits
+ TEST_CONSTEXPR_CXX20 TestBitIter() : f(N, false), t(N, true) {}
+ TEST_CONSTEXPR_CXX20 void operator()() {
+ { // Test swap_ranges() with aligned bytes: both ranges start at bit 0 of their vector
+ std::vector<bool> f1 = f, t1 = t; // work on copies so f and t stay available as expected values
+ std::swap_ranges(f1.begin(), f1.end(), t1.begin());
+ assert(std::equal(f1.begin(), f1.end(), t.begin())); // f1 now matches the all-true pattern
+ assert(std::equal(t1.begin(), t1.end(), f.begin())); // t1 now matches the all-false pattern
+ }
+ { // Test swap_ranges() with unaligned bytes: the destination starts at bit 4 of t1
+ std::vector<bool> f1(N, false), t1(N + 8, true);
+ std::swap_ranges(f1.begin(), f1.end(), t1.begin() + 4);
+ assert(std::equal(f1.begin(), f1.end(), t.begin()));
+ assert(std::equal(t1.begin() + 4, t1.end() - 4, f.begin())); // only the swapped N-bit window of t1 is checked
+ }
+ }
+};
+
+int main(int, char**) {
+ test<forward_iterator<int*>, forward_iterator<int*> >();
+ test<forward_iterator<int*>, bidirectional_iterator<int*> >();
+ test<forward_iterator<int*>, random_access_iterator<int*> >();
+ test<forward_iterator<int*>, int*>();
+
+ test<bidirectional_iterator<int*>, forward_iterator<int*> >();
+ test<bidirectional_iterator<int*>, bidirectional_iterator<int*> >();
+ test<bidirectional_iterator<int*>, random_access_iterator<int*> >();
+ test<bidirectional_iterator<int*>, int*>();
+
+ test<random_access_iterator<int*>, forward_iterator<int*> >();
+ test<random_access_iterator<int*>, bidirectional_iterator<int*> >();
+ test<random_access_iterator<int*>, random_access_iterator<int*> >();
+ test<random_access_iterator<int*>, int*>();
+
+ test<int*, forward_iterator<int*> >();
+ test<int*, bidirectional_iterator<int*> >();
+ test<int*, random_access_iterator<int*> >();
+ test<int*, int*>();
#if TEST_STD_VER >= 11
- test1<forward_iterator<std::unique_ptr<int>*>, forward_iterator<std::unique_ptr<int>*> >();
- test1<forward_iterator<std::unique_ptr<int>*>, bidirectional_iterator<std::unique_ptr<int>*> >();
- test1<forward_iterator<std::unique_ptr<int>*>, random_access_iterator<std::unique_ptr<int>*> >();
- test1<forward_iterator<std::unique_ptr<int>*>, std::unique_ptr<int>*>();
-
- test1<bidirectional_iterator<std::unique_ptr<int>*>, forward_iterator<std::unique_ptr<int>*> >();
- test1<bidirectional_iterator<std::unique_ptr<int>*>, bidirectional_iterator<std::unique_ptr<int>*> >();
- test1<bidirectional_iterator<std::unique_ptr<int>*>, random_access_iterator<std::unique_ptr<int>*> >();
- test1<bidirectional_iterator<std::unique_ptr<int>*>, std::unique_ptr<int>*>();
-
- test1<random_access_iterator<std::unique_ptr<int>*>, forward_iterator<std::unique_ptr<int>*> >();
- test1<random_access_iterator<std::unique_ptr<int>*>, bidirectional_iterator<std::unique_ptr<int>*> >();
- test1<random_access_iterator<std::unique_ptr<int>*>, random_access_iterator<std::unique_ptr<int>*> >();
- test1<random_access_iterator<std::unique_ptr<int>*>, std::unique_ptr<int>*>();
-
- test1<std::unique_ptr<int>*, forward_iterator<std::unique_ptr<int>*> >();
- test1<std::unique_ptr<int>*, bidirectional_iterator<std::unique_ptr<int>*> >();
- test1<std::unique_ptr<int>*, random_access_iterator<std::unique_ptr<int>*> >();
- test1<std::unique_ptr<int>*, std::unique_ptr<int>*>();
+ test1<forward_iterator<std::unique_ptr<int>*>, forward_iterator<std::unique_ptr<int>*> >();
+ test1<forward_iterator<std::unique_ptr<int>*>, bidirectional_iterator<std::unique_ptr<int>*> >();
+ test1<forward_iterator<std::unique_ptr<int>*>, random_access_iterator<std::unique_ptr<int>*> >();
+ test1<forward_iterator<std::unique_ptr<int>*>, std::unique_ptr<int>*>();
+
+ test1<bidirectional_iterator<std::unique_ptr<int>*>, forward_iterator<std::unique_ptr<int>*> >();
+ test1<bidirectional_iterator<std::unique_ptr<int>*>, bidirectional_iterator<std::unique_ptr<int>*> >();
+ test1<bidirectional_iterator<std::unique_ptr<int>*>, random_access_iterator<std::unique_ptr<int>*> >();
+ test1<bidirectional_iterator<std::unique_ptr<int>*>, std::unique_ptr<int>*>();
+
+ test1<random_access_iterator<std::unique_ptr<int>*>, forward_iterator<std::unique_ptr<int>*> >();
+ test1<random_access_iterator<std::unique_ptr<int>*>, bidirectional_iterator<std::unique_ptr<int>*> >();
+ test1<random_access_iterator<std::unique_ptr<int>*>, random_access_iterator<std::unique_ptr<int>*> >();
+ test1<random_access_iterator<std::unique_ptr<int>*>, std::unique_ptr<int>*>();
+
+ test1<std::unique_ptr<int>*, forward_iterator<std::unique_ptr<int>*> >();
+ test1<std::unique_ptr<int>*, bidirectional_iterator<std::unique_ptr<int>*> >();
+ test1<std::unique_ptr<int>*, random_access_iterator<std::unique_ptr<int>*> >();
+ test1<std::unique_ptr<int>*, std::unique_ptr<int>*>();
#endif // TEST_STD_VER >= 11
#if TEST_STD_VER > 17
- static_assert(test_swap_constexpr());
+ static_assert(test_swap_constexpr());
#endif // TEST_STD_VER > 17
- test2();
+ test2();
+
+ { // Test vector<bool>::iterator optimization
+ TestBitIter<8>()();
+ TestBitIter<16>()();
+ TestBitIter<32>()();
+ TestBitIter<64>()();
+ TestBitIter<1024>()();
+ }
return 0;
}
More information about the libcxx-commits
mailing list