[libcxx-commits] [libcxx] [libc++] Optimize ranges::equal for vector<bool>::iterator (PR #121084)

via libcxx-commits libcxx-commits at lists.llvm.org
Wed Jan 22 05:25:16 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libcxx

Author: Peng Liu (winner245)

<details>
<summary>Changes</summary>

This PR optimizes the performance of `std::ranges::equal` for `vector<bool>::iterator`, addressing a subtask outlined in issue #64038. The optimizations yield performance improvements of up to **190x** for aligned equality comparison and **80x** for unaligned equality comparison.

- Aligned equality comparison

```
-----------------------------------------------------------------------------
Benchmark                                  Before         After   Improvement
-----------------------------------------------------------------------------
bm_ranges_equal_vb_aligned/8               13.6 ns      0.889 ns          15x
bm_ranges_equal_vb_aligned/64              94.7 ns       1.09 ns          87x
bm_ranges_equal_vb_aligned/512              694 ns       4.15 ns         167x
bm_ranges_equal_vb_aligned/4096            5529 ns       37.4 ns         148x
bm_ranges_equal_vb_aligned/32768          44256 ns        255 ns         173x
bm_ranges_equal_vb_aligned/180224        312311 ns       1695 ns         184x
bm_ranges_equal_vb_aligned/184320        320931 ns       1743 ns         184x
bm_ranges_equal_vb_aligned/188416        325096 ns       1780 ns         183x
bm_ranges_equal_vb_aligned/192512        328834 ns       1806 ns         182x
bm_ranges_equal_vb_aligned/196608        337802 ns       1816 ns         186x
bm_ranges_equal_vb_aligned/200704        360741 ns       1866 ns         193x
bm_ranges_equal_vb_aligned/204800        353518 ns       1915 ns         184x
bm_ranges_equal_vb_aligned/262144        362013 ns       2055 ns         176x
bm_ranges_equal_vb_aligned/1048576      1422572 ns       8406 ns         169x
```

- Unaligned equality comparison

```
-----------------------------------------------------------------------------
Benchmark                                  Before         After   Improvement
-----------------------------------------------------------------------------
bm_ranges_equal_vb_unaligned/8             13.2 ns       5.82 ns         2.3x
bm_ranges_equal_vb_unaligned/64            95.5 ns       5.76 ns          17x
bm_ranges_equal_vb_unaligned/512            717 ns       14.1 ns          51x
bm_ranges_equal_vb_unaligned/4096          5605 ns       80.4 ns          70x
bm_ranges_equal_vb_unaligned/32768        44925 ns        583 ns          77x
bm_ranges_equal_vb_unaligned/262144      360244 ns       4454 ns          81x
bm_ranges_equal_vb_unaligned/1048576    1449077 ns      17869 ns          81x
```

---

Patch is 36.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121084.diff


6 Files Affected:

- (modified) libcxx/include/__algorithm/equal.h (+159) 
- (modified) libcxx/include/__bit_reference (+11-124) 
- (modified) libcxx/include/bitset (+1) 
- (modified) libcxx/test/benchmarks/algorithms/equal.bench.cpp (+51) 
- (modified) libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp (+33) 
- (modified) libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp (+156-93) 


``````````diff
diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h
index a276bb9954c9bb..0f8f1147b193c3 100644
--- a/libcxx/include/__algorithm/equal.h
+++ b/libcxx/include/__algorithm/equal.h
@@ -11,19 +11,27 @@
 #define _LIBCPP___ALGORITHM_EQUAL_H
 
 #include <__algorithm/comp.h>
+#include <__algorithm/min.h>
 #include <__algorithm/unwrap_iter.h>
 #include <__config>
 #include <__functional/identity.h>
+#include <__fwd/bit_reference.h>
 #include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
+#include <__memory/pointer_traits.h>
 #include <__string/constexpr_c_functions.h>
 #include <__type_traits/desugars_to.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
 #include <__type_traits/is_equality_comparable.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/is_volatile.h>
 #include <__utility/move.h>
 
+#if _LIBCPP_STD_VER >= 20
+#  include <__functional/ranges_operations.h>
+#endif
+
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
@@ -33,6 +41,132 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+template <class _Cp, bool _IC1, bool _IC2>
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned(
+    __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
+  using _It             = __bit_iterator<_Cp, _IC1>;
+  using difference_type = typename _It::difference_type;
+  using __storage_type  = typename _It::__storage_type;
+
+  const int __bits_per_word = _It::__bits_per_word;
+  difference_type __n       = __last1 - __first1;
+  if (__n > 0) {
+    // do first word
+    if (__first1.__ctz_ != 0) {
+      unsigned __clz_f     = __bits_per_word - __first1.__ctz_;
+      difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
+      __n -= __dn;
+      __storage_type __m   = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
+      __storage_type __b   = *__first1.__seg_ & __m;
+      unsigned __clz_r     = __bits_per_word - __first2.__ctz_;
+      __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
+      __m                  = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
+      if (__first2.__ctz_ > __first1.__ctz_) {
+        if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_)))
+          return false;
+      } else {
+        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
+          return false;
+      }
+      __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
+      __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
+      __dn -= __ddn;
+      if (__dn > 0) {
+        __m = ~__storage_type(0) >> (__bits_per_word - __dn);
+        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
+          return false;
+        __first2.__ctz_ = static_cast<unsigned>(__dn);
+      }
+      ++__first1.__seg_;
+      // __first1.__ctz_ = 0;
+    }
+    // __first1.__ctz_ == 0;
+    // do middle words
+    unsigned __clz_r   = __bits_per_word - __first2.__ctz_;
+    __storage_type __m = ~__storage_type(0) << __first2.__ctz_;
+    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
+      __storage_type __b = *__first1.__seg_;
+      if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
+        return false;
+      ++__first2.__seg_;
+      if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
+        return false;
+    }
+    // do last word
+    if (__n > 0) {
+      __m                 = ~__storage_type(0) >> (__bits_per_word - __n);
+      __storage_type __b  = *__first1.__seg_ & __m;
+      __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
+      __m                 = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
+      if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
+        return false;
+      __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
+      __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
+      __n -= __dn;
+      if (__n > 0) {
+        __m = ~__storage_type(0) >> (__bits_per_word - __n);
+        if ((*__first2.__seg_ & __m) != (__b >> __dn))
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
+template <class _Cp, bool _IC1, bool _IC2>
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned(
+    __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
+  using _It             = __bit_iterator<_Cp, _IC1>;
+  using difference_type = typename _It::difference_type;
+  using __storage_type  = typename _It::__storage_type;
+
+  const int __bits_per_word = _It::__bits_per_word;
+  difference_type __n       = __last1 - __first1;
+  if (__n > 0) {
+    // do first word
+    if (__first1.__ctz_ != 0) {
+      unsigned __clz       = __bits_per_word - __first1.__ctz_;
+      difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
+      __n -= __dn;
+      __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
+      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
+        return false;
+      ++__first2.__seg_;
+      ++__first1.__seg_;
+      // __first1.__ctz_ = 0;
+      // __first2.__ctz_ = 0;
+    }
+    // __first1.__ctz_ == 0;
+    // __first2.__ctz_ == 0;
+    // do middle words
+    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
+      if (*__first2.__seg_ != *__first1.__seg_)
+        return false;
+    // do last word
+    if (__n > 0) {
+      __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
+      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
+        return false;
+    }
+  }
+  return true;
+}
+
+template <class _Cp,
+          bool _IC1,
+          bool _IC2,
+          class _BinaryPredicate,
+          __enable_if_t<std::is_same<_BinaryPredicate, __equal_to>::value, int> = 0>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
+    __bit_iterator<_Cp, _IC1> __first1,
+    __bit_iterator<_Cp, _IC1> __last1,
+    __bit_iterator<_Cp, _IC2> __first2,
+    _BinaryPredicate) {
+  if (__first1.__ctz_ == __first2.__ctz_)
+    return std::__equal_aligned(__first1, __last1, __first2);
+  return std::__equal_unaligned(__first1, __last1, __first2);
+}
+
 template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
     _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) {
@@ -94,6 +228,31 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&,
   return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
 }
 
+template <class _Cp,
+          bool _IC1,
+          bool _IC2,
+          class _Pred,
+          class _Proj1,
+          class _Proj2,
+          __enable_if_t<(is_same<_Pred, __equal_to>::value
+#  if _LIBCPP_STD_VER >= 20
+                         || is_same<_Pred, ranges::equal_to>::value
+#  endif
+                         ) &&
+                            __desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
+                            __is_identity<_Proj2>::value,
+                        int> = 0>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
+    __bit_iterator<_Cp, _IC1> __first1,
+    __bit_iterator<_Cp, _IC1> __last1,
+    __bit_iterator<_Cp, _IC2> __first2,
+    __bit_iterator<_Cp, _IC2>,
+    _Pred&,
+    _Proj1&,
+    _Proj2&) {
+  return std::__equal_iter_impl(__first1, __last1, __first2, __equal_to());
+}
+
 template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
 equal(_InputIterator1 __first1,
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 67abb023122edf..8a0c4c93bcbaf7 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -10,7 +10,9 @@
 #ifndef _LIBCPP___BIT_REFERENCE
 #define _LIBCPP___BIT_REFERENCE
 
+#include <__algorithm/comp.h>
 #include <__algorithm/copy_n.h>
+#include <__algorithm/equal.h>
 #include <__algorithm/min.h>
 #include <__bit/countr.h>
 #include <__compare/ordering.h>
@@ -22,7 +24,9 @@
 #include <__memory/construct_at.h>
 #include <__memory/pointer_traits.h>
 #include <__type_traits/conditional.h>
+#include <__type_traits/enable_if.h>
 #include <__type_traits/is_constant_evaluated.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/void_t.h>
 #include <__utility/swap.h>
 
@@ -669,127 +673,6 @@ rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle,
   return __r;
 }
 
-// equal
-
-template <class _Cp, bool _IC1, bool _IC2>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned(
-    __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
-  using _It             = __bit_iterator<_Cp, _IC1>;
-  using difference_type = typename _It::difference_type;
-  using __storage_type  = typename _It::__storage_type;
-
-  const int __bits_per_word = _It::__bits_per_word;
-  difference_type __n       = __last1 - __first1;
-  if (__n > 0) {
-    // do first word
-    if (__first1.__ctz_ != 0) {
-      unsigned __clz_f     = __bits_per_word - __first1.__ctz_;
-      difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
-      __n -= __dn;
-      __storage_type __m   = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
-      __storage_type __b   = *__first1.__seg_ & __m;
-      unsigned __clz_r     = __bits_per_word - __first2.__ctz_;
-      __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
-      __m                  = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
-      if (__first2.__ctz_ > __first1.__ctz_) {
-        if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_)))
-          return false;
-      } else {
-        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
-          return false;
-      }
-      __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
-      __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
-      __dn -= __ddn;
-      if (__dn > 0) {
-        __m = ~__storage_type(0) >> (__bits_per_word - __dn);
-        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
-          return false;
-        __first2.__ctz_ = static_cast<unsigned>(__dn);
-      }
-      ++__first1.__seg_;
-      // __first1.__ctz_ = 0;
-    }
-    // __first1.__ctz_ == 0;
-    // do middle words
-    unsigned __clz_r   = __bits_per_word - __first2.__ctz_;
-    __storage_type __m = ~__storage_type(0) << __first2.__ctz_;
-    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
-      __storage_type __b = *__first1.__seg_;
-      if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
-        return false;
-      ++__first2.__seg_;
-      if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
-        return false;
-    }
-    // do last word
-    if (__n > 0) {
-      __m                 = ~__storage_type(0) >> (__bits_per_word - __n);
-      __storage_type __b  = *__first1.__seg_ & __m;
-      __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
-      __m                 = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
-      if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
-        return false;
-      __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
-      __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
-      __n -= __dn;
-      if (__n > 0) {
-        __m = ~__storage_type(0) >> (__bits_per_word - __n);
-        if ((*__first2.__seg_ & __m) != (__b >> __dn))
-          return false;
-      }
-    }
-  }
-  return true;
-}
-
-template <class _Cp, bool _IC1, bool _IC2>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned(
-    __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
-  using _It             = __bit_iterator<_Cp, _IC1>;
-  using difference_type = typename _It::difference_type;
-  using __storage_type  = typename _It::__storage_type;
-
-  const int __bits_per_word = _It::__bits_per_word;
-  difference_type __n       = __last1 - __first1;
-  if (__n > 0) {
-    // do first word
-    if (__first1.__ctz_ != 0) {
-      unsigned __clz       = __bits_per_word - __first1.__ctz_;
-      difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
-      __n -= __dn;
-      __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
-      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
-        return false;
-      ++__first2.__seg_;
-      ++__first1.__seg_;
-      // __first1.__ctz_ = 0;
-      // __first2.__ctz_ = 0;
-    }
-    // __first1.__ctz_ == 0;
-    // __first2.__ctz_ == 0;
-    // do middle words
-    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
-      if (*__first2.__seg_ != *__first1.__seg_)
-        return false;
-    // do last word
-    if (__n > 0) {
-      __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
-      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
-        return false;
-    }
-  }
-  return true;
-}
-
-template <class _Cp, bool _IC1, bool _IC2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
-equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
-  if (__first1.__ctz_ == __first2.__ctz_)
-    return std::__equal_aligned(__first1, __last1, __first2);
-  return std::__equal_unaligned(__first1, __last1, __first2);
-}
-
 template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
 class __bit_iterator {
 public:
@@ -1018,9 +901,13 @@ private:
   template <class _Dp, bool _IC1, bool _IC2>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
       __equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
-  template <class _Dp, bool _IC1, bool _IC2>
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
-      equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
+  template <class _Dp,
+            bool _IC1,
+            bool _IC2,
+            class _BinaryPredicate,
+            __enable_if_t<std::is_same<_BinaryPredicate, __equal_to>::value, int> >
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl(
+      __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>, _BinaryPredicate);
   template <bool _ToFind, class _Dp, bool _IC>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC>
       __find_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type);
diff --git a/libcxx/include/bitset b/libcxx/include/bitset
index 10576eb80bf2ee..a8c499df04232f 100644
--- a/libcxx/include/bitset
+++ b/libcxx/include/bitset
@@ -130,6 +130,7 @@ template <size_t N> struct hash<std::bitset<N>>;
 #  include <__cxx03/bitset>
 #else
 #  include <__algorithm/count.h>
+#  include <__algorithm/equal.h>
 #  include <__algorithm/fill.h>
 #  include <__algorithm/fill_n.h>
 #  include <__algorithm/find.h>
diff --git a/libcxx/test/benchmarks/algorithms/equal.bench.cpp b/libcxx/test/benchmarks/algorithms/equal.bench.cpp
index 2dc11585c15c7f..ac3aa28bb28b39 100644
--- a/libcxx/test/benchmarks/algorithms/equal.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/equal.bench.cpp
@@ -45,4 +45,55 @@ static void bm_ranges_equal(benchmark::State& state) {
 }
 BENCHMARK(bm_ranges_equal)->DenseRange(1, 8)->Range(16, 1 << 20);
 
+static void bm_ranges_equal_vb_aligned(benchmark::State& state) {
+  auto n = state.range();
+  std::vector<bool> vec1(n, true);
+  std::vector<bool> vec2(n, true);
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(std::ranges::equal(vec1, vec2));
+    benchmark::DoNotOptimize(&vec1);
+    benchmark::DoNotOptimize(&vec2);
+  }
+}
+
+static void bm_ranges_equal_vb_unaligned(benchmark::State& state) {
+  auto n = state.range();
+  std::vector<bool> vec1(n, true);
+  std::vector<bool> vec2(n + 8, true);
+  auto beg1 = std::ranges::begin(vec1);
+  auto end1 = std::ranges::end(vec1);
+  auto beg2 = std::ranges::begin(vec2) + 4;
+  auto end2 = std::ranges::end(vec2) - 4;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(std::ranges::equal(beg1, end1, beg2, end2));
+    benchmark::DoNotOptimize(&vec1);
+    benchmark::DoNotOptimize(&vec2);
+  }
+}
+
+// Test std::ranges::equal for vector<bool>::iterator
+BENCHMARK(bm_ranges_equal_vb_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_ranges_equal_vb_unaligned)->Range(8, 1 << 20);
+
+static void bm_equal_vb(benchmark::State& state, bool aligned) {
+  auto n = state.range();
+  std::vector<bool> vec1(n, true);
+  std::vector<bool> vec2(aligned ? n : n + 8, true);
+  auto beg1 = vec1.begin();
+  auto end1 = vec1.end();
+  auto beg2 = aligned ? vec2.begin() : vec2.begin() + 4;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(std::equal(beg1, end1, beg2));
+    benchmark::DoNotOptimize(&vec1);
+    benchmark::DoNotOptimize(&vec2);
+  }
+}
+
+static void bm_equal_vb_aligned(benchmark::State& state) { bm_equal_vb(state, true); }
+static void bm_equal_vb_unaligned(benchmark::State& state) { bm_equal_vb(state, false); }
+
+// Test std::equal for vector<bool>::iterator
+BENCHMARK(bm_equal_vb_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_equal_vb_unaligned)->Range(8, 1 << 20);
+
 BENCHMARK_MAIN();
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp
index c3ba3f89b4de3c..a88f041013da6c 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp
@@ -28,6 +28,7 @@
 #include <algorithm>
 #include <cassert>
 #include <functional>
+#include <vector>
 
 #include "test_iterators.h"
 #include "test_macros.h"
@@ -123,6 +124,30 @@ class trivially_equality_comparable {
 
 #endif
 
+template <std::size_t N>
+TEST_CONSTEXPR_CXX20 void test_vector_bool() {
+  std::vector<bool> in(N, false);
+  for (std::size_t i = 0; i < N; i += 2)
+    in[i] = true;
+
+  { // Test equal() with aligned bytes
+    std::vector<bool> out = in;
+    assert(std::equal(in.begin(), in.end(), out.begin()));
+#if TEST_STD_VER >= 14
+    assert(std::equal(in.begin(), in.end(), out.begin(), out.end()));
+#endif
+  }
+
+  { // Test equal() with unaligned bytes
+    std::vector<bool> out(N + 8);
+    std::copy(in.begin(), in.end(), out.begin() + 4);
+    assert(std::equal(in.begin(), in.end(), out.begin() + 4));
+#if TEST_STD_VER >= 14
+    assert(std::equal(in.begin(), in.end(), out.begin() + 4, out.end() - 4));
+#endif
+  }
+}
+
 TEST_CONSTEXPR_CXX20 bool test() {
   types::for_each(types::cpp17_input_iterator_list<int*>(), TestIter2<int, types::cpp17_input_iterator_list<int*> >());
   types::for_each(
@@ -138,6 +163,14 @@ TEST_CONSTEXPR_CXX20 bool test() {
       TestIter2<trivially_equality_comparable, types::cpp17_input_iterator_list<trivially_equality_comparable*>>{});
 #endif
 
+  { // Test vector<bool>::iterator optimization
+    test_vector_bool<8>();
+    test_vector_bool<16>();
+    test_vector_bool<32>();
+    test_vector_bool<64>();
+    test_vector_bool<256>();
+  }
+
   return true;
 }
 
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp
index f36cd2e0896552..37c3677b445c00 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ra...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/121084


More information about the libcxx-commits mailing list