[libcxx-commits] [libcxx] [libc++] Optimize ranges::equal for vector<bool>::iterator (PR #121084)

Peng Liu via libcxx-commits libcxx-commits at lists.llvm.org
Fri Feb 7 14:08:03 PST 2025


https://github.com/winner245 updated https://github.com/llvm/llvm-project/pull/121084

>From 1a155fe218da8a84b51abee3f94d93ea7c2dfaaf Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Tue, 24 Dec 2024 18:24:39 -0500
Subject: [PATCH 1/2] Optimize ranges::equal for vector<bool>::iterator

---
 libcxx/docs/ReleaseNotes/21.rst               |   3 +
 libcxx/include/__algorithm/equal.h            | 160 +++++++++++
 libcxx/include/__bit_reference                | 161 +++--------
 libcxx/include/bitset                         |   1 +
 .../benchmarks/algorithms/equal.bench.cpp     |  51 ++++
 .../alg.nonmodifying/alg.equal/equal.pass.cpp |  35 +++
 .../alg.equal/ranges.equal.pass.cpp           | 251 +++++++++++-------
 7 files changed, 441 insertions(+), 221 deletions(-)

diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 24393607970238a..ccbf3951fdf9ad2 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -46,6 +46,9 @@ Improvements and New Features
 - The ``std::ranges::{copy, copy_n, copy_backward}`` algorithms have been optimized for ``std::vector<bool>::iterator``\s,
   resulting in a performance improvement of up to 2000x.
 
+- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
+  improvement of up to 188x.
+
 - Updated formatting library to Unicode 16.0.0.
 
 Deprecations and Removals
diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h
index a276bb9954c9bbc..dc1b97d673c8289 100644
--- a/libcxx/include/__algorithm/equal.h
+++ b/libcxx/include/__algorithm/equal.h
@@ -11,19 +11,27 @@
 #define _LIBCPP___ALGORITHM_EQUAL_H
 
 #include <__algorithm/comp.h>
+#include <__algorithm/min.h>
 #include <__algorithm/unwrap_iter.h>
 #include <__config>
 #include <__functional/identity.h>
+#include <__fwd/bit_reference.h>
 #include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
+#include <__memory/pointer_traits.h>
 #include <__string/constexpr_c_functions.h>
 #include <__type_traits/desugars_to.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
 #include <__type_traits/is_equality_comparable.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/is_volatile.h>
 #include <__utility/move.h>
 
+#if _LIBCPP_STD_VER >= 20
+#  include <__functional/ranges_operations.h>
+#endif
+
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
@@ -33,6 +41,136 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+template <class _Cp, bool _IsConst1, bool _IsConst2>
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
+__equal_unaligned(__bit_iterator<_Cp, _IsConst1> __first1,
+                  __bit_iterator<_Cp, _IsConst1> __last1,
+                  __bit_iterator<_Cp, _IsConst2> __first2) {
+  using _It             = __bit_iterator<_Cp, _IsConst1>;
+  using difference_type = typename _It::difference_type;
+  using __storage_type  = typename _It::__storage_type;
+
+  const int __bits_per_word = _It::__bits_per_word;
+  difference_type __n       = __last1 - __first1;
+  if (__n > 0) {
+    // do first word
+    if (__first1.__ctz_ != 0) {
+      unsigned __clz_f     = __bits_per_word - __first1.__ctz_;
+      difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
+      __n -= __dn;
+      __storage_type __m   = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
+      __storage_type __b   = *__first1.__seg_ & __m;
+      unsigned __clz_r     = __bits_per_word - __first2.__ctz_;
+      __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
+      __m                  = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
+      if (__first2.__ctz_ > __first1.__ctz_) {
+        if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_)))
+          return false;
+      } else {
+        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
+          return false;
+      }
+      __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
+      __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
+      __dn -= __ddn;
+      if (__dn > 0) {
+        __m = ~__storage_type(0) >> (__bits_per_word - __dn);
+        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
+          return false;
+        __first2.__ctz_ = static_cast<unsigned>(__dn);
+      }
+      ++__first1.__seg_;
+      // __first1.__ctz_ = 0;
+    }
+    // __first1.__ctz_ == 0;
+    // do middle words
+    unsigned __clz_r   = __bits_per_word - __first2.__ctz_;
+    __storage_type __m = ~__storage_type(0) << __first2.__ctz_;
+    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
+      __storage_type __b = *__first1.__seg_;
+      if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
+        return false;
+      ++__first2.__seg_;
+      if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
+        return false;
+    }
+    // do last word
+    if (__n > 0) {
+      __m                 = ~__storage_type(0) >> (__bits_per_word - __n);
+      __storage_type __b  = *__first1.__seg_ & __m;
+      __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
+      __m                 = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
+      if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
+        return false;
+      __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
+      __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
+      __n -= __dn;
+      if (__n > 0) {
+        __m = ~__storage_type(0) >> (__bits_per_word - __n);
+        if ((*__first2.__seg_ & __m) != (__b >> __dn))
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
+template <class _Cp, bool _IsConst1, bool _IsConst2>
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
+__equal_aligned(__bit_iterator<_Cp, _IsConst1> __first1,
+                __bit_iterator<_Cp, _IsConst1> __last1,
+                __bit_iterator<_Cp, _IsConst2> __first2) {
+  using _It             = __bit_iterator<_Cp, _IsConst1>;
+  using difference_type = typename _It::difference_type;
+  using __storage_type  = typename _It::__storage_type;
+
+  const int __bits_per_word = _It::__bits_per_word;
+  difference_type __n       = __last1 - __first1;
+  if (__n > 0) {
+    // do first word
+    if (__first1.__ctz_ != 0) {
+      unsigned __clz       = __bits_per_word - __first1.__ctz_;
+      difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
+      __n -= __dn;
+      __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
+      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
+        return false;
+      ++__first2.__seg_;
+      ++__first1.__seg_;
+      // __first1.__ctz_ = 0;
+      // __first2.__ctz_ = 0;
+    }
+    // __first1.__ctz_ == 0;
+    // __first2.__ctz_ == 0;
+    // do middle words
+    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
+      if (*__first2.__seg_ != *__first1.__seg_)
+        return false;
+    // do last word
+    if (__n > 0) {
+      __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
+      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
+        return false;
+    }
+  }
+  return true;
+}
+
+template <class _Cp,
+          bool _IsConst1,
+          bool _IsConst2,
+          class _BinaryPredicate,
+          __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> = 0>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
+    __bit_iterator<_Cp, _IsConst1> __first1,
+    __bit_iterator<_Cp, _IsConst1> __last1,
+    __bit_iterator<_Cp, _IsConst2> __first2,
+    _BinaryPredicate) {
+  if (__first1.__ctz_ == __first2.__ctz_)
+    return std::__equal_aligned(__first1, __last1, __first2);
+  return std::__equal_unaligned(__first1, __last1, __first2);
+}
+
 template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
     _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) {
@@ -94,6 +232,28 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&,
   return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
 }
 
+template <class _Cp,
+          bool _IsConst1,
+          bool _IsConst2,
+          class _Pred,
+          class _Proj1,
+          class _Proj2,
+          __enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
+                            __is_identity<_Proj2>::value,
+                        int> = 0>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
+    __bit_iterator<_Cp, _IsConst1> __first1,
+    __bit_iterator<_Cp, _IsConst1> __last1,
+    __bit_iterator<_Cp, _IsConst2> __first2,
+    __bit_iterator<_Cp, _IsConst2>,
+    _Pred&,
+    _Proj1&,
+    _Proj2&) {
+  if (__first1.__ctz_ == __first2.__ctz_)
+    return std::__equal_aligned(__first1, __last1, __first2);
+  return std::__equal_unaligned(__first1, __last1, __first2);
+}
+
 template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
 equal(_InputIterator1 __first1,
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index aad470394732c6e..69e5b17691a1b0f 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -10,9 +10,11 @@
 #ifndef _LIBCPP___BIT_REFERENCE
 #define _LIBCPP___BIT_REFERENCE
 
+#include <__algorithm/comp.h>
 #include <__algorithm/copy.h>
 #include <__algorithm/copy_backward.h>
 #include <__algorithm/copy_n.h>
+#include <__algorithm/equal.h>
 #include <__algorithm/min.h>
 #include <__assert>
 #include <__bit/countr.h>
@@ -20,13 +22,16 @@
 #include <__config>
 #include <__cstddef/ptrdiff_t.h>
 #include <__cstddef/size_t.h>
+#include <__functional/identity.h>
 #include <__fwd/bit_reference.h>
 #include <__iterator/iterator_traits.h>
 #include <__memory/construct_at.h>
 #include <__memory/pointer_traits.h>
 #include <__type_traits/conditional.h>
+#include <__type_traits/desugars_to.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_constant_evaluated.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/is_unsigned.h>
 #include <__type_traits/void_t.h>
 #include <__utility/pair.h>
@@ -444,127 +449,6 @@ rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle,
   return __r;
 }
 
-// equal
-
-template <class _Cp, bool _IC1, bool _IC2>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned(
-    __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
-  using _It             = __bit_iterator<_Cp, _IC1>;
-  using difference_type = typename _It::difference_type;
-  using __storage_type  = typename _It::__storage_type;
-
-  const int __bits_per_word = _It::__bits_per_word;
-  difference_type __n       = __last1 - __first1;
-  if (__n > 0) {
-    // do first word
-    if (__first1.__ctz_ != 0) {
-      unsigned __clz_f     = __bits_per_word - __first1.__ctz_;
-      difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
-      __n -= __dn;
-      __storage_type __m   = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
-      __storage_type __b   = *__first1.__seg_ & __m;
-      unsigned __clz_r     = __bits_per_word - __first2.__ctz_;
-      __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
-      __m                  = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
-      if (__first2.__ctz_ > __first1.__ctz_) {
-        if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_)))
-          return false;
-      } else {
-        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
-          return false;
-      }
-      __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
-      __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
-      __dn -= __ddn;
-      if (__dn > 0) {
-        __m = ~__storage_type(0) >> (__bits_per_word - __dn);
-        if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
-          return false;
-        __first2.__ctz_ = static_cast<unsigned>(__dn);
-      }
-      ++__first1.__seg_;
-      // __first1.__ctz_ = 0;
-    }
-    // __first1.__ctz_ == 0;
-    // do middle words
-    unsigned __clz_r   = __bits_per_word - __first2.__ctz_;
-    __storage_type __m = ~__storage_type(0) << __first2.__ctz_;
-    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
-      __storage_type __b = *__first1.__seg_;
-      if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
-        return false;
-      ++__first2.__seg_;
-      if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
-        return false;
-    }
-    // do last word
-    if (__n > 0) {
-      __m                 = ~__storage_type(0) >> (__bits_per_word - __n);
-      __storage_type __b  = *__first1.__seg_ & __m;
-      __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
-      __m                 = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
-      if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
-        return false;
-      __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
-      __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
-      __n -= __dn;
-      if (__n > 0) {
-        __m = ~__storage_type(0) >> (__bits_per_word - __n);
-        if ((*__first2.__seg_ & __m) != (__b >> __dn))
-          return false;
-      }
-    }
-  }
-  return true;
-}
-
-template <class _Cp, bool _IC1, bool _IC2>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned(
-    __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
-  using _It             = __bit_iterator<_Cp, _IC1>;
-  using difference_type = typename _It::difference_type;
-  using __storage_type  = typename _It::__storage_type;
-
-  const int __bits_per_word = _It::__bits_per_word;
-  difference_type __n       = __last1 - __first1;
-  if (__n > 0) {
-    // do first word
-    if (__first1.__ctz_ != 0) {
-      unsigned __clz       = __bits_per_word - __first1.__ctz_;
-      difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
-      __n -= __dn;
-      __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
-      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
-        return false;
-      ++__first2.__seg_;
-      ++__first1.__seg_;
-      // __first1.__ctz_ = 0;
-      // __first2.__ctz_ = 0;
-    }
-    // __first1.__ctz_ == 0;
-    // __first2.__ctz_ == 0;
-    // do middle words
-    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
-      if (*__first2.__seg_ != *__first1.__seg_)
-        return false;
-    // do last word
-    if (__n > 0) {
-      __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
-      if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
-        return false;
-    }
-  }
-  return true;
-}
-
-template <class _Cp, bool _IC1, bool _IC2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
-equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
-  if (__first1.__ctz_ == __first2.__ctz_)
-    return std::__equal_aligned(__first1, __last1, __first2);
-  return std::__equal_unaligned(__first1, __last1, __first2);
-}
-
 template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
 class __bit_iterator {
 public:
@@ -787,15 +671,36 @@ private:
   template <class _Dp>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
       rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
-  template <class _Dp, bool _IC1, bool _IC2>
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
-      __equal_aligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
-  template <class _Dp, bool _IC1, bool _IC2>
+  template <class _Dp, bool _IsConst1, bool _IsConst2>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
-      __equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
-  template <class _Dp, bool _IC1, bool _IC2>
+      __equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
+  template <class _Dp, bool _IsConst1, bool _IsConst2>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
-      equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
+      __equal_unaligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
+  template <class _Dp,
+            bool _IsConst1,
+            bool _IsConst2,
+            class _BinaryPredicate,
+            __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> >
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl(
+      __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>, _BinaryPredicate);
+  template <class _Dp,
+            bool _IsConst1,
+            bool _IsConst2,
+            class _Pred,
+            class _Proj1,
+            class _Proj2,
+            __enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
+                              __is_identity<_Proj2>::value,
+                          int> >
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_impl(
+      __bit_iterator<_Dp, _IsConst1> __first1,
+      __bit_iterator<_Dp, _IsConst1> __last1,
+      __bit_iterator<_Dp, _IsConst2> __first2,
+      __bit_iterator<_Dp, _IsConst2>,
+      _Pred&,
+      _Proj1&,
+      _Proj2&);
   template <bool _ToFind, class _Dp, bool _IC>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC>
       __find_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type);
diff --git a/libcxx/include/bitset b/libcxx/include/bitset
index a20842985b3d59d..afc44daa9ed380d 100644
--- a/libcxx/include/bitset
+++ b/libcxx/include/bitset
@@ -132,6 +132,7 @@ template <size_t N> struct hash<std::bitset<N>>;
 #  include <__algorithm/copy.h>
 #  include <__algorithm/copy_backward.h>
 #  include <__algorithm/count.h>
+#  include <__algorithm/equal.h>
 #  include <__algorithm/fill.h>
 #  include <__algorithm/fill_n.h>
 #  include <__algorithm/find.h>
diff --git a/libcxx/test/benchmarks/algorithms/equal.bench.cpp b/libcxx/test/benchmarks/algorithms/equal.bench.cpp
index 2dc11585c15c7f7..328b39608607ede 100644
--- a/libcxx/test/benchmarks/algorithms/equal.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/equal.bench.cpp
@@ -45,4 +45,55 @@ static void bm_ranges_equal(benchmark::State& state) {
 }
 BENCHMARK(bm_ranges_equal)->DenseRange(1, 8)->Range(16, 1 << 20);
 
+static void bm_ranges_equal_vb_aligned(benchmark::State& state) {
+  auto n = state.range();
+  std::vector<bool> vec1(n, true);
+  std::vector<bool> vec2(n, true);
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(std::ranges::equal(vec1, vec2));
+    benchmark::DoNotOptimize(&vec1);
+    benchmark::DoNotOptimize(&vec2);
+  }
+}
+
+static void bm_ranges_equal_vb_unaligned(benchmark::State& state) {
+  auto n = state.range();
+  std::vector<bool> vec1(n, true);
+  std::vector<bool> vec2(n + 8, true);
+  auto beg1 = std::ranges::begin(vec1);
+  auto end1 = std::ranges::end(vec1);
+  auto beg2 = std::ranges::begin(vec2) + 4;
+  auto end2 = std::ranges::end(vec2) - 4;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(std::ranges::equal(beg1, end1, beg2, end2));
+    benchmark::DoNotOptimize(&vec1);
+    benchmark::DoNotOptimize(&vec2);
+  }
+}
+
+// Test std::ranges::equal for vector<bool>::iterator
+BENCHMARK(bm_ranges_equal_vb_aligned)->RangeMultiplier(4)->Range(8, 1 << 20);
+BENCHMARK(bm_ranges_equal_vb_unaligned)->Range(8, 1 << 20);
+
+static void bm_equal_vb(benchmark::State& state, bool aligned) {
+  auto n = state.range();
+  std::vector<bool> vec1(n, true);
+  std::vector<bool> vec2(aligned ? n : n + 8, true);
+  auto beg1 = vec1.begin();
+  auto end1 = vec1.end();
+  auto beg2 = aligned ? vec2.begin() : vec2.begin() + 4;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(std::equal(beg1, end1, beg2));
+    benchmark::DoNotOptimize(&vec1);
+    benchmark::DoNotOptimize(&vec2);
+  }
+}
+
+static void bm_equal_vb_aligned(benchmark::State& state) { bm_equal_vb(state, true); }
+static void bm_equal_vb_unaligned(benchmark::State& state) { bm_equal_vb(state, false); }
+
+// Test std::equal for vector<bool>::iterator
+BENCHMARK(bm_equal_vb_aligned)->Range(8, 1 << 20);
+BENCHMARK(bm_equal_vb_unaligned)->Range(8, 1 << 20);
+
 BENCHMARK_MAIN();
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp
index c3ba3f89b4de3c5..b37d788c80d3a6d 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp
@@ -28,6 +28,7 @@
 #include <algorithm>
 #include <cassert>
 #include <functional>
+#include <vector>
 
 #include "test_iterators.h"
 #include "test_macros.h"
@@ -123,6 +124,30 @@ class trivially_equality_comparable {
 
 #endif
 
+template <std::size_t N>
+TEST_CONSTEXPR_CXX20 void test_vector_bool() {
+  std::vector<bool> in(N, false);
+  for (std::size_t i = 0; i < N; i += 2)
+    in[i] = true;
+
+  { // Test equal() with aligned bytes
+    std::vector<bool> out = in;
+    assert(std::equal(in.begin(), in.end(), out.begin()));
+#if TEST_STD_VER >= 14
+    assert(std::equal(in.begin(), in.end(), out.begin(), out.end()));
+#endif
+  }
+
+  { // Test equal() with unaligned bytes
+    std::vector<bool> out(N + 8);
+    std::copy(in.begin(), in.end(), out.begin() + 4);
+    assert(std::equal(in.begin(), in.end(), out.begin() + 4));
+#if TEST_STD_VER >= 14
+    assert(std::equal(in.begin(), in.end(), out.begin() + 4, out.end() - 4));
+#endif
+  }
+}
+
 TEST_CONSTEXPR_CXX20 bool test() {
   types::for_each(types::cpp17_input_iterator_list<int*>(), TestIter2<int, types::cpp17_input_iterator_list<int*> >());
   types::for_each(
@@ -138,6 +163,16 @@ TEST_CONSTEXPR_CXX20 bool test() {
       TestIter2<trivially_equality_comparable, types::cpp17_input_iterator_list<trivially_equality_comparable*>>{});
 #endif
 
+  { // Test vector<bool>::iterator optimization
+    test_vector_bool<8>();
+    test_vector_bool<19>();
+    test_vector_bool<32>();
+    test_vector_bool<49>();
+    test_vector_bool<64>();
+    test_vector_bool<199>();
+    test_vector_bool<256>();
+  }
+
   return true;
 }
 
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp
index f36cd2e08965529..4c1c29c72cab044 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp
@@ -28,13 +28,17 @@
 #include <concepts>
 #include <functional>
 #include <ranges>
+#include <vector>
 
 #include "almost_satisfies_types.h"
 #include "test_iterators.h"
+#include "test_macros.h"
 
-template <class Iter1, class Sent1 = sentinel_wrapper<Iter1>,
-          class Iter2 = Iter1, class Sent2 = sentinel_wrapper<Iter2>>
-concept HasEqualIt = requires (Iter1 first1, Sent1 last1, Iter2 first2, Sent2 last2) {
+template <class Iter1,
+          class Sent1 = sentinel_wrapper<Iter1>,
+          class Iter2 = Iter1,
+          class Sent2 = sentinel_wrapper<Iter2>>
+concept HasEqualIt = requires(Iter1 first1, Sent1 last1, Iter2 first2, Sent2 last2) {
   std::ranges::equal(first1, last1, first2, last2);
 };
 
@@ -52,9 +56,7 @@ static_assert(!HasEqualIt<int*, int*, int*, SentinelForNotWeaklyEqualityComparab
 static_assert(!HasEqualIt<int*, int*, int**>);
 
 template <class Range1, class Range2>
-concept HasEqualR = requires (Range1 range1, Range2 range2) {
-  std::ranges::equal(range1, range2);
-};
+concept HasEqualR = requires(Range1 range1, Range2 range2) { std::ranges::equal(range1, range2); };
 
 static_assert(HasEqualR<UncheckedRange<int*>, UncheckedRange<int*>>);
 static_assert(!HasEqualR<InputRangeNotDerivedFrom, UncheckedRange<int*>>);
@@ -75,15 +77,15 @@ constexpr void test_iterators() {
     {
       int a[] = {1, 2, 3, 4};
       int b[] = {1, 2, 3, 4};
-      std::same_as<bool> decltype(auto) ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)),
-                                                                 Iter2(b), Sent2(Iter2(b + 4)));
+      std::same_as<bool> decltype(auto) ret =
+          std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)));
       assert(ret);
     }
     {
-      int a[] = {1, 2, 3, 4};
-      int b[] = {1, 2, 3, 4};
-      auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4)));
-      auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4)));
+      int a[]                               = {1, 2, 3, 4};
+      int b[]                               = {1, 2, 3, 4};
+      auto range1                           = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4)));
+      auto range2                           = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4)));
       std::same_as<bool> decltype(auto) ret = std::ranges::equal(range1, range2);
       assert(ret);
     }
@@ -92,12 +94,12 @@ constexpr void test_iterators() {
   { // check that false is returned for non-equal ranges
     {
       int a[] = {1, 2, 3, 4};
-      int b[]  = {1, 2, 4, 4};
+      int b[] = {1, 2, 4, 4};
       assert(!std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4))));
     }
     {
-      int a[] = {1, 2, 3, 4};
-      int b[] = {1, 2, 4, 4};
+      int a[]     = {1, 2, 3, 4};
+      int b[]     = {1, 2, 4, 4};
       auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4)));
       auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4)));
       assert(!std::ranges::equal(range1, range2));
@@ -106,95 +108,96 @@ constexpr void test_iterators() {
 
   { // check that the predicate is used (return true)
     {
-      int a[] = {1, 2, 3, 4};
-      int b[] = {2, 3, 4, 5};
-      auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)),
-                                    [](int l, int r) { return l != r; });
+      int a[]  = {1, 2, 3, 4};
+      int b[]  = {2, 3, 4, 5};
+      auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)), [](int l, int r) {
+        return l != r;
+      });
       assert(ret);
     }
     {
-      int a[] = {1, 2, 3, 4};
-      int b[] = {2, 3, 4, 5};
+      int a[]     = {1, 2, 3, 4};
+      int b[]     = {2, 3, 4, 5};
       auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4)));
       auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4)));
-      auto ret = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; });
+      auto ret    = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; });
       assert(ret);
     }
   }
 
   { // check that the predicate is used (return false)
     {
-      int a[] = {1, 2, 3, 4};
-      int b[] = {2, 3, 3, 5};
-      auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)),
-                                    [](int l, int r) { return l != r; });
+      int a[]  = {1, 2, 3, 4};
+      int b[]  = {2, 3, 3, 5};
+      auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)), [](int l, int r) {
+        return l != r;
+      });
       assert(!ret);
     }
     {
-      int a[] = {1, 2, 3, 4};
-      int b[] = {2, 3, 3, 5};
+      int a[]     = {1, 2, 3, 4};
+      int b[]     = {2, 3, 3, 5};
       auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4)));
       auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4)));
-      auto ret = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; });
+      auto ret    = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; });
       assert(!ret);
     }
   }
 
   { // check that the projections are used
     {
-      int a[] = {1, 2, 3, 4, 5};
-      int b[] = {6, 10, 14, 18, 22};
-      auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 5)),
-                                    Iter2(b), Sent2(Iter2(b + 5)),
-                                    {},
-                                    [](int i) { return i * 4; },
-                                    [](int i) { return i - 2; });
+      int a[]  = {1, 2, 3, 4, 5};
+      int b[]  = {6, 10, 14, 18, 22};
+      auto ret = std::ranges::equal(
+          Iter1(a),
+          Sent1(Iter1(a + 5)),
+          Iter2(b),
+          Sent2(Iter2(b + 5)),
+          {},
+          [](int i) { return i * 4; },
+          [](int i) { return i - 2; });
       assert(ret);
     }
     {
-      int a[] = {1, 2, 3, 4, 5};
-      int b[] = {6, 10, 14, 18, 22};
+      int a[]     = {1, 2, 3, 4, 5};
+      int b[]     = {6, 10, 14, 18, 22};
       auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 5)));
       auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 5)));
-      auto ret = std::ranges::equal(range1,
-                                    range2,
-                                    {},
-                                    [](int i) { return i * 4; },
-                                    [](int i) { return i - 2; });
+      auto ret    = std::ranges::equal(range1, range2, {}, [](int i) { return i * 4; }, [](int i) { return i - 2; });
       assert(ret);
     }
   }
 
   { // check that different sized ranges work
     {
-      int a[] = {4, 3, 2, 1};
-      int b[] = {4, 3, 2, 1, 5, 6, 7};
+      int a[]  = {4, 3, 2, 1};
+      int b[]  = {4, 3, 2, 1, 5, 6, 7};
       auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 7)));
       assert(!ret);
     }
     {
-      int a[] = {4, 3, 2, 1};
-      int b[] = {4, 3, 2, 1, 5, 6, 7};
+      int a[]     = {4, 3, 2, 1};
+      int b[]     = {4, 3, 2, 1, 5, 6, 7};
       auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4)));
       auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 7)));
-      auto ret = std::ranges::equal(range1, range2);
+      auto ret    = std::ranges::equal(range1, range2);
       assert(!ret);
     }
   }
 
   { // check that two ranges with the same size but different values are different
     {
-      int a[] = {4, 6, 34, 76, 5};
-      int b[] = {4, 6, 34, 67, 5};
+      int a[]  = {4, 6, 34, 76, 5};
+      int b[]  = {4, 6, 34, 67, 5};
       auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 5)), Iter2(b), Sent2(Iter2(b + 5)));
       assert(!ret);
     }
     {
-      int a[] = {4, 6, 34, 76, 5};
-      int b[] = {4, 6, 34, 67, 5};
+      int a[]     = {4, 6, 34, 76, 5};
+      int b[]     = {4, 6, 34, 67, 5};
       auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 5)));
       auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 5)));
-      auto ret = std::ranges::equal(range1, range2);
+      auto ret    = std::ranges::equal(range1, range2);
       assert(!ret);
     }
   }
@@ -211,7 +214,7 @@ constexpr void test_iterators() {
       std::array<int, 0> b = {};
       auto range1          = std::ranges::subrange(Iter1(a.data()), Sent1(Iter1(a.data())));
       auto range2          = std::ranges::subrange(Iter2(b.data()), Sent2(Iter2(b.data())));
-      auto ret = std::ranges::equal(range1, range2);
+      auto ret             = std::ranges::equal(range1, range2);
       assert(ret);
     }
   }
@@ -219,39 +222,39 @@ constexpr void test_iterators() {
   { // check that it works with the first range empty
     {
       std::array<int, 0> a = {};
-      int b[] = {1, 2};
+      int b[]              = {1, 2};
       auto ret             = std::ranges::equal(Iter1(a.data()), Sent1(Iter1(a.data())), Iter2(b), Sent2(Iter2(b + 2)));
       assert(!ret);
     }
     {
       std::array<int, 0> a = {};
-      int b[] = {1, 2};
+      int b[]              = {1, 2};
       auto range1          = std::ranges::subrange(Iter1(a.data()), Sent1(Iter1(a.data())));
-      auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 2)));
-      auto ret = std::ranges::equal(range1, range2);
+      auto range2          = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 2)));
+      auto ret             = std::ranges::equal(range1, range2);
       assert(!ret);
     }
   }
 
   { // check that it works with the second range empty
     {
-      int a[] = {1, 2};
+      int a[]              = {1, 2};
       std::array<int, 0> b = {};
       auto ret             = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 2)), Iter2(b.data()), Sent2(Iter2(b.data())));
       assert(!ret);
     }
     {
-      int a[] = {1, 2};
+      int a[]              = {1, 2};
       std::array<int, 0> b = {};
-      auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 2)));
+      auto range1          = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 2)));
       auto range2          = std::ranges::subrange(Iter2(b.data()), Sent2(Iter2(b.data())));
-      auto ret = std::ranges::equal(range1, range2);
+      auto ret             = std::ranges::equal(range1, range2);
       assert(!ret);
     }
   }
 }
 
-template<class Iter1, class Sent1 = Iter1>
+template <class Iter1, class Sent1 = Iter1>
 constexpr void test_iterators2() {
   test_iterators<Iter1, Sent1, cpp17_input_iterator<int*>, sentinel_wrapper<cpp17_input_iterator<int*>>>();
   test_iterators<Iter1, Sent1, cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
@@ -263,6 +266,22 @@ constexpr void test_iterators2() {
   test_iterators<Iter1, Sent1, const int*>();
 }
 
+template <std::size_t N>
+constexpr void test_vector_bool() {
+  std::vector<bool> in(N, false);
+  for (std::size_t i = 0; i < N; i += 2)
+    in[i] = true;
+  { // Test equal() with aligned bytes
+    std::vector<bool> out = in;
+    assert(std::ranges::equal(in, out));
+  }
+  { // Test equal() with unaligned bytes
+    std::vector<bool> out(N + 8);
+    std::copy(in.begin(), in.end(), out.begin() + 4);
+    assert(std::ranges::equal(in.begin(), in.end(), out.begin() + 4, out.end() - 4));
+  }
+}
+
 constexpr bool test() {
   test_iterators2<cpp17_input_iterator<int*>, sentinel_wrapper<cpp17_input_iterator<int*>>>();
   test_iterators2<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
@@ -281,40 +300,52 @@ constexpr bool test() {
       int i;
     };
     {
-      S a[] = {7, 8, 9};
-      S b[] = {7, 8, 9};
+      S a[]    = {7, 8, 9};
+      S b[]    = {7, 8, 9};
       auto ret = std::ranges::equal(a, a + 3, b, b + 3, &S::equal, &S::identity, &S::i);
       assert(ret);
     }
     {
-      S a[] = {7, 8, 9};
-      S b[] = {7, 8, 9};
+      S a[]    = {7, 8, 9};
+      S b[]    = {7, 8, 9};
       auto ret = std::ranges::equal(a, b, &S::equal, &S::identity, &S::i);
       assert(ret);
     }
   }
 
-  { // check that the complexity requirements are met
+  {   // check that the complexity requirements are met
     { // different size
       {
-        int a[] = {1, 2, 3};
-        int b[] = {1, 2, 3, 4};
+        int a[]       = {1, 2, 3};
+        int b[]       = {1, 2, 3, 4};
         int predCount = 0;
         int projCount = 0;
-        auto pred = [&](int l, int r) { ++predCount; return l == r; };
-        auto proj = [&](int i) { ++projCount; return i; };
+        auto pred     = [&](int l, int r) {
+          ++predCount;
+          return l == r;
+        };
+        auto proj = [&](int i) {
+          ++projCount;
+          return i;
+        };
         auto ret = std::ranges::equal(a, a + 3, b, b + 4, pred, proj, proj);
         assert(!ret);
         assert(predCount == 0);
         assert(projCount == 0);
       }
       {
-        int a[] = {1, 2, 3};
-        int b[] = {1, 2, 3, 4};
+        int a[]       = {1, 2, 3};
+        int b[]       = {1, 2, 3, 4};
         int predCount = 0;
         int projCount = 0;
-        auto pred = [&](int l, int r) { ++predCount; return l == r; };
-        auto proj = [&](int i) { ++projCount; return i; };
+        auto pred     = [&](int l, int r) {
+          ++predCount;
+          return l == r;
+        };
+        auto proj = [&](int i) {
+          ++projCount;
+          return i;
+        };
         auto ret = std::ranges::equal(a, b, pred, proj, proj);
         assert(!ret);
         assert(predCount == 0);
@@ -324,27 +355,39 @@ constexpr bool test() {
 
     { // not a sized sentinel
       {
-        int a[] = {1, 2, 3};
-        int b[] = {1, 2, 3, 4};
+        int a[]       = {1, 2, 3};
+        int b[]       = {1, 2, 3, 4};
         int predCount = 0;
         int projCount = 0;
-        auto pred = [&](int l, int r) { ++predCount; return l == r; };
-        auto proj = [&](int i) { ++projCount; return i; };
+        auto pred     = [&](int l, int r) {
+          ++predCount;
+          return l == r;
+        };
+        auto proj = [&](int i) {
+          ++projCount;
+          return i;
+        };
         auto ret = std::ranges::equal(a, sentinel_wrapper(a + 3), b, sentinel_wrapper(b + 4), pred, proj, proj);
         assert(!ret);
         assert(predCount <= 4);
         assert(projCount <= 7);
       }
       {
-        int a[] = {1, 2, 3};
-        int b[] = {1, 2, 3, 4};
+        int a[]       = {1, 2, 3};
+        int b[]       = {1, 2, 3, 4};
         int predCount = 0;
         int projCount = 0;
-        auto pred = [&](int l, int r) { ++predCount; return l == r; };
-        auto proj = [&](int i) { ++projCount; return i; };
+        auto pred     = [&](int l, int r) {
+          ++predCount;
+          return l == r;
+        };
+        auto proj = [&](int i) {
+          ++projCount;
+          return i;
+        };
         auto range1 = std::ranges::subrange(a, sentinel_wrapper(a + 3));
         auto range2 = std::ranges::subrange(b, sentinel_wrapper(b + 4));
-        auto ret = std::ranges::equal(range1, range2, pred, proj, proj);
+        auto ret    = std::ranges::equal(range1, range2, pred, proj, proj);
         assert(!ret);
         assert(predCount <= 4);
         assert(projCount <= 7);
@@ -353,30 +396,52 @@ constexpr bool test() {
 
     { // same size
       {
-        int a[] = {1, 2, 3};
-        int b[] = {1, 2, 3};
+        int a[]       = {1, 2, 3};
+        int b[]       = {1, 2, 3};
         int predCount = 0;
         int projCount = 0;
-        auto pred = [&](int l, int r) { ++predCount; return l == r; };
-        auto proj = [&](int i) { ++projCount; return i; };
+        auto pred     = [&](int l, int r) {
+          ++predCount;
+          return l == r;
+        };
+        auto proj = [&](int i) {
+          ++projCount;
+          return i;
+        };
         auto ret = std::ranges::equal(a, a + 3, b, b + 3, pred, proj, proj);
         assert(ret);
         assert(predCount == 3);
         assert(projCount == 6);
       }
       {
-        int a[] = {1, 2, 3};
-        int b[] = {1, 2, 3};
+        int a[]       = {1, 2, 3};
+        int b[]       = {1, 2, 3};
         int predCount = 0;
         int projCount = 0;
-        auto pred = [&](int l, int r) { ++predCount; return l == r; };
-        auto proj = [&](int i) { ++projCount; return i; };
+        auto pred     = [&](int l, int r) {
+          ++predCount;
+          return l == r;
+        };
+        auto proj = [&](int i) {
+          ++projCount;
+          return i;
+        };
         auto ret = std::ranges::equal(a, b, pred, proj, proj);
         assert(ret);
         assert(predCount == 3);
         assert(projCount == 6);
       }
     }
+
+    { // Test vector<bool>::iterator optimization
+      test_vector_bool<8>();
+      test_vector_bool<19>();
+      test_vector_bool<32>();
+      test_vector_bool<49>();
+      test_vector_bool<64>();
+      test_vector_bool<199>();
+      test_vector_bool<256>();
+    }
   }
 
   return true;

>From 2019761098334ba6f02442aae22552ddfdc2d01c Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Fri, 7 Feb 2025 16:35:15 -0500
Subject: [PATCH 2/2] Intentionally invokes standard std::equal in fill, fill_n
 tests

---
 .../alg.fill/fill.pass.cpp                         | 14 ++++++++++++--
 .../alg.fill/fill_n.pass.cpp                       | 14 ++++++++++++--
 .../alg.fill/ranges.fill.pass.cpp                  | 14 ++++++++++++--
 .../alg.fill/ranges.fill_n.pass.cpp                | 14 ++++++++++++--
 4 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
index 7656be73c14c6d6..86fd6fb6f1cccc2 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
@@ -55,14 +55,24 @@ TEST_CONSTEXPR_CXX20 void test_bititer_with_custom_sized_types() {
     std::vector<bool, Alloc> in(100, false, Alloc(1));
     std::vector<bool, Alloc> expected(100, true, Alloc(1));
     std::fill(in.begin(), in.end(), true);
-    assert(in == expected);
+
+    // FIXME: Force to call standard-form std::equal() instead of the vector<bool> optimization because the latter
+    // suffers from UB for small integral `size_type`s, which will be fixed soon.
+    using It = cpp17_input_iterator<std::vector<bool, Alloc>::iterator>;
+    assert(in.size() == expected.size() && std::equal(It(in.begin()), It(in.end()), It(expected.begin())));
+    // assert(in == expected);
   }
   {
     using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
     std::vector<bool, Alloc> in(200, false, Alloc(1));
     std::vector<bool, Alloc> expected(200, true, Alloc(1));
     std::fill(in.begin(), in.end(), true);
-    assert(in == expected);
+
+    // FIXME: Force to call standard-form std::equal() instead of the vector<bool> optimization because the latter
+    // suffers from UB for small integral `size_type`s, which will be fixed soon.
+    using It = cpp17_input_iterator<std::vector<bool, Alloc>::iterator>;
+    assert(in.size() == expected.size() && std::equal(It(in.begin()), It(in.end()), It(expected.begin())));
+    // assert(in == expected);
   }
   {
     using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
index 3b67101a8b29e78..d162da59bca9ba8 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
@@ -137,14 +137,24 @@ TEST_CONSTEXPR_CXX20 void test_bititer_with_custom_sized_types() {
     std::vector<bool, Alloc> in(100, false, Alloc(1));
     std::vector<bool, Alloc> expected(100, true, Alloc(1));
     std::fill_n(in.begin(), in.size(), true);
-    assert(in == expected);
+
+    // FIXME: Force to call standard-form std::equal() instead of the vector<bool> optimization because the latter
+    // suffers from UB for small integral `size_type`s, which will be fixed soon.
+    using It = cpp17_input_iterator<std::vector<bool, Alloc>::iterator>;
+    assert(in.size() == expected.size() && std::equal(It(in.begin()), It(in.end()), It(expected.begin())));
+    // assert(in == expected);
   }
   {
     using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
     std::vector<bool, Alloc> in(200, false, Alloc(1));
     std::vector<bool, Alloc> expected(200, true, Alloc(1));
     std::fill_n(in.begin(), in.size(), true);
-    assert(in == expected);
+
+    // FIXME: Force to call standard-form std::equal() instead of the vector<bool> optimization because the latter
+    // suffers from UB for small integral `size_type`s, which will be fixed soon.
+    using It = cpp17_input_iterator<std::vector<bool, Alloc>::iterator>;
+    assert(in.size() == expected.size() && std::equal(It(in.begin()), It(in.end()), It(expected.begin())));
+    // assert(in == expected);
   }
   {
     using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
index 30dfdd5486f5b1f..810406b6ef840fc 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
@@ -91,14 +91,24 @@ TEST_CONSTEXPR_CXX20 void test_bititer_with_custom_sized_types() {
     std::vector<bool, Alloc> in(100, false, Alloc(1));
     std::vector<bool, Alloc> expected(100, true, Alloc(1));
     std::ranges::fill(in, true);
-    assert(in == expected);
+
+    // FIXME: Force to call standard-form std::equal() instead of the vector<bool> optimization because the latter
+    // suffers from UB for small integral `size_type`s, which will be fixed soon.
+    using It = cpp17_input_iterator<std::vector<bool, Alloc>::iterator>;
+    assert(in.size() == expected.size() && std::equal(It(in.begin()), It(in.end()), It(expected.begin())));
+    // assert(in == expected);
   }
   {
     using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
     std::vector<bool, Alloc> in(200, false, Alloc(1));
     std::vector<bool, Alloc> expected(200, true, Alloc(1));
     std::ranges::fill(in, true);
-    assert(in == expected);
+
+    // FIXME: Force to call standard-form std::equal() instead of the vector<bool> optimization because the latter
+    // suffers from UB for small integral `size_type`s, which will be fixed soon.
+    using It = cpp17_input_iterator<std::vector<bool, Alloc>::iterator>;
+    assert(in.size() == expected.size() && std::equal(It(in.begin()), It(in.end()), It(expected.begin())));
+    // assert(in == expected);
   }
   {
     using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
index ae70e7155f67ffc..3c4cdc5f32c2d1c 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
@@ -64,14 +64,24 @@ TEST_CONSTEXPR_CXX20 void test_bititer_with_custom_sized_types() {
     std::vector<bool, Alloc> in(100, false, Alloc(1));
     std::vector<bool, Alloc> expected(100, true, Alloc(1));
     std::ranges::fill_n(std::ranges::begin(in), in.size(), true);
-    assert(in == expected);
+
+    // FIXME: Force to call standard-form std::equal() instead of the vector<bool> optimization because the latter
+    // suffers from UB for small integral `size_type`s, which will be fixed soon.
+    using It = cpp17_input_iterator<std::vector<bool, Alloc>::iterator>;
+    assert(in.size() == expected.size() && std::equal(It(in.begin()), It(in.end()), It(expected.begin())));
+    // assert(in == expected);
   }
   {
     using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
     std::vector<bool, Alloc> in(200, false, Alloc(1));
     std::vector<bool, Alloc> expected(200, true, Alloc(1));
     std::ranges::fill_n(std::ranges::begin(in), in.size(), true);
-    assert(in == expected);
+
+    // FIXME: Force to call standard-form std::equal() instead of the vector<bool> optimization because the latter
+    // suffers from UB for small integral `size_type`s, which will be fixed soon.
+    using It = cpp17_input_iterator<std::vector<bool, Alloc>::iterator>;
+    assert(in.size() == expected.size() && std::equal(It(in.begin()), It(in.end()), It(expected.begin())));
+    // assert(in == expected);
   }
   {
     using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;



More information about the libcxx-commits mailing list