[libcxx-commits] [libcxx] [libc++] Optimize ranges::equal for vector<bool>::iterator (PR #121084)
Louis Dionne via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Feb 5 09:20:48 PST 2025
================
@@ -33,6 +41,132 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
+template <class _Cp, bool _IC1, bool _IC2>
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned(
+ __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
+ using _It = __bit_iterator<_Cp, _IC1>;
+ using difference_type = typename _It::difference_type;
+ using __storage_type = typename _It::__storage_type;
+
+ const int __bits_per_word = _It::__bits_per_word;
+ difference_type __n = __last1 - __first1;
+ if (__n > 0) {
+ // do first word
+ if (__first1.__ctz_ != 0) {
+ unsigned __clz_f = __bits_per_word - __first1.__ctz_;
+ difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
+ __n -= __dn;
+ __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
+ __storage_type __b = *__first1.__seg_ & __m;
+ unsigned __clz_r = __bits_per_word - __first2.__ctz_;
+ __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
+ __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
+ if (__first2.__ctz_ > __first1.__ctz_) {
+ if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_)))
+ return false;
+ } else {
+ if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
+ return false;
+ }
+ __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
+ __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
+ __dn -= __ddn;
+ if (__dn > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __dn);
+ if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
+ return false;
+ __first2.__ctz_ = static_cast<unsigned>(__dn);
+ }
+ ++__first1.__seg_;
+ // __first1.__ctz_ = 0;
+ }
+ // __first1.__ctz_ == 0;
+ // do middle words
+ unsigned __clz_r = __bits_per_word - __first2.__ctz_;
+ __storage_type __m = ~__storage_type(0) << __first2.__ctz_;
+ for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
+ __storage_type __b = *__first1.__seg_;
+ if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
+ return false;
+ ++__first2.__seg_;
+ if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
+ return false;
+ }
+ // do last word
+ if (__n > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __n);
+ __storage_type __b = *__first1.__seg_ & __m;
+ __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
+ __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
+ if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
+ return false;
+ __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
+ __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
+ __n -= __dn;
+ if (__n > 0) {
+ __m = ~__storage_type(0) >> (__bits_per_word - __n);
+ if ((*__first2.__seg_ & __m) != (__b >> __dn))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+template <class _Cp, bool _IC1, bool _IC2>
+[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned(
+ __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
+ using _It = __bit_iterator<_Cp, _IC1>;
+ using difference_type = typename _It::difference_type;
+ using __storage_type = typename _It::__storage_type;
+
+ const int __bits_per_word = _It::__bits_per_word;
+ difference_type __n = __last1 - __first1;
+ if (__n > 0) {
+ // do first word
+ if (__first1.__ctz_ != 0) {
+ unsigned __clz = __bits_per_word - __first1.__ctz_;
+ difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
+ __n -= __dn;
+ __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
+ if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
+ return false;
+ ++__first2.__seg_;
+ ++__first1.__seg_;
+ // __first1.__ctz_ = 0;
+ // __first2.__ctz_ = 0;
+ }
+ // __first1.__ctz_ == 0;
+ // __first2.__ctz_ == 0;
+ // do middle words
+ for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
+ if (*__first2.__seg_ != *__first1.__seg_)
+ return false;
+ // do last word
+ if (__n > 0) {
+ __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
+ if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
+ return false;
+ }
+ }
+ return true;
+}
+
+template <class _Cp,
+ bool _IC1,
+ bool _IC2,
+ class _BinaryPredicate,
+ __enable_if_t<std::is_same<_BinaryPredicate, __equal_to>::value, int> = 0>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
----------------
ldionne wrote:
Let's inline this function into `__equal_impl(__bit_iterator, ...)` below, since the additional indirection doesn't serve much of a purpose.
https://github.com/llvm/llvm-project/pull/121084
More information about the libcxx-commits
mailing list