[libcxx-commits] [libcxx] 8670b53 - [libc++] Optimize ranges::find for vector<bool>

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Tue Aug 1 10:28:30 PDT 2023


Author: Nikolas Klauser
Date: 2023-08-01T10:28:25-07:00
New Revision: 8670b53e11bb4f41eeef812472ff8efa957470a1

URL: https://github.com/llvm/llvm-project/commit/8670b53e11bb4f41eeef812472ff8efa957470a1
DIFF: https://github.com/llvm/llvm-project/commit/8670b53e11bb4f41eeef812472ff8efa957470a1.diff

LOG: [libc++] Optimize ranges::find for vector<bool>

Benchmark results:
```
----------------------------------------------------------------
Benchmark                                    old             new
----------------------------------------------------------------
bm_vector_bool_ranges_find/1             5.64 ns         6.08 ns
bm_vector_bool_ranges_find/2             16.5 ns         6.03 ns
bm_vector_bool_ranges_find/3             20.3 ns         6.07 ns
bm_vector_bool_ranges_find/4             22.2 ns         6.08 ns
bm_vector_bool_ranges_find/5             23.5 ns         6.05 ns
bm_vector_bool_ranges_find/6             24.4 ns         6.10 ns
bm_vector_bool_ranges_find/7             26.7 ns         6.10 ns
bm_vector_bool_ranges_find/8             25.0 ns         6.08 ns
bm_vector_bool_ranges_find/16            27.9 ns         6.07 ns
bm_vector_bool_ranges_find/64            44.5 ns         5.35 ns
bm_vector_bool_ranges_find/512            243 ns         25.7 ns
bm_vector_bool_ranges_find/4096          1858 ns         35.6 ns
bm_vector_bool_ranges_find/32768        15461 ns         93.5 ns
bm_vector_bool_ranges_find/262144      126462 ns          571 ns
bm_vector_bool_ranges_find/1048576     497736 ns         2272 ns
```

Reviewed By: #libc, Mordante

Spies: var-const, Mordante, libcxx-commits

Differential Revision: https://reviews.llvm.org/D156039

Added: 
    libcxx/include/__bit/invert_if.h
    libcxx/include/__fwd/bit_reference.h

Modified: 
    libcxx/benchmarks/algorithms/find.bench.cpp
    libcxx/include/CMakeLists.txt
    libcxx/include/__algorithm/find.h
    libcxx/include/__bit_reference
    libcxx/include/bitset
    libcxx/test/libcxx/transitive_includes/cxx03.csv
    libcxx/test/libcxx/transitive_includes/cxx11.csv
    libcxx/test/libcxx/transitive_includes/cxx14.csv
    libcxx/test/libcxx/transitive_includes/cxx17.csv
    libcxx/test/libcxx/transitive_includes/cxx20.csv
    libcxx/test/libcxx/transitive_includes/cxx23.csv
    libcxx/test/libcxx/transitive_includes/cxx26.csv

Removed: 
    (none)


################################################################################
diff  --git a/libcxx/benchmarks/algorithms/find.bench.cpp b/libcxx/benchmarks/algorithms/find.bench.cpp
index 65b2cdaca95a7e..b87c575a16b4dc 100644
--- a/libcxx/benchmarks/algorithms/find.bench.cpp
+++ b/libcxx/benchmarks/algorithms/find.bench.cpp
@@ -46,4 +46,32 @@ BENCHMARK(bm_ranges_find<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
 BENCHMARK(bm_ranges_find<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
 BENCHMARK(bm_ranges_find<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
 
+static void bm_vector_bool_find(benchmark::State& state) {
+  std::vector<bool> vec1(state.range(), false);
+  std::mt19937_64 rng(std::random_device{}());
+
+  for (auto _ : state) {
+    auto idx  = rng() % vec1.size();
+    vec1[idx] = true;
+    benchmark::DoNotOptimize(vec1);
+    benchmark::DoNotOptimize(std::find(vec1.begin(), vec1.end(), true));
+    vec1[idx] = false;
+  }
+}
+BENCHMARK(bm_vector_bool_find)->DenseRange(1, 8)->Range(16, 1 << 20);
+
+static void bm_vector_bool_ranges_find(benchmark::State& state) {
+  std::vector<bool> vec1(state.range(), false);
+  std::mt19937_64 rng(std::random_device{}());
+
+  for (auto _ : state) {
+    auto idx  = rng() % vec1.size();
+    vec1[idx] = true;
+    benchmark::DoNotOptimize(vec1);
+    benchmark::DoNotOptimize(std::ranges::find(vec1, true));
+    vec1[idx] = false;
+  }
+}
+BENCHMARK(bm_vector_bool_ranges_find)->DenseRange(1, 8)->Range(16, 1 << 20);
+
 BENCHMARK_MAIN();

diff  --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 726afdd07daeed..58c700725447f6 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -249,6 +249,7 @@ set(files
   __bit/countr.h
   __bit/endian.h
   __bit/has_single_bit.h
+  __bit/invert_if.h
   __bit/popcount.h
   __bit/rotate.h
   __bit_reference
@@ -415,6 +416,7 @@ set(files
   __functional/unary_negate.h
   __functional/weak_result_type.h
   __fwd/array.h
+  __fwd/bit_reference.h
   __fwd/fstream.h
   __fwd/get.h
   __fwd/hash.h

diff  --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index e0de5032878ecf..d7c268bc6b338b 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -10,10 +10,14 @@
 #ifndef _LIBCPP___ALGORITHM_FIND_H
 #define _LIBCPP___ALGORITHM_FIND_H
 
+#include <__algorithm/min.h>
 #include <__algorithm/unwrap_iter.h>
+#include <__bit/countr.h>
+#include <__bit/invert_if.h>
 #include <__config>
 #include <__functional/identity.h>
 #include <__functional/invoke.h>
+#include <__fwd/bit_reference.h>
 #include <__string/constexpr_c_functions.h>
 #include <__type_traits/is_same.h>
 
@@ -25,8 +29,12 @@
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+// generic implementation
 template <class _Iter, class _Sent, class _Tp, class _Proj>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
 __find_impl(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
@@ -36,6 +44,7 @@ __find_impl(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
   return __first;
 }
 
+// trivially equality comparable implementations
 template <class _Tp,
           class _Up,
           class _Proj,
@@ -64,6 +73,51 @@ __find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
 }
 #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
 
+// __bit_iterator implementation
+template <bool _ToFind, class _Cp, bool _IsConst>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, _IsConst>
+__find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) {
+  using _It            = __bit_iterator<_Cp, _IsConst>;
+  using __storage_type = typename _It::__storage_type;
+
+  const int __bits_per_word = _It::__bits_per_word;
+  // do first partial word
+  if (__first.__ctz_ != 0) {
+    __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
+    __storage_type __dn    = std::min(__clz_f, __n);
+    __storage_type __m     = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
+    __storage_type __b     = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
+    if (__b)
+      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
+    if (__n == __dn)
+      return __first + __n;
+    __n -= __dn;
+    ++__first.__seg_;
+  }
+  // do middle whole words
+  for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) {
+    __storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_);
+    if (__b)
+      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
+  }
+  // do last partial word
+  if (__n > 0) {
+    __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
+    __storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
+    if (__b)
+      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
+  }
+  return _It(__first.__seg_, static_cast<unsigned>(__n));
+}
+
+template <class _Cp, bool _IsConst, class _Tp, class _Proj, __enable_if_t<__is_identity<_Proj>::value, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, _IsConst>
+__find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
+  if (static_cast<bool>(__value))
+    return std::__find_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
+  return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
+}
+
 template <class _InputIterator, class _Tp>
 _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
 find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
@@ -74,4 +128,6 @@ find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
 
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_FIND_H

diff  --git a/libcxx/include/__bit/invert_if.h b/libcxx/include/__bit/invert_if.h
new file mode 100644
index 00000000000000..f7606ede26da00
--- /dev/null
+++ b/libcxx/include/__bit/invert_if.h
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___BIT_INVERT_IF_H
+#define _LIBCPP___BIT_INVERT_IF_H
+
+#include <__concepts/arithmetic.h>
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <bool _Invert, class _Tp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __invert_if(_Tp __v) {
+  if (_Invert)
+    return ~__v;
+  return __v;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___BIT_INVERT_IF_H

diff  --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 67838d9d54cfb2..107368759c6e47 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -14,8 +14,10 @@
 #include <__algorithm/fill_n.h>
 #include <__algorithm/min.h>
 #include <__bit/countr.h>
+#include <__bit/invert_if.h>
 #include <__bit/popcount.h>
 #include <__config>
+#include <__fwd/bit_reference.h>
 #include <__iterator/iterator_traits.h>
 #include <__memory/construct_at.h>
 #include <__memory/pointer_traits.h>
@@ -32,8 +34,6 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
-class __bit_iterator;
 template <class _Cp>
 class __bit_const_reference;
 
@@ -171,59 +171,6 @@ private:
   __bit_const_reference& operator=(const __bit_const_reference&) = delete;
 };
 
-template <bool _Invert, class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __invert_if(_Tp __v) {
-  if (_Invert)
-    return ~__v;
-  return __v;
-}
-
-// find
-
-template <bool _ToFind, class _Cp, bool _IsConst>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, _IsConst>
-__find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) {
-  using _It            = __bit_iterator<_Cp, _IsConst>;
-  using __storage_type = typename _It::__storage_type;
-
-  const int __bits_per_word = _It::__bits_per_word;
-  // do first partial word
-  if (__first.__ctz_ != 0) {
-    __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
-    __storage_type __dn    = std::min(__clz_f, __n);
-    __storage_type __m     = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
-    __storage_type __b     = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
-    if (__b)
-      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
-    if (__n == __dn)
-      return __first + __n;
-    __n -= __dn;
-    ++__first.__seg_;
-  }
-  // do middle whole words
-  for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) {
-    __storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_);
-    if (__b)
-      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
-  }
-  // do last partial word
-  if (__n > 0) {
-    __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
-    __storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
-    if (__b)
-      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
-  }
-  return _It(__first.__seg_, static_cast<unsigned>(__n));
-}
-
-template <class _Cp, bool _IsConst, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, _IsConst>
-find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value) {
-  if (static_cast<bool>(__value))
-    return std::__find_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
-  return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
-}
-
 // count
 
 template <bool _ToCount, class _Cp, bool _IsConst>

diff  --git a/libcxx/include/__fwd/bit_reference.h b/libcxx/include/__fwd/bit_reference.h
new file mode 100644
index 00000000000000..237efb6db66429
--- /dev/null
+++ b/libcxx/include/__fwd/bit_reference.h
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FWD_BIT_REFERENCE_H
+#define _LIBCPP___FWD_BIT_REFERENCE_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
+class __bit_iterator;
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FWD_BIT_REFERENCE_H

diff  --git a/libcxx/include/bitset b/libcxx/include/bitset
index 4da0a4f29559e9..abac705ab29801 100644
--- a/libcxx/include/bitset
+++ b/libcxx/include/bitset
@@ -113,6 +113,7 @@ template <size_t N> struct hash<std::bitset<N>>;
 */
 
 #include <__algorithm/fill.h>
+#include <__algorithm/find.h>
 #include <__assert> // all public C++ headers provide the assertion handler
 #include <__bit_reference>
 #include <__config>

diff  --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv
index b832e076488b9f..2487cb5d640594 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx03.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv
@@ -89,6 +89,7 @@ bitset cstddef
 bitset cstdint
 bitset cstdlib
 bitset cstring
+bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits

diff  --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv
index 5a96f154773633..3f211ab8863c7b 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx11.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv
@@ -89,6 +89,7 @@ bitset cstddef
 bitset cstdint
 bitset cstdlib
 bitset cstring
+bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits

diff  --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index b188956f09dff0..35701d508e4995 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -89,6 +89,7 @@ bitset cstddef
 bitset cstdint
 bitset cstdlib
 bitset cstring
+bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits

diff  --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index b188956f09dff0..35701d508e4995 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -89,6 +89,7 @@ bitset cstddef
 bitset cstdint
 bitset cstdlib
 bitset cstring
+bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits

diff  --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index d049ba6ae42edb..0db9a188bf28df 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -88,6 +88,7 @@ bitset cstddef
 bitset cstdint
 bitset cstdlib
 bitset cstring
+bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits

diff  --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv
index 68e06a087053ab..50a0bcfbca0c4a 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx23.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv
@@ -50,6 +50,7 @@ bitset climits
 bitset cstddef
 bitset cstdint
 bitset cstring
+bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits

diff  --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 68e06a087053ab..50a0bcfbca0c4a 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -50,6 +50,7 @@ bitset climits
 bitset cstddef
 bitset cstdint
 bitset cstring
+bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits


        


More information about the libcxx-commits mailing list