[libcxx-commits] [libcxx] [libc++] Optimize std::min_element (PR #100616)

via libcxx-commits libcxx-commits at lists.llvm.org
Tue Oct 15 09:54:37 PDT 2024


https://github.com/hiraditya updated https://github.com/llvm/llvm-project/pull/100616

>From 01c71b48fa1b4a9dfe11e14238f7a8fb4f0ab286 Mon Sep 17 00:00:00 2001
From: Vaibhav Pathak <pathakvaibhav at protonmail.com>
Date: Thu, 25 Jul 2024 22:01:54 +0530
Subject: [PATCH 1/3] [libc++] Optimize std::min_element

[libc++] Fix formatting and move min_element.bench.cpp

[libc++] Use __invoke instead of invoke

[libc++] Fix build issues with find
---
 libcxx/benchmarks/CMakeLists.txt              |  1 +
 .../algorithms/min_element.bench.cpp          | 80 +++++++++++++++++++
 libcxx/include/__algorithm/find.h             |  2 +-
 libcxx/include/__algorithm/min_element.h      | 56 +++++++++++--
 4 files changed, 133 insertions(+), 6 deletions(-)
 create mode 100644 libcxx/benchmarks/algorithms/min_element.bench.cpp

diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index d96ccc1e49f66b..952777d5346f7a 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -121,6 +121,7 @@ set(BENCHMARK_TESTS
     algorithms/make_heap_then_sort_heap.bench.cpp
     algorithms/min.bench.cpp
     algorithms/minmax.bench.cpp
+    algorithms/min_element.bench.cpp
     algorithms/min_max_element.bench.cpp
     algorithms/mismatch.bench.cpp
     algorithms/pop_heap.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/min_element.bench.cpp b/libcxx/benchmarks/algorithms/min_element.bench.cpp
new file mode 100644
index 00000000000000..7edb8ddbb15f29
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/min_element.bench.cpp
@@ -0,0 +1,80 @@
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+#include <random>
+
+template <typename T>
+static void BM_stdmin_element_decreasing(benchmark::State& state) {
+  std::vector<T> v(state.range(0));
+  T start = std::numeric_limits<T>::max();
+  T end   = std::numeric_limits<T>::min();
+
+  for (size_t i = 0; i < v.size(); i++)
+    v[i] = ((start != end) ? start-- : end);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(v);
+    benchmark::DoNotOptimize(std::min_element(v.begin(), v.end()));
+  }
+}
+
+BENCHMARK(BM_stdmin_element_decreasing<char>)
+    ->DenseRange(1, 8)
+    ->Range(32, 128)
+    ->Range(256, 4096)
+    ->DenseRange(5000, 10000, 1000)
+    ->Range(1 << 14, 1 << 16)
+    ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<short>)
+    ->DenseRange(1, 8)
+    ->Range(32, 128)
+    ->Range(256, 4096)
+    ->DenseRange(5000, 10000, 1000)
+    ->Range(1 << 14, 1 << 16)
+    ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<int>)
+    ->DenseRange(1, 8)
+    ->Range(32, 128)
+    ->Range(256, 4096)
+    ->DenseRange(5000, 10000, 1000)
+    ->Range(1 << 14, 1 << 16)
+    ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<long long>)
+    ->DenseRange(1, 8)
+    ->Range(32, 128)
+    ->Range(256, 4096)
+    ->DenseRange(5000, 10000, 1000)
+    ->Range(1 << 14, 1 << 16)
+    ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned char>)
+    ->DenseRange(1, 8)
+    ->Range(32, 128)
+    ->Range(256, 4096)
+    ->DenseRange(5000, 10000, 1000)
+    ->Range(1 << 14, 1 << 16)
+    ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned short>)
+    ->DenseRange(1, 8)
+    ->Range(32, 128)
+    ->Range(256, 4096)
+    ->DenseRange(5000, 10000, 1000)
+    ->Range(1 << 14, 1 << 16)
+    ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned int>)
+    ->DenseRange(1, 8)
+    ->Range(32, 128)
+    ->Range(256, 4096)
+    ->DenseRange(5000, 10000, 1000)
+    ->Range(1 << 14, 1 << 16)
+    ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned long long>)
+    ->DenseRange(1, 8)
+    ->Range(32, 128)
+    ->Range(256, 4096)
+    ->DenseRange(5000, 10000, 1000)
+    ->Range(1 << 14, 1 << 16)
+    ->Arg(70000);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 7f58dbb13a5776..8ecd53e63af21b 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -104,7 +104,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
   // do first partial word
   if (__first.__ctz_ != 0) {
     __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
-    __storage_type __dn    = std::min(__clz_f, __n);
+    __storage_type __dn    = (__clz_f < __n) ? __clz_f : __n;
     __storage_type __m     = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
     __storage_type __b     = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
     if (__b)
diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h
index 65f3594d630cef..25964a9e3a52fe 100644
--- a/libcxx/include/__algorithm/min_element.h
+++ b/libcxx/include/__algorithm/min_element.h
@@ -11,6 +11,8 @@
 
 #include <__algorithm/comp.h>
 #include <__algorithm/comp_ref_type.h>
+#include <__algorithm/find.h>
+#include <__algorithm/iterator_operations.h>
 #include <__config>
 #include <__functional/identity.h>
 #include <__functional/invoke.h>
@@ -33,12 +35,56 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
   if (__first == __last)
     return __first;
 
-  _Iter __i = __first;
-  while (++__i != __last)
-    if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
-      __first = __i;
+  const size_t __n = static_cast<size_t>(std::distance(__first, __last));
 
-  return __first;
+  if (__n <= 64) {
+    _Iter __i = __first;
+    while (++__i != __last)
+      if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
+        __first = __i;
+    return __first;
+  }
+
+  size_t __block_size = 256;
+
+  size_t __n_blocked  = __n - (__n % __block_size);
+  _Iter __block_start = __first, __block_end = __first;
+
+  typedef typename std::iterator_traits<_Iter>::value_type value_type;
+  value_type __min_val = std::__invoke(__proj, *__first);
+
+  _Iter __curr = __first;
+  for (size_t __i = 0; __i < __n_blocked; __i += __block_size) {
+    _Iter __start          = __curr;
+    value_type __block_min = __min_val;
+    for (size_t j = 0; j < __block_size; j++) {
+      if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __block_min)) {
+        __block_min = *__curr;
+      }
+      __curr++;
+    }
+    if (std::__invoke(__comp, __block_min, __min_val)) {
+      __min_val     = __block_min;
+      __block_start = __start;
+      __block_end   = __curr;
+    }
+  }
+
+  value_type __epilogue_min = __min_val;
+  _Iter __epilogue_start    = __curr;
+  while (__curr != __last) {
+    if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __epilogue_min)) {
+      __epilogue_min = *__curr;
+    }
+    __curr++;
+  }
+  if (std::__invoke(__comp, __epilogue_min, __min_val)) {
+    __min_val     = __epilogue_min;
+    __block_start = __epilogue_start;
+    __block_end   = __last;
+  }
+
+  return std::__find(__block_start, __block_end, __min_val, __proj);
 }
 
 template <class _Comp, class _Iter, class _Sent>

>From 6aae7b12ca3251b5eb24e85403d55a0ceffd71be Mon Sep 17 00:00:00 2001
From: Vaibhav Pathak <pathakvaibhav at protonmail.com>
Date: Fri, 26 Jul 2024 03:22:53 +0530
Subject: [PATCH 2/3] [libc++] Fix build issues and replace std::find with loop

[libc++] Revert block_size logic

[libc++] Replace std::find with impl
---
 libcxx/include/__algorithm/find.h        |  2 +-
 libcxx/include/__algorithm/min_element.h | 68 ++++++++++++++++++++----
 2 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 8ecd53e63af21b..7f58dbb13a5776 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -104,7 +104,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
   // do first partial word
   if (__first.__ctz_ != 0) {
     __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
-    __storage_type __dn    = (__clz_f < __n) ? __clz_f : __n;
+    __storage_type __dn    = std::min(__clz_f, __n);
     __storage_type __m     = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
     __storage_type __b     = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
     if (__b)
diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h
index 25964a9e3a52fe..e0b0a17158c9be 100644
--- a/libcxx/include/__algorithm/min_element.h
+++ b/libcxx/include/__algorithm/min_element.h
@@ -11,7 +11,6 @@
 
 #include <__algorithm/comp.h>
 #include <__algorithm/comp_ref_type.h>
-#include <__algorithm/find.h>
 #include <__algorithm/iterator_operations.h>
 #include <__config>
 #include <__functional/identity.h>
@@ -29,13 +28,54 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+template <class _Iter, class _Sent, class = void>
+struct __ConstTimeDistance : false_type {};
+
+#if _LIBCPP_STD_VER >= 20
+
+template <class _Iter, class _Sent>
+struct __ConstTimeDistance< _Iter, _Sent, __enable_if_t< sized_sentinel_for<_Sent, _Iter> >> : true_type {};
+
+#else
+
+template <class _Iter>
+struct __ConstTimeDistance<
+    _Iter,
+    _Iter,
+    __enable_if_t< is_same<typename iterator_traits<_Iter>::iterator_category, random_access_iterator_tag>::value> >
+    : true_type {};
+
+#endif // _LIBCPP_STD_VER >= 20
+
 template <class _Comp, class _Iter, class _Sent, class _Proj>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
-__min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter __min_element(
+    _Iter __first,
+    _Sent __last,
+    _Comp __comp,
+    _Proj& __proj,
+    /*__ConstTimeDistance*/ false_type) {
   if (__first == __last)
     return __first;
 
-  const size_t __n = static_cast<size_t>(std::distance(__first, __last));
+  _Iter __i = __first;
+  while (++__i != __last)
+    if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
+      __first = __i;
+  return __first;
+}
+
+template <class _Comp, class _Iter, class _Sent, class _Proj>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter __min_element(
+    _Iter __first,
+    _Sent __last,
+    _Comp __comp,
+    _Proj& __proj,
+    /*__ConstTimeDistance*/ true_type) {
+  if (__first == __last)
+    return __first;
+
+  typedef typename std::iterator_traits<_Iter>::difference_type diff_type;
+  diff_type __n = std::distance(__first, __last);
 
   if (__n <= 64) {
     _Iter __i = __first;
@@ -45,19 +85,19 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
     return __first;
   }
 
-  size_t __block_size = 256;
+  diff_type __block_size = 256;
 
-  size_t __n_blocked  = __n - (__n % __block_size);
+  diff_type __n_blocked = __n - (__n % __block_size);
   _Iter __block_start = __first, __block_end = __first;
 
   typedef typename std::iterator_traits<_Iter>::value_type value_type;
   value_type __min_val = std::__invoke(__proj, *__first);
 
   _Iter __curr = __first;
-  for (size_t __i = 0; __i < __n_blocked; __i += __block_size) {
+  for (diff_type __i = 0; __i < __n_blocked; __i += __block_size) {
     _Iter __start          = __curr;
     value_type __block_min = __min_val;
-    for (size_t j = 0; j < __block_size; j++) {
+    for (diff_type __j = 0; __j < __block_size; __j++) {
       if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __block_min)) {
         __block_min = *__curr;
       }
@@ -84,7 +124,17 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
     __block_end   = __last;
   }
 
-  return std::__find(__block_start, __block_end, __min_val, __proj);
+  for (; __block_start != __block_end; ++__block_start)
+    if (std::__invoke(__proj, *__block_start) == __min_val)
+      break;
+  return __block_start;
+}
+
+template <class _Comp, class _Iter, class _Sent, class _Proj>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
+__min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
+  return std::__min_element<_Comp>(
+      std::move(__first), std::move(__last), __comp, __proj, __ConstTimeDistance<_Iter, _Sent>());
 }
 
 template <class _Comp, class _Iter, class _Sent>

>From f8e61314b7575a1447139ffc344bddc87a5dc06f Mon Sep 17 00:00:00 2001
From: AdityaK <hiraditya at msn.com>
Date: Tue, 15 Oct 2024 09:54:28 -0700
Subject: [PATCH 3/3] Update CMakeLists.txt

---
 libcxx/benchmarks/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 952777d5346f7a..d96ccc1e49f66b 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -121,7 +121,6 @@ set(BENCHMARK_TESTS
     algorithms/make_heap_then_sort_heap.bench.cpp
     algorithms/min.bench.cpp
     algorithms/minmax.bench.cpp
-    algorithms/min_element.bench.cpp
     algorithms/min_max_element.bench.cpp
     algorithms/mismatch.bench.cpp
     algorithms/pop_heap.bench.cpp



More information about the libcxx-commits mailing list