[libcxx-commits] [libcxx] [libc++] Optimize std::min_element (PR #100616)
via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Jul 25 11:57:40 PDT 2024
https://github.com/mrdaybird updated https://github.com/llvm/llvm-project/pull/100616
From cb48c93356fef4b0197e168daff220702c3e74c0 Mon Sep 17 00:00:00 2001
From: Vaibhav Pathak <pathakvaibhav at protonmail.com>
Date: Thu, 25 Jul 2024 22:01:54 +0530
Subject: [PATCH 1/4] [libc++] Optimize std::min_element
---
libcxx/benchmarks/CMakeLists.txt | 1 +
libcxx/benchmarks/min_element.bench.cpp | 48 +++++++++++++++++++++
libcxx/include/__algorithm/min_element.h | 55 +++++++++++++++++++++---
3 files changed, 99 insertions(+), 5 deletions(-)
create mode 100644 libcxx/benchmarks/min_element.bench.cpp
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index d96ccc1e49f66..952777d5346f7 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -121,6 +121,7 @@ set(BENCHMARK_TESTS
algorithms/make_heap_then_sort_heap.bench.cpp
algorithms/min.bench.cpp
algorithms/minmax.bench.cpp
+ algorithms/min_element.bench.cpp
algorithms/min_max_element.bench.cpp
algorithms/mismatch.bench.cpp
algorithms/pop_heap.bench.cpp
diff --git a/libcxx/benchmarks/min_element.bench.cpp b/libcxx/benchmarks/min_element.bench.cpp
new file mode 100644
index 0000000000000..ed769aeeea7d2
--- /dev/null
+++ b/libcxx/benchmarks/min_element.bench.cpp
@@ -0,0 +1,48 @@
+#include <vector>
+#include <algorithm>
+#include <limits>
+
+#include <benchmark/benchmark.h>
+#include <random>
+
+template<typename T>
+static void BM_stdmin_element_decreasing(benchmark::State &state){
+ std::vector<T> v(state.range(0));
+ T start = std::numeric_limits<T>::max();
+ T end = std::numeric_limits<T>::min();
+
+ for(size_t i = 0; i < v.size(); i++)
+ v[i] = ((start != end) ? start-- : end);
+
+ for(auto _ : state){
+ benchmark::DoNotOptimize(v);
+ benchmark::DoNotOptimize(std::min_element(v.begin(), v.end()));
+ }
+}
+
+BENCHMARK(BM_stdmin_element_decreasing<char>)
+ ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
+ ->Range(1<<14, 1<<16)->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<short>)
+ ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
+ ->Range(1<<14, 1<<16)->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<int>)
+ ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
+ ->Range(1<<14, 1<<16)->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<long long>)
+ ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
+ ->Range(1<<14, 1<<16)->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned char>)
+ ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
+ ->Range(1<<14, 1<<16)->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned short>)
+ ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
+ ->Range(1<<14, 1<<16)->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned int>)
+ ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
+ ->Range(1<<14, 1<<16)->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned long long>)
+ ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
+ ->Range(1<<14, 1<<16)->Arg(70000);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h
index 65f3594d630ce..891a4afae3ea1 100644
--- a/libcxx/include/__algorithm/min_element.h
+++ b/libcxx/include/__algorithm/min_element.h
@@ -11,6 +11,7 @@
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
+#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
@@ -33,12 +34,56 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
if (__first == __last)
return __first;
- _Iter __i = __first;
- while (++__i != __last)
- if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
- __first = __i;
+ const size_t __n = static_cast<size_t>(std::distance(__first, __last));
- return __first;
+ if (__n <= 64) {
+ _Iter __i = __first;
+ while (++__i != __last)
+ if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
+ __first = __i;
+ return __first;
+ }
+
+ size_t __block_size = 256;
+
+ size_t __n_blocked = __n - (__n % __block_size);
+ _Iter __block_start = __first, __block_end = __first;
+
+ typedef typename std::iterator_traits<_Iter>::value_type value_type;
+ value_type __min_val = std::invoke(__proj, *__first);
+
+ _Iter __curr = __first;
+ for (size_t __i = 0; __i < __n_blocked; __i += __block_size) {
+ _Iter __start = __curr;
+ value_type __block_min = __min_val;
+ for (size_t j = 0; j < __block_size; j++) {
+ if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __block_min)) {
+ __block_min = *__curr;
+ }
+ __curr++;
+ }
+ if (std::invoke(__comp, __block_min, __min_val)) {
+ __min_val = __block_min;
+ __block_start = __start;
+ __block_end = __curr;
+ }
+ }
+
+ value_type __epilogue_min = __min_val;
+ _Iter __epilogue_start = __curr;
+ while (__curr != __last) {
+ if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __epilogue_min)) {
+ __epilogue_min = *__curr;
+ }
+ __curr++;
+ }
+ if (std::__invoke(__comp, __epilogue_min, __min_val)) {
+ __min_val = __epilogue_min;
+ __block_start = __epilogue_start;
+ __block_end = __last;
+ }
+
+ return find(__block_start, __block_end, __min_val);
}
template <class _Comp, class _Iter, class _Sent>
From baedc8e618d32188df17ebabe4a058d7f2493a0c Mon Sep 17 00:00:00 2001
From: Vaibhav Pathak <pathakvaibhav at protonmail.com>
Date: Thu, 25 Jul 2024 23:43:53 +0530
Subject: [PATCH 2/4] [libc++] Fix formatting and move min_element.bench.cpp
---
.../algorithms/min_element.bench.cpp | 80 +++++++++++++++++++
libcxx/benchmarks/min_element.bench.cpp | 48 -----------
2 files changed, 80 insertions(+), 48 deletions(-)
create mode 100644 libcxx/benchmarks/algorithms/min_element.bench.cpp
delete mode 100644 libcxx/benchmarks/min_element.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/min_element.bench.cpp b/libcxx/benchmarks/algorithms/min_element.bench.cpp
new file mode 100644
index 0000000000000..7edb8ddbb15f2
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/min_element.bench.cpp
@@ -0,0 +1,80 @@
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+#include <random>
+
+template <typename T>
+static void BM_stdmin_element_decreasing(benchmark::State& state) {
+ std::vector<T> v(state.range(0));
+ T start = std::numeric_limits<T>::max();
+ T end = std::numeric_limits<T>::min();
+
+ for (size_t i = 0; i < v.size(); i++)
+ v[i] = ((start != end) ? start-- : end);
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(v);
+ benchmark::DoNotOptimize(std::min_element(v.begin(), v.end()));
+ }
+}
+
+BENCHMARK(BM_stdmin_element_decreasing<char>)
+ ->DenseRange(1, 8)
+ ->Range(32, 128)
+ ->Range(256, 4096)
+ ->DenseRange(5000, 10000, 1000)
+ ->Range(1 << 14, 1 << 16)
+ ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<short>)
+ ->DenseRange(1, 8)
+ ->Range(32, 128)
+ ->Range(256, 4096)
+ ->DenseRange(5000, 10000, 1000)
+ ->Range(1 << 14, 1 << 16)
+ ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<int>)
+ ->DenseRange(1, 8)
+ ->Range(32, 128)
+ ->Range(256, 4096)
+ ->DenseRange(5000, 10000, 1000)
+ ->Range(1 << 14, 1 << 16)
+ ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<long long>)
+ ->DenseRange(1, 8)
+ ->Range(32, 128)
+ ->Range(256, 4096)
+ ->DenseRange(5000, 10000, 1000)
+ ->Range(1 << 14, 1 << 16)
+ ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned char>)
+ ->DenseRange(1, 8)
+ ->Range(32, 128)
+ ->Range(256, 4096)
+ ->DenseRange(5000, 10000, 1000)
+ ->Range(1 << 14, 1 << 16)
+ ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned short>)
+ ->DenseRange(1, 8)
+ ->Range(32, 128)
+ ->Range(256, 4096)
+ ->DenseRange(5000, 10000, 1000)
+ ->Range(1 << 14, 1 << 16)
+ ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned int>)
+ ->DenseRange(1, 8)
+ ->Range(32, 128)
+ ->Range(256, 4096)
+ ->DenseRange(5000, 10000, 1000)
+ ->Range(1 << 14, 1 << 16)
+ ->Arg(70000);
+BENCHMARK(BM_stdmin_element_decreasing<unsigned long long>)
+ ->DenseRange(1, 8)
+ ->Range(32, 128)
+ ->Range(256, 4096)
+ ->DenseRange(5000, 10000, 1000)
+ ->Range(1 << 14, 1 << 16)
+ ->Arg(70000);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/benchmarks/min_element.bench.cpp b/libcxx/benchmarks/min_element.bench.cpp
deleted file mode 100644
index ed769aeeea7d2..0000000000000
--- a/libcxx/benchmarks/min_element.bench.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-#include <vector>
-#include <algorithm>
-#include <limits>
-
-#include <benchmark/benchmark.h>
-#include <random>
-
-template<typename T>
-static void BM_stdmin_element_decreasing(benchmark::State &state){
- std::vector<T> v(state.range(0));
- T start = std::numeric_limits<T>::max();
- T end = std::numeric_limits<T>::min();
-
- for(size_t i = 0; i < v.size(); i++)
- v[i] = ((start != end) ? start-- : end);
-
- for(auto _ : state){
- benchmark::DoNotOptimize(v);
- benchmark::DoNotOptimize(std::min_element(v.begin(), v.end()));
- }
-}
-
-BENCHMARK(BM_stdmin_element_decreasing<char>)
- ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
- ->Range(1<<14, 1<<16)->Arg(70000);
-BENCHMARK(BM_stdmin_element_decreasing<short>)
- ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
- ->Range(1<<14, 1<<16)->Arg(70000);
-BENCHMARK(BM_stdmin_element_decreasing<int>)
- ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
- ->Range(1<<14, 1<<16)->Arg(70000);
-BENCHMARK(BM_stdmin_element_decreasing<long long>)
- ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
- ->Range(1<<14, 1<<16)->Arg(70000);
-BENCHMARK(BM_stdmin_element_decreasing<unsigned char>)
- ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
- ->Range(1<<14, 1<<16)->Arg(70000);
-BENCHMARK(BM_stdmin_element_decreasing<unsigned short>)
- ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
- ->Range(1<<14, 1<<16)->Arg(70000);
-BENCHMARK(BM_stdmin_element_decreasing<unsigned int>)
- ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
- ->Range(1<<14, 1<<16)->Arg(70000);
-BENCHMARK(BM_stdmin_element_decreasing<unsigned long long>)
- ->DenseRange(1, 8)->Range(32, 128)->Range(256, 4096)->DenseRange(5000, 10000, 1000)
- ->Range(1<<14, 1<<16)->Arg(70000);
-
-BENCHMARK_MAIN();
From d3da2a71ee59cef06faea3bdca56f73807543512 Mon Sep 17 00:00:00 2001
From: Vaibhav Pathak <pathakvaibhav at protonmail.com>
Date: Thu, 25 Jul 2024 23:57:28 +0530
Subject: [PATCH 3/4] [libc++] Use __invoke instead of invoke
---
libcxx/include/__algorithm/min_element.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h
index 891a4afae3ea1..964c3ec4f976e 100644
--- a/libcxx/include/__algorithm/min_element.h
+++ b/libcxx/include/__algorithm/min_element.h
@@ -50,7 +50,7 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
_Iter __block_start = __first, __block_end = __first;
typedef typename std::iterator_traits<_Iter>::value_type value_type;
- value_type __min_val = std::invoke(__proj, *__first);
+ value_type __min_val = std::__invoke(__proj, *__first);
_Iter __curr = __first;
for (size_t __i = 0; __i < __n_blocked; __i += __block_size) {
@@ -62,7 +62,7 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
}
__curr++;
}
- if (std::invoke(__comp, __block_min, __min_val)) {
+ if (std::__invoke(__comp, __block_min, __min_val)) {
__min_val = __block_min;
__block_start = __start;
__block_end = __curr;
From a997ee5bbae0a7138f854afc5a83862282ecaa6c Mon Sep 17 00:00:00 2001
From: Vaibhav Pathak <pathakvaibhav at protonmail.com>
Date: Fri, 26 Jul 2024 00:25:48 +0530
Subject: [PATCH 4/4] [libc++] Fix build issues with find
---
libcxx/include/__algorithm/find.h | 2 +-
libcxx/include/__algorithm/min_element.h | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 7f58dbb13a577..8ecd53e63af21 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -104,7 +104,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
// do first partial word
if (__first.__ctz_ != 0) {
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
- __storage_type __dn = std::min(__clz_f, __n);
+ __storage_type __dn = (__clz_f < __n) ? __clz_f : __n;
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
if (__b)
diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h
index 964c3ec4f976e..25964a9e3a52f 100644
--- a/libcxx/include/__algorithm/min_element.h
+++ b/libcxx/include/__algorithm/min_element.h
@@ -11,6 +11,7 @@
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
+#include <__algorithm/find.h>
#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
@@ -83,7 +84,7 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
__block_end = __last;
}
- return find(__block_start, __block_end, __min_val);
+ return std::__find(__block_start, __block_end, __min_val, __proj);
}
template <class _Comp, class _Iter, class _Sent>
More information about the libcxx-commits
mailing list