[libcxx-commits] [libcxx] [libc++] Optimize ranges::minmax (PR #87335)
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Fri Apr 5 02:41:20 PDT 2024
https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/87335
>From 937b17a9eef2e4445b7d2b4f9de609edf922489c Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Mon, 11 Mar 2024 13:18:12 +0100
Subject: [PATCH] [libc++] Optimize ranges::minmax
---
libcxx/benchmarks/CMakeLists.txt | 1 +
libcxx/benchmarks/algorithms/minmax.bench.cpp | 68 +++++++++++++++++++
libcxx/docs/ReleaseNotes/19.rst | 2 +
libcxx/include/__algorithm/comp.h | 3 +
libcxx/include/__algorithm/ranges_minmax.h | 17 ++++-
libcxx/include/__functional/operations.h | 6 ++
.../include/__functional/ranges_operations.h | 3 +
libcxx/include/__type_traits/desugars_to.h | 1 +
8 files changed, 100 insertions(+), 1 deletion(-)
create mode 100644 libcxx/benchmarks/algorithms/minmax.bench.cpp
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 387e013afeb6c4..928238c1ac69ba 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -182,6 +182,7 @@ set(BENCHMARK_TESTS
algorithms/make_heap.bench.cpp
algorithms/make_heap_then_sort_heap.bench.cpp
algorithms/min.bench.cpp
+ algorithms/minmax.bench.cpp
algorithms/min_max_element.bench.cpp
algorithms/mismatch.bench.cpp
algorithms/pop_heap.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/minmax.bench.cpp b/libcxx/benchmarks/algorithms/minmax.bench.cpp
new file mode 100644
index 00000000000000..b0ff7f91c19939
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/minmax.bench.cpp
@@ -0,0 +1,68 @@
+#include <algorithm>
+#include <cassert>
+
+#include <benchmark/benchmark.h>
+
+void run_sizes(auto benchmark) { // Registers every input size (vector element count) the benchmark is run with.
+ benchmark->Arg(1) // sizes 1 through 32, one at a time
+ ->Arg(2)
+ ->Arg(3)
+ ->Arg(4)
+ ->Arg(5)
+ ->Arg(6)
+ ->Arg(7)
+ ->Arg(8)
+ ->Arg(9)
+ ->Arg(10)
+ ->Arg(11)
+ ->Arg(12)
+ ->Arg(13)
+ ->Arg(14)
+ ->Arg(15)
+ ->Arg(16)
+ ->Arg(17)
+ ->Arg(18)
+ ->Arg(19)
+ ->Arg(20)
+ ->Arg(21)
+ ->Arg(22)
+ ->Arg(23)
+ ->Arg(24)
+ ->Arg(25)
+ ->Arg(26)
+ ->Arg(27)
+ ->Arg(28)
+ ->Arg(29)
+ ->Arg(30)
+ ->Arg(31)
+ ->Arg(32)
+ ->Arg(64) // then a few larger sizes: powers of two mixed with values that are not
+ ->Arg(512)
+ ->Arg(1024)
+ ->Arg(4000)
+ ->Arg(4096)
+ ->Arg(5500)
+ ->Arg(64000)
+ ->Arg(65536)
+ ->Arg(70000);
+}
+
+template <class T> // T: element type of the vector being scanned
+static void BM_std_minmax(benchmark::State& state) { // Times std::ranges::minmax over state.range() elements.
+ std::vector<T> vec(state.range(), 3); // all elements equal 3; NOTE(review): <vector> is not among this file's includes
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(vec); // presumably stops the compiler from assuming the contents are known - confirm
+ benchmark::DoNotOptimize(std::ranges::minmax(vec)); // presumably keeps the result from being discarded - confirm
+ }
+}
+BENCHMARK(BM_std_minmax<char>)->Apply(run_sizes); // one instantiation per element type, each over the run_sizes list
+BENCHMARK(BM_std_minmax<short>)->Apply(run_sizes);
+BENCHMARK(BM_std_minmax<int>)->Apply(run_sizes);
+BENCHMARK(BM_std_minmax<long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_minmax<unsigned char>)->Apply(run_sizes); // unsigned counterparts of short, int and long long
+BENCHMARK(BM_std_minmax<unsigned short>)->Apply(run_sizes);
+BENCHMARK(BM_std_minmax<unsigned int>)->Apply(run_sizes);
+BENCHMARK(BM_std_minmax<unsigned long long>)->Apply(run_sizes);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index 2da9df54a53198..a420b599cd597e 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -54,6 +54,8 @@ Improvements and New Features
resulting in a performance increase of up to 1400x.
- The ``std::mismatch`` algorithm has been optimized for integral types, which can lead up to 40x performance
improvements.
+- The ``std::ranges::minmax`` algorithm has been optimized for integral types, resulting in a performance increase of
+ up to 100x.
- The ``_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM`` macro has been added to make the declarations in ``<strstream>`` available.
diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h
index a089375e3da139..a0fa88d6d2acd3 100644
--- a/libcxx/include/__algorithm/comp.h
+++ b/libcxx/include/__algorithm/comp.h
@@ -41,6 +41,9 @@ struct __less<void, void> {
}
};
+template <class _Tp> // __less<> applied to two _Tp operands is flagged as the canonical __less_tag operation
+inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true;
+
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ALGORITHM_COMP_H
diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h
index 22a62b620c936f..ca5722523336fd 100644
--- a/libcxx/include/__algorithm/ranges_minmax.h
+++ b/libcxx/include/__algorithm/ranges_minmax.h
@@ -23,7 +23,9 @@
#include <__iterator/projected.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
+#include <__type_traits/desugars_to.h>
#include <__type_traits/is_reference.h>
+#include <__type_traits/is_trivially_copyable.h>
#include <__type_traits/remove_cvref.h>
#include <__utility/forward.h>
#include <__utility/move.h>
@@ -83,7 +85,20 @@ struct __fn {
_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__first != __last, "range has to contain at least one element");
- if constexpr (forward_range<_Range>) {
+ // This optimization is not in minmax_element because clang doesn't see through the pointers and as a result doesn't
+ // vectorize the code.
+ if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> &&
+ __is_cheap_to_copy<_ValueT> && __is_identity<_Proj>::value &&
+ __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) {
+ minmax_result<_ValueT> __result = {__r[0], __r[0]}; // seed from the first element (range asserted non-empty)
+ for (auto __e : __r) {
+ if (__e < __result.min)
+ __result.min = __e;
+ if (__result.max < __e)
+ __result.max = __e;
+ }
+ return __result;
+ } else if constexpr (forward_range<_Range>) {
// Special-case the one element case. Avoid repeatedly initializing objects from the result of an iterator
// dereference when doing so might not be idempotent. The `if constexpr` avoids the extra branch in cases where
// it's not needed.
diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h
index 9aa28e4925069c..240f127e542553 100644
--- a/libcxx/include/__functional/operations.h
+++ b/libcxx/include/__functional/operations.h
@@ -359,6 +359,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> {
};
_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less);
+template <class _Tp> // less<_Tp> applied to two _Tp operands is flagged as the canonical __less_tag operation
+inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true;
+
#if _LIBCPP_STD_VER >= 14
template <>
struct _LIBCPP_TEMPLATE_VIS less<void> {
@@ -370,6 +373,9 @@ struct _LIBCPP_TEMPLATE_VIS less<void> {
}
typedef void is_transparent;
};
+
+template <class _Tp> // less<> applied to two operands of the same type _Tp is flagged as the __less_tag operation
+inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Tp> = true;
#endif
#if _LIBCPP_STD_VER >= 14
diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h
index a9dffaf6962585..27f06eadd0eb11 100644
--- a/libcxx/include/__functional/ranges_operations.h
+++ b/libcxx/include/__functional/ranges_operations.h
@@ -99,6 +99,9 @@ struct greater_equal {
template <class _Tp, class _Up>
inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = true;
+template <class _Tp, class _Up> // ranges::less is flagged as the __less_tag operation for any operand types _Tp, _Up
+inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true;
+
#endif // _LIBCPP_STD_VER >= 20
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h
index a8f69c28dfc520..97a2ee5448f203 100644
--- a/libcxx/include/__type_traits/desugars_to.h
+++ b/libcxx/include/__type_traits/desugars_to.h
@@ -20,6 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// Tags to represent the canonical operations
struct __equal_tag {};
struct __plus_tag {};
+struct __less_tag {}; // tag for the canonical less-than comparison
// This class template is used to determine whether an operation "desugars"
// (or boils down) to a given canonical operation.
More information about the libcxx-commits
mailing list