[libcxx-commits] [libcxx] aff3cdc - [libc++] Optimize std::ranges::{min, max} for types that are cheap to copy
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Mar 11 07:28:30 PST 2023
Author: Nikolas Klauser
Date: 2023-03-11T16:28:24+01:00
New Revision: aff3cdc604b01d3db1674e8d1402a5e5727fa087
URL: https://github.com/llvm/llvm-project/commit/aff3cdc604b01d3db1674e8d1402a5e5727fa087
DIFF: https://github.com/llvm/llvm-project/commit/aff3cdc604b01d3db1674e8d1402a5e5727fa087.diff
LOG: [libc++] Optimize std::ranges::{min, max} for types that are cheap to copy
Don't forward to `min_element` for small types that are trivially copyable, and instead use a naive loop that keeps track of the smallest element (as opposed to an iterator to the smallest element). This allows the compiler to vectorize the loop in some cases.
Reviewed By: #libc, ldionne
Spies: ldionne, libcxx-commits
Differential Revision: https://reviews.llvm.org/D143596
Added:
libcxx/benchmarks/algorithms/min.bench.cpp
Modified:
libcxx/benchmarks/CMakeLists.txt
libcxx/include/__algorithm/ranges_max.h
libcxx/include/__algorithm/ranges_min.h
libcxx/include/__type_traits/is_trivially_copyable.h
libcxx/test/libcxx/transitive_includes/cxx2b.csv
libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp
Removed:
################################################################################
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 572f210d71004..979e9c9319a98 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -162,6 +162,7 @@ set(BENCHMARK_TESTS
algorithms/lower_bound.bench.cpp
algorithms/make_heap.bench.cpp
algorithms/make_heap_then_sort_heap.bench.cpp
+ algorithms/min.bench.cpp
algorithms/min_max_element.bench.cpp
algorithms/pop_heap.bench.cpp
algorithms/push_heap.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/min.bench.cpp b/libcxx/benchmarks/algorithms/min.bench.cpp
new file mode 100644
index 0000000000000..1e1dd4e71c2ef
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/min.bench.cpp
@@ -0,0 +1,71 @@
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+void run_sizes(auto benchmark) { // Registers every input size the benchmark is run with, via chained Arg() calls.
+ benchmark->Arg(1) // every size from 1 through 32 is listed individually
+ ->Arg(2)
+ ->Arg(3)
+ ->Arg(4)
+ ->Arg(5)
+ ->Arg(6)
+ ->Arg(7)
+ ->Arg(8)
+ ->Arg(9)
+ ->Arg(10)
+ ->Arg(11)
+ ->Arg(12)
+ ->Arg(13)
+ ->Arg(14)
+ ->Arg(15)
+ ->Arg(16)
+ ->Arg(17)
+ ->Arg(18)
+ ->Arg(19)
+ ->Arg(20)
+ ->Arg(21)
+ ->Arg(22)
+ ->Arg(23)
+ ->Arg(24)
+ ->Arg(25)
+ ->Arg(26)
+ ->Arg(27)
+ ->Arg(28)
+ ->Arg(29)
+ ->Arg(30)
+ ->Arg(31)
+ ->Arg(32)
+ ->Arg(64) // from here on only selected larger sizes, up to 70000
+ ->Arg(512)
+ ->Arg(1024)
+ ->Arg(4000)
+ ->Arg(4096)
+ ->Arg(5500)
+ ->Arg(64000)
+ ->Arg(65536)
+ ->Arg(70000);
+}
+
+template <class T> // T is the element type of the vector being searched
+static void BM_std_min(benchmark::State& state) { // Benchmarks std::ranges::min over a std::vector<T>.
+ std::vector<T> vec(state.range(), 3); // state.range() elements, all equal to 3
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(vec); // NOTE(review): presumably stops the compiler from assuming vec's contents — confirm against the Google Benchmark docs
+ benchmark::DoNotOptimize(std::ranges::min(vec)); // the result is handed to DoNotOptimize rather than discarded
+ }
+}
+BENCHMARK(BM_std_min<char>)->Apply(run_sizes); // one registration per integer type; run_sizes supplies the sizes for each
+BENCHMARK(BM_std_min<short>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<int>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<__int128>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned char>)->Apply(run_sizes); // unsigned counterparts of the types above
+BENCHMARK(BM_std_min<unsigned short>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned int>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned __int128>)->Apply(run_sizes);
+
+BENCHMARK_MAIN(); // NOTE(review): presumably expands to the program's main() — confirm against the Google Benchmark docs
diff --git a/libcxx/include/__algorithm/ranges_max.h b/libcxx/include/__algorithm/ranges_max.h
index b3c7fb163de1e..2fd2970bbf35d 100644
--- a/libcxx/include/__algorithm/ranges_max.h
+++ b/libcxx/include/__algorithm/ranges_max.h
@@ -20,6 +20,7 @@
#include <__iterator/projected.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
+#include <__type_traits/is_trivially_copyable.h>
#include <__utility/move.h>
#include <initializer_list>
@@ -67,7 +68,7 @@ struct __fn {
_LIBCPP_ASSERT(__first != __last, "range must contain at least one element");
- if constexpr (forward_range<_Rp>) {
+ if constexpr (forward_range<_Rp> && !__is_cheap_to_copy<range_value_t<_Rp>>) {
auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); };
return *ranges::__min_element_impl(std::move(__first), std::move(__last), __comp_lhs_rhs_swapped, __proj);
} else {
diff --git a/libcxx/include/__algorithm/ranges_min.h b/libcxx/include/__algorithm/ranges_min.h
index ba9dad5e9c200..5e941a1db06a8 100644
--- a/libcxx/include/__algorithm/ranges_min.h
+++ b/libcxx/include/__algorithm/ranges_min.h
@@ -20,6 +20,7 @@
#include <__iterator/projected.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
+#include <__type_traits/is_trivially_copyable.h>
#include <initializer_list>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -61,10 +62,8 @@ struct __fn {
range_value_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const {
auto __first = ranges::begin(__r);
auto __last = ranges::end(__r);
-
_LIBCPP_ASSERT(__first != __last, "range must contain at least one element");
-
- if constexpr (forward_range<_Rp>) {
+ if constexpr (forward_range<_Rp> && !__is_cheap_to_copy<range_value_t<_Rp>>) {
return *ranges::__min_element_impl(__first, __last, __comp, __proj);
} else {
range_value_t<_Rp> __result = *__first;
diff --git a/libcxx/include/__type_traits/is_trivially_copyable.h b/libcxx/include/__type_traits/is_trivially_copyable.h
index 3e6d598bb2233..a725a0beb4ffa 100644
--- a/libcxx/include/__type_traits/is_trivially_copyable.h
+++ b/libcxx/include/__type_traits/is_trivially_copyable.h
@@ -11,6 +11,7 @@
#include <__config>
#include <__type_traits/integral_constant.h>
+#include <cstdint>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -27,6 +28,11 @@ template <class _Tp>
inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable(_Tp);
#endif
+#if _LIBCPP_STD_VER >= 20
+template <class _Tp> // _Tp is the type being classified
+inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t); // true only if trivially copyable and no larger than std::intmax_t
+#endif // _LIBCPP_STD_VER >= 20
+
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H
diff --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv
index 06ee226d91bf8..beb4acc3211c6 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx2b.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx2b.csv
@@ -652,6 +652,7 @@ utility limits
utility version
valarray cmath
valarray cstddef
+valarray cstdint
valarray cstdlib
valarray initializer_list
valarray limits
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
index 3e0d82daba7d8..e9e5240cf8cde 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
@@ -176,19 +176,38 @@ constexpr void test_initializer_list() {
template <class It, class Sent = It>
constexpr void test_range_types() {
- int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+ std::iter_value_t<It> a[] = {7, 6, 9, 3, 5, 1, 2, 4};
auto range = std::ranges::subrange(It(a), Sent(It(a + 8)));
- int ret = std::ranges::max(range);
+ auto ret = std::ranges::max(range);
assert(ret == 9);
}
constexpr void test_range() {
- { // check that all range types work
- test_range_types<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
- test_range_types<forward_iterator<int*>>();
- test_range_types<bidirectional_iterator<int*>>();
- test_range_types<random_access_iterator<int*>>();
- test_range_types<contiguous_iterator<int*>>();
+ // check that all range types work
+ {
+ struct NonTrivialInt { // int wrapper with user-provided copy operations and destructor
+ int val_;
+ constexpr NonTrivialInt(int val) : val_(val) {} // implicit conversion from int, used by the braced array initializers in test_range_types
+ constexpr NonTrivialInt(const NonTrivialInt& other) : val_(other.val_) {} // user-provided, so the type is not trivially copyable
+ constexpr NonTrivialInt& operator=(const NonTrivialInt& other) {
+ val_ = other.val_;
+ return *this;
+ }
+
+ constexpr ~NonTrivialInt() {} // user-provided as well
+
+ auto operator<=>(const NonTrivialInt&) const = default; // defaulted three-way comparison; also implicitly declares a defaulted operator==
+ };
+
+ auto call_with_sentinels = []<class Iter> { // runs test_range_types for Iter paired with each sentinel kind
+ if constexpr (std::forward_iterator<Iter>)
+ test_range_types<Iter, Iter>(); // iterator as its own sentinel, only when Iter models forward_iterator
+ test_range_types<Iter, sentinel_wrapper<Iter>>();
+ test_range_types<Iter, sized_sentinel<Iter>>();
+ };
+
+ types::for_each(types::cpp20_input_iterator_list<int*>{}, call_with_sentinels);
+ types::for_each(types::cpp20_input_iterator_list<NonTrivialInt*>{}, call_with_sentinels);
}
int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp
index a211fe6ed5269..3d92964c90daa 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp
@@ -171,19 +171,38 @@ constexpr void test_initializer_list() {
template <class It, class Sent = It>
constexpr void test_range_types() {
- int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+ std::iter_value_t<It> a[] = {7, 6, 9, 3, 5, 1, 2, 4};
auto range = std::ranges::subrange(It(a), Sent(It(a + 8)));
- int ret = std::ranges::min(range);
+ auto ret = std::ranges::min(range);
assert(ret == 1);
}
constexpr void test_range() {
- { // check that all range types work
- test_range_types<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
- test_range_types<forward_iterator<int*>>();
- test_range_types<bidirectional_iterator<int*>>();
- test_range_types<random_access_iterator<int*>>();
- test_range_types<contiguous_iterator<int*>>();
+ // check that all range types work
+ {
+ struct NonTrivialInt { // int wrapper with user-provided copy operations and destructor
+ int val_;
+ constexpr NonTrivialInt(int val) : val_(val) {} // implicit conversion from int, used by the braced array initializers in test_range_types
+ constexpr NonTrivialInt(const NonTrivialInt& other) : val_(other.val_) {} // user-provided, so the type is not trivially copyable
+ constexpr NonTrivialInt& operator=(const NonTrivialInt& other) {
+ val_ = other.val_;
+ return *this;
+ }
+
+ constexpr ~NonTrivialInt() {} // user-provided as well
+
+ auto operator<=>(const NonTrivialInt&) const = default; // defaulted three-way comparison; also implicitly declares a defaulted operator==
+ };
+
+ auto call_with_sentinels = []<class Iter> { // runs test_range_types for Iter paired with each sentinel kind
+ if constexpr (std::forward_iterator<Iter>)
+ test_range_types<Iter, Iter>(); // iterator as its own sentinel, only when Iter models forward_iterator
+ test_range_types<Iter, sentinel_wrapper<Iter>>();
+ test_range_types<Iter, sized_sentinel<Iter>>();
+ };
+
+ types::for_each(types::cpp20_input_iterator_list<int*>{}, call_with_sentinels);
+ types::for_each(types::cpp20_input_iterator_list<NonTrivialInt*>{}, call_with_sentinels);
}
int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
More information about the libcxx-commits
mailing list