[libcxx-commits] [libcxx] aff3cdc - [libc++] Optimize std::ranges::{min, max} for types that are cheap to copy

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Sat Mar 11 07:28:30 PST 2023


Author: Nikolas Klauser
Date: 2023-03-11T16:28:24+01:00
New Revision: aff3cdc604b01d3db1674e8d1402a5e5727fa087

URL: https://github.com/llvm/llvm-project/commit/aff3cdc604b01d3db1674e8d1402a5e5727fa087
DIFF: https://github.com/llvm/llvm-project/commit/aff3cdc604b01d3db1674e8d1402a5e5727fa087.diff

LOG: [libc++] Optimize std::ranges::{min, max} for types that are cheap to copy

For types that are trivially copyable and no larger than `std::intmax_t`, don't forward to `min_element`; instead use a naive loop that keeps track of the smallest (or, for `max`, largest) element itself, as opposed to an iterator to that element. This allows the compiler to vectorize the loop in some cases.

Reviewed By: #libc, ldionne

Spies: ldionne, libcxx-commits

Differential Revision: https://reviews.llvm.org/D143596

Added: 
    libcxx/benchmarks/algorithms/min.bench.cpp

Modified: 
    libcxx/benchmarks/CMakeLists.txt
    libcxx/include/__algorithm/ranges_max.h
    libcxx/include/__algorithm/ranges_min.h
    libcxx/include/__type_traits/is_trivially_copyable.h
    libcxx/test/libcxx/transitive_includes/cxx2b.csv
    libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
    libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp

Removed: 
    


################################################################################
diff  --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 572f210d71004..979e9c9319a98 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -162,6 +162,7 @@ set(BENCHMARK_TESTS
     algorithms/lower_bound.bench.cpp
     algorithms/make_heap.bench.cpp
     algorithms/make_heap_then_sort_heap.bench.cpp
+    algorithms/min.bench.cpp
     algorithms/min_max_element.bench.cpp
     algorithms/pop_heap.bench.cpp
     algorithms/push_heap.bench.cpp

diff  --git a/libcxx/benchmarks/algorithms/min.bench.cpp b/libcxx/benchmarks/algorithms/min.bench.cpp
new file mode 100644
index 0000000000000..1e1dd4e71c2ef
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/min.bench.cpp
@@ -0,0 +1,70 @@
+#include <algorithm>
+#include <cassert>
+
+#include <benchmark/benchmark.h>
+
+void run_sizes(auto benchmark) {
+  benchmark->Arg(1)
+      ->Arg(2)
+      ->Arg(3)
+      ->Arg(4)
+      ->Arg(5)
+      ->Arg(6)
+      ->Arg(7)
+      ->Arg(8)
+      ->Arg(9)
+      ->Arg(10)
+      ->Arg(11)
+      ->Arg(12)
+      ->Arg(13)
+      ->Arg(14)
+      ->Arg(15)
+      ->Arg(16)
+      ->Arg(17)
+      ->Arg(18)
+      ->Arg(19)
+      ->Arg(20)
+      ->Arg(21)
+      ->Arg(22)
+      ->Arg(23)
+      ->Arg(24)
+      ->Arg(25)
+      ->Arg(26)
+      ->Arg(27)
+      ->Arg(28)
+      ->Arg(29)
+      ->Arg(30)
+      ->Arg(31)
+      ->Arg(32)
+      ->Arg(64)
+      ->Arg(512)
+      ->Arg(1024)
+      ->Arg(4000)
+      ->Arg(4096)
+      ->Arg(5500)
+      ->Arg(64000)
+      ->Arg(65536)
+      ->Arg(70000);
+}
+
+template <class T>
+static void BM_std_min(benchmark::State& state) {
+  std::vector<T> vec(state.range(), 3);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(vec);
+    benchmark::DoNotOptimize(std::ranges::min(vec));
+  }
+}
+BENCHMARK(BM_std_min<char>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<short>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<int>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<__int128>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned char>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned short>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned int>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned __int128>)->Apply(run_sizes);
+
+BENCHMARK_MAIN();

diff  --git a/libcxx/include/__algorithm/ranges_max.h b/libcxx/include/__algorithm/ranges_max.h
index b3c7fb163de1e..2fd2970bbf35d 100644
--- a/libcxx/include/__algorithm/ranges_max.h
+++ b/libcxx/include/__algorithm/ranges_max.h
@@ -20,6 +20,7 @@
 #include <__iterator/projected.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
+#include <__type_traits/is_trivially_copyable.h>
 #include <__utility/move.h>
 #include <initializer_list>
 
@@ -67,7 +68,7 @@ struct __fn {
 
     _LIBCPP_ASSERT(__first != __last, "range must contain at least one element");
 
-    if constexpr (forward_range<_Rp>) {
+    if constexpr (forward_range<_Rp> && !__is_cheap_to_copy<range_value_t<_Rp>>) {
       auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); };
       return *ranges::__min_element_impl(std::move(__first), std::move(__last), __comp_lhs_rhs_swapped, __proj);
     } else {

diff  --git a/libcxx/include/__algorithm/ranges_min.h b/libcxx/include/__algorithm/ranges_min.h
index ba9dad5e9c200..5e941a1db06a8 100644
--- a/libcxx/include/__algorithm/ranges_min.h
+++ b/libcxx/include/__algorithm/ranges_min.h
@@ -20,6 +20,7 @@
 #include <__iterator/projected.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
+#include <__type_traits/is_trivially_copyable.h>
 #include <initializer_list>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -61,10 +62,8 @@ struct __fn {
   range_value_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const {
     auto __first = ranges::begin(__r);
     auto __last = ranges::end(__r);
-
     _LIBCPP_ASSERT(__first != __last, "range must contain at least one element");
-
-    if constexpr (forward_range<_Rp>) {
+    if constexpr (forward_range<_Rp> && !__is_cheap_to_copy<range_value_t<_Rp>>) {
       return *ranges::__min_element_impl(__first, __last, __comp, __proj);
     } else {
       range_value_t<_Rp> __result = *__first;

diff  --git a/libcxx/include/__type_traits/is_trivially_copyable.h b/libcxx/include/__type_traits/is_trivially_copyable.h
index 3e6d598bb2233..a725a0beb4ffa 100644
--- a/libcxx/include/__type_traits/is_trivially_copyable.h
+++ b/libcxx/include/__type_traits/is_trivially_copyable.h
@@ -11,6 +11,7 @@
 
 #include <__config>
 #include <__type_traits/integral_constant.h>
+#include <cstdint>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -27,6 +28,11 @@ template <class _Tp>
 inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable(_Tp);
 #endif
 
+#if _LIBCPP_STD_VER >= 20
+template <class _Tp>
+inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t);
+#endif
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H

diff  --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv
index 06ee226d91bf8..beb4acc3211c6 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx2b.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx2b.csv
@@ -652,6 +652,7 @@ utility limits
 utility version
 valarray cmath
 valarray cstddef
+valarray cstdint
 valarray cstdlib
 valarray initializer_list
 valarray limits

diff  --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
index 3e0d82daba7d8..e9e5240cf8cde 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
@@ -176,19 +176,38 @@ constexpr void test_initializer_list() {
 
 template <class It, class Sent = It>
 constexpr void test_range_types() {
-  int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+  std::iter_value_t<It> a[] = {7, 6, 9, 3, 5, 1, 2, 4};
   auto range = std::ranges::subrange(It(a), Sent(It(a + 8)));
-  int ret = std::ranges::max(range);
+  auto ret = std::ranges::max(range);
   assert(ret == 9);
 }
 
 constexpr void test_range() {
-  { // check that all range types work
-    test_range_types<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
-    test_range_types<forward_iterator<int*>>();
-    test_range_types<bidirectional_iterator<int*>>();
-    test_range_types<random_access_iterator<int*>>();
-    test_range_types<contiguous_iterator<int*>>();
+  // check that all range types work
+  {
+    struct NonTrivialInt {
+      int val_;
+      constexpr NonTrivialInt(int val) : val_(val) {}
+      constexpr NonTrivialInt(const NonTrivialInt& other) : val_(other.val_) {}
+      constexpr NonTrivialInt& operator=(const NonTrivialInt& other) {
+        val_ = other.val_;
+        return *this;
+      }
+
+      constexpr ~NonTrivialInt() {}
+
+      auto operator<=>(const NonTrivialInt&) const = default;
+    };
+
+    auto call_with_sentinels = []<class Iter> {
+      if constexpr (std::forward_iterator<Iter>)
+        test_range_types<Iter, Iter>();
+      test_range_types<Iter, sentinel_wrapper<Iter>>();
+      test_range_types<Iter, sized_sentinel<Iter>>();
+    };
+
+    types::for_each(types::cpp20_input_iterator_list<int*>{}, call_with_sentinels);
+    types::for_each(types::cpp20_input_iterator_list<NonTrivialInt*>{}, call_with_sentinels);
   }
 
   int a[] = {7, 6, 9, 3, 5, 1, 2, 4};

diff  --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp
index a211fe6ed5269..3d92964c90daa 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp
@@ -171,19 +171,38 @@ constexpr void test_initializer_list() {
 
 template <class It, class Sent = It>
 constexpr void test_range_types() {
-  int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+  std::iter_value_t<It> a[] = {7, 6, 9, 3, 5, 1, 2, 4};
   auto range = std::ranges::subrange(It(a), Sent(It(a + 8)));
-  int ret = std::ranges::min(range);
+  auto ret = std::ranges::min(range);
   assert(ret == 1);
 }
 
 constexpr void test_range() {
-  { // check that all range types work
-    test_range_types<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
-    test_range_types<forward_iterator<int*>>();
-    test_range_types<bidirectional_iterator<int*>>();
-    test_range_types<random_access_iterator<int*>>();
-    test_range_types<contiguous_iterator<int*>>();
+  // check that all range types work
+  {
+    struct NonTrivialInt {
+      int val_;
+      constexpr NonTrivialInt(int val) : val_(val) {}
+      constexpr NonTrivialInt(const NonTrivialInt& other) : val_(other.val_) {}
+      constexpr NonTrivialInt& operator=(const NonTrivialInt& other) {
+        val_ = other.val_;
+        return *this;
+      }
+
+      constexpr ~NonTrivialInt() {}
+
+      auto operator<=>(const NonTrivialInt&) const = default;
+    };
+
+    auto call_with_sentinels = []<class Iter> {
+      if constexpr (std::forward_iterator<Iter>)
+        test_range_types<Iter, Iter>();
+      test_range_types<Iter, sentinel_wrapper<Iter>>();
+      test_range_types<Iter, sized_sentinel<Iter>>();
+    };
+
+    types::for_each(types::cpp20_input_iterator_list<int*>{}, call_with_sentinels);
+    types::for_each(types::cpp20_input_iterator_list<NonTrivialInt*>{}, call_with_sentinels);
   }
 
   int a[] = {7, 6, 9, 3, 5, 1, 2, 4};


        


More information about the libcxx-commits mailing list