[libcxx-commits] [libcxx] [libc++] Optimize rotate (PR #120890)
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Dec 22 03:40:41 PST 2024
https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/120890
```
----------------------------------------------------------
Benchmark old new
----------------------------------------------------------
BM_std_rotate<int>/1 0.707 ns 2.83 ns
BM_std_rotate<int>/2 0.706 ns 2.83 ns
BM_std_rotate<int>/3 4.93 ns 5.04 ns
BM_std_rotate<int>/4 5.12 ns 5.14 ns
BM_std_rotate<int>/32 41.3 ns 16.5 ns
BM_std_rotate<int>/64 94.1 ns 42.8 ns
BM_std_rotate<int>/512 764 ns 194 ns
BM_std_rotate<int>/4096 5899 ns 2687 ns
BM_std_rotate<int>/65536 94248 ns 43003 ns
BM_std_rotate<__int128>/1 0.713 ns 2.16 ns
BM_std_rotate<__int128>/2 0.711 ns 2.16 ns
BM_std_rotate<__int128>/3 5.63 ns 5.70 ns
BM_std_rotate<__int128>/4 5.64 ns 5.62 ns
BM_std_rotate<__int128>/32 46.1 ns 29.8 ns
BM_std_rotate<__int128>/64 98.6 ns 75.6 ns
BM_std_rotate<__int128>/512 752 ns 486 ns
BM_std_rotate<__int128>/4096 5849 ns 4913 ns
BM_std_rotate<__int128>/65536 97091 ns 78037 ns
BM_std_rotate<std::string>/1 0.722 ns 0.717 ns
BM_std_rotate<std::string>/2 0.722 ns 0.718 ns
BM_std_rotate<std::string>/3 4.35 ns 4.42 ns
BM_std_rotate<std::string>/4 6.12 ns 6.08 ns
BM_std_rotate<std::string>/32 46.7 ns 46.4 ns
BM_std_rotate<std::string>/64 95.7 ns 94.9 ns
BM_std_rotate<std::string>/512 795 ns 792 ns
BM_std_rotate<std::string>/4096 6500 ns 6442 ns
BM_std_rotate<std::string>/65536 106532 ns 103567 ns
```
Fixes #54949
Fixes #39644
From 78fac1e96260a9c7c12744a262cc187366c1fe2a Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 22 Dec 2024 00:53:36 +0100
Subject: [PATCH] [libc++] Optimize rotate
---
libcxx/docs/ReleaseNotes/20.rst | 3 +
libcxx/include/__algorithm/rotate.h | 65 +++++++++----------
.../benchmarks/algorithms/rotate.bench.cpp | 35 ++++++++++
3 files changed, 67 insertions(+), 36 deletions(-)
create mode 100644 libcxx/test/benchmarks/algorithms/rotate.bench.cpp
diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index c8a07fb8b73348..a0239e612caeee 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -73,6 +73,9 @@ Improvements and New Features
optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
+- ``rotate`` has been optimized, resulting in a performance improvement of up to 2.2x for trivially move assignable
+ types.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h
index df4ca95aac95bc..91a073c1be0297 100644
--- a/libcxx/include/__algorithm/rotate.h
+++ b/libcxx/include/__algorithm/rotate.h
@@ -82,46 +82,39 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt
return __r;
}
-template <typename _Integral>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Integral __algo_gcd(_Integral __x, _Integral __y) {
- do {
- _Integral __t = __x % __y;
- __x = __y;
- __y = __t;
- } while (__y);
- return __x;
-}
+template <class _AlgPolicy, class _Iter, class _Sent>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Iter __rotate_random(_Iter __first, _Iter __middle, _Sent __last) {
+ auto __left = _IterOps<_AlgPolicy>::distance(__first, __middle);
+ auto __right = _IterOps<_AlgPolicy>::distance(__middle, __last);
+ auto __end = __first + __right;
-template <class _AlgPolicy, typename _RandomAccessIterator>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _RandomAccessIterator
-__rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) {
- typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
- typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
- using _Ops = _IterOps<_AlgPolicy>;
+ if (__left == 0 || __first == __last)
+ return __first;
- const difference_type __m1 = __middle - __first;
- const difference_type __m2 = _Ops::distance(__middle, __last);
- if (__m1 == __m2) {
- std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle, __last);
+ if (__left == __right) {
+ std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle);
return __middle;
}
- const difference_type __g = std::__algo_gcd(__m1, __m2);
- for (_RandomAccessIterator __p = __first + __g; __p != __first;) {
- value_type __t(_Ops::__iter_move(--__p));
- _RandomAccessIterator __p1 = __p;
- _RandomAccessIterator __p2 = __p1 + __m1;
- do {
- *__p1 = _Ops::__iter_move(__p2);
- __p1 = __p2;
- const difference_type __d = _Ops::distance(__p2, __last);
- if (__m1 < __d)
- __p2 += __m1;
- else
- __p2 = __first + (__m1 - __d);
- } while (__p2 != __p);
- *__p1 = std::move(__t);
+
+ auto __min_len = std::min(__left, __right);
+
+ while (__min_len > 0) {
+ if (__left <= __right) {
+ do {
+ std::__swap_ranges<_AlgPolicy>(__first, __first + __left, __first + __left);
+ __first += __left;
+ __right -= __left;
+ } while (__left <= __right);
+ __min_len = __right;
+ } else {
+ do {
+ std::__swap_ranges<_AlgPolicy>(__first + (__left - __right), __first + __left, __first + __left);
+ __left -= __right;
+ } while (__left > __right);
+ __min_len = __left;
+ }
}
- return __first + __m2;
+ return __end;
}
template <class _AlgPolicy, class _ForwardIterator>
@@ -163,7 +156,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator
return std::__rotate_left<_AlgPolicy>(__first, __last);
if (_IterOps<_AlgPolicy>::next(__middle) == __last)
return std::__rotate_right<_AlgPolicy>(__first, __last);
- return std::__rotate_gcd<_AlgPolicy>(__first, __middle, __last);
+ return std::__rotate_random<_AlgPolicy>(__first, __middle, __last);
}
return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last);
}
diff --git a/libcxx/test/benchmarks/algorithms/rotate.bench.cpp b/libcxx/test/benchmarks/algorithms/rotate.bench.cpp
new file mode 100644
index 00000000000000..bfe0b2ea8587a1
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/rotate.bench.cpp
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cassert>
+
+#include <benchmark/benchmark.h>
+
+void run_sizes(auto benchmark) {
+ benchmark->Arg(1)->Arg(2)->Arg(3)->Arg(4)->Arg(32)->Arg(64)->Arg(512)->Arg(4096)->Arg(65536);
+}
+
+template <class T>
+static void BM_std_rotate(benchmark::State& state) {
+ std::vector<T> vec(state.range(), T());
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(vec);
+ benchmark::DoNotOptimize(std::rotate(vec.begin(), vec.begin() + vec.size() / 3, vec.end()));
+ }
+}
+BENCHMARK(BM_std_rotate<int>)->Apply(run_sizes);
+#ifndef TEST_HAS_NO_INT128
+BENCHMARK(BM_std_rotate<__int128>)->Apply(run_sizes);
+#endif
+BENCHMARK(BM_std_rotate<std::string>)->Apply(run_sizes);
+
+BENCHMARK_MAIN();
More information about the libcxx-commits
mailing list