[libcxx-commits] [libcxx] [libc++] Optimize rotate (PR #120862)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Sat Dec 21 15:54:21 PST 2024


https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/120862

```
----------------------------------------------------------
Benchmark                                 old          new
----------------------------------------------------------
BM_std_rotate<int>/1                 0.707 ns      2.83 ns
BM_std_rotate<int>/2                 0.706 ns      2.83 ns
BM_std_rotate<int>/3                  4.93 ns      5.04 ns
BM_std_rotate<int>/4                  5.12 ns      5.14 ns
BM_std_rotate<int>/32                 41.3 ns      16.5 ns
BM_std_rotate<int>/64                 94.1 ns      42.8 ns
BM_std_rotate<int>/512                 764 ns       194 ns
BM_std_rotate<int>/4096               5899 ns      2687 ns
BM_std_rotate<int>/65536             94248 ns     43003 ns
BM_std_rotate<__int128>/1            0.713 ns      2.16 ns
BM_std_rotate<__int128>/2            0.711 ns      2.16 ns
BM_std_rotate<__int128>/3             5.63 ns      5.70 ns
BM_std_rotate<__int128>/4             5.64 ns      5.62 ns
BM_std_rotate<__int128>/32            46.1 ns      29.8 ns
BM_std_rotate<__int128>/64            98.6 ns      75.6 ns
BM_std_rotate<__int128>/512            752 ns       486 ns
BM_std_rotate<__int128>/4096          5849 ns      4913 ns
BM_std_rotate<__int128>/65536        97091 ns     78037 ns
BM_std_rotate<std::string>/1         0.722 ns     0.717 ns
BM_std_rotate<std::string>/2         0.722 ns     0.718 ns
BM_std_rotate<std::string>/3          4.35 ns      4.42 ns
BM_std_rotate<std::string>/4          6.12 ns      6.08 ns
BM_std_rotate<std::string>/32         46.7 ns      46.4 ns
BM_std_rotate<std::string>/64         95.7 ns      94.9 ns
BM_std_rotate<std::string>/512         795 ns       792 ns
BM_std_rotate<std::string>/4096       6500 ns      6442 ns
BM_std_rotate<std::string>/65536    106532 ns    103567 ns
```

Fixes #54949
Fixes #39644



From 6399aec8e1b59b0aa436cf5e08ac78cdb18bd3d3 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 22 Dec 2024 00:53:36 +0100
Subject: [PATCH] [libc++] Optimize rotate

---
 libcxx/docs/ReleaseNotes/20.rst               |  3 +
 libcxx/include/__algorithm/rotate.h           | 65 +++++++++----------
 .../benchmarks/algorithms/rotate.bench.cpp    | 35 ++++++++++
 3 files changed, 67 insertions(+), 36 deletions(-)
 create mode 100644 libcxx/test/benchmarks/algorithms/rotate.bench.cpp

diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index c8a07fb8b73348..a0239e612caeee 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -73,6 +73,9 @@ Improvements and New Features
   optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
   and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
 
+- ``rotate`` has been optimized, resulting in a performance improvement of up to 2.2x for trivially move assignable
+  types.
+
 Deprecations and Removals
 -------------------------
 
diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h
index df4ca95aac95bc..1755a7588812f0 100644
--- a/libcxx/include/__algorithm/rotate.h
+++ b/libcxx/include/__algorithm/rotate.h
@@ -82,46 +82,39 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt
   return __r;
 }
 
-template <typename _Integral>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Integral __algo_gcd(_Integral __x, _Integral __y) {
-  do {
-    _Integral __t = __x % __y;
-    __x           = __y;
-    __y           = __t;
-  } while (__y);
-  return __x;
-}
+template <class _AlgPolicy, class _Iter, class _Sent>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Iter __rotate_random(_Iter __first, _Iter __middle, _Sent __last) {
+  auto __left = _IterOps<_AlgPolicy>::distance(__first, __middle);
+  auto __right = _IterOps<_AlgPolicy>::distance(__middle, __last);
+  auto __end = __first + __right;
 
-template <class _AlgPolicy, typename _RandomAccessIterator>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _RandomAccessIterator
-__rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) {
-  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
-  using _Ops = _IterOps<_AlgPolicy>;
+  if (__left == 0 || __first == __last)
+    return __first;
 
-  const difference_type __m1 = __middle - __first;
-  const difference_type __m2 = _Ops::distance(__middle, __last);
-  if (__m1 == __m2) {
-    std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle, __last);
+  if (__left == __right) {
+    std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle);
     return __middle;
   }
-  const difference_type __g = std::__algo_gcd(__m1, __m2);
-  for (_RandomAccessIterator __p = __first + __g; __p != __first;) {
-    value_type __t(_Ops::__iter_move(--__p));
-    _RandomAccessIterator __p1 = __p;
-    _RandomAccessIterator __p2 = __p1 + __m1;
-    do {
-      *__p1                     = _Ops::__iter_move(__p2);
-      __p1                      = __p2;
-      const difference_type __d = _Ops::distance(__p2, __last);
-      if (__m1 < __d)
-        __p2 += __m1;
-      else
-        __p2 = __first + (__m1 - __d);
-    } while (__p2 != __p);
-    *__p1 = std::move(__t);
+
+  auto __min_len = std::min(__left, __right);
+
+  while (__min_len > 0) {
+    if (__left <= __right) {
+      do {
+        std::__swap_ranges<_AlgPolicy>(__first, __first + __left, __first + __left);
+        __first += __left;
+        __right -= __left;
+      } while (__left <= __right);
+      __min_len = __right;
+    } else {
+      do {
+        std::__swap_ranges<_AlgPolicy>(__first + (__left - __right), __first + __left, __first + __left);
+        __left -= __right;
+      } while (__left > __right);
+      __min_len = __left;
+    }
   }
-  return __first + __m2;
+  return __end;
 }
 
 template <class _AlgPolicy, class _ForwardIterator>
@@ -163,7 +156,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator
       return std::__rotate_left<_AlgPolicy>(__first, __last);
     if (_IterOps<_AlgPolicy>::next(__middle) == __last)
       return std::__rotate_right<_AlgPolicy>(__first, __last);
-    return std::__rotate_gcd<_AlgPolicy>(__first, __middle, __last);
+    return std::__rotate_random<_AlgPolicy>(__first, __middle, __last);
   }
   return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last);
 }
diff --git a/libcxx/test/benchmarks/algorithms/rotate.bench.cpp b/libcxx/test/benchmarks/algorithms/rotate.bench.cpp
new file mode 100644
index 00000000000000..bfe0b2ea8587a1
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/rotate.bench.cpp
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cassert>
+
+#include <benchmark/benchmark.h>
+
+void run_sizes(auto benchmark) {
+  benchmark->Arg(1)->Arg(2)->Arg(3)->Arg(4)->Arg(32)->Arg(64)->Arg(512)->Arg(4096)->Arg(65536);
+}
+
+template <class T>
+static void BM_std_rotate(benchmark::State& state) {
+  std::vector<T> vec(state.range(), T());
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(vec);
+    benchmark::DoNotOptimize(std::rotate(vec.begin(), vec.begin() + vec.size() / 3, vec.end()));
+  }
+}
+BENCHMARK(BM_std_rotate<int>)->Apply(run_sizes);
+#ifndef TEST_HAS_NO_INT128
+BENCHMARK(BM_std_rotate<__int128>)->Apply(run_sizes);
+#endif
+BENCHMARK(BM_std_rotate<std::string>)->Apply(run_sizes);
+
+BENCHMARK_MAIN();



More information about the libcxx-commits mailing list