[libcxx-commits] [libcxx] [libc++] Optimize rotate (PR #120890)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Fri Dec 12 00:01:03 PST 2025


https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/120890

>From 417bed73f172b528147d523c05e2dfbcffbafee0 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 22 Dec 2024 00:53:36 +0100
Subject: [PATCH] [libc++] Optimize rotate

---
 libcxx/docs/ReleaseNotes/20.rst               |  3 +
 libcxx/include/__algorithm/rotate.h           | 70 ++++++++-----------
 .../benchmarks/algorithms/rotate.bench.cpp    | 65 +++++++++++++++++
 3 files changed, 98 insertions(+), 40 deletions(-)
 create mode 100644 libcxx/test/benchmarks/algorithms/rotate.bench.cpp

diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index 572d4321dc46f..0cf19d6438fe9 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -122,6 +122,9 @@ Improvements and New Features
 
 - The performance of ``std::getline`` has been improved, resulting in a performance uplift of up to 10x.
 
+- ``std::rotate`` has been optimized, resulting in a performance improvement of up to 2.2x for trivially move
+  assignable types.
+
 Deprecations and Removals
 -------------------------
 
diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h
index c676980f0c1ca..d70f66d618786 100644
--- a/libcxx/include/__algorithm/rotate.h
+++ b/libcxx/include/__algorithm/rotate.h
@@ -12,16 +12,13 @@
 #include <__algorithm/copy.h>
 #include <__algorithm/copy_backward.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/min.h>
 #include <__algorithm/move.h>
 #include <__algorithm/move_backward.h>
 #include <__algorithm/swap_ranges.h>
 #include <__config>
-#include <__cstddef/size_t.h>
 #include <__fwd/bit_reference.h>
 #include <__iterator/iterator_traits.h>
-#include <__memory/construct_at.h>
-#include <__memory/pointer_traits.h>
-#include <__type_traits/is_constant_evaluated.h>
 #include <__type_traits/is_trivially_assignable.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
@@ -89,46 +86,52 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt
   return __r;
 }
 
-template <typename _Integral>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Integral __algo_gcd(_Integral __x, _Integral __y) {
-  do {
-    _Integral __t = __x % __y;
-    __x           = __y;
-    __y           = __t;
-  } while (__y);
-  return __x;
-}
+// Rotates the random access range [__first, __last) in place so that the element at __middle becomes the first
+// element, by repeatedly swapping the shorter of the two remaining blocks into its final position.
+//
+// Returns __first + (__last - __middle), i.e. the new position of the element that was originally at __first.
+template <class _AlgPolicy, class _Iter, class _Sent>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Iter __rotate_random(_Iter __first, _Iter __middle, _Sent __last) {
+  auto __left  = _IterOps<_AlgPolicy>::distance(__first, __middle);
+  auto __right = _IterOps<_AlgPolicy>::distance(__middle, __last);
+  auto __end   = __first + __right;
 
-template <class _AlgPolicy, typename _RandomAccessIterator>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _RandomAccessIterator
-__rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) {
-  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
-  using _Ops = _IterOps<_AlgPolicy>;
+  // If either block is empty the range is already rotated. __end is then __last (empty left block) or __first
+  // (empty right block), which is exactly the position rotate has to return.
+  if (__left == 0 || __right == 0)
+    return __end;
 
-  const difference_type __m1 = __middle - __first;
-  const difference_type __m2 = _Ops::distance(__middle, __last);
-  if (__m1 == __m2) {
-    std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle, __last);
+  // Blocks of equal length only need to be swapped with each other.
+  if (__left == __right) {
+    std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle);
     return __middle;
   }
-  const difference_type __g = std::__algo_gcd(__m1, __m2);
-  for (_RandomAccessIterator __p = __first + __g; __p != __first;) {
-    value_type __t(_Ops::__iter_move(--__p));
-    _RandomAccessIterator __p1 = __p;
-    _RandomAccessIterator __p2 = __p1 + __m1;
-    do {
-      *__p1                     = _Ops::__iter_move(__p2);
-      __p1                      = __p2;
-      const difference_type __d = _Ops::distance(__p2, __last);
-      if (__m1 < __d)
-        __p2 += __m1;
-      else
-        __p2 = __first + (__m1 - __d);
-    } while (__p2 != __p);
-    *__p1 = std::move(__t);
+
+  auto __min_len = std::min(__left, __right);
+
+  // Invariant: the elements still to be rotated are [__first, __first + __left + __right), a left block of __left
+  // elements followed by a right block of __right elements. __min_len tracks min(__left, __right).
+  while (__min_len > 0) {
+    if (__left <= __right) {
+      // Swap the left block with the front of the right block. This puts __left elements of the right block into
+      // their final position and moves the left block __left elements towards the back.
+      do {
+        std::__swap_ranges<_AlgPolicy>(__first, __first + __left, __first + __left);
+        __first += __left;
+        __right -= __left;
+      } while (__left <= __right);
+      __min_len = __right;
+    } else {
+      // Swap the right block with the back of the left block. This puts __right elements of the left block into
+      // their final position at the end and moves the right block __right elements towards the front.
+      do {
+        std::__swap_ranges<_AlgPolicy>(__first + (__left - __right), __first + __left, __first + __left);
+        __left -= __right;
+      } while (__left > __right);
+      __min_len = __left;
+    }
   }
-  return __first + __m2;
+  return __end;
 }
 
 template <class _AlgPolicy, class _ForwardIterator>
@@ -170,7 +160,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator
       return std::__rotate_left<_AlgPolicy>(__first, __last);
     if (_IterOps<_AlgPolicy>::next(__middle) == __last)
       return std::__rotate_right<_AlgPolicy>(__first, __last);
-    return std::__rotate_gcd<_AlgPolicy>(__first, __middle, __last);
+    return std::__rotate_random<_AlgPolicy>(__first, __middle, __last);
   }
   return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last);
 }
diff --git a/libcxx/test/benchmarks/algorithms/rotate.bench.cpp b/libcxx/test/benchmarks/algorithms/rotate.bench.cpp
new file mode 100644
index 0000000000000..36ec1021b75c3
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/rotate.bench.cpp
@@ -0,0 +1,86 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <iterator>
+#include <list>
+#include <string>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+// Registers one run for each container size; the size is passed as state.range(0).
+void run_sizes(auto benchmark) {
+  benchmark->Arg(1)->Arg(4)->Arg(32)->Arg(64)->Arg(512)->Arg(4096)->Arg(65536);
+}
+
+// Registers one run for each container size with Divisor as a second argument, i.e. the size is passed as
+// state.range(0) and Divisor as state.range(1).
+template <int Divisor>
+void run_sizes_by(auto benchmark) {
+  for (int size : {1, 4, 32, 64, 512, 4096, 65536})
+    benchmark->Args({size, Divisor});
+}
+
+// Measures std::rotate on a Container of state.range(0) value-initialized elements, rotating around the element at
+// index state.range(0) / state.range(1).
+template <class Container>
+static void BM_std_rotate(benchmark::State& state) {
+  const auto size            = state.range(0);
+  const auto relative_offset = state.range(1);
+  Container c(size, typename Container::value_type());
+  auto middle = std::next(c.begin(), size / relative_offset);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(c);
+    benchmark::DoNotOptimize(std::rotate(c.begin(), middle, c.end()));
+  }
+}
+
+BENCHMARK(BM_std_rotate<std::list<int>>)->Name("std::rotate(list<int>) (by 1/2)")->Apply(run_sizes_by<2>);
+BENCHMARK(BM_std_rotate<std::list<int>>)->Name("std::rotate(list<int>) (by 1/3)")->Apply(run_sizes_by<3>);
+BENCHMARK(BM_std_rotate<std::list<std::string>>)->Name("std::rotate(list<string>) (by 1/2)")->Apply(run_sizes_by<2>);
+BENCHMARK(BM_std_rotate<std::list<std::string>>)->Name("std::rotate(list<string>) (by 1/3)")->Apply(run_sizes_by<3>);
+BENCHMARK(BM_std_rotate<std::deque<int>>)->Name("std::rotate(deque<int>) (by 1/2)")->Apply(run_sizes_by<2>);
+BENCHMARK(BM_std_rotate<std::deque<int>>)->Name("std::rotate(deque<int>) (by 1/3)")->Apply(run_sizes_by<3>);
+BENCHMARK(BM_std_rotate<std::deque<std::string>>)->Name("std::rotate(deque<string>) (by 1/2)")->Apply(run_sizes_by<2>);
+BENCHMARK(BM_std_rotate<std::deque<std::string>>)->Name("std::rotate(deque<string>) (by 1/3)")->Apply(run_sizes_by<3>);
+BENCHMARK(BM_std_rotate<std::vector<int>>)->Name("std::rotate(vector<int>) (by 1/2)")->Apply(run_sizes_by<2>);
+BENCHMARK(BM_std_rotate<std::vector<int>>)->Name("std::rotate(vector<int>) (by 1/3)")->Apply(run_sizes_by<3>);
+BENCHMARK(BM_std_rotate<std::vector<std::string>>)
+    ->Name("std::rotate(vector<string>) (by 1/2)")
+    ->Apply(run_sizes_by<2>);
+BENCHMARK(BM_std_rotate<std::vector<std::string>>)
+    ->Name("std::rotate(vector<string>) (by 1/3)")
+    ->Apply(run_sizes_by<3>);
+
+// Measures std::rotate on a Container of state.range(0) value-initialized elements, rotating by a single element.
+template <class Container>
+static void BM_std_rotate1(benchmark::State& state) {
+  Container c(state.range(0), typename Container::value_type());
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(c);
+    benchmark::DoNotOptimize(std::rotate(c.begin(), std::next(c.begin()), c.end()));
+  }
+}
+
+BENCHMARK(BM_std_rotate1<std::list<int>>)->Name("std::rotate(list<int>) (by 1 element)")->Apply(run_sizes);
+BENCHMARK(BM_std_rotate1<std::list<std::string>>)->Name("std::rotate(list<string>) (by 1 element)")->Apply(run_sizes);
+BENCHMARK(BM_std_rotate1<std::deque<int>>)->Name("std::rotate(deque<int>) (by 1 element)")->Apply(run_sizes);
+BENCHMARK(BM_std_rotate1<std::deque<std::string>>)->Name("std::rotate(deque<string>) (by 1 element)")->Apply(run_sizes);
+BENCHMARK(BM_std_rotate1<std::vector<int>>)->Name("std::rotate(vector<int>) (by 1 element)")->Apply(run_sizes);
+BENCHMARK(BM_std_rotate1<std::vector<std::string>>)
+    ->Name("std::rotate(vector<string>) (by 1 element)")
+    ->Apply(run_sizes);
+
+BENCHMARK_MAIN();



More information about the libcxx-commits mailing list