[libcxx-commits] [libcxx] 79a8894 - [libc++] Optimize rotate (#120890)
via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Dec 23 01:18:47 PST 2025
Author: Nikolas Klauser
Date: 2025-12-23T10:18:43+01:00
New Revision: 79a88949cd6113537ec1dad09ad3473d5d38ee2f
URL: https://github.com/llvm/llvm-project/commit/79a88949cd6113537ec1dad09ad3473d5d38ee2f
DIFF: https://github.com/llvm/llvm-project/commit/79a88949cd6113537ec1dad09ad3473d5d38ee2f.diff
LOG: [libc++] Optimize rotate (#120890)
This implements a new algorithm for `rotate` with random access
iterators, which uses `swap_ranges`. This reduces cache misses and
allows for vectorization.
Apple M4:
```
Benchmark old new Difference % Difference
--------------------------------------------------- -------------- -------------- ------------ --------------
rng::rotate(deque<int>)_(1_element_backward)/1024 46.17 45.13 -1.04 -2.26%
rng::rotate(deque<int>)_(1_element_backward)/32 4.90 4.92 0.02 0.45%
rng::rotate(deque<int>)_(1_element_backward)/50 6.12 6.02 -0.10 -1.56%
rng::rotate(deque<int>)_(1_element_backward)/8192 329.97 330.49 0.52 0.16%
rng::rotate(deque<int>)_(1_element_forward)/1024 42.20 42.99 0.79 1.87%
rng::rotate(deque<int>)_(1_element_forward)/32 4.99 5.26 0.27 5.37%
rng::rotate(deque<int>)_(1_element_forward)/50 6.33 6.48 0.14 2.28%
rng::rotate(deque<int>)_(1_element_forward)/8192 317.40 318.68 1.28 0.40%
rng::rotate(deque<int>)_(by_1/2)/1024 185.42 184.23 -1.18 -0.64%
rng::rotate(deque<int>)_(by_1/2)/32 8.72 8.99 0.27 3.14%
rng::rotate(deque<int>)_(by_1/2)/50 12.03 11.60 -0.43 -3.59%
rng::rotate(deque<int>)_(by_1/2)/8192 1549.94 1549.30 -0.64 -0.04%
rng::rotate(deque<int>)_(by_1/3)/1024 1886.50 409.41 -1477.09 -78.30%
rng::rotate(deque<int>)_(by_1/3)/32 46.51 21.68 -24.84 -53.40%
rng::rotate(deque<int>)_(by_1/3)/50 77.70 29.85 -47.85 -61.58%
rng::rotate(deque<int>)_(by_1/3)/8192 22814.91 3171.92 -19642.99 -86.10%
rng::rotate(deque<int>)_(by_1/4)/1024 811.39 283.28 -528.11 -65.09%
rng::rotate(deque<int>)_(by_1/4)/32 30.30 14.44 -15.86 -52.35%
rng::rotate(deque<int>)_(by_1/4)/50 76.23 29.29 -46.94 -61.58%
rng::rotate(deque<int>)_(by_1/4)/8192 7154.50 2357.48 -4797.02 -67.05%
rng::rotate(list<int>)_(1_element_backward)/1024 1483.46 742.31 -741.15 -49.96%
rng::rotate(list<int>)_(1_element_backward)/32 16.17 16.37 0.20 1.24%
rng::rotate(list<int>)_(1_element_backward)/50 29.31 29.20 -0.10 -0.34%
rng::rotate(list<int>)_(1_element_backward)/8192 8896.07 8949.56 53.49 0.60%
rng::rotate(list<int>)_(1_element_forward)/1024 1481.36 742.24 -739.12 -49.89%
rng::rotate(list<int>)_(1_element_forward)/32 15.72 16.22 0.50 3.19%
rng::rotate(list<int>)_(1_element_forward)/50 28.91 28.87 -0.03 -0.12%
rng::rotate(list<int>)_(1_element_forward)/8192 9595.63 9846.80 251.16 2.62%
rng::rotate(list<int>)_(by_1/2)/1024 833.67 408.22 -425.46 -51.03%
rng::rotate(list<int>)_(by_1/2)/32 8.88 8.76 -0.11 -1.25%
rng::rotate(list<int>)_(by_1/2)/50 15.67 15.73 0.06 0.40%
rng::rotate(list<int>)_(by_1/2)/8192 6392.01 6597.30 205.30 3.21%
rng::rotate(list<int>)_(by_1/3)/1024 1360.66 872.00 -488.66 -35.91%
rng::rotate(list<int>)_(by_1/3)/32 19.72 19.37 -0.35 -1.80%
rng::rotate(list<int>)_(by_1/3)/50 33.15 33.12 -0.04 -0.12%
rng::rotate(list<int>)_(by_1/3)/8192 10807.14 11216.71 409.58 3.79%
rng::rotate(list<int>)_(by_1/4)/1024 624.92 625.80 0.88 0.14%
rng::rotate(list<int>)_(by_1/4)/32 15.28 15.37 0.09 0.60%
rng::rotate(list<int>)_(by_1/4)/50 32.27 32.78 0.52 1.60%
rng::rotate(list<int>)_(by_1/4)/8192 11266.66 9594.79 -1671.87 -14.84%
rng::rotate(vector<bool>)_(1_element_backward)/1024 32.10 31.94 -0.16 -0.51%
rng::rotate(vector<bool>)_(1_element_backward)/32 18.48 18.07 -0.41 -2.21%
rng::rotate(vector<bool>)_(1_element_backward)/50 18.49 18.19 -0.30 -1.64%
rng::rotate(vector<bool>)_(1_element_backward)/8192 113.76 112.91 -0.85 -0.75%
rng::rotate(vector<bool>)_(1_element_forward)/1024 30.87 30.72 -0.15 -0.48%
rng::rotate(vector<bool>)_(1_element_forward)/32 18.16 17.98 -0.18 -1.01%
rng::rotate(vector<bool>)_(1_element_forward)/50 17.98 17.98 0.01 0.03%
rng::rotate(vector<bool>)_(1_element_forward)/8192 111.84 111.50 -0.34 -0.31%
rng::rotate(vector<bool>)_(by_1/2)/1024 9.27 9.34 0.07 0.75%
rng::rotate(vector<bool>)_(by_1/2)/32 18.32 17.90 -0.42 -2.29%
rng::rotate(vector<bool>)_(by_1/2)/50 18.14 17.69 -0.45 -2.49%
rng::rotate(vector<bool>)_(by_1/2)/8192 16.23 16.80 0.57 3.49%
rng::rotate(vector<bool>)_(by_1/3)/1024 58.89 58.74 -0.15 -0.25%
rng::rotate(vector<bool>)_(by_1/3)/32 18.16 17.74 -0.41 -2.28%
rng::rotate(vector<bool>)_(by_1/3)/50 18.15 17.72 -0.43 -2.38%
rng::rotate(vector<bool>)_(by_1/3)/8192 164.52 166.17 1.65 1.00%
rng::rotate(vector<bool>)_(by_1/4)/1024 13.44 14.20 0.76 5.65%
rng::rotate(vector<bool>)_(by_1/4)/32 18.38 17.86 -0.52 -2.84%
rng::rotate(vector<bool>)_(by_1/4)/50 18.26 17.75 -0.51 -2.79%
rng::rotate(vector<bool>)_(by_1/4)/8192 34.49 34.59 0.11 0.31%
rng::rotate(vector<int>)_(1_element_backward)/1024 37.22 37.55 0.33 0.88%
rng::rotate(vector<int>)_(1_element_backward)/32 3.02 3.01 -0.00 -0.10%
rng::rotate(vector<int>)_(1_element_backward)/50 5.31 5.33 0.02 0.31%
rng::rotate(vector<int>)_(1_element_backward)/8192 302.96 295.02 -7.94 -2.62%
rng::rotate(vector<int>)_(1_element_forward)/1024 36.78 36.97 0.19 0.51%
rng::rotate(vector<int>)_(1_element_forward)/32 3.08 3.19 0.11 3.56%
rng::rotate(vector<int>)_(1_element_forward)/50 5.33 5.33 -0.00 -0.05%
rng::rotate(vector<int>)_(1_element_forward)/8192 288.25 285.28 -2.97 -1.03%
rng::rotate(vector<int>)_(by_1/2)/1024 32.49 32.13 -0.36 -1.10%
rng::rotate(vector<int>)_(by_1/2)/32 3.80 2.63 -1.17 -30.82%
rng::rotate(vector<int>)_(by_1/2)/50 5.29 3.69 -1.59 -30.13%
rng::rotate(vector<int>)_(by_1/2)/8192 256.13 252.05 -4.07 -1.59%
rng::rotate(vector<int>)_(by_1/3)/1024 1389.57 135.85 -1253.72 -90.22%
rng::rotate(vector<int>)_(by_1/3)/32 21.73 11.99 -9.73 -44.80%
rng::rotate(vector<int>)_(by_1/3)/50 40.23 11.88 -28.35 -70.48%
rng::rotate(vector<int>)_(by_1/3)/8192 11040.23 946.51 -10093.72 -91.43%
rng::rotate(vector<int>)_(by_1/4)/1024 335.13 49.05 -286.08 -85.36%
rng::rotate(vector<int>)_(by_1/4)/32 12.32 6.06 -6.26 -50.83%
rng::rotate(vector<int>)_(by_1/4)/50 40.54 12.54 -28.00 -69.07%
rng::rotate(vector<int>)_(by_1/4)/8192 2648.84 391.91 -2256.93 -85.20%
std::rotate(deque<int>)_(1_element_backward)/1024 45.81 45.88 0.08 0.16%
std::rotate(deque<int>)_(1_element_backward)/32 4.81 4.85 0.04 0.76%
std::rotate(deque<int>)_(1_element_backward)/50 6.11 6.10 -0.01 -0.13%
std::rotate(deque<int>)_(1_element_backward)/8192 329.99 330.63 0.64 0.19%
std::rotate(deque<int>)_(1_element_forward)/1024 42.45 42.21 -0.24 -0.57%
std::rotate(deque<int>)_(1_element_forward)/32 5.03 5.14 0.11 2.23%
std::rotate(deque<int>)_(1_element_forward)/50 5.96 6.04 0.09 1.43%
std::rotate(deque<int>)_(1_element_forward)/8192 316.09 317.73 1.64 0.52%
std::rotate(deque<int>)_(by_1/2)/1024 185.34 185.41 0.07 0.04%
std::rotate(deque<int>)_(by_1/2)/32 8.08 8.57 0.49 6.01%
std::rotate(deque<int>)_(by_1/2)/50 11.13 11.61 0.48 4.35%
std::rotate(deque<int>)_(by_1/2)/8192 1549.74 1527.97 -21.77 -1.40%
std::rotate(deque<int>)_(by_1/3)/1024 1837.15 410.26 -1426.89 -77.67%
std::rotate(deque<int>)_(by_1/3)/32 46.73 22.85 -23.88 -51.11%
std::rotate(deque<int>)_(by_1/3)/50 78.05 29.61 -48.43 -62.06%
std::rotate(deque<int>)_(by_1/3)/8192 22784.08 3174.02 -19610.06 -86.07%
std::rotate(deque<int>)_(by_1/4)/1024 874.13 282.91 -591.22 -67.64%
std::rotate(deque<int>)_(by_1/4)/32 31.14 14.31 -16.83 -54.05%
std::rotate(deque<int>)_(by_1/4)/50 77.39 28.67 -48.72 -62.96%
std::rotate(deque<int>)_(by_1/4)/8192 7207.73 2344.73 -4863.00 -67.47%
std::rotate(list<int>)_(1_element_backward)/1024 1490.45 745.80 -744.65 -49.96%
std::rotate(list<int>)_(1_element_backward)/32 16.15 16.43 0.28 1.71%
std::rotate(list<int>)_(1_element_backward)/50 29.28 29.29 0.01 0.02%
std::rotate(list<int>)_(1_element_backward)/8192 9305.23 10372.35 1067.13 11.47%
std::rotate(list<int>)_(1_element_forward)/1024 1479.13 743.71 -735.42 -49.72%
std::rotate(list<int>)_(1_element_forward)/32 15.86 16.21 0.35 2.20%
std::rotate(list<int>)_(1_element_forward)/50 28.91 28.90 -0.01 -0.04%
std::rotate(list<int>)_(1_element_forward)/8192 9796.34 8826.58 -969.76 -9.90%
std::rotate(list<int>)_(by_1/2)/1024 851.06 411.44 -439.61 -51.66%
std::rotate(list<int>)_(by_1/2)/32 8.90 8.80 -0.10 -1.10%
std::rotate(list<int>)_(by_1/2)/50 15.69 15.69 0.00 0.02%
std::rotate(list<int>)_(by_1/2)/8192 6479.34 6599.58 120.24 1.86%
std::rotate(list<int>)_(by_1/3)/1024 865.82 877.81 11.99 1.38%
std::rotate(list<int>)_(by_1/3)/32 19.52 19.44 -0.09 -0.44%
std::rotate(list<int>)_(by_1/3)/50 33.67 33.14 -0.53 -1.59%
std::rotate(list<int>)_(by_1/3)/8192 10513.57 10355.02 -158.55 -1.51%
std::rotate(list<int>)_(by_1/4)/1024 639.35 655.30 15.95 2.49%
std::rotate(list<int>)_(by_1/4)/32 15.22 15.45 0.24 1.55%
std::rotate(list<int>)_(by_1/4)/50 32.51 32.93 0.42 1.29%
std::rotate(list<int>)_(by_1/4)/8192 9883.06 9284.28 -598.79 -6.06%
std::rotate(vector<bool>)_(1_element_backward)/1024 30.77 30.95 0.18 0.58%
std::rotate(vector<bool>)_(1_element_backward)/32 17.73 17.57 -0.16 -0.89%
std::rotate(vector<bool>)_(1_element_backward)/50 17.74 17.58 -0.16 -0.91%
std::rotate(vector<bool>)_(1_element_backward)/8192 111.37 111.04 -0.33 -0.29%
std::rotate(vector<bool>)_(1_element_forward)/1024 30.16 29.92 -0.24 -0.81%
std::rotate(vector<bool>)_(1_element_forward)/32 17.22 17.38 0.16 0.94%
std::rotate(vector<bool>)_(1_element_forward)/50 17.22 17.36 0.14 0.83%
std::rotate(vector<bool>)_(1_element_forward)/8192 111.28 111.19 -0.09 -0.08%
std::rotate(vector<bool>)_(by_1/2)/1024 9.20 9.65 0.46 4.96%
std::rotate(vector<bool>)_(by_1/2)/32 17.23 17.12 -0.11 -0.62%
std::rotate(vector<bool>)_(by_1/2)/50 17.18 17.04 -0.14 -0.82%
std::rotate(vector<bool>)_(by_1/2)/8192 15.93 16.92 0.99 6.22%
std::rotate(vector<bool>)_(by_1/3)/1024 57.99 58.05 0.06 0.10%
std::rotate(vector<bool>)_(by_1/3)/32 17.14 17.06 -0.08 -0.47%
std::rotate(vector<bool>)_(by_1/3)/50 17.09 16.95 -0.15 -0.87%
std::rotate(vector<bool>)_(by_1/3)/8192 164.14 165.70 1.56 0.95%
std::rotate(vector<bool>)_(by_1/4)/1024 14.00 14.21 0.21 1.49%
std::rotate(vector<bool>)_(by_1/4)/32 17.20 17.20 -0.00 -0.00%
std::rotate(vector<bool>)_(by_1/4)/50 17.09 17.10 0.01 0.08%
std::rotate(vector<bool>)_(by_1/4)/8192 33.90 34.16 0.26 0.76%
std::rotate(vector<int>)_(1_element_backward)/1024 37.04 37.54 0.50 1.35%
std::rotate(vector<int>)_(1_element_backward)/32 3.01 3.01 0.00 0.04%
std::rotate(vector<int>)_(1_element_backward)/50 5.30 4.96 -0.34 -6.44%
std::rotate(vector<int>)_(1_element_backward)/8192 302.63 286.92 -15.71 -5.19%
std::rotate(vector<int>)_(1_element_forward)/1024 36.83 36.88 0.05 0.15%
std::rotate(vector<int>)_(1_element_forward)/32 3.08 3.06 -0.02 -0.56%
std::rotate(vector<int>)_(1_element_forward)/50 5.34 5.07 -0.26 -4.95%
std::rotate(vector<int>)_(1_element_forward)/8192 283.85 284.99 1.14 0.40%
std::rotate(vector<int>)_(by_1/2)/1024 32.12 32.19 0.06 0.20%
std::rotate(vector<int>)_(by_1/2)/32 3.83 2.22 -1.61 -42.16%
std::rotate(vector<int>)_(by_1/2)/50 4.28 4.38 0.09 2.19%
std::rotate(vector<int>)_(by_1/2)/8192 256.28 252.63 -3.65 -1.42%
std::rotate(vector<int>)_(by_1/3)/1024 1386.79 142.76 -1244.03 -89.71%
std::rotate(vector<int>)_(by_1/3)/32 21.76 11.54 -10.22 -46.98%
std::rotate(vector<int>)_(by_1/3)/50 40.30 11.79 -28.51 -70.74%
std::rotate(vector<int>)_(by_1/3)/8192 11017.09 949.95 -10067.14 -91.38%
std::rotate(vector<int>)_(by_1/4)/1024 335.51 48.69 -286.83 -85.49%
std::rotate(vector<int>)_(by_1/4)/32 12.31 6.09 -6.22 -50.51%
std::rotate(vector<int>)_(by_1/4)/50 40.82 13.03 -27.79 -68.07%
std::rotate(vector<int>)_(by_1/4)/8192 2647.90 392.58 -2255.32 -85.17%
Geomean 76.74 56.58 -20.16 -26.27%
```
Fixes #54949
Added:
Modified:
libcxx/docs/ReleaseNotes/22.rst
libcxx/include/__algorithm/rotate.h
Removed:
################################################################################
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index 36048165003dd..82271be7a22ab 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -97,6 +97,8 @@ Improvements and New Features
- The performance of ``std::align`` has been improved by making it an inline function, which allows the compiler to
better optimize calls to it.
+- ``rotate`` has been optimized, resulting in a performance improvement of up to 3x.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h
index c676980f0c1ca..b6d9eb3b2dd00 100644
--- a/libcxx/include/__algorithm/rotate.h
+++ b/libcxx/include/__algorithm/rotate.h
@@ -12,16 +12,13 @@
#include <__algorithm/copy.h>
#include <__algorithm/copy_backward.h>
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/min.h>
#include <__algorithm/move.h>
#include <__algorithm/move_backward.h>
#include <__algorithm/swap_ranges.h>
#include <__config>
-#include <__cstddef/size_t.h>
#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
-#include <__memory/construct_at.h>
-#include <__memory/pointer_traits.h>
-#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -89,46 +86,32 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt
return __r;
}
-template <typename _Integral>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Integral __algo_gcd(_Integral __x, _Integral __y) {
- do {
- _Integral __t = __x % __y;
- __x = __y;
- __y = __t;
- } while (__y);
- return __x;
-}
-
-template <class _AlgPolicy, typename _RandomAccessIterator>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _RandomAccessIterator
-__rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) {
- typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
- typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
- using _Ops = _IterOps<_AlgPolicy>;
-
- const difference_type __m1 = __middle - __first;
- const difference_type __m2 = _Ops::distance(__middle, __last);
- if (__m1 == __m2) {
- std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle, __last);
- return __middle;
- }
- const difference_type __g = std::__algo_gcd(__m1, __m2);
- for (_RandomAccessIterator __p = __first + __g; __p != __first;) {
- value_type __t(_Ops::__iter_move(--__p));
- _RandomAccessIterator __p1 = __p;
- _RandomAccessIterator __p2 = __p1 + __m1;
- do {
- *__p1 = _Ops::__iter_move(__p2);
- __p1 = __p2;
- const difference_type __d = _Ops::distance(__p2, __last);
- if (__m1 < __d)
- __p2 += __m1;
- else
- __p2 = __first + (__m1 - __d);
- } while (__p2 != __p);
- *__p1 = std::move(__t);
+template <class _AlgPolicy, class _Iter, class _Sent>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Iter
+__rotate_random_access(_Iter __first, _Iter __middle, _Sent __sent) {
+ auto __left = _IterOps<_AlgPolicy>::distance(__first, __middle);
+ auto __right = _IterOps<_AlgPolicy>::distance(__middle, __sent);
+ auto __last = __first + __right;
+
+ auto __min_len = std::min(__left, __right);
+
+ while (__min_len > 0) {
+ if (__left <= __right) {
+ do {
+ std::__swap_ranges<_AlgPolicy>(__first, __first + __left, __first + __left);
+ __first += __left;
+ __right -= __left;
+ } while (__left <= __right);
+ __min_len = __right;
+ } else {
+ do {
+ std::__swap_ranges<_AlgPolicy>(__first + (__left - __right), __first + __left, __first + __left);
+ __left -= __right;
+ } while (__left > __right);
+ __min_len = __left;
+ }
}
- return __first + __m2;
+ return __last;
}
template <class _AlgPolicy, class _ForwardIterator>
@@ -170,7 +153,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator
return std::__rotate_left<_AlgPolicy>(__first, __last);
if (_IterOps<_AlgPolicy>::next(__middle) == __last)
return std::__rotate_right<_AlgPolicy>(__first, __last);
- return std::__rotate_gcd<_AlgPolicy>(__first, __middle, __last);
+ return std::__rotate_random_access<_AlgPolicy>(__first, __middle, __last);
}
return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last);
}
More information about the libcxx-commits
mailing list