[libcxx-commits] [libcxx] [libcxx][algorithm] Optimize std::stable_sort via radix sort algorithm (PR #104683)

Дмитрий Изволов via libcxx-commits libcxx-commits at lists.llvm.org
Sat Aug 17 15:17:55 PDT 2024


https://github.com/izvolov updated https://github.com/llvm/llvm-project/pull/104683

>From 5717ad80b633d5a2005b98bc35001c3cd0a47887 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?=
 =?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= <dmitriy at izvolov.ru>
Date: Sun, 5 May 2024 11:08:26 +0300
Subject: [PATCH 1/4] radix-sort

---
 libcxx/include/CMakeLists.txt            |   1 +
 libcxx/include/__algorithm/radix_sort.h  | 410 +++++++++++++++++++++++
 libcxx/include/__algorithm/stable_sort.h | 111 ++++--
 libcxx/include/module.modulemap          |   1 +
 4 files changed, 491 insertions(+), 32 deletions(-)
 create mode 100644 libcxx/include/__algorithm/radix_sort.h

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 32579272858a8e..95e4e3faf88671 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -74,6 +74,7 @@ set(files
   __algorithm/prev_permutation.h
   __algorithm/pstl.h
   __algorithm/push_heap.h
+  __algorithm/radix_sort.h
   __algorithm/ranges_adjacent_find.h
   __algorithm/ranges_all_of.h
   __algorithm/ranges_any_of.h
diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
new file mode 100644
index 00000000000000..5e14dec9df0918
--- /dev/null
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -0,0 +1,410 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RADIX_SORT_H
+#define _LIBCPP___ALGORITHM_RADIX_SORT_H
+
+#include <__algorithm/copy.h>
+#include <__algorithm/for_each.h>
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/move_iterator.h>
+#include <__iterator/next.h>
+#include <__numeric/partial_sum.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_assignable.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_unsigned.h>
+#include <__type_traits/make_unsigned.h>
+#include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
+#include <climits>
+#include <cstdint>
+#include <initializer_list>
+#include <limits>
+#include <stdexcept>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+// No-op sink for a braced list of ints. It exists only so that EXPAND_VARIADIC has a
+// call whose argument can host a parameter-pack expansion.
+inline void __variadic_expansion_dummy(initializer_list<int>) {}
+
+// Evaluates `expression` once for every element of the parameter pack it mentions.
+// Each instance becomes one `(expression, 0)` entry of a braced initializer list, so the
+// instances are evaluated in pack order and their results are discarded.
+// NOTE(review): the macro name is not a reserved identifier; it is #undef'ed at the end
+// of the C++17 section of this header.
+#  define EXPAND_VARIADIC(expression) __variadic_expansion_dummy({(expression, 0)...})
+
+// Wraps `__i` in a move_iterator when the iterator's value_type is move-assignable, so
+// that code reading through the result receives rvalues and can move elements instead
+// of copying them.
+template <typename _Iterator>
+constexpr auto __move_assign_please(_Iterator __i)
+    -> enable_if_t<is_move_assignable<typename iterator_traits<_Iterator>::value_type>::value,
+                   move_iterator<_Iterator> > {
+  // Qualified so that ADL cannot pick up a make_move_iterator from the iterator's namespace.
+  return std::make_move_iterator(std::move(__i));
+}
+
+// Fallback for value types that are not move-assignable: the iterator is returned unchanged.
+template <typename _Iterator>
+constexpr auto __move_assign_please(_Iterator __i)
+    -> enable_if_t<not is_move_assignable<typename iterator_traits<_Iterator>::value_type>::value, _Iterator> {
+  return __i;
+}
+
+// Computes floor(log2(__integer)) for a positive argument by counting how many one-bit
+// right shifts can be applied while the shifted value stays positive.
+// No validation happens here; the result is 0 whenever the first shift already yields a
+// non-positive value.
+template <typename _Integer>
+constexpr _Integer __intlog2_impl(_Integer __integer) {
+  _Integer __shifts = 0;
+
+  for (__integer >>= 1; __integer > 0; __integer >>= 1)
+    ++__shifts;
+
+  return __shifts;
+}
+
+// Checked integer binary logarithm.
+// Returns floor(log2(__integer)) for a positive argument and throws domain_error for
+// zero or negative input. Only integral types are accepted (enforced at compile time).
+template <typename _Integer>
+constexpr _Integer __intlog2(_Integer __integer) {
+  static_assert(is_integral<_Integer>::value, "Must be an integral type");
+
+  if (!(__integer > 0))
+    throw domain_error("The binary logarithm is not defined on non-positive numbers");
+
+  return __intlog2_impl(__integer);
+}
+
+// Writes the running (inclusive) sums of [__first, __last) to __result and, in the same
+// pass, tracks the largest input element.
+//
+// Returns {iterator one past the last value written, largest input element}. For an
+// empty range nothing is written and {__result, 0} is returned.
+//
+// Every input element is read before the output element at the same position is
+// written, so the output may start at __first (this header calls it that way).
+template <typename _InputIterator, typename _OutputIterator>
+pair<_OutputIterator, typename iterator_traits<_InputIterator>::value_type>
+__partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) {
+  using _Vp = typename iterator_traits<_InputIterator>::value_type;
+
+  if (__first == __last)
+    return {__result, 0};
+
+  auto __largest = *__first;
+  _Vp __running  = *__first;
+  *__result      = __running;
+
+  for (++__first; __first != __last; ++__first) {
+    if (__largest < *__first)
+      __largest = *__first;
+    __running = std::move(__running) + *__first;
+    ++__result;
+    *__result = __running;
+  }
+
+  ++__result;
+  return {__result, __largest};
+}
+
+// Compile-time parameters of a radix sort over values of type _Value, where _Map turns
+// a value into its sort key and _Radix extracts one digit from a key.
+template <typename _Value, typename _Map, typename _Radix>
+struct __radix_sort_traits {
+  // Key type: the decayed result of invoking _Map on a _Value. Must be an unsigned integer type.
+  using image_type = decay_t<invoke_result_t<_Map, _Value> >;
+  static_assert(is_integral<image_type>::value, "");
+  static_assert(is_unsigned<image_type>::value, "");
+
+  // Digit type: the decayed result of invoking _Radix on a key. Must be an integer type.
+  using radix_type = decay_t<invoke_result_t<_Radix, image_type> >;
+  static_assert(is_integral<radix_type>::value, "");
+
+  // radix_value_range: one more than the largest radix_type value (256 for uint8_t).
+  // NOTE(review): the `+ 1` is evaluated in the promoted type of radix_type, so a digit
+  // type as wide as int (or wider) would wrap or overflow here -- confirm that only
+  // narrow digit types are intended.
+  // radix_size: floor(log2(radix_value_range)), i.e. the number of key bits per digit.
+  // radix_count: how many radix_size-bit digits fit in image_type (integer division).
+  constexpr static auto radix_value_range = numeric_limits<radix_type>::max() + 1;
+  constexpr static auto radix_size        = __intlog2<uint64_t>(radix_value_range);
+  constexpr static auto radix_count       = sizeof(image_type) * CHAR_BIT / radix_size;
+};
+
+// Compile-time parameters of a counting sort over values of type _Value keyed by _Map.
+template <typename _Value, typename _Map>
+struct __counting_sort_traits {
+  // Key type: the decayed result of invoking _Map on a _Value. Must be an unsigned integer type.
+  using image_type = decay_t<invoke_result_t<_Map, _Value> >;
+  static_assert(is_integral<image_type>::value, "");
+  static_assert(is_unsigned<image_type>::value, "");
+
+  // value_range: one more than the largest image_type value, i.e. the number of distinct keys.
+  // NOTE(review): as the `+ 1` is evaluated in the promoted type of image_type, this is
+  // only meaningful for key types narrower than int -- confirm against the callers.
+  // radix_size: floor(log2(value_range)), i.e. the number of bits in one key.
+  constexpr static const auto value_range = numeric_limits<image_type>::max() + 1;
+  constexpr static auto radix_size        = __intlog2<uint64_t>(value_range);
+};
+
+// Builds a callable that extracts digit number __radix_number from an unsigned integer.
+//
+// The returned closure shifts its argument right by (bits per digit * __radix_number),
+// converts the shifted value back to the argument's type and passes it to __radix.
+// The number of bits per digit is derived from the result type of __radix via
+// __counting_sort_traits.
+template <typename _Radix>
+auto __nth_radix(size_t __radix_number, _Radix __radix) {
+  return [__radix_number, __radix = std::move(__radix)](auto __n) {
+    using value_type = decltype(__n);
+    static_assert(is_integral<value_type>::value, "");
+    static_assert(is_unsigned<value_type>::value, "");
+    using traits = __counting_sort_traits<value_type, _Radix>;
+
+    // `*` binds tighter than `>>`: the shift amount is radix_size * __radix_number.
+    return __radix(static_cast<value_type>(__n >> traits::radix_size * __radix_number));
+  };
+}
+
+// Histogram pass: for every element of [__first, __last) increments the counter
+// selected by the element's key, i.e. __counters[__map(element)].
+template <typename _ForwardIterator, typename _Map, typename _RandomAccessIterator>
+void __count(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+  for (; __first != __last; ++__first) {
+    // Bound as const& so that __map always sees a const lvalue, even for move iterators.
+    const auto& __element = *__first;
+    ++__counters[__map(__element)];
+  }
+}
+
+// Counting-sort preparation: fills __counters with a histogram of the keys of
+// [__first, __last) and then replaces the histogram, in place, by its running sums.
+//
+// __counters must address at least traits::value_range counters, one per possible key.
+template <typename _ForwardIterator, typename _Map, typename _RandomAccessIterator>
+void __collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+  using value_type = typename iterator_traits<_ForwardIterator>::value_type;
+  using traits     = __counting_sort_traits<value_type, _Map>;
+
+  // Calls are qualified so that ADL on user-supplied iterator or functor types cannot
+  // redirect them.
+  std::__count(__first, __last, __map, __counters);
+
+  const auto __counters_end = __counters + traits::value_range;
+  std::partial_sum(__counters, __counters_end, __counters);
+}
+
+// Forward scatter pass of a counting sort.
+//
+// Walks [__first, __last) front to back. For each element it takes the current value of
+// the counter selected by the element's key as the destination index in __result,
+// post-increments that counter, and assigns the element to that slot. The element is
+// forwarded with its original value category, so move iterators result in move-assignment.
+template <typename _ForwardIterator, typename _RandomAccessIterator1, typename _Map, typename _RandomAccessIterator2>
+void __dispose(_ForwardIterator __first,
+               _ForwardIterator __last,
+               _RandomAccessIterator1 __result,
+               _Map __map,
+               _RandomAccessIterator2 __counters) {
+  for (; __first != __last; ++__first) {
+    auto&& __element = *__first;
+    auto __slot      = __counters[__map(__element)]++;
+    __result[__slot] = std::forward<decltype(__element)>(__element);
+  }
+}
+
+// Backward scatter pass of a counting sort.
+//
+// Walks [__first, __last) from back to front. For each element the counter selected by
+// the element's key is pre-decremented and the new counter value is used as the
+// destination index in __result; the element is forwarded into that slot.
+//
+// NOTE(review): the name lacks the leading double underscore used by the other helpers
+// in this header, and the body matches __dispose_backward, which is the function the
+// calls visible in this header use -- this looks like a leftover duplicate; confirm
+// before removing.
+template <typename _BidirectionalIterator,
+          typename _RandomAccessIterator1,
+          typename _Map,
+          typename _RandomAccessIterator2>
+void dispose_backward(_BidirectionalIterator __first,
+                      _BidirectionalIterator __last,
+                      _RandomAccessIterator1 __result,
+                      _Map __map,
+                      _RandomAccessIterator2 __counters) {
+  std::for_each(make_reverse_iterator(__last),
+                make_reverse_iterator(__first),
+                [&__result, &__counters, &__map](auto&& __preimage) {
+                  auto __index      = --__counters[__map(__preimage)];
+                  __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+                });
+}
+
+// Single pass over [__first, __last) that prepares every radix pass at once.
+//
+// For each element the key is computed once with __map. Then, for every radix index in
+// _Radices, the counter selected by that digit of the key is incremented in
+// __counters[index]. The same pass records whether the keys are already in
+// non-decreasing order.
+//
+// Afterwards each __counters[index] is replaced in place by its running sums, and the
+// largest single counter of that radix (taken before summation) is stored in
+// __maximums[index].
+//
+// Returns true when the keys of the input were already in non-decreasing order.
+template <typename _ForwardIterator,
+          typename _Map,
+          typename _Radix,
+          typename _RandomAccessIterator1,
+          typename _RandomAccessIterator2,
+          size_t... _Radices>
+bool __collect_impl(
+    _ForwardIterator __first,
+    _ForwardIterator __last,
+    _Map __map,
+    _Radix __radix,
+    _RandomAccessIterator1 __counters,
+    _RandomAccessIterator2 __maximums,
+    index_sequence<_Radices...>) {
+  using value_type                   = typename iterator_traits<_ForwardIterator>::value_type;
+  constexpr auto __radix_value_range = __radix_sort_traits<value_type, _Map, _Radix>::radix_value_range;
+
+  // Starting from the smallest key makes the first comparison below succeed trivially.
+  auto __previous  = numeric_limits<invoke_result_t<_Map, value_type> >::min();
+  auto __is_sorted = true;
+  // Calls are qualified (no ADL) and the lambda parameter uses a reserved name so that
+  // user macros cannot interfere with this header.
+  std::for_each(__first, __last, [&__counters, &__map, &__radix, &__previous, &__is_sorted](const auto& __value) {
+    auto __current = __map(__value);
+    __is_sorted &= (__current >= __previous);
+    __previous = __current;
+
+    EXPAND_VARIADIC(++__counters[_Radices][std::__nth_radix(_Radices, __radix)(__current)]);
+  });
+
+  EXPAND_VARIADIC(
+      __maximums[_Radices] =
+          std::__partial_sum_max(
+              __counters[_Radices], __counters[_Radices] + __radix_value_range, __counters[_Radices])
+              .second);
+
+  return __is_sorted;
+}
+
+// Radix-sort preparation entry point: forwards to __collect_impl with one index per
+// digit of the key type (0 .. radix_count - 1) and returns its result, i.e. whether the
+// keys of [__first, __last) were already in non-decreasing order.
+template <typename _ForwardIterator,
+          typename _Map,
+          typename _Radix,
+          typename _RandomAccessIterator1,
+          typename _RandomAccessIterator2>
+bool __collect(_ForwardIterator __first,
+               _ForwardIterator __last,
+               _Map __map,
+               _Radix __radix,
+               _RandomAccessIterator1 __counters,
+               _RandomAccessIterator2 __maximums) {
+  using _Traits = __radix_sort_traits<typename iterator_traits<_ForwardIterator>::value_type, _Map, _Radix>;
+  using _Digits = make_index_sequence<_Traits::radix_count>;
+
+  return __collect_impl(__first, __last, __map, __radix, __counters, __maximums, _Digits());
+}
+
+// Backward scatter pass of a counting/radix sort.
+//
+// Walks [__first, __last) from back to front. For each element the counter selected by
+// the element's key is pre-decremented and the new counter value is the destination
+// index in __result. With counters holding running sums of the key histogram, elements
+// with equal keys keep their relative order.
+//
+// The element is forwarded with its original value category, so move iterators result
+// in move-assignment into __result.
+template <typename _BidirectionalIterator,
+          typename _RandomAccessIterator1,
+          typename _Map,
+          typename _RandomAccessIterator2>
+void __dispose_backward(_BidirectionalIterator __first,
+                        _BidirectionalIterator __last,
+                        _RandomAccessIterator1 __result,
+                        _Map __map,
+                        _RandomAccessIterator2 __counters) {
+  // Calls are qualified (no ADL) and the lambda parameter uses a reserved name so that
+  // user code cannot interfere with this header.
+  std::for_each(std::make_reverse_iterator(__last),
+                std::make_reverse_iterator(__first),
+                [&__result, &__counters, &__map](auto&& __preimage) {
+                  auto __index      = --__counters[__map(__preimage)];
+                  __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+                });
+}
+
+// Counting sort of [__first, __last) into the range starting at __result, keyed by __map.
+//
+// The counter array has one slot more than there are possible keys. The histogram is
+// accumulated starting at slot 1, so after the running sums slot k holds the number of
+// elements whose key is smaller than k, which is the first output index for key k.
+// __dispose then scatters the elements front to back using those start indices.
+//
+// Returns the end of the output range (__result + number of elements processed).
+template <typename _ForwardIterator, typename _RandomAccessIterator, typename _Map>
+_RandomAccessIterator
+__counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) {
+  using value_type = typename iterator_traits<_ForwardIterator>::value_type;
+  using traits     = __counting_sort_traits<value_type, _Map>;
+
+  using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
+  difference_type __counters[traits::value_range + 1] = {0};
+
+  // Calls are qualified so that ADL cannot redirect them.
+  std::__collect(__first, __last, __map, std::next(std::begin(__counters)));
+  std::__dispose(__first, __last, __result, __map, std::begin(__counters));
+
+  // No key maps to the last slot, so it still holds the total element count.
+  return __result + __counters[traits::value_range];
+}
+
+// Radix sort for key types that consist of exactly one digit (radix_count == 1).
+//
+// A single counting sort keyed by __radix(__map(element)) places the elements into the
+// buffer, and the sorted buffer is then copied back to [__first, __last). Elements are
+// moved rather than copied when their type is move-assignable.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+typename enable_if<
+    __radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::radix_count == 1,
+    void>::type
+__radix_sort_impl(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer,
+                  _Map __map,
+                  _Radix __radix) {
+  // Parameter and lambda-parameter names are reserved identifiers and helper calls are
+  // qualified, so user macros and ADL cannot interfere with this header.
+  auto __buffer_end = std::__counting_sort_impl(
+      std::__move_assign_please(__first),
+      std::__move_assign_please(__last),
+      __buffer,
+      [&__map, &__radix](const auto& __value) { return __radix(__map(__value)); });
+
+  std::copy(std::__move_assign_please(__buffer), std::__move_assign_please(__buffer_end), __first);
+}
+
+// Least-significant-digit radix sort for key types with an even number of digits.
+//
+// One pass over the input (__collect) builds the running-sum counters of every digit,
+// the largest counter per digit, and tells whether the input is already sorted; in that
+// case nothing else is done.
+//
+// Otherwise the digits are processed in pairs: the first digit of a pair moves the
+// elements from [__first, __last) into the buffer, the second moves them back, so the
+// result always ends up in the original range. A digit whose largest counter equals the
+// range size has the same value in every element; such a pass degenerates to a plain
+// copy, and a pair of two such digits is skipped entirely.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+typename enable_if<
+    __radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::radix_count % 2 ==
+        0,
+    void>::type
+__radix_sort_impl(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer_begin,
+                  _Map __map,
+                  _Radix __radix) {
+  using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type;
+  using traits     = __radix_sort_traits<value_type, _Map, _Radix>;
+
+  using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
+  difference_type __counters[traits::radix_count][traits::radix_value_range] = {{0}};
+  difference_type __maximums[traits::radix_count]                            = {0};
+  const auto __is_sorted = __collect(__first, __last, __map, __radix, __counters, __maximums);
+  if (not __is_sorted) {
+    const auto __range_size = distance(__first, __last);
+    auto __buffer_end       = __buffer_begin + __range_size;
+    for (size_t __radix_number = 0; __radix_number < traits::radix_count; __radix_number += 2) {
+      const auto __n0th_is_single = __maximums[__radix_number] == __range_size;
+      const auto __n1th_is_single = __maximums[__radix_number + 1] == __range_size;
+
+      // Both digits of the pair are constant across the range: the order cannot change.
+      if (__n0th_is_single && __n1th_is_single) {
+        continue;
+      }
+
+      // First digit of the pair: original range -> buffer. The calls below are
+      // qualified so that ADL on user-supplied types cannot redirect them.
+      if (__n0th_is_single) {
+        std::copy(std::__move_assign_please(__first), std::__move_assign_please(__last), __buffer_begin);
+      } else {
+        auto __n0th = [__radix_number, &__map, &__radix](const auto& __v) {
+          return std::__nth_radix(__radix_number, __radix)(__map(__v));
+        };
+        std::__dispose_backward(
+            std::__move_assign_please(__first),
+            std::__move_assign_please(__last),
+            __buffer_begin,
+            __n0th,
+            __counters[__radix_number]);
+      }
+
+      // Second digit of the pair: buffer -> original range.
+      if (__n1th_is_single) {
+        std::copy(std::__move_assign_please(__buffer_begin), std::__move_assign_please(__buffer_end), __first);
+      } else {
+        auto __n1th = [__radix_number, &__map, &__radix](const auto& __v) {
+          return std::__nth_radix(__radix_number + 1, __radix)(__map(__v));
+        };
+        std::__dispose_backward(
+            std::__move_assign_please(__buffer_begin),
+            std::__move_assign_please(__buffer_end),
+            __first,
+            __n1th,
+            __counters[__radix_number + 1]);
+      }
+    }
+  }
+}
+
+// bool is passed through unchanged.
+constexpr bool __to_unsigned(bool __b) { return __b; }
+
+// Maps an integer to an unsigned integer of the same width by XOR-ing it with the
+// minimum value of its type and converting the result to the unsigned counterpart.
+// For unsigned types the minimum is 0, so the value is unchanged. For signed
+// two's-complement types the minimum has only the sign bit set, so the sign bit is
+// flipped and the unsigned images compare in the same order as the signed inputs.
+template <typename _Ip>
+constexpr auto __to_unsigned(_Ip __n) {
+  using _Up = make_unsigned_t<_Ip>;
+  return static_cast<_Up>(__n ^ numeric_limits<_Ip>::min());
+}
+
+// Function object that returns its argument unchanged, preserving the value category
+// (lvalues come back as lvalue references, rvalues as rvalue references).
+struct __identity_fn {
+  template <typename _Tp>
+  constexpr _Tp&& operator()(_Tp&& __value) const {
+    return static_cast<_Tp&&>(__value);
+  }
+};
+
+// Function object that returns the least significant eight bits of an unsigned integer
+// as a uint8_t. Non-integral and signed argument types are rejected at compile time.
+struct __low_byte_fn {
+  template <typename _Ip>
+  constexpr uint8_t operator()(_Ip __integer) const {
+    static_assert(is_integral<_Ip>::value, "");
+    static_assert(is_unsigned<_Ip>::value, "");
+
+    const auto __masked = __integer & 0xff;
+    return static_cast<uint8_t>(__masked);
+  }
+};
+
+// Radix-sorts [__first, __last) using __buffer as scratch space.
+//
+// __map turns an element into its integer key and __radix extracts one digit from an
+// unsigned key. The key is first passed through __to_unsigned so that the
+// implementation only ever sees unsigned keys.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+void __radix_sort(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer,
+                  _Map __map,
+                  _Radix __radix) {
+  // Parameter names are reserved identifiers and helper calls are qualified, so user
+  // macros and ADL cannot interfere with this header.
+  auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__to_unsigned(__map(__x)); };
+  std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix);
+}
+
+// Convenience overload: radix-sorts [__first, __last) using the elements themselves as
+// keys (__identity_fn) and one byte per digit (__low_byte_fn). __buffer is scratch space.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+void __radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer) {
+  // The parameter name is a reserved identifier so that user macros cannot clash with it.
+  std::__radix_sort(__first, __last, __buffer, __identity_fn{}, __low_byte_fn{});
+}
+
+// Tag-dispatched overload selected when radix sort is enabled: sorts [__first, __last)
+// with the elements as keys and one byte per digit, using __buffer as scratch space.
+// Returns true to tell the caller that the range has been sorted.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+bool __radix_sort(_RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last,
+                  _RandomAccessIterator2 __buffer,
+                  _BoolConstant<true>) {
+  // The parameter name is a reserved identifier so that user macros cannot clash with it.
+  std::__radix_sort(__first, __last, __buffer, __identity_fn{}, __low_byte_fn{});
+  return true;
+}
+
+// Tag-dispatched overload selected when radix sort is disabled: leaves the range
+// untouched and returns false so that the caller falls back to another algorithm.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+bool __radix_sort(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BoolConstant<false>) {
+  return false;
+}
+
+#  undef EXPAND_VARIADIC
+
+#else // _LIBCPP_STD_VER > 14
+
+// Stand-in used when the C++17 implementation above is compiled out: accepts either tag,
+// leaves the range untouched and returns false so that the caller falls back to another
+// algorithm.
+// The template parameter is deliberately not called `_B`: some C libraries define `_B`
+// as a macro in <ctype.h>.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, bool _EnableRadixSort>
+bool __radix_sort(
+    _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BoolConstant<_EnableRadixSort>) {
+  return false;
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___ALGORITHM_RADIX_SORT_H
diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index 726e7e16b3564a..f8624726a4e323 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -13,6 +13,7 @@
 #include <__algorithm/comp_ref_type.h>
 #include <__algorithm/inplace_merge.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/radix_sort.h>
 #include <__algorithm/sort.h>
 #include <__config>
 #include <__debug_utils/strict_weak_ordering_check.h>
@@ -20,6 +21,9 @@
 #include <__memory/destruct_n.h>
 #include <__memory/temporary_buffer.h>
 #include <__memory/unique_ptr.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/is_trivially_assignable.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
@@ -133,20 +137,24 @@ _LIBCPP_HIDE_FROM_ABI void __merge_move_assign(
     *__result = _Ops::__iter_move(__first2);
 }
 
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort(_RandomAccessIterator __first,
-                   _RandomAccessIterator __last,
-                   _Compare __comp,
-                   typename iterator_traits<_RandomAccessIterator>::difference_type __len,
-                   typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
-                   ptrdiff_t __buff_size);
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _EnableRadixSort>
+void __stable_sort(
+    _RandomAccessIterator __first,
+    _RandomAccessIterator __last,
+    _Compare __comp,
+    typename iterator_traits<_RandomAccessIterator>::difference_type __len,
+    typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
+    ptrdiff_t __buff_size,
+    _BoolConstant<_EnableRadixSort>);
 
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort_move(_RandomAccessIterator __first1,
-                        _RandomAccessIterator __last1,
-                        _Compare __comp,
-                        typename iterator_traits<_RandomAccessIterator>::difference_type __len,
-                        typename iterator_traits<_RandomAccessIterator>::value_type* __first2) {
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _EnableRadixSort>
+void __stable_sort_move(
+    _RandomAccessIterator __first1,
+    _RandomAccessIterator __last1,
+    _Compare __comp,
+    typename iterator_traits<_RandomAccessIterator>::difference_type __len,
+    typename iterator_traits<_RandomAccessIterator>::value_type* __first2,
+    _BoolConstant<_EnableRadixSort> __rs) {
   using _Ops = _IterOps<_AlgPolicy>;
 
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
@@ -179,8 +187,8 @@ void __stable_sort_move(_RandomAccessIterator __first1,
   }
   typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2;
   _RandomAccessIterator __m                                             = __first1 + __l2;
-  std::__stable_sort<_AlgPolicy, _Compare>(__first1, __m, __comp, __l2, __first2, __l2);
-  std::__stable_sort<_AlgPolicy, _Compare>(__m, __last1, __comp, __len - __l2, __first2 + __l2, __len - __l2);
+  std::__stable_sort<_AlgPolicy, _Compare>(__first1, __m, __comp, __l2, __first2, __l2, __rs);
+  std::__stable_sort<_AlgPolicy, _Compare>(__m, __last1, __comp, __len - __l2, __first2 + __l2, __len - __l2, __rs);
   std::__merge_move_construct<_AlgPolicy, _Compare>(__first1, __m, __m, __last1, __first2, __comp);
 }
 
@@ -189,13 +197,35 @@ struct __stable_sort_switch {
   static const unsigned value = 128 * is_trivially_copy_assignable<_Tp>::value;
 };
 
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort(_RandomAccessIterator __first,
-                   _RandomAccessIterator __last,
-                   _Compare __comp,
-                   typename iterator_traits<_RandomAccessIterator>::difference_type __len,
-                   typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
-                   ptrdiff_t __buff_size) {
+// Smallest range length for which __stable_sort attempts radix sort: 1 << 10 by
+// default, 1 << 8 for integral types that are one byte wide.
+template <class _Tp, class = void>
+struct __radix_sort_min_switch {
+  static const unsigned value = (1 << 10);
+};
+
+template <class _Int8>
+struct __radix_sort_min_switch<_Int8, enable_if_t<is_integral_v<_Int8> && sizeof(_Int8) == 1>> {
+  static const unsigned value = (1 << 8);
+};
+
+// Largest range length for which __stable_sort attempts radix sort: 1 << 16 by
+// default, 1 << 15 for integral types that are eight bytes wide.
+template <class _Tp, class = void>
+struct __radix_sort_max_switch {
+  static const unsigned value = (1 << 16);
+};
+
+template <class _Int64>
+struct __radix_sort_max_switch<_Int64, enable_if_t<is_integral_v<_Int64> && sizeof(_Int64) == 8>> {
+  static const unsigned value = (1 << 15);
+};
+
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _EnableRadixSort>
+void __stable_sort(
+    _RandomAccessIterator __first,
+    _RandomAccessIterator __last,
+    _Compare __comp,
+    typename iterator_traits<_RandomAccessIterator>::difference_type __len,
+    typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
+    ptrdiff_t __buff_size,
+    _BoolConstant<_EnableRadixSort> __rs) {
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
   switch (__len) {
@@ -211,14 +241,20 @@ void __stable_sort(_RandomAccessIterator __first,
     std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
     return;
   }
+  if (__len <= __buff_size && __len >= static_cast<difference_type>(__radix_sort_min_switch<value_type>::value) &&
+      __len <= static_cast<difference_type>(__radix_sort_max_switch<value_type>::value)) {
+    if (std::__radix_sort(__first, __last, __buff, __rs)) {
+      return;
+    }
+  }
   typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2;
   _RandomAccessIterator __m                                             = __first + __l2;
   if (__len <= __buff_size) {
     __destruct_n __d(0);
     unique_ptr<value_type, __destruct_n&> __h2(__buff, __d);
-    std::__stable_sort_move<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff);
+    std::__stable_sort_move<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff, __rs);
     __d.__set(__l2, (value_type*)nullptr);
-    std::__stable_sort_move<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff + __l2);
+    std::__stable_sort_move<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff + __l2, __rs);
     __d.__set(__len, (value_type*)nullptr);
     std::__merge_move_assign<_AlgPolicy, _Compare>(
         __buff, __buff + __l2, __buff + __l2, __buff + __len, __first, __comp);
@@ -229,14 +265,17 @@ void __stable_sort(_RandomAccessIterator __first,
     //                                  __first, __comp);
     return;
   }
-  std::__stable_sort<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff, __buff_size);
-  std::__stable_sort<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff, __buff_size);
+  std::__stable_sort<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff, __buff_size, __rs);
+  std::__stable_sort<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff, __buff_size, __rs);
   std::__inplace_merge<_AlgPolicy>(__first, __m, __last, __comp, __l2, __len - __l2, __buff, __buff_size);
 }
 
-template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
-inline _LIBCPP_HIDE_FROM_ABI void
-__stable_sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) {
+template <class _AlgPolicy, class _RandomAccessIterator, class _Compare, bool _EnableRadixSort = false>
+inline _LIBCPP_HIDE_FROM_ABI void __stable_sort_impl(
+    _RandomAccessIterator __first,
+    _RandomAccessIterator __last,
+    _Compare& __comp,
+    _BoolConstant<_EnableRadixSort> __rs = _BoolConstant<false>{}) {
   using value_type      = typename iterator_traits<_RandomAccessIterator>::value_type;
   using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
 
@@ -251,19 +290,27 @@ __stable_sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last,
     __h.reset(__buf.first);
   }
 
-  std::__stable_sort<_AlgPolicy, __comp_ref_type<_Compare> >(__first, __last, __comp, __len, __buf.first, __buf.second);
+  std::__stable_sort<_AlgPolicy, __comp_ref_type<_Compare> >(
+      __first, __last, __comp, __len, __buf.first, __buf.second, __rs);
   std::__check_strict_weak_ordering_sorted(__first, __last, __comp);
 }
 
 template <class _RandomAccessIterator, class _Compare>
 inline _LIBCPP_HIDE_FROM_ABI void
 stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
-  std::__stable_sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
+  std::__stable_sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp, _BoolConstant<false>());
 }
 
 template <class _RandomAccessIterator>
 inline _LIBCPP_HIDE_FROM_ABI void stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
-  std::stable_sort(__first, __last, __less<>());
+  using value_type     = typename iterator_traits<_RandomAccessIterator>::value_type;
+  using reference_type = typename iterator_traits<_RandomAccessIterator>::reference;
+  auto __comp          = __less<>();
+  std::__stable_sort_impl<_ClassicAlgPolicy>(
+      std::move(__first),
+      std::move(__last),
+      __comp,
+      _BoolConstant < is_integral<value_type>::value && is_same<value_type&, reference_type>::value > ());
 }
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 13d0dce34d97e3..b6125add8acf2d 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -720,6 +720,7 @@ module std_private_algorithm_pstl                                        [system
   export *
 }
 module std_private_algorithm_push_heap                                   [system] { header "__algorithm/push_heap.h" }
+module std_private_algorithm_radix_sort                                  [system] { header "__algorithm/radix_sort.h" }
 module std_private_algorithm_ranges_adjacent_find                        [system] { header "__algorithm/ranges_adjacent_find.h" }
 module std_private_algorithm_ranges_all_of                               [system] { header "__algorithm/ranges_all_of.h" }
 module std_private_algorithm_ranges_any_of                               [system] { header "__algorithm/ranges_any_of.h" }

>From edaab46ed6286d4170daaad78af6bfad8791b6c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?=
 =?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= <dmitriy at izvolov.ru>
Date: Sat, 17 Aug 2024 17:21:06 +0300
Subject: [PATCH 2/4] ranged

---
 libcxx/include/__algorithm/ranges_stable_sort.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/ranges_stable_sort.h b/libcxx/include/__algorithm/ranges_stable_sort.h
index 9c7df80ae98722..96d84b208687fc 100644
--- a/libcxx/include/__algorithm/ranges_stable_sort.h
+++ b/libcxx/include/__algorithm/ranges_stable_sort.h
@@ -24,6 +24,8 @@
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/dangling.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
 #include <__utility/forward.h>
 #include <__utility/move.h>
 
@@ -45,7 +47,18 @@ struct __stable_sort {
     auto __last_iter = ranges::next(__first, __last);
 
     auto&& __projected_comp = std::__make_projected(__comp, __proj);
-    std::__stable_sort_impl<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp);
+    constexpr auto __default_comp        = is_same_v<_Comp, ranges::less>;
+    constexpr auto __default_proj        = is_same_v<_Proj, identity>;
+    constexpr auto __integral_value      = is_integral_v<iter_value_t<_Iter>>;
+    constexpr auto __integral_projection = __default_proj && __integral_value;
+    // constexpr auto __integral_projection = is_integral_v<remove_reference_t<invoke_result_t<_Proj&,
+    // iter_value_t<_Iter>>>>;
+    // TODO: Support projection in stable_sort
+    std::__stable_sort_impl<_RangeAlgPolicy>(
+        std::move(__first),
+        __last_iter,
+        __projected_comp,
+        _BoolConstant < __default_comp && __integral_projection > {});
 
     return __last_iter;
   }

>From ec4f083b90171e86d091f30bd67140efe6dd8927 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?=
 =?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= <dmitriy at izvolov.ru>
Date: Sun, 18 Aug 2024 01:17:10 +0300
Subject: [PATCH 3/4] distance

---
 libcxx/include/__algorithm/radix_sort.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
index 5e14dec9df0918..8b3dce525fbcf2 100644
--- a/libcxx/include/__algorithm/radix_sort.h
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -13,6 +13,7 @@
 #include <__algorithm/copy.h>
 #include <__algorithm/for_each.h>
 #include <__config>
+#include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/move_iterator.h>
 #include <__iterator/next.h>
@@ -299,7 +300,7 @@ __radix_sort_impl(_RandomAccessIterator1 __first,
   difference_type __maximums[traits::radix_count]                            = {0};
   const auto __is_sorted = __collect(__first, __last, __map, __radix, __counters, __maximums);
   if (not __is_sorted) {
-    const auto __range_size = distance(__first, __last);
+    const auto __range_size = std::distance(__first, __last);
     auto __buffer_end       = __buffer_begin + __range_size;
     for (size_t __radix_number = 0; __radix_number < traits::radix_count; __radix_number += 2) {
       const auto __n0th_is_single = __maximums[__radix_number] == __range_size;

>From 23fa74b05abba227e3f6737fbf7278071830f063 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?=
 =?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= <dmitriy at izvolov.ru>
Date: Sun, 18 Aug 2024 01:17:24 +0300
Subject: [PATCH 4/4] c++03

---
 libcxx/include/__algorithm/stable_sort.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index f8624726a4e323..8e6c02b55ea630 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -203,7 +203,7 @@ struct __radix_sort_min_switch {
 };
 
 template <class _Int8>
-struct __radix_sort_min_switch<_Int8, enable_if_t<is_integral_v<_Int8> && sizeof(_Int8) == 1>> {
+struct __radix_sort_min_switch<_Int8, enable_if_t<is_integral<_Int8>::value && sizeof(_Int8) == 1> > {
   static const unsigned value = (1 << 8);
 };
 
@@ -213,7 +213,7 @@ struct __radix_sort_max_switch {
 };
 
 template <class _Int64>
-struct __radix_sort_max_switch<_Int64, enable_if_t<is_integral_v<_Int64> && sizeof(_Int64) == 8>> {
+struct __radix_sort_max_switch<_Int64, enable_if_t<is_integral<_Int64>::value && sizeof(_Int64) == 8> > {
   static const unsigned value = (1 << 15);
 };
 
@@ -275,7 +275,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __stable_sort_impl(
     _RandomAccessIterator __first,
     _RandomAccessIterator __last,
     _Compare& __comp,
-    _BoolConstant<_EnableRadixSort> __rs = _BoolConstant<false>{}) {
+    _BoolConstant<_EnableRadixSort> __rs = _BoolConstant<false>()) {
   using value_type      = typename iterator_traits<_RandomAccessIterator>::value_type;
   using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
 



More information about the libcxx-commits mailing list