[libcxx-commits] [libcxx] [libc++] Extend the scope of radix sorting inside std::stable_sort to floating-point types (PR #129452)
Дмитрий Изволов via libcxx-commits
libcxx-commits at lists.llvm.org
Fri Apr 4 09:44:55 PDT 2025
https://github.com/izvolov updated https://github.com/llvm/llvm-project/pull/129452
>From 396431491795cc4be6e66c47327937e70d9182a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?=
=?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= <dmitriy at izvolov.ru>
Date: Wed, 2 Apr 2025 09:45:31 +0300
Subject: [PATCH] radix-sort-floats
---
libcxx/docs/ReleaseNotes/21.rst | 3 +
libcxx/include/__algorithm/radix_sort.h | 100 +++++++++++++++++-
libcxx/include/__algorithm/stable_sort.h | 15 ++-
.../alg.sort/stable.sort/stable_sort.pass.cpp | 59 ++++++++++-
4 files changed, 166 insertions(+), 11 deletions(-)
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 7af109ddc8657..8291bae4c31c2 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -60,6 +60,9 @@ Improvements and New Features
- Updated formatting library to Unicode 16.0.0.
+- The ``std::stable_sort`` algorithm now uses radix sort for floating-point types, which can improve performance by
+ up to 10x, depending on the type of the sorted elements and the initial state of the sorted array.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
index f6d9fb1ad7ca9..055d8a0765d7c 100644
--- a/libcxx/include/__algorithm/radix_sort.h
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -29,9 +29,10 @@
#include <__algorithm/for_each.h>
#include <__algorithm/move.h>
+#include <__bit/bit_cast.h>
#include <__bit/bit_log2.h>
-#include <__bit/countl.h>
#include <__config>
+#include <__cstddef/size_t.h>
#include <__functional/identity.h>
#include <__iterator/access.h>
#include <__iterator/distance.h>
@@ -44,9 +45,12 @@
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_assignable.h>
+#include <__type_traits/is_enum.h>
#include <__type_traits/is_integral.h>
#include <__type_traits/is_unsigned.h>
#include <__type_traits/make_unsigned.h>
+#include <__type_traits/void_t.h>
+#include <__utility/declval.h>
#include <__utility/forward.h>
#include <__utility/integer_sequence.h>
#include <__utility/move.h>
@@ -298,6 +302,96 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) {
return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value);
}
+template <size_t _Size>
+struct __unsigned_integer_of_size;
+
+template <>
+struct __unsigned_integer_of_size<1> {
+ using type _LIBCPP_NODEBUG = uint8_t;
+};
+
+template <>
+struct __unsigned_integer_of_size<2> {
+ using type _LIBCPP_NODEBUG = uint16_t;
+};
+
+template <>
+struct __unsigned_integer_of_size<4> {
+ using type _LIBCPP_NODEBUG = uint32_t;
+};
+
+template <>
+struct __unsigned_integer_of_size<8> {
+ using type _LIBCPP_NODEBUG = uint64_t;
+};
+
+# if _LIBCPP_HAS_INT128
+template <>
+struct __unsigned_integer_of_size<16> {
+ using type _LIBCPP_NODEBUG = unsigned __int128;
+};
+# endif
+
+template <size_t _Size>
+using __unsigned_integer_of_size_t _LIBCPP_NODEBUG = typename __unsigned_integer_of_size<_Size>::type;
+
+template <class _Sc>
+using __unsigned_representation_for_t _LIBCPP_NODEBUG = __unsigned_integer_of_size_t<sizeof(_Sc)>;
+
+// The function `__to_ordered_integral` is defined for integers and IEEE 754 floating-point numbers.
+// It returns an integer representation such that, for any `x` and `y` with `x < y`, the expression
+// `__to_ordered_integral(x) < __to_ordered_integral(y)` is true, where `x`, `y` are integers or IEEE 754 floats.
+template <class _Integral, enable_if_t< is_integral<_Integral>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Integral __n) {
+ return __n;
+}
+
+// An overload for IEEE 754 floating-point numbers
+
+// For floats conforming to the IEEE 754 (IEC 559) standard, we know that:
+// 1. The bit representation of positive floats directly reflects their order:
+// When comparing floats by magnitude, the number with the larger exponent is greater, and if the exponents are
+// equal, the one with the larger mantissa is greater.
+// 2. The bit representation of negative floats reflects their reverse order (for the same reasons).
+// 3. The most significant bit (sign bit) is zero for positive floats and one for negative floats. Therefore, in the raw
+// bit representation, any negative number will be greater than any positive number.
+
+// The only exception to this rule is `NaN`, which is unordered by definition.
+
+// Based on the above, to obtain a correctly ordered integral representation of floating-point numbers, we need to:
+// 1. Invert the bit representation (including the sign bit) of negative floats to switch from reverse order to direct
+// order;
+// 2. Invert the sign bit for positive floats.
+
+// Thus, in the final integral representation, we have reversed the order for negative floats and made all negative
+// floats smaller than all positive numbers (by inverting the sign bit).
+template <class _Floating, enable_if_t< numeric_limits<_Floating>::is_iec559, int> = 0>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Floating __f) {
+ using __integral_type = __unsigned_representation_for_t<_Floating>;
+ constexpr auto __bit_count = std::numeric_limits<__integral_type>::digits;
+ constexpr auto __sign_bit_mask = static_cast<__integral_type>(__integral_type{1} << (__bit_count - 1));
+
+ const auto __u = std::__bit_cast<__integral_type>(__f);
+
+ return static_cast<__integral_type>(__u & __sign_bit_mask ? ~__u : __u ^ __sign_bit_mask);
+}
+
+// A user-defined comparison may exist for an enum, so we cannot compare enums as if they were integers.
+template <class _Enum, enable_if_t< is_enum<_Enum>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Enum __e) = delete;
+
+// `long double` varies significantly across platforms and compilers, making it practically
+// impossible to determine its actual bit width for conversion to an ordered integer.
+inline _LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(long double) = delete;
+
+template <class _Tp, class = void>
+inline const bool __is_ordered_integer_representable_v = false;
+
+template <class _Tp>
+inline const bool
+ __is_ordered_integer_representable_v<_Tp, __void_t<decltype(std::__to_ordered_integral(std::declval<_Tp>()))>> =
+ true;
+
struct __low_byte_fn {
template <class _Ip>
_LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const {
@@ -314,7 +408,9 @@ __radix_sort(_RandomAccessIterator1 __first,
_RandomAccessIterator2 __buffer,
_Map __map,
_Radix __radix) {
- auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__shift_to_unsigned(__map(__x)); };
+ auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) {
+ return std::__shift_to_unsigned(__map(std::__to_ordered_integral(__x)));
+ };
std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix);
}
diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index c7f9780e3f627..1ca66f6a51687 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -26,7 +26,6 @@
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_constant_evaluated.h>
-#include <__type_traits/is_integral.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
@@ -201,7 +200,7 @@ struct __stable_sort_switch {
#if _LIBCPP_STD_VER >= 17
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {
- static_assert(is_integral<_Tp>::value);
+ static_assert(__is_ordered_integer_representable_v<_Tp>);
if constexpr (sizeof(_Tp) == 1) {
return 1 << 8;
}
@@ -211,7 +210,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_max_bound() {
- static_assert(is_integral<_Tp>::value);
+ static_assert(__is_ordered_integer_representable_v<_Tp>);
if constexpr (sizeof(_Tp) >= 8) {
return 1 << 15;
}
@@ -245,11 +244,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort(
}
#if _LIBCPP_STD_VER >= 17
- constexpr auto __default_comp = __desugars_to_v<__totally_ordered_less_tag, _Compare, value_type, value_type >;
- constexpr auto __integral_value =
- is_integral_v<value_type > && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
- constexpr auto __allowed_radix_sort = __default_comp && __integral_value;
- if constexpr (__allowed_radix_sort) {
+ constexpr auto __default_comp = __desugars_to_v<__less_tag, _Compare, value_type, value_type >;
+ constexpr auto __radix_sortable =
+ __is_ordered_integer_representable_v<value_type> &&
+ is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
+ if constexpr (__default_comp && __radix_sortable) {
if (__len <= __buff_size && __len >= static_cast<difference_type>(std::__radix_sort_min_bound<value_type>()) &&
__len <= static_cast<difference_type>(std::__radix_sort_max_bound<value_type>())) {
if (__libcpp_is_constant_evaluated()) {
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
index 4ee1d795a23b2..e05457492db32 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
@@ -18,7 +18,9 @@
#include <algorithm>
#include <cassert>
#include <iterator>
+#include <limits>
#include <random>
+#include <type_traits>
#include <vector>
#include "count_new.h"
@@ -68,6 +70,13 @@ TEST_CONSTEXPR_CXX26 std::vector<T> generate_sawtooth(int N, int M) {
if (++x == M)
x = 0;
}
+
+ if (std::is_signed<T>::value) {
+ for (auto& a : v) {
+ a -= (M / 2);
+ }
+ }
+
return v;
}
@@ -193,12 +202,60 @@ TEST_CONSTEXPR_CXX26 bool test() {
return true;
}
+template <class T>
+bool test_floating_special_values() {
+ static_assert(std::is_floating_point<T>::value, "");
+
+ auto v = generate_sawtooth<T>(1024, 512);
+ v.insert(v.end(), 256, static_cast<T>(0.0));
+ v.insert(v.end(), 256, static_cast<T>(-0.0));
+ v.insert(v.end(), 256, std::numeric_limits<T>::infinity());
+ v.insert(v.end(), 256, -std::numeric_limits<T>::infinity());
+
+ std::mt19937 randomness;
+ std::shuffle(v.begin(), v.end(), randomness);
+
+ std::stable_sort(v.begin(), v.end());
+ assert(std::is_sorted(v.begin(), v.end()));
+
+ return true;
+}
+
+template <class T>
+bool test_floating() {
+ return test<T>() && test_floating_special_values<T>();
+}
+
+enum struct Enum : int { a, b, c, d, e, f, g, h };
+TEST_CONSTEXPR_CXX26 bool operator<(Enum x, Enum y) { return static_cast<int>(x) > static_cast<int>(y); }
+
+TEST_CONSTEXPR_CXX26 bool test_enum() {
+ auto v = std::vector<Enum>(128, Enum::a);
+ v.resize(v.size() + 128, Enum::b);
+ v.resize(v.size() + 128, Enum::c);
+ v.resize(v.size() + 128, Enum::d);
+ v.resize(v.size() + 128, Enum::e);
+ v.resize(v.size() + 128, Enum::f);
+ v.resize(v.size() + 128, Enum::g);
+ v.resize(v.size() + 128, Enum::h);
+
+ // The user-defined `operator<` above reverses the order of the underlying values
+ std::stable_sort(v.begin(), v.end());
+ assert(std::is_sorted(v.begin(), v.end()));
+
+ return true;
+}
+
int main(int, char**) {
test<int>();
- test<float>();
+ test_floating<float>();
+ test_floating<double>();
+ test_floating<long double>();
+ test_enum();
#if TEST_STD_VER >= 26
static_assert(test<int>());
static_assert(test<float>());
+ static_assert(test<double>());
// test constexprness of radix sort branch
static_assert(test<char>());
#endif
More information about the libcxx-commits
mailing list