[libc] [llvm] Revert "[libc] Improve qsort" (PR #121303)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 29 13:00:56 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: Schrodinger ZHU Yifan (SchrodingerZhu)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#120450 ("[libc] Improve qsort").
---
Patch is 42.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121303.diff
14 Files Affected:
- (modified) libc/src/stdlib/heap_sort.h (+6-6)
- (modified) libc/src/stdlib/qsort.cpp (+6-4)
- (modified) libc/src/stdlib/qsort_data.h (+70-101)
- (removed) libc/src/stdlib/qsort_pivot.h (-85)
- (modified) libc/src/stdlib/qsort_r.cpp (+6-5)
- (modified) libc/src/stdlib/qsort_util.h (+5-42)
- (modified) libc/src/stdlib/quick_sort.h (+56-147)
- (modified) libc/test/src/stdlib/CMakeLists.txt (+15-3)
- (modified) libc/test/src/stdlib/SortingTest.h (+87-112)
- (modified) libc/test/src/stdlib/heap_sort_test.cpp (+4-14)
- (modified) libc/test/src/stdlib/qsort_r_test.cpp (+2-2)
- (added) libc/test/src/stdlib/qsort_test.cpp (+17)
- (modified) libc/test/src/stdlib/quick_sort_test.cpp (+5-14)
- (modified) utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel (+12-4)
``````````diff
diff --git a/libc/src/stdlib/heap_sort.h b/libc/src/stdlib/heap_sort.h
index b9699776df89c1..ccb9ec5f82149e 100644
--- a/libc/src/stdlib/heap_sort.h
+++ b/libc/src/stdlib/heap_sort.h
@@ -18,12 +18,11 @@ namespace internal {
// A simple in-place heapsort implementation.
// Follow the implementation in https://en.wikipedia.org/wiki/Heapsort.
-template <typename A, typename F>
-LIBC_INLINE void heap_sort(const A &array, const F &is_less) {
- size_t end = array.len();
+LIBC_INLINE void heap_sort(const Array &array) {
+ size_t end = array.size();
size_t start = end / 2;
- const auto left_child = [](size_t i) -> size_t { return 2 * i + 1; };
+ auto left_child = [](size_t i) -> size_t { return 2 * i + 1; };
while (end > 1) {
if (start > 0) {
@@ -41,11 +40,12 @@ LIBC_INLINE void heap_sort(const A &array, const F &is_less) {
while (left_child(root) < end) {
size_t child = left_child(root);
// If there are two children, set child to the greater.
- if ((child + 1 < end) && is_less(array.get(child), array.get(child + 1)))
+ if (child + 1 < end &&
+ array.elem_compare(child, array.get(child + 1)) < 0)
++child;
// If the root is less than the greater child
- if (!is_less(array.get(root), array.get(child)))
+ if (array.elem_compare(root, array.get(child)) >= 0)
break;
// Swap the root with the greater child and continue sifting down.
diff --git a/libc/src/stdlib/qsort.cpp b/libc/src/stdlib/qsort.cpp
index 0bf5fc79805279..65a63c239f5c0d 100644
--- a/libc/src/stdlib/qsort.cpp
+++ b/libc/src/stdlib/qsort.cpp
@@ -18,12 +18,14 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(void, qsort,
(void *array, size_t array_size, size_t elem_size,
int (*compare)(const void *, const void *))) {
+ if (array == nullptr || array_size == 0 || elem_size == 0)
+ return;
+ internal::Comparator c(compare);
- const auto is_less = [compare](const void *a, const void *b) -> bool {
- return compare(a, b) < 0;
- };
+ auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size,
+ elem_size, c);
- internal::unstable_sort(array, array_size, elem_size, is_less);
+ internal::sort(arr);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h
index aa6d9bbc123de8..c529d55ca46ffd 100644
--- a/libc/src/stdlib/qsort_data.h
+++ b/libc/src/stdlib/qsort_data.h
@@ -17,122 +17,91 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-class ArrayGenericSize {
- cpp::byte *array_base;
- size_t array_len;
- size_t elem_size;
-
- LIBC_INLINE cpp::byte *get_internal(size_t i) const {
- return array_base + (i * elem_size);
- }
-
-public:
- LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
- : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
- elem_size(e) {}
-
- static constexpr bool has_fixed_size() { return false; }
-
- LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
-
- LIBC_INLINE void swap(size_t i, size_t j) const {
- // It's possible to use 8 byte blocks with `uint64_t`, but that
- // generates more machine code as the remainder loop gets
- // unrolled, plus 4 byte operations are more likely to be
- // efficient on a wider variety of hardware. On x86 LLVM tends
- // to unroll the block loop again into 2 16 byte swaps per
- // iteration which is another reason that 4 byte blocks yields
- // good performance even for big types.
- using block_t = uint32_t;
- constexpr size_t BLOCK_SIZE = sizeof(block_t);
-
- alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
-
- cpp::byte *elem_i = get_internal(i);
- cpp::byte *elem_j = get_internal(j);
-
- const size_t elem_size_rem = elem_size % BLOCK_SIZE;
- const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
-
- while (elem_i != elem_i_block_end) {
- __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE);
- __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE);
- __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE);
-
- elem_i += BLOCK_SIZE;
- elem_j += BLOCK_SIZE;
- }
-
- for (size_t n = 0; n < elem_size_rem; ++n) {
- cpp::byte tmp = elem_i[n];
- elem_i[n] = elem_j[n];
- elem_j[n] = tmp;
+using Compare = int(const void *, const void *);
+using CompareWithState = int(const void *, const void *, void *);
+
+enum class CompType { COMPARE, COMPARE_WITH_STATE };
+
+struct Comparator {
+ union {
+ Compare *comp_func;
+ CompareWithState *comp_func_r;
+ };
+ const CompType comp_type;
+
+ void *arg;
+
+ Comparator(Compare *func)
+ : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {}
+
+ Comparator(CompareWithState *func, void *arg_val)
+ : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE),
+ arg(arg_val) {}
+
+#if defined(__clang__)
+ // Recent upstream changes to -fsanitize=function find more instances of
+ // function type mismatches. One case is with the comparator passed to this
+ // class. Libraries will tend to pass comparators that take pointers to
+ // varying types while this comparator expects to accept const void pointers.
+ // Ideally those tools would pass a function that strictly accepts const
+ // void*s to avoid UB, or would use qsort_r to pass their own comparator.
+ [[clang::no_sanitize("function")]]
+#endif
+ int comp_vals(const void *a, const void *b) const {
+ if (comp_type == CompType::COMPARE) {
+ return comp_func(a, b);
+ } else {
+ return comp_func_r(a, b, arg);
}
}
-
- LIBC_INLINE size_t len() const { return array_len; }
-
- // Make an Array starting at index |i| and length |s|.
- LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const {
- return ArrayGenericSize(get_internal(i), s, elem_size);
- }
-
- // Reset this Array to point at a different interval of the same
- // items starting at index |i|.
- LIBC_INLINE void reset_bounds(size_t i, size_t s) {
- array_base = get_internal(i);
- array_len = s;
- }
};
-// Having a specialized Array type for sorting that knows at
-// compile-time what the size of the element is, allows for much more
-// efficient swapping and for cheaper offset calculations.
-template <size_t ELEM_SIZE> class ArrayFixedSize {
- cpp::byte *array_base;
- size_t array_len;
-
- LIBC_INLINE cpp::byte *get_internal(size_t i) const {
- return array_base + (i * ELEM_SIZE);
- }
+class Array {
+ uint8_t *array;
+ size_t array_size;
+ size_t elem_size;
+ Comparator compare;
public:
- LIBC_INLINE ArrayFixedSize(void *a, size_t s)
- : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
-
- // Beware this function is used a heuristic for cheap to swap types, so
- // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
- // idea perf wise.
- static constexpr bool has_fixed_size() { return true; }
-
- LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
-
- LIBC_INLINE void swap(size_t i, size_t j) const {
- alignas(32) cpp::byte tmp[ELEM_SIZE];
-
- cpp::byte *elem_i = get_internal(i);
- cpp::byte *elem_j = get_internal(j);
+ Array(uint8_t *a, size_t s, size_t e, Comparator c)
+ : array(a), array_size(s), elem_size(e), compare(c) {}
+
+ uint8_t *get(size_t i) const { return array + i * elem_size; }
+
+ void swap(size_t i, size_t j) const {
+ uint8_t *elem_i = get(i);
+ uint8_t *elem_j = get(j);
+ for (size_t b = 0; b < elem_size; ++b) {
+ uint8_t temp = elem_i[b];
+ elem_i[b] = elem_j[b];
+ elem_j[b] = temp;
+ }
+ }
- __builtin_memcpy(tmp, elem_i, ELEM_SIZE);
- __builtin_memmove(elem_i, elem_j, ELEM_SIZE);
- __builtin_memcpy(elem_j, tmp, ELEM_SIZE);
+ int elem_compare(size_t i, const uint8_t *other) const {
+ // An element must compare equal to itself so we don't need to consult the
+ // user provided comparator.
+ if (get(i) == other)
+ return 0;
+ return compare.comp_vals(get(i), other);
}
- LIBC_INLINE size_t len() const { return array_len; }
+ size_t size() const { return array_size; }
- // Make an Array starting at index |i| and length |s|.
- LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const {
- return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s);
+ // Make an Array starting at index |i| and size |s|.
+ LIBC_INLINE Array make_array(size_t i, size_t s) const {
+ return Array(get(i), s, elem_size, compare);
}
- // Reset this Array to point at a different interval of the same
- // items starting at index |i|.
- LIBC_INLINE void reset_bounds(size_t i, size_t s) {
- array_base = get_internal(i);
- array_len = s;
+ // Reset this Array to point at a different interval of the same items.
+ LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) {
+ array = a;
+ array_size = s;
}
};
+using SortingRoutine = void(const Array &);
+
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_pivot.h b/libc/src/stdlib/qsort_pivot.h
deleted file mode 100644
index b7e1b4294f6d61..00000000000000
--- a/libc/src/stdlib/qsort_pivot.h
+++ /dev/null
@@ -1,85 +0,0 @@
-//===-- Implementation header for qsort utilities ---------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
-#define LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
-
-#include <stdint.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace internal {
-
-// Recursively select a pseudomedian if above this threshold.
-constexpr size_t PSEUDO_MEDIAN_REC_THRESHOLD = 64;
-
-// Selects a pivot from `array`. Algorithm taken from glidesort by Orson Peters.
-//
-// This chooses a pivot by sampling an adaptive amount of points, approximating
-// the quality of a median of sqrt(n) elements.
-template <typename A, typename F>
-size_t choose_pivot(const A &array, const F &is_less) {
- const size_t len = array.len();
-
- if (len < 8) {
- return 0;
- }
-
- const size_t len_div_8 = len / 8;
-
- const size_t a = 0; // [0, floor(n/8))
- const size_t b = len_div_8 * 4; // [4*floor(n/8), 5*floor(n/8))
- const size_t c = len_div_8 * 7; // [7*floor(n/8), 8*floor(n/8))
-
- if (len < PSEUDO_MEDIAN_REC_THRESHOLD)
- return median3(array, a, b, c, is_less);
- else
- return median3_rec(array, a, b, c, len_div_8, is_less);
-}
-
-// Calculates an approximate median of 3 elements from sections a, b, c, or
-// recursively from an approximation of each, if they're large enough. By
-// dividing the size of each section by 8 when recursing we have logarithmic
-// recursion depth and overall sample from f(n) = 3*f(n/8) -> f(n) =
-// O(n^(log(3)/log(8))) ~= O(n^0.528) elements.
-template <typename A, typename F>
-size_t median3_rec(const A &array, size_t a, size_t b, size_t c, size_t n,
- const F &is_less) {
- if (n * 8 >= PSEUDO_MEDIAN_REC_THRESHOLD) {
- const size_t n8 = n / 8;
- a = median3_rec(array, a, a + (n8 * 4), a + (n8 * 7), n8, is_less);
- b = median3_rec(array, b, b + (n8 * 4), b + (n8 * 7), n8, is_less);
- c = median3_rec(array, c, c + (n8 * 4), c + (n8 * 7), n8, is_less);
- }
- return median3(array, a, b, c, is_less);
-}
-
-/// Calculates the median of 3 elements.
-template <typename A, typename F>
-size_t median3(const A &array, size_t a, size_t b, size_t c, const F &is_less) {
- const void *a_ptr = array.get(a);
- const void *b_ptr = array.get(b);
- const void *c_ptr = array.get(c);
-
- const bool x = is_less(a_ptr, b_ptr);
- const bool y = is_less(a_ptr, c_ptr);
- if (x == y) {
- // If x=y=0 then b, c <= a. In this case we want to return max(b, c).
- // If x=y=1 then a < b, c. In this case we want to return min(b, c).
- // By toggling the outcome of b < c using XOR x we get this behavior.
- const bool z = is_less(b_ptr, c_ptr);
- return z ^ x ? c : b;
- } else {
- // Either c <= a < b or b <= a < c, thus a is our median.
- return a;
- }
-}
-
-} // namespace internal
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif // LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
diff --git a/libc/src/stdlib/qsort_r.cpp b/libc/src/stdlib/qsort_r.cpp
index 4e60998b6a6df9..bf61a40e847341 100644
--- a/libc/src/stdlib/qsort_r.cpp
+++ b/libc/src/stdlib/qsort_r.cpp
@@ -19,12 +19,13 @@ LLVM_LIBC_FUNCTION(void, qsort_r,
(void *array, size_t array_size, size_t elem_size,
int (*compare)(const void *, const void *, void *),
void *arg)) {
+ if (array == nullptr || array_size == 0 || elem_size == 0)
+ return;
+ internal::Comparator c(compare, arg);
+ auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size,
+ elem_size, c);
- const auto is_less = [compare, arg](const void *a, const void *b) -> bool {
- return compare(a, b, arg) < 0;
- };
-
- internal::unstable_sort(array, array_size, elem_size, is_less);
+ internal::sort(arr);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_util.h b/libc/src/stdlib/qsort_util.h
index 7882b829d32744..d42adde06d9762 100644
--- a/libc/src/stdlib/qsort_util.h
+++ b/libc/src/stdlib/qsort_util.h
@@ -27,48 +27,11 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-template <bool USE_QUICKSORT, typename F>
-LIBC_INLINE void unstable_sort_impl(void *array, size_t array_len,
- size_t elem_size, const F &is_less) {
- if (array == nullptr || array_len == 0 || elem_size == 0)
- return;
-
- if constexpr (USE_QUICKSORT) {
- switch (elem_size) {
- case 4: {
- auto arr_fixed_size = internal::ArrayFixedSize<4>(array, array_len);
- quick_sort(arr_fixed_size, is_less);
- return;
- }
- case 8: {
- auto arr_fixed_size = internal::ArrayFixedSize<8>(array, array_len);
- quick_sort(arr_fixed_size, is_less);
- return;
- }
- case 16: {
- auto arr_fixed_size = internal::ArrayFixedSize<16>(array, array_len);
- quick_sort(arr_fixed_size, is_less);
- return;
- }
- default:
- auto arr_generic_size =
- internal::ArrayGenericSize(array, array_len, elem_size);
- quick_sort(arr_generic_size, is_less);
- return;
- }
- } else {
- auto arr_generic_size =
- internal::ArrayGenericSize(array, array_len, elem_size);
- heap_sort(arr_generic_size, is_less);
- }
-}
-
-template <typename F>
-LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size,
- const F &is_less) {
-#define USE_QUICK_SORT ((LIBC_QSORT_IMPL) == (LIBC_QSORT_QUICK_SORT))
- unstable_sort_impl<USE_QUICK_SORT, F>(array, array_len, elem_size, is_less);
-}
+#if LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT
+constexpr auto sort = quick_sort;
+#elif LIBC_QSORT_IMPL == LIBC_QSORT_HEAP_SORT
+constexpr auto sort = heap_sort;
+#endif
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/quick_sort.h b/libc/src/stdlib/quick_sort.h
index 9ab28302500186..82b90a7d511d99 100644
--- a/libc/src/stdlib/quick_sort.h
+++ b/libc/src/stdlib/quick_sort.h
@@ -9,175 +9,84 @@
#ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
#define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
-#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/cstddef.h"
+#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
-#include "src/stdlib/qsort_pivot.h"
+#include "src/stdlib/qsort_data.h"
#include <stdint.h>
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-// Branchless Lomuto partition based on the implementation by Lukas
-// Bergdoll and Orson Peters
-// https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md.
-// Simplified to avoid having to stack allocate.
-template <typename A, typename F>
-LIBC_INLINE size_t partition_lomuto_branchless(const A &array,
- const void *pivot,
- const F &is_less) {
- const size_t array_len = array.len();
-
- size_t left = 0;
- size_t right = 0;
-
- while (right < array_len) {
- const bool right_is_lt = is_less(array.get(right), pivot);
- array.swap(left, right);
- left += static_cast<size_t>(right_is_lt);
- right += 1;
- }
-
- return left;
-}
-
-// Optimized for large types that are expensive to move. Not optimized
-// for integers. It's possible to use a cyclic permutation here for
-// large types as done in ipnsort but the advantages of this are limited
-// as `is_less` is a small wrapper around a call to a function pointer
-// and won't incur much binary-size overhead. The other reason to use
-// cyclic permutation is to have more efficient swapping, but we don't
-// know the element size so this isn't applicable here either.
-template <typename A, typename F>
-LIBC_INLINE size_t partition_hoare_branchy(const A &array, const void *pivot,
- const F &is_less) {
- const size_t array_len = array.len();
-
- size_t left = 0;
- size_t right = array_len;
+// A simple quicksort implementation using the Hoare partition scheme.
+LIBC_INLINE size_t partition(const Array &array) {
+ const size_t array_size = array.size();
+ size_t pivot_index = array_size / 2;
+ uint8_t *pivot = array.get(pivot_index);
+ size_t i = 0;
+ size_t j = array_size - 1;
while (true) {
- while (left < right && is_less(array.get(left), pivot))
- ++left;
-
- while (true) {
- --right;
- if (left >= right || is_less(array.get(right), pivot)) {
- break;
- }
+ int compare_i, compare_j;
+
+ while ((compare_i = array.elem_compare(i, pivot)) < 0)
+ ++i;
+ while ((compare_j = array.elem_compare(j, pivot)) > 0)
+ --j;
+
+ // At some point i will crossover j so we will definitely break out of
+ // this while loop.
+ if (i >= j)
+ return j + 1;
+
+ array.swap(i, j);
+
+ // The pivot itself might have got swapped so we will update the pivot.
+ if (i == pivot_index) {
+ pivot = array.get(j);
+ pivot_index = j;
+ } else if (j == pivot_index) {
+ pivot = array.get(i);
+ pivot_index = i;
}
- if (left >= right)
- break;
-
- array.swap(left, right);
- ++left;
- }
-
- return left;
-}
-
-template <typename A, typename F>
-LIBC_INLINE size_t partition(const A &array, size_t pivot_index,
- const F &is_less) {
- // Place the pivot at the beginning of the array.
- if (pivot_index != 0) {
- array.swap(0, pivot_index);
- }
-
- const A array_without_pivot = array.make_array(1, array.len() - 1);
- const void *pivot = array.get(0);
-
- size_t num_lt;
- if constexpr (A::has_fixed_size()) {
- // Branchless Lomuto avoid branch misprediction penalties, but
- // it also swaps more often which is only faster if the swap is a fast
- // constant operation.
- num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less);
- } else {
- num_lt = partition_hoare_branchy(array_without_pivot, pivot, is_less);
+ if (compare_i == 0 && compare_j == 0) {
+ // If we do not move the pointers, we will end up with an
+ // infinite loop as i and j will be stuck without advancing.
+ ++i;
+ --j;
+ }
}
-
- // Place the pivot between the two partitions.
- array.swap(0, num_lt);
-
- return num_lt;
}
-template <typename A, typename F>
-LIBC_INLINE void quick_sort_impl(A &array, const void *ancestor_pivot,
- size_t limit, const F &is_less) {
+LIBC_INLINE void quick_sort(Array array) {
while (true) {
- const size_t array_len = array.len();
- if (array_len <= 1)
+ const size_t array_size = array.size();
+ if (array_size <= 1)
return;
-
- // If too many bad pivot choices were made, simply fall back to
- // heapsort in order to guarantee `O(N x log(N))` worst-case.
- if (limit == 0) {
- heap_sort(array, is_less);
- return;
- }
-
- limit -= 1;
-
- const size_t pivot_index = choose_p...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121303
More information about the llvm-commits mailing list