[libc] [llvm] [libc] Improve qsort (with build fix) (PR #121482)

Lukas Bergdoll via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 2 07:48:49 PST 2025


https://github.com/Voultapher updated https://github.com/llvm/llvm-project/pull/121482

>From 32e360cf45ecface43f2a34c037ab9fd422b9cc2 Mon Sep 17 00:00:00 2001
From: Lukas Bergdoll <lukas.bergdoll at gmail.com>
Date: Thu, 2 Jan 2025 16:45:08 +0100
Subject: [PATCH 1/2] Reapply "[libc] Improve qsort" (#121303)

This reverts commit 0b96f1cf6877c21fee42445fa33f26a52bf3dfe8.
---
 libc/src/stdlib/heap_sort.h                   |  12 +-
 libc/src/stdlib/qsort.cpp                     |  10 +-
 libc/src/stdlib/qsort_data.h                  | 171 +++++++++------
 libc/src/stdlib/qsort_pivot.h                 |  85 ++++++++
 libc/src/stdlib/qsort_r.cpp                   |  11 +-
 libc/src/stdlib/qsort_util.h                  |  47 +++-
 libc/src/stdlib/quick_sort.h                  | 203 +++++++++++++-----
 libc/test/src/stdlib/CMakeLists.txt           |  18 +-
 libc/test/src/stdlib/SortingTest.h            | 199 +++++++++--------
 libc/test/src/stdlib/heap_sort_test.cpp       |  18 +-
 libc/test/src/stdlib/qsort_r_test.cpp         |   4 +-
 libc/test/src/stdlib/qsort_test.cpp           |  17 --
 libc/test/src/stdlib/quick_sort_test.cpp      |  19 +-
 .../libc/test/src/stdlib/BUILD.bazel          |  16 +-
 14 files changed, 539 insertions(+), 291 deletions(-)
 create mode 100644 libc/src/stdlib/qsort_pivot.h
 delete mode 100644 libc/test/src/stdlib/qsort_test.cpp

diff --git a/libc/src/stdlib/heap_sort.h b/libc/src/stdlib/heap_sort.h
index ccb9ec5f82149e..b9699776df89c1 100644
--- a/libc/src/stdlib/heap_sort.h
+++ b/libc/src/stdlib/heap_sort.h
@@ -18,11 +18,12 @@ namespace internal {
 // A simple in-place heapsort implementation.
 // Follow the implementation in https://en.wikipedia.org/wiki/Heapsort.
 
-LIBC_INLINE void heap_sort(const Array &array) {
-  size_t end = array.size();
+template <typename A, typename F>
+LIBC_INLINE void heap_sort(const A &array, const F &is_less) {
+  size_t end = array.len();
   size_t start = end / 2;
 
-  auto left_child = [](size_t i) -> size_t { return 2 * i + 1; };
+  const auto left_child = [](size_t i) -> size_t { return 2 * i + 1; };
 
   while (end > 1) {
     if (start > 0) {
@@ -40,12 +41,11 @@ LIBC_INLINE void heap_sort(const Array &array) {
     while (left_child(root) < end) {
       size_t child = left_child(root);
       // If there are two children, set child to the greater.
-      if (child + 1 < end &&
-          array.elem_compare(child, array.get(child + 1)) < 0)
+      if ((child + 1 < end) && is_less(array.get(child), array.get(child + 1)))
         ++child;
 
       // If the root is less than the greater child
-      if (array.elem_compare(root, array.get(child)) >= 0)
+      if (!is_less(array.get(root), array.get(child)))
         break;
 
       // Swap the root with the greater child and continue sifting down.
diff --git a/libc/src/stdlib/qsort.cpp b/libc/src/stdlib/qsort.cpp
index 65a63c239f5c0d..0bf5fc79805279 100644
--- a/libc/src/stdlib/qsort.cpp
+++ b/libc/src/stdlib/qsort.cpp
@@ -18,14 +18,12 @@ namespace LIBC_NAMESPACE_DECL {
 LLVM_LIBC_FUNCTION(void, qsort,
                    (void *array, size_t array_size, size_t elem_size,
                     int (*compare)(const void *, const void *))) {
-  if (array == nullptr || array_size == 0 || elem_size == 0)
-    return;
-  internal::Comparator c(compare);
 
-  auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size,
-                             elem_size, c);
+  const auto is_less = [compare](const void *a, const void *b) -> bool {
+    return compare(a, b) < 0;
+  };
 
-  internal::sort(arr);
+  internal::unstable_sort(array, array_size, elem_size, is_less);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h
index c529d55ca46ffd..aa6d9bbc123de8 100644
--- a/libc/src/stdlib/qsort_data.h
+++ b/libc/src/stdlib/qsort_data.h
@@ -17,91 +17,122 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-using Compare = int(const void *, const void *);
-using CompareWithState = int(const void *, const void *, void *);
-
-enum class CompType { COMPARE, COMPARE_WITH_STATE };
-
-struct Comparator {
-  union {
-    Compare *comp_func;
-    CompareWithState *comp_func_r;
-  };
-  const CompType comp_type;
-
-  void *arg;
-
-  Comparator(Compare *func)
-      : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {}
-
-  Comparator(CompareWithState *func, void *arg_val)
-      : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE),
-        arg(arg_val) {}
-
-#if defined(__clang__)
-  // Recent upstream changes to -fsanitize=function find more instances of
-  // function type mismatches. One case is with the comparator passed to this
-  // class. Libraries will tend to pass comparators that take pointers to
-  // varying types while this comparator expects to accept const void pointers.
-  // Ideally those tools would pass a function that strictly accepts const
-  // void*s to avoid UB, or would use qsort_r to pass their own comparator.
-  [[clang::no_sanitize("function")]]
-#endif
-  int comp_vals(const void *a, const void *b) const {
-    if (comp_type == CompType::COMPARE) {
-      return comp_func(a, b);
-    } else {
-      return comp_func_r(a, b, arg);
+class ArrayGenericSize {
+  cpp::byte *array_base;
+  size_t array_len;
+  size_t elem_size;
+
+  LIBC_INLINE cpp::byte *get_internal(size_t i) const {
+    return array_base + (i * elem_size);
+  }
+
+public:
+  LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
+      : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
+        elem_size(e) {}
+
+  static constexpr bool has_fixed_size() { return false; }
+
+  LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
+
+  LIBC_INLINE void swap(size_t i, size_t j) const {
+    // It's possible to use 8 byte blocks with `uint64_t`, but that
+    // generates more machine code as the remainder loop gets
+    // unrolled, plus 4 byte operations are more likely to be
+    // efficient on a wider variety of hardware. On x86 LLVM tends
+    // to unroll the block loop again into 2 16 byte swaps per
+    // iteration which is another reason that 4 byte blocks yields
+    // good performance even for big types.
+    using block_t = uint32_t;
+    constexpr size_t BLOCK_SIZE = sizeof(block_t);
+
+    alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
+
+    cpp::byte *elem_i = get_internal(i);
+    cpp::byte *elem_j = get_internal(j);
+
+    const size_t elem_size_rem = elem_size % BLOCK_SIZE;
+    const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
+
+    while (elem_i != elem_i_block_end) {
+      __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE);
+      __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE);
+      __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE);
+
+      elem_i += BLOCK_SIZE;
+      elem_j += BLOCK_SIZE;
+    }
+
+    for (size_t n = 0; n < elem_size_rem; ++n) {
+      cpp::byte tmp = elem_i[n];
+      elem_i[n] = elem_j[n];
+      elem_j[n] = tmp;
     }
   }
+
+  LIBC_INLINE size_t len() const { return array_len; }
+
+  // Make an Array starting at index |i| and length |s|.
+  LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const {
+    return ArrayGenericSize(get_internal(i), s, elem_size);
+  }
+
+  // Reset this Array to point at a different interval of the same
+  // items starting at index |i|.
+  LIBC_INLINE void reset_bounds(size_t i, size_t s) {
+    array_base = get_internal(i);
+    array_len = s;
+  }
 };
 
-class Array {
-  uint8_t *array;
-  size_t array_size;
-  size_t elem_size;
-  Comparator compare;
+// Having a specialized Array type for sorting that knows at
+// compile-time what the size of the element is, allows for much more
+// efficient swapping and for cheaper offset calculations.
+template <size_t ELEM_SIZE> class ArrayFixedSize {
+  cpp::byte *array_base;
+  size_t array_len;
 
-public:
-  Array(uint8_t *a, size_t s, size_t e, Comparator c)
-      : array(a), array_size(s), elem_size(e), compare(c) {}
-
-  uint8_t *get(size_t i) const { return array + i * elem_size; }
-
-  void swap(size_t i, size_t j) const {
-    uint8_t *elem_i = get(i);
-    uint8_t *elem_j = get(j);
-    for (size_t b = 0; b < elem_size; ++b) {
-      uint8_t temp = elem_i[b];
-      elem_i[b] = elem_j[b];
-      elem_j[b] = temp;
-    }
+  LIBC_INLINE cpp::byte *get_internal(size_t i) const {
+    return array_base + (i * ELEM_SIZE);
   }
 
-  int elem_compare(size_t i, const uint8_t *other) const {
-    // An element must compare equal to itself so we don't need to consult the
-    // user provided comparator.
-    if (get(i) == other)
-      return 0;
-    return compare.comp_vals(get(i), other);
+public:
+  LIBC_INLINE ArrayFixedSize(void *a, size_t s)
+      : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
+
+  // Beware this function is used a heuristic for cheap to swap types, so
+  // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
+  // idea perf wise.
+  static constexpr bool has_fixed_size() { return true; }
+
+  LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
+
+  LIBC_INLINE void swap(size_t i, size_t j) const {
+    alignas(32) cpp::byte tmp[ELEM_SIZE];
+
+    cpp::byte *elem_i = get_internal(i);
+    cpp::byte *elem_j = get_internal(j);
+
+    __builtin_memcpy(tmp, elem_i, ELEM_SIZE);
+    __builtin_memmove(elem_i, elem_j, ELEM_SIZE);
+    __builtin_memcpy(elem_j, tmp, ELEM_SIZE);
   }
 
-  size_t size() const { return array_size; }
+  LIBC_INLINE size_t len() const { return array_len; }
 
-  // Make an Array starting at index |i| and size |s|.
-  LIBC_INLINE Array make_array(size_t i, size_t s) const {
-    return Array(get(i), s, elem_size, compare);
+  // Make an Array starting at index |i| and length |s|.
+  LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const {
+    return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s);
   }
 
-  // Reset this Array to point at a different interval of the same items.
-  LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) {
-    array = a;
-    array_size = s;
+  // Reset this Array to point at a different interval of the same
+  // items starting at index |i|.
+  LIBC_INLINE void reset_bounds(size_t i, size_t s) {
+    array_base = get_internal(i);
+    array_len = s;
   }
 };
 
-using SortingRoutine = void(const Array &);
-
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/src/stdlib/qsort_pivot.h b/libc/src/stdlib/qsort_pivot.h
new file mode 100644
index 00000000000000..b7e1b4294f6d61
--- /dev/null
+++ b/libc/src/stdlib/qsort_pivot.h
@@ -0,0 +1,85 @@
+//===-- Implementation header for qsort utilities ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
+#define LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Recursively select a pseudomedian if above this threshold.
+constexpr size_t PSEUDO_MEDIAN_REC_THRESHOLD = 64;
+
+// Selects a pivot from `array`. Algorithm taken from glidesort by Orson Peters.
+//
+// This chooses a pivot by sampling an adaptive amount of points, approximating
+// the quality of a median of sqrt(n) elements.
+template <typename A, typename F>
+size_t choose_pivot(const A &array, const F &is_less) {
+  const size_t len = array.len();
+
+  if (len < 8) {
+    return 0;
+  }
+
+  const size_t len_div_8 = len / 8;
+
+  const size_t a = 0;             // [0, floor(n/8))
+  const size_t b = len_div_8 * 4; // [4*floor(n/8), 5*floor(n/8))
+  const size_t c = len_div_8 * 7; // [7*floor(n/8), 8*floor(n/8))
+
+  if (len < PSEUDO_MEDIAN_REC_THRESHOLD)
+    return median3(array, a, b, c, is_less);
+  else
+    return median3_rec(array, a, b, c, len_div_8, is_less);
+}
+
+// Calculates an approximate median of 3 elements from sections a, b, c, or
+// recursively from an approximation of each, if they're large enough. By
+// dividing the size of each section by 8 when recursing we have logarithmic
+// recursion depth and overall sample from f(n) = 3*f(n/8) -> f(n) =
+// O(n^(log(3)/log(8))) ~= O(n^0.528) elements.
+template <typename A, typename F>
+size_t median3_rec(const A &array, size_t a, size_t b, size_t c, size_t n,
+                   const F &is_less) {
+  if (n * 8 >= PSEUDO_MEDIAN_REC_THRESHOLD) {
+    const size_t n8 = n / 8;
+    a = median3_rec(array, a, a + (n8 * 4), a + (n8 * 7), n8, is_less);
+    b = median3_rec(array, b, b + (n8 * 4), b + (n8 * 7), n8, is_less);
+    c = median3_rec(array, c, c + (n8 * 4), c + (n8 * 7), n8, is_less);
+  }
+  return median3(array, a, b, c, is_less);
+}
+
+/// Calculates the median of 3 elements.
+template <typename A, typename F>
+size_t median3(const A &array, size_t a, size_t b, size_t c, const F &is_less) {
+  const void *a_ptr = array.get(a);
+  const void *b_ptr = array.get(b);
+  const void *c_ptr = array.get(c);
+
+  const bool x = is_less(a_ptr, b_ptr);
+  const bool y = is_less(a_ptr, c_ptr);
+  if (x == y) {
+    // If x=y=0 then b, c <= a. In this case we want to return max(b, c).
+    // If x=y=1 then a < b, c. In this case we want to return min(b, c).
+    // By toggling the outcome of b < c using XOR x we get this behavior.
+    const bool z = is_less(b_ptr, c_ptr);
+    return z ^ x ? c : b;
+  } else {
+    // Either c <= a < b or b <= a < c, thus a is our median.
+    return a;
+  }
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
diff --git a/libc/src/stdlib/qsort_r.cpp b/libc/src/stdlib/qsort_r.cpp
index bf61a40e847341..4e60998b6a6df9 100644
--- a/libc/src/stdlib/qsort_r.cpp
+++ b/libc/src/stdlib/qsort_r.cpp
@@ -19,13 +19,12 @@ LLVM_LIBC_FUNCTION(void, qsort_r,
                    (void *array, size_t array_size, size_t elem_size,
                     int (*compare)(const void *, const void *, void *),
                     void *arg)) {
-  if (array == nullptr || array_size == 0 || elem_size == 0)
-    return;
-  internal::Comparator c(compare, arg);
-  auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size,
-                             elem_size, c);
 
-  internal::sort(arr);
+  const auto is_less = [compare, arg](const void *a, const void *b) -> bool {
+    return compare(a, b, arg) < 0;
+  };
+
+  internal::unstable_sort(array, array_size, elem_size, is_less);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_util.h b/libc/src/stdlib/qsort_util.h
index d42adde06d9762..7882b829d32744 100644
--- a/libc/src/stdlib/qsort_util.h
+++ b/libc/src/stdlib/qsort_util.h
@@ -27,11 +27,48 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-#if LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT
-constexpr auto sort = quick_sort;
-#elif LIBC_QSORT_IMPL == LIBC_QSORT_HEAP_SORT
-constexpr auto sort = heap_sort;
-#endif
+template <bool USE_QUICKSORT, typename F>
+LIBC_INLINE void unstable_sort_impl(void *array, size_t array_len,
+                                    size_t elem_size, const F &is_less) {
+  if (array == nullptr || array_len == 0 || elem_size == 0)
+    return;
+
+  if constexpr (USE_QUICKSORT) {
+    switch (elem_size) {
+    case 4: {
+      auto arr_fixed_size = internal::ArrayFixedSize<4>(array, array_len);
+      quick_sort(arr_fixed_size, is_less);
+      return;
+    }
+    case 8: {
+      auto arr_fixed_size = internal::ArrayFixedSize<8>(array, array_len);
+      quick_sort(arr_fixed_size, is_less);
+      return;
+    }
+    case 16: {
+      auto arr_fixed_size = internal::ArrayFixedSize<16>(array, array_len);
+      quick_sort(arr_fixed_size, is_less);
+      return;
+    }
+    default:
+      auto arr_generic_size =
+          internal::ArrayGenericSize(array, array_len, elem_size);
+      quick_sort(arr_generic_size, is_less);
+      return;
+    }
+  } else {
+    auto arr_generic_size =
+        internal::ArrayGenericSize(array, array_len, elem_size);
+    heap_sort(arr_generic_size, is_less);
+  }
+}
+
+template <typename F>
+LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size,
+                               const F &is_less) {
+#define USE_QUICK_SORT ((LIBC_QSORT_IMPL) == (LIBC_QSORT_QUICK_SORT))
+  unstable_sort_impl<USE_QUICK_SORT, F>(array, array_len, elem_size, is_less);
+}
 
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/quick_sort.h b/libc/src/stdlib/quick_sort.h
index 82b90a7d511d99..9ab28302500186 100644
--- a/libc/src/stdlib/quick_sort.h
+++ b/libc/src/stdlib/quick_sort.h
@@ -9,84 +9,175 @@
 #ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
 #define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
 
-#include "src/__support/macros/attributes.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/cstddef.h"
 #include "src/__support/macros/config.h"
-#include "src/stdlib/qsort_data.h"
+#include "src/stdlib/qsort_pivot.h"
 
 #include <stdint.h>
 
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-// A simple quicksort implementation using the Hoare partition scheme.
-LIBC_INLINE size_t partition(const Array &array) {
-  const size_t array_size = array.size();
-  size_t pivot_index = array_size / 2;
-  uint8_t *pivot = array.get(pivot_index);
-  size_t i = 0;
-  size_t j = array_size - 1;
+// Branchless Lomuto partition based on the implementation by Lukas
+// Bergdoll and Orson Peters
+// https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md.
+// Simplified to avoid having to stack allocate.
+template <typename A, typename F>
+LIBC_INLINE size_t partition_lomuto_branchless(const A &array,
+                                               const void *pivot,
+                                               const F &is_less) {
+  const size_t array_len = array.len();
+
+  size_t left = 0;
+  size_t right = 0;
+
+  while (right < array_len) {
+    const bool right_is_lt = is_less(array.get(right), pivot);
+    array.swap(left, right);
+    left += static_cast<size_t>(right_is_lt);
+    right += 1;
+  }
+
+  return left;
+}
+
+// Optimized for large types that are expensive to move. Not optimized
+// for integers. It's possible to use a cyclic permutation here for
+// large types as done in ipnsort but the advantages of this are limited
+// as `is_less` is a small wrapper around a call to a function pointer
+// and won't incur much binary-size overhead. The other reason to use
+// cyclic permutation is to have more efficient swapping, but we don't
+// know the element size so this isn't applicable here either.
+template <typename A, typename F>
+LIBC_INLINE size_t partition_hoare_branchy(const A &array, const void *pivot,
+                                           const F &is_less) {
+  const size_t array_len = array.len();
+
+  size_t left = 0;
+  size_t right = array_len;
 
   while (true) {
-    int compare_i, compare_j;
-
-    while ((compare_i = array.elem_compare(i, pivot)) < 0)
-      ++i;
-    while ((compare_j = array.elem_compare(j, pivot)) > 0)
-      --j;
-
-    // At some point i will crossover j so we will definitely break out of
-    // this while loop.
-    if (i >= j)
-      return j + 1;
-
-    array.swap(i, j);
-
-    // The pivot itself might have got swapped so we will update the pivot.
-    if (i == pivot_index) {
-      pivot = array.get(j);
-      pivot_index = j;
-    } else if (j == pivot_index) {
-      pivot = array.get(i);
-      pivot_index = i;
+    while (left < right && is_less(array.get(left), pivot))
+      ++left;
+
+    while (true) {
+      --right;
+      if (left >= right || is_less(array.get(right), pivot)) {
+        break;
+      }
     }
 
-    if (compare_i == 0 && compare_j == 0) {
-      // If we do not move the pointers, we will end up with an
-      // infinite loop as i and j will be stuck without advancing.
-      ++i;
-      --j;
-    }
+    if (left >= right)
+      break;
+
+    array.swap(left, right);
+    ++left;
+  }
+
+  return left;
+}
+
+template <typename A, typename F>
+LIBC_INLINE size_t partition(const A &array, size_t pivot_index,
+                             const F &is_less) {
+  // Place the pivot at the beginning of the array.
+  if (pivot_index != 0) {
+    array.swap(0, pivot_index);
   }
+
+  const A array_without_pivot = array.make_array(1, array.len() - 1);
+  const void *pivot = array.get(0);
+
+  size_t num_lt;
+  if constexpr (A::has_fixed_size()) {
+    // Branchless Lomuto avoid branch misprediction penalties, but
+    // it also swaps more often which is only faster if the swap is a fast
+    // constant operation.
+    num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less);
+  } else {
+    num_lt = partition_hoare_branchy(array_without_pivot, pivot, is_less);
+  }
+
+  // Place the pivot between the two partitions.
+  array.swap(0, num_lt);
+
+  return num_lt;
 }
 
-LIBC_INLINE void quick_sort(Array array) {
+template <typename A, typename F>
+LIBC_INLINE void quick_sort_impl(A &array, const void *ancestor_pivot,
+                                 size_t limit, const F &is_less) {
   while (true) {
-    const size_t array_size = array.size();
-    if (array_size <= 1)
+    const size_t array_len = array.len();
+    if (array_len <= 1)
       return;
-    size_t split_index = partition(array);
-    if (array_size == 2)
-      // The partition operation sorts the two element array.
+
+    // If too many bad pivot choices were made, simply fall back to
+    // heapsort in order to guarantee `O(N x log(N))` worst-case.
+    if (limit == 0) {
+      heap_sort(array, is_less);
       return;
+    }
 
-    // Make Arrays describing the two sublists that still need sorting.
-    Array left = array.make_array(0, split_index);
-    Array right = array.make_array(split_index, array.size() - split_index);
-
-    // Recurse to sort the smaller of the two, and then loop round within this
-    // function to sort the larger. This way, recursive call depth is bounded
-    // by log2 of the total array size, because every recursive call is sorting
-    // a list at most half the length of the one in its caller.
-    if (left.size() < right.size()) {
-      quick_sort(left);
-      array.reset_bounds(right.get(0), right.size());
-    } else {
-      quick_sort(right);
-      array.reset_bounds(left.get(0), left.size());
+    limit -= 1;
+
+    const size_t pivot_index = choose_pivot(array, is_less);
+
+    // If the chosen pivot is equal to the predecessor, then it's the smallest
+    // element in the slice. Partition the slice into elements equal to and
+    // elements greater than the pivot. This case is usually hit when the slice
+    // contains many duplicate elements.
+    if (ancestor_pivot) {
+      if (!is_less(ancestor_pivot, array.get(pivot_index))) {
+        const size_t num_lt =
+            partition(array, pivot_index,
+                      [is_less](const void *a, const void *b) -> bool {
+                        return !is_less(b, a);
+                      });
+
+        // Continue sorting elements greater than the pivot. We know that
+        // `num_lt` cont
+        array.reset_bounds(num_lt + 1, array.len() - (num_lt + 1));
+        ancestor_pivot = nullptr;
+        continue;
+      }
     }
+
+    size_t split_index = partition(array, pivot_index, is_less);
+
+    if (array_len == 2)
+      // The partition operation sorts the two element array.
+      return;
+
+    // Split the array into `left`, `pivot`, and `right`.
+    A left = array.make_array(0, split_index);
+    const void *pivot = array.get(split_index);
+    const size_t right_start = split_index + 1;
+    A right = array.make_array(right_start, array.len() - right_start);
+
+    // Recurse into the left side. We have a fixed recursion limit,
+    // testing shows no real benefit for recursing into the shorter
+    // side.
+    quick_sort_impl(left, ancestor_pivot, limit, is_less);
+
+    // Continue with the right side.
+    array = right;
+    ancestor_pivot = pivot;
   }
 }
 
+constexpr size_t ilog2(size_t n) { return cpp::bit_width(n) - 1; }
+
+template <typename A, typename F>
+LIBC_INLINE void quick_sort(A &array, const F &is_less) {
+  const void *ancestor_pivot = nullptr;
+  // Limit the number of imbalanced partitions to `2 * floor(log2(len))`.
+  // The binary OR by one is used to eliminate the zero-check in the logarithm.
+  const size_t limit = 2 * ilog2((array.len() | 1));
+  quick_sort_impl(array, ancestor_pivot, limit, is_less);
+}
+
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 4ca2043ab4c9b6..8cc0428632ba39 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -300,18 +300,6 @@ add_libc_test(
     libc.src.stdlib.bsearch
 )
 
-add_libc_test(
-  quick_sort_test
-  SUITE
-    libc-stdlib-tests
-  SRCS
-    quick_sort_test.cpp
-  HDRS
-    SortingTest.h
-  DEPENDS
-    libc.src.stdlib.qsort_util
-)
-
 add_libc_test(
   heap_sort_test
   SUITE
@@ -321,15 +309,15 @@ add_libc_test(
   HDRS
     SortingTest.h
   DEPENDS
-    libc.src.stdlib.qsort_util
+    libc.src.stdlib.qsort
 )
 
 add_libc_test(
-  qsort_test
+  quick_sort_test
   SUITE
     libc-stdlib-tests
   SRCS
-    qsort_test.cpp
+    quick_sort_test.cpp
   HDRS
     SortingTest.h
   DEPENDS
diff --git a/libc/test/src/stdlib/SortingTest.h b/libc/test/src/stdlib/SortingTest.h
index d34584e5addf03..034c0e4f1fd01d 100644
--- a/libc/test/src/stdlib/SortingTest.h
+++ b/libc/test/src/stdlib/SortingTest.h
@@ -7,19 +7,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/__support/macros/config.h"
-#include "src/stdlib/qsort_data.h"
+#include "src/stdlib/qsort.h"
 #include "test/UnitTest/Test.h"
 
 class SortingTest : public LIBC_NAMESPACE::testing::Test {
 
-  using Array = LIBC_NAMESPACE::internal::Array;
-  using Comparator = LIBC_NAMESPACE::internal::Comparator;
-  using SortingRoutine = LIBC_NAMESPACE::internal::SortingRoutine;
+  using SortingRoutine = void (*)(void *array, size_t array_len,
+                                  size_t elem_size,
+                                  int (*compare)(const void *, const void *));
 
-public:
   static int int_compare(const void *l, const void *r) {
     int li = *reinterpret_cast<const int *>(l);
     int ri = *reinterpret_cast<const int *>(r);
+
     if (li == ri)
       return 0;
     else if (li > ri)
@@ -28,16 +28,19 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
       return -1;
   }
 
+  static void int_sort(SortingRoutine sort_func, int *array, size_t array_len) {
+    sort_func(reinterpret_cast<void *>(array), array_len, sizeof(int),
+              int_compare);
+  }
+
+public:
   void test_sorted_array(SortingRoutine sort_func) {
     int array[25] = {10,   23,   33,   35,   55,   70,    71,   100,  110,
                      123,  133,  135,  155,  170,  171,   1100, 1110, 1123,
                      1133, 1135, 1155, 1170, 1171, 11100, 12310};
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_LE(array[0], 10);
     ASSERT_LE(array[1], 23);
@@ -69,14 +72,11 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_reversed_sorted_array(SortingRoutine sort_func) {
     int array[] = {25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13,
                    12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1};
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    int_sort(sort_func, array, ARRAY_LEN);
 
-    sort_func(arr);
-
-    for (int i = 0; i < int(ARRAY_SIZE - 1); ++i)
+    for (int i = 0; i < int(ARRAY_LEN - 1); ++i)
       ASSERT_EQ(array[i], i + 1);
   }
 
@@ -84,14 +84,11 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
     int array[] = {100, 100, 100, 100, 100, 100, 100, 100, 100,
                    100, 100, 100, 100, 100, 100, 100, 100, 100,
                    100, 100, 100, 100, 100, 100, 100};
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
-    for (size_t i = 0; i < ARRAY_SIZE; ++i)
+    for (size_t i = 0; i < ARRAY_LEN; ++i)
       ASSERT_EQ(array[i], 100);
   }
 
@@ -99,12 +96,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
     int array[25] = {10,  23,  8,    35,   55,   45,  40,  100, 110,
                      123, 90,  80,   70,   60,   171, 11,  1,   -1,
                      -5,  -10, 1155, 1170, 1171, 12,  -100};
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], -100);
     ASSERT_EQ(array[1], -10);
@@ -135,12 +129,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
 
   void test_unsorted_array_2(SortingRoutine sort_func) {
     int array[7] = {10, 40, 45, 55, 35, 23, 60};
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
-
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 10);
     ASSERT_EQ(array[1], 23);
@@ -153,12 +144,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
 
   void test_unsorted_array_duplicated_1(SortingRoutine sort_func) {
     int array[6] = {10, 10, 20, 20, 5, 5};
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 5);
     ASSERT_EQ(array[1], 5);
@@ -170,12 +158,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
 
   void test_unsorted_array_duplicated_2(SortingRoutine sort_func) {
     int array[10] = {20, 10, 10, 10, 10, 20, 21, 21, 21, 21};
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 10);
     ASSERT_EQ(array[1], 10);
@@ -191,12 +176,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
 
   void test_unsorted_array_duplicated_3(SortingRoutine sort_func) {
     int array[10] = {20, 30, 30, 30, 30, 20, 21, 21, 21, 21};
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
-
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 20);
     ASSERT_EQ(array[1], 20);
@@ -213,12 +195,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_unsorted_three_element_1(SortingRoutine sort_func) {
     int array[3] = {14999024, 0, 3};
 
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 0);
     ASSERT_EQ(array[1], 3);
@@ -228,12 +207,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_unsorted_three_element_2(SortingRoutine sort_func) {
     int array[3] = {3, 14999024, 0};
 
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 0);
     ASSERT_EQ(array[1], 3);
@@ -243,12 +219,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_unsorted_three_element_3(SortingRoutine sort_func) {
     int array[3] = {3, 0, 14999024};
 
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
-
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 0);
     ASSERT_EQ(array[1], 3);
@@ -258,12 +231,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_same_three_element(SortingRoutine sort_func) {
     int array[3] = {12345, 12345, 12345};
 
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 12345);
     ASSERT_EQ(array[1], 12345);
@@ -273,12 +243,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_unsorted_two_element_1(SortingRoutine sort_func) {
     int array[] = {14999024, 0};
 
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 0);
     ASSERT_EQ(array[1], 14999024);
@@ -287,12 +254,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_unsorted_two_element_2(SortingRoutine sort_func) {
     int array[] = {0, 14999024};
 
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
-
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 0);
     ASSERT_EQ(array[1], 14999024);
@@ -301,12 +265,9 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_same_two_element(SortingRoutine sort_func) {
     int array[] = {12345, 12345};
 
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 12345);
     ASSERT_EQ(array[1], 12345);
@@ -315,15 +276,76 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   void test_single_element(SortingRoutine sort_func) {
     int array[] = {12345};
 
-    constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-    auto arr = Array(reinterpret_cast<uint8_t *>(array), ARRAY_SIZE,
-                     sizeof(int), Comparator(int_compare));
+    constexpr size_t ARRAY_LEN = sizeof(array) / sizeof(int);
 
-    sort_func(arr);
+    int_sort(sort_func, array, ARRAY_LEN);
 
     ASSERT_EQ(array[0], 12345);
   }
+
+  void test_different_elem_size(SortingRoutine sort_func) {
+    // Random order of values [0,50) to avoid only testing pre-sorted handling.
+    // Long enough to reach interesting code.
+    constexpr uint8_t ARRAY_INITIAL_VALS[] = {
+        42, 13, 8,  4,  17, 28, 20, 32, 22, 29, 7,  2,  46, 37, 26, 49, 24,
+        38, 10, 18, 40, 36, 47, 15, 11, 48, 44, 33, 1,  5,  16, 35, 39, 41,
+        14, 23, 3,  9,  6,  27, 21, 25, 31, 45, 12, 43, 34, 30, 19, 0};
+
+    constexpr size_t ARRAY_LEN = sizeof(ARRAY_INITIAL_VALS);
+    constexpr size_t MAX_ELEM_SIZE = 150;
+    constexpr size_t BUF_SIZE = ARRAY_LEN * MAX_ELEM_SIZE;
+
+    static_assert(ARRAY_LEN < 256); // so we can encode the values.
+
+    // Minimum alignment to test implementation for bugs related to assuming
+    // incorrect association between alignment and element size.
+    alignas(1) uint8_t buf[BUF_SIZE];
+
+    const auto fill_buf = [&buf](size_t elem_size) {
+      for (size_t i = 0; i < BUF_SIZE; ++i) {
+        buf[i] = 0;
+      }
+
+      for (size_t elem_i = 0, buf_i = 0; elem_i < ARRAY_LEN; ++elem_i) {
+        const uint8_t elem_val = ARRAY_INITIAL_VALS[elem_i];
+        for (size_t elem_byte_i = 0; elem_byte_i < elem_size; ++elem_byte_i) {
+          buf[buf_i] = elem_val;
+          buf_i += 1;
+        }
+      }
+    };
+
+    for (size_t elem_size = 0; elem_size <= MAX_ELEM_SIZE; ++elem_size) {
+      // Fill all bytes with data to ensure mistakes in elem swap are noticed.
+      fill_buf(elem_size);
+
+      sort_func(reinterpret_cast<void *>(buf), ARRAY_LEN, elem_size,
+                [](const void *a, const void *b) -> int {
+                  const uint8_t a_val = *reinterpret_cast<const uint8_t *>(a);
+                  const uint8_t b_val = *reinterpret_cast<const uint8_t *>(b);
+
+                  if (a_val < b_val) {
+                    return -1;
+                  } else if (a_val > b_val) {
+                    return 1;
+                  } else {
+                    return 0;
+                  }
+                });
+
+      for (size_t elem_i = 0, buf_i = 0; elem_i < ARRAY_LEN; ++elem_i) {
+        const uint8_t expected_elem_val = static_cast<uint8_t>(elem_i);
+
+        for (size_t elem_byte_i = 0; elem_byte_i < elem_size; ++elem_byte_i) {
+          const uint8_t buf_val = buf[buf_i];
+          // Check that every byte in the element has the expected value.
+          ASSERT_EQ(buf_val, expected_elem_val)
+              << "elem_size: " << elem_size << " buf_i: " << buf_i << '\n';
+          buf_i += 1;
+        }
+      }
+    }
+  }
 };
 
 #define LIST_SORTING_TESTS(Name, Func)                                         \
@@ -374,4 +396,7 @@ class SortingTest : public LIBC_NAMESPACE::testing::Test {
   TEST_F(LlvmLibc##Name##Test, SingleElementArray) {                           \
     test_single_element(Func);                                                 \
   }                                                                            \
+  TEST_F(LlvmLibc##Name##Test, DifferentElemSizeArray) {                       \
+    test_different_elem_size(Func);                                            \
+  }                                                                            \
   static_assert(true)
diff --git a/libc/test/src/stdlib/heap_sort_test.cpp b/libc/test/src/stdlib/heap_sort_test.cpp
index d70e3dc2272beb..18d4244506ec25 100644
--- a/libc/test/src/stdlib/heap_sort_test.cpp
+++ b/libc/test/src/stdlib/heap_sort_test.cpp
@@ -7,10 +7,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "SortingTest.h"
-#include "src/stdlib/heap_sort.h"
+#include "src/stdlib/qsort_util.h"
 
-void sort(const LIBC_NAMESPACE::internal::Array &array) {
-  LIBC_NAMESPACE::internal::heap_sort(array);
+void heap_sort(void *array, size_t array_size, size_t elem_size,
+               int (*compare)(const void *, const void *)) {
+
+  constexpr bool USE_QUICKSORT = false;
+
+  const auto is_less = [compare](const void *a,
+                                 const void *b) noexcept -> bool {
+    return compare(a, b) < 0;
+  };
+
+  LIBC_NAMESPACE::internal::unstable_sort_impl<USE_QUICKSORT>(
+      array, array_size, elem_size, is_less);
 }
 
-LIST_SORTING_TESTS(HeapSort, sort);
+LIST_SORTING_TESTS(HeapSort, heap_sort);
diff --git a/libc/test/src/stdlib/qsort_r_test.cpp b/libc/test/src/stdlib/qsort_r_test.cpp
index 6893fdc7b74c82..f18923618ed5ee 100644
--- a/libc/test/src/stdlib/qsort_r_test.cpp
+++ b/libc/test/src/stdlib/qsort_r_test.cpp
@@ -62,9 +62,9 @@ TEST(LlvmLibcQsortRTest, SortedArray) {
   ASSERT_LE(array[23], 11100);
   ASSERT_LE(array[24], 12310);
 
-  // This is a sorted list, but there still have to have been at least N
+  // This is a sorted list, but there still have to have been at least N - 1
   // comparisons made.
-  ASSERT_GE(count, ARRAY_SIZE);
+  ASSERT_GE(count, ARRAY_SIZE - 1);
 }
 
 TEST(LlvmLibcQsortRTest, ReverseSortedArray) {
diff --git a/libc/test/src/stdlib/qsort_test.cpp b/libc/test/src/stdlib/qsort_test.cpp
deleted file mode 100644
index 1e921a86fd1fd3..00000000000000
--- a/libc/test/src/stdlib/qsort_test.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-//===-- Unittests for qsort -----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "SortingTest.h"
-#include "src/stdlib/qsort.h"
-
-void sort(const LIBC_NAMESPACE::internal::Array &array) {
-  LIBC_NAMESPACE::qsort(reinterpret_cast<void *>(array.get(0)), array.size(),
-                        sizeof(int), SortingTest::int_compare);
-}
-
-LIST_SORTING_TESTS(Qsort, sort);
diff --git a/libc/test/src/stdlib/quick_sort_test.cpp b/libc/test/src/stdlib/quick_sort_test.cpp
index d6bf77ebfd40d7..2832c855370bcb 100644
--- a/libc/test/src/stdlib/quick_sort_test.cpp
+++ b/libc/test/src/stdlib/quick_sort_test.cpp
@@ -1,4 +1,4 @@
-//===-- Unittests for quick sort ------------------------------------------===//
+//===-- Unittests for qsort -----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,10 +7,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "SortingTest.h"
-#include "src/stdlib/quick_sort.h"
+#include "src/stdlib/qsort_util.h"
 
-void sort(const LIBC_NAMESPACE::internal::Array &array) {
-  LIBC_NAMESPACE::internal::quick_sort(array);
+void quick_sort(void *array, size_t array_size, size_t elem_size,
+                int (*compare)(const void *, const void *)) {
+  constexpr bool USE_QUICKSORT = true;
+
+  const auto is_less = [compare](const void *a,
+                                 const void *b) noexcept -> bool {
+    return compare(a, b) < 0;
+  };
+
+  LIBC_NAMESPACE::internal::unstable_sort_impl<USE_QUICKSORT>(
+      array, array_size, elem_size, is_less);
 }
 
-LIST_SORTING_TESTS(QuickSort, sort);
+LIST_SORTING_TESTS(Qsort, quick_sort);
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel
index e4b4b075705e8a..c0f15469126628 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel
@@ -120,31 +120,23 @@ libc_support_library(
     ],
 )
 
-libc_test(
-    name = "qsort_test",
-    srcs = ["qsort_test.cpp"],
-    libc_function_deps = ["//libc:qsort"],
-    deps = [
-        ":qsort_test_helper",
-        "//libc:types_size_t",
-    ],
-)
-
 libc_test(
     name = "quick_sort_test",
     srcs = ["quick_sort_test.cpp"],
+    libc_function_deps = ["//libc:qsort"],
     deps = [
         ":qsort_test_helper",
-        "//libc:qsort_util",
+        "//libc:types_size_t",
     ],
 )
 
 libc_test(
     name = "heap_sort_test",
     srcs = ["heap_sort_test.cpp"],
+    libc_function_deps = ["//libc:qsort"],
     deps = [
         ":qsort_test_helper",
-        "//libc:qsort_util",
+        "//libc:types_size_t",
     ],
 )
 

>From 6c345a356d5492d3cd04149d7dd9a23d81329403 Mon Sep 17 00:00:00 2001
From: Lukas Bergdoll <lukas.bergdoll at gmail.com>
Date: Thu, 2 Jan 2025 16:09:28 +0100
Subject: [PATCH 2/2] Move sort fuzz tests to new interface

---
 libc/fuzzing/stdlib/CMakeLists.txt            |  6 ++--
 libc/fuzzing/stdlib/heap_sort_fuzz.cpp        | 29 +++++++++----------
 .../{qsort_fuzz.cpp => quick_sort_fuzz.cpp}   | 29 +++++++++----------
 3 files changed, 30 insertions(+), 34 deletions(-)
 rename libc/fuzzing/stdlib/{qsort_fuzz.cpp => quick_sort_fuzz.cpp} (62%)

diff --git a/libc/fuzzing/stdlib/CMakeLists.txt b/libc/fuzzing/stdlib/CMakeLists.txt
index 9b3298cfc55a77..3dbd640a67dbd8 100644
--- a/libc/fuzzing/stdlib/CMakeLists.txt
+++ b/libc/fuzzing/stdlib/CMakeLists.txt
@@ -1,9 +1,9 @@
 add_libc_fuzzer(
-  qsort_fuzz
+  quick_sort_fuzz
   SRCS
-    qsort_fuzz.cpp
+    quick_sort_fuzz.cpp
   DEPENDS
-    libc.src.stdlib.qsort
+    libc.src.stdlib.qsort_util
 )
 
 add_libc_fuzzer(
diff --git a/libc/fuzzing/stdlib/heap_sort_fuzz.cpp b/libc/fuzzing/stdlib/heap_sort_fuzz.cpp
index 876c5f9975d4d9..6b00306ec7dc1d 100644
--- a/libc/fuzzing/stdlib/heap_sort_fuzz.cpp
+++ b/libc/fuzzing/stdlib/heap_sort_fuzz.cpp
@@ -10,21 +10,10 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "src/stdlib/heap_sort.h"
+#include "src/stdlib/qsort_util.h"
 #include <stdint.h>
 
-static int int_compare(const void *l, const void *r) {
-  int li = *reinterpret_cast<const int *>(l);
-  int ri = *reinterpret_cast<const int *>(r);
-  if (li == ri)
-    return 0;
-  if (li > ri)
-    return 1;
-  return -1;
-}
-
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
-
   const size_t array_size = size / sizeof(int);
   if (array_size == 0)
     return 0;
@@ -34,14 +23,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   for (size_t i = 0; i < array_size; ++i)
     array[i] = data_as_int[i];
 
-  auto arr = LIBC_NAMESPACE::internal::Array(
-      reinterpret_cast<uint8_t *>(array), array_size, sizeof(int), int_compare);
+  const auto is_less = [](const void *a_ptr,
+                          const void *b_ptr) noexcept -> bool {
+    const int &a = *static_cast<const int *>(a_ptr);
+    const int &b = *static_cast<const int *>(b_ptr);
+
+    return a < b;
+  };
 
-  LIBC_NAMESPACE::internal::heap_sort(arr);
+  constexpr bool USE_QUICKSORT = false;
+  LIBC_NAMESPACE::internal::unstable_sort_impl<USE_QUICKSORT>(
+      array, array_size, sizeof(int), is_less);
 
-  for (size_t i = 0; i < array_size - 1; ++i)
+  for (size_t i = 0; i < array_size - 1; ++i) {
     if (array[i] > array[i + 1])
       __builtin_trap();
+  }
 
   delete[] array;
   return 0;
diff --git a/libc/fuzzing/stdlib/qsort_fuzz.cpp b/libc/fuzzing/stdlib/quick_sort_fuzz.cpp
similarity index 62%
rename from libc/fuzzing/stdlib/qsort_fuzz.cpp
rename to libc/fuzzing/stdlib/quick_sort_fuzz.cpp
index 5d5053cff5c58c..6371e851d2fc3e 100644
--- a/libc/fuzzing/stdlib/qsort_fuzz.cpp
+++ b/libc/fuzzing/stdlib/quick_sort_fuzz.cpp
@@ -1,4 +1,4 @@
-//===-- qsort_fuzz.cpp ----------------------------------------------------===//
+//===-- quick_sort_fuzz.cpp------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,24 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 ///
-/// Fuzzing test for llvm-libc qsort implementation.
+/// Fuzzing test for llvm-libc quick_sort implementation.
 ///
 //===----------------------------------------------------------------------===//
 
-#include "src/stdlib/qsort.h"
+#include "src/stdlib/qsort_util.h"
 #include <stdint.h>
 
-static int int_compare(const void *l, const void *r) {
-  int li = *reinterpret_cast<const int *>(l);
-  int ri = *reinterpret_cast<const int *>(r);
-  if (li == ri)
-    return 0;
-  else if (li > ri)
-    return 1;
-  else
-    return -1;
-}
-
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   const size_t array_size = size / sizeof(int);
   if (array_size == 0)
@@ -34,7 +23,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   for (size_t i = 0; i < array_size; ++i)
     array[i] = data_as_int[i];
 
-  LIBC_NAMESPACE::qsort(array, array_size, sizeof(int), int_compare);
+  const auto is_less = [](const void *a_ptr,
+                          const void *b_ptr) noexcept -> bool {
+    const int &a = *static_cast<const int *>(a_ptr);
+    const int &b = *static_cast<const int *>(b_ptr);
+
+    return a < b;
+  };
+
+  constexpr bool USE_QUICKSORT = true;
+  LIBC_NAMESPACE::internal::unstable_sort_impl<USE_QUICKSORT>(
+      array, array_size, sizeof(int), is_less);
 
   for (size_t i = 0; i < array_size - 1; ++i) {
     if (array[i] > array[i + 1])



More information about the llvm-commits mailing list