[libc-commits] [libc] [llvm] [libc] Improve qsort (PR #120450)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Wed Dec 18 17:13:07 PST 2024


================
@@ -11,96 +11,155 @@
 
 #include "src/__support/CPP/cstddef.h"
 #include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "src/string/memory_utils/inline_memmove.h"
 
 #include <stdint.h>
 
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-using Compare = int(const void *, const void *);
-using CompareWithState = int(const void *, const void *, void *);
-
-enum class CompType { COMPARE, COMPARE_WITH_STATE };
-
-struct Comparator {
-  union {
-    Compare *comp_func;
-    CompareWithState *comp_func_r;
-  };
-  const CompType comp_type;
-
-  void *arg;
-
-  Comparator(Compare *func)
-      : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {}
-
-  Comparator(CompareWithState *func, void *arg_val)
-      : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE),
-        arg(arg_val) {}
-
-#if defined(__clang__)
-  // Recent upstream changes to -fsanitize=function find more instances of
-  // function type mismatches. One case is with the comparator passed to this
-  // class. Libraries will tend to pass comparators that take pointers to
-  // varying types while this comparator expects to accept const void pointers.
-  // Ideally those tools would pass a function that strictly accepts const
-  // void*s to avoid UB, or would use qsort_r to pass their own comparator.
-  [[clang::no_sanitize("function")]]
-#endif
-  int comp_vals(const void *a, const void *b) const {
-    if (comp_type == CompType::COMPARE) {
-      return comp_func(a, b);
-    } else {
-      return comp_func_r(a, b, arg);
-    }
+// Returns the max amount of bytes deemed reasonable - based on the target
+// properties - for use in local stack arrays.
+constexpr size_t max_stack_array_size() {
+  // Uses target pointer size as heuristic how much memory is available and
+  // unlikely to run into stack overflow and perf problems.
+  constexpr size_t ptr_diff_size = sizeof(ptrdiff_t);
+
+  if constexpr (ptr_diff_size >= 8) {
+    return 4096;
   }
-};
 
-class Array {
-  uint8_t *array;
-  size_t array_size;
+  if constexpr (ptr_diff_size == 4) {
+    return 512;
+  }
+
+  // 8-bit platforms are just not gonna work well with libc, qsort
+  // won't be the problem.
+  // 16-bit platforms ought to be able to store 64 bytes on the stack.
+  return 64;
+}
+
+class ArrayGenericSize {
+  uint8_t *array_base;
+  size_t array_len;
   size_t elem_size;
-  Comparator compare;
+
+  uint8_t *get_internal(size_t i) const noexcept {
+    return array_base + (i * elem_size);
+  }
 
 public:
-  Array(uint8_t *a, size_t s, size_t e, Comparator c)
-      : array(a), array_size(s), elem_size(e), compare(c) {}
-
-  uint8_t *get(size_t i) const { return array + i * elem_size; }
-
-  void swap(size_t i, size_t j) const {
-    uint8_t *elem_i = get(i);
-    uint8_t *elem_j = get(j);
-    for (size_t b = 0; b < elem_size; ++b) {
-      uint8_t temp = elem_i[b];
-      elem_i[b] = elem_j[b];
-      elem_j[b] = temp;
-    }
+  ArrayGenericSize(uint8_t *a, size_t s, size_t e) noexcept
+      : array_base(a), array_len(s), elem_size(e) {}
+
+  static constexpr bool has_fixed_size() { return false; }
+
+  void *get(size_t i) const noexcept {
+    return reinterpret_cast<void *>(get_internal(i));
   }
 
-  int elem_compare(size_t i, const uint8_t *other) const {
-    // An element must compare equal to itself so we don't need to consult the
-    // user provided comparator.
-    if (get(i) == other)
-      return 0;
-    return compare.comp_vals(get(i), other);
+  void swap(size_t i, size_t j) const noexcept {
+    // It's possible to use 8 byte blocks with `uint64_t`, but that
+    // generates more machine code as the remainder loop gets
+    // unrolled, plus 4 byte operations are more likely to be
+    // efficient on a wider variety of hardware. On x86 LLVM tends
+    // to unroll the block loop again into 2 16 byte swaps per
+    // iteration which is another reason that 4 byte blocks yields
+    // good performance even for big types.
+    using block_t = uint32_t;
+    constexpr size_t BLOCK_SIZE = sizeof(block_t);
+
+    uint8_t *elem_i = get_internal(i);
+    uint8_t *elem_j = get_internal(j);
+
+    const size_t elem_size_rem = elem_size % BLOCK_SIZE;
+    const block_t *elem_i_block_end =
+        reinterpret_cast<block_t *>(elem_i + (elem_size - elem_size_rem));
+
+    block_t *elem_i_block = reinterpret_cast<block_t *>(elem_i);
+    block_t *elem_j_block = reinterpret_cast<block_t *>(elem_j);
+
+    while (elem_i_block != elem_i_block_end) {
+      block_t tmp = *elem_i_block;
+      *elem_i_block = *elem_j_block;
+      *elem_j_block = tmp;
+      elem_i_block += 1;
+      elem_j_block += 1;
+    }
+
+    elem_i = reinterpret_cast<uint8_t *>(elem_i_block);
+    elem_j = reinterpret_cast<uint8_t *>(elem_j_block);
+    for (size_t n = 0; n < elem_size_rem; ++n) {
+      uint8_t tmp = elem_i[n];
+      elem_i[n] = elem_j[n];
+      elem_j[n] = tmp;
+    }
   }
 
-  size_t size() const { return array_size; }
+  size_t len() const noexcept { return array_len; }
 
-  // Make an Array starting at index |i| and size |s|.
-  LIBC_INLINE Array make_array(size_t i, size_t s) const {
-    return Array(get(i), s, elem_size, compare);
+  // Make an Array starting at index |i| and length |s|.
+  ArrayGenericSize make_array(size_t i, size_t s) const noexcept {
+    return ArrayGenericSize(get_internal(i), s, elem_size);
   }
 
-  // Reset this Array to point at a different interval of the same items.
-  LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) {
-    array = a;
-    array_size = s;
+  // Reset this Array to point at a different interval of the same
+  // items starting at index |i|.
+  void reset_bounds(size_t i, size_t s) noexcept {
+    array_base = get_internal(i);
+    array_len = s;
   }
 };
 
-using SortingRoutine = void(const Array &);
+// Having a specialized Array type for sorting that knowns at
----------------
SchrodingerZhu wrote:

may be `known at`

https://github.com/llvm/llvm-project/pull/120450


More information about the libc-commits mailing list