[llvm-branch-commits] [libcxx] 9ec8096 - [release/16.x][libc++] Revert the bitset sort optimization

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Apr 3 21:25:39 PDT 2023


Author: Louis Dionne
Date: 2023-04-03T21:25:04-07:00
New Revision: 9ec8096d0d50a353a5bc5a91064c6332bd634021

URL: https://github.com/llvm/llvm-project/commit/9ec8096d0d50a353a5bc5a91064c6332bd634021
DIFF: https://github.com/llvm/llvm-project/commit/9ec8096d0d50a353a5bc5a91064c6332bd634021.diff

LOG: [release/16.x][libc++] Revert the bitset sort optimization

Part of the bitset sort optimization can cause code using an invalid
comparator to go out-of-bounds, which is potentially dangerous. This
was an unintended consequence of the original patch, or at least the
fact that we didn't have anything in place to catch that or announce
that to vendors was unintended.

More specifically, std::sort assumes that the comparison predicate is a
strict weak order (which is a pre-condition to the algorithm). However,
in practice, some predicates may not satisfy the pre-condition in subtle
ways. While that is technically a user problem, we need a good answer
for how to handle this given that we know this exists in the wild and
unconditionally reading out-of-bounds data is not a great answer. We
can't weaken the preconditions of the algorithm, however letting this
situation happen without even a way of diagnosing the issue is pretty bad.

Hence, this patch reverts the optimization from release/16.x so that we
can buy a bit of time to figure out how to properly handle and communicate
this change in the LLVM 17 release.

The bitset sort optimization was 4eddbf9f10a (aka https://llvm.org/D122780).
This patch is not a pure revert of the patch, I also fixed up the includes
a bit.

Differential Revision: https://reviews.llvm.org/D146421

Added: 
    

Modified: 
    libcxx/docs/ReleaseNotes.rst
    libcxx/include/__algorithm/sort.h

Removed: 
    


################################################################################
diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index df2b8f78c2c89..d78c4702060fe 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -90,7 +90,6 @@ Improvements and New Features
   now provided when implementations in the global namespace are provided by
   the C library.
 - Implemented ``<memory_resource>`` header from C++17
-- `D122780 <https://reviews.llvm.org/D122780>`_ Improved the performance of std::sort
 - The ``ranges`` versions of ``copy``, ``move``, ``copy_backward`` and ``move_backward`` are now also optimized for
   ``std::deque<>::iterator``, which can lead to up to 20x performance improvements on certain algorithms.
 - The ``std`` and ``ranges`` versions of ``copy``, ``move``, ``copy_backward`` and ``move_backward`` are now also

diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
index a7d2d55a06f8a..a236be0a4dafd 100644
--- a/libcxx/include/__algorithm/sort.h
+++ b/libcxx/include/__algorithm/sort.h
@@ -11,15 +11,10 @@
 
 #include <__algorithm/comp.h>
 #include <__algorithm/comp_ref_type.h>
-#include <__algorithm/iter_swap.h>
 #include <__algorithm/iterator_operations.h>
 #include <__algorithm/min_element.h>
 #include <__algorithm/partial_sort.h>
 #include <__algorithm/unwrap_iter.h>
-#include <__assert>
-#include <__bit/blsr.h>
-#include <__bit/countl.h>
-#include <__bit/countr.h>
 #include <__config>
 #include <__debug>
 #include <__debug_utils/randomize_range.h>
@@ -28,10 +23,11 @@
 #include <__iterator/iterator_traits.h>
 #include <__memory/destruct_n.h>
 #include <__memory/unique_ptr.h>
-#include <__type_traits/conditional.h>
 #include <__type_traits/is_arithmetic.h>
+#include <__type_traits/is_trivially_copy_assignable.h>
+#include <__type_traits/is_trivially_copy_constructible.h>
 #include <__utility/move.h>
-#include <__utility/pair.h>
+#include <bit>
 #include <climits>
 #include <cstdint>
 
@@ -132,7 +128,8 @@ template <class _AlgPolicy, class _Compare, class _ForwardIterator>
 _LIBCPP_HIDE_FROM_ABI
 unsigned __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4,
                  _Compare __c) {
-  using _Ops   = _IterOps<_AlgPolicy>;
+  using _Ops = _IterOps<_AlgPolicy>;
+
   unsigned __r = std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
   if (__c(*__x4, *__x3)) {
     _Ops::iter_swap(__x3, __x4);
@@ -187,7 +184,7 @@ _LIBCPP_HIDE_FROM_ABI unsigned __sort5_wrap_policy(
     _Compare __c) {
   using _WrappedComp = typename _WrapAlgPolicy<_AlgPolicy, _Compare>::type;
   _WrappedComp __wrapped_comp(__c);
-  return std::__sort5<_WrappedComp, _ForwardIterator>(
+  return std::__sort5<_WrappedComp>(
       std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __wrapped_comp);
 }
 
@@ -212,13 +209,6 @@ using __use_branchless_sort =
     integral_constant<bool, __is_cpp17_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
                                 is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
 
-namespace __detail {
-
-// Size in bits for the bitset in use.
-enum { __block_size = sizeof(uint64_t) * 8 };
-
-} // namespace __detail
-
 // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
 template <class _Compare, class _RandomAccessIterator>
 inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
@@ -278,15 +268,10 @@ __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2,
   std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
 }
 
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
+template <class, class _Compare, class _RandomAccessIterator>
 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
-__sort5_maybe_branchless(
-    _RandomAccessIterator __x1,
-    _RandomAccessIterator __x2,
-    _RandomAccessIterator __x3,
-    _RandomAccessIterator __x4,
-    _RandomAccessIterator __x5,
-    _Compare __c) {
+__sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
+                         _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) {
   std::__cond_swap<_Compare>(__x1, __x2, __c);
   std::__cond_swap<_Compare>(__x4, __x5, __c);
   std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
@@ -315,48 +300,34 @@ _LIBCPP_CONSTEXPR_SINCE_CXX14 void __selection_sort(_BidirectionalIterator __fir
   }
 }
 
-// Sort the iterator range [__first, __last) using the comparator __comp using
-// the insertion sort algorithm.
 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
 _LIBCPP_HIDE_FROM_ABI
 void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
   using _Ops = _IterOps<_AlgPolicy>;
 
   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
-  if (__first == __last)
-    return;
-  _BidirectionalIterator __i = __first;
-  for (++__i; __i != __last; ++__i) {
-    _BidirectionalIterator __j = __i;
-    --__j;
-    if (__comp(*__i, *__j)) {
-      value_type __t(_Ops::__iter_move(__i));
-      _BidirectionalIterator __k = __j;
-      __j                        = __i;
-      do {
+  if (__first != __last) {
+    _BidirectionalIterator __i = __first;
+    for (++__i; __i != __last; ++__i) {
+      _BidirectionalIterator __j = __i;
+      value_type __t(_Ops::__iter_move(__j));
+      for (_BidirectionalIterator __k = __i; __k != __first && __comp(__t, *--__k); --__j)
         *__j = _Ops::__iter_move(__k);
-        __j  = __k;
-      } while (__j != __first && __comp(__t, *--__k));
       *__j = std::move(__t);
     }
   }
 }
 
-// Sort the iterator range [__first, __last) using the comparator __comp using
-// the insertion sort algorithm.  Insertion sort has two loops, outer and inner.
-// The implementation below has not bounds check (unguarded) for the inner loop.
-// Assumes that there is an element in the position (__first - 1) and that each
-// element in the input range is greater or equal to the element at __first - 1.
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-_LIBCPP_HIDE_FROM_ABI void
-__insertion_sort_unguarded(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
+_LIBCPP_HIDE_FROM_ABI
+void __insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
   using _Ops = _IterOps<_AlgPolicy>;
+
  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
-  if (__first == __last)
-    return;
-  for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) {
-    _RandomAccessIterator __j = __i - difference_type(1);
+  _RandomAccessIterator __j = __first + difference_type(2);
+  std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), __j, __comp);
+  for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
     if (__comp(*__i, *__j)) {
       value_type __t(_Ops::__iter_move(__i));
       _RandomAccessIterator __k = __j;
@@ -364,9 +335,10 @@ __insertion_sort_unguarded(_RandomAccessIterator __first, _RandomAccessIterator
       do {
         *__j = _Ops::__iter_move(__k);
         __j = __k;
-      } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above.
+      } while (__j != __first && __comp(__t, *--__k));
       *__j = std::move(__t);
     }
+    __j = __i;
   }
 }
 
@@ -387,7 +359,7 @@ _LIBCPP_HIDDEN bool __insertion_sort_incomplete(
     return true;
   case 2:
     if (__comp(*--__last, *__first))
-      _Ops::iter_swap(__first, __last);
+      _IterOps<_AlgPolicy>::iter_swap(__first, __last);
     return true;
   case 3:
    std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
@@ -456,336 +428,17 @@ void __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterat
   }
 }
 
-template <class _AlgPolicy, class _RandomAccessIterator>
-inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
-    _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) {
-  using _Ops = _IterOps<_AlgPolicy>;
-  typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  // Swap one pair on each iteration as long as both bitsets have at least one
-  // element for swapping.
-  while (__left_bitset != 0 && __right_bitset != 0) {
-    difference_type tz_left  = __libcpp_ctz(__left_bitset);
-    __left_bitset            = __libcpp_blsr(__left_bitset);
-    difference_type tz_right = __libcpp_ctz(__right_bitset);
-    __right_bitset           = __libcpp_blsr(__right_bitset);
-    _Ops::iter_swap(__first + tz_left, __last - tz_right);
-  }
-}
-
-template <class _Compare,
-          class _RandomAccessIterator,
-          class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
-inline _LIBCPP_HIDE_FROM_ABI void
-__populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) {
-  // Possible vectorization. With a proper "-march" flag, the following loop
-  // will be compiled into a set of SIMD instructions.
-  _RandomAccessIterator __iter = __first;
-  for (int __j = 0; __j < __detail::__block_size;) {
-    bool __comp_result = !__comp(*__iter, __pivot);
-    __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
-    __j++;
-    ++__iter;
-  }
-}
-
-template <class _Compare,
-          class _RandomAccessIterator,
-          class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
-inline _LIBCPP_HIDE_FROM_ABI void
-__populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) {
-  // Possible vectorization. With a proper "-march" flag, the following loop
-  // will be compiled into a set of SIMD instructions.
-  _RandomAccessIterator __iter = __lm1;
-  for (int __j = 0; __j < __detail::__block_size;) {
-    bool __comp_result = __comp(*__iter, __pivot);
-    __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
-    __j++;
-    --__iter;
-  }
-}
-
-template <class _AlgPolicy,
-          class _Compare,
-          class _RandomAccessIterator,
-          class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
-inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks(
-    _RandomAccessIterator& __first,
-    _RandomAccessIterator& __lm1,
-    _Compare __comp,
-    _ValueType& __pivot,
-    uint64_t& __left_bitset,
-    uint64_t& __right_bitset) {
-  typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  difference_type __remaining_len = __lm1 - __first + 1;
-  difference_type __l_size;
-  difference_type __r_size;
-  if (__left_bitset == 0 && __right_bitset == 0) {
-    __l_size = __remaining_len / 2;
-    __r_size = __remaining_len - __l_size;
-  } else if (__left_bitset == 0) {
-    // We know at least one side is a full block.
-    __l_size = __remaining_len - __detail::__block_size;
-    __r_size = __detail::__block_size;
-  } else { // if (__right_bitset == 0)
-    __l_size = __detail::__block_size;
-    __r_size = __remaining_len - __detail::__block_size;
-  }
-  // Record the comparison outcomes for the elements currently on the left side.
-  if (__left_bitset == 0) {
-    _RandomAccessIterator __iter = __first;
-    for (int j = 0; j < __l_size; j++) {
-      bool __comp_result = !__comp(*__iter, __pivot);
-      __left_bitset |= (static_cast<uint64_t>(__comp_result) << j);
-      ++__iter;
-    }
-  }
-  // Record the comparison outcomes for the elements currently on the right
-  // side.
-  if (__right_bitset == 0) {
-    _RandomAccessIterator __iter = __lm1;
-    for (int j = 0; j < __r_size; j++) {
-      bool __comp_result = __comp(*__iter, __pivot);
-      __right_bitset |= (static_cast<uint64_t>(__comp_result) << j);
-      --__iter;
-    }
-  }
-  std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
-  __first += (__left_bitset == 0) ? __l_size : 0;
-  __lm1 -= (__right_bitset == 0) ? __r_size : 0;
-}
-
-template <class _AlgPolicy, class _RandomAccessIterator>
-inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
-    _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) {
-  using _Ops = _IterOps<_AlgPolicy>;
-  typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  if (__left_bitset) {
-    // Swap within the left side.  Need to find set positions in the reverse
-    // order.
-    while (__left_bitset != 0) {
-      difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
-      __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
-      _RandomAccessIterator it = __first + __tz_left;
-      if (it != __lm1) {
-        _Ops::iter_swap(it, __lm1);
-      }
-      --__lm1;
-    }
-    __first = __lm1 + difference_type(1);
-  } else if (__right_bitset) {
-    // Swap within the right side.  Need to find set positions in the reverse
-    // order.
-    while (__right_bitset != 0) {
-      difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
-      __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
-      _RandomAccessIterator it = __lm1 - __tz_right;
-      if (it != __first) {
-        _Ops::iter_swap(it, __first);
-      }
-      ++__first;
-    }
-  }
-}
-
-// Partition [__first, __last) using the comparator __comp.  *__first has the
-// chosen pivot.  Elements that are equivalent are kept to the left of the
-// pivot.  Returns the iterator for the pivot and a bool value which is true if
-// the provided range is already sorted, false otherwise.  We assume that the
-// length of the range is at least three elements.
-//
-// __bitset_partition uses bitsets for storing outcomes of the comparisons
-// between the pivot and other elements.
-template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
-_LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
-__bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
-  using _Ops = _IterOps<_AlgPolicy>;
-  typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
-  typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
-
-  _RandomAccessIterator __begin = __first;
-  value_type __pivot(_Ops::__iter_move(__first));
-  // Find the first element greater than the pivot.
-  if (__comp(__pivot, *(__last - difference_type(1)))) {
-    // Not guarded since we know the last element is greater than the pivot.
-    while (!__comp(__pivot, *++__first)) {
-    }
-  } else {
-    while (++__first < __last && !__comp(__pivot, *__first)) {
-    }
-  }
-  // Find the last element less than or equal to the pivot.
-  if (__first < __last) {
-    // It will be always guarded because __introsort will do the median-of-three
-    // before calling this.
-    while (__comp(__pivot, *--__last)) {
-    }
-  }
-  // If the first element greater than the pivot is at or after the
-  // last element less than or equal to the pivot, then we have covered the
-  // entire range without swapping elements.  This implies the range is already
-  // partitioned.
-  bool __already_partitioned = __first >= __last;
-  if (!__already_partitioned) {
-    _Ops::iter_swap(__first, __last);
-    ++__first;
-  }
-
-  // In [__first, __last) __last is not inclusive. From now on, it uses last
-  // minus one to be inclusive on both sides.
-  _RandomAccessIterator __lm1 = __last - difference_type(1);
-  uint64_t __left_bitset      = 0;
-  uint64_t __right_bitset     = 0;
-
-  // Reminder: length = __lm1 - __first + 1.
-  while (__lm1 - __first >= 2 * __detail::__block_size - 1) {
-    // Record the comparison outcomes for the elements currently on the left
-    // side.
-    if (__left_bitset == 0)
-      std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset);
-    // Record the comparison outcomes for the elements currently on the right
-    // side.
-    if (__right_bitset == 0)
-      std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset);
-    // Swap the elements recorded to be the candidates for swapping in the
-    // bitsets.
-    std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
-    // Only advance the iterator if all the elements that need to be moved to
-    // other side were moved.
-    __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
-    __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
-  }
-  // Now, we have a less-than a block worth of elements on at least one of the
-  // sides.
-  std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>(
-      __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset);
-  // At least one the bitsets would be empty.  For the non-empty one, we need to
-  // properly partition the elements that appear within that bitset.
-  std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset);
-
-  // Move the pivot to its correct position.
-  _RandomAccessIterator __pivot_pos = __first - difference_type(1);
-  if (__begin != __pivot_pos) {
-    *__begin = _Ops::__iter_move(__pivot_pos);
-  }
-  *__pivot_pos = std::move(__pivot);
-  return std::make_pair(__pivot_pos, __already_partitioned);
-}
-
-// Partition [__first, __last) using the comparator __comp.  *__first has the
-// chosen pivot.  Elements that are equivalent are kept to the right of the
-// pivot.  Returns the iterator for the pivot and a bool value which is true if
-// the provided range is already sorted, false otherwise.  We assume that the
-// length of the range is at least three elements.
-template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
-_LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
-__partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
-  using _Ops = _IterOps<_AlgPolicy>;
-  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
-  _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
-  _RandomAccessIterator __begin = __first;
-  value_type __pivot(_Ops::__iter_move(__first));
-  // Find the first element greater or equal to the pivot.  It will be always
-  // guarded because __introsort will do the median-of-three before calling
-  // this.
-  while (__comp(*++__first, __pivot))
-    ;
-
-  // Find the last element less than the pivot.
-  if (__begin == __first - difference_type(1)) {
-    while (__first < __last && !__comp(*--__last, __pivot))
-      ;
-  } else {
-    // Guarded.
-    while (!__comp(*--__last, __pivot))
-      ;
-  }
-
-  // If the first element greater than or equal to the pivot is at or after the
-  // last element less than the pivot, then we have covered the entire range
-  // without swapping elements.  This implies the range is already partitioned.
-  bool __already_partitioned = __first >= __last;
-  // Go through the remaining elements.  Swap pairs of elements (one to the
-  // right of the pivot and the other to left of the pivot) that are not on the
-  // correct side of the pivot.
-  while (__first < __last) {
-    _Ops::iter_swap(__first, __last);
-    while (__comp(*++__first, __pivot))
-      ;
-    while (!__comp(*--__last, __pivot))
-      ;
-  }
-  // Move the pivot to its correct position.
-  _RandomAccessIterator __pivot_pos = __first - difference_type(1);
-  if (__begin != __pivot_pos) {
-    *__begin = _Ops::__iter_move(__pivot_pos);
-  }
-  *__pivot_pos = std::move(__pivot);
-  return std::make_pair(__pivot_pos, __already_partitioned);
-}
-
-// Similar to the above function.  Elements equivalent to the pivot are put to
-// the left of the pivot.  Returns the iterator to the pivot element.
-template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
-_LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
-__partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
+void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
+                 typename iterator_traits<_RandomAccessIterator>::difference_type __depth) {
   using _Ops = _IterOps<_AlgPolicy>;
-  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
-  _RandomAccessIterator __begin = __first;
-  value_type __pivot(_Ops::__iter_move(__first));
-  if (__comp(__pivot, *(__last - difference_type(1)))) {
-    // Guarded.
-    while (!__comp(__pivot, *++__first)) {
-    }
-  } else {
-    while (++__first < __last && !__comp(__pivot, *__first)) {
-    }
-  }
-
-  if (__first < __last) {
-    // It will be always guarded because __introsort will do the
-    // median-of-three before calling this.
-    while (__comp(__pivot, *--__last)) {
-    }
-  }
-  while (__first < __last) {
-    _Ops::iter_swap(__first, __last);
-    while (!__comp(__pivot, *++__first))
-      ;
-    while (__comp(__pivot, *--__last))
-      ;
-  }
-  _RandomAccessIterator __pivot_pos = __first - difference_type(1);
-  if (__begin != __pivot_pos) {
-    *__begin = _Ops::__iter_move(__pivot_pos);
-  }
-  *__pivot_pos = std::move(__pivot);
-  return __first;
-}
 
-// The main sorting function.  Implements introsort combined with other ideas:
-//  - option of using block quick sort for partitioning,
-//  - guarded and unguarded insertion sort for small lengths,
-//  - Tuckey's ninther technique for computing the pivot,
-//  - check on whether partition was not required.
-// The implementation is partly based on Orson Peters' pattern-defeating
-// quicksort, published at: <https://github.com/orlp/pdqsort>.
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _UseBitSetPartition>
-void __introsort(_RandomAccessIterator __first,
-                 _RandomAccessIterator __last,
-                 _Compare __comp,
-                 typename iterator_traits<_RandomAccessIterator>::difference_type __depth,
-                 bool __leftmost = true) {
-  using _Ops = _IterOps<_AlgPolicy>;
  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  using _Comp_ref = __comp_ref_type<_Compare>;
-  // Upper bound for using insertion sort for sorting.
-  _LIBCPP_CONSTEXPR difference_type __limit = 24;
-  // Lower bound for using Tuckey's ninther technique for median computation.
-  _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128;
+  typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
+  const difference_type __limit =
+      is_trivially_copy_constructible<value_type>::value && is_trivially_copy_assignable<value_type>::value ? 30 : 6;
   while (true) {
+  __restart:
     
diff erence_type __len = __last - __first;
     switch (__len) {
     case 0:
@@ -793,7 +446,7 @@ void __introsort(_RandomAccessIterator __first,
       return;
     case 2:
       if (__comp(*--__last, *__first))
-        _Ops::iter_swap(__first, __last);
+        _IterOps<_AlgPolicy>::iter_swap(__first, __last);
       return;
     case 3:
      std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
@@ -808,60 +461,127 @@ void __introsort(_RandomAccessIterator __first,
           --__last, __comp);
       return;
     }
-    // Use insertion sort if the length of the range is below the specified limit.
-    if (__len < __limit) {
-      if (__leftmost) {
-        std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
-      } else {
-        std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp);
-      }
+    if (__len <= __limit) {
+      std::__insertion_sort_3<_AlgPolicy, _Compare>(__first, __last, __comp);
       return;
     }
+    // __len > 5
     if (__depth == 0) {
       // Fallback to heap sort as Introsort suggests.
       std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp);
       return;
     }
     --__depth;
+    _RandomAccessIterator __m = __first;
+    _RandomAccessIterator __lm1 = __last;
+    --__lm1;
+    unsigned __n_swaps;
     {
-      difference_type __half_len = __len / 2;
-      // Use Tuckey's ninther technique or median of 3 for pivot selection
-      // depending on the length of the range being sorted.
-      if (__len > __ninther_threshold) {
-        std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp);
-        std::__sort3<_AlgPolicy, _Compare>(
-            __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp);
-        std::__sort3<_AlgPolicy, _Compare>(
-            __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp);
-        std::__sort3<_AlgPolicy, _Compare>(
-            __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp);
-        _Ops::iter_swap(__first, __first + __half_len);
+      difference_type __delta;
+      if (__len >= 1000) {
+        __delta = __len / 2;
+        __m += __delta;
+        __delta /= 2;
+        __n_swaps = std::__sort5_wrap_policy<_AlgPolicy, _Compare>(
+            __first, __first + __delta, __m, __m + __delta, __lm1, __comp);
       } else {
-        std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp);
+        __delta = __len / 2;
+        __m += __delta;
+        __n_swaps = std::__sort3<_AlgPolicy, _Compare>(__first, __m, __lm1, __comp);
       }
     }
-    // The elements to the left of the current iterator range are already
-    // sorted.  If the current iterator range to be sorted is not the
-    // leftmost part of the entire iterator range and the pivot is same as
-    // the highest element in the range to the left, then we know that all
-    // the elements in the range [first, pivot] would be equal to the pivot,
-    // assuming the equal elements are put on the left side when
-    // partitioned.  This also means that we do not need to sort the left
-    // side of the partition.
-    if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) {
-      __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>(
-          __first, __last, _Comp_ref(__comp));
-      continue;
+    // *__m is median
+    // partition [__first, __m) < *__m and *__m <= [__m, __last)
+    // (this inhibits tossing elements equivalent to __m around unnecessarily)
+    _RandomAccessIterator __i = __first;
+    _RandomAccessIterator __j = __lm1;
+    // j points beyond range to be tested, *__m is known to be <= *__lm1
+    // The search going up is known to be guarded but the search coming down isn't.
+    // Prime the downward search with a guard.
+    if (!__comp(*__i, *__m)) // if *__first == *__m
+    {
+      // *__first == *__m, *__first doesn't go in first part
+      // manually guard downward moving __j against __i
+      while (true) {
+        if (__i == --__j) {
+          // *__first == *__m, *__m <= all other elements
+          // Parition instead into [__first, __i) == *__first and *__first < [__i, __last)
+          ++__i; // __first + 1
+          __j = __last;
+          if (!__comp(*__first, *--__j)) // we need a guard if *__first == *(__last-1)
+          {
+            while (true) {
+              if (__i == __j)
+                return; // [__first, __last) all equivalent elements
+              if (__comp(*__first, *__i)) {
+                _Ops::iter_swap(__i, __j);
+                ++__n_swaps;
+                ++__i;
+                break;
+              }
+              ++__i;
+            }
+          }
+          // [__first, __i) == *__first and *__first < [__j, __last) and __j == __last - 1
+          if (__i == __j)
+            return;
+          while (true) {
+            while (!__comp(*__first, *__i))
+              ++__i;
+            while (__comp(*__first, *--__j))
+              ;
+            if (__i >= __j)
+              break;
+            _Ops::iter_swap(__i, __j);
+            ++__n_swaps;
+            ++__i;
+          }
+          // [__first, __i) == *__first and *__first < [__i, __last)
+          // The first part is sorted, sort the second part
+          // std::__sort<_Compare>(__i, __last, __comp);
+          __first = __i;
+          goto __restart;
+        }
+        if (__comp(*__j, *__m)) {
+          _Ops::iter_swap(__i, __j);
+          ++__n_swaps;
+          break; // found guard for downward moving __j, now use unguarded partition
+        }
+      }
+    }
+    // It is known that *__i < *__m
+    ++__i;
+    // j points beyond range to be tested, *__m is known to be <= *__lm1
+    // if not yet partitioned...
+    if (__i < __j) {
+      // known that *(__i - 1) < *__m
+      // known that __i <= __m
+      while (true) {
+        // __m still guards upward moving __i
+        while (__comp(*__i, *__m))
+          ++__i;
+        // It is now known that a guard exists for downward moving __j
+        while (!__comp(*--__j, *__m))
+          ;
+        if (__i > __j)
+          break;
+        _Ops::iter_swap(__i, __j);
+        ++__n_swaps;
+        // It is known that __m != __j
+        // If __m just moved, follow it
+        if (__m == __i)
+          __m = __j;
+        ++__i;
+      }
+    }
+    // [__first, __i) < *__m and *__m <= [__i, __last)
+    if (__i != __m && __comp(*__m, *__i)) {
+      _Ops::iter_swap(__i, __m);
+      ++__n_swaps;
     }
-    // Use bitset partition only if asked for.
-    auto __ret =
-        _UseBitSetPartition
-            ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp)
-            : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp);
-    _RandomAccessIterator __i = __ret.first;
     // [__first, __i) < *__i and *__i <= [__i+1, __last)
     // If we were given a perfect partition, see if insertion sort is quick...
-    if (__ret.second) {
+    if (__n_swaps == 0) {
       using _WrappedComp = typename _WrapAlgPolicy<_AlgPolicy, _Compare>::type;
       _WrappedComp __wrapped_comp(__comp);
       bool __fs = std::__insertion_sort_incomplete<_WrappedComp>(__first, __i, __wrapped_comp);
@@ -877,11 +597,14 @@ void __introsort(_RandomAccessIterator __first,
         }
       }
     }
-    // Sort the left partiton recursively and the right partition with tail recursion elimination.
-    std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>(
-        __first, __i, __comp, __depth, __leftmost);
-    __leftmost = false;
-    __first    = ++__i;
+    // sort smaller range with recursive call and larger with tail recursion elimination
+    if (__i - __first < __last - __i) {
+      std::__introsort<_AlgPolicy, _Compare>(__first, __i, __comp, __depth);
+      __first = ++__i;
+    } else {
+      std::__introsort<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp, __depth);
+      __last = __i;
+    }
   }
 }
 
@@ -913,14 +636,7 @@ _LIBCPP_HIDDEN void __sort(_RandomAccessIterator __first, _RandomAccessIterator
   using _AlgPolicy = typename _Unwrap::_AlgPolicy;
   using _Compare = typename _Unwrap::_Comp;
   _Compare __comp = _Unwrap::__get_comp(__wrapped_comp);
-  // Only use bitset partitioning for arithmetic types.  We should also check
-  // that the default comparator is in use so that we are sure that there are no
-  // branches in the comparator.
-  std::__introsort<_AlgPolicy,
-                   _Compare,
-                   _RandomAccessIterator,
-                   __use_branchless_sort<_Compare, _RandomAccessIterator>::value>(
-      __first, __last, __comp, __depth_limit);
+  std::__introsort<_AlgPolicy, _Compare>(__first, __last, __comp, __depth_limit);
 }
 
 template <class _Compare, class _Tp>


        


More information about the llvm-branch-commits mailing list