[libcxx] r349358 - [libcxx] Speeding up partition_point/lower_bound/upper_bound

Mon Dec 17 08:04:39 PST 2018

Author: ldionne
Date: Mon Dec 17 08:04:39 2018
New Revision: 349358

URL: http://llvm.org/viewvc/llvm-project?rev=349358&view=rev
Log:
[libcxx] Speeding up partition_point/lower_bound/upper_bound

This is a re-application of r345525, which had been reverted for fear of
a regression.

Reviewed as https://reviews.llvm.org/D53994.
Thanks to Denis Yaroshevskiy for the patch.

Added:
    libcxx/trunk/benchmarks/algorithms.partition_point.bench.cpp
    libcxx/trunk/test/libcxx/algorithms/half_positive.pass.cpp
Modified:
    libcxx/trunk/include/algorithm

Added: libcxx/trunk/benchmarks/algorithms.partition_point.bench.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/algorithms.partition_point.bench.cpp?rev=349358&view=auto
==============================================================================

--- libcxx/trunk/benchmarks/algorithms.partition_point.bench.cpp (added)
+++ libcxx/trunk/benchmarks/algorithms.partition_point.bench.cpp Mon Dec 17 08:04:39 2018
@@ -0,0 +1,124 @@
+#include <array>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+#include "CartesianBenchmarks.hpp"
+#include "GenerateInput.hpp"
+
+namespace {
+
+template <typename I, typename N> // Picks ten evenly spaced iterators out of the n-element range that starts at `first`.
+std::array<I, 10> every_10th_percentile_N(I first, N n) {
+  N step = n / 10; // gap between consecutive probes; for an integral N this is 0 whenever n < 10
+  std::array<I, 10> res;
+// res[i] ends up at first + i * step, i.e. the 0%, 10%, ..., 90% positions of the range.
+  for (size_t i = 0; i < 10; ++i) {
+    res[i] = first;
+    std::advance(first, step); // the advance performed after the tenth store is never observed
+  }
+
+  return res;
+}
+
+template <class IntT> // Shared input generator for the integer test types below.
+struct TestIntBase {
+  static std::vector<IntT> generateInput(size_t size) { // returns `size` elements, unsorted
+    std::vector<IntT> Res(size);
+    std::generate(Res.begin(), Res.end(),
+                  [] { return getRandomInteger<IntT>(); }); // one getRandomInteger<IntT>() call per element
+    return Res;
+  }
+};
+
+struct TestInt32 : TestIntBase<std::int32_t> { // test type: vector of std::int32_t
+  static constexpr const char* Name = "TestInt32"; // label spliced into the benchmark name
+};
+
+struct TestInt64 : TestIntBase<std::int64_t> { // test type: vector of std::int64_t
+  static constexpr const char* Name = "TestInt64"; // label spliced into the benchmark name
+};
+
+struct TestUint32 : TestIntBase<std::uint32_t> { // test type: vector of std::uint32_t
+  static constexpr const char* Name = "TestUint32"; // label spliced into the benchmark name
+};
+
+struct TestMediumString { // test type: vector of std::string produced by getRandomString
+  static constexpr const char* Name = "TestMediumString"; // label spliced into the benchmark name
+  static constexpr size_t StringSize = 32; // argument handed to getRandomString for every element
+// NOTE(review): std::string is used here, but <string> is not among the includes visible at the top of this file -- TODO confirm it arrives via another header.
+  static std::vector<std::string> generateInput(size_t size) { // returns `size` strings, unsorted
+    std::vector<std::string> Res(size);
+    std::generate(Res.begin(), Res.end(), [] { return getRandomString(StringSize); });
+    return Res;
+  }
+};
+
+using AllTestTypes = std::tuple<TestInt32, TestInt64, TestUint32, TestMediumString>; // type list passed to makeCartesianProductBenchmark in main()
+
+struct LowerBoundAlg { // function object that forwards to std::lower_bound
+  template <class I, class V>
+  I operator()(I first, I last, const V& value) const { // searches [first, last) for `value` with the default comparison
+    return std::lower_bound(first, last, value);
+  }
+
+  static constexpr const char* Name = "LowerBoundAlg"; // label spliced into the benchmark name
+};
+
+struct UpperBoundAlg { // function object that forwards to std::upper_bound
+  template <class I, class V>
+  I operator()(I first, I last, const V& value) const { // searches [first, last) for `value` with the default comparison
+    return std::upper_bound(first, last, value);
+  }
+
+  static constexpr const char* Name = "UpperBoundAlg"; // label spliced into the benchmark name
+};
+
+struct EqualRangeAlg { // function object that forwards to std::equal_range
+  template <class I, class V>
+  std::pair<I, I> operator()(I first, I last, const V& value) const { // returns the iterator pair produced by std::equal_range
+    return std::equal_range(first, last, value);
+  }
+
+  static constexpr const char* Name = "EqualRangeAlg"; // label spliced into the benchmark name
+};
+
+using AllAlgs = std::tuple<LowerBoundAlg, UpperBoundAlg, EqualRangeAlg>; // algorithm list passed to makeCartesianProductBenchmark in main()
+
+template <class Alg, class TestType> // One benchmark case: algorithm Alg run over sorted TestType data of a given size.
+struct PartitionPointBench {
+  size_t Quantity; // number of elements generated for this case
+// name() yields "PartitionPointBench_<Alg::Name>_<TestType::Name>/<Quantity>".
+  std::string name() const {
+    return std::string("PartitionPointBench_") + Alg::Name + "_" +
+           TestType::Name + '/' + std::to_string(Quantity);
+  }
+// run() builds the input once, outside the timed loop, then searches for ten values taken from the data itself.
+  void run(benchmark::State& state) const {
+    auto Data = TestType::generateInput(Quantity);
+    std::sort(Data.begin(), Data.end()); // the binary-search algorithms under test need sorted input
+    auto Every10Percentile = every_10th_percentile_N(Data.begin(), Data.size()); // ten iterators into Data used as search keys
+
+    for (auto _ : state) { // each timed iteration performs ten searches over the whole range
+      for (auto Test : Every10Percentile)
+        benchmark::DoNotOptimize(Alg{}(Data.begin(), Data.end(), *Test)); // *Test dereferences an iterator into Data, so Data must be non-empty
+    }
+  }
+};
+
+} // namespace
+
+int main(int argc, char** argv) { // Registers every (algorithm, test type, size) combination and runs the selected benchmarks.
+  benchmark::Initialize(&argc, argv);
+  if (benchmark::ReportUnrecognizedArguments(argc, argv))
+    return 1; // exit with failure when unrecognized command-line arguments were reported
+
+  const std::vector<size_t> Quantities = {1 << 8, 1 << 10, 1 << 20}; // input sizes: 256, 1024 and 1048576 elements
+  makeCartesianProductBenchmark<PartitionPointBench, AllAlgs, AllTestTypes>(
+      Quantities);
+  benchmark::RunSpecifiedBenchmarks();
+}

Modified: libcxx/trunk/include/algorithm
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/algorithm?rev=349358&r1=349357&r2=349358&view=diff
==============================================================================
--- libcxx/trunk/include/algorithm (original)
+++ libcxx/trunk/include/algorithm Mon Dec 17 08:04:39 2018
@@ -750,6 +750,32 @@ public:
     bool operator()(const _T1& __x, const _T2& __y) {return __p_(__y, __x);}
 };
 
+// Halve a non-negative integer quickly by dividing in the unsigned domain (llvm.org/PR39129).
+// This overload is chosen when is_integral<_Integral> holds. Negative inputs are not meaningful: the cast would reinterpret them as large unsigned values.
+template <typename _Integral>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
+typename enable_if
+<
+    is_integral<_Integral>::value,
+    _Integral
+>::type
+__half_positive(_Integral __value)
+{
+    return static_cast<_Integral>(static_cast<typename make_unsigned<_Integral>::type>(__value) / 2); // cast to the unsigned counterpart, halve, cast back
+}
+
+template <typename _Tp> // Fallback overload for types where is_integral is false (e.g. a user-defined difference type).
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
+typename enable_if
+<
+    !is_integral<_Tp>::value,
+    _Tp
+>::type
+__half_positive(_Tp __value)
+{
+    return __value / 2; // no unsigned trick here: relies on the type's own operator/
+}
+
 #ifdef _LIBCPP_DEBUG
 
 template <class _Compare>
@@ -3202,7 +3228,7 @@ partition_point(_ForwardIterator __first
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = _VSTD::__half_positive(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__pred(*__m))
@@ -4070,7 +4096,7 @@ __lower_bound(_ForwardIterator __first,
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = _VSTD::__half_positive(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__comp(*__m, __value_))
@@ -4112,7 +4138,7 @@ __upper_bound(_ForwardIterator __first,
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = _VSTD::__half_positive(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__comp(__value_, *__m))
@@ -4154,7 +4180,7 @@ __equal_range(_ForwardIterator __first,
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = _VSTD::__half_positive(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__comp(*__m, __value_))

Added: libcxx/trunk/test/libcxx/algorithms/half_positive.pass.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/test/libcxx/algorithms/half_positive.pass.cpp?rev=349358&view=auto
==============================================================================
--- libcxx/trunk/test/libcxx/algorithms/half_positive.pass.cpp (added)
+++ libcxx/trunk/test/libcxx/algorithms/half_positive.pass.cpp Mon Dec 17 08:04:39 2018
@@ -0,0 +1,56 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// <algorithm>
+
+// __half_positive divides an integer by 2 using unsigned arithmetic when the type is a built-in integral type.
+// This can be an important optimization for lower_bound, for example.
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <type_traits>
+
+#include "test_macros.h"
+#include "user_defined_integral.hpp"
+
+namespace {
+
+template <class IntType, class UnderlyingType = IntType> // Checks that __half_positive agrees with ordinary `/ 2` for one value.
+TEST_CONSTEXPR bool test(IntType max_v = IntType(std::numeric_limits<UnderlyingType>::max())) { // default probe: the largest UnderlyingType value, converted to IntType
+    return std::__half_positive(max_v) == max_v / 2;
+}
+
+}  // namespace
+
+int main() // Runs the check at run time, then again at compile time when TEST_STD_VER >= 11.
+{
+    { // run-time checks, including the non-integral UserDefinedIntegral<int> wrapper
+        assert(test<char>());
+        assert(test<int>());
+        assert(test<long>());
+        assert((test<UserDefinedIntegral<int>, int>())); // extra parentheses keep the template-argument comma from splitting the assert macro's argument
+        assert(test<size_t>());
+#if !defined(_LIBCPP_HAS_NO_INT128)
+        assert(test<__int128_t>());
+#endif  // !defined(_LIBCPP_HAS_NO_INT128)
+    }
+
+#if TEST_STD_VER >= 11
+    { // compile-time checks; UserDefinedIntegral<int> is not repeated in this list
+        static_assert(test<char>(), "");
+        static_assert(test<int>(), "");
+        static_assert(test<long>(), "");
+        static_assert(test<size_t>(), "");
+#if !defined(_LIBCPP_HAS_NO_INT128)
+        static_assert(test<__int128_t>(), "");
+#endif  // !defined(_LIBCPP_HAS_NO_INT128)
+    }
+#endif // TEST_STD_VER >= 11
+}