[libcxx-commits] [libcxx] 68b1035 - [libc++][PSTL] Add a __parallel_sort implementation to libdispatch
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Aug 15 12:39:47 PDT 2023
Author: Nikolas Klauser
Date: 2023-08-15T12:20:40-07:00
New Revision: 68b1035965f08d7cdfdcb6a5c17d53e45896f3f0
URL: https://github.com/llvm/llvm-project/commit/68b1035965f08d7cdfdcb6a5c17d53e45896f3f0
DIFF: https://github.com/llvm/llvm-project/commit/68b1035965f08d7cdfdcb6a5c17d53e45896f3f0.diff
LOG: [libc++][PSTL] Add a __parallel_sort implementation to libdispatch
Reviewed By: #libc, ldionne
Spies: ldionne, libcxx-commits
Differential Revision: https://reviews.llvm.org/D155136
Added:
libcxx/benchmarks/algorithms/pstl.stable_sort.bench.cpp
Modified:
libcxx/benchmarks/CMakeLists.txt
libcxx/benchmarks/algorithms/stable_sort.bench.cpp
libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
libcxx/src/pstl/libdispatch.cpp
libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/pstl.all_of.pass.cpp
libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/pstl.any_of.pass.cpp
libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/pstl.none_of.pass.cpp
libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/pstl.stable_sort.pass.cpp
Removed:
################################################################################
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index b5d3fbf05ed802..92f295ef399008 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -161,6 +161,7 @@ set(BENCHMARK_TESTS
algorithms/min.bench.cpp
algorithms/min_max_element.bench.cpp
algorithms/pop_heap.bench.cpp
+ algorithms/pstl.stable_sort.bench.cpp
algorithms/push_heap.bench.cpp
algorithms/ranges_make_heap.bench.cpp
algorithms/ranges_make_heap_then_sort_heap.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/pstl.stable_sort.bench.cpp b/libcxx/benchmarks/algorithms/pstl.stable_sort.bench.cpp
new file mode 100644
index 00000000000000..9357b870bece64
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/pstl.stable_sort.bench.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+
+#include "common.h"
+
+namespace {
+template <class ValueType, class Order>
+struct StableSort {
+ size_t Quantity;
+
+ void run(benchmark::State& state) const {
+ runOpOnCopies<ValueType>(state, Quantity, Order(), BatchSize::CountBatch, [](auto& Copy) {
+ std::stable_sort(std::execution::par, Copy.begin(), Copy.end());
+ });
+ }
+
+ bool skip() const { return Order() == ::Order::Heap; }
+
+ std::string name() const {
+ return "BM_pstl_stable_sort" + ValueType::name() + Order::name() + "/" + std::to_string(Quantity);
+ }
+};
+} // namespace
+
+int main(int argc, char** argv) {
+ benchmark::Initialize(&argc, argv);
+ if (benchmark::ReportUnrecognizedArguments(argc, argv))
+ return 1;
+ makeCartesianProductBenchmark<StableSort, AllValueTypes, AllOrders>(Quantities);
+ benchmark::RunSpecifiedBenchmarks();
+}
diff --git a/libcxx/benchmarks/algorithms/stable_sort.bench.cpp b/libcxx/benchmarks/algorithms/stable_sort.bench.cpp
index fe88d01dd66f89..024a036e0c2582 100644
--- a/libcxx/benchmarks/algorithms/stable_sort.bench.cpp
+++ b/libcxx/benchmarks/algorithms/stable_sort.bench.cpp
@@ -16,7 +16,7 @@ struct StableSort {
size_t Quantity;
void run(benchmark::State& state) const {
- runOpOnCopies<ValueType>(state, Quantity, Order(), BatchSize::CountElements, [](auto& Copy) {
+ runOpOnCopies<ValueType>(state, Quantity, Order(), BatchSize::CountBatch, [](auto& Copy) {
std::stable_sort(Copy.begin(), Copy.end());
});
}
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
index 51240e92e0d0b7..50b6e0b1d0a03f 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
@@ -9,8 +9,10 @@
#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H
#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H
+#include <__algorithm/inplace_merge.h>
#include <__algorithm/lower_bound.h>
#include <__algorithm/max.h>
+#include <__algorithm/merge.h>
#include <__algorithm/upper_bound.h>
#include <__atomic/atomic.h>
#include <__config>
@@ -21,7 +23,6 @@
#include <__memory/construct_at.h>
#include <__memory/unique_ptr.h>
#include <__numeric/reduce.h>
-#include <__utility/exception_guard.h>
#include <__utility/move.h>
#include <__utility/pair.h>
#include <__utility/terminate_on_exception.h>
@@ -61,10 +62,7 @@ struct __chunk_partitions {
template <class _RandomAccessIterator, class _Functor>
_LIBCPP_HIDE_FROM_ABI void
-__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
- auto __partitions = __libdispatch::__partition_chunks(__last - __first);
-
- // Perform the chunked execution.
+__dispatch_parallel_for(__chunk_partitions __partitions, _RandomAccessIterator __first, _Functor __func) {
__libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
auto __index =
@@ -75,6 +73,13 @@ __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fun
});
}
+template <class _RandomAccessIterator, class _Functor>
+_LIBCPP_HIDE_FROM_ABI void
+__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
+ return __libdispatch::__dispatch_parallel_for(
+ __libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func));
+}
+
template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut>
struct __merge_range {
__merge_range(_RandomAccessIterator1 __mid1, _RandomAccessIterator2 __mid2, _RandomAccessIteratorOut __result)
@@ -220,11 +225,92 @@ _LIBCPP_HIDE_FROM_ABI _Value __parallel_transform_reduce(
});
}
-// TODO: parallelize this
template <class _RandomAccessIterator, class _Comp, class _LeafSort>
_LIBCPP_HIDE_FROM_ABI void __parallel_stable_sort(
_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) {
- __leaf_sort(__first, __last, __comp);
+ const auto __size = __last - __first;
+ auto __partitions = __libdispatch::__partition_chunks(__size);
+
+ if (__partitions.__chunk_count_ == 0)
+ return;
+
+ if (__partitions.__chunk_count_ == 1)
+ return __leaf_sort(__first, __last, __comp);
+
+ using _Value = __iter_value_type<_RandomAccessIterator>;
+
+ auto __destroy = [__size](_Value* __ptr) {
+ std::destroy_n(__ptr, __size);
+ std::allocator<_Value>().deallocate(__ptr, __size);
+ };
+
+ // TODO: use __uninitialized_buffer
+ unique_ptr<_Value[], decltype(__destroy)> __values(std::allocator<_Value>().allocate(__size), __destroy);
+
+ return std::__terminate_on_exception([&] {
+ // Initialize all elements to a moved-from state
+ // TODO: Don't do this - this can be done in the first merge - see https://llvm.org/PR63928
+ std::__construct_at(__values.get(), std::move(*__first));
+ for (__iter_diff_t<_RandomAccessIterator> __i = 1; __i != __size; ++__i) {
+ std::__construct_at(__values.get() + __i, std::move(__values.get()[__i - 1]));
+ }
+ *__first = std::move(__values.get()[__size - 1]);
+
+ __libdispatch::__dispatch_parallel_for(
+ __partitions,
+ __first,
+ [&__leaf_sort, &__comp](_RandomAccessIterator __chunk_first, _RandomAccessIterator __chunk_last) {
+ __leaf_sort(std::move(__chunk_first), std::move(__chunk_last), __comp);
+ });
+
+ bool __objects_are_in_buffer = false;
+ do {
+ const auto __old_chunk_size = __partitions.__chunk_size_;
+ if (__partitions.__chunk_count_ % 2 == 1) {
+ auto __inplace_merge_chunks = [&__comp, &__partitions](auto __first_chunk_begin) {
+ std::inplace_merge(
+ __first_chunk_begin,
+ __first_chunk_begin + __partitions.__first_chunk_size_,
+ __first_chunk_begin + __partitions.__first_chunk_size_ + __partitions.__chunk_size_,
+ __comp);
+ };
+ if (__objects_are_in_buffer)
+ __inplace_merge_chunks(__values.get());
+ else
+ __inplace_merge_chunks(__first);
+ __partitions.__first_chunk_size_ += 2 * __partitions.__chunk_size_;
+ } else {
+ __partitions.__first_chunk_size_ += __partitions.__chunk_size_;
+ }
+
+ __partitions.__chunk_size_ *= 2;
+ __partitions.__chunk_count_ /= 2;
+
+ auto __merge_chunks = [__partitions, __old_chunk_size, &__comp](auto __from_first, auto __to_first) {
+ __libdispatch::__dispatch_parallel_for(
+ __partitions,
+ __from_first,
+ [__old_chunk_size, &__from_first, &__to_first, &__comp](auto __chunk_first, auto __chunk_last) {
+ std::merge(std::make_move_iterator(__chunk_first),
+ std::make_move_iterator(__chunk_last - __old_chunk_size),
+ std::make_move_iterator(__chunk_last - __old_chunk_size),
+ std::make_move_iterator(__chunk_last),
+ __to_first + (__chunk_first - __from_first),
+ __comp);
+ });
+ };
+
+ if (__objects_are_in_buffer)
+ __merge_chunks(__values.get(), __first);
+ else
+ __merge_chunks(__first, __values.get());
+ __objects_are_in_buffer = !__objects_are_in_buffer;
+ } while (__partitions.__chunk_count_ > 1);
+
+ if (__objects_are_in_buffer) {
+ std::move(__values.get(), __values.get() + __size, __first);
+ }
+ });
}
_LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {}
diff --git a/libcxx/src/pstl/libdispatch.cpp b/libcxx/src/pstl/libdispatch.cpp
index 0ed028b37096ec..52d4afbcce6e00 100644
--- a/libcxx/src/pstl/libdispatch.cpp
+++ b/libcxx/src/pstl/libdispatch.cpp
@@ -24,6 +24,8 @@ __chunk_partitions __partition_chunks(ptrdiff_t element_count) noexcept {
partitions.__chunk_count_ = std::max<ptrdiff_t>(1, element_count / 256);
partitions.__chunk_size_ = element_count / partitions.__chunk_count_;
partitions.__first_chunk_size_ = element_count - (partitions.__chunk_count_ - 1) * partitions.__chunk_size_;
+ if (partitions.__chunk_count_ == 0 && element_count > 0)
+ partitions.__chunk_count_ = 1;
return partitions;
}
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/pstl.all_of.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/pstl.all_of.pass.cpp
index f581b3dfbaea4c..82ba4338886d69 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/pstl.all_of.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/pstl.all_of.pass.cpp
@@ -62,7 +62,7 @@ struct Test {
// check that a large number of elements works
std::vector<int> vec(100);
std::fill(vec.begin(), vec.end(), 3);
- assert(std::all_of(Iter(vec.data()), Iter(vec.data() + vec.size()), [](int i) { return i == 3; }));
+ assert(std::all_of(policy, Iter(vec.data()), Iter(vec.data() + vec.size()), [](int i) { return i == 3; }));
}
};
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/pstl.any_of.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/pstl.any_of.pass.cpp
index 62a7c152207fbb..c21ce8afe556bc 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/pstl.any_of.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/pstl.any_of.pass.cpp
@@ -56,7 +56,7 @@ struct Test {
// check that a large number of elements works
std::vector<int> vec(100, 2);
vec[96] = 3;
- assert(std::any_of(Iter(vec.data()), Iter(vec.data() + vec.size()), [](int i) { return i == 3; }));
+ assert(std::any_of(policy, Iter(vec.data()), Iter(vec.data() + vec.size()), [](int i) { return i == 3; }));
}
};
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/pstl.none_of.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/pstl.none_of.pass.cpp
index a42260d77593bd..f6dfe0cbf7b3de 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/pstl.none_of.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.none_of/pstl.none_of.pass.cpp
@@ -56,7 +56,7 @@ struct Test {
// check that a large number of elements works
std::vector<int> vec(100);
std::fill(vec.begin(), vec.end(), 3);
- assert(std::none_of(Iter(vec.data()), Iter(vec.data() + vec.size()), [](int i) { return i != 3; }));
+ assert(std::none_of(policy, Iter(vec.data()), Iter(vec.data() + vec.size()), [](int i) { return i != 3; }));
}
};
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/pstl.stable_sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/pstl.stable_sort.pass.cpp
index 7298b367f7f39e..0c4dda5b03ca5c 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/pstl.stable_sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/pstl.stable_sort.pass.cpp
@@ -30,6 +30,7 @@
#include "test_macros.h"
#include "test_execution_policies.h"
#include "test_iterators.h"
+#include "MoveOnly.h"
EXECUTION_POLICY_SFINAE_TEST(stable_sort);
@@ -45,9 +46,9 @@ struct OrderedValue {
auto operator>(const OrderedValue& rhs) const { return value > rhs.value; }
};
-template <class Iter, std::size_t N>
-void test_one(std::array<int, N> input, std::array<int, N> expected) {
- std::stable_sort(Iter(input.data()), Iter(input.data() + input.size()));
+template <class Iter, std::size_t N, class Policy, class ValueT = typename std::iterator_traits<Iter>::value_type>
+void test_one(Policy&& policy, std::array<ValueT, N> input, std::array<ValueT, N> expected) {
+ std::stable_sort(policy, Iter(input.data()), Iter(input.data() + input.size()));
assert(input == expected);
}
@@ -55,27 +56,26 @@ template <class Iter>
struct Test {
template <class Policy>
void operator()(Policy&& policy) {
-
// Empty sequence.
- test_one<Iter, 0>({}, {});
+ test_one<Iter, 0>(policy, {}, {});
// 1-element sequence.
- test_one<Iter, 1>({1}, {1});
+ test_one<Iter, 1>(policy, {1}, {1});
// 2-element sequence.
- test_one<Iter, 2>({2, 1}, {1, 2});
+ test_one<Iter, 2>(policy, {2, 1}, {1, 2});
// 3-element sequence.
- test_one<Iter, 3>({2, 1, 3}, {1, 2, 3});
+ test_one<Iter, 3>(policy, {2, 1, 3}, {1, 2, 3});
// Longer sequence.
- test_one<Iter, 8>({2, 1, 3, 6, 8, 4, 11, 5}, {1, 2, 3, 4, 5, 6, 8, 11});
+ test_one<Iter, 8>(policy, {2, 1, 3, 6, 8, 4, 11, 5}, {1, 2, 3, 4, 5, 6, 8, 11});
// Longer sequence with duplicates.
- test_one<Iter, 7>({2, 1, 3, 6, 2, 8, 6}, {1, 2, 2, 3, 6, 6, 8});
+ test_one<Iter, 7>(policy, {2, 1, 3, 6, 2, 8, 6}, {1, 2, 2, 3, 6, 6, 8});
// All elements are the same.
- test_one<Iter, 3>({1, 1, 1}, {1, 1, 1});
+ test_one<Iter, 3>(policy, {1, 1, 1}, {1, 1, 1});
// Already sorted.
- test_one<Iter, 5>({1, 2, 3, 4, 5}, {1, 2, 3, 4, 5});
+ test_one<Iter, 5>(policy, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5});
// Reverse-sorted.
- test_one<Iter, 5>({5, 4, 3, 2, 1}, {1, 2, 3, 4, 5});
+ test_one<Iter, 5>(policy, {5, 4, 3, 2, 1}, {1, 2, 3, 4, 5});
// Repeating pattern.
- test_one<Iter, 6>({1, 2, 1, 2, 1, 2}, {1, 1, 1, 2, 2, 2});
+ test_one<Iter, 6>(policy, {1, 2, 1, 2, 1, 2}, {1, 1, 1, 2, 2, 2});
{ // The sort is stable (equivalent elements remain in the same order).
using V = OrderedValue;
@@ -126,8 +126,25 @@ struct Test {
}
};
+struct NotDefaultConstructible {
+ NotDefaultConstructible(int i) : i_(i) {}
+
+ int i_;
+
+ friend bool operator==(NotDefaultConstructible lhs, NotDefaultConstructible rhs) {
+ return lhs.i_ == rhs.i_;
+ }
+
+ friend bool operator<(NotDefaultConstructible lhs, NotDefaultConstructible rhs) {
+ return lhs.i_ < rhs.i_;
+ }
+};
+
int main(int, char**) {
- types::for_each(types::random_access_iterator_list<int*>{}, TestIteratorWithPolicies<Test>{});
+ types::for_each(types::concatenate_t<types::random_access_iterator_list<int*>,
+ types::random_access_iterator_list<MoveOnly*>,
+ types::random_access_iterator_list<NotDefaultConstructible*>>{},
+ TestIteratorWithPolicies<Test>{});
#ifndef TEST_HAS_NO_EXCEPTIONS
std::set_terminate(terminate_successful);
More information about the libcxx-commits mailing list