[libcxx-commits] [libcxx] 051c863 - [libc++][PSTL] Simplify the partitioning algorithm until we have better data to know how to chunk better
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Aug 14 16:44:36 PDT 2023
Author: Nikolas Klauser
Date: 2023-08-14T16:44:30-07:00
New Revision: 051c8630124d6b0aa4541268976d404a167bcf2c
URL: https://github.com/llvm/llvm-project/commit/051c8630124d6b0aa4541268976d404a167bcf2c
DIFF: https://github.com/llvm/llvm-project/commit/051c8630124d6b0aa4541268976d404a167bcf2c.diff
LOG: [libc++][PSTL] Simplify the partitioning algorithm until we have better data to know how to chunk better
The current chunking strategy is very bad for sorting, and we don't really know how to chunk in general. This fixes the performance problem for sorting.
Reviewed By: ldionne, #libc
Spies: libcxx-commits, krytarowski
Differential Revision: https://reviews.llvm.org/D155531
Added:
Modified:
libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
libcxx/src/pstl/libdispatch.cpp
libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
Removed:
################################################################################
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
index bab6a3639bd084..51240e92e0d0b7 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
@@ -57,7 +57,7 @@ struct __chunk_partitions {
ptrdiff_t __first_chunk_size_;
};
-[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size);
+[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept;
template <class _RandomAccessIterator, class _Functor>
_LIBCPP_HIDE_FROM_ABI void
diff --git a/libcxx/src/pstl/libdispatch.cpp b/libcxx/src/pstl/libdispatch.cpp
index b3a9559d085f68..0ed028b37096ec 100644
--- a/libcxx/src/pstl/libdispatch.cpp
+++ b/libcxx/src/pstl/libdispatch.cpp
@@ -10,59 +10,20 @@
#include <__algorithm/pstl_backends/cpu_backends/libdispatch.h>
#include <__config>
#include <dispatch/dispatch.h>
-#include <thread>
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __par_backend::inline __libdispatch {
-
void __dispatch_apply(size_t chunk_count, void* context, void (*func)(void* context, size_t chunk)) noexcept {
::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func);
}
-__chunk_partitions __partition_chunks(ptrdiff_t element_count) {
- if (element_count == 0) {
- return __chunk_partitions{1, 0, 0};
- } else if (element_count == 1) {
- return __chunk_partitions{1, 0, 1};
- }
-
+__chunk_partitions __partition_chunks(ptrdiff_t element_count) noexcept {
__chunk_partitions partitions;
- partitions.__chunk_count_ = [&] {
- ptrdiff_t cores = std::max(1u, thread::hardware_concurrency());
-
- auto medium = [&](ptrdiff_t n) { return cores + ((n - cores) / cores); };
-
- // This is an approximation of `log(1.01, sqrt(n))` which seemes to be reasonable for `n` larger than 500 and tops
- // at 800 tasks for n ~ 8 million
- auto large = [](ptrdiff_t n) { return static_cast<ptrdiff_t>(100.499 * std::log(std::sqrt(n))); };
-
- if (element_count < cores)
- return element_count;
- else if (element_count < 500)
- return medium(element_count);
- else
- return std::min(medium(element_count), large(element_count)); // provide a "smooth" transition
- }();
+ partitions.__chunk_count_ = std::max<ptrdiff_t>(1, element_count / 256);
partitions.__chunk_size_ = element_count / partitions.__chunk_count_;
- partitions.__first_chunk_size_ = partitions.__chunk_size_;
-
- const ptrdiff_t leftover_item_count = element_count - (partitions.__chunk_count_ * partitions.__chunk_size_);
-
- if (leftover_item_count == 0)
- return partitions;
-
- if (leftover_item_count == partitions.__chunk_size_) {
- partitions.__chunk_count_ += 1;
- return partitions;
- }
-
- const ptrdiff_t n_extra_items_per_chunk = leftover_item_count / partitions.__chunk_count_;
- const ptrdiff_t n_final_leftover_items = leftover_item_count - (n_extra_items_per_chunk * partitions.__chunk_count_);
-
- partitions.__chunk_size_ += n_extra_items_per_chunk;
- partitions.__first_chunk_size_ = partitions.__chunk_size_ + n_final_leftover_items;
+ partitions.__first_chunk_size_ = element_count - (partitions.__chunk_count_ - 1) * partitions.__chunk_size_;
return partitions;
}
diff --git a/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp b/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
index 91935a8df72483..554924a0179d56 100644
--- a/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
+++ b/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
@@ -28,7 +28,7 @@ int main(int, char**) {
auto chunks = std::__par_backend::__libdispatch::__partition_chunks(1);
assert(chunks.__chunk_count_ == 1);
assert(chunks.__first_chunk_size_ == 1);
- assert(chunks.__chunk_size_ == 0);
+ assert(chunks.__chunk_size_ == 1);
}
for (std::ptrdiff_t i = 2; i != 2ll << 20; ++i) {
More information about the libcxx-commits
mailing list