[libcxx-commits] [PATCH] D155531: [libc++][PSTL] Simplify the partitioning algorithm until we have better data to know how to chunk better
Nikolas Klauser via Phabricator via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Jul 17 17:59:37 PDT 2023
philnik created this revision.
Herald added a subscriber: krytarowski.
Herald added a reviewer: ldionne.
Herald added a project: All.
philnik requested review of this revision.
Herald added a project: libc++.
Herald added a subscriber: libcxx-commits.
Herald added a reviewer: libc++.
The current chunking strategy performs very poorly for sorting, and we don't yet know how to chunk well in general. This patch simplifies the partitioning algorithm, which fixes the performance problem for sorting.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D155531
Files:
libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
libcxx/src/pstl/libdispatch.cpp
libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
Index: libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
===================================================================
--- libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
+++ libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
@@ -8,7 +8,7 @@
// <algorithm>
-// REQUIRES: libcpp-pstl-cpu-backend-libdispatch
+// REQUIRES-: libcpp-pstl-cpu-backend-libdispatch
// ADDITIONAL_COMPILE_FLAGS: -Wno-private-header
Index: libcxx/src/pstl/libdispatch.cpp
===================================================================
--- libcxx/src/pstl/libdispatch.cpp
+++ libcxx/src/pstl/libdispatch.cpp
@@ -26,42 +26,11 @@
::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func);
}
-__chunk_partitions __partition_chunks(ptrdiff_t element_count) {
+__chunk_partitions __partition_chunks(ptrdiff_t element_count) noexcept {
__chunk_partitions partitions;
- partitions.__chunk_count_ = [&] {
- ptrdiff_t cores = std::max(1u, thread::hardware_concurrency());
-
- auto medium = [&](ptrdiff_t n) { return cores + ((n - cores) / cores); };
-
- // This is an approximation of `log(1.01, sqrt(n))` which seemes to be reasonable for `n` larger than 500 and tops
- // at 800 tasks for n ~ 8 million
- auto large = [](ptrdiff_t n) { return static_cast<ptrdiff_t>(100.499 * std::log(std::sqrt(n))); };
-
- if (element_count < cores)
- return element_count;
- else if (element_count < 500)
- return medium(element_count);
- else
- return std::min(medium(element_count), large(element_count)); // provide a "smooth" transition
- }();
- partitions.__chunk_size_ = element_count / partitions.__chunk_count_;
- partitions.__first_chunk_size_ = partitions.__chunk_size_;
-
- const ptrdiff_t leftover_item_count = element_count - (partitions.__chunk_count_ * partitions.__chunk_size_);
-
- if (leftover_item_count == 0)
- return partitions;
-
- if (leftover_item_count == partitions.__chunk_size_) {
- partitions.__chunk_count_ += 1;
- return partitions;
- }
-
- const ptrdiff_t n_extra_items_per_chunk = leftover_item_count / partitions.__chunk_count_;
- const ptrdiff_t n_final_leftover_items = leftover_item_count - (n_extra_items_per_chunk * partitions.__chunk_count_);
-
- partitions.__chunk_size_ += n_extra_items_per_chunk;
- partitions.__first_chunk_size_ = partitions.__chunk_size_ + n_final_leftover_items;
+ partitions.__chunk_count_ = element_count / 256;
+ partitions.__chunk_size_ = partitions.__chunk_count_ * 256;
+ partitions.__first_chunk_size_ = element_count - (partitions.__chunk_count_ - 1) * partitions.__chunk_size_;
return partitions;
}
Index: libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
===================================================================
--- libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
+++ libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
@@ -54,7 +54,7 @@
};
[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI pmr::memory_resource* __get_memory_resource();
-[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size);
+[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept;
template <class _RandomAccessIterator, class _Functor>
_LIBCPP_HIDE_FROM_ABI void
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155531.541286.patch
Type: text/x-patch
Size: 3437 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libcxx-commits/attachments/20230718/bef58483/attachment-0001.bin>
More information about the libcxx-commits
mailing list