[libcxx-commits] [PATCH] D155531: [libc++][PSTL] Simplify the partitioning algorithm until we have better data to know how to chunk better

Nikolas Klauser via Phabricator via libcxx-commits libcxx-commits at lists.llvm.org
Mon Aug 14 16:44:51 PDT 2023


This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG051c8630124d: [libc++][PSTL] Simplify the partitioning algorithm until we have better data to know how to chunk better (authored by philnik).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155531/new/

https://reviews.llvm.org/D155531

Files:
  libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
  libcxx/src/pstl/libdispatch.cpp
  libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp


Index: libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
===================================================================
--- libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
+++ libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
@@ -28,7 +28,7 @@
     auto chunks = std::__par_backend::__libdispatch::__partition_chunks(1);
     assert(chunks.__chunk_count_ == 1);
     assert(chunks.__first_chunk_size_ == 1);
-    assert(chunks.__chunk_size_ == 0);
+    assert(chunks.__chunk_size_ == 1);
   }
 
   for (std::ptrdiff_t i = 2; i != 2ll << 20; ++i) {
Index: libcxx/src/pstl/libdispatch.cpp
===================================================================
--- libcxx/src/pstl/libdispatch.cpp
+++ libcxx/src/pstl/libdispatch.cpp
@@ -10,59 +10,20 @@
 #include <__algorithm/pstl_backends/cpu_backends/libdispatch.h>
 #include <__config>
 #include <dispatch/dispatch.h>
-#include <thread>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __par_backend::inline __libdispatch {
 
-
 void __dispatch_apply(size_t chunk_count, void* context, void (*func)(void* context, size_t chunk)) noexcept {
   ::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func);
 }
 
-__chunk_partitions __partition_chunks(ptrdiff_t element_count) {
-  if (element_count == 0) {
-    return __chunk_partitions{1, 0, 0};
-  } else if (element_count == 1) {
-    return __chunk_partitions{1, 0, 1};
-  }
-
+__chunk_partitions __partition_chunks(ptrdiff_t element_count) noexcept {
   __chunk_partitions partitions;
-  partitions.__chunk_count_ = [&] {
-    ptrdiff_t cores = std::max(1u, thread::hardware_concurrency());
-
-    auto medium = [&](ptrdiff_t n) { return cores + ((n - cores) / cores); };
-
-    // This is an approximation of `log(1.01, sqrt(n))` which seemes to be reasonable for `n` larger than 500 and tops
-    // at 800 tasks for n ~ 8 million
-    auto large = [](ptrdiff_t n) { return static_cast<ptrdiff_t>(100.499 * std::log(std::sqrt(n))); };
-
-    if (element_count < cores)
-      return element_count;
-    else if (element_count < 500)
-      return medium(element_count);
-    else
-      return std::min(medium(element_count), large(element_count)); // provide a "smooth" transition
-  }();
+  partitions.__chunk_count_      = std::max<ptrdiff_t>(1, element_count / 256);
   partitions.__chunk_size_       = element_count / partitions.__chunk_count_;
-  partitions.__first_chunk_size_ = partitions.__chunk_size_;
-
-  const ptrdiff_t leftover_item_count = element_count - (partitions.__chunk_count_ * partitions.__chunk_size_);
-
-  if (leftover_item_count == 0)
-    return partitions;
-
-  if (leftover_item_count == partitions.__chunk_size_) {
-    partitions.__chunk_count_ += 1;
-    return partitions;
-  }
-
-  const ptrdiff_t n_extra_items_per_chunk = leftover_item_count / partitions.__chunk_count_;
-  const ptrdiff_t n_final_leftover_items  = leftover_item_count - (n_extra_items_per_chunk * partitions.__chunk_count_);
-
-  partitions.__chunk_size_ += n_extra_items_per_chunk;
-  partitions.__first_chunk_size_ = partitions.__chunk_size_ + n_final_leftover_items;
+  partitions.__first_chunk_size_ = element_count - (partitions.__chunk_count_ - 1) * partitions.__chunk_size_;
   return partitions;
 }
 
Index: libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
===================================================================
--- libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
+++ libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
@@ -57,7 +57,7 @@
   ptrdiff_t __first_chunk_size_;
 };
 
-[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size);
+[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept;
 
 template <class _RandomAccessIterator, class _Functor>
 _LIBCPP_HIDE_FROM_ABI void


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155531.550138.patch
Type: text/x-patch
Size: 3949 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libcxx-commits/attachments/20230814/117aec2b/attachment.bin>


More information about the libcxx-commits mailing list