[libcxx-commits] [PATCH] D155531: [libc++][PSTL] Simplify the partitioning algorithm until we have better data to know how to chunk better

Nikolas Klauser via Phabricator via libcxx-commits libcxx-commits at lists.llvm.org
Mon Jul 17 17:59:37 PDT 2023


philnik created this revision.
Herald added a subscriber: krytarowski.
Herald added a reviewer: ldionne.
Herald added a project: All.
philnik requested review of this revision.
Herald added a project: libc++.
Herald added a subscriber: libcxx-commits.
Herald added a reviewer: libc++.

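The current chunking strategy performs very poorly for sorting, and we don't yet have enough data to know how to chunk well in general. Until we do, this patch replaces the existing heuristic (based on the core count and a logarithmic approximation) with a much simpler partitioning derived from a fixed constant of 256, which fixes the performance problem for sorting.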


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155531

Files:
  libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
  libcxx/src/pstl/libdispatch.cpp
  libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp


Index: libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
===================================================================
--- libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
+++ libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
@@ -8,7 +8,7 @@
 
 // <algorithm>
 
-// REQUIRES: libcpp-pstl-cpu-backend-libdispatch
+// REQUIRES-: libcpp-pstl-cpu-backend-libdispatch
 
 // ADDITIONAL_COMPILE_FLAGS: -Wno-private-header
 
Index: libcxx/src/pstl/libdispatch.cpp
===================================================================
--- libcxx/src/pstl/libdispatch.cpp
+++ libcxx/src/pstl/libdispatch.cpp
@@ -26,42 +26,11 @@
   ::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func);
 }
 
-__chunk_partitions __partition_chunks(ptrdiff_t element_count) {
+__chunk_partitions __partition_chunks(ptrdiff_t element_count) noexcept {
   __chunk_partitions partitions;
-  partitions.__chunk_count_ = [&] {
-    ptrdiff_t cores = std::max(1u, thread::hardware_concurrency());
-
-    auto medium = [&](ptrdiff_t n) { return cores + ((n - cores) / cores); };
-
-    // This is an approximation of `log(1.01, sqrt(n))` which seemes to be reasonable for `n` larger than 500 and tops
-    // at 800 tasks for n ~ 8 million
-    auto large = [](ptrdiff_t n) { return static_cast<ptrdiff_t>(100.499 * std::log(std::sqrt(n))); };
-
-    if (element_count < cores)
-      return element_count;
-    else if (element_count < 500)
-      return medium(element_count);
-    else
-      return std::min(medium(element_count), large(element_count)); // provide a "smooth" transition
-  }();
-  partitions.__chunk_size_       = element_count / partitions.__chunk_count_;
-  partitions.__first_chunk_size_ = partitions.__chunk_size_;
-
-  const ptrdiff_t leftover_item_count = element_count - (partitions.__chunk_count_ * partitions.__chunk_size_);
-
-  if (leftover_item_count == 0)
-    return partitions;
-
-  if (leftover_item_count == partitions.__chunk_size_) {
-    partitions.__chunk_count_ += 1;
-    return partitions;
-  }
-
-  const ptrdiff_t n_extra_items_per_chunk = leftover_item_count / partitions.__chunk_count_;
-  const ptrdiff_t n_final_leftover_items  = leftover_item_count - (n_extra_items_per_chunk * partitions.__chunk_count_);
-
-  partitions.__chunk_size_ += n_extra_items_per_chunk;
-  partitions.__first_chunk_size_ = partitions.__chunk_size_ + n_final_leftover_items;
+  partitions.__chunk_count_      = element_count / 256;
+  partitions.__chunk_size_       = partitions.__chunk_count_ * 256;
+  partitions.__first_chunk_size_ = element_count - (partitions.__chunk_count_ - 1) * partitions.__chunk_size_;
   return partitions;
 }
 
Index: libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
===================================================================
--- libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
+++ libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
@@ -54,7 +54,7 @@
 };
 
 [[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI pmr::memory_resource* __get_memory_resource();
-[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size);
+[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept;
 
 template <class _RandomAccessIterator, class _Functor>
 _LIBCPP_HIDE_FROM_ABI void


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155531.541286.patch
Type: text/x-patch
Size: 3437 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libcxx-commits/attachments/20230718/bef58483/attachment-0001.bin>


More information about the libcxx-commits mailing list