[libcxx-commits] [libcxx] 503f2ee - [libc++] Make sure we use the libdispatch backend on Apple platforms

Thu Jul 20 12:53:44 PDT 2023

Author: Louis Dionne
Date: 2023-07-20T15:53:27-04:00
New Revision: 503f2ee4a86a740c4b4f442d95ff4e6d7f66a670

URL: https://github.com/llvm/llvm-project/commit/503f2ee4a86a740c4b4f442d95ff4e6d7f66a670
DIFF: https://github.com/llvm/llvm-project/commit/503f2ee4a86a740c4b4f442d95ff4e6d7f66a670.diff

LOG: [libc++] Make sure we use the libdispatch backend on Apple platforms

The Apple.cmake cache wasn't set up properly, so we wouldn't enable
the libdispatch backend by default on Apple platforms. This patch
fixes the issue and adds a test.

We also need to make various drive-by fixes:
- Drop the usage of std::vector in libdispatch.h to avoid changing
  the transitive includes only on Apple platforms.
- Fix includes
- Use __construct_at since construct_at is unavailable in C++17
- Get rid of the (unused) __get_memory_resource function since that
  adds a back-deployment requirement and we don't use it right now.
- Fix bugs in the chunking logic around boundary conditions.

Differential Revision: https://reviews.llvm.org/D155649

Added: 
    

Modified: 
    libcxx/cmake/caches/Apple.cmake
    libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
    libcxx/src/pstl/libdispatch.cpp
    libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
    libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp

Removed: 
    


################################################################################
diff  --git a/libcxx/cmake/caches/Apple.cmake b/libcxx/cmake/caches/Apple.cmake
index 32aae6d4a98457..804eccd3a5dc5e 100644

--- a/libcxx/cmake/caches/Apple.cmake
+++ b/libcxx/cmake/caches/Apple.cmake
@@ -7,7 +7,7 @@ set(LIBCXX_ENABLE_STATIC ON CACHE BOOL "")
 set(LIBCXX_ENABLE_SHARED ON CACHE BOOL "")
 set(LIBCXX_CXX_ABI libcxxabi CACHE STRING "")
 set(LIBCXX_ENABLE_VENDOR_AVAILABILITY_ANNOTATIONS ON CACHE BOOL "")
-set(LIBCXX_PSTL_CPU_BACKEND libdispatch)
+set(LIBCXX_PSTL_CPU_BACKEND libdispatch CACHE STRING "")
 
 set(LIBCXX_HERMETIC_STATIC_LIBRARY ON CACHE BOOL "")
 set(LIBCXXABI_HERMETIC_STATIC_LIBRARY ON CACHE BOOL "")

diff  --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
index 49af1c4b1c0564..bab6a3639bd084 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
@@ -10,22 +10,26 @@
 #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H
 
 #include <__algorithm/lower_bound.h>
+#include <__algorithm/max.h>
 #include <__algorithm/upper_bound.h>
 #include <__atomic/atomic.h>
 #include <__config>
 #include <__exception/terminate.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/move_iterator.h>
+#include <__memory/allocator.h>
 #include <__memory/construct_at.h>
 #include <__memory/unique_ptr.h>
-#include <__memory_resource/memory_resource.h>
 #include <__numeric/reduce.h>
 #include <__utility/exception_guard.h>
 #include <__utility/move.h>
+#include <__utility/pair.h>
 #include <__utility/terminate_on_exception.h>
 #include <cstddef>
 #include <new>
-#include <vector>
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
 
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
@@ -53,7 +57,6 @@ struct __chunk_partitions {
   ptrdiff_t __first_chunk_size_;
 };
 
-[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI pmr::memory_resource* __get_memory_resource();
 [[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size);
 
 template <class _RandomAccessIterator, class _Functor>
@@ -107,13 +110,20 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_merge(
   }
 
   using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>;
+  auto const __n_ranges = __partitions.__chunk_count_ + 1;
 
-  vector<__merge_range_t> __ranges;
-  __ranges.reserve(__partitions.__chunk_count_ + 1);
+  // TODO: use __uninitialized_buffer
+  auto __destroy = [=](__merge_range_t* __ptr) {
+    std::destroy_n(__ptr, __n_ranges);
+    std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges);
+  };
+  unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges(
+      std::allocator<__merge_range_t>().allocate(__n_ranges), __destroy);
 
   // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case
   std::__terminate_on_exception([&] {
-    __ranges.emplace_back(__first1, __first2, __result);
+    __merge_range_t* __r = __ranges.get();
+    std::__construct_at(__r++, __first1, __first2, __result);
 
     bool __iterate_first_range = __last1 - __first1 > __last2 - __first2;
 
@@ -137,14 +147,14 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_merge(
     };
 
     // handle first chunk
-    __ranges.emplace_back(__compute_chunk(__partitions.__first_chunk_size_));
+    std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_));
 
     // handle 2 -> N - 1 chunks
     for (ptrdiff_t __i = 0; __i != __partitions.__chunk_count_ - 2; ++__i)
-      __ranges.emplace_back(__compute_chunk(__partitions.__chunk_size_));
+      std::__construct_at(__r++, __compute_chunk(__partitions.__chunk_size_));
 
     // handle last chunk
-    __ranges.emplace_back(__last1, __last2, __result);
+    std::__construct_at(__r, __last1, __last2, __result);
 
     __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __index) {
       auto __first_iters = __ranges[__index];
@@ -168,6 +178,9 @@ _LIBCPP_HIDE_FROM_ABI _Value __parallel_transform_reduce(
     _Value __init,
     _Combiner __combiner,
     _Reduction __reduction) {
+  if (__first == __last)
+    return __init;
+
   auto __partitions = __libdispatch::__partition_chunks(__last - __first);
 
   auto __destroy = [__count = __partitions.__chunk_count_](_Value* __ptr) {
@@ -223,4 +236,6 @@ _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H

diff  --git a/libcxx/src/pstl/libdispatch.cpp b/libcxx/src/pstl/libdispatch.cpp
index e264aade0298d5..b3a9559d085f68 100644
--- a/libcxx/src/pstl/libdispatch.cpp
+++ b/libcxx/src/pstl/libdispatch.cpp
@@ -10,23 +10,24 @@
 #include <__algorithm/pstl_backends/cpu_backends/libdispatch.h>
 #include <__config>
 #include <dispatch/dispatch.h>
-#include <memory_resource>
 #include <thread>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __par_backend::inline __libdispatch {
 
-pmr::memory_resource* __get_memory_resource() {
-  static std::pmr::synchronized_pool_resource pool{pmr::new_delete_resource()};
-  return &pool;
-}
 
 void __dispatch_apply(size_t chunk_count, void* context, void (*func)(void* context, size_t chunk)) noexcept {
   ::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func);
 }
 
 __chunk_partitions __partition_chunks(ptrdiff_t element_count) {
+  if (element_count == 0) {
+    return __chunk_partitions{1, 0, 0};
+  } else if (element_count == 1) {
+    return __chunk_partitions{1, 0, 1};
+  }
+
   __chunk_partitions partitions;
   partitions.__chunk_count_ = [&] {
     ptrdiff_t cores = std::max(1u, thread::hardware_concurrency());

diff  --git a/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp b/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
index 1f1c96e2891de3..91935a8df72483 100644
--- a/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
+++ b/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp
@@ -17,8 +17,23 @@
 #include <cstddef>
 
 int main(int, char**) {
-  for (std::ptrdiff_t i = 0; i != 2ll << 20; ++i) {
+  {
+    auto chunks = std::__par_backend::__libdispatch::__partition_chunks(0);
+    assert(chunks.__chunk_count_ == 1);
+    assert(chunks.__first_chunk_size_ == 0);
+    assert(chunks.__chunk_size_ == 0);
+  }
+
+  {
+    auto chunks = std::__par_backend::__libdispatch::__partition_chunks(1);
+    assert(chunks.__chunk_count_ == 1);
+    assert(chunks.__first_chunk_size_ == 1);
+    assert(chunks.__chunk_size_ == 0);
+  }
+
+  for (std::ptrdiff_t i = 2; i != 2ll << 20; ++i) {
     auto chunks = std::__par_backend::__libdispatch::__partition_chunks(i);
+    assert(chunks.__chunk_count_ >= 1);
     assert(chunks.__chunk_count_ <= i);
     assert((chunks.__chunk_count_ - 1) * chunks.__chunk_size_ + chunks.__first_chunk_size_ == i);
   }

diff  --git a/libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp b/libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp
index e155013ef3c026..6c84e0dfff2bab 100644
--- a/libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp
+++ b/libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp
@@ -42,3 +42,7 @@
 // when they are loaded by dyld, if the compatibility version was bumped.
 //
 // RUN: otool -L "%{lib}/libc++.1.dylib" | grep "libc++.1.dylib" | grep "compatibility version 1.0.0"
+
+// Make sure we use the libdispatch backend for the PSTL.
+//
+// RUN: grep "%{include}/__config_site" -e '#define _LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH'