[libcxx-commits] [libcxx] [libc++] Move the implementation of CPU-based basis operations to namespace __pstl (PR #95267)

via libcxx-commits libcxx-commits at lists.llvm.org
Wed Jun 12 09:40:01 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libcxx

Author: Louis Dionne (ldionne)

<details>
<summary>Changes</summary>

They were always intended to be in namespace `__pstl`, but I was trying to keep changes orthogonal.

---
Full diff: https://github.com/llvm/llvm-project/pull/95267.diff


8 Files Affected:

- (modified) libcxx/include/__pstl/cpu_algos/any_of.h (+6-4) 
- (modified) libcxx/include/__pstl/cpu_algos/fill.h (+4-2) 
- (modified) libcxx/include/__pstl/cpu_algos/find_if.h (+8-6) 
- (modified) libcxx/include/__pstl/cpu_algos/for_each.h (+4-2) 
- (modified) libcxx/include/__pstl/cpu_algos/merge.h (+3-1) 
- (modified) libcxx/include/__pstl/cpu_algos/stable_sort.h (+3-1) 
- (modified) libcxx/include/__pstl/cpu_algos/transform.h (+6-4) 
- (modified) libcxx/include/__pstl/cpu_algos/transform_reduce.h (+7-5) 


``````````diff
diff --git a/libcxx/include/__pstl/cpu_algos/any_of.h b/libcxx/include/__pstl/cpu_algos/any_of.h
index 3173eade7585b..cfc70c735dec6 100644
--- a/libcxx/include/__pstl/cpu_algos/any_of.h
+++ b/libcxx/include/__pstl/cpu_algos/any_of.h
@@ -29,14 +29,15 @@ _LIBCPP_PUSH_MACROS
 #  include <__undef_macros>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Backend, class _Index, class _Brick>
 _LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
   std::atomic<bool> __found(false);
-  auto __ret = __pstl::__cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
+  auto __ret = __cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
     if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) {
       __found.store(true, std::memory_order_relaxed);
-      __pstl::__cpu_traits<_Backend>::__cancel_execution();
+      __cpu_traits<_Backend>::__cancel_execution();
     }
   });
   if (!__ret)
@@ -76,7 +77,7 @@ struct __cpu_parallel_any_of {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return std::__parallel_or<_Backend>(
+      return __pstl::__parallel_or<_Backend>(
           __first, __last, [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
             using _AnyOfUnseq = __pstl::__any_of<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
             auto __res = _AnyOfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred);
@@ -85,13 +86,14 @@ struct __cpu_parallel_any_of {
           });
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return std::__simd_or(__first, __last - __first, __pred);
+      return __pstl::__simd_or(__first, __last - __first, __pred);
     } else {
       return std::any_of(__first, __last, __pred);
     }
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/fill.h b/libcxx/include/__pstl/cpu_algos/fill.h
index b99a9d3c660d8..6037435df2748 100644
--- a/libcxx/include/__pstl/cpu_algos/fill.h
+++ b/libcxx/include/__pstl/cpu_algos/fill.h
@@ -26,6 +26,7 @@
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Index, class _DifferenceType, class _Tp>
 _LIBCPP_HIDE_FROM_ABI _Index __simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
@@ -43,7 +44,7 @@ struct __cpu_parallel_fill {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return __pstl::__cpu_traits<_Backend>::__for_each(
+      return __cpu_traits<_Backend>::__for_each(
           __first, __last, [&__policy, &__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
             using _FillUnseq = __pstl::__fill<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
             [[maybe_unused]] auto __res =
@@ -52,7 +53,7 @@ struct __cpu_parallel_fill {
           });
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      std::__simd_fill_n(__first, __last - __first, __value);
+      __pstl::__simd_fill_n(__first, __last - __first, __value);
       return __empty{};
     } else {
       std::fill(__first, __last, __value);
@@ -61,6 +62,7 @@ struct __cpu_parallel_fill {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/find_if.h b/libcxx/include/__pstl/cpu_algos/find_if.h
index 3ddbee44890f6..46b684bc37925 100644
--- a/libcxx/include/__pstl/cpu_algos/find_if.h
+++ b/libcxx/include/__pstl/cpu_algos/find_if.h
@@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
 #  include <__undef_macros>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Backend, class _Index, class _Brick, class _Compare>
 _LIBCPP_HIDE_FROM_ABI optional<_Index>
@@ -43,8 +44,8 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
   _DifferenceType __initial_dist = __b_first ? __n : -1;
   std::atomic<_DifferenceType> __extremum(__initial_dist);
   // TODO: find out what is better here: parallel_for or parallel_reduce
-  auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
-      __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
+  auto __res =
+      __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
         // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
         // why using a shared variable scales fairly well in this situation.
         if (__comp(__i - __first, __extremum)) {
@@ -67,8 +68,8 @@ template <class _Backend, class _Index, class _DifferenceType, class _Compare>
 _LIBCPP_HIDE_FROM_ABI _Index
 __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
   // Experiments show good block sizes like this
-  const _DifferenceType __block_size                                                        = 8;
-  alignas(__pstl::__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
+  const _DifferenceType __block_size                                                = 8;
+  alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
   while (__end - __begin >= __block_size) {
     _DifferenceType __found = 0;
     _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
@@ -106,7 +107,7 @@ struct __cpu_parallel_find_if {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return std::__parallel_find<_Backend>(
+      return __pstl::__parallel_find<_Backend>(
           __first,
           __last,
           [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -120,7 +121,7 @@ struct __cpu_parallel_find_if {
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
       using __diff_t = __iter_diff_t<_ForwardIterator>;
-      return std::__simd_first<_Backend>(
+      return __pstl::__simd_first<_Backend>(
           __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
             return __pred(__iter[__i]);
           });
@@ -130,6 +131,7 @@ struct __cpu_parallel_find_if {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/for_each.h b/libcxx/include/__pstl/cpu_algos/for_each.h
index 1e5677d998994..0a1209cc25403 100644
--- a/libcxx/include/__pstl/cpu_algos/for_each.h
+++ b/libcxx/include/__pstl/cpu_algos/for_each.h
@@ -26,6 +26,7 @@
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Iterator, class _DifferenceType, class _Function>
 _LIBCPP_HIDE_FROM_ABI _Iterator __simd_for_each(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
@@ -43,7 +44,7 @@ struct __cpu_parallel_for_each {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Function __func) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return __pstl::__cpu_traits<_Backend>::__for_each(
+      return __cpu_traits<_Backend>::__for_each(
           __first, __last, [&__policy, __func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
             using _ForEachUnseq = __pstl::__for_each<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
             [[maybe_unused]] auto __res =
@@ -52,7 +53,7 @@ struct __cpu_parallel_for_each {
           });
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      std::__simd_for_each(__first, __last - __first, __func);
+      __pstl::__simd_for_each(__first, __last - __first, __func);
       return __empty{};
     } else {
       std::for_each(__first, __last, __func);
@@ -61,6 +62,7 @@ struct __cpu_parallel_for_each {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/merge.h b/libcxx/include/__pstl/cpu_algos/merge.h
index 4f4192cccb3e8..cfd006971bfaa 100644
--- a/libcxx/include/__pstl/cpu_algos/merge.h
+++ b/libcxx/include/__pstl/cpu_algos/merge.h
@@ -29,6 +29,7 @@ _LIBCPP_PUSH_MACROS
 #  include <__undef_macros>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Backend, class _RawExecutionPolicy>
 struct __cpu_parallel_merge {
@@ -45,7 +46,7 @@ struct __cpu_parallel_merge {
                   __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      auto __res = __pstl::__cpu_traits<_Backend>::__merge(
+      auto __res = __cpu_traits<_Backend>::__merge(
           __first1,
           __last1,
           __first2,
@@ -78,6 +79,7 @@ struct __cpu_parallel_merge {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/stable_sort.h b/libcxx/include/__pstl/cpu_algos/stable_sort.h
index 8ea5e8a01d2ce..90774c6e88fa6 100644
--- a/libcxx/include/__pstl/cpu_algos/stable_sort.h
+++ b/libcxx/include/__pstl/cpu_algos/stable_sort.h
@@ -24,6 +24,7 @@
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Backend, class _RawExecutionPolicy>
 struct __cpu_parallel_stable_sort {
@@ -31,7 +32,7 @@ struct __cpu_parallel_stable_sort {
   _LIBCPP_HIDE_FROM_ABI optional<__empty>
   operator()(_Policy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy>) {
-      return __pstl::__cpu_traits<_Backend>::__stable_sort(
+      return __cpu_traits<_Backend>::__stable_sort(
           __first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) {
             std::stable_sort(__g_first, __g_last, __g_comp);
           });
@@ -42,6 +43,7 @@ struct __cpu_parallel_stable_sort {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/transform.h b/libcxx/include/__pstl/cpu_algos/transform.h
index 440368d97f182..85bd990199195 100644
--- a/libcxx/include/__pstl/cpu_algos/transform.h
+++ b/libcxx/include/__pstl/cpu_algos/transform.h
@@ -30,6 +30,7 @@ _LIBCPP_PUSH_MACROS
 #  include <__undef_macros>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
 _LIBCPP_HIDE_FROM_ABI _Iterator2
@@ -61,7 +62,7 @@ struct __cpu_parallel_transform {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      __pstl::__cpu_traits<_Backend>::__for_each(
+      __cpu_traits<_Backend>::__for_each(
           __first,
           __last,
           [&__policy, __op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -79,7 +80,7 @@ struct __cpu_parallel_transform {
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      return std::__simd_transform(
+      return __pstl::__simd_transform(
           __first,
           __last - __first,
           __result,
@@ -110,7 +111,7 @@ struct __cpu_parallel_transform_binary {
                   __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
+      auto __res = __cpu_traits<_Backend>::__for_each(
           __first1,
           __last1,
           [&__policy, __op, __first1, __first2, __result](
@@ -132,7 +133,7 @@ struct __cpu_parallel_transform_binary {
                          __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
-      return std::__simd_transform(
+      return __pstl::__simd_transform(
           __first1,
           __last1 - __first1,
           __first2,
@@ -146,6 +147,7 @@ struct __cpu_parallel_transform_binary {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/transform_reduce.h b/libcxx/include/__pstl/cpu_algos/transform_reduce.h
index 914c46dcd6dcf..4dde257ca9518 100644
--- a/libcxx/include/__pstl/cpu_algos/transform_reduce.h
+++ b/libcxx/include/__pstl/cpu_algos/transform_reduce.h
@@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
 
 _LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
 
 template <typename _Backend,
           typename _DifferenceType,
@@ -63,7 +64,7 @@ template <typename _Backend,
                         int>    = 0>
 _LIBCPP_HIDE_FROM_ABI _Tp
 __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
-  constexpr size_t __lane_size = __pstl::__cpu_traits<_Backend>::__lane_size;
+  constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size;
   const _Size __block_size     = __lane_size / sizeof(_Tp);
   if (__n > 2 * __block_size && __block_size > 1) {
     alignas(__lane_size) char __lane_buffer[__lane_size];
@@ -124,7 +125,7 @@ struct __cpu_parallel_transform_reduce_binary {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
-      return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+      return __cpu_traits<_Backend>::__transform_reduce(
           __first1,
           std::move(__last1),
           [__first1, __first2, __transform](_ForwardIterator1 __iter) {
@@ -148,7 +149,7 @@ struct __cpu_parallel_transform_reduce_binary {
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
-      return std::__simd_transform_reduce<_Backend>(
+      return __pstl::__simd_transform_reduce<_Backend>(
           __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
             return __transform(__first1[__i], __first2[__i]);
           });
@@ -176,7 +177,7 @@ struct __cpu_parallel_transform_reduce {
              _UnaryOperation __transform) const noexcept {
     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+      return __cpu_traits<_Backend>::__transform_reduce(
           std::move(__first),
           std::move(__last),
           [__transform](_ForwardIterator __iter) { return __transform(*__iter); },
@@ -197,7 +198,7 @@ struct __cpu_parallel_transform_reduce {
           });
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      return std::__simd_transform_reduce<_Backend>(
+      return __pstl::__simd_transform_reduce<_Backend>(
           __last - __first,
           std::move(__init),
           std::move(__reduce),
@@ -209,6 +210,7 @@ struct __cpu_parallel_transform_reduce {
   }
 };
 
+} // namespace __pstl
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17

``````````

</details>


https://github.com/llvm/llvm-project/pull/95267


More information about the libcxx-commits mailing list