[libcxx-commits] [libcxx] [libc++] Move the implementation of CPU-based basis operations to namespace __pstl (PR #95267)
via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Jun 12 09:40:01 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Louis Dionne (ldionne)
<details>
<summary>Changes</summary>
They were always intended to be in that namespace but I was trying to keep changes orthogonal.
---
Full diff: https://github.com/llvm/llvm-project/pull/95267.diff
8 Files Affected:
- (modified) libcxx/include/__pstl/cpu_algos/any_of.h (+6-4)
- (modified) libcxx/include/__pstl/cpu_algos/fill.h (+4-2)
- (modified) libcxx/include/__pstl/cpu_algos/find_if.h (+8-6)
- (modified) libcxx/include/__pstl/cpu_algos/for_each.h (+4-2)
- (modified) libcxx/include/__pstl/cpu_algos/merge.h (+3-1)
- (modified) libcxx/include/__pstl/cpu_algos/stable_sort.h (+3-1)
- (modified) libcxx/include/__pstl/cpu_algos/transform.h (+6-4)
- (modified) libcxx/include/__pstl/cpu_algos/transform_reduce.h (+7-5)
``````````diff
diff --git a/libcxx/include/__pstl/cpu_algos/any_of.h b/libcxx/include/__pstl/cpu_algos/any_of.h
index 3173eade7585b..cfc70c735dec6 100644
--- a/libcxx/include/__pstl/cpu_algos/any_of.h
+++ b/libcxx/include/__pstl/cpu_algos/any_of.h
@@ -29,14 +29,15 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Backend, class _Index, class _Brick>
_LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
std::atomic<bool> __found(false);
- auto __ret = __pstl::__cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
+ auto __ret = __cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) {
__found.store(true, std::memory_order_relaxed);
- __pstl::__cpu_traits<_Backend>::__cancel_execution();
+ __cpu_traits<_Backend>::__cancel_execution();
}
});
if (!__ret)
@@ -76,7 +77,7 @@ struct __cpu_parallel_any_of {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__parallel_or<_Backend>(
+ return __pstl::__parallel_or<_Backend>(
__first, __last, [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _AnyOfUnseq = __pstl::__any_of<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
auto __res = _AnyOfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred);
@@ -85,13 +86,14 @@ struct __cpu_parallel_any_of {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__simd_or(__first, __last - __first, __pred);
+ return __pstl::__simd_or(__first, __last - __first, __pred);
} else {
return std::any_of(__first, __last, __pred);
}
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/fill.h b/libcxx/include/__pstl/cpu_algos/fill.h
index b99a9d3c660d8..6037435df2748 100644
--- a/libcxx/include/__pstl/cpu_algos/fill.h
+++ b/libcxx/include/__pstl/cpu_algos/fill.h
@@ -26,6 +26,7 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Index, class _DifferenceType, class _Tp>
_LIBCPP_HIDE_FROM_ABI _Index __simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
@@ -43,7 +44,7 @@ struct __cpu_parallel_fill {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__for_each(
+ return __cpu_traits<_Backend>::__for_each(
__first, __last, [&__policy, &__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _FillUnseq = __pstl::__fill<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
[[maybe_unused]] auto __res =
@@ -52,7 +53,7 @@ struct __cpu_parallel_fill {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- std::__simd_fill_n(__first, __last - __first, __value);
+ __pstl::__simd_fill_n(__first, __last - __first, __value);
return __empty{};
} else {
std::fill(__first, __last, __value);
@@ -61,6 +62,7 @@ struct __cpu_parallel_fill {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/find_if.h b/libcxx/include/__pstl/cpu_algos/find_if.h
index 3ddbee44890f6..46b684bc37925 100644
--- a/libcxx/include/__pstl/cpu_algos/find_if.h
+++ b/libcxx/include/__pstl/cpu_algos/find_if.h
@@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Backend, class _Index, class _Brick, class _Compare>
_LIBCPP_HIDE_FROM_ABI optional<_Index>
@@ -43,8 +44,8 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
_DifferenceType __initial_dist = __b_first ? __n : -1;
std::atomic<_DifferenceType> __extremum(__initial_dist);
// TODO: find out what is better here: parallel_for or parallel_reduce
- auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
- __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
+ auto __res =
+ __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
// See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
// why using a shared variable scales fairly well in this situation.
if (__comp(__i - __first, __extremum)) {
@@ -67,8 +68,8 @@ template <class _Backend, class _Index, class _DifferenceType, class _Compare>
_LIBCPP_HIDE_FROM_ABI _Index
__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
// Experiments show good block sizes like this
- const _DifferenceType __block_size = 8;
- alignas(__pstl::__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
+ const _DifferenceType __block_size = 8;
+ alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
while (__end - __begin >= __block_size) {
_DifferenceType __found = 0;
_PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
@@ -106,7 +107,7 @@ struct __cpu_parallel_find_if {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__parallel_find<_Backend>(
+ return __pstl::__parallel_find<_Backend>(
__first,
__last,
[&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -120,7 +121,7 @@ struct __cpu_parallel_find_if {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
using __diff_t = __iter_diff_t<_ForwardIterator>;
- return std::__simd_first<_Backend>(
+ return __pstl::__simd_first<_Backend>(
__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
return __pred(__iter[__i]);
});
@@ -130,6 +131,7 @@ struct __cpu_parallel_find_if {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/for_each.h b/libcxx/include/__pstl/cpu_algos/for_each.h
index 1e5677d998994..0a1209cc25403 100644
--- a/libcxx/include/__pstl/cpu_algos/for_each.h
+++ b/libcxx/include/__pstl/cpu_algos/for_each.h
@@ -26,6 +26,7 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Iterator, class _DifferenceType, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator __simd_for_each(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
@@ -43,7 +44,7 @@ struct __cpu_parallel_for_each {
operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Function __func) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__for_each(
+ return __cpu_traits<_Backend>::__for_each(
__first, __last, [&__policy, __func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
using _ForEachUnseq = __pstl::__for_each<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
[[maybe_unused]] auto __res =
@@ -52,7 +53,7 @@ struct __cpu_parallel_for_each {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- std::__simd_for_each(__first, __last - __first, __func);
+ __pstl::__simd_for_each(__first, __last - __first, __func);
return __empty{};
} else {
std::for_each(__first, __last, __func);
@@ -61,6 +62,7 @@ struct __cpu_parallel_for_each {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/merge.h b/libcxx/include/__pstl/cpu_algos/merge.h
index 4f4192cccb3e8..cfd006971bfaa 100644
--- a/libcxx/include/__pstl/cpu_algos/merge.h
+++ b/libcxx/include/__pstl/cpu_algos/merge.h
@@ -29,6 +29,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_merge {
@@ -45,7 +46,7 @@ struct __cpu_parallel_merge {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- auto __res = __pstl::__cpu_traits<_Backend>::__merge(
+ auto __res = __cpu_traits<_Backend>::__merge(
__first1,
__last1,
__first2,
@@ -78,6 +79,7 @@ struct __cpu_parallel_merge {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/stable_sort.h b/libcxx/include/__pstl/cpu_algos/stable_sort.h
index 8ea5e8a01d2ce..90774c6e88fa6 100644
--- a/libcxx/include/__pstl/cpu_algos/stable_sort.h
+++ b/libcxx/include/__pstl/cpu_algos/stable_sort.h
@@ -24,6 +24,7 @@
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_stable_sort {
@@ -31,7 +32,7 @@ struct __cpu_parallel_stable_sort {
_LIBCPP_HIDE_FROM_ABI optional<__empty>
operator()(_Policy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy>) {
- return __pstl::__cpu_traits<_Backend>::__stable_sort(
+ return __cpu_traits<_Backend>::__stable_sort(
__first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) {
std::stable_sort(__g_first, __g_last, __g_comp);
});
@@ -42,6 +43,7 @@ struct __cpu_parallel_stable_sort {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
diff --git a/libcxx/include/__pstl/cpu_algos/transform.h b/libcxx/include/__pstl/cpu_algos/transform.h
index 440368d97f182..85bd990199195 100644
--- a/libcxx/include/__pstl/cpu_algos/transform.h
+++ b/libcxx/include/__pstl/cpu_algos/transform.h
@@ -30,6 +30,7 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator2
@@ -61,7 +62,7 @@ struct __cpu_parallel_transform {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- __pstl::__cpu_traits<_Backend>::__for_each(
+ __cpu_traits<_Backend>::__for_each(
__first,
__last,
[&__policy, __op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -79,7 +80,7 @@ struct __cpu_parallel_transform {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- return std::__simd_transform(
+ return __pstl::__simd_transform(
__first,
__last - __first,
__result,
@@ -110,7 +111,7 @@ struct __cpu_parallel_transform_binary {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
+ auto __res = __cpu_traits<_Backend>::__for_each(
__first1,
__last1,
[&__policy, __op, __first1, __first2, __result](
@@ -132,7 +133,7 @@ struct __cpu_parallel_transform_binary {
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
- return std::__simd_transform(
+ return __pstl::__simd_transform(
__first1,
__last1 - __first1,
__first2,
@@ -146,6 +147,7 @@ struct __cpu_parallel_transform_binary {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/__pstl/cpu_algos/transform_reduce.h b/libcxx/include/__pstl/cpu_algos/transform_reduce.h
index 914c46dcd6dcf..4dde257ca9518 100644
--- a/libcxx/include/__pstl/cpu_algos/transform_reduce.h
+++ b/libcxx/include/__pstl/cpu_algos/transform_reduce.h
@@ -34,6 +34,7 @@ _LIBCPP_PUSH_MACROS
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
template <typename _Backend,
typename _DifferenceType,
@@ -63,7 +64,7 @@ template <typename _Backend,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
- constexpr size_t __lane_size = __pstl::__cpu_traits<_Backend>::__lane_size;
+ constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size;
const _Size __block_size = __lane_size / sizeof(_Tp);
if (__n > 2 * __block_size && __block_size > 1) {
alignas(__lane_size) char __lane_buffer[__lane_size];
@@ -124,7 +125,7 @@ struct __cpu_parallel_transform_reduce_binary {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
- return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+ return __cpu_traits<_Backend>::__transform_reduce(
__first1,
std::move(__last1),
[__first1, __first2, __transform](_ForwardIterator1 __iter) {
@@ -148,7 +149,7 @@ struct __cpu_parallel_transform_reduce_binary {
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
- return std::__simd_transform_reduce<_Backend>(
+ return __pstl::__simd_transform_reduce<_Backend>(
__last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
return __transform(__first1[__i], __first2[__i]);
});
@@ -176,7 +177,7 @@ struct __cpu_parallel_transform_reduce {
_UnaryOperation __transform) const noexcept {
if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __pstl::__cpu_traits<_Backend>::__transform_reduce(
+ return __cpu_traits<_Backend>::__transform_reduce(
std::move(__first),
std::move(__last),
[__transform](_ForwardIterator __iter) { return __transform(*__iter); },
@@ -197,7 +198,7 @@ struct __cpu_parallel_transform_reduce {
});
} else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__simd_transform_reduce<_Backend>(
+ return __pstl::__simd_transform_reduce<_Backend>(
__last - __first,
std::move(__init),
std::move(__reduce),
@@ -209,6 +210,7 @@ struct __cpu_parallel_transform_reduce {
}
};
+} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
``````````
</details>
https://github.com/llvm/llvm-project/pull/95267
More information about the libcxx-commits
mailing list