[libcxx-commits] [libcxx] Adding Separate OpenMP Offloading Backend to `libcxx/include/__algorithm/pstl_backends` (PR #66968)
Anton Rydahl via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Sep 20 17:55:13 PDT 2023
https://github.com/AntonRydahl created https://github.com/llvm/llvm-project/pull/66968
To address some of the good feedback from the recent [request for comments](https://discourse.llvm.org/t/rfc-openmp-offloading-backend-for-c-parallel-algorithms/73468/6), I have added a separate OpenMP offloading backend in `libcxx/include/__algorithm/pstl_backends`.
To begin with, I have only added `std::for_each` and `std::fill`.
I know there are still a lot of things missing, such as LIT tests, but I think we should try to agree on the project structure first.
>From b35340e47de896c9933c54ce617538c46cf01488 Mon Sep 17 00:00:00 2001
From: AntonRydahl <rydahl2610 at gmail.com>
Date: Wed, 20 Sep 2023 17:06:10 -0700
Subject: [PATCH 1/2] Adding OpenMP Offloading Backend for C++ Parallel
Algorithms
---
libcxx/CMakeLists.txt | 14 +++
libcxx/include/CMakeLists.txt | 5 +
libcxx/include/__algorithm/pstl_backend.h | 8 ++
.../__algorithm/pstl_backends/gpu_backend.h | 21 +++++
.../pstl_backends/gpu_backends/backend.h | 33 +++++++
.../pstl_backends/gpu_backends/fill.h | 59 ++++++++++++
.../pstl_backends/gpu_backends/for_each.h | 59 ++++++++++++
.../pstl_backends/gpu_backends/omp_offload.h | 91 +++++++++++++++++++
libcxx/include/__config_site.in | 1 +
9 files changed, 291 insertions(+)
create mode 100644 libcxx/include/__algorithm/pstl_backends/gpu_backend.h
create mode 100644 libcxx/include/__algorithm/pstl_backends/gpu_backends/backend.h
create mode 100644 libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h
create mode 100644 libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h
create mode 100644 libcxx/include/__algorithm/pstl_backends/gpu_backends/omp_offload.h
diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index bb2898b799bcef9..43d2a448de79584 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -290,6 +290,8 @@ option(LIBCXX_HAS_WIN32_THREAD_API "Ignore auto-detection and force use of win32
option(LIBCXX_HAS_EXTERNAL_THREAD_API
"Build libc++ with an externalized threading API.
This option may only be set to ON when LIBCXX_ENABLE_THREADS=ON." OFF)
+option(LIBCXX_ENABLE_GPU_OFFLOAD
+ "Build libc++ with support for GPU offload" OFF)
if (LIBCXX_ENABLE_THREADS)
set(LIBCXX_PSTL_CPU_BACKEND "std_thread" CACHE STRING "Which PSTL CPU backend to use")
@@ -297,6 +299,14 @@ else()
set(LIBCXX_PSTL_CPU_BACKEND "serial" CACHE STRING "Which PSTL CPU backend to use")
endif()
+if (NOT DEFINED LIBCXX_PSTL_GPU_BACKEND)
+ if (${LIBCXX_ENABLE_GPU_OFFLOAD})
+ set(LIBCXX_PSTL_GPU_BACKEND "omp_offload" CACHE STRING "Which PSTL GPU backend to use")
+ else()
+ set(LIBCXX_PSTL_GPU_BACKEND "none" CACHE STRING "Which PSTL GPU backend to use")
+ endif()
+endif()
+
# Misc options ----------------------------------------------------------------
# FIXME: Turn -pedantic back ON. It is currently off because it warns
# about #include_next which is used everywhere.
@@ -809,6 +819,10 @@ else()
Valid backends are: serial, std_thread and libdispatch")
endif()
+if (LIBCXX_PSTL_GPU_BACKEND STREQUAL "omp_offload")
+ config_define(1 _LIBCPP_PSTL_GPU_BACKEND_OMP_OFFLOAD)
+endif()
+
if (LIBCXX_ABI_DEFINES)
set(abi_defines)
foreach (abi_define ${LIBCXX_ABI_DEFINES})
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 2ec755236dbaee2..a3d72df61a86dde 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -85,6 +85,11 @@ set(files
__algorithm/pstl_backends/cpu_backends/thread.h
__algorithm/pstl_backends/cpu_backends/transform.h
__algorithm/pstl_backends/cpu_backends/transform_reduce.h
+ __algorithm/pstl_backends/gpu_backend.h
+ __algorithm/pstl_backends/gpu_backends/backend.h
+ __algorithm/pstl_backends/gpu_backends/fill.h
+ __algorithm/pstl_backends/gpu_backends/for_each.h
+ __algorithm/pstl_backends/gpu_backends/omp_offload.h
__algorithm/pstl_copy.h
__algorithm/pstl_count.h
__algorithm/pstl_fill.h
diff --git a/libcxx/include/__algorithm/pstl_backend.h b/libcxx/include/__algorithm/pstl_backend.h
index 93372f019031b63..f051e0ce9be13c3 100644
--- a/libcxx/include/__algorithm/pstl_backend.h
+++ b/libcxx/include/__algorithm/pstl_backend.h
@@ -10,6 +10,7 @@
#define _LIBCPP___ALGORITHM_PSTL_BACKEND_H
#include <__algorithm/pstl_backends/cpu_backend.h>
+#include <__algorithm/pstl_backends/gpu_backend.h>
#include <__config>
#include <execution>
@@ -179,10 +180,17 @@ struct __select_backend<std::execution::parallel_policy> {
using type = __cpu_backend_tag;
};
+# if defined(_LIBCPP_PSTL_GPU_BACKEND_OMP_OFFLOAD)
+template <>
+struct __select_backend<std::execution::parallel_unsequenced_policy> {
+ using type = __gpu_backend_tag;
+};
+# else
template <>
struct __select_backend<std::execution::parallel_unsequenced_policy> {
using type = __cpu_backend_tag;
};
+# endif
# else
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backend.h b/libcxx/include/__algorithm/pstl_backends/gpu_backend.h
new file mode 100644
index 000000000000000..46a85f77b5deb99
--- /dev/null
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backend.h
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKEND_H
+#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKEND_H
+
+#include <__config>
+
+#include <__algorithm/pstl_backends/gpu_backends/backend.h>
+
+#if defined(_LIBCPP_PSTL_GPU_BACKEND_OMP_OFFLOAD)
+#include <__algorithm/pstl_backends/gpu_backends/fill.h>
+#include <__algorithm/pstl_backends/gpu_backends/for_each.h>
+#endif
+
+#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKEND_H
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backends/backend.h b/libcxx/include/__algorithm/pstl_backends/gpu_backends/backend.h
new file mode 100644
index 000000000000000..a8b400afbb94d9d
--- /dev/null
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backends/backend.h
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKEND_BACKEND_H
+#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKEND_BACKEND_H
+
+#include <__config>
+#include <cstddef>
+
+#if defined(_LIBCPP_PSTL_GPU_BACKEND_OMP_OFFLOAD)
+# include <__algorithm/pstl_backends/gpu_backends/omp_offload.h>
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 17
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+struct __gpu_backend_tag {};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 17
+
+#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKEND_BACKEND_H
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h b/libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h
new file mode 100644
index 000000000000000..5603e18a5d2d3fc
--- /dev/null
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKNEDS_FILL_H
+#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKNEDS_FILL_H
+
+#include <__algorithm/fill.h>
+#include <__algorithm/pstl_backends/gpu_backends/backend.h>
+#include <__algorithm/pstl_backends/cpu_backends/backend.h>
+#include <__config>
+#include <__iterator/concepts.h>
+#include <__type_traits/is_execution_policy.h>
+#include <__utility/terminate_on_exception.h>
+#include <stdio.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+_LIBCPP_HIDE_FROM_ABI void
+__pstl_fill(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
+ // It is only safe to execute for_each on the GPU, it the execution policy is
+ // parallel unsequenced, as it is the only execution policy prohibiting throwing
+ // exceptions and allowing SIMD instructions
+ if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
+ __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
+ std::__par_backend::__parallel_for_simd_val_1(__first, __last - __first, __value);
+ }
+ // Else if the excution policy is parallel, we execute for_each on the CPU instead
+ else if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
+ __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
+ std::__terminate_on_exception([&] {
+ __par_backend::__parallel_for(
+ __first, __last, [&__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
+ std::__pstl_fill<__remove_parallel_policy_t<_ExecutionPolicy>>(
+ __cpu_backend_tag{}, __brick_first, __brick_last, __value);
+ });
+ });
+ // Else we execute for_each in serial
+ } else {
+ std::fill(__first, __last, __value);
+ }
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+
+#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKNEDS_FILL_H
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h b/libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h
new file mode 100644
index 000000000000000..20486d83863f420
--- /dev/null
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKNEDS_FOR_EACH_H
+#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKNEDS_FOR_EACH_H
+
+#include <__algorithm/for_each.h>
+#include <__algorithm/pstl_backends/gpu_backends/backend.h>
+#include <__algorithm/pstl_backends/cpu_backends/backend.h>
+#include <__config>
+#include <__iterator/concepts.h>
+#include <__type_traits/is_execution_policy.h>
+#include <__utility/terminate_on_exception.h>
+#include <stdio.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Functor>
+_LIBCPP_HIDE_FROM_ABI void
+__pstl_for_each(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Functor __func) {
+ // It is only safe to execute for_each on the GPU, it the execution policy is
+ // parallel unsequenced, as it is the only execution policy prohibiting throwing
+ // exceptions and allowing SIMD instructions
+ if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
+ __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
+ std::__par_backend::__parallel_for_simd_1(__first, __last - __first, __func);
+ }
+ // Else if the excution policy is parallel, we execute for_each on the CPU instead
+ else if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
+ __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
+ std::__terminate_on_exception([&] {
+ std::__par_backend::__parallel_for(
+ __first, __last, [__func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
+ std::__pstl_for_each<__remove_parallel_policy_t<_ExecutionPolicy>>(
+ __cpu_backend_tag{}, __brick_first, __brick_last, __func);
+ });
+ });
+ // Else we execute for_each in serial
+ } else {
+ std::for_each(__first, __last, __func);
+ }
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+
+#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKNEDS_FOR_EACH_H
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backends/omp_offload.h b/libcxx/include/__algorithm/pstl_backends/gpu_backends/omp_offload.h
new file mode 100644
index 000000000000000..840118dbec5057c
--- /dev/null
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backends/omp_offload.h
@@ -0,0 +1,91 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_OMP_OFFLOAD_H
+#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_OMP_OFFLOAD_H
+
+#include <__assert>
+#include <__config>
+#include <__utility/move.h>
+#include <cstddef>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace __par_backend {
+inline namespace __omp_gpu_backend {
+
+// In OpenMP, we need to extract the pointer for the underlying data for data
+// structures like std::vector and std::array to be able to map the data to the
+// device.
+
+template <typename T>
+_LIBCPP_HIDE_FROM_ABI inline T __omp_extract_base_ptr(T p) {
+ return p;
+}
+
+template <typename T>
+_LIBCPP_HIDE_FROM_ABI inline T __omp_extract_base_ptr(std::__wrap_iter<T> w) {
+ std::pointer_traits<std::__wrap_iter<T>> PT;
+ return PT.to_address(w);
+}
+
+// Applying function or lambda in a loop
+
+template <class _Iterator, class _DifferenceType, class _Function>
+_LIBCPP_HIDE_FROM_ABI _Iterator __omp_parallel_for_simd_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
+ #pragma omp target teams distribute parallel for simd map(tofrom:__first[0:__n])
+ for (_DifferenceType __i = 0; __i < __n; ++__i)
+ __f(__first[__i]);
+
+ return __first + __n;
+}
+
+// Extracting the underlying pointer
+
+template <class _Iterator, class _DifferenceType, class _Function>
+_LIBCPP_HIDE_FROM_ABI _Iterator __parallel_for_simd_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
+ __omp_parallel_for_simd_1(__omp_gpu_backend::__omp_extract_base_ptr(__first), __n, __f);
+ return __first + __n;
+}
+
+// Assigning a value in a loop
+
+template <class _Index, class _DifferenceType, class _Tp>
+_LIBCPP_HIDE_FROM_ABI _Index __omp_parallel_for_simd_val_1(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
+ #pragma omp target teams distribute parallel for simd map(tofrom:__first[0:__n]) map(to:__value)
+ for (_DifferenceType __i = 0; __i < __n; ++__i)
+ __first[__i] = __value;
+
+ return __first + __n;
+}
+
+template <class _Index, class _DifferenceType, class _Tp>
+_LIBCPP_HIDE_FROM_ABI _Index __parallel_for_simd_val_1(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
+ __omp_parallel_for_simd_val_1(__omp_gpu_backend::__omp_extract_base_ptr(__first), __n, __value);
+ return __first + __n;
+}
+
+} // namespace __omp_gpu_backend
+} // namespace __par_backend
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && && _LIBCPP_STD_VER >= 17
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_OMP_OFFLOAD_H
diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in
index c85cbcd02c441b9..e0edddce3afc3ff 100644
--- a/libcxx/include/__config_site.in
+++ b/libcxx/include/__config_site.in
@@ -34,6 +34,7 @@
#cmakedefine _LIBCPP_PSTL_CPU_BACKEND_SERIAL
#cmakedefine _LIBCPP_PSTL_CPU_BACKEND_THREAD
#cmakedefine _LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH
+#cmakedefine _LIBCPP_PSTL_GPU_BACKEND_OMP_OFFLOAD
// Hardening.
#cmakedefine01 _LIBCPP_ENABLE_HARDENED_MODE_DEFAULT
>From af5ddf7709e44435c3b0b15421aa9cfc24b49e84 Mon Sep 17 00:00:00 2001
From: antonrydahl <rydahl2610 at gmail.com>
Date: Wed, 20 Sep 2023 17:48:25 -0700
Subject: [PATCH 2/2] Clang formatting OpenMP backend for parallel algorithms
---
libcxx/include/__algorithm/pstl_backend.h | 6 +++---
.../include/__algorithm/pstl_backends/gpu_backend.h | 4 ++--
.../__algorithm/pstl_backends/gpu_backends/fill.h | 12 ++++++------
.../pstl_backends/gpu_backends/for_each.h | 12 ++++++------
.../pstl_backends/gpu_backends/omp_offload.h | 13 ++++++++-----
5 files changed, 25 insertions(+), 22 deletions(-)
diff --git a/libcxx/include/__algorithm/pstl_backend.h b/libcxx/include/__algorithm/pstl_backend.h
index f051e0ce9be13c3..47f5191b48517ba 100644
--- a/libcxx/include/__algorithm/pstl_backend.h
+++ b/libcxx/include/__algorithm/pstl_backend.h
@@ -180,17 +180,17 @@ struct __select_backend<std::execution::parallel_policy> {
using type = __cpu_backend_tag;
};
-# if defined(_LIBCPP_PSTL_GPU_BACKEND_OMP_OFFLOAD)
+# if defined(_LIBCPP_PSTL_GPU_BACKEND_OMP_OFFLOAD)
template <>
struct __select_backend<std::execution::parallel_unsequenced_policy> {
using type = __gpu_backend_tag;
};
-# else
+# else
template <>
struct __select_backend<std::execution::parallel_unsequenced_policy> {
using type = __cpu_backend_tag;
};
-# endif
+# endif
# else
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backend.h b/libcxx/include/__algorithm/pstl_backends/gpu_backend.h
index 46a85f77b5deb99..7237036156a1bf3 100644
--- a/libcxx/include/__algorithm/pstl_backends/gpu_backend.h
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backend.h
@@ -14,8 +14,8 @@
#include <__algorithm/pstl_backends/gpu_backends/backend.h>
#if defined(_LIBCPP_PSTL_GPU_BACKEND_OMP_OFFLOAD)
-#include <__algorithm/pstl_backends/gpu_backends/fill.h>
-#include <__algorithm/pstl_backends/gpu_backends/for_each.h>
+# include <__algorithm/pstl_backends/gpu_backends/fill.h>
+# include <__algorithm/pstl_backends/gpu_backends/for_each.h>
#endif
#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKEND_H
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h b/libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h
index 5603e18a5d2d3fc..32926da87e2a083 100644
--- a/libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backends/fill.h
@@ -10,8 +10,8 @@
#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKNEDS_FILL_H
#include <__algorithm/fill.h>
-#include <__algorithm/pstl_backends/gpu_backends/backend.h>
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
+#include <__algorithm/pstl_backends/gpu_backends/backend.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__type_traits/is_execution_policy.h>
@@ -29,16 +29,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
_LIBCPP_HIDE_FROM_ABI void
__pstl_fill(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
- // It is only safe to execute for_each on the GPU, it the execution policy is
+ // It is only safe to execute for_each on the GPU, it the execution policy is
// parallel unsequenced, as it is the only execution policy prohibiting throwing
// exceptions and allowing SIMD instructions
if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
- __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
+ __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
std::__par_backend::__parallel_for_simd_val_1(__first, __last - __first, __value);
}
// Else if the excution policy is parallel, we execute for_each on the CPU instead
- else if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
- __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
+ else if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
+ __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
std::__terminate_on_exception([&] {
__par_backend::__parallel_for(
__first, __last, [&__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -46,7 +46,7 @@ __pstl_fill(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last
__cpu_backend_tag{}, __brick_first, __brick_last, __value);
});
});
- // Else we execute for_each in serial
+ // Else we execute for_each in serial
} else {
std::fill(__first, __last, __value);
}
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h b/libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h
index 20486d83863f420..14de2af8e4a15c6 100644
--- a/libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backends/for_each.h
@@ -10,8 +10,8 @@
#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_GPU_BACKNEDS_FOR_EACH_H
#include <__algorithm/for_each.h>
-#include <__algorithm/pstl_backends/gpu_backends/backend.h>
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
+#include <__algorithm/pstl_backends/gpu_backends/backend.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__type_traits/is_execution_policy.h>
@@ -29,16 +29,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _ExecutionPolicy, class _ForwardIterator, class _Functor>
_LIBCPP_HIDE_FROM_ABI void
__pstl_for_each(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Functor __func) {
- // It is only safe to execute for_each on the GPU, it the execution policy is
+ // It is only safe to execute for_each on the GPU, it the execution policy is
// parallel unsequenced, as it is the only execution policy prohibiting throwing
// exceptions and allowing SIMD instructions
if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
- __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
+ __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
std::__par_backend::__parallel_for_simd_1(__first, __last - __first, __func);
}
// Else if the excution policy is parallel, we execute for_each on the CPU instead
- else if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
- __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
+ else if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
+ __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
std::__terminate_on_exception([&] {
std::__par_backend::__parallel_for(
__first, __last, [__func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -46,7 +46,7 @@ __pstl_for_each(__gpu_backend_tag, _ForwardIterator __first, _ForwardIterator __
__cpu_backend_tag{}, __brick_first, __brick_last, __func);
});
});
- // Else we execute for_each in serial
+ // Else we execute for_each in serial
} else {
std::for_each(__first, __last, __func);
}
diff --git a/libcxx/include/__algorithm/pstl_backends/gpu_backends/omp_offload.h b/libcxx/include/__algorithm/pstl_backends/gpu_backends/omp_offload.h
index 840118dbec5057c..4baa4e7f65859d1 100644
--- a/libcxx/include/__algorithm/pstl_backends/gpu_backends/omp_offload.h
+++ b/libcxx/include/__algorithm/pstl_backends/gpu_backends/omp_offload.h
@@ -46,8 +46,9 @@ _LIBCPP_HIDE_FROM_ABI inline T __omp_extract_base_ptr(std::__wrap_iter<T> w) {
// Applying function or lambda in a loop
template <class _Iterator, class _DifferenceType, class _Function>
-_LIBCPP_HIDE_FROM_ABI _Iterator __omp_parallel_for_simd_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
- #pragma omp target teams distribute parallel for simd map(tofrom:__first[0:__n])
+_LIBCPP_HIDE_FROM_ABI _Iterator
+__omp_parallel_for_simd_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
+# pragma omp target teams distribute parallel for simd map(tofrom : __first[0 : __n])
for (_DifferenceType __i = 0; __i < __n; ++__i)
__f(__first[__i]);
@@ -65,8 +66,9 @@ _LIBCPP_HIDE_FROM_ABI _Iterator __parallel_for_simd_1(_Iterator __first, _Differ
// Assigning a value in a loop
template <class _Index, class _DifferenceType, class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Index __omp_parallel_for_simd_val_1(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
- #pragma omp target teams distribute parallel for simd map(tofrom:__first[0:__n]) map(to:__value)
+_LIBCPP_HIDE_FROM_ABI _Index
+__omp_parallel_for_simd_val_1(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
+# pragma omp target teams distribute parallel for simd map(tofrom : __first[0 : __n]) map(to : __value)
for (_DifferenceType __i = 0; __i < __n; ++__i)
__first[__i] = __value;
@@ -74,7 +76,8 @@ _LIBCPP_HIDE_FROM_ABI _Index __omp_parallel_for_simd_val_1(_Index __first, _Diff
}
template <class _Index, class _DifferenceType, class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Index __parallel_for_simd_val_1(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
+_LIBCPP_HIDE_FROM_ABI _Index
+__parallel_for_simd_val_1(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
__omp_parallel_for_simd_val_1(__omp_gpu_backend::__omp_extract_base_ptr(__first), __n, __value);
return __first + __n;
}
More information about the libcxx-commits
mailing list