[Openmp-commits] [openmp] fbc1dcb - [libomptarget][devicertl][nfc] Simplify target_atomic abstraction
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Wed Jan 20 11:51:05 PST 2021
Author: Jon Chesterfield
Date: 2021-01-20T19:50:50Z
New Revision: fbc1dcb946553a3dc923a63288d9275eea86f918
URL: https://github.com/llvm/llvm-project/commit/fbc1dcb946553a3dc923a63288d9275eea86f918
DIFF: https://github.com/llvm/llvm-project/commit/fbc1dcb946553a3dc923a63288d9275eea86f918.diff
LOG: [libomptarget][devicertl][nfc] Simplify target_atomic abstraction
[libomptarget][devicertl][nfc] Simplify target_atomic abstraction
Atomic functions were implemented as a shim around CUDA's atomics, with
amdgcn implementing those symbols as a shim around GCC-style intrinsics.
This patch folds target_atomic.h into each target's target_impl.h and folds amdgcn's hip_atomics.h shim into the amdgcn target_impl.h.
Further work is likely to be useful here, either changing to OpenMP's atomic
interface or instantiating the templates on the few used types in order to
move them into a CUDA/C++ implementation file. This change is mostly to
group the remaining uses of the CUDA API under nvptx's target_impl abstraction.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D95062
Added:
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/common/omptargeti.h
openmp/libomptarget/deviceRTLs/common/src/libcall.cu
openmp/libomptarget/deviceRTLs/common/src/loop.cu
openmp/libomptarget/deviceRTLs/common/src/reduction.cu
openmp/libomptarget/deviceRTLs/common/state-queuei.h
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Removed:
openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
openmp/libomptarget/deviceRTLs/common/target_atomic.h
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 8bb395f1126f..8d9abe5d0bbd 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -73,14 +73,12 @@ set(cuda_sources
set(h_files
${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
- ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_atomics.h
${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
${devicertl_base_directory}/common/debug.h
${devicertl_base_directory}/common/device_environment.h
${devicertl_base_directory}/common/omptarget.h
${devicertl_base_directory}/common/omptargeti.h
${devicertl_base_directory}/common/state-queue.h
- ${devicertl_base_directory}/common/target_atomic.h
${devicertl_base_directory}/common/state-queuei.h
${devicertl_base_directory}/common/support.h)
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
deleted file mode 100644
index 04e80b945070..000000000000
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===---- hip_atomics.h - Declarations of hip atomic functions ---- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_AMDGCN_HIP_ATOMICS_H
-#define OMPTARGET_AMDGCN_HIP_ATOMICS_H
-
-#include "target_impl.h"
-
-namespace {
-
-template <typename T> DEVICE T atomicAdd(T *address, T val) {
- return __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
-}
-
-template <typename T> DEVICE T atomicMax(T *address, T val) {
- return __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
-}
-
-template <typename T> DEVICE T atomicExch(T *address, T val) {
- T r;
- __atomic_exchange(address, &val, &r, __ATOMIC_SEQ_CST);
- return r;
-}
-
-template <typename T> DEVICE T atomicCAS(T *address, T compare, T val) {
- (void)__atomic_compare_exchange(address, &compare, &val, false,
- __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
- return compare;
-}
-
-INLINE uint32_t atomicInc(uint32_t *address, uint32_t max) {
- return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
-}
-
-} // namespace
-#endif
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index b1e9a1a9403a..6e8a651bd886 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -29,8 +29,6 @@
#define SHARED __attribute__((shared))
#define ALIGN(N) __attribute__((aligned(N)))
-#include "hip_atomics.h"
-
////////////////////////////////////////////////////////////////////////////////
// Kernel options
////////////////////////////////////////////////////////////////////////////////
@@ -127,6 +125,31 @@ DEVICE int GetNumberOfThreadsInBlock();
DEVICE unsigned GetWarpId();
DEVICE unsigned GetLaneId();
+// Atomics
+template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
+ return __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
+}
+
+INLINE uint32_t __kmpc_atomic_inc(uint32_t *address, uint32_t max) {
+ return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
+}
+
+template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
+ return __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
+}
+
+template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
+ T r;
+ __atomic_exchange(address, &val, &r, __ATOMIC_SEQ_CST);
+ return r;
+}
+
+template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
+ (void)__atomic_compare_exchange(address, &compare, &val, false,
+ __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+ return compare;
+}
+
// Locks
DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);
diff --git a/openmp/libomptarget/deviceRTLs/common/omptargeti.h b/openmp/libomptarget/deviceRTLs/common/omptargeti.h
index 14faa59062ae..108724e0bd88 100644
--- a/openmp/libomptarget/deviceRTLs/common/omptargeti.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptargeti.h
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "common/target_atomic.h"
-
////////////////////////////////////////////////////////////////////////////////
// Task Descriptor
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
index 69b27f11a8d4..f43d74a0c872 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
@@ -13,7 +13,6 @@
#pragma omp declare target
#include "common/omptarget.h"
-#include "common/target_atomic.h"
#include "target_impl.h"
EXTERN double omp_get_wtick(void) {
diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
index dfe6c7fd5af7..a3ace098f06c 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
@@ -15,7 +15,6 @@
#include "common/omptarget.h"
#include "target_impl.h"
-#include "common/target_atomic.h"
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
index 3a3c44503f34..0cfae1fc495c 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
@@ -12,7 +12,6 @@
#pragma omp declare target
#include "common/omptarget.h"
-#include "common/target_atomic.h"
#include "target_impl.h"
EXTERN
diff --git a/openmp/libomptarget/deviceRTLs/common/state-queuei.h b/openmp/libomptarget/deviceRTLs/common/state-queuei.h
index 1bd261f2826a..902eff9031ae 100644
--- a/openmp/libomptarget/deviceRTLs/common/state-queuei.h
+++ b/openmp/libomptarget/deviceRTLs/common/state-queuei.h
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
#include "state-queue.h"
-#include "common/target_atomic.h"
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
diff --git a/openmp/libomptarget/deviceRTLs/common/target_atomic.h b/openmp/libomptarget/deviceRTLs/common/target_atomic.h
deleted file mode 100644
index 3c905d3cbbf2..000000000000
--- a/openmp/libomptarget/deviceRTLs/common/target_atomic.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Declarations of atomic functions provided by each target
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_TARGET_ATOMIC_H
-#define OMPTARGET_TARGET_ATOMIC_H
-
-#include "target_impl.h"
-
-template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
- return atomicAdd(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
- return atomicInc(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
- return atomicMax(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
- return atomicExch(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
- return atomicCAS(address, compare, val);
-}
-
-#endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
index b5ef549ece57..ffc7498e662e 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -13,7 +13,6 @@
#include "target_impl.h"
#include "common/debug.h"
-#include "common/target_atomic.h"
#include <cuda.h>
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index ab9fd1697f14..ba3d331876b1 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -130,6 +130,27 @@ DEVICE int GetNumberOfThreadsInBlock();
DEVICE unsigned GetWarpId();
DEVICE unsigned GetLaneId();
+// Atomics
+template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
+ return atomicAdd(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
+ return atomicInc(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
+ return atomicMax(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
+ return atomicExch(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
+ return atomicCAS(address, compare, val);
+}
+
// Locks
DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);
More information about the Openmp-commits
mailing list