[Openmp-commits] [openmp] 1b19c42 - [OpenMP][deviceRTLs] Separate declaration of target dependent functions from `target_impl.h`
Shilei Tian via Openmp-commits
openmp-commits at lists.llvm.org
Thu Jan 28 05:14:39 PST 2021
Author: Shilei Tian
Date: 2021-01-28T08:14:33-05:00
New Revision: 1b19c423029bd8eca75efe5cdb29ffd9f5b33feb
URL: https://github.com/llvm/llvm-project/commit/1b19c423029bd8eca75efe5cdb29ffd9f5b33feb
DIFF: https://github.com/llvm/llvm-project/commit/1b19c423029bd8eca75efe5cdb29ffd9f5b33feb.diff
LOG: [OpenMP][deviceRTLs] Separate declaration of target dependent functions from `target_impl.h`
This patch creates a new header file, `target_interface.h`, that holds the declarations of all target-dependent functions. Any future target can be supported simply by implementing every function declared in that header and by providing the same macros and data that each existing `target_impl.h` provides.
Reviewed By: JonChesterfield
Differential Revision: https://reviews.llvm.org/D95300
Added:
openmp/libomptarget/deviceRTLs/target_interface.h
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
openmp/libomptarget/deviceRTLs/common/debug.h
openmp/libomptarget/deviceRTLs/common/omptarget.h
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index 721fb7f2a8b5..428edfab39a8 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -68,88 +68,6 @@ enum : __kmpc_impl_lanemask_t {
__kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0
};
-INLINE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
- lo = (uint32_t)(val & UINT64_C(0x00000000FFFFFFFF));
- hi = (uint32_t)((val & UINT64_C(0xFFFFFFFF00000000)) >> 32);
-}
-
-INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) {
- return (((uint64_t)hi) << 32) | (uint64_t)lo;
-}
-
-DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt();
-DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt();
-DEVICE uint32_t __kmpc_impl_smid();
-DEVICE double __kmpc_impl_get_wtick();
-DEVICE double __kmpc_impl_get_wtime();
-
-INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
-INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
-
-DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask();
-
-DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
- int32_t SrcLane);
-
-DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
- uint32_t Delta, int32_t Width);
-
-INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
-
-INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t) {
- // AMDGCN doesn't need to sync threads in a warp
-}
-
-// AMDGCN specific kernel initialization
-DEVICE void __kmpc_impl_target_init();
-
-// Equivalent to ptx bar.sync 1. Barrier until num_threads arrive.
-DEVICE void __kmpc_impl_named_sync(uint32_t num_threads);
-
-INLINE void __kmpc_impl_threadfence() {
- __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
-}
-
-INLINE void __kmpc_impl_threadfence_block() {
- __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
-}
-
-INLINE void __kmpc_impl_threadfence_system() {
- __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
-}
-
-// Calls to the AMDGCN layer (assuming 1D layout)
-INLINE int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
-INLINE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
-DEVICE int GetNumberOfBlocksInKernel();
-DEVICE int GetNumberOfThreadsInBlock();
-DEVICE unsigned GetWarpId();
-DEVICE unsigned GetLaneId();
-
-// Atomics
-DEVICE uint32_t __kmpc_atomic_add(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_inc(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_max(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_exchange(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_cas(uint32_t *, uint32_t, uint32_t);
-
-static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-DEVICE unsigned long long __kmpc_atomic_exchange(unsigned long long *,
- unsigned long long);
-DEVICE unsigned long long __kmpc_atomic_add(unsigned long long *,
- unsigned long long);
-
-// Locks
-DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
-DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);
-DEVICE void __kmpc_impl_set_lock(omp_lock_t *lock);
-DEVICE void __kmpc_impl_unset_lock(omp_lock_t *lock);
-DEVICE int __kmpc_impl_test_lock(omp_lock_t *lock);
-
-// Memory
-DEVICE void *__kmpc_impl_malloc(size_t x);
-DEVICE void __kmpc_impl_free(void *x);
-
// DEVICE versions of part of libc
INLINE void __assert_fail(const char *, const char *, unsigned int,
const char *) {
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index 5f340e831b54..f8f9a5d8d60b 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -189,4 +189,35 @@ DEVICE unsigned long long __kmpc_atomic_add(unsigned long long *Address,
DEVICE void *__kmpc_impl_malloc(size_t) { return nullptr; }
DEVICE void __kmpc_impl_free(void *) {}
+DEVICE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
+ lo = (uint32_t)(val & UINT64_C(0x00000000FFFFFFFF));
+ hi = (uint32_t)((val & UINT64_C(0xFFFFFFFF00000000)) >> 32);
+}
+
+DEVICE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) {
+ return (((uint64_t)hi) << 32) | (uint64_t)lo;
+}
+
+DEVICE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
+
+DEVICE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t) {
+ // AMDGCN doesn't need to sync threads in a warp
+}
+
+DEVICE void __kmpc_impl_threadfence() {
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+}
+
+DEVICE void __kmpc_impl_threadfence_block() {
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+}
+
+DEVICE void __kmpc_impl_threadfence_system() {
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+}
+
+// Calls to the AMDGCN layer (assuming 1D layout)
+DEVICE int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
+DEVICE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
+
#pragma omp end declare target
diff --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h
index 6539b7ad70cf..aeffa2f7e4b1 100644
--- a/openmp/libomptarget/deviceRTLs/common/debug.h
+++ b/openmp/libomptarget/deviceRTLs/common/debug.h
@@ -29,6 +29,7 @@
#define _OMPTARGET_NVPTX_DEBUG_H_
#include "common/device_environment.h"
+#include "target_interface.h"
////////////////////////////////////////////////////////////////////////////////
// set desired level of debugging
diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
index bbd44d95101f..76922333c849 100644
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -336,6 +336,10 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
// inlined implementation
////////////////////////////////////////////////////////////////////////////////
+INLINE uint32_t __kmpc_impl_ffs(uint32_t x) { return __builtin_ffs(x); }
+
+INLINE uint32_t __kmpc_impl_popc(uint32_t x) { return __builtin_popcount(x); }
+
#include "common/omptargeti.h"
#endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index cb5147a99ed8..33fa9bb78c9c 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -20,7 +20,7 @@
#include "nvptx_interface.h"
#define DEVICE
-#define INLINE inline __attribute__((always_inline))
+#define INLINE inline __attribute__((always_inline)) DEVICE
#define NOINLINE __attribute__((noinline))
#define ALIGN(N) __attribute__((aligned(N)))
@@ -83,69 +83,4 @@ enum : __kmpc_impl_lanemask_t {
__kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0
};
-DEVICE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi);
-DEVICE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi);
-DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt();
-DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt();
-DEVICE uint32_t __kmpc_impl_smid();
-DEVICE double __kmpc_impl_get_wtick();
-DEVICE double __kmpc_impl_get_wtime();
-
-INLINE uint32_t __kmpc_impl_ffs(uint32_t x) { return __builtin_ffs(x); }
-INLINE uint32_t __kmpc_impl_popc(uint32_t x) { return __builtin_popcount(x); }
-
-DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask();
-
-DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
- int32_t SrcLane);
-
-DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
- int32_t Var, uint32_t Delta,
- int32_t Width);
-
-DEVICE void __kmpc_impl_syncthreads();
-DEVICE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask);
-
-// NVPTX specific kernel initialization
-DEVICE void __kmpc_impl_target_init();
-
-// Barrier until num_threads arrive.
-DEVICE void __kmpc_impl_named_sync(uint32_t num_threads);
-
-DEVICE void __kmpc_impl_threadfence();
-DEVICE void __kmpc_impl_threadfence_block();
-DEVICE void __kmpc_impl_threadfence_system();
-
-// Calls to the NVPTX layer (assuming 1D layout)
-DEVICE int GetThreadIdInBlock();
-DEVICE int GetBlockIdInKernel();
-DEVICE int GetNumberOfBlocksInKernel();
-DEVICE int GetNumberOfThreadsInBlock();
-DEVICE unsigned GetWarpId();
-DEVICE unsigned GetLaneId();
-
-// Atomics
-DEVICE uint32_t __kmpc_atomic_add(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_inc(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_max(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_exchange(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_cas(uint32_t *, uint32_t, uint32_t);
-
-static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-DEVICE unsigned long long __kmpc_atomic_exchange(unsigned long long *,
- unsigned long long);
-DEVICE unsigned long long __kmpc_atomic_add(unsigned long long *,
- unsigned long long);
-
-// Locks
-DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
-DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);
-DEVICE void __kmpc_impl_set_lock(omp_lock_t *lock);
-DEVICE void __kmpc_impl_unset_lock(omp_lock_t *lock);
-DEVICE int __kmpc_impl_test_lock(omp_lock_t *lock);
-
-// Memory
-DEVICE void *__kmpc_impl_malloc(size_t);
-DEVICE void __kmpc_impl_free(void *);
-
#endif
diff --git a/openmp/libomptarget/deviceRTLs/target_interface.h b/openmp/libomptarget/deviceRTLs/target_interface.h
new file mode 100644
index 000000000000..6b5477eddee8
--- /dev/null
+++ b/openmp/libomptarget/deviceRTLs/target_interface.h
@@ -0,0 +1,79 @@
+//===------------- target_interface.h - Target interfaces --------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains interfaces that must be implemented by each target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OMPTARGET_TARGET_INTERFACE_H_
+#define _OMPTARGET_TARGET_INTERFACE_H_
+
+#include "target_impl.h"
+
+// Calls to the NVPTX layer (assuming 1D layout)
+EXTERN int GetThreadIdInBlock();
+EXTERN int GetBlockIdInKernel();
+EXTERN int GetNumberOfBlocksInKernel();
+EXTERN int GetNumberOfThreadsInBlock();
+EXTERN unsigned GetWarpId();
+EXTERN unsigned GetLaneId();
+
+// Atomics
+extern DEVICE uint32_t __kmpc_atomic_add(uint32_t *, uint32_t);
+extern DEVICE uint32_t __kmpc_atomic_inc(uint32_t *, uint32_t);
+extern DEVICE uint32_t __kmpc_atomic_max(uint32_t *, uint32_t);
+extern DEVICE uint32_t __kmpc_atomic_exchange(uint32_t *, uint32_t);
+extern DEVICE uint32_t __kmpc_atomic_cas(uint32_t *, uint32_t, uint32_t);
+static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
+extern DEVICE unsigned long long __kmpc_atomic_exchange(unsigned long long *,
+ unsigned long long);
+extern DEVICE unsigned long long __kmpc_atomic_add(unsigned long long *,
+ unsigned long long);
+
+// Locks
+EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock);
+EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock);
+EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock);
+
+EXTERN void __kmpc_impl_threadfence();
+EXTERN void __kmpc_impl_threadfence_block();
+EXTERN void __kmpc_impl_threadfence_system();
+
+EXTERN double __kmpc_impl_get_wtick();
+EXTERN double __kmpc_impl_get_wtime();
+
+EXTERN void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi);
+EXTERN uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi);
+EXTERN __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt();
+EXTERN __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt();
+EXTERN uint32_t __kmpc_impl_smid();
+
+EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask();
+
+EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
+ int32_t SrcLane);
+EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
+ int32_t Var, uint32_t Delta,
+ int32_t Width);
+
+EXTERN void __kmpc_impl_syncthreads();
+EXTERN void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask);
+
+// Kernel initialization
+EXTERN void __kmpc_impl_target_init();
+
+// Memory
+EXTERN void *__kmpc_impl_malloc(size_t);
+EXTERN void __kmpc_impl_free(void *);
+
+// Barrier until num_threads arrive.
+EXTERN void __kmpc_impl_named_sync(uint32_t num_threads);
+
+#endif // _OMPTARGET_TARGET_INTERFACE_H_
More information about the Openmp-commits mailing list