[Openmp-commits] [openmp] d0bc85f - [libomptarget][nfc] Drop unused DEVICE macro
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Mon Mar 15 13:12:58 PDT 2021
Author: Jon Chesterfield
Date: 2021-03-15T20:12:50Z
New Revision: d0bc85f04a04b11add23d28a90d20b93ad12f6a5
URL: https://github.com/llvm/llvm-project/commit/d0bc85f04a04b11add23d28a90d20b93ad12f6a5
DIFF: https://github.com/llvm/llvm-project/commit/d0bc85f04a04b11add23d28a90d20b93ad12f6a5.diff
LOG: [libomptarget][nfc] Drop unused DEVICE macro
[libomptarget][nfc] Drop unused DEVICE macro
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D98655
Added:
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
openmp/libomptarget/deviceRTLs/common/device_environment.h
openmp/libomptarget/deviceRTLs/common/omptarget.h
openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
openmp/libomptarget/deviceRTLs/common/src/reduction.cu
openmp/libomptarget/deviceRTLs/common/src/support.cu
openmp/libomptarget/deviceRTLs/common/support.h
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
openmp/libomptarget/deviceRTLs/target_interface.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip
index abcc67e03f54..226150547610 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip
@@ -18,15 +18,15 @@
#include "common/debug.h"
-static DEVICE void warn() {
+static void warn() {
PRINT0(LD_ALL, "Locks are not supported in this thread mapping model");
}
-DEVICE void __kmpc_impl_init_lock(omp_lock_t *) { warn(); }
-DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *) { warn(); }
-DEVICE void __kmpc_impl_set_lock(omp_lock_t *) { warn(); }
-DEVICE void __kmpc_impl_unset_lock(omp_lock_t *) { warn(); }
-DEVICE int __kmpc_impl_test_lock(omp_lock_t *lock) {
+void __kmpc_impl_init_lock(omp_lock_t *) { warn(); }
+void __kmpc_impl_destroy_lock(omp_lock_t *) { warn(); }
+void __kmpc_impl_set_lock(omp_lock_t *) { warn(); }
+void __kmpc_impl_unset_lock(omp_lock_t *) { warn(); }
+int __kmpc_impl_test_lock(omp_lock_t *lock) {
warn();
return 0;
}
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index b8d112c3f834..197b7d7be250 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -25,9 +25,8 @@
#define PRId64 "ld"
#define PRIu64 "lu"
-#define DEVICE
-#define INLINE inline DEVICE
-#define NOINLINE __attribute__((noinline)) DEVICE
+#define INLINE inline
+#define NOINLINE __attribute__((noinline))
#define ALIGN(N) __attribute__((aligned(N)))
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index 56ecab0fb85e..63a7091ec530 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -107,12 +107,12 @@ EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) {
}
namespace {
-DEVICE uint32_t get_grid_dim(uint32_t n, uint16_t d) {
+uint32_t get_grid_dim(uint32_t n, uint16_t d) {
uint32_t q = n / d;
return q + (n > q * d);
}
-DEVICE uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size,
- uint16_t group_size) {
+uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size,
+ uint16_t group_size) {
uint32_t r = grid_size - group_id * group_size;
return (r < group_size) ? r : group_size;
}
@@ -140,36 +140,35 @@ EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {
}
// Atomics
-DEVICE uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) {
+uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) {
return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST);
}
-DEVICE uint32_t __kmpc_atomic_inc(uint32_t *Address, uint32_t Val) {
+uint32_t __kmpc_atomic_inc(uint32_t *Address, uint32_t Val) {
return __builtin_amdgcn_atomic_inc32(Address, Val, __ATOMIC_SEQ_CST, "");
}
-DEVICE uint32_t __kmpc_atomic_max(uint32_t *Address, uint32_t Val) {
+uint32_t __kmpc_atomic_max(uint32_t *Address, uint32_t Val) {
return __atomic_fetch_max(Address, Val, __ATOMIC_SEQ_CST);
}
-DEVICE uint32_t __kmpc_atomic_exchange(uint32_t *Address, uint32_t Val) {
+uint32_t __kmpc_atomic_exchange(uint32_t *Address, uint32_t Val) {
uint32_t R;
__atomic_exchange(Address, &Val, &R, __ATOMIC_SEQ_CST);
return R;
}
-DEVICE uint32_t __kmpc_atomic_cas(uint32_t *Address, uint32_t Compare,
- uint32_t Val) {
+uint32_t __kmpc_atomic_cas(uint32_t *Address, uint32_t Compare, uint32_t Val) {
(void)__atomic_compare_exchange(Address, &Compare, &Val, false,
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
return Compare;
}
-DEVICE unsigned long long __kmpc_atomic_exchange(unsigned long long *Address,
- unsigned long long Val) {
+unsigned long long __kmpc_atomic_exchange(unsigned long long *Address,
+ unsigned long long Val) {
unsigned long long R;
__atomic_exchange(Address, &Val, &R, __ATOMIC_SEQ_CST);
return R;
}
-DEVICE unsigned long long __kmpc_atomic_add(unsigned long long *Address,
- unsigned long long Val) {
+unsigned long long __kmpc_atomic_add(unsigned long long *Address,
+ unsigned long long Val) {
return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST);
}
diff --git a/openmp/libomptarget/deviceRTLs/common/device_environment.h b/openmp/libomptarget/deviceRTLs/common/device_environment.h
index 68a7757d2047..d1629f89e53a 100644
--- a/openmp/libomptarget/deviceRTLs/common/device_environment.h
+++ b/openmp/libomptarget/deviceRTLs/common/device_environment.h
@@ -19,6 +19,6 @@ struct omptarget_device_environmentTy {
int32_t debug_level;
};
-extern DEVICE omptarget_device_environmentTy omptarget_device_environment;
+extern omptarget_device_environmentTy omptarget_device_environment;
#endif
diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
index dde56524b4f0..d774b5db4ddc 100644
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -73,8 +73,7 @@ class omptarget_nvptx_SharedArgs {
uint32_t nArgs;
};
-extern DEVICE
- omptarget_nvptx_SharedArgs EXTERN_SHARED(omptarget_nvptx_globalArgs);
+extern omptarget_nvptx_SharedArgs EXTERN_SHARED(omptarget_nvptx_globalArgs);
// Worker slot type which is initialized with the default worker slot
// size of 4*32 bytes.
@@ -96,7 +95,7 @@ struct DataSharingStateTy {
__kmpc_impl_lanemask_t ActiveThreads[DS_Max_Warp_Number];
};
-extern DEVICE DataSharingStateTy EXTERN_SHARED(DataSharingState);
+extern DataSharingStateTy EXTERN_SHARED(DataSharingState);
////////////////////////////////////////////////////////////////////////////////
// task ICV and (implicit & explicit) task state
@@ -294,25 +293,23 @@ class omptarget_nvptx_SimpleMemoryManager {
// global data tables
////////////////////////////////////////////////////////////////////////////////
-extern DEVICE omptarget_nvptx_SimpleMemoryManager
- omptarget_nvptx_simpleMemoryManager;
-extern DEVICE uint32_t EXTERN_SHARED(usedMemIdx);
-extern DEVICE uint32_t EXTERN_SHARED(usedSlotIdx);
+extern omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager;
+extern uint32_t EXTERN_SHARED(usedMemIdx);
+extern uint32_t EXTERN_SHARED(usedSlotIdx);
#if _OPENMP
-extern DEVICE uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
+extern uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
#pragma omp allocate(parallelLevel) allocator(omp_pteam_mem_alloc)
#else
-extern DEVICE
- uint8_t EXTERN_SHARED(parallelLevel)[MAX_THREADS_PER_TEAM / WARPSIZE];
+extern uint8_t EXTERN_SHARED(parallelLevel)[MAX_THREADS_PER_TEAM / WARPSIZE];
#endif
-extern DEVICE uint16_t EXTERN_SHARED(threadLimit);
-extern DEVICE uint16_t EXTERN_SHARED(threadsInTeam);
-extern DEVICE uint16_t EXTERN_SHARED(nThreads);
-extern DEVICE omptarget_nvptx_ThreadPrivateContext *
+extern uint16_t EXTERN_SHARED(threadLimit);
+extern uint16_t EXTERN_SHARED(threadsInTeam);
+extern uint16_t EXTERN_SHARED(nThreads);
+extern omptarget_nvptx_ThreadPrivateContext *
EXTERN_SHARED(omptarget_nvptx_threadPrivateContext);
-extern DEVICE uint32_t EXTERN_SHARED(execution_param);
-extern DEVICE void *EXTERN_SHARED(ReductionScratchpadPtr);
+extern uint32_t EXTERN_SHARED(execution_param);
+extern void *EXTERN_SHARED(ReductionScratchpadPtr);
////////////////////////////////////////////////////////////////////////////////
// work function (outlined parallel/simd functions) and arguments.
@@ -320,8 +317,7 @@ extern DEVICE void *EXTERN_SHARED(ReductionScratchpadPtr);
////////////////////////////////////////////////////////////////////////////////
typedef void *omptarget_nvptx_WorkFn;
-extern volatile DEVICE
- omptarget_nvptx_WorkFn EXTERN_SHARED(omptarget_nvptx_workFn);
+extern volatile omptarget_nvptx_WorkFn EXTERN_SHARED(omptarget_nvptx_workFn);
////////////////////////////////////////////////////////////////////////////////
// get private data structures
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
index 4736d07108e0..e54e0f0bed78 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
@@ -19,30 +19,30 @@
// global device environment
////////////////////////////////////////////////////////////////////////////////
-DEVICE omptarget_device_environmentTy omptarget_device_environment;
+omptarget_device_environmentTy omptarget_device_environment;
////////////////////////////////////////////////////////////////////////////////
// global data holding OpenMP state information
////////////////////////////////////////////////////////////////////////////////
// OpenMP will try to call its ctor if we don't add the attribute explicitly
-[[clang::loader_uninitialized]] DEVICE
- omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
- omptarget_nvptx_device_State[MAX_SM];
+[[clang::loader_uninitialized]] omptarget_nvptx_Queue<
+ omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
+ omptarget_nvptx_device_State[MAX_SM];
-DEVICE omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager;
-DEVICE uint32_t SHARED(usedMemIdx);
-DEVICE uint32_t SHARED(usedSlotIdx);
+omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager;
+uint32_t SHARED(usedMemIdx);
+uint32_t SHARED(usedSlotIdx);
// SHARED doesn't work with array so we add the attribute explicitly.
-[[clang::loader_uninitialized]] DEVICE uint8_t
+[[clang::loader_uninitialized]] uint8_t
parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
#pragma omp allocate(parallelLevel) allocator(omp_pteam_mem_alloc)
-DEVICE uint16_t SHARED(threadLimit);
-DEVICE uint16_t SHARED(threadsInTeam);
-DEVICE uint16_t SHARED(nThreads);
+uint16_t SHARED(threadLimit);
+uint16_t SHARED(threadsInTeam);
+uint16_t SHARED(nThreads);
// Pointer to this team's OpenMP state object
-DEVICE omptarget_nvptx_ThreadPrivateContext *
+omptarget_nvptx_ThreadPrivateContext *
SHARED(omptarget_nvptx_threadPrivateContext);
////////////////////////////////////////////////////////////////////////////////
@@ -50,26 +50,26 @@ DEVICE omptarget_nvptx_ThreadPrivateContext *
// communicate with the workers. Since it is in shared memory, there is one
// copy of these variables for each kernel, instance, and team.
////////////////////////////////////////////////////////////////////////////////
-volatile DEVICE omptarget_nvptx_WorkFn SHARED(omptarget_nvptx_workFn);
+volatile omptarget_nvptx_WorkFn SHARED(omptarget_nvptx_workFn);
////////////////////////////////////////////////////////////////////////////////
// OpenMP kernel execution parameters
////////////////////////////////////////////////////////////////////////////////
-DEVICE uint32_t SHARED(execution_param);
+uint32_t SHARED(execution_param);
////////////////////////////////////////////////////////////////////////////////
// Data sharing state
////////////////////////////////////////////////////////////////////////////////
-DEVICE DataSharingStateTy SHARED(DataSharingState);
+DataSharingStateTy SHARED(DataSharingState);
////////////////////////////////////////////////////////////////////////////////
// Scratchpad for teams reduction.
////////////////////////////////////////////////////////////////////////////////
-DEVICE void *SHARED(ReductionScratchpadPtr);
+void *SHARED(ReductionScratchpadPtr);
////////////////////////////////////////////////////////////////////////////////
// Data sharing related variables.
////////////////////////////////////////////////////////////////////////////////
-DEVICE omptarget_nvptx_SharedArgs SHARED(omptarget_nvptx_globalArgs);
+omptarget_nvptx_SharedArgs SHARED(omptarget_nvptx_globalArgs);
#pragma omp end declare target
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
index 39b7b5ccbd55..e19d67affc2b 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -18,9 +18,9 @@
// global data tables
////////////////////////////////////////////////////////////////////////////////
-extern DEVICE
- omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
- omptarget_nvptx_device_State[MAX_SM];
+extern omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext,
+ OMP_STATE_COUNT>
+ omptarget_nvptx_device_State[MAX_SM];
////////////////////////////////////////////////////////////////////////////////
// init entry points
diff --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
index 9daa78dfb2b1..4054a6e2a97b 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
@@ -174,8 +174,8 @@ INLINE static uint32_t roundToWarpsize(uint32_t s) {
INLINE static uint32_t kmpcMin(uint32_t x, uint32_t y) { return x < y ? x : y; }
-DEVICE static volatile uint32_t IterCnt = 0;
-DEVICE static volatile uint32_t Cnt = 0;
+static volatile uint32_t IterCnt = 0;
+static volatile uint32_t Cnt = 0;
EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
kmp_Ident *loc, int32_t global_tid, void *global_buffer,
int32_t num_of_records, void *reduce_data, kmp_ShuffleReductFctPtr shflFct,
diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
index ca0ce2021d8b..cd17a7a2901f 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -19,20 +19,20 @@
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////
-DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
+void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
execution_param = EMode;
execution_param |= RMode;
}
-DEVICE bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
+bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
-DEVICE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; }
+bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; }
-DEVICE bool isRuntimeUninitialized() {
+bool isRuntimeUninitialized() {
return (execution_param & RuntimeMask) == RuntimeUninitialized;
}
-DEVICE bool isRuntimeInitialized() {
+bool isRuntimeInitialized() {
return (execution_param & RuntimeMask) == RuntimeInitialized;
}
@@ -40,7 +40,7 @@ DEVICE bool isRuntimeInitialized() {
// Execution Modes based on location parameter fields
////////////////////////////////////////////////////////////////////////////////
-DEVICE bool checkSPMDMode(kmp_Ident *loc) {
+bool checkSPMDMode(kmp_Ident *loc) {
if (!loc)
return isSPMDMode();
@@ -58,9 +58,9 @@ DEVICE bool checkSPMDMode(kmp_Ident *loc) {
return isSPMDMode();
}
-DEVICE bool checkGenericMode(kmp_Ident *loc) { return !checkSPMDMode(loc); }
+bool checkGenericMode(kmp_Ident *loc) { return !checkSPMDMode(loc); }
-DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc) {
+bool checkRuntimeUninitialized(kmp_Ident *loc) {
if (!loc)
return isRuntimeUninitialized();
@@ -83,7 +83,7 @@ DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc) {
return isRuntimeUninitialized();
}
-DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
+bool checkRuntimeInitialized(kmp_Ident *loc) {
return !checkRuntimeUninitialized(loc);
}
@@ -105,13 +105,13 @@ DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
// If NumThreads is 1024, master id is 992.
//
// Called in Generic Execution Mode only.
-DEVICE int GetMasterThreadID() {
+int GetMasterThreadID() {
return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1);
}
// The last warp is reserved for the master; other warps are workers.
// Called in Generic Execution Mode only.
-DEVICE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
+int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
////////////////////////////////////////////////////////////////////////////////
// get thread id in team
@@ -120,7 +120,7 @@ DEVICE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
// or a serial region by the master. If the master (whose CUDA thread
// id is GetMasterThreadID()) calls this routine, we return 0 because
// it is a shadow for the first worker.
-DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
+int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
// Implemented using control flow (predication) instead of with a modulo
// operation.
int tid = GetThreadIdInBlock();
@@ -136,7 +136,7 @@ DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
//
////////////////////////////////////////////////////////////////////////////////
-DEVICE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
+int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
// omp_thread_num
int rc;
if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) {
@@ -152,7 +152,7 @@ DEVICE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
return rc;
}
-DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
+int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
// omp_num_threads
int rc;
int Level = parallelLevel[GetWarpId()];
@@ -170,12 +170,12 @@ DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
////////////////////////////////////////////////////////////////////////////////
// Team id linked to OpenMP
-DEVICE int GetOmpTeamId() {
+int GetOmpTeamId() {
// omp_team_num
return GetBlockIdInKernel(); // assume 1 block per team
}
-DEVICE int GetNumberOfOmpTeams() {
+int GetNumberOfOmpTeams() {
// omp_num_teams
return GetNumberOfBlocksInKernel(); // assume 1 block per team
}
@@ -183,12 +183,12 @@ DEVICE int GetNumberOfOmpTeams() {
////////////////////////////////////////////////////////////////////////////////
// Masters
-DEVICE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
+int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
////////////////////////////////////////////////////////////////////////////////
// Parallel level
-DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
+void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask);
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
@@ -200,7 +200,7 @@ DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask);
}
-DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
+void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask);
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
@@ -216,13 +216,13 @@ DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
// get OpenMP number of procs
// Get the number of processors in the device.
-DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
+int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
if (!isSPMDExecutionMode)
return GetNumberOfWorkersInTeam();
return GetNumberOfThreadsInBlock();
}
-DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
+int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
return GetNumberOfProcsInDevice(isSPMDExecutionMode);
}
@@ -230,8 +230,8 @@ DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
// Memory
////////////////////////////////////////////////////////////////////////////////
-DEVICE unsigned long PadBytes(unsigned long size,
- unsigned long alignment) // must be a power of 2
+unsigned long PadBytes(unsigned long size,
+ unsigned long alignment) // must be a power of 2
{
// compute the necessary padding to satisfy alignment constraint
ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0,
@@ -239,7 +239,7 @@ DEVICE unsigned long PadBytes(unsigned long size,
return (~(unsigned long)size + 1) & (alignment - 1);
}
-DEVICE void *SafeMalloc(size_t size, const char *msg) // check if success
+void *SafeMalloc(size_t size, const char *msg) // check if success
{
void *ptr = __kmpc_impl_malloc(size);
PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n",
@@ -247,7 +247,7 @@ DEVICE void *SafeMalloc(size_t size, const char *msg) // check if success
return ptr;
}
-DEVICE void *SafeFree(void *ptr, const char *msg) {
+void *SafeFree(void *ptr, const char *msg) {
PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
__kmpc_impl_free(ptr);
return NULL;
@@ -257,11 +257,11 @@ DEVICE void *SafeFree(void *ptr, const char *msg) {
// Teams Reduction Scratchpad Helpers
////////////////////////////////////////////////////////////////////////////////
-DEVICE unsigned int *GetTeamsReductionTimestamp() {
+unsigned int *GetTeamsReductionTimestamp() {
return static_cast<unsigned int *>(ReductionScratchpadPtr);
}
-DEVICE char *GetTeamsReductionScratchpad() {
+char *GetTeamsReductionScratchpad() {
return static_cast<char *>(ReductionScratchpadPtr) + 256;
}
diff --git a/openmp/libomptarget/deviceRTLs/common/support.h b/openmp/libomptarget/deviceRTLs/common/support.h
index 4d260de72aa8..eae5954546d8 100644
--- a/openmp/libomptarget/deviceRTLs/common/support.h
+++ b/openmp/libomptarget/deviceRTLs/common/support.h
@@ -31,59 +31,59 @@ enum RuntimeMode {
RuntimeMask = 0x02u,
};
-DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
-DEVICE bool isGenericMode();
-DEVICE bool isSPMDMode();
-DEVICE bool isRuntimeUninitialized();
-DEVICE bool isRuntimeInitialized();
+void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
+bool isGenericMode();
+bool isSPMDMode();
+bool isRuntimeUninitialized();
+bool isRuntimeInitialized();
////////////////////////////////////////////////////////////////////////////////
// Execution Modes based on location parameter fields
////////////////////////////////////////////////////////////////////////////////
-DEVICE bool checkSPMDMode(kmp_Ident *loc);
-DEVICE bool checkGenericMode(kmp_Ident *loc);
-DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc);
-DEVICE bool checkRuntimeInitialized(kmp_Ident *loc);
+bool checkSPMDMode(kmp_Ident *loc);
+bool checkGenericMode(kmp_Ident *loc);
+bool checkRuntimeUninitialized(kmp_Ident *loc);
+bool checkRuntimeInitialized(kmp_Ident *loc);
////////////////////////////////////////////////////////////////////////////////
// get info from machine
////////////////////////////////////////////////////////////////////////////////
// get global ids to locate thread/team info (constant regardless of OMP)
-DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
-DEVICE int GetMasterThreadID();
-DEVICE int GetNumberOfWorkersInTeam();
+int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
+int GetMasterThreadID();
+int GetNumberOfWorkersInTeam();
// get OpenMP thread and team ids
-DEVICE int GetOmpThreadId(int threadId,
- bool isSPMDExecutionMode); // omp_thread_num
-DEVICE int GetOmpTeamId(); // omp_team_num
+int GetOmpThreadId(int threadId,
+ bool isSPMDExecutionMode); // omp_thread_num
+int GetOmpTeamId(); // omp_team_num
// get OpenMP number of threads and team
-DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads
-DEVICE int GetNumberOfOmpTeams(); // omp_num_teams
+int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads
+int GetNumberOfOmpTeams(); // omp_num_teams
// get OpenMP number of procs
-DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode);
-DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode);
+int GetNumberOfProcsInTeam(bool isSPMDExecutionMode);
+int GetNumberOfProcsInDevice(bool isSPMDExecutionMode);
// masters
-DEVICE int IsTeamMaster(int ompThreadId);
+int IsTeamMaster(int ompThreadId);
// Parallel level
-DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
-DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
+void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
+void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
////////////////////////////////////////////////////////////////////////////////
// Memory
////////////////////////////////////////////////////////////////////////////////
// safe alloc and free
-DEVICE void *SafeMalloc(size_t size, const char *msg); // check if success
-DEVICE void *SafeFree(void *ptr, const char *msg);
+void *SafeMalloc(size_t size, const char *msg); // check if success
+void *SafeFree(void *ptr, const char *msg);
// pad to an alignment (power of 2 only)
-DEVICE unsigned long PadBytes(unsigned long size, unsigned long alignment);
+unsigned long PadBytes(unsigned long size, unsigned long alignment);
#define ADD_BYTES(_addr, _bytes) \
((void *)((char *)((void *)(_addr)) + (_bytes)))
#define SUB_BYTES(_addr, _bytes) \
@@ -92,7 +92,7 @@ DEVICE unsigned long PadBytes(unsigned long size, unsigned long alignment);
////////////////////////////////////////////////////////////////////////////////
// Teams Reduction Scratchpad Helpers
////////////////////////////////////////////////////////////////////////////////
-DEVICE unsigned int *GetTeamsReductionTimestamp();
-DEVICE char *GetTeamsReductionScratchpad();
+unsigned int *GetTeamsReductionTimestamp();
+char *GetTeamsReductionScratchpad();
#endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
index b856b41b598f..eafa73426a95 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -97,39 +97,38 @@ EXTERN unsigned GetWarpSize() { return WARPSIZE; }
EXTERN unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
// Atomics
-DEVICE uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) {
+uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) {
return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST);
}
-DEVICE uint32_t __kmpc_atomic_inc(uint32_t *Address, uint32_t Val) {
+uint32_t __kmpc_atomic_inc(uint32_t *Address, uint32_t Val) {
return __nvvm_atom_inc_gen_ui(Address, Val);
}
-DEVICE uint32_t __kmpc_atomic_max(uint32_t *Address, uint32_t Val) {
+uint32_t __kmpc_atomic_max(uint32_t *Address, uint32_t Val) {
return __atomic_fetch_max(Address, Val, __ATOMIC_SEQ_CST);
}
-DEVICE uint32_t __kmpc_atomic_exchange(uint32_t *Address, uint32_t Val) {
+uint32_t __kmpc_atomic_exchange(uint32_t *Address, uint32_t Val) {
uint32_t R;
__atomic_exchange(Address, &Val, &R, __ATOMIC_SEQ_CST);
return R;
}
-DEVICE uint32_t __kmpc_atomic_cas(uint32_t *Address, uint32_t Compare,
- uint32_t Val) {
+uint32_t __kmpc_atomic_cas(uint32_t *Address, uint32_t Compare, uint32_t Val) {
(void)__atomic_compare_exchange(Address, &Compare, &Val, false,
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
return Compare;
}
-DEVICE unsigned long long __kmpc_atomic_exchange(unsigned long long *Address,
- unsigned long long Val) {
+unsigned long long __kmpc_atomic_exchange(unsigned long long *Address,
+ unsigned long long Val) {
unsigned long long R;
__atomic_exchange(Address, &Val, &R, __ATOMIC_SEQ_CST);
return R;
}
-DEVICE unsigned long long __kmpc_atomic_add(unsigned long long *Address,
- unsigned long long Val) {
+unsigned long long __kmpc_atomic_add(unsigned long long *Address,
+ unsigned long long Val) {
return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST);
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index e92ada5b0be8..d0d7127aac7d 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -18,8 +18,7 @@
#include "nvptx_interface.h"
-#define DEVICE
-#define INLINE inline __attribute__((always_inline)) DEVICE
+#define INLINE inline __attribute__((always_inline))
#define NOINLINE __attribute__((noinline))
#define ALIGN(N) __attribute__((aligned(N)))
diff --git a/openmp/libomptarget/deviceRTLs/target_interface.h b/openmp/libomptarget/deviceRTLs/target_interface.h
index 92fca47d489e..c5141c9d6fc1 100644
--- a/openmp/libomptarget/deviceRTLs/target_interface.h
+++ b/openmp/libomptarget/deviceRTLs/target_interface.h
@@ -25,16 +25,15 @@ EXTERN unsigned GetWarpSize();
EXTERN unsigned GetLaneId();
// Atomics
-DEVICE uint32_t __kmpc_atomic_add(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_inc(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_max(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_exchange(uint32_t *, uint32_t);
-DEVICE uint32_t __kmpc_atomic_cas(uint32_t *, uint32_t, uint32_t);
+uint32_t __kmpc_atomic_add(uint32_t *, uint32_t);
+uint32_t __kmpc_atomic_inc(uint32_t *, uint32_t);
+uint32_t __kmpc_atomic_max(uint32_t *, uint32_t);
+uint32_t __kmpc_atomic_exchange(uint32_t *, uint32_t);
+uint32_t __kmpc_atomic_cas(uint32_t *, uint32_t, uint32_t);
static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-DEVICE unsigned long long __kmpc_atomic_exchange(unsigned long long *,
- unsigned long long);
-DEVICE unsigned long long __kmpc_atomic_add(unsigned long long *,
- unsigned long long);
+unsigned long long __kmpc_atomic_exchange(unsigned long long *,
+ unsigned long long);
+unsigned long long __kmpc_atomic_add(unsigned long long *, unsigned long long);
// Locks
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
More information about the Openmp-commits
mailing list