[Openmp-commits] [openmp] fd9fa99 - [libomptarget] Move supporti.h to support.cu
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Wed Nov 13 03:37:00 PST 2019
Author: JonChesterfield
Date: 2019-11-13T11:36:46Z
New Revision: fd9fa9995cdafa526543df466d762a982863a906
URL: https://github.com/llvm/llvm-project/commit/fd9fa9995cdafa526543df466d762a982863a906
DIFF: https://github.com/llvm/llvm-project/commit/fd9fa9995cdafa526543df466d762a982863a906.diff
LOG: [libomptarget] Move supporti.h to support.cu
Summary:
[libomptarget] Move supporti.h to support.cu
Reimplementation of D69652, without the unity build and refactors.
Will need a clean build of libomptarget as the cmakelists changed.
Reviewers: ABataev, jdoerfert
Reviewed By: jdoerfert
Subscribers: mgorny, jfb, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D70131
Added:
openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
Modified:
openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
openmp/libomptarget/deviceRTLs/nvptx/src/support.h
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Removed:
openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 1cd13c50395f..b3cd8ef46d27 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -55,6 +55,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
src/omptarget-nvptx.cu
src/parallel.cu
src/reduction.cu
+ src/support.cu
src/sync.cu
src/task.cu
)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 986150402f16..6c98f0104efc 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -385,6 +385,5 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
////////////////////////////////////////////////////////////////////////////////
#include "omptarget-nvptxi.h"
-#include "supporti.h"
#endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
similarity index 79%
rename from openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
rename to openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
index 6fa857899905..a7aa8aa814ab 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
@@ -1,4 +1,4 @@
-//===--------- supporti.h - NVPTX OpenMP support functions ------- CUDA -*-===//
+//===--------- support.cu - NVPTX OpenMP support functions ------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,26 +10,28 @@
//
//===----------------------------------------------------------------------===//
+#include "support.h"
+#include "debug.h"
+#include "omptarget-nvptx.h"
+
////////////////////////////////////////////////////////////////////////////////
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////
-#include "target_impl.h"
-
-INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
+DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
execution_param = EMode;
execution_param |= RMode;
}
-INLINE bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
+DEVICE bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
-INLINE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; }
+DEVICE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; }
-INLINE bool isRuntimeUninitialized() {
+DEVICE bool isRuntimeUninitialized() {
return (execution_param & RuntimeMask) == RuntimeUninitialized;
}
-INLINE bool isRuntimeInitialized() {
+DEVICE bool isRuntimeInitialized() {
return (execution_param & RuntimeMask) == RuntimeInitialized;
}
@@ -37,7 +39,7 @@ INLINE bool isRuntimeInitialized() {
// Execution Modes based on location parameter fields
////////////////////////////////////////////////////////////////////////////////
-INLINE bool checkSPMDMode(kmp_Ident *loc) {
+DEVICE bool checkSPMDMode(kmp_Ident *loc) {
if (!loc)
return isSPMDMode();
@@ -55,11 +57,11 @@ INLINE bool checkSPMDMode(kmp_Ident *loc) {
return isSPMDMode();
}
-INLINE bool checkGenericMode(kmp_Ident *loc) {
+DEVICE bool checkGenericMode(kmp_Ident *loc) {
return !checkSPMDMode(loc);
}
-INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
+DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc) {
if (!loc)
return isRuntimeUninitialized();
@@ -82,7 +84,7 @@ INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
return isRuntimeUninitialized();
}
-INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
+DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
return !checkRuntimeUninitialized(loc);
}
@@ -96,17 +98,17 @@ INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
//
////////////////////////////////////////////////////////////////////////////////
-INLINE int GetThreadIdInBlock() { return threadIdx.x; }
+DEVICE int GetThreadIdInBlock() { return threadIdx.x; }
-INLINE int GetBlockIdInKernel() { return blockIdx.x; }
+DEVICE int GetBlockIdInKernel() { return blockIdx.x; }
-INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
+DEVICE int GetNumberOfBlocksInKernel() { return gridDim.x; }
-INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
+DEVICE int GetNumberOfThreadsInBlock() { return blockDim.x; }
-INLINE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
+DEVICE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
-INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
+DEVICE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
////////////////////////////////////////////////////////////////////////////////
//
@@ -122,11 +124,11 @@ INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
// If NumThreads is 1024, master id is 992.
//
// Called in Generic Execution Mode only.
-INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
+DEVICE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
// The last warp is reserved for the master; other warps are workers.
// Called in Generic Execution Mode only.
-INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
+DEVICE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
////////////////////////////////////////////////////////////////////////////////
// get thread id in team
@@ -135,7 +137,7 @@ INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
// or a serial region by the master. If the master (whose CUDA thread
// id is GetMasterThreadID()) calls this routine, we return 0 because
// it is a shadow for the first worker.
-INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
+DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
// Implemented using control flow (predication) instead of with a modulo
// operation.
int tid = GetThreadIdInBlock();
@@ -151,7 +153,7 @@ INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
//
////////////////////////////////////////////////////////////////////////////////
-INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
+DEVICE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
// omp_thread_num
int rc;
if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) {
@@ -167,7 +169,7 @@ INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
return rc;
}
-INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
+DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
// omp_num_threads
int rc;
int Level = parallelLevel[GetWarpId()];
@@ -185,12 +187,12 @@ INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
////////////////////////////////////////////////////////////////////////////////
// Team id linked to OpenMP
-INLINE int GetOmpTeamId() {
+DEVICE int GetOmpTeamId() {
// omp_team_num
return GetBlockIdInKernel(); // assume 1 block per team
}
-INLINE int GetNumberOfOmpTeams() {
+DEVICE int GetNumberOfOmpTeams() {
// omp_num_teams
return GetNumberOfBlocksInKernel(); // assume 1 block per team
}
@@ -198,12 +200,12 @@ INLINE int GetNumberOfOmpTeams() {
////////////////////////////////////////////////////////////////////////////////
// Masters
-INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
+DEVICE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
////////////////////////////////////////////////////////////////////////////////
// Parallel level
-INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
+DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask);
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
@@ -215,7 +217,7 @@ INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask);
}
-INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
+DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
__kmpc_impl_syncwarp(Mask);
__kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt);
@@ -231,13 +233,13 @@ INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
// get OpenMP number of procs
// Get the number of processors in the device.
-INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
+DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
if (!isSPMDExecutionMode)
return GetNumberOfWorkersInTeam();
return GetNumberOfThreadsInBlock();
}
-INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
+DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
return GetNumberOfProcsInDevice(isSPMDExecutionMode);
}
@@ -245,7 +247,7 @@ INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
// Memory
////////////////////////////////////////////////////////////////////////////////
-INLINE unsigned long PadBytes(unsigned long size,
+DEVICE unsigned long PadBytes(unsigned long size,
unsigned long alignment) // must be a power of 2
{
// compute the necessary padding to satisfy alignment constraint
@@ -254,7 +256,7 @@ INLINE unsigned long PadBytes(unsigned long size,
return (~(unsigned long)size + 1) & (alignment - 1);
}
-INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
+DEVICE void *SafeMalloc(size_t size, const char *msg) // check if success
{
void *ptr = malloc(size);
PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n",
@@ -262,7 +264,7 @@ INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
return ptr;
}
-INLINE void *SafeFree(void *ptr, const char *msg) {
+DEVICE void *SafeFree(void *ptr, const char *msg) {
PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
free(ptr);
return NULL;
@@ -272,14 +274,14 @@ INLINE void *SafeFree(void *ptr, const char *msg) {
// Teams Reduction Scratchpad Helpers
////////////////////////////////////////////////////////////////////////////////
-INLINE unsigned int *GetTeamsReductionTimestamp() {
+DEVICE unsigned int *GetTeamsReductionTimestamp() {
return static_cast<unsigned int *>(ReductionScratchpadPtr);
}
-INLINE char *GetTeamsReductionScratchpad() {
+DEVICE char *GetTeamsReductionScratchpad() {
return static_cast<char *>(ReductionScratchpadPtr) + 256;
}
-INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) {
+DEVICE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) {
ReductionScratchpadPtr = ScratchpadPtr;
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
index de685b894509..8cffd91c9f3a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
@@ -10,7 +10,12 @@
//
//===----------------------------------------------------------------------===//
+#ifndef OMPTARGET_SUPPORT_H
+#define OMPTARGET_SUPPORT_H
+
+#include "interface.h"
#include "target_impl.h"
+
////////////////////////////////////////////////////////////////////////////////
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////
@@ -26,58 +31,67 @@ enum RuntimeMode {
RuntimeMask = 0x02u,
};
-INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
-INLINE bool isGenericMode();
-INLINE bool isSPMDMode();
-INLINE bool isRuntimeUninitialized();
-INLINE bool isRuntimeInitialized();
+DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
+DEVICE bool isGenericMode();
+DEVICE bool isSPMDMode();
+DEVICE bool isRuntimeUninitialized();
+DEVICE bool isRuntimeInitialized();
+
+////////////////////////////////////////////////////////////////////////////////
+// Execution Modes based on location parameter fields
+////////////////////////////////////////////////////////////////////////////////
+
+DEVICE bool checkSPMDMode(kmp_Ident *loc);
+DEVICE bool checkGenericMode(kmp_Ident *loc);
+DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc);
+DEVICE bool checkRuntimeInitialized(kmp_Ident *loc);
////////////////////////////////////////////////////////////////////////////////
// get info from machine
////////////////////////////////////////////////////////////////////////////////
// get low level ids of resources
-INLINE int GetThreadIdInBlock();
-INLINE int GetBlockIdInKernel();
-INLINE int GetNumberOfBlocksInKernel();
-INLINE int GetNumberOfThreadsInBlock();
-INLINE unsigned GetWarpId();
-INLINE unsigned GetLaneId();
+DEVICE int GetThreadIdInBlock();
+DEVICE int GetBlockIdInKernel();
+DEVICE int GetNumberOfBlocksInKernel();
+DEVICE int GetNumberOfThreadsInBlock();
+DEVICE unsigned GetWarpId();
+DEVICE unsigned GetLaneId();
// get global ids to locate tread/team info (constant regardless of OMP)
-INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
-INLINE int GetMasterThreadID();
-INLINE int GetNumberOfWorkersInTeam();
+DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
+DEVICE int GetMasterThreadID();
+DEVICE int GetNumberOfWorkersInTeam();
// get OpenMP thread and team ids
-INLINE int GetOmpThreadId(int threadId,
+DEVICE int GetOmpThreadId(int threadId,
bool isSPMDExecutionMode); // omp_thread_num
-INLINE int GetOmpTeamId(); // omp_team_num
+DEVICE int GetOmpTeamId(); // omp_team_num
// get OpenMP number of threads and team
-INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads
-INLINE int GetNumberOfOmpTeams(); // omp_num_teams
+DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads
+DEVICE int GetNumberOfOmpTeams(); // omp_num_teams
// get OpenMP number of procs
-INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode);
-INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode);
+DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode);
+DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode);
// masters
-INLINE int IsTeamMaster(int ompThreadId);
+DEVICE int IsTeamMaster(int ompThreadId);
// Parallel level
-INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
-INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
+DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
+DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask);
////////////////////////////////////////////////////////////////////////////////
// Memory
////////////////////////////////////////////////////////////////////////////////
// safe alloc and free
-INLINE void *SafeMalloc(size_t size, const char *msg); // check if success
-INLINE void *SafeFree(void *ptr, const char *msg);
+DEVICE void *SafeMalloc(size_t size, const char *msg); // check if success
+DEVICE void *SafeFree(void *ptr, const char *msg);
// pad to a alignment (power of 2 only)
-INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
+DEVICE unsigned long PadBytes(unsigned long size, unsigned long alignment);
#define ADD_BYTES(_addr, _bytes) \
((void *)((char *)((void *)(_addr)) + (_bytes)))
#define SUB_BYTES(_addr, _bytes) \
@@ -86,6 +100,8 @@ INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
////////////////////////////////////////////////////////////////////////////////
// Teams Reduction Scratchpad Helpers
////////////////////////////////////////////////////////////////////////////////
-INLINE unsigned int *GetTeamsReductionTimestamp();
-INLINE char *GetTeamsReductionScratchpad();
-INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr);
+DEVICE unsigned int *GetTeamsReductionTimestamp();
+DEVICE char *GetTeamsReductionScratchpad();
+DEVICE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr);
+
+#endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 95fe2ad3d3d5..e0bacab3c643 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -15,8 +15,9 @@
#include <cuda.h>
#include "nvptx_interface.h"
-#define INLINE __forceinline__ __device__
-#define NOINLINE __noinline__ __device__
+#define DEVICE __device__
+#define INLINE __forceinline__ DEVICE
+#define NOINLINE __noinline__ DEVICE
////////////////////////////////////////////////////////////////////////////////
// Kernel options
More information about the Openmp-commits
mailing list