[Openmp-commits] [openmp] b4f8443 - [Libomptarget] Allow the device runtime to be compiled for the host

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Fri May 13 11:39:16 PDT 2022


Author: Joseph Huber
Date: 2022-05-13T14:38:27-04:00
New Revision: b4f8443d97baf390e3a1e64021e39790c410af9d

URL: https://github.com/llvm/llvm-project/commit/b4f8443d97baf390e3a1e64021e39790c410af9d
DIFF: https://github.com/llvm/llvm-project/commit/b4f8443d97baf390e3a1e64021e39790c410af9d.diff

LOG: [Libomptarget] Allow the device runtime to be compiled for the host

Currently the OpenMP offloading device runtime is only expected to be
compiled for the specific architecture it's targeting. This is
problematic if we want to make compiling the device runtime more general
via the standard `clang` driver rather than invoking the clang front-end
directly. This patch addresses this primarily by changing the `declare
target` device type to `nohost` so the host will not contain any of this code.
Additionally we forward declare the functions that are defined via
variants, otherwise these would cause problems on the host.

Reviewed By: jdoerfert, tianshilei1992

Differential Revision: https://reviews.llvm.org/D125260

Added: 
    

Modified: 
    openmp/libomptarget/DeviceRTL/include/Mapping.h
    openmp/libomptarget/DeviceRTL/include/State.h
    openmp/libomptarget/DeviceRTL/src/Configuration.cpp
    openmp/libomptarget/DeviceRTL/src/Debug.cpp
    openmp/libomptarget/DeviceRTL/src/Kernel.cpp
    openmp/libomptarget/DeviceRTL/src/Mapping.cpp
    openmp/libomptarget/DeviceRTL/src/Misc.cpp
    openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
    openmp/libomptarget/DeviceRTL/src/Reduction.cpp
    openmp/libomptarget/DeviceRTL/src/State.cpp
    openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
    openmp/libomptarget/DeviceRTL/src/Tasking.cpp
    openmp/libomptarget/DeviceRTL/src/Utils.cpp
    openmp/libomptarget/DeviceRTL/src/Workshare.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/DeviceRTL/include/Mapping.h b/openmp/libomptarget/DeviceRTL/include/Mapping.h
index 36cfae7c5efa..c9e07bab3c9a 100644
--- a/openmp/libomptarget/DeviceRTL/include/Mapping.h
+++ b/openmp/libomptarget/DeviceRTL/include/Mapping.h
@@ -18,7 +18,7 @@ namespace _OMP {
 
 namespace mapping {
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 inline constexpr uint32_t MaxThreadsPerTeam = 1024;
 

diff  --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
index 2f9cbd4c9ca6..183b68416f0a 100644
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -15,7 +15,7 @@
 #include "Debug.h"
 #include "Types.h"
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 namespace _OMP {
 

diff  --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index e9cc9bb0e318..b3d779a96361 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -18,7 +18,7 @@
 
 using namespace _OMP;
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 // defined by CGOpenMPRuntimeGPU
 extern uint32_t __omp_rtl_debug_kind;

diff  --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
index f458a1b2403d..e97c77da3b99 100644
--- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
@@ -18,7 +18,7 @@
 
 using namespace _OMP;
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 extern "C" {
 void __assert_assume(bool condition) { __builtin_assume(condition); }
@@ -30,6 +30,10 @@ void __assert_fail(const char *assertion, const char *file, unsigned line,
   __builtin_trap();
 }
 
+namespace impl {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t);
+}
+
 #pragma omp begin declare variant match(                                       \
     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
 int32_t vprintf(const char *, void *);
@@ -55,8 +59,7 @@ int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
 }
 
 /// Current indentation level for the function trace. Only accessed by thread 0.
-__attribute__((loader_uninitialized))
-static uint32_t Level;
+__attribute__((loader_uninitialized)) static uint32_t Level;
 #pragma omp allocate(Level) allocator(omp_pteam_mem_alloc)
 
 DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,

diff  --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
index 8b7a8a2495c4..74c22a61f3b8 100644
--- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
@@ -19,7 +19,7 @@
 
 using namespace _OMP;
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 static void inititializeRuntime(bool IsSPMD) {
   // Order is important here.

diff  --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index 21104be3d02e..48ca13a5c31d 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -15,7 +15,7 @@
 #include "Types.h"
 #include "Utils.h"
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
@@ -24,6 +24,23 @@ using namespace _OMP;
 namespace _OMP {
 namespace impl {
 
+// Forward declarations of functions to be defined for AMDGCN and NVPTX.
+const llvm::omp::GV &getGridValue();
+uint32_t getGridDim(uint32_t n, uint16_t d);
+uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
+                         uint16_t group_size);
+uint32_t getNumHardwareThreadsInBlock();
+LaneMaskTy activemask();
+LaneMaskTy lanemaskLT();
+LaneMaskTy lanemaskGT();
+uint32_t getThreadIdInWarp();
+uint32_t getThreadIdInBlock();
+uint32_t getKernelSize();
+uint32_t getBlockId();
+uint32_t getNumberOfBlocks();
+uint32_t getWarpId();
+uint32_t getNumberOfWarpsInBlock();
+
 /// AMDGCN Implementation
 ///
 ///{

diff  --git a/openmp/libomptarget/DeviceRTL/src/Misc.cpp b/openmp/libomptarget/DeviceRTL/src/Misc.cpp
index 7284be87896f..554a13ae4794 100644
--- a/openmp/libomptarget/DeviceRTL/src/Misc.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Misc.cpp
@@ -13,11 +13,15 @@
 
 #include "Debug.h"
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 namespace _OMP {
 namespace impl {
 
+double getWTick();
+
+double getWTime();
+
 /// AMDGCN Implementation
 ///
 ///{

diff  --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
index 5584f34e63ed..fd419b83e5b5 100644
--- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -42,7 +42,7 @@
 
 using namespace _OMP;
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 namespace {
 

diff  --git a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
index dd1d30dd4cbf..516da6bf8719 100644
--- a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
@@ -22,7 +22,7 @@ using namespace _OMP;
 
 namespace {
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 void gpu_regular_warp_reduce(void *reduce_data, ShuffleReductFnTy shflFct) {
   for (uint32_t mask = mapping::getWarpSize() / 2; mask > 0; mask /= 2) {

diff  --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index a39d8d6dcd9d..685c697d7a0d 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -19,7 +19,7 @@
 
 using namespace _OMP;
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 /// Memory implementation
 ///

diff  --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
index 6b4bab0bcbb2..43278715be8d 100644
--- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -19,7 +19,7 @@
 #include "Types.h"
 #include "Utils.h"
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 using namespace _OMP;
 
@@ -63,6 +63,22 @@ uint64_t atomicAdd(uint64_t *Address, uint64_t Val, int Ordering) {
 }
 ///}
 
+// Forward declarations of functions to be defined for AMDGCN and NVPTX.
+uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering);
+void namedBarrierInit();
+void namedBarrier();
+void fenceTeam(int Ordering);
+void fenceKernel(int Ordering);
+void fenceSystem(int Ordering);
+void syncWarp(__kmpc_impl_lanemask_t);
+void syncThreads();
+void syncThreadsAligned() { syncThreads(); }
+void unsetLock(omp_lock_t *);
+int testLock(omp_lock_t *);
+void initLock(omp_lock_t *);
+void destroyLock(omp_lock_t *);
+void setLock(omp_lock_t *);
+
 /// AMDGCN Implementation
 ///
 ///{

diff  --git a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp
index 2c80e71a2fb4..06804e0d388c 100644
--- a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp
@@ -20,7 +20,7 @@
 
 using namespace _OMP;
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, uint32_t, int32_t,
                                         uint64_t TaskSizeInclPrivateValues,

diff  --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
index 0816f078e2ab..e6bcba811f80 100644
--- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -15,7 +15,7 @@
 #include "Interface.h"
 #include "Mapping.h"
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 using namespace _OMP;
 
@@ -32,6 +32,9 @@ __attribute__((used, retain, weak, optnone, cold)) void keepAlive() {
 
 namespace impl {
 
+void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits);
+uint64_t Pack(uint32_t LowBits, uint32_t HighBits);
+
 /// AMDGCN Implementation
 ///
 ///{
@@ -72,6 +75,10 @@ uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
 
 #pragma omp end declare variant
 
+int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
+int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
+                    int32_t Width);
+
 /// AMDGCN Implementation
 ///
 ///{

diff  --git a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
index 0bdbf30c23d3..81b3f6c8dcdb 100644
--- a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
@@ -43,7 +43,7 @@ struct DynamicScheduleTracker {
 #define NOT_FINISHED 1
 #define LAST_CHUNK 2
 
-#pragma omp declare target
+#pragma omp begin declare target device_type(nohost)
 
 // TODO: This variable is a hack inherited from the old runtime.
 static uint64_t SHARED(Cnt);


        


More information about the Openmp-commits mailing list