[Openmp-commits] [openmp] 208f900 - [Libomptarget] Add an external interface to dynamic shared memory

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Fri Oct 8 12:37:07 PDT 2021


Author: Joseph Huber
Date: 2021-10-08T15:36:57-04:00
New Revision: 208f9005277a22d7e282a568ea7849895b2a09d3

URL: https://github.com/llvm/llvm-project/commit/208f9005277a22d7e282a568ea7849895b2a09d3
DIFF: https://github.com/llvm/llvm-project/commit/208f9005277a22d7e282a568ea7849895b2a09d3.diff

LOG: [Libomptarget] Add an external interface to dynamic shared memory

This patch adds an external interface to access the dynamic shared
memory buffer in the device runtime. The function introduced is
``llvm_omp_get_dynamic_shared``. This includes a host-side
definition that only returns a null pointer so that it can be used when
host-fallback is enabled without crashing. Support for dynamic shared
memory was also ported to the old device runtime.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D110957

Added: 
    

Modified: 
    openmp/libomptarget/DeviceRTL/include/Interface.h
    openmp/libomptarget/DeviceRTL/src/State.cpp
    openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
    openmp/libomptarget/deviceRTLs/interface.h
    openmp/libomptarget/include/omptarget.h
    openmp/libomptarget/src/api.cpp
    openmp/libomptarget/src/exports
    openmp/libomptarget/test/api/omp_dynamic_shared_memory.c

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h
index 1d41acf5dad6b..ff4f929345651 100644
--- a/openmp/libomptarget/DeviceRTL/include/Interface.h
+++ b/openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -132,6 +132,8 @@ int omp_get_team_num();
 
 int omp_get_initial_device(void);
 
+void *llvm_omp_get_dynamic_shared();
+
 /// Synchronization
 ///
 ///{

diff  --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index e3361551e3df0..b35b9b3e7837a 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -503,9 +503,9 @@ __attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
   memory::freeShared(Ptr, Bytes, "Frontend free shared");
 }
 
-__attribute__((noinline)) void *__kmpc_get_dynamic_shared() {
-  return memory::getDynamicBuffer();
-}
+void *__kmpc_get_dynamic_shared() { return memory::getDynamicBuffer(); }
+
+void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); }
 
 /// Allocate storage in shared memory to communicate arguments from the main
 /// thread to the workers in generic mode. If we exceed

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
index 491c2795587e7..f834a7a8e172b 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
@@ -21,6 +21,18 @@
 
 static constexpr unsigned MinBytes = 8;
 
+static constexpr unsigned Alignment = 8;
+
+/// External symbol to access dynamic shared memory.
+extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
+#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
+
+EXTERN void *__kmpc_get_dynamic_shared() { return DynamicSharedBuffer; }
+
+EXTERN void *llvm_omp_get_dynamic_shared() {
+  return __kmpc_get_dynamic_shared();
+}
+
 template <unsigned BPerThread, unsigned NThreads = MAX_THREADS_PER_TEAM>
 struct alignas(32) ThreadStackTy {
   static constexpr unsigned BytesPerThread = BPerThread;

diff  --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h
index 96e583c2133f4..f414e274a7a7e 100644
--- a/openmp/libomptarget/deviceRTLs/interface.h
+++ b/openmp/libomptarget/deviceRTLs/interface.h
@@ -92,6 +92,8 @@ EXTERN int omp_get_team_num(void);
 EXTERN int omp_get_initial_device(void);
 EXTERN int omp_get_max_task_priority(void);
 
+EXTERN void *llvm_omp_get_dynamic_shared();
+
 ////////////////////////////////////////////////////////////////////////////////
 // file below is swiped from kmpc host interface
 ////////////////////////////////////////////////////////////////////////////////
@@ -499,4 +501,7 @@ EXTERN void *__kmpc_alloc_shared(uint64_t Bytes);
 /// paired allocation to make memory management easier.
 EXTERN void __kmpc_free_shared(void *Ptr, size_t Bytes);
 
+/// Get a pointer to the dynamic shared memory buffer in the device.
+EXTERN void *__kmpc_get_dynamic_shared();
+
 #endif

diff  --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 40914cc80d4c0..3776e1e2bb463 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -220,6 +220,9 @@ void *llvm_omp_target_alloc_device(size_t size, int device_num);
 void *llvm_omp_target_alloc_host(size_t size, int device_num);
 void *llvm_omp_target_alloc_shared(size_t size, int device_num);
 
+/// Dummy target so we have a symbol for generating host fallback.
+void *llvm_omp_get_dynamic_shared();
+
 /// add the clauses of the requires directives in a given file
 void __tgt_register_requires(int64_t flags);
 

diff  --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp
index 94ca4c6331bc6..47f30e69b1136 100644
--- a/openmp/libomptarget/src/api.cpp
+++ b/openmp/libomptarget/src/api.cpp
@@ -53,6 +53,8 @@ EXTERN void *llvm_omp_target_alloc_shared(size_t size, int device_num) {
   return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__);
 }
 
+EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; }
+
 EXTERN void omp_target_free(void *device_ptr, int device_num) {
   TIMESCOPE();
   DP("Call to omp_target_free for device %d and address " DPxMOD "\n",

diff  --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index 4d6c1f7f0ea4e..a77e176a2aacb 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -40,6 +40,7 @@ VERS1.0 {
     llvm_omp_target_alloc_host;
     llvm_omp_target_alloc_shared;
     llvm_omp_target_alloc_device;
+    llvm_omp_get_dynamic_shared;
     __tgt_set_info_flag;
     __tgt_print_device_info;
   local:

diff  --git a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
index 9a74ed36c96f2..98bf4eb5da39f 100644
--- a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
+++ b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
@@ -1,22 +1,18 @@
 // RUN: %libomptarget-compile-nvptx64-nvidia-cuda -fopenmp-target-new-runtime
-// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=4 \
+// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
 // RUN:   %libomptarget-run-nvptx64-nvidia-cuda | %fcheck-nvptx64-nvidia-cuda
 // REQUIRES: nvptx64-nvidia-cuda
 
 #include <omp.h>
 #include <stdio.h>
 
-void *get_dynamic_shared() { return NULL; }
-#pragma omp begin declare variant match(device = {arch(nvptx64)})
-extern void *__kmpc_get_dynamic_shared();
-void *get_dynamic_shared() { return __kmpc_get_dynamic_shared(); }
-#pragma omp end declare variant
+void *llvm_omp_get_dynamic_shared();
 
 int main() {
   int x;
 #pragma omp target parallel map(from : x)
   {
-    int *buf = get_dynamic_shared();
+    int *buf = llvm_omp_get_dynamic_shared() + 252;
 #pragma omp barrier
     if (omp_get_thread_num() == 0)
       *buf = 1;
@@ -26,6 +22,6 @@ int main() {
   }
 
   // CHECK: PASS
-  if (x == 1)
+  if (x == 1 && llvm_omp_get_dynamic_shared() == NULL)
     printf("PASS\n");
 }


        


More information about the Openmp-commits mailing list