[Openmp-commits] [openmp] 208f900 - [Libomptarget] Add an external interface to dynamic shared memory
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Fri Oct 8 12:37:07 PDT 2021
Author: Joseph Huber
Date: 2021-10-08T15:36:57-04:00
New Revision: 208f9005277a22d7e282a568ea7849895b2a09d3
URL: https://github.com/llvm/llvm-project/commit/208f9005277a22d7e282a568ea7849895b2a09d3
DIFF: https://github.com/llvm/llvm-project/commit/208f9005277a22d7e282a568ea7849895b2a09d3.diff
LOG: [Libomptarget] Add an external interface to dynamic shared memory
This patch adds an external interface to access the dynamic shared
memory buffer in the device runtime. The function introduced is
``llvm_omp_get_dynamic_shared``. This includes a host-side
definition that only returns a null pointer so that it can be used when
host-fallback is enabled without crashing. Support for dynamic shared
memory was also ported to the old device runtime.
Reviewed By: JonChesterfield
Differential Revision: https://reviews.llvm.org/D110957
Added:
Modified:
openmp/libomptarget/DeviceRTL/include/Interface.h
openmp/libomptarget/DeviceRTL/src/State.cpp
openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
openmp/libomptarget/deviceRTLs/interface.h
openmp/libomptarget/include/omptarget.h
openmp/libomptarget/src/api.cpp
openmp/libomptarget/src/exports
openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
Removed:
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h
index 1d41acf5dad6b..ff4f929345651 100644
--- a/openmp/libomptarget/DeviceRTL/include/Interface.h
+++ b/openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -132,6 +132,8 @@ int omp_get_team_num();
int omp_get_initial_device(void);
+void *llvm_omp_get_dynamic_shared();
+
/// Synchronization
///
///{
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index e3361551e3df0..b35b9b3e7837a 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -503,9 +503,9 @@ __attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
memory::freeShared(Ptr, Bytes, "Frontend free shared");
}
-__attribute__((noinline)) void *__kmpc_get_dynamic_shared() {
- return memory::getDynamicBuffer();
-}
+void *__kmpc_get_dynamic_shared() { return memory::getDynamicBuffer(); }
+
+void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); }
/// Allocate storage in shared memory to communicate arguments from the main
/// thread to the workers in generic mode. If we exceed
diff --git a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
index 491c2795587e7..f834a7a8e172b 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
@@ -21,6 +21,18 @@
static constexpr unsigned MinBytes = 8;
+static constexpr unsigned Alignment = 8;
+
+/// External symbol to access dynamic shared memory.
+extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
+#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
+
+EXTERN void *__kmpc_get_dynamic_shared() { return DynamicSharedBuffer; }
+
+EXTERN void *llvm_omp_get_dynamic_shared() {
+ return __kmpc_get_dynamic_shared();
+}
+
template <unsigned BPerThread, unsigned NThreads = MAX_THREADS_PER_TEAM>
struct alignas(32) ThreadStackTy {
static constexpr unsigned BytesPerThread = BPerThread;
diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h
index 96e583c2133f4..f414e274a7a7e 100644
--- a/openmp/libomptarget/deviceRTLs/interface.h
+++ b/openmp/libomptarget/deviceRTLs/interface.h
@@ -92,6 +92,8 @@ EXTERN int omp_get_team_num(void);
EXTERN int omp_get_initial_device(void);
EXTERN int omp_get_max_task_priority(void);
+EXTERN void *llvm_omp_get_dynamic_shared();
+
////////////////////////////////////////////////////////////////////////////////
// file below is swiped from kmpc host interface
////////////////////////////////////////////////////////////////////////////////
@@ -499,4 +501,7 @@ EXTERN void *__kmpc_alloc_shared(uint64_t Bytes);
/// paired allocation to make memory management easier.
EXTERN void __kmpc_free_shared(void *Ptr, size_t Bytes);
+/// Get a pointer to the dynamic shared memory buffer in the device.
+EXTERN void *__kmpc_get_dynamic_shared();
+
#endif
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 40914cc80d4c0..3776e1e2bb463 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -220,6 +220,9 @@ void *llvm_omp_target_alloc_device(size_t size, int device_num);
void *llvm_omp_target_alloc_host(size_t size, int device_num);
void *llvm_omp_target_alloc_shared(size_t size, int device_num);
+/// Dummy target so we have a symbol for generating host fallback.
+void *llvm_omp_get_dynamic_shared();
+
/// add the clauses of the requires directives in a given file
void __tgt_register_requires(int64_t flags);
diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp
index 94ca4c6331bc6..47f30e69b1136 100644
--- a/openmp/libomptarget/src/api.cpp
+++ b/openmp/libomptarget/src/api.cpp
@@ -53,6 +53,8 @@ EXTERN void *llvm_omp_target_alloc_shared(size_t size, int device_num) {
return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__);
}
+EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; }
+
EXTERN void omp_target_free(void *device_ptr, int device_num) {
TIMESCOPE();
DP("Call to omp_target_free for device %d and address " DPxMOD "\n",
diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index 4d6c1f7f0ea4e..a77e176a2aacb 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -40,6 +40,7 @@ VERS1.0 {
llvm_omp_target_alloc_host;
llvm_omp_target_alloc_shared;
llvm_omp_target_alloc_device;
+ llvm_omp_get_dynamic_shared;
__tgt_set_info_flag;
__tgt_print_device_info;
local:
diff --git a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
index 9a74ed36c96f2..98bf4eb5da39f 100644
--- a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
+++ b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c
@@ -1,22 +1,18 @@
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda -fopenmp-target-new-runtime
-// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=4 \
+// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
// RUN: %libomptarget-run-nvptx64-nvidia-cuda | %fcheck-nvptx64-nvidia-cuda
// REQUIRES: nvptx64-nvidia-cuda
#include <omp.h>
#include <stdio.h>
-void *get_dynamic_shared() { return NULL; }
-#pragma omp begin declare variant match(device = {arch(nvptx64)})
-extern void *__kmpc_get_dynamic_shared();
-void *get_dynamic_shared() { return __kmpc_get_dynamic_shared(); }
-#pragma omp end declare variant
+void *llvm_omp_get_dynamic_shared();
int main() {
int x;
#pragma omp target parallel map(from : x)
{
- int *buf = get_dynamic_shared();
+ int *buf = llvm_omp_get_dynamic_shared() + 252;
#pragma omp barrier
if (omp_get_thread_num() == 0)
*buf = 1;
@@ -26,6 +22,6 @@ int main() {
}
// CHECK: PASS
- if (x == 1)
+ if (x == 1 && llvm_omp_get_dynamic_shared() == NULL)
printf("PASS\n");
}
More information about the Openmp-commits
mailing list