[Openmp-commits] [openmp] f4f23de - [Libomptarget] Add basic support for dynamic shared memory on AMDGPU

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Wed Jun 1 10:33:45 PDT 2022


Author: Joseph Huber
Date: 2022-06-01T13:32:50-04:00
New Revision: f4f23de1a46f94762b8192e82f20fb86b41c339f

URL: https://github.com/llvm/llvm-project/commit/f4f23de1a46f94762b8192e82f20fb86b41c339f
DIFF: https://github.com/llvm/llvm-project/commit/f4f23de1a46f94762b8192e82f20fb86b41c339f.diff

LOG: [Libomptarget] Add basic support for dynamic shared memory on AMDGPU

This patchs adds the arguments necessary to allocate the size of the
dynamic shared memory via the `LIBOMPTARGET_SHARED_MEMORY_SIZE`
environment variable. This patch only allocates the memory, AMDGPU has a
limitation that shared memory can only be accessed from the kernel
directly. So this will currently only work with optimizations to inline
the accessor function.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D125252

Added: 
    openmp/libomptarget/test/api/omp_dynamic_shared_memory_amdgpu.c

Modified: 
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 3086334f8ef63..4b5dd0de4f06c 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -336,6 +336,7 @@ struct EnvironmentVariables {
   int TeamLimit;
   int TeamThreadLimit;
   int MaxTeamsDefault;
+  int DynamicMemSize;
 };
 
 template <uint32_t wavesize>
@@ -692,9 +693,9 @@ class RTLDeviceInfoTy : HSALifetime {
     return HostFineGrainedMemoryPool;
   }
 
-  static int readEnvElseMinusOne(const char *Env) {
+  static int readEnv(const char *Env, int Default = -1) {
     const char *envStr = getenv(Env);
-    int res = -1;
+    int res = Default;
     if (envStr) {
       res = std::stoi(envStr);
       DP("Parsed %s=%d\n", Env, res);
@@ -811,10 +812,11 @@ class RTLDeviceInfoTy : HSALifetime {
     }
 
     // Get environment variables regarding teams
-    Env.TeamLimit = readEnvElseMinusOne("OMP_TEAM_LIMIT");
-    Env.NumTeams = readEnvElseMinusOne("OMP_NUM_TEAMS");
-    Env.MaxTeamsDefault = readEnvElseMinusOne("OMP_MAX_TEAMS_DEFAULT");
-    Env.TeamThreadLimit = readEnvElseMinusOne("OMP_TEAMS_THREAD_LIMIT");
+    Env.TeamLimit = readEnv("OMP_TEAM_LIMIT");
+    Env.NumTeams = readEnv("OMP_NUM_TEAMS");
+    Env.MaxTeamsDefault = readEnv("OMP_MAX_TEAMS_DEFAULT");
+    Env.TeamThreadLimit = readEnv("OMP_TEAMS_THREAD_LIMIT");
+    Env.DynamicMemSize = readEnv("LIBOMPTARGET_SHARED_MEMORY_SIZE", 0);
 
     // Default state.
     RequiresFlags = OMP_REQ_UNDEFINED;
@@ -1123,7 +1125,8 @@ int32_t runRegionLocked(int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
 
   const atl_kernel_info_t KernelInfoEntry =
       KernelInfoTable[device_id][kernel_name];
-  const uint32_t group_segment_size = KernelInfoEntry.group_segment_size;
+  const uint32_t group_segment_size =
+      KernelInfoEntry.group_segment_size + DeviceInfo.Env.DynamicMemSize;
   const uint32_t sgpr_count = KernelInfoEntry.sgpr_count;
   const uint32_t vgpr_count = KernelInfoEntry.vgpr_count;
   const uint32_t sgpr_spill_count = KernelInfoEntry.sgpr_spill_count;
@@ -1182,7 +1185,7 @@ int32_t runRegionLocked(int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
     packet->grid_size_y = 1;
     packet->grid_size_z = 1;
     packet->private_segment_size = KernelInfoEntry.private_segment_size;
-    packet->group_segment_size = KernelInfoEntry.group_segment_size;
+    packet->group_segment_size = group_segment_size;
     packet->kernel_object = KernelInfoEntry.kernel_object;
     packet->kernarg_address = 0;     // use the block allocator
     packet->reserved2 = 0;           // impl writes id_ here
@@ -1530,14 +1533,14 @@ struct device_environment {
   __tgt_device_image *image;
   const size_t img_size;
 
-  device_environment(int device_id, int number_devices,
+  device_environment(int device_id, int number_devices, int dynamic_mem_size,
                      __tgt_device_image *image, const size_t img_size)
       : image(image), img_size(img_size) {
 
     host_device_env.NumDevices = number_devices;
     host_device_env.DeviceNum = device_id;
     host_device_env.DebugKind = 0;
-    host_device_env.DynamicMemSize = 0;
+    host_device_env.DynamicMemSize = dynamic_mem_size;
     if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) {
       host_device_env.DebugKind = std::stoi(envStr);
     }
@@ -1861,8 +1864,9 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
   }
 
   {
-    auto env = device_environment(device_id, DeviceInfo.NumberOfDevices, image,
-                                  img_size);
+    auto env =
+        device_environment(device_id, DeviceInfo.NumberOfDevices,
+                           DeviceInfo.Env.DynamicMemSize, image, img_size);
 
     auto &KernelInfo = DeviceInfo.KernelInfoTable[device_id];
     auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id];

diff  --git a/openmp/libomptarget/test/api/omp_dynamic_shared_memory_amdgpu.c b/openmp/libomptarget/test/api/omp_dynamic_shared_memory_amdgpu.c
new file mode 100644
index 0000000000000..0b4d9d6ea9d46
--- /dev/null
+++ b/openmp/libomptarget/test/api/omp_dynamic_shared_memory_amdgpu.c
@@ -0,0 +1,25 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm -openmp-opt-inline-device
+// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
+// RUN:   %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
+// REQUIRES: amdgcn-amd-amdhsa
+
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+  int x;
+#pragma omp target parallel map(from : x)
+  {
+    int *buf = llvm_omp_target_dynamic_shared_alloc() + 252;
+#pragma omp barrier
+    if (omp_get_thread_num() == 0)
+      *buf = 1;
+#pragma omp barrier
+    if (omp_get_thread_num() == 1)
+      x = *buf;
+  }
+
+  // CHECK: PASS
+  if (x == 1 && llvm_omp_target_dynamic_shared_alloc() == NULL)
+    printf("PASS\n");
+}


        


More information about the Openmp-commits mailing list