[Openmp-commits] [PATCH] D114274: [openmp][amdgpu] Make plugin robust to presence of explicit implicit arguments

Jon Chesterfield via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Fri Nov 19 15:15:31 PST 2021


JonChesterfield updated this revision to Diff 388643.
JonChesterfield added a comment.

- use positional encoding, clang isn't setting print or hostcall metadata


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114274/new/

https://reviews.llvm.org/D114274

Files:
  openmp/libomptarget/plugins/amdgpu/impl/internal.h
  openmp/libomptarget/plugins/amdgpu/impl/system.cpp
  openmp/libomptarget/plugins/amdgpu/src/rtl.cpp


Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -2071,7 +2071,7 @@
   const uint32_t sgpr_spill_count = KernelInfoEntry.sgpr_spill_count;
   const uint32_t vgpr_spill_count = KernelInfoEntry.vgpr_spill_count;
 
-  assert(arg_num == (int)KernelInfoEntry.num_args);
+  assert(arg_num == (int)KernelInfoEntry.explicit_argument_count);
 
   /*
    * Set limit based on ThreadsPerGroup and GroupsPerDevice
@@ -2173,14 +2173,31 @@
         // under a multiple reader lock, not a writer lock.
         static pthread_mutex_t hostcall_init_lock = PTHREAD_MUTEX_INITIALIZER;
         pthread_mutex_lock(&hostcall_init_lock);
-        impl_args->hostcall_ptr = hostrpc_assign_buffer(
+        unsigned long buffer = hostrpc_assign_buffer(
             DeviceInfo.HSAAgents[device_id], queue, device_id);
         pthread_mutex_unlock(&hostcall_init_lock);
-        if (!impl_args->hostcall_ptr) {
+        if (!buffer) {
           DP("hostrpc_assign_buffer failed, gpu would dereference null and "
              "error\n");
           return OFFLOAD_FAIL;
         }
+
+        if (KernelInfoEntry.implicit_argument_count >= 4) {
+          // Initialise pointer for implicit_argument_count != 0 ABI
+          // Guess that the right implicit argument is at offset 24 after
+          // the explicit arguments. In the future, should be able to read
+          // the offset from msgpack. Clang is not annotating it at present.
+          uint64_t Offset =
+              sizeof(void *) * (KernelInfoEntry.explicit_argument_count + 3);
+          if ((Offset + 8) > (ArgPool->kernarg_segment_size)) {
+            DP("Bad offset of hostcall, exceeds kernarg segment size\n");
+          } else {
+            memcpy(static_cast<char *>(kernarg) + Offset, &buffer, 8);
+          }
+        }
+
+        // initialise pointer for implicit_argument_count == 0 ABI
+        impl_args->hostcall_ptr = buffer;
       }
 
       packet->kernarg_address = kernarg;
Index: openmp/libomptarget/plugins/amdgpu/impl/system.cpp
===================================================================
--- openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -381,7 +381,7 @@
       return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
     }
 
-    atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+    atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
     uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
     msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
@@ -446,8 +446,6 @@
         return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
       }
 
-      info.num_args = argsSize;
-
       for (size_t i = 0; i < argsSize; ++i) {
         KernelArgMD lcArg;
 
@@ -476,8 +474,10 @@
         // check if the arg is a hidden/implicit arg
         // this logic assumes that all hidden args are 8-byte aligned
         if (!isImplicit(lcArg.valueKind_)) {
+          info.explicit_argument_count++;
           kernel_explicit_args_size += lcArg.size_;
         } else {
+          info.implicit_argument_count++;
           hasHiddenArgs = true;
         }
         kernel_explicit_args_size += padding;
Index: openmp/libomptarget/plugins/amdgpu/impl/internal.h
===================================================================
--- openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -54,7 +54,8 @@
   uint32_t sgpr_spill_count;
   uint32_t vgpr_spill_count;
   uint32_t kernel_segment_size;
-  uint32_t num_args;
+  uint32_t explicit_argument_count;
+  uint32_t implicit_argument_count;
 } atl_kernel_info_t;
 
 typedef struct atl_symbol_info_s {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D114274.388643.patch
Type: text/x-patch
Size: 3864 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20211119/6c2cb885/attachment.bin>


More information about the Openmp-commits mailing list