[llvm-branch-commits] [openmp] e1b8e8a - [libomptarget][amdgpu] Skip device_State allocation when using bss global

Jon Chesterfield via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Dec 6 04:18:45 PST 2020


Author: Jon Chesterfield
Date: 2020-12-06T12:13:56Z
New Revision: e1b8e8a1f4c35c8596956d56ffc9f1d91b64f780

URL: https://github.com/llvm/llvm-project/commit/e1b8e8a1f4c35c8596956d56ffc9f1d91b64f780
DIFF: https://github.com/llvm/llvm-project/commit/e1b8e8a1f4c35c8596956d56ffc9f1d91b64f780.diff

LOG: [libomptarget][amdgpu] Skip device_State allocation when using bss global

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index ea8770e4543a..e688ef7f41ec 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1033,54 +1033,64 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
 
   DP("ATMI module successfully loaded!\n");
 
-  // Zero the pseudo-bss variable by calling into hsa
-  // Do this post-load to handle got
-  uint64_t device_State_bytes =
-      get_device_State_bytes((char *)image->ImageStart, img_size);
-  auto &dss = DeviceInfo.deviceStateStore[device_id];
-  if (device_State_bytes != 0) {
-
-    if (dss.first.get() == nullptr) {
-      assert(dss.second == 0);
-      void *ptr = NULL;
-      atmi_status_t err =
-          atmi_calloc(&ptr, device_State_bytes, get_gpu_mem_place(device_id));
-      if (err != ATMI_STATUS_SUCCESS) {
-        fprintf(stderr, "Failed to allocate device_state array\n");
-        return NULL;
-      }
-      dss = {std::unique_ptr<void, RTLDeviceInfoTy::atmiFreePtrDeletor>{ptr},
-             device_State_bytes};
-    }
-
-    void *ptr = dss.first.get();
-    if (device_State_bytes != dss.second) {
-      fprintf(stderr, "Inconsistent sizes of device_State unsupported\n");
-      exit(1);
-    }
+  {
+    // the device_State array is either large value in bss or a void* that
+    // needs to be assigned to a pointer to an array of size device_state_bytes
 
     void *state_ptr;
     uint32_t state_ptr_size;
-    err = atmi_interop_hsa_get_symbol_info(get_gpu_mem_place(device_id),
-                                           "omptarget_nvptx_device_State",
-                                           &state_ptr, &state_ptr_size);
+    atmi_status_t err = atmi_interop_hsa_get_symbol_info(
+        get_gpu_mem_place(device_id), "omptarget_nvptx_device_State",
+        &state_ptr, &state_ptr_size);
 
     if (err != ATMI_STATUS_SUCCESS) {
-      fprintf(stderr, "failed to find device_state ptr\n");
+      fprintf(stderr, "failed to find device_state symbol\n");
       return NULL;
     }
-    if (state_ptr_size != sizeof(void *)) {
+
+    if (state_ptr_size < sizeof(void *)) {
       fprintf(stderr, "unexpected size of state_ptr %u != %zu\n",
               state_ptr_size, sizeof(void *));
       return NULL;
     }
 
-    // write ptr to device memory so it can be used by later kernels
-    err = DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &ptr, sizeof(void *),
-                                               device_id);
-    if (err != ATMI_STATUS_SUCCESS) {
-      fprintf(stderr, "memcpy install of state_ptr failed\n");
-      return NULL;
+    // if it's larger than a void*, assume it's a bss array and no further
+    // initialization is required. Only try to set up a pointer for
+    // sizeof(void*)
+    if (state_ptr_size == sizeof(void *)) {
+      uint64_t device_State_bytes =
+          get_device_State_bytes((char *)image->ImageStart, img_size);
+      if (device_State_bytes == 0) {
+        return NULL;
+      }
+
+      auto &dss = DeviceInfo.deviceStateStore[device_id];
+      if (dss.first.get() == nullptr) {
+        assert(dss.second == 0);
+        void *ptr = NULL;
+        atmi_status_t err =
+            atmi_calloc(&ptr, device_State_bytes, get_gpu_mem_place(device_id));
+        if (err != ATMI_STATUS_SUCCESS) {
+          fprintf(stderr, "Failed to allocate device_state array\n");
+          return NULL;
+        }
+        dss = {std::unique_ptr<void, RTLDeviceInfoTy::atmiFreePtrDeletor>{ptr},
+               device_State_bytes};
+      }
+
+      void *ptr = dss.first.get();
+      if (device_State_bytes != dss.second) {
+        fprintf(stderr, "Inconsistent sizes of device_State unsupported\n");
+        exit(1);
+      }
+
+      // write ptr to device memory so it can be used by later kernels
+      err = DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &ptr,
+                                                 sizeof(void *), device_id);
+      if (err != ATMI_STATUS_SUCCESS) {
+        fprintf(stderr, "memcpy install of state_ptr failed\n");
+        return NULL;
+      }
     }
   }
 


        


More information about the llvm-branch-commits mailing list