[Openmp-commits] [openmp] 91dfb32 - [openmp][amdgpu][nfc] Mark all external functions extern C to get type checking

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Fri Dec 17 10:46:54 PST 2021


Author: Jon Chesterfield
Date: 2021-12-17T18:46:43Z
New Revision: 91dfb32f2fa56e6e71428f381529f1f9b7fdbbc2

URL: https://github.com/llvm/llvm-project/commit/91dfb32f2fa56e6e71428f381529f1f9b7fdbbc2
DIFF: https://github.com/llvm/llvm-project/commit/91dfb32f2fa56e6e71428f381529f1f9b7fdbbc2.diff

LOG: [openmp][amdgpu][nfc] Mark all external functions extern C to get type checking

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index a3d52706f13b2..41f5b1a570b5e 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1323,29 +1323,7 @@ uint32_t elf_e_flags(__tgt_device_image *image) {
   DP("ELF Flags: 0x%x\n", Flags);
   return Flags;
 }
-} // namespace
 
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
-  return elf_machine_id_is_amdgcn(image);
-}
-
-int __tgt_rtl_number_of_devices() {
-  // If the construction failed, no methods are safe to call
-  if (DeviceInfo.ConstructionSucceeded) {
-    return DeviceInfo.NumberOfDevices;
-  } else {
-    DP("AMDGPU plugin construction failed. Zero devices available\n");
-    return 0;
-  }
-}
-
-int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
-  DP("Init requires flags to %ld\n", RequiresFlags);
-  DeviceInfo.RequiresFlags = RequiresFlags;
-  return RequiresFlags;
-}
-
-namespace {
 template <typename T> bool enforce_upper_bound(T *value, T upper) {
   bool changed = *value > upper;
   if (changed) {
@@ -1353,164 +1331,7 @@ template <typename T> bool enforce_upper_bound(T *value, T upper) {
   }
   return changed;
 }
-} // namespace
-
-int32_t __tgt_rtl_init_device(int device_id) {
-  hsa_status_t err;
-
-  // this is per device id init
-  DP("Initialize the device id: %d\n", device_id);
-
-  hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
-
-  // Get number of Compute Unit
-  uint32_t compute_units = 0;
-  err = hsa_agent_get_info(
-      agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
-      &compute_units);
-  if (err != HSA_STATUS_SUCCESS) {
-    DeviceInfo.ComputeUnits[device_id] = 1;
-    DP("Error getting compute units : settiing to 1\n");
-  } else {
-    DeviceInfo.ComputeUnits[device_id] = compute_units;
-    DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
-  }
-
-  char GetInfoName[64]; // 64 max size returned by get info
-  err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
-                           (void *)GetInfoName);
-  if (err)
-    DeviceInfo.GPUName[device_id] = "--unknown gpu--";
-  else {
-    DeviceInfo.GPUName[device_id] = GetInfoName;
-  }
-
-  if (print_kernel_trace & STARTUP_DETAILS)
-    DP("Device#%-2d CU's: %2d %s\n", device_id,
-       DeviceInfo.ComputeUnits[device_id],
-       DeviceInfo.GPUName[device_id].c_str());
-
-  // Query attributes to determine number of threads/block and blocks/grid.
-  uint16_t workgroup_max_dim[3];
-  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
-                           &workgroup_max_dim);
-  if (err != HSA_STATUS_SUCCESS) {
-    DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::DefaultNumTeams;
-    DP("Error getting grid dims: num groups : %d\n",
-       RTLDeviceInfoTy::DefaultNumTeams);
-  } else if (workgroup_max_dim[0] <= RTLDeviceInfoTy::HardTeamLimit) {
-    DeviceInfo.GroupsPerDevice[device_id] = workgroup_max_dim[0];
-    DP("Using %d ROCm blocks per grid\n",
-       DeviceInfo.GroupsPerDevice[device_id]);
-  } else {
-    DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::HardTeamLimit;
-    DP("Max ROCm blocks per grid %d exceeds the hard team limit %d, capping "
-       "at the hard limit\n",
-       workgroup_max_dim[0], RTLDeviceInfoTy::HardTeamLimit);
-  }
 
-  // Get thread limit
-  hsa_dim3_t grid_max_dim;
-  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim);
-  if (err == HSA_STATUS_SUCCESS) {
-    DeviceInfo.ThreadsPerGroup[device_id] =
-        reinterpret_cast<uint32_t *>(&grid_max_dim)[0] /
-        DeviceInfo.GroupsPerDevice[device_id];
-
-    if (DeviceInfo.ThreadsPerGroup[device_id] == 0) {
-      DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
-      DP("Default thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
-    } else if (enforce_upper_bound(&DeviceInfo.ThreadsPerGroup[device_id],
-                                   RTLDeviceInfoTy::Max_WG_Size)) {
-      DP("Capped thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
-    } else {
-      DP("Using ROCm Queried thread limit: %d\n",
-         DeviceInfo.ThreadsPerGroup[device_id]);
-    }
-  } else {
-    DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
-    DP("Error getting max block dimension, use default:%d \n",
-       RTLDeviceInfoTy::Max_WG_Size);
-  }
-
-  // Get wavefront size
-  uint32_t wavefront_size = 0;
-  err =
-      hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size);
-  if (err == HSA_STATUS_SUCCESS) {
-    DP("Queried wavefront size: %d\n", wavefront_size);
-    DeviceInfo.WarpSize[device_id] = wavefront_size;
-  } else {
-    // TODO: Burn the wavefront size into the code object
-    DP("Warning: Unknown wavefront size, assuming 64\n");
-    DeviceInfo.WarpSize[device_id] = 64;
-  }
-
-  // Adjust teams to the env variables
-
-  if (DeviceInfo.Env.TeamLimit > 0 &&
-      (enforce_upper_bound(&DeviceInfo.GroupsPerDevice[device_id],
-                           DeviceInfo.Env.TeamLimit))) {
-    DP("Capping max groups per device to OMP_TEAM_LIMIT=%d\n",
-       DeviceInfo.Env.TeamLimit);
-  }
-
-  // Set default number of teams
-  if (DeviceInfo.Env.NumTeams > 0) {
-    DeviceInfo.NumTeams[device_id] = DeviceInfo.Env.NumTeams;
-    DP("Default number of teams set according to environment %d\n",
-       DeviceInfo.Env.NumTeams);
-  } else {
-    char *TeamsPerCUEnvStr = getenv("OMP_TARGET_TEAMS_PER_PROC");
-    int TeamsPerCU = DefaultTeamsPerCU;
-    if (TeamsPerCUEnvStr) {
-      TeamsPerCU = std::stoi(TeamsPerCUEnvStr);
-    }
-
-    DeviceInfo.NumTeams[device_id] =
-        TeamsPerCU * DeviceInfo.ComputeUnits[device_id];
-    DP("Default number of teams = %d * number of compute units %d\n",
-       TeamsPerCU, DeviceInfo.ComputeUnits[device_id]);
-  }
-
-  if (enforce_upper_bound(&DeviceInfo.NumTeams[device_id],
-                          DeviceInfo.GroupsPerDevice[device_id])) {
-    DP("Default number of teams exceeds device limit, capping at %d\n",
-       DeviceInfo.GroupsPerDevice[device_id]);
-  }
-
-  // Adjust threads to the env variables
-  if (DeviceInfo.Env.TeamThreadLimit > 0 &&
-      (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
-                           DeviceInfo.Env.TeamThreadLimit))) {
-    DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n",
-       DeviceInfo.Env.TeamThreadLimit);
-  }
-
-  // Set default number of threads
-  DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size;
-  DP("Default number of threads set according to library's default %d\n",
-     RTLDeviceInfoTy::Default_WG_Size);
-  if (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
-                          DeviceInfo.ThreadsPerGroup[device_id])) {
-    DP("Default number of threads exceeds device limit, capping at %d\n",
-       DeviceInfo.ThreadsPerGroup[device_id]);
-  }
-
-  DP("Device %d: default limit for groupsPerDevice %d & threadsPerGroup %d\n",
-     device_id, DeviceInfo.GroupsPerDevice[device_id],
-     DeviceInfo.ThreadsPerGroup[device_id]);
-
-  DP("Device %d: wavefront size %d, total threads %d x %d = %d\n", device_id,
-     DeviceInfo.WarpSize[device_id], DeviceInfo.ThreadsPerGroup[device_id],
-     DeviceInfo.GroupsPerDevice[device_id],
-     DeviceInfo.GroupsPerDevice[device_id] *
-         DeviceInfo.ThreadsPerGroup[device_id]);
-
-  return OFFLOAD_SUCCESS;
-}
-
-namespace {
 Elf64_Shdr *find_only_SHT_HASH(Elf *elf) {
   size_t N;
   int rc = elf_getshdrnum(elf, &N);
@@ -1660,9 +1481,8 @@ hsa_status_t module_register_from_memory_to_place(
       DeviceInfo.HSAAgents[DeviceId], L, static_cast<void *>(&cb),
       HSAExecutables);
 }
-} // namespace
 
-static uint64_t get_device_State_bytes(char *ImageStart, size_t img_size) {
+uint64_t get_device_State_bytes(char *ImageStart, size_t img_size) {
   uint64_t device_State_bytes = 0;
   {
     // If this is the deviceRTL, get the state variable size
@@ -1683,20 +1503,6 @@ static uint64_t get_device_State_bytes(char *ImageStart, size_t img_size) {
   return device_State_bytes;
 }
 
-static __tgt_target_table *
-__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_image *image);
-
-static __tgt_target_table *
-__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_image *image);
-
-__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
-                                          __tgt_device_image *image) {
-  DeviceInfo.load_run_lock.lock();
-  __tgt_target_table *res = __tgt_rtl_load_binary_locked(device_id, image);
-  DeviceInfo.load_run_lock.unlock();
-  return res;
-}
-
 struct device_environment {
   // initialise an DeviceEnvironmentTy in the deviceRTL
   // patches around differences in the deviceRTL between trunk, aomp,
@@ -1793,7 +1599,7 @@ struct device_environment {
   }
 };
 
-static hsa_status_t impl_calloc(void **ret_ptr, size_t size, int DeviceId) {
+hsa_status_t impl_calloc(void **ret_ptr, size_t size, int DeviceId) {
   uint64_t rounded = 4 * ((size + 3) / 4);
   void *ptr;
   hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(DeviceId);
@@ -1813,12 +1619,208 @@ static hsa_status_t impl_calloc(void **ret_ptr, size_t size, int DeviceId) {
   return HSA_STATUS_SUCCESS;
 }
 
-static bool image_contains_symbol(void *data, size_t size, const char *sym) {
+bool image_contains_symbol(void *data, size_t size, const char *sym) {
   symbol_info si;
   int rc = get_symbol_info_without_loading((char *)data, size, sym, &si);
   return (rc == 0) && (si.addr != nullptr);
 }
 
+} // namespace
+
+namespace core {
+hsa_status_t allow_access_to_all_gpu_agents(void *ptr) {
+  return hsa_amd_agents_allow_access(DeviceInfo.HSAAgents.size(),
+                                     &DeviceInfo.HSAAgents[0], NULL, ptr);
+}
+} // namespace core
+
+extern "C" {
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
+  return elf_machine_id_is_amdgcn(image);
+}
+
+int __tgt_rtl_number_of_devices() {
+  // If the construction failed, no methods are safe to call
+  if (DeviceInfo.ConstructionSucceeded) {
+    return DeviceInfo.NumberOfDevices;
+  } else {
+    DP("AMDGPU plugin construction failed. Zero devices available\n");
+    return 0;
+  }
+}
+
+int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
+  DP("Init requires flags to %ld\n", RequiresFlags);
+  DeviceInfo.RequiresFlags = RequiresFlags;
+  return RequiresFlags;
+}
+
+int32_t __tgt_rtl_init_device(int device_id) {
+  hsa_status_t err;
+
+  // this is per device id init
+  DP("Initialize the device id: %d\n", device_id);
+
+  hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
+
+  // Get number of Compute Unit
+  uint32_t compute_units = 0;
+  err = hsa_agent_get_info(
+      agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
+      &compute_units);
+  if (err != HSA_STATUS_SUCCESS) {
+    DeviceInfo.ComputeUnits[device_id] = 1;
+    DP("Error getting compute units : settiing to 1\n");
+  } else {
+    DeviceInfo.ComputeUnits[device_id] = compute_units;
+    DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
+  }
+
+  char GetInfoName[64]; // 64 max size returned by get info
+  err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
+                           (void *)GetInfoName);
+  if (err)
+    DeviceInfo.GPUName[device_id] = "--unknown gpu--";
+  else {
+    DeviceInfo.GPUName[device_id] = GetInfoName;
+  }
+
+  if (print_kernel_trace & STARTUP_DETAILS)
+    DP("Device#%-2d CU's: %2d %s\n", device_id,
+       DeviceInfo.ComputeUnits[device_id],
+       DeviceInfo.GPUName[device_id].c_str());
+
+  // Query attributes to determine number of threads/block and blocks/grid.
+  uint16_t workgroup_max_dim[3];
+  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
+                           &workgroup_max_dim);
+  if (err != HSA_STATUS_SUCCESS) {
+    DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::DefaultNumTeams;
+    DP("Error getting grid dims: num groups : %d\n",
+       RTLDeviceInfoTy::DefaultNumTeams);
+  } else if (workgroup_max_dim[0] <= RTLDeviceInfoTy::HardTeamLimit) {
+    DeviceInfo.GroupsPerDevice[device_id] = workgroup_max_dim[0];
+    DP("Using %d ROCm blocks per grid\n",
+       DeviceInfo.GroupsPerDevice[device_id]);
+  } else {
+    DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::HardTeamLimit;
+    DP("Max ROCm blocks per grid %d exceeds the hard team limit %d, capping "
+       "at the hard limit\n",
+       workgroup_max_dim[0], RTLDeviceInfoTy::HardTeamLimit);
+  }
+
+  // Get thread limit
+  hsa_dim3_t grid_max_dim;
+  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim);
+  if (err == HSA_STATUS_SUCCESS) {
+    DeviceInfo.ThreadsPerGroup[device_id] =
+        reinterpret_cast<uint32_t *>(&grid_max_dim)[0] /
+        DeviceInfo.GroupsPerDevice[device_id];
+
+    if (DeviceInfo.ThreadsPerGroup[device_id] == 0) {
+      DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
+      DP("Default thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
+    } else if (enforce_upper_bound(&DeviceInfo.ThreadsPerGroup[device_id],
+                                   RTLDeviceInfoTy::Max_WG_Size)) {
+      DP("Capped thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
+    } else {
+      DP("Using ROCm Queried thread limit: %d\n",
+         DeviceInfo.ThreadsPerGroup[device_id]);
+    }
+  } else {
+    DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
+    DP("Error getting max block dimension, use default:%d \n",
+       RTLDeviceInfoTy::Max_WG_Size);
+  }
+
+  // Get wavefront size
+  uint32_t wavefront_size = 0;
+  err =
+      hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size);
+  if (err == HSA_STATUS_SUCCESS) {
+    DP("Queried wavefront size: %d\n", wavefront_size);
+    DeviceInfo.WarpSize[device_id] = wavefront_size;
+  } else {
+    // TODO: Burn the wavefront size into the code object
+    DP("Warning: Unknown wavefront size, assuming 64\n");
+    DeviceInfo.WarpSize[device_id] = 64;
+  }
+
+  // Adjust teams to the env variables
+
+  if (DeviceInfo.Env.TeamLimit > 0 &&
+      (enforce_upper_bound(&DeviceInfo.GroupsPerDevice[device_id],
+                           DeviceInfo.Env.TeamLimit))) {
+    DP("Capping max groups per device to OMP_TEAM_LIMIT=%d\n",
+       DeviceInfo.Env.TeamLimit);
+  }
+
+  // Set default number of teams
+  if (DeviceInfo.Env.NumTeams > 0) {
+    DeviceInfo.NumTeams[device_id] = DeviceInfo.Env.NumTeams;
+    DP("Default number of teams set according to environment %d\n",
+       DeviceInfo.Env.NumTeams);
+  } else {
+    char *TeamsPerCUEnvStr = getenv("OMP_TARGET_TEAMS_PER_PROC");
+    int TeamsPerCU = DefaultTeamsPerCU;
+    if (TeamsPerCUEnvStr) {
+      TeamsPerCU = std::stoi(TeamsPerCUEnvStr);
+    }
+
+    DeviceInfo.NumTeams[device_id] =
+        TeamsPerCU * DeviceInfo.ComputeUnits[device_id];
+    DP("Default number of teams = %d * number of compute units %d\n",
+       TeamsPerCU, DeviceInfo.ComputeUnits[device_id]);
+  }
+
+  if (enforce_upper_bound(&DeviceInfo.NumTeams[device_id],
+                          DeviceInfo.GroupsPerDevice[device_id])) {
+    DP("Default number of teams exceeds device limit, capping at %d\n",
+       DeviceInfo.GroupsPerDevice[device_id]);
+  }
+
+  // Adjust threads to the env variables
+  if (DeviceInfo.Env.TeamThreadLimit > 0 &&
+      (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
+                           DeviceInfo.Env.TeamThreadLimit))) {
+    DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n",
+       DeviceInfo.Env.TeamThreadLimit);
+  }
+
+  // Set default number of threads
+  DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size;
+  DP("Default number of threads set according to library's default %d\n",
+     RTLDeviceInfoTy::Default_WG_Size);
+  if (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
+                          DeviceInfo.ThreadsPerGroup[device_id])) {
+    DP("Default number of threads exceeds device limit, capping at %d\n",
+       DeviceInfo.ThreadsPerGroup[device_id]);
+  }
+
+  DP("Device %d: default limit for groupsPerDevice %d & threadsPerGroup %d\n",
+     device_id, DeviceInfo.GroupsPerDevice[device_id],
+     DeviceInfo.ThreadsPerGroup[device_id]);
+
+  DP("Device %d: wavefront size %d, total threads %d x %d = %d\n", device_id,
+     DeviceInfo.WarpSize[device_id], DeviceInfo.ThreadsPerGroup[device_id],
+     DeviceInfo.GroupsPerDevice[device_id],
+     DeviceInfo.GroupsPerDevice[device_id] *
+         DeviceInfo.ThreadsPerGroup[device_id]);
+
+  return OFFLOAD_SUCCESS;
+}
+
+static __tgt_target_table *
+__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_image *image);
+
+__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
+                                          __tgt_device_image *image) {
+  DeviceInfo.load_run_lock.lock();
+  __tgt_target_table *res = __tgt_rtl_load_binary_locked(device_id, image);
+  DeviceInfo.load_run_lock.unlock();
+  return res;
+}
+
 __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
                                                  __tgt_device_image *image) {
   // This function loads the device image onto gpu[device_id] and does other
@@ -2328,11 +2330,4 @@ int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {
   }
   return OFFLOAD_SUCCESS;
 }
-
-namespace core {
-hsa_status_t allow_access_to_all_gpu_agents(void *ptr) {
-  return hsa_amd_agents_allow_access(DeviceInfo.HSAAgents.size(),
-                                     &DeviceInfo.HSAAgents[0], NULL, ptr);
-}
-
-} // namespace core
+} // extern "C"


        


More information about the Openmp-commits mailing list