[Openmp-commits] [openmp] 91dfb32 - [openmp][amdgpu][nfc] Mark all external functions extern C to get type checking
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Fri Dec 17 10:46:54 PST 2021
Author: Jon Chesterfield
Date: 2021-12-17T18:46:43Z
New Revision: 91dfb32f2fa56e6e71428f381529f1f9b7fdbbc2
URL: https://github.com/llvm/llvm-project/commit/91dfb32f2fa56e6e71428f381529f1f9b7fdbbc2
DIFF: https://github.com/llvm/llvm-project/commit/91dfb32f2fa56e6e71428f381529f1f9b7fdbbc2.diff
LOG: [openmp][amdgpu][nfc] Mark all external functions extern C to get type checking
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index a3d52706f13b2..41f5b1a570b5e 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1323,29 +1323,7 @@ uint32_t elf_e_flags(__tgt_device_image *image) {
DP("ELF Flags: 0x%x\n", Flags);
return Flags;
}
-} // namespace
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
- return elf_machine_id_is_amdgcn(image);
-}
-
-int __tgt_rtl_number_of_devices() {
- // If the construction failed, no methods are safe to call
- if (DeviceInfo.ConstructionSucceeded) {
- return DeviceInfo.NumberOfDevices;
- } else {
- DP("AMDGPU plugin construction failed. Zero devices available\n");
- return 0;
- }
-}
-
-int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
- DP("Init requires flags to %ld\n", RequiresFlags);
- DeviceInfo.RequiresFlags = RequiresFlags;
- return RequiresFlags;
-}
-
-namespace {
template <typename T> bool enforce_upper_bound(T *value, T upper) {
bool changed = *value > upper;
if (changed) {
@@ -1353,164 +1331,7 @@ template <typename T> bool enforce_upper_bound(T *value, T upper) {
}
return changed;
}
-} // namespace
-
-int32_t __tgt_rtl_init_device(int device_id) {
- hsa_status_t err;
-
- // this is per device id init
- DP("Initialize the device id: %d\n", device_id);
-
- hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
-
- // Get number of Compute Unit
- uint32_t compute_units = 0;
- err = hsa_agent_get_info(
- agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
- &compute_units);
- if (err != HSA_STATUS_SUCCESS) {
- DeviceInfo.ComputeUnits[device_id] = 1;
- DP("Error getting compute units : settiing to 1\n");
- } else {
- DeviceInfo.ComputeUnits[device_id] = compute_units;
- DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
- }
-
- char GetInfoName[64]; // 64 max size returned by get info
- err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
- (void *)GetInfoName);
- if (err)
- DeviceInfo.GPUName[device_id] = "--unknown gpu--";
- else {
- DeviceInfo.GPUName[device_id] = GetInfoName;
- }
-
- if (print_kernel_trace & STARTUP_DETAILS)
- DP("Device#%-2d CU's: %2d %s\n", device_id,
- DeviceInfo.ComputeUnits[device_id],
- DeviceInfo.GPUName[device_id].c_str());
-
- // Query attributes to determine number of threads/block and blocks/grid.
- uint16_t workgroup_max_dim[3];
- err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
- &workgroup_max_dim);
- if (err != HSA_STATUS_SUCCESS) {
- DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::DefaultNumTeams;
- DP("Error getting grid dims: num groups : %d\n",
- RTLDeviceInfoTy::DefaultNumTeams);
- } else if (workgroup_max_dim[0] <= RTLDeviceInfoTy::HardTeamLimit) {
- DeviceInfo.GroupsPerDevice[device_id] = workgroup_max_dim[0];
- DP("Using %d ROCm blocks per grid\n",
- DeviceInfo.GroupsPerDevice[device_id]);
- } else {
- DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::HardTeamLimit;
- DP("Max ROCm blocks per grid %d exceeds the hard team limit %d, capping "
- "at the hard limit\n",
- workgroup_max_dim[0], RTLDeviceInfoTy::HardTeamLimit);
- }
- // Get thread limit
- hsa_dim3_t grid_max_dim;
- err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim);
- if (err == HSA_STATUS_SUCCESS) {
- DeviceInfo.ThreadsPerGroup[device_id] =
- reinterpret_cast<uint32_t *>(&grid_max_dim)[0] /
- DeviceInfo.GroupsPerDevice[device_id];
-
- if (DeviceInfo.ThreadsPerGroup[device_id] == 0) {
- DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
- DP("Default thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
- } else if (enforce_upper_bound(&DeviceInfo.ThreadsPerGroup[device_id],
- RTLDeviceInfoTy::Max_WG_Size)) {
- DP("Capped thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
- } else {
- DP("Using ROCm Queried thread limit: %d\n",
- DeviceInfo.ThreadsPerGroup[device_id]);
- }
- } else {
- DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
- DP("Error getting max block dimension, use default:%d \n",
- RTLDeviceInfoTy::Max_WG_Size);
- }
-
- // Get wavefront size
- uint32_t wavefront_size = 0;
- err =
- hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size);
- if (err == HSA_STATUS_SUCCESS) {
- DP("Queried wavefront size: %d\n", wavefront_size);
- DeviceInfo.WarpSize[device_id] = wavefront_size;
- } else {
- // TODO: Burn the wavefront size into the code object
- DP("Warning: Unknown wavefront size, assuming 64\n");
- DeviceInfo.WarpSize[device_id] = 64;
- }
-
- // Adjust teams to the env variables
-
- if (DeviceInfo.Env.TeamLimit > 0 &&
- (enforce_upper_bound(&DeviceInfo.GroupsPerDevice[device_id],
- DeviceInfo.Env.TeamLimit))) {
- DP("Capping max groups per device to OMP_TEAM_LIMIT=%d\n",
- DeviceInfo.Env.TeamLimit);
- }
-
- // Set default number of teams
- if (DeviceInfo.Env.NumTeams > 0) {
- DeviceInfo.NumTeams[device_id] = DeviceInfo.Env.NumTeams;
- DP("Default number of teams set according to environment %d\n",
- DeviceInfo.Env.NumTeams);
- } else {
- char *TeamsPerCUEnvStr = getenv("OMP_TARGET_TEAMS_PER_PROC");
- int TeamsPerCU = DefaultTeamsPerCU;
- if (TeamsPerCUEnvStr) {
- TeamsPerCU = std::stoi(TeamsPerCUEnvStr);
- }
-
- DeviceInfo.NumTeams[device_id] =
- TeamsPerCU * DeviceInfo.ComputeUnits[device_id];
- DP("Default number of teams = %d * number of compute units %d\n",
- TeamsPerCU, DeviceInfo.ComputeUnits[device_id]);
- }
-
- if (enforce_upper_bound(&DeviceInfo.NumTeams[device_id],
- DeviceInfo.GroupsPerDevice[device_id])) {
- DP("Default number of teams exceeds device limit, capping at %d\n",
- DeviceInfo.GroupsPerDevice[device_id]);
- }
-
- // Adjust threads to the env variables
- if (DeviceInfo.Env.TeamThreadLimit > 0 &&
- (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
- DeviceInfo.Env.TeamThreadLimit))) {
- DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n",
- DeviceInfo.Env.TeamThreadLimit);
- }
-
- // Set default number of threads
- DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size;
- DP("Default number of threads set according to library's default %d\n",
- RTLDeviceInfoTy::Default_WG_Size);
- if (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
- DeviceInfo.ThreadsPerGroup[device_id])) {
- DP("Default number of threads exceeds device limit, capping at %d\n",
- DeviceInfo.ThreadsPerGroup[device_id]);
- }
-
- DP("Device %d: default limit for groupsPerDevice %d & threadsPerGroup %d\n",
- device_id, DeviceInfo.GroupsPerDevice[device_id],
- DeviceInfo.ThreadsPerGroup[device_id]);
-
- DP("Device %d: wavefront size %d, total threads %d x %d = %d\n", device_id,
- DeviceInfo.WarpSize[device_id], DeviceInfo.ThreadsPerGroup[device_id],
- DeviceInfo.GroupsPerDevice[device_id],
- DeviceInfo.GroupsPerDevice[device_id] *
- DeviceInfo.ThreadsPerGroup[device_id]);
-
- return OFFLOAD_SUCCESS;
-}
-
-namespace {
Elf64_Shdr *find_only_SHT_HASH(Elf *elf) {
size_t N;
int rc = elf_getshdrnum(elf, &N);
@@ -1660,9 +1481,8 @@ hsa_status_t module_register_from_memory_to_place(
DeviceInfo.HSAAgents[DeviceId], L, static_cast<void *>(&cb),
HSAExecutables);
}
-} // namespace
-static uint64_t get_device_State_bytes(char *ImageStart, size_t img_size) {
+uint64_t get_device_State_bytes(char *ImageStart, size_t img_size) {
uint64_t device_State_bytes = 0;
{
// If this is the deviceRTL, get the state variable size
@@ -1683,20 +1503,6 @@ static uint64_t get_device_State_bytes(char *ImageStart, size_t img_size) {
return device_State_bytes;
}
-static __tgt_target_table *
-__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_image *image);
-
-static __tgt_target_table *
-__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_image *image);
-
-__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
- __tgt_device_image *image) {
- DeviceInfo.load_run_lock.lock();
- __tgt_target_table *res = __tgt_rtl_load_binary_locked(device_id, image);
- DeviceInfo.load_run_lock.unlock();
- return res;
-}
-
struct device_environment {
// initialise an DeviceEnvironmentTy in the deviceRTL
// patches around differences in the deviceRTL between trunk, aomp,
@@ -1793,7 +1599,7 @@ struct device_environment {
}
};
-static hsa_status_t impl_calloc(void **ret_ptr, size_t size, int DeviceId) {
+hsa_status_t impl_calloc(void **ret_ptr, size_t size, int DeviceId) {
uint64_t rounded = 4 * ((size + 3) / 4);
void *ptr;
hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(DeviceId);
@@ -1813,12 +1619,208 @@ static hsa_status_t impl_calloc(void **ret_ptr, size_t size, int DeviceId) {
return HSA_STATUS_SUCCESS;
}
-static bool image_contains_symbol(void *data, size_t size, const char *sym) {
+bool image_contains_symbol(void *data, size_t size, const char *sym) {
symbol_info si;
int rc = get_symbol_info_without_loading((char *)data, size, sym, &si);
return (rc == 0) && (si.addr != nullptr);
}
+} // namespace
+
+namespace core {
+hsa_status_t allow_access_to_all_gpu_agents(void *ptr) {
+ return hsa_amd_agents_allow_access(DeviceInfo.HSAAgents.size(),
+ &DeviceInfo.HSAAgents[0], NULL, ptr);
+}
+} // namespace core
+
+extern "C" {
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
+ return elf_machine_id_is_amdgcn(image);
+}
+
+int __tgt_rtl_number_of_devices() {
+ // If the construction failed, no methods are safe to call
+ if (DeviceInfo.ConstructionSucceeded) {
+ return DeviceInfo.NumberOfDevices;
+ } else {
+ DP("AMDGPU plugin construction failed. Zero devices available\n");
+ return 0;
+ }
+}
+
+int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
+ DP("Init requires flags to %ld\n", RequiresFlags);
+ DeviceInfo.RequiresFlags = RequiresFlags;
+ return RequiresFlags;
+}
+
+int32_t __tgt_rtl_init_device(int device_id) {
+ hsa_status_t err;
+
+ // this is per device id init
+ DP("Initialize the device id: %d\n", device_id);
+
+ hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
+
+ // Get number of Compute Unit
+ uint32_t compute_units = 0;
+ err = hsa_agent_get_info(
+ agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
+ &compute_units);
+ if (err != HSA_STATUS_SUCCESS) {
+ DeviceInfo.ComputeUnits[device_id] = 1;
+ DP("Error getting compute units : settiing to 1\n");
+ } else {
+ DeviceInfo.ComputeUnits[device_id] = compute_units;
+ DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
+ }
+
+ char GetInfoName[64]; // 64 max size returned by get info
+ err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
+ (void *)GetInfoName);
+ if (err)
+ DeviceInfo.GPUName[device_id] = "--unknown gpu--";
+ else {
+ DeviceInfo.GPUName[device_id] = GetInfoName;
+ }
+
+ if (print_kernel_trace & STARTUP_DETAILS)
+ DP("Device#%-2d CU's: %2d %s\n", device_id,
+ DeviceInfo.ComputeUnits[device_id],
+ DeviceInfo.GPUName[device_id].c_str());
+
+ // Query attributes to determine number of threads/block and blocks/grid.
+ uint16_t workgroup_max_dim[3];
+ err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
+ &workgroup_max_dim);
+ if (err != HSA_STATUS_SUCCESS) {
+ DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::DefaultNumTeams;
+ DP("Error getting grid dims: num groups : %d\n",
+ RTLDeviceInfoTy::DefaultNumTeams);
+ } else if (workgroup_max_dim[0] <= RTLDeviceInfoTy::HardTeamLimit) {
+ DeviceInfo.GroupsPerDevice[device_id] = workgroup_max_dim[0];
+ DP("Using %d ROCm blocks per grid\n",
+ DeviceInfo.GroupsPerDevice[device_id]);
+ } else {
+ DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::HardTeamLimit;
+ DP("Max ROCm blocks per grid %d exceeds the hard team limit %d, capping "
+ "at the hard limit\n",
+ workgroup_max_dim[0], RTLDeviceInfoTy::HardTeamLimit);
+ }
+
+ // Get thread limit
+ hsa_dim3_t grid_max_dim;
+ err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim);
+ if (err == HSA_STATUS_SUCCESS) {
+ DeviceInfo.ThreadsPerGroup[device_id] =
+ reinterpret_cast<uint32_t *>(&grid_max_dim)[0] /
+ DeviceInfo.GroupsPerDevice[device_id];
+
+ if (DeviceInfo.ThreadsPerGroup[device_id] == 0) {
+ DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
+ DP("Default thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
+ } else if (enforce_upper_bound(&DeviceInfo.ThreadsPerGroup[device_id],
+ RTLDeviceInfoTy::Max_WG_Size)) {
+ DP("Capped thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
+ } else {
+ DP("Using ROCm Queried thread limit: %d\n",
+ DeviceInfo.ThreadsPerGroup[device_id]);
+ }
+ } else {
+ DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
+ DP("Error getting max block dimension, use default:%d \n",
+ RTLDeviceInfoTy::Max_WG_Size);
+ }
+
+ // Get wavefront size
+ uint32_t wavefront_size = 0;
+ err =
+ hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size);
+ if (err == HSA_STATUS_SUCCESS) {
+ DP("Queried wavefront size: %d\n", wavefront_size);
+ DeviceInfo.WarpSize[device_id] = wavefront_size;
+ } else {
+ // TODO: Burn the wavefront size into the code object
+ DP("Warning: Unknown wavefront size, assuming 64\n");
+ DeviceInfo.WarpSize[device_id] = 64;
+ }
+
+ // Adjust teams to the env variables
+
+ if (DeviceInfo.Env.TeamLimit > 0 &&
+ (enforce_upper_bound(&DeviceInfo.GroupsPerDevice[device_id],
+ DeviceInfo.Env.TeamLimit))) {
+ DP("Capping max groups per device to OMP_TEAM_LIMIT=%d\n",
+ DeviceInfo.Env.TeamLimit);
+ }
+
+ // Set default number of teams
+ if (DeviceInfo.Env.NumTeams > 0) {
+ DeviceInfo.NumTeams[device_id] = DeviceInfo.Env.NumTeams;
+ DP("Default number of teams set according to environment %d\n",
+ DeviceInfo.Env.NumTeams);
+ } else {
+ char *TeamsPerCUEnvStr = getenv("OMP_TARGET_TEAMS_PER_PROC");
+ int TeamsPerCU = DefaultTeamsPerCU;
+ if (TeamsPerCUEnvStr) {
+ TeamsPerCU = std::stoi(TeamsPerCUEnvStr);
+ }
+
+ DeviceInfo.NumTeams[device_id] =
+ TeamsPerCU * DeviceInfo.ComputeUnits[device_id];
+ DP("Default number of teams = %d * number of compute units %d\n",
+ TeamsPerCU, DeviceInfo.ComputeUnits[device_id]);
+ }
+
+ if (enforce_upper_bound(&DeviceInfo.NumTeams[device_id],
+ DeviceInfo.GroupsPerDevice[device_id])) {
+ DP("Default number of teams exceeds device limit, capping at %d\n",
+ DeviceInfo.GroupsPerDevice[device_id]);
+ }
+
+ // Adjust threads to the env variables
+ if (DeviceInfo.Env.TeamThreadLimit > 0 &&
+ (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
+ DeviceInfo.Env.TeamThreadLimit))) {
+ DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n",
+ DeviceInfo.Env.TeamThreadLimit);
+ }
+
+ // Set default number of threads
+ DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size;
+ DP("Default number of threads set according to library's default %d\n",
+ RTLDeviceInfoTy::Default_WG_Size);
+ if (enforce_upper_bound(&DeviceInfo.NumThreads[device_id],
+ DeviceInfo.ThreadsPerGroup[device_id])) {
+ DP("Default number of threads exceeds device limit, capping at %d\n",
+ DeviceInfo.ThreadsPerGroup[device_id]);
+ }
+
+ DP("Device %d: default limit for groupsPerDevice %d & threadsPerGroup %d\n",
+ device_id, DeviceInfo.GroupsPerDevice[device_id],
+ DeviceInfo.ThreadsPerGroup[device_id]);
+
+ DP("Device %d: wavefront size %d, total threads %d x %d = %d\n", device_id,
+ DeviceInfo.WarpSize[device_id], DeviceInfo.ThreadsPerGroup[device_id],
+ DeviceInfo.GroupsPerDevice[device_id],
+ DeviceInfo.GroupsPerDevice[device_id] *
+ DeviceInfo.ThreadsPerGroup[device_id]);
+
+ return OFFLOAD_SUCCESS;
+}
+
+static __tgt_target_table *
+__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_image *image);
+
+__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
+ __tgt_device_image *image) {
+ DeviceInfo.load_run_lock.lock();
+ __tgt_target_table *res = __tgt_rtl_load_binary_locked(device_id, image);
+ DeviceInfo.load_run_lock.unlock();
+ return res;
+}
+
__tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
__tgt_device_image *image) {
// This function loads the device image onto gpu[device_id] and does other
@@ -2328,11 +2330,4 @@ int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {
}
return OFFLOAD_SUCCESS;
}
-
-namespace core {
-hsa_status_t allow_access_to_all_gpu_agents(void *ptr) {
- return hsa_amd_agents_allow_access(DeviceInfo.HSAAgents.size(),
- &DeviceInfo.HSAAgents[0], NULL, ptr);
-}
-
-} // namespace core
+} // extern "C"
More information about the Openmp-commits
mailing list