[llvm-branch-commits] [openmp] cab9f69 - [libomptarget][amdgpu] Improve diagnostics on arch mismatch
Jon Chesterfield via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 9 11:01:20 PST 2020
Author: Jon Chesterfield
Date: 2020-12-09T18:55:53Z
New Revision: cab9f6923522475e0d2137c66622c3fa70b01d3b
URL: https://github.com/llvm/llvm-project/commit/cab9f6923522475e0d2137c66622c3fa70b01d3b
DIFF: https://github.com/llvm/llvm-project/commit/cab9f6923522475e0d2137c66622c3fa70b01d3b.diff
LOG: [libomptarget][amdgpu] Improve diagnostics on arch mismatch
Added:
openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
Modified:
openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
index 0c50ffdf2fa6..38f0afabf3ad 100644
--- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
@@ -57,6 +57,7 @@ add_library(omptarget.rtl.amdgpu SHARED
impl/atmi.cpp
impl/atmi_interop_hsa.cpp
impl/data.cpp
+ impl/get_elf_mach_gfx_name.cpp
impl/machine.cpp
impl/system.cpp
impl/utils.cpp
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
new file mode 100644
index 000000000000..45af34684117
--- /dev/null
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
@@ -0,0 +1,53 @@
+#include "get_elf_mach_gfx_name.h"
+
+// This header conflicts with the system elf.h (macros vs enums of the same
+// identifier) and contains more up to date values for the enum checked here.
+// rtl.cpp uses the system elf.h.
+#include "llvm/BinaryFormat/ELF.h"
+
+const char *get_elf_mach_gfx_name(uint32_t EFlags) { // Name the AMDGPU target encoded in an ELF e_flags word; returns a string literal.
+ using namespace llvm::ELF; // brings the EF_AMDGPU_* names used below into scope
+ uint32_t Gfx = (EFlags & EF_AMDGPU_MACH); // keep only the bits selected by the EF_AMDGPU_MACH mask
+ switch (Gfx) { // one case per known machine value, each returning the matching lowercase "gfx..." name
+ case EF_AMDGPU_MACH_AMDGCN_GFX801:
+ return "gfx801";
+ case EF_AMDGPU_MACH_AMDGCN_GFX802:
+ return "gfx802";
+ case EF_AMDGPU_MACH_AMDGCN_GFX803:
+ return "gfx803";
+ case EF_AMDGPU_MACH_AMDGCN_GFX805:
+ return "gfx805";
+ case EF_AMDGPU_MACH_AMDGCN_GFX810:
+ return "gfx810";
+ case EF_AMDGPU_MACH_AMDGCN_GFX900:
+ return "gfx900";
+ case EF_AMDGPU_MACH_AMDGCN_GFX902:
+ return "gfx902";
+ case EF_AMDGPU_MACH_AMDGCN_GFX904:
+ return "gfx904";
+ case EF_AMDGPU_MACH_AMDGCN_GFX906:
+ return "gfx906";
+ case EF_AMDGPU_MACH_AMDGCN_GFX908:
+ return "gfx908";
+ case EF_AMDGPU_MACH_AMDGCN_GFX909:
+ return "gfx909";
+ case EF_AMDGPU_MACH_AMDGCN_GFX90C:
+ return "gfx90c";
+ case EF_AMDGPU_MACH_AMDGCN_GFX1010:
+ return "gfx1010";
+ case EF_AMDGPU_MACH_AMDGCN_GFX1011:
+ return "gfx1011";
+ case EF_AMDGPU_MACH_AMDGCN_GFX1012:
+ return "gfx1012";
+ case EF_AMDGPU_MACH_AMDGCN_GFX1030:
+ return "gfx1030";
+ case EF_AMDGPU_MACH_AMDGCN_GFX1031:
+ return "gfx1031";
+ case EF_AMDGPU_MACH_AMDGCN_GFX1032:
+ return "gfx1032";
+ case EF_AMDGPU_MACH_AMDGCN_GFX1033:
+ return "gfx1033";
+ default: // any masked value not listed above
+ return "--unknown gfx"; // placeholder text; never a null pointer
+ }
+}
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
new file mode 100644
index 000000000000..b1be90dc29d5
--- /dev/null
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
@@ -0,0 +1,8 @@
+#ifndef GET_ELF_MACH_GFX_NAME_H_INCLUDED
+#define GET_ELF_MACH_GFX_NAME_H_INCLUDED
+
+#include <stdint.h>
+
+const char *get_elf_mach_gfx_name(uint32_t EFlags); // Returns a "gfx..." name for the machine field of ELF e_flags, or "--unknown gfx" if unrecognised.
+
+#endif
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 252abca08944..60040d1c0da4 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -36,6 +36,7 @@
#include "internal.h"
#include "Debug.h"
+#include "get_elf_mach_gfx_name.h"
#include "omptargetplugin.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
@@ -92,14 +93,6 @@ uint32_t TgtStackItemSize = 0;
#include "../../common/elf_common.c"
-static bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
- const uint16_t amdgcnMachineID = 224;
- int32_t r = elf_check_machine(image, amdgcnMachineID);
- if (!r) {
- DP("Supported machine ID not found\n");
- }
- return r;
-}
/// Keep entries table per device
struct FuncOrGblEntryTy {
@@ -319,6 +312,7 @@ class RTLDeviceInfoTy {
std::vector<int> GroupsPerDevice;
std::vector<int> ThreadsPerGroup;
std::vector<int> WarpSize;
+ std::vector<std::string> GPUName;
// OpenMP properties
std::vector<int> NumTeams;
@@ -472,6 +466,7 @@ class RTLDeviceInfoTy {
FuncGblEntries.resize(NumberOfDevices);
ThreadsPerGroup.resize(NumberOfDevices);
ComputeUnits.resize(NumberOfDevices);
+ GPUName.resize(NumberOfDevices);
GroupsPerDevice.resize(NumberOfDevices);
WarpSize.resize(NumberOfDevices);
NumTeams.resize(NumberOfDevices);
@@ -642,6 +637,40 @@ void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
assert(async_info_ptr->Queue);
async_info_ptr->Queue = 0;
}
+
+bool elf_machine_id_is_amdgcn(__tgt_device_image *image) { // True when elf_check_machine() reports a nonzero result for image against EM_AMDGPU.
+ const uint16_t amdgcnMachineID = EM_AMDGPU;
+ int32_t r = elf_check_machine(image, amdgcnMachineID); // presumably provided by the included ../../common/elf_common.c - confirm
+ if (!r) {
+ DP("Supported machine ID not found\n"); // debug trace only; the zero result is still returned below
+ }
+ return r; // int32_t converted to bool: any nonzero value becomes true
+}
+
+uint32_t elf_e_flags(__tgt_device_image *image) { // Read e_flags from the image's ELF header; returns 0 if the handle or header cannot be obtained.
+ char *img_begin = (char *)image->ImageStart;
+ size_t img_size = (char *)image->ImageEnd - img_begin; // byte distance between ImageStart and ImageEnd
+
+ Elf *e = elf_memory(img_begin, img_size); // handle is passed to elf_end() on every path after this check
+ if (!e) {
+ DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
+ return 0; // NOTE(review): callers cannot tell this apart from a header whose e_flags is 0
+ }
+
+ Elf64_Ehdr *eh64 = elf64_getehdr(e); // only the 64-bit header accessor is tried
+
+ if (!eh64) {
+ DP("Unable to get machine ID from ELF file!\n"); // NOTE(review): wording says "machine ID" but this path is a failed header read for e_flags
+ elf_end(e);
+ return 0;
+ }
+
+ uint32_t Flags = eh64->e_flags; // value is copied out before elf_end(e) is called
+
+ elf_end(e);
+ DP("ELF Flags: 0x%x\n", Flags);
+ return Flags;
+}
} // namespace
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
@@ -676,9 +705,20 @@ int32_t __tgt_rtl_init_device(int device_id) {
DeviceInfo.ComputeUnits[device_id] = compute_units;
DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
}
+
+ char GetInfoName[64]; // 64 max size returned by get info
+ err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
+ (void *) GetInfoName);
+ if (err)
+ DeviceInfo.GPUName[device_id] = "--unknown gpu--";
+ else {
+ DeviceInfo.GPUName[device_id] = GetInfoName;
+ }
+
if (print_kernel_trace == 4)
- fprintf(stderr, "Device#%-2d CU's: %2d\n", device_id,
- DeviceInfo.ComputeUnits[device_id]);
+ fprintf(stderr, "Device#%-2d CU's: %2d %s\n", device_id,
+ DeviceInfo.ComputeUnits[device_id],
+ DeviceInfo.GPUName[device_id].c_str());
// Query attributes to determine number of threads/block and blocks/grid.
uint16_t workgroup_max_dim[3];
@@ -1038,22 +1078,18 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
return ATMI_STATUS_SUCCESS;
};
- atmi_status_t err;
{
- err = module_register_from_memory_to_place(
+ atmi_status_t err = module_register_from_memory_to_place(
(void *)image->ImageStart, img_size, get_gpu_place(device_id),
on_deserialized_data);
check("Module registering", err);
if (err != ATMI_STATUS_SUCCESS) {
- char GPUName[64] = "--unknown gpu--";
- hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
- (void)hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
- (void *)GPUName);
fprintf(stderr,
- "Possible gpu arch mismatch: %s, please check"
- " compiler: -march=<gpu> flag\n",
- GPUName);
+ "Possible gpu arch mismatch: device:%s, image:%s please check"
+ " compiler flag: -march=<gpu>\n",
+ DeviceInfo.GPUName[device_id].c_str(),
+ get_elf_mach_gfx_name(elf_e_flags(image)));
return NULL;
}
}
@@ -1149,8 +1185,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
void *varptr;
uint32_t varsize;
- err = atmi_interop_hsa_get_symbol_info(get_gpu_mem_place(device_id),
- e->name, &varptr, &varsize);
+ atmi_status_t err = atmi_interop_hsa_get_symbol_info(
+ get_gpu_mem_place(device_id), e->name, &varptr, &varsize);
if (err != ATMI_STATUS_SUCCESS) {
DP("Loading global '%s' (Failed)\n", e->name);
@@ -1192,7 +1228,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
atmi_mem_place_t place = get_gpu_mem_place(device_id);
uint32_t kernarg_segment_size;
- err = atmi_interop_hsa_get_kernel_info(
+ atmi_status_t err = atmi_interop_hsa_get_kernel_info(
place, e->name, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
&kernarg_segment_size);
More information about the llvm-branch-commits
mailing list