[Openmp-commits] [openmp] d2f85d0 - [OpenMP][Libomptarget] Adding `print_device_info` to RTL and `omptarget`
Shilei Tian via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jul 27 18:48:02 PDT 2021
Author: Jose M Monsalve Diaz
Date: 2021-07-27T21:47:57-04:00
New Revision: d2f85d0910ce94cf55793d9cda09beaaa03e881e
URL: https://github.com/llvm/llvm-project/commit/d2f85d0910ce94cf55793d9cda09beaaa03e881e
DIFF: https://github.com/llvm/llvm-project/commit/d2f85d0910ce94cf55793d9cda09beaaa03e881e.diff
LOG: [OpenMP][Libomptarget] Adding `print_device_info` to RTL and `omptarget`
This patch introduces a function in the device's plugin to print the
device information. This patch relates to another patch that introduces
a CLI tool to obtain the device information from the omplibrary directly.
It is inspired by PGI's pgaccelinfo.
The modifications are as follows:
1. Introduce the optional `void __tgt_rtl_print_device_info(RTLdevID)` function into the RTL.
2. Introduce the `bool __tgt_print_device_info(devID)` function into the `omptarget` interface. It returns false if the RTL does not implement `__tgt_rtl_print_device_info`.
3. Added `bool printDeviceInfo(RTLDevID)` to the `DeviceTy`
4. Implement the `__tgt_rtl_print_device_info` for CUDA. Added additional CUDA Runtime calls.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D106751
Added:
Modified:
openmp/libomptarget/include/omptarget.h
openmp/libomptarget/include/omptargetplugin.h
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
openmp/libomptarget/plugins/cuda/src/rtl.cpp
openmp/libomptarget/plugins/exports
openmp/libomptarget/src/device.cpp
openmp/libomptarget/src/device.h
openmp/libomptarget/src/exports
openmp/libomptarget/src/interface.cpp
openmp/libomptarget/src/rtl.cpp
openmp/libomptarget/src/rtl.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 39c9f9e8031ab..60e49868a2742 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -333,6 +333,7 @@ void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
void __tgt_set_info_flag(uint32_t);
+int __tgt_print_device_info(int64_t device_id);
#ifdef __cplusplus
}
#endif
diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
index dbd38caf7aaa0..b7b3eb806981c 100644
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -142,6 +142,9 @@ int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
// Set plugin's internal information flag externally.
void __tgt_rtl_set_info_flag(uint32_t);
+// Print the device information
+void __tgt_rtl_print_device_info(int32_t ID);
+
#ifdef __cplusplus
}
#endif
diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
index 235efd2728ded..fb776f7ae5586 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
@@ -28,6 +28,11 @@ DLWRAP(cuDeviceGetAttribute, 3);
DLWRAP(cuDeviceGetCount, 1);
DLWRAP(cuFuncGetAttribute, 3);
+// Device info
+DLWRAP(cuDeviceGetName, 3);
+DLWRAP(cuDeviceTotalMem, 2);
+DLWRAP(cuDriverGetVersion, 1);
+
DLWRAP(cuGetErrorString, 2);
DLWRAP(cuLaunchKernel, 11);
diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
index 17aa2a12ef6c3..0814db7e9d267 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
@@ -72,6 +72,11 @@ CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
CUresult cuDeviceGetCount(int *);
CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);
+// Device info queries; CUdevice is passed by value, as in cuDeviceGetAttribute.
+CUresult cuDeviceGetName(char *, int, CUdevice);
+CUresult cuDeviceTotalMem(size_t *, CUdevice);
+CUresult cuDriverGetVersion(int *);
+
CUresult cuGetErrorString(CUresult, const char **);
CUresult cuInit(unsigned);
CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 18779d6833c19..678d8b447cacd 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -61,6 +61,8 @@
} while (false)
#endif // OMPTARGET_DEBUG
+#define BOOL2TEXT(b) ((b) ? "Yes" : "No")
+
#include "elf_common.h"
/// Keep entries table per device.
@@ -1157,6 +1159,178 @@ class DeviceRTLTy {
}
return (Err == CUDA_SUCCESS) ? OFFLOAD_SUCCESS : OFFLOAD_FAIL;
}
+
+  /// Print the properties of CUDA device \p device_id to stdout, one property
+  /// per line, as reported by the CUDA driver API queries below.
+  void printDeviceInfo(int32_t device_id) {
+    // Zero-initialized so that a failed query cannot leave garbage to print.
+    char TmpChar[1000] = "";
+    size_t TmpSt = 0;
+    int TmpInt = 0, TmpInt2 = 0, TmpInt3 = 0;
+    CUdevice Device;
+    checkResult(cuDeviceGet(&Device, device_id),
+                "Error returned from cuDeviceGet\n");
+    checkResult(cuDriverGetVersion(&TmpInt),
+                "Error returned from cuDriverGetVersion\n");
+    printf(" CUDA Driver Version: \t\t%d \n", TmpInt);
+    printf(" CUDA Device Number: \t\t%d \n", device_id);
+    checkResult(cuDeviceGetName(TmpChar, 1000, Device),
+                "Error returned from cuDeviceGetName\n");
+    printf(" Device Name: \t\t\t%s \n", TmpChar);
+    checkResult(cuDeviceTotalMem(&TmpSt, Device),
+                "Error returned from cuDeviceTotalMem\n");
+    printf(" Global Memory Size: \t\t%zu bytes \n", TmpSt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Number of Multiprocessors: \t\t%d \n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Concurrent Copy and Execution: \t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Total Constant Memory: \t\t%d bytes\n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(
+            &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Max Shared Memory per Block: \t%d bytes \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Registers per Block: \t\t%d \n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_WARP_SIZE, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Warp Size: \t\t\t\t%d Threads \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Maximum Threads per Block: \t\t%d \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
+                                     Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt2,
+                                     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt3,
+                                     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Maximum Block Dimensions: \t\t%d, %d, %d \n", TmpInt, TmpInt2,
+           TmpInt3);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt2, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
+                                     Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt3, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
+                                     Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Maximum Grid Dimensions: \t\t%d x %d x %d \n", TmpInt, TmpInt2,
+           TmpInt3);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_PITCH, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Maximum Memory Pitch: \t\t%d bytes \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Texture Alignment: \t\t\t%d bytes \n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Clock Rate: \t\t\t%d kHz\n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Execution Timeout: \t\t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_INTEGRATED, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Integrated Device: \t\t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Can Map Host Memory: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    const char *ModeStr = "unknown";
+    if (TmpInt == CU_COMPUTEMODE_DEFAULT)
+      ModeStr = "DEFAULT";
+    else if (TmpInt == CU_COMPUTEMODE_PROHIBITED)
+      ModeStr = "PROHIBITED";
+    else if (TmpInt == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
+      ModeStr = "EXCLUSIVE PROCESS";
+    printf(" Compute Mode: \t\t\t%s \n", ModeStr);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Concurrent Kernels: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" ECC Enabled: \t\t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Memory Clock Rate: \t\t\t%d kHz\n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Memory Bus Width: \t\t\t%d bits\n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" L2 Cache Size: \t\t\t%d bytes \n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(
+            &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Max Threads Per SMP: \t\t%d \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Async Engines: \t\t\t%s (%d) \n", BOOL2TEXT(TmpInt), TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Unified Addressing: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Managed Memory: \t\t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(
+            &TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Concurrent Managed Memory: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(
+            &TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf(" Preemption Supported: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Cooperative Launch: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD,
+                                     Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Multi-Device Boards: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(&TmpInt,
+                                     CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                                     Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt2,
+                                     CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+                                     Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
+  }
};
DeviceRTLTy DeviceRTL;
@@ -1357,6 +1531,11 @@ void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
InfoLevel.store(NewInfoLevel);
}
+void __tgt_rtl_print_device_info(int32_t device_id) { // Exported plugin hook.
+ assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
+ DeviceRTL.printDeviceInfo(device_id); // Prints the device's properties.
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports
index 6500f0688f328..61cc6746defdf 100644
--- a/openmp/libomptarget/plugins/exports
+++ b/openmp/libomptarget/plugins/exports
@@ -23,6 +23,7 @@ VERS1.0 {
__tgt_rtl_unregister_lib;
__tgt_rtl_supports_empty_images;
__tgt_rtl_set_info_flag;
+ __tgt_rtl_print_device_info;
local:
*;
};
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 380ee22a9fef2..f660d2321dfb6 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -511,6 +511,14 @@ int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr,
TgtOffsets, TgtVarsSize, AsyncInfo);
}
+// Print the device information through the RTL; false if the RTL cannot.
+bool DeviceTy::printDeviceInfo(int32_t RTLDevId) {
+ if (!RTL->print_device_info) // Optional RTL entry point; may be null.
+ return false;
+ RTL->print_device_info(RTLDevId);
+ return true;
+}
+
// Run team region on device.
int32_t DeviceTy::runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index b8f5ab2c7b8b4..e93f7b8d2c49b 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -275,6 +275,10 @@ struct DeviceTy {
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
int32_t synchronize(AsyncInfoTy &AsyncInfo);
+ /// Calls the print function of the device RTL to print the information of
+ /// device \p RTLDevID. Returns false if the RTL does not provide it.
+ bool printDeviceInfo(int32_t RTLDevID);
+
private:
// Call to RTL
void init(); // To be called only via DeviceTy::initOnce()
diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index 16639ab0966d2..c401c4afd3ccb 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -40,6 +40,7 @@ VERS1.0 {
llvm_omp_target_alloc_shared;
llvm_omp_target_alloc_device;
__tgt_set_info_flag;
+ __tgt_print_device_info;
local:
*;
};
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 853be82c893e6..79ba91df5b823 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -466,3 +466,8 @@ EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
R.set_info_flag(NewInfoLevel);
}
}
+
+EXTERN int __tgt_print_device_info(int64_t device_id) {
+  auto &Device = PM->Devices[device_id];
+  return Device.printDeviceInfo(Device.RTLDeviceID); // false if unsupported
+}
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 9721504048bd9..ec86182ab5cf8 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -177,6 +177,8 @@ void RTLsTy::LoadRTLs() {
dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images");
*((void **)&R.set_info_flag) =
dlsym(dynlib_handle, "__tgt_rtl_set_info_flag");
+ *((void **)&R.print_device_info) =
+ dlsym(dynlib_handle, "__tgt_rtl_print_device_info");
}
DP("RTLs loaded!\n");
diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h
index 35313dff3c3ff..ed87b6c36e0cf 100644
--- a/openmp/libomptarget/src/rtl.h
+++ b/openmp/libomptarget/src/rtl.h
@@ -55,6 +55,7 @@ struct RTLInfoTy {
typedef int64_t(synchronize_ty)(int32_t, __tgt_async_info *);
typedef int32_t (*register_lib_ty)(__tgt_bin_desc *);
typedef int32_t(supports_empty_images_ty)();
+ typedef void(print_device_info_ty)(int32_t);
typedef void(set_info_flag_ty)(uint32_t);
int32_t Idx = -1; // RTL index, index is the number of devices
@@ -93,6 +94,7 @@ struct RTLInfoTy {
register_lib_ty unregister_lib = nullptr;
supports_empty_images_ty *supports_empty_images = nullptr;
set_info_flag_ty *set_info_flag = nullptr;
+ print_device_info_ty *print_device_info = nullptr;
// Are there images associated with this RTL.
bool isUsed = false;
More information about the Openmp-commits
mailing list