[Openmp-commits] [openmp] d2f85d0 - [OpenMP][Libomptarget] Adding `print_device_info` to RTL and `omptarget`

Shilei Tian via Openmp-commits openmp-commits at lists.llvm.org
Tue Jul 27 18:48:02 PDT 2021


Author: Jose M Monsalve Diaz
Date: 2021-07-27T21:47:57-04:00
New Revision: d2f85d0910ce94cf55793d9cda09beaaa03e881e

URL: https://github.com/llvm/llvm-project/commit/d2f85d0910ce94cf55793d9cda09beaaa03e881e
DIFF: https://github.com/llvm/llvm-project/commit/d2f85d0910ce94cf55793d9cda09beaaa03e881e.diff

LOG: [OpenMP][Libomptarget] Adding `print_device_info` to RTL and `omptarget`

This patch introduces a function in the device's plugin to print the
device information. This patch relates to another patch that introduces
a CLI tool to obtain the device information from the omplibrary directly.
It is inspired by PGI's pgaccelinfo.

The modifications are as follows:
1. Introduce the optional `void __tgt_rtl_print_device_info(RTLdevID)` function into the RTL.
2. Introduce the `int __tgt_print_device_info(devID)` function into the `omptarget` interface. It returns false (0) if the RTL does not implement `__tgt_rtl_print_device_info`.
3. Added `bool printDeviceInfo(RTLDevID)` to the `DeviceTy`
4. Implement the `__tgt_rtl_print_device_info` for CUDA. Added additional CUDA Runtime calls.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D106751

Added: 
    

Modified: 
    openmp/libomptarget/include/omptarget.h
    openmp/libomptarget/include/omptargetplugin.h
    openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
    openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
    openmp/libomptarget/plugins/cuda/src/rtl.cpp
    openmp/libomptarget/plugins/exports
    openmp/libomptarget/src/device.cpp
    openmp/libomptarget/src/device.h
    openmp/libomptarget/src/exports
    openmp/libomptarget/src/interface.cpp
    openmp/libomptarget/src/rtl.cpp
    openmp/libomptarget/src/rtl.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 39c9f9e8031ab..60e49868a2742 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -333,6 +333,7 @@ void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
 
 void __tgt_set_info_flag(uint32_t);
 
+int __tgt_print_device_info(int64_t device_id);
 #ifdef __cplusplus
 }
 #endif

diff  --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
index dbd38caf7aaa0..b7b3eb806981c 100644
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -142,6 +142,9 @@ int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
 // Set plugin's internal information flag externally.
 void __tgt_rtl_set_info_flag(uint32_t);
 
+// Print the device information
+void __tgt_rtl_print_device_info(int32_t ID);
+
 #ifdef __cplusplus
 }
 #endif

diff  --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
index 235efd2728ded..fb776f7ae5586 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
@@ -28,6 +28,11 @@ DLWRAP(cuDeviceGetAttribute, 3);
 DLWRAP(cuDeviceGetCount, 1);
 DLWRAP(cuFuncGetAttribute, 3);
 
+// Device info queries; the number matches each function's parameter count.
+DLWRAP(cuDeviceGetName, 3);
+DLWRAP(cuDeviceTotalMem, 2);
+DLWRAP(cuDriverGetVersion, 1);
+
 DLWRAP(cuGetErrorString, 2);
 DLWRAP(cuLaunchKernel, 11);
 

diff  --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
index 17aa2a12ef6c3..0814db7e9d267 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
@@ -72,6 +72,11 @@ CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
 CUresult cuDeviceGetCount(int *);
 CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);
 
+// Device info (CUdevice handles are passed by value, as in the driver API)
+CUresult cuDeviceGetName(char *, int, CUdevice);
+CUresult cuDeviceTotalMem(size_t *, CUdevice);
+CUresult cuDriverGetVersion(int *);
+
 CUresult cuGetErrorString(CUresult, const char **);
 CUresult cuInit(unsigned);
 CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,

diff  --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 18779d6833c19..678d8b447cacd 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -61,6 +61,8 @@
   } while (false)
 #endif // OMPTARGET_DEBUG
 
+#define BOOL2TEXT(b) ((b) ? "Yes" : "No")
+
 #include "elf_common.h"
 
 /// Keep entries table per device.
@@ -1157,6 +1159,178 @@ class DeviceRTLTy {
     }
     return (Err == CUDA_SUCCESS) ? OFFLOAD_SUCCESS : OFFLOAD_FAIL;
   }
+
+  /// Print the properties of CUDA device \p device_id to stdout. Every driver
+  /// result goes through checkResult; its return value is not acted upon here.
+  void printDeviceInfo(int32_t device_id) {
+    char TmpChar[1000] = {0};
+    size_t TmpSt = 0;
+    int TmpInt = 0, TmpInt2 = 0, TmpInt3 = 0;
+
+    CUdevice Device;
+    checkResult(cuDeviceGet(&Device, device_id),
+                "Error returned from cuDeviceGet\n");
+    checkResult(cuDriverGetVersion(&TmpInt),
+                "Error returned from cuDriverGetVersion\n");
+    printf("    CUDA Driver Version: \t\t%d \n", TmpInt);
+    printf("    CUDA Device Number: \t\t%d \n", device_id);
+    checkResult(cuDeviceGetName(TmpChar, 1000, Device),
+                "Error returned from cuDeviceGetName\n");
+    printf("    Device Name: \t\t\t%s \n", TmpChar);
+    checkResult(cuDeviceTotalMem(&TmpSt, Device),
+                "Error returned from cuDeviceTotalMem\n");
+    printf("    Global Memory Size: \t\t%zu bytes \n", TmpSt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Number of Multiprocessors: \t\t%d \n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Concurrent Copy and Execution: \t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Total Constant Memory: \t\t%d bytes\n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(
+            &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Max Shared Memory per Block: \t%d bytes \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Registers per Block: \t\t%d \n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_WARP_SIZE, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Warp Size: \t\t\t\t%d Threads \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Maximum Threads per Block: \t\t%d \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
+                                    Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt2,
+                                    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt3,
+                                    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Maximum Block Dimensions: \t\t%d, %d, %d \n", TmpInt, TmpInt2,
+          TmpInt3);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt2, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
+                                    Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt3, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
+                                    Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Maximum Grid Dimensions: \t\t%d x %d x %d \n", TmpInt, TmpInt2,
+          TmpInt3);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_PITCH, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Maximum Memory Pitch: \t\t%d bytes \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Texture Alignment: \t\t\t%d bytes \n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Clock Rate: \t\t\t%d kHz\n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Execution Timeout: \t\t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_INTEGRATED, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Integrated Device: \t\t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Can Map Host Memory: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    const char *ComputeMode = "unknown";
+    if (TmpInt == CU_COMPUTEMODE_DEFAULT)
+      ComputeMode = "DEFAULT";
+    else if (TmpInt == CU_COMPUTEMODE_PROHIBITED)
+      ComputeMode = "PROHIBITED";
+    else if (TmpInt == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
+      ComputeMode = "EXCLUSIVE PROCESS";
+    printf("    Compute Mode: \t\t\t%s \n", ComputeMode);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Concurrent Kernels: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    ECC Enabled: \t\t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Memory Clock Rate: \t\t\t%d kHz\n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Memory Bus Width: \t\t\t%d bits\n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    L2 Cache Size: \t\t\t%d bytes \n", TmpInt);
+    checkResult(
+        cuDeviceGetAttribute(
+            &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Max Threads Per SMP: \t\t%d \n", TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Async Engines: \t\t\t%s (%d) \n", BOOL2TEXT(TmpInt), TmpInt);
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Unified Addressing: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Managed Memory: \t\t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(
+            &TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Concurrent Managed Memory: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(
+        cuDeviceGetAttribute(
+            &TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, Device),
+        "Error returned from cuDeviceGetAttribute\n");
+    printf("    Preemption Supported: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(
+                    &TmpInt, CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Cooperative Launch: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD,
+                                    Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Multi-Device Boards: \t\t%s \n", BOOL2TEXT(TmpInt));
+    checkResult(cuDeviceGetAttribute(&TmpInt,
+                                    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                                    Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    checkResult(cuDeviceGetAttribute(&TmpInt2,
+                                    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+                                    Device),
+                "Error returned from cuDeviceGetAttribute\n");
+    printf("    Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
+  }
 };
 
 DeviceRTLTy DeviceRTL;
@@ -1357,6 +1531,11 @@ void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
   InfoLevel.store(NewInfoLevel);
 }
 
+void __tgt_rtl_print_device_info(int32_t device_id) {
+  assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
+  DeviceRTL.printDeviceInfo(device_id); // writes the report to stdout
+}
+
 #ifdef __cplusplus
 }
 #endif

diff  --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports
index 6500f0688f328..61cc6746defdf 100644
--- a/openmp/libomptarget/plugins/exports
+++ b/openmp/libomptarget/plugins/exports
@@ -23,6 +23,7 @@ VERS1.0 {
     __tgt_rtl_unregister_lib;
     __tgt_rtl_supports_empty_images;
     __tgt_rtl_set_info_flag;
+    __tgt_rtl_print_device_info;
   local:
     *;
 };

diff  --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 380ee22a9fef2..f660d2321dfb6 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -511,6 +511,14 @@ int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr,
                                  TgtOffsets, TgtVarsSize, AsyncInfo);
 }
 
+// Print device info through the RTL; false if the RTL lacks the entry point.
+bool DeviceTy::printDeviceInfo(int32_t RTLDevId) {
+  if (!RTL->print_device_info)
+    return false;
+  RTL->print_device_info(RTLDevId);
+  return true;
+}
+
 // Run team region on device.
 int32_t DeviceTy::runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
                                 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,

diff  --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index b8f5ab2c7b8b4..e93f7b8d2c49b 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -275,6 +275,10 @@ struct DeviceTy {
   /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
   int32_t synchronize(AsyncInfoTy &AsyncInfo);
 
+  /// Ask the RTL to print the information of its device \p RTLDevID.
+  /// Returns false when the RTL does not provide print_device_info.
+  bool printDeviceInfo(int32_t RTLDevID);
+
 private:
   // Call to RTL
   void init(); // To be called only via DeviceTy::initOnce()

diff  --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index 16639ab0966d2..c401c4afd3ccb 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -40,6 +40,7 @@ VERS1.0 {
     llvm_omp_target_alloc_shared;
     llvm_omp_target_alloc_device;
     __tgt_set_info_flag;
+    __tgt_print_device_info;
   local:
     *;
 };

diff  --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 853be82c893e6..79ba91df5b823 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -466,3 +466,8 @@ EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
       R.set_info_flag(NewInfoLevel);
   }
 }
+
+EXTERN int __tgt_print_device_info(int64_t device_id) {
+  auto &&Device = PM->Devices[device_id]; // NOTE(review): index not validated
+  return Device.printDeviceInfo(Device.RTLDeviceID);
+}

diff  --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 9721504048bd9..ec86182ab5cf8 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -177,6 +177,8 @@ void RTLsTy::LoadRTLs() {
         dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images");
     *((void **)&R.set_info_flag) =
         dlsym(dynlib_handle, "__tgt_rtl_set_info_flag");
+    *((void **)&R.print_device_info) =
+        dlsym(dynlib_handle, "__tgt_rtl_print_device_info");
   }
 
   DP("RTLs loaded!\n");

diff  --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h
index 35313dff3c3ff..ed87b6c36e0cf 100644
--- a/openmp/libomptarget/src/rtl.h
+++ b/openmp/libomptarget/src/rtl.h
@@ -55,6 +55,7 @@ struct RTLInfoTy {
   typedef int64_t(synchronize_ty)(int32_t, __tgt_async_info *);
   typedef int32_t (*register_lib_ty)(__tgt_bin_desc *);
   typedef int32_t(supports_empty_images_ty)();
+  typedef void(print_device_info_ty)(int32_t);
   typedef void(set_info_flag_ty)(uint32_t);
 
   int32_t Idx = -1;             // RTL index, index is the number of devices
@@ -93,6 +94,7 @@ struct RTLInfoTy {
   register_lib_ty unregister_lib = nullptr;
   supports_empty_images_ty *supports_empty_images = nullptr;
   set_info_flag_ty *set_info_flag = nullptr;
+  print_device_info_ty *print_device_info = nullptr;
 
   // Are there images associated with this RTL.
   bool isUsed = false;


        


More information about the Openmp-commits mailing list