[Openmp-commits] [openmp] 313c523 - [OpenMP][Tool] Introducing the `llvm-omp-device-info` tool

Shilei Tian via Openmp-commits openmp-commits at lists.llvm.org
Tue Jul 27 19:38:44 PDT 2021


Author: Jose M Monsalve Diaz
Date: 2021-07-27T22:38:35-04:00
New Revision: 313c5239959b8f9e5cc182b982c914978f437ae1

URL: https://github.com/llvm/llvm-project/commit/313c5239959b8f9e5cc182b982c914978f437ae1
DIFF: https://github.com/llvm/llvm-project/commit/313c5239959b8f9e5cc182b982c914978f437ae1.diff

LOG: [OpenMP][Tool] Introducing the `llvm-omp-device-info` tool

This patch introduces the `llvm-omp-device-info` tool, which uses the
omptarget library and interface to query the device info from all the
available devices as seen by OpenMP. This is inspired by PGI's `pgaccelinfo`.

Since omptarget usually requires a description structure with executable
kernels, I split the initialization of the RTLs and Devices to be able to
initialize all possible devices and query each of them.

This revision relies on the patch that introduces the print device info.

A limitation is that the order in which the devices are initialized, and hence
the corresponding device IDs, is not necessarily the one seen by OpenMP.

The changes are as follows:
1. Separate the RTL initialization that was performed in `RegisterLib` into its own `initRTLonce` function
2. Create an `initAllRTLs` method that initializes all available RTLs at runtime
3. Create the `llvm-omp-device-info.cpp` tool that uses `omptarget` to query each device and print its information.

Example Output:
```
Device (0):
    print_device_info not implemented

Device (1):
    print_device_info not implemented

Device (2):
    print_device_info not implemented

Device (3):
    print_device_info not implemented

Device (4):
    CUDA Driver Version:                11000
    CUDA Device Number:                 0
    Device Name:                        Quadro P1000
    Global Memory Size:                 4236312576 bytes
    Number of Multiprocessors:          5
    Concurrent Copy and Execution:      Yes
    Total Constant Memory:              65536 bytes
    Max Shared Memory per Block:        49152 bytes
    Registers per Block:                65536
    Warp Size:                          32 Threads
    Maximum Threads per Block:          1024
    Maximum Block Dimensions:           1024, 1024, 64
    Maximum Grid Dimensions:            2147483647 x 65535 x 65535
    Maximum Memory Pitch:               2147483647 bytes
    Texture Alignment:                  512 bytes
    Clock Rate:                         1480500 kHz
    Execution Timeout:                  Yes
    Integrated Device:                  No
    Can Map Host Memory:                Yes
    Compute Mode:                       DEFAULT
    Concurrent Kernels:                 Yes
    ECC Enabled:                        No
    Memory Clock Rate:                  2505000 kHz
    Memory Bus Width:                   128 bits
    L2 Cache Size:                      1048576 bytes
    Max Threads Per SMP:                2048
    Async Engines:                      Yes (2)
    Unified Addressing:                 Yes
    Managed Memory:                     Yes
    Concurrent Managed Memory:          Yes
    Preemption Supported:               Yes
    Cooperative Launch:                 Yes
    Multi-Device Boars:                 No
    Compute Capabilities:               61
```

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D106752

Added: 
    openmp/libomptarget/tools/CMakeLists.txt
    openmp/libomptarget/tools/deviceinfo/CMakeLists.txt
    openmp/libomptarget/tools/deviceinfo/llvm-omp-device-info.cpp

Modified: 
    openmp/libomptarget/CMakeLists.txt
    openmp/libomptarget/include/omptarget.h
    openmp/libomptarget/plugins/cuda/src/rtl.cpp
    openmp/libomptarget/src/exports
    openmp/libomptarget/src/interface.cpp
    openmp/libomptarget/src/rtl.cpp
    openmp/libomptarget/src/rtl.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt
index 3f709781893e4..7fcc6cb23c562 100644
--- a/openmp/libomptarget/CMakeLists.txt
+++ b/openmp/libomptarget/CMakeLists.txt
@@ -79,6 +79,7 @@ set(LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER "${LIBOMP_LIBRARY_DIR}" CACHE STRING
 add_subdirectory(plugins)
 add_subdirectory(deviceRTLs)
 add_subdirectory(DeviceRTL)
+add_subdirectory(tools)
 
 # Add tests.
 add_subdirectory(test)

diff  --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 60e49868a2742..dfddb59fa91d9 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -210,6 +210,9 @@ void __tgt_register_requires(int64_t flags);
 /// adds a target shared library to the target execution image
 void __tgt_register_lib(__tgt_bin_desc *desc);
 
+/// Initialize all RTLs at once
+void __tgt_init_all_rtls();
+
 /// removes a target shared library from the target execution image
 void __tgt_unregister_lib(__tgt_bin_desc *desc);
 

diff  --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 678d8b447cacd..fa1d29bf44c18 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -16,9 +16,11 @@
 #include <list>
 #include <memory>
 #include <mutex>
+#include <string.h>
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include <string.h>
 
 #include "Debug.h"
 #include "omptargetplugin.h"

diff  --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index c401c4afd3ccb..4d6c1f7f0ea4e 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -3,6 +3,7 @@ VERS1.0 {
     __tgt_register_requires;
     __tgt_register_lib;
     __tgt_unregister_lib;
+    __tgt_init_all_rtls;
     __tgt_target_data_begin;
     __tgt_target_data_end;
     __tgt_target_data_update;

diff  --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 79ba91df5b823..793bfe0f92913 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -43,6 +43,10 @@ EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
   PM->RTLs.RegisterLib(desc);
 }
 
+////////////////////////////////////////////////////////////////////////////////
+/// Initialize all available RTLs and their devices; no image is registered.
+EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); }
+
 ////////////////////////////////////////////////////////////////////////////////
 /// unloads a target shared library
 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {

diff  --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index ec86182ab5cf8..437da9d4bb1fd 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -290,6 +290,38 @@ void RTLsTy::RegisterRequires(int64_t flags) {
      flags, RequiresFlags);
 }
 
+void RTLsTy::initRTLonce(RTLInfoTy &R) {
+  // Initialize this RTL only if it is not yet in use and reports devices.
+  if (!R.isUsed && R.NumberOfDevices != 0) {
+    // Append one DeviceTy entry per device of this RTL to PM->Devices.
+    DeviceTy device(&R);
+    size_t Start = PM->Devices.size();
+    PM->Devices.resize(Start + R.NumberOfDevices, device);
+    for (int32_t device_id = 0; device_id < R.NumberOfDevices; device_id++) {
+      // Global device ID: the entry's position in PM->Devices.
+      PM->Devices[Start + device_id].DeviceID = Start + device_id;
+      // Device ID local to this RTL, counted from 0.
+      PM->Devices[Start + device_id].RTLDeviceID = device_id;
+    }
+
+    // R.Idx is the global ID of this RTL's first device; record R as used.
+    R.Idx = (UsedRTLs.empty())
+                ? 0
+                : UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices;
+    assert((size_t)R.Idx == Start &&
+           "RTL index should equal the number of devices used so far.");
+    R.isUsed = true;
+    UsedRTLs.push_back(&R);
+
+    DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx);
+  }
+}
+
+void RTLsTy::initAllRTLs() { // NOTE(review): RTLsMtx is not taken; confirm.
+  for (auto &R : AllRTLs)
+    initRTLonce(R); // Skips RTLs already in use or without devices.
+}
+
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
@@ -297,7 +329,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
     // Obtain the image.
     __tgt_device_image *img = &desc->DeviceImages[i];
 
-    RTLInfoTy *FoundRTL = NULL;
+    RTLInfoTy *FoundRTL = nullptr;
 
     // Scan the RTLs that have associated images until we find one that supports
     // the current image.
@@ -311,31 +343,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
       DP("Image " DPxMOD " is compatible with RTL %s!\n",
          DPxPTR(img->ImageStart), R.RTLName.c_str());
 
-      // If this RTL is not already in use, initialize it.
-      if (!R.isUsed) {
-        // Initialize the device information for the RTL we are about to use.
-        DeviceTy device(&R);
-        size_t Start = PM->Devices.size();
-        PM->Devices.resize(Start + R.NumberOfDevices, device);
-        for (int32_t device_id = 0; device_id < R.NumberOfDevices;
-             device_id++) {
-          // global device ID
-          PM->Devices[Start + device_id].DeviceID = Start + device_id;
-          // RTL local device ID
-          PM->Devices[Start + device_id].RTLDeviceID = device_id;
-        }
-
-        // Initialize the index of this RTL and save it in the used RTLs.
-        R.Idx = (UsedRTLs.empty())
-                    ? 0
-                    : UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices;
-        assert((size_t)R.Idx == Start &&
-               "RTL index should equal the number of devices used so far.");
-        R.isUsed = true;
-        UsedRTLs.push_back(&R);
-
-        DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx);
-      }
+      initRTLonce(R);
 
       // Initialize (if necessary) translation table for this library.
       PM->TrlTblMtx.lock();

diff  --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h
index ed87b6c36e0cf..db13927cff53f 100644
--- a/openmp/libomptarget/src/rtl.h
+++ b/openmp/libomptarget/src/rtl.h
@@ -121,6 +121,12 @@ struct RTLsTy {
   // Register the clauses of the requires directive.
   void RegisterRequires(int64_t flags);
 
+  // Initialize RTL if it has not been initialized
+  void initRTLonce(RTLInfoTy &RTL);
+
+  // Initialize all RTLs
+  void initAllRTLs();
+
   // Register a shared library with all (compatible) RTLs.
   void RegisterLib(__tgt_bin_desc *desc);
 

diff  --git a/openmp/libomptarget/tools/CMakeLists.txt b/openmp/libomptarget/tools/CMakeLists.txt
new file mode 100644
index 0000000000000..ca5e785ab4cc9
--- /dev/null
+++ b/openmp/libomptarget/tools/CMakeLists.txt
@@ -0,0 +1,13 @@
+##===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Adding omptarget tools
+#
+##===----------------------------------------------------------------------===##
+
+add_subdirectory(deviceinfo)

diff  --git a/openmp/libomptarget/tools/deviceinfo/CMakeLists.txt b/openmp/libomptarget/tools/deviceinfo/CMakeLists.txt
new file mode 100644
index 0000000000000..dab61cf6b4acd
--- /dev/null
+++ b/openmp/libomptarget/tools/deviceinfo/CMakeLists.txt
@@ -0,0 +1,24 @@
+##===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Build llvm-omp-device-info tool
+#
+##===----------------------------------------------------------------------===##
+
+libomptarget_say("Building the llvm-omp-device-info tool")
+libomptarget_say("llvm-omp-device-info using plugins ${LIBOMPTARGET_TESTED_PLUGINS}")
+
+add_llvm_tool(llvm-omp-device-info llvm-omp-device-info.cpp)
+
+llvm_update_compile_flags(llvm-omp-device-info)
+
+target_link_libraries(llvm-omp-device-info PRIVATE
+  omp
+  omptarget
+  ${LIBOMPTARGET_TESTED_PLUGINS}
+)

diff  --git a/openmp/libomptarget/tools/deviceinfo/llvm-omp-device-info.cpp b/openmp/libomptarget/tools/deviceinfo/llvm-omp-device-info.cpp
new file mode 100644
index 0000000000000..7dd22aec9828f
--- /dev/null
+++ b/openmp/libomptarget/tools/deviceinfo/llvm-omp-device-info.cpp
@@ -0,0 +1,31 @@
+//===- llvm-omp-device-info.cpp - Obtain device info as seen from OpenMP --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a command line utility that, by using Libomptarget and the device
+// plugins, lists device information as seen from the OpenMP Runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "omptarget.h"
+#include <cstdio>
+
+int main(int argc, char **argv) {
+  __tgt_bin_desc EmptyDesc = {0, nullptr, nullptr, nullptr};
+  __tgt_register_lib(&EmptyDesc); // Register an all-zero/null descriptor.
+  __tgt_init_all_rtls(); // Initialize all RTLs, not only those with images.
+
+  for (int Dev = 0; Dev < omp_get_num_devices(); Dev++) {
+    printf("Device (%d):\n", Dev);
+    if (!__tgt_print_device_info(Dev)) // Zero result is reported as unsupported.
+      printf("    print_device_info not implemented\n");
+    printf("\n");
+  }
+
+  __tgt_unregister_lib(&EmptyDesc);
+  return 0;
+}


        


More information about the Openmp-commits mailing list