[Openmp-commits] [openmp] e01ce4e - [Libomptarget] Add checks for CUDA subarchitecture using new info

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Thu Jul 21 10:20:19 PDT 2022


Author: Joseph Huber
Date: 2022-07-21T13:20:06-04:00
New Revision: e01ce4e88a8443993afb19f64b2811d50dca65aa

URL: https://github.com/llvm/llvm-project/commit/e01ce4e88a8443993afb19f64b2811d50dca65aa
DIFF: https://github.com/llvm/llvm-project/commit/e01ce4e88a8443993afb19f64b2811d50dca65aa.diff

LOG: [Libomptarget] Add checks for CUDA subarchitecture using new info

This patch extends the `is_valid_binary` routine to also check if the
binary's architecture string matches the one parsed from the runtime.
This should allow us to only use the binary whose compute capability
matches, allowing us to support basic multi-architecture binaries for
CUDA.

Depends on D127432

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D127505

Added: 
    

Modified: 
    openmp/libomptarget/plugins/cuda/src/rtl.cpp
    openmp/libomptarget/plugins/exports

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 2f96869ea3866..2ab4d6017b5ed 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1519,6 +1519,43 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
   return elf_check_machine(Image, /* EM_CUDA */ 190);
 }
 
+/// Returns true if \p image is a valid CUDA binary and, when \p info names a
+/// subarchitecture (e.g. "sm_70"), every CUDA device reports exactly that
+/// compute capability. Returns false otherwise or if a driver query fails.
+int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
+                                       __tgt_image_info *info) {
+  if (!__tgt_rtl_is_valid_binary(image))
+    return false;
+  // No image info or no subarchitecture was given. Assume it is compatible.
+  if (!info || !info->Arch)
+    return true;
+
+  int32_t NumberOfDevices = 0;
+  if (cuDeviceGetCount(&NumberOfDevices) != CUDA_SUCCESS)
+    return false;
+  for (int32_t DeviceId = 0; DeviceId < NumberOfDevices; ++DeviceId) {
+    CUdevice Device;
+    if (cuDeviceGet(&Device, DeviceId) != CUDA_SUCCESS)
+      return false;
+    int32_t Major, Minor;
+    if (cuDeviceGetAttribute(&Major,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                             Device) != CUDA_SUCCESS)
+      return false;
+    if (cuDeviceGetAttribute(&Minor,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+                             Device) != CUDA_SUCCESS)
+      return false;
+    // Reject the image as soon as one device's "sm_<major><minor>" differs.
+    std::string ArchStr = "sm_" + std::to_string(Major) + std::to_string(Minor);
+    if (ArchStr != info->Arch)
+      return false;
+  }
+
+  DP("Image has compatible compute capability: %s\n", info->Arch);
+  return true;
+}
+
 int32_t __tgt_rtl_number_of_devices() { return DeviceRTL.getNumOfDevices(); }
 
 int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {

diff  --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports
index b4582f1f25c03..fa6a7d97760e5 100644
--- a/openmp/libomptarget/plugins/exports
+++ b/openmp/libomptarget/plugins/exports
@@ -1,6 +1,7 @@
 VERS1.0 {
   global:
     __tgt_rtl_is_valid_binary;
+    __tgt_rtl_is_valid_binary_info;
     __tgt_rtl_is_data_exchangable;
     __tgt_rtl_number_of_devices;
     __tgt_rtl_init_requires;


        


More information about the Openmp-commits mailing list