[Openmp-commits] [openmp] [OpenMP][libomptarget] Enable lazy device initialization (PR #76832)

Gheorghe-Teodor Bercea via Openmp-commits openmp-commits at lists.llvm.org
Thu Jan 4 07:39:27 PST 2024


https://github.com/doru1004 updated https://github.com/llvm/llvm-project/pull/76832

>From d20628213fca3b297fa22b109a9956df22a67b1f Mon Sep 17 00:00:00 2001
From: Doru Bercea <doru.bercea at amd.com>
Date: Tue, 19 Dec 2023 11:44:43 -0500
Subject: [PATCH] Enable lazy device initialization.

---
 openmp/libomptarget/include/PluginManager.h   | 12 ++--
 openmp/libomptarget/include/device.h          |  2 +
 openmp/libomptarget/src/PluginManager.cpp     | 62 ++++++++++++++-----
 openmp/libomptarget/src/device.cpp            |  7 ++-
 .../test/offloading/lazy_device_init.cpp      | 34 ++++++++++
 5 files changed, 96 insertions(+), 21 deletions(-)
 create mode 100644 openmp/libomptarget/test/offloading/lazy_device_init.cpp

diff --git a/openmp/libomptarget/include/PluginManager.h b/openmp/libomptarget/include/PluginManager.h
index a0499c37504c0d..a27be6adb14049 100644
--- a/openmp/libomptarget/include/PluginManager.h
+++ b/openmp/libomptarget/include/PluginManager.h
@@ -54,10 +54,6 @@ struct PluginAdaptorTy {
   /// Return the number of devices visible to the underlying plugin.
   int32_t getNumberOfPluginDevices() const { return NumberOfPluginDevices; }
 
-  /// Return the number of devices successfully initialized and visible to the
-  /// user.
-  int32_t getNumberOfUserDevices() const { return NumberOfUserDevices; }
-
   /// Add all offload entries described by \p DI to the devices managed by this
   /// plugin.
   void addOffloadEntries(DeviceImageTy &DI);
@@ -82,6 +78,8 @@ struct PluginAdaptorTy {
 
   llvm::DenseSet<const __tgt_device_image *> UsedImages;
 
+  bool LazyDeviceInitialization;
+
 private:
   /// Number of devices the underling plugins sees.
   int32_t NumberOfPluginDevices = -1;
@@ -108,6 +106,9 @@ struct PluginManager {
   /// Exclusive accessor type for the device container.
   using ExclusiveDevicesAccessorTy = Accessor<DeviceContainerTy>;
 
+  /// Keep track of the number of initialized devices:
+  int32_t NumberOfInitializedDevices = 0;
+
   PluginManager() {}
 
   void init();
@@ -124,7 +125,8 @@ struct PluginManager {
 
   /// Return the device presented to the user as device \p DeviceNo if it is
   /// initialized and ready. Otherwise return an error explaining the problem.
-  llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo);
+  llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo,
+                                       bool WithoutInit = false);
 
   /// Iterate over all initialized and ready devices registered with this
   /// plugin.
diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index d28d3c508faf56..134603b4e1d19e 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -51,6 +51,8 @@ struct DeviceTy {
   PluginAdaptorTy *RTL;
   int32_t RTLDeviceID;
 
+  bool IsInit;
+
   bool HasMappedGlobalData = false;
 
   PendingCtorsDtorsPerLibrary PendingCtorsDtors;
diff --git a/openmp/libomptarget/src/PluginManager.cpp b/openmp/libomptarget/src/PluginManager.cpp
index da2e08180eead8..c7ce0755539da9 100644
--- a/openmp/libomptarget/src/PluginManager.cpp
+++ b/openmp/libomptarget/src/PluginManager.cpp
@@ -52,7 +52,8 @@ PluginAdaptorTy::create(const std::string &Name) {
 
 PluginAdaptorTy::PluginAdaptorTy(const std::string &Name,
                                  std::unique_ptr<llvm::sys::DynamicLibrary> DL)
-    : Name(Name), LibraryHandler(std::move(DL)) {}
+    : Name(Name), LibraryHandler(std::move(DL)),
+      LazyDeviceInitialization(false) {}
 
 Error PluginAdaptorTy::init() {
 
@@ -84,14 +85,19 @@ Error PluginAdaptorTy::init() {
                              "No devices supported in this RTL\n");
   }
 
+  if (char *EnvStr = getenv("LIBOMPTARGET_LAZY_DEVICE_INIT")) {
+    LazyDeviceInitialization = std::stoi(EnvStr);
+    DP("Using lazy device initialization!\n");
+  }
+
   DP("Registered '%s' with %d plugin visible devices!\n", Name.c_str(),
      NumberOfPluginDevices);
   return Error::success();
 }
 
 void PluginAdaptorTy::addOffloadEntries(DeviceImageTy &DI) {
-  for (int32_t I = 0, E = getNumberOfUserDevices(); I < E; ++I) {
-    auto DeviceOrErr = PM->getDevice(DeviceOffset + I);
+  for (int32_t I = 0, E = getNumberOfPluginDevices(); I < E; ++I) {
+    auto DeviceOrErr = PM->getDevice(DeviceOffset + I, /*WithoutInit*/ true);
     if (!DeviceOrErr)
       FATAL_MESSAGE(DeviceOffset + I, "%s",
                     toString(DeviceOrErr.takeError()).c_str());
@@ -142,21 +148,28 @@ void PluginAdaptorTy::initDevices(PluginManager &PM) {
   if (set_device_offset)
     set_device_offset(DeviceOffset);
 
+  // Perform all the actions we normally perform even under lazy initialization.
+  // The only exception is that we postpone initializing the device itself until
+  // it is being used.
   int32_t NumPD = getNumberOfPluginDevices();
   ExclusiveDevicesAccessor->reserve(DeviceOffset + NumPD);
   for (int32_t PDevI = 0, UserDevId = DeviceOffset; PDevI < NumPD; PDevI++) {
     auto Device = std::make_unique<DeviceTy>(this, UserDevId, PDevI);
-    if (auto Err = Device->init()) {
-      DP("Skip plugin known device %d: %s\n", PDevI,
-         toString(std::move(Err)).c_str());
-      continue;
+    if (!LazyDeviceInitialization) {
+      if (auto Err = Device->init()) {
+        DP("Skip plugin known device %d: %s\n", PDevI,
+           toString(std::move(Err)).c_str());
+        continue;
+      }
+      ++NumberOfUserDevices;
     }
 
     ExclusiveDevicesAccessor->push_back(std::move(Device));
-    ++NumberOfUserDevices;
     ++UserDevId;
   }
 
+  // Note: when the devices are lazily initialized, the number of exposed
+  // devices below is zero.
   DP("Plugin adaptor " DPxMOD " has index %d, exposes %d out of %d devices!\n",
      DPxPTR(LibraryHandler.get()), DeviceOffset, NumberOfUserDevices,
      NumberOfPluginDevices);
@@ -178,15 +191,15 @@ static void registerImageIntoTranslationTable(TranslationTable &TT,
   // Resize the Targets Table and Images to accommodate the new targets if
   // required
   unsigned TargetsTableMinimumSize =
-      RTL.DeviceOffset + RTL.getNumberOfUserDevices();
+      RTL.DeviceOffset + RTL.getNumberOfPluginDevices();
 
   if (TT.TargetsTable.size() < TargetsTableMinimumSize) {
     TT.TargetsImages.resize(TargetsTableMinimumSize, 0);
     TT.TargetsTable.resize(TargetsTableMinimumSize, 0);
   }
 
-  // Register the image in all devices for this target type.
-  for (int32_t I = 0; I < RTL.getNumberOfUserDevices(); ++I) {
+  // Register the image in all possible devices for this target type.
+  for (int32_t I = 0; I < RTL.getNumberOfPluginDevices(); ++I) {
     // If we are changing the image we are also invalidating the target table.
     if (TT.TargetsImages[RTL.DeviceOffset + I] != Image) {
       TT.TargetsImages[RTL.DeviceOffset + I] = Image;
@@ -291,8 +304,9 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
 
       // Execute dtors for static objects if the device has been used, i.e.
       // if its PendingCtors list has been emptied.
-      for (int32_t I = 0; I < FoundRTL->getNumberOfUserDevices(); ++I) {
-        auto DeviceOrErr = PM->getDevice(FoundRTL->DeviceOffset + I);
+      for (int32_t I = 0; I < FoundRTL->getNumberOfPluginDevices(); ++I) {
+        auto DeviceOrErr =
+            PM->getDevice(FoundRTL->DeviceOffset + I, /*WithoutInit*/ true);
         if (!DeviceOrErr)
           FATAL_MESSAGE(FoundRTL->DeviceOffset + I, "%s",
                         toString(DeviceOrErr.takeError()).c_str());
@@ -357,7 +371,8 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
   DP("Done unregistering library!\n");
 }
 
-Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo) {
+Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo,
+                                              bool WithoutInit) {
   auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
   if (DeviceNo >= ExclusiveDevicesAccessor->size())
     return createStringError(
@@ -365,5 +380,22 @@ Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo) {
         "Device number '%i' out of range, only %i devices available", DeviceNo,
         ExclusiveDevicesAccessor->size());
 
-  return *(*ExclusiveDevicesAccessor)[DeviceNo];
+  DeviceTy &Device = *(*ExclusiveDevicesAccessor)[DeviceNo];
+
+  // If the device is initialized eagerly then IsInit will be true already and
+  // the whole initialization of the device will be skipped. In some cases, such
+  // as when we register the offload entries, we also want to make sure that the
+  // device is fetched without the initialization being even considered. For
+  // this we set WithoutInit to true.
+  if (!WithoutInit && !Device.IsInit) {
+    if (auto Err = Device.init()) {
+      DP("Failed to init device %d: %s\n", DeviceNo,
+         toString(std::move(Err)).c_str());
+      return createStringError(inconvertibleErrorCode(),
+                               "Failed to init device %d\n", DeviceNo);
+    }
+    DP("Device %d (local ID %d) has been lazily initialized! (IsInit = %d)\n",
+       DeviceNo, Device.RTLDeviceID, Device.IsInit);
+  }
+  return Device;
 }
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index dbad13b92bcc14..480146039708fb 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -65,7 +65,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
 }
 
 DeviceTy::DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
-    : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID),
+    : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID), IsInit(false),
       PendingCtorsDtors(), PendingGlobalsMtx(), MappingInfo(*this) {}
 
 DeviceTy::~DeviceTy() {
@@ -77,6 +77,10 @@ DeviceTy::~DeviceTy() {
 }
 
 llvm::Error DeviceTy::init() {
+  // If device is already initialized then return success:
+  if (IsInit)
+    return llvm::Error::success();
+
   // Make call to init_requires if it exists for this plugin.
   int32_t Ret = 0;
   if (RTL->init_requires)
@@ -103,6 +107,7 @@ llvm::Error DeviceTy::init() {
                                   OMPX_ReplaySaveOutput, ReqPtrArgOffset);
   }
 
+  IsInit = true;
   return llvm::Error::success();
 }
 
diff --git a/openmp/libomptarget/test/offloading/lazy_device_init.cpp b/openmp/libomptarget/test/offloading/lazy_device_init.cpp
new file mode 100644
index 00000000000000..558c36354fec74
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/lazy_device_init.cpp
@@ -0,0 +1,34 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic && env LIBOMPTARGET_LAZY_DEVICE_INIT=1 LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
+// clang-format on
+
+// REQUIRES: libomptarget-debug
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  int *a = (int *)malloc(sizeof(int) * 10);
+
+  // clang-format off
+// CHECK: omptarget --> Using lazy device initialization!
+// CHECK: omptarget --> Plugin adaptor {{.*}} has index 0, exposes 0 out of {{.*}} devices!
+// CHECK: omptarget --> Done registering entries!
+// CHECK: omptarget --> Use default device id [[DEVICE_ID:.*]]
+// CHECK: omptarget --> Device [[DEVICE_ID]] (local ID 0) has been lazily initialized! (IsInit = 1)
+  // clang-format on
+
+#pragma omp target map(from : a[ : 10])
+  { a[5] = 4; }
+
+  // CHECK: a[5] = 4
+
+  printf("a[5] = %d\n", a[5]);
+
+  free(a);
+
+  return 0;
+}



More information about the Openmp-commits mailing list