[Openmp-commits] [openmp] [OpenMP][libomptarget] Enable lazy device initialization (PR #76832)
Gheorghe-Teodor Bercea via Openmp-commits
openmp-commits at lists.llvm.org
Wed Jan 3 08:33:42 PST 2024
https://github.com/doru1004 created https://github.com/llvm/llvm-project/pull/76832
Enable lazy device initialization. This addresses this issue: https://github.com/llvm/llvm-project/issues/75677
>From 9db4a91886089610f7ffc12fb32f655af68c97ec Mon Sep 17 00:00:00 2001
From: Doru Bercea <doru.bercea at amd.com>
Date: Tue, 19 Dec 2023 11:44:43 -0500
Subject: [PATCH] Enable lazy device initialization.
---
openmp/libomptarget/include/PluginManager.h | 12 ++--
openmp/libomptarget/include/device.h | 2 +
openmp/libomptarget/src/PluginManager.cpp | 62 ++++++++++++++-----
openmp/libomptarget/src/device.cpp | 7 ++-
.../test/offloading/lazy_device_init.cpp | 33 ++++++++++
5 files changed, 95 insertions(+), 21 deletions(-)
create mode 100644 openmp/libomptarget/test/offloading/lazy_device_init.cpp
diff --git a/openmp/libomptarget/include/PluginManager.h b/openmp/libomptarget/include/PluginManager.h
index a0499c37504c0d..a27be6adb14049 100644
--- a/openmp/libomptarget/include/PluginManager.h
+++ b/openmp/libomptarget/include/PluginManager.h
@@ -54,10 +54,6 @@ struct PluginAdaptorTy {
/// Return the number of devices visible to the underlying plugin.
int32_t getNumberOfPluginDevices() const { return NumberOfPluginDevices; }
- /// Return the number of devices successfully initialized and visible to the
- /// user.
- int32_t getNumberOfUserDevices() const { return NumberOfUserDevices; }
-
/// Add all offload entries described by \p DI to the devices managed by this
/// plugin.
void addOffloadEntries(DeviceImageTy &DI);
@@ -82,6 +78,8 @@ struct PluginAdaptorTy {
llvm::DenseSet<const __tgt_device_image *> UsedImages;
+ bool LazyDeviceInitialization;
+
private:
/// Number of devices the underling plugins sees.
int32_t NumberOfPluginDevices = -1;
@@ -108,6 +106,9 @@ struct PluginManager {
/// Exclusive accessor type for the device container.
using ExclusiveDevicesAccessorTy = Accessor<DeviceContainerTy>;
+ /// Keep track of the number of initialized devices:
+ int32_t NumberOfInitializedDevices = 0;
+
PluginManager() {}
void init();
@@ -124,7 +125,8 @@ struct PluginManager {
/// Return the device presented to the user as device \p DeviceNo if it is
/// initialized and ready. Otherwise return an error explaining the problem.
- llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo);
+ llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo,
+ bool WithoutInit = false);
/// Iterate over all initialized and ready devices registered with this
/// plugin.
diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index d28d3c508faf56..134603b4e1d19e 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -51,6 +51,8 @@ struct DeviceTy {
PluginAdaptorTy *RTL;
int32_t RTLDeviceID;
+ bool IsInit;
+
bool HasMappedGlobalData = false;
PendingCtorsDtorsPerLibrary PendingCtorsDtors;
diff --git a/openmp/libomptarget/src/PluginManager.cpp b/openmp/libomptarget/src/PluginManager.cpp
index da2e08180eead8..c7ce0755539da9 100644
--- a/openmp/libomptarget/src/PluginManager.cpp
+++ b/openmp/libomptarget/src/PluginManager.cpp
@@ -52,7 +52,8 @@ PluginAdaptorTy::create(const std::string &Name) {
PluginAdaptorTy::PluginAdaptorTy(const std::string &Name,
std::unique_ptr<llvm::sys::DynamicLibrary> DL)
- : Name(Name), LibraryHandler(std::move(DL)) {}
+ : Name(Name), LibraryHandler(std::move(DL)),
+ LazyDeviceInitialization(false) {}
Error PluginAdaptorTy::init() {
@@ -84,14 +85,19 @@ Error PluginAdaptorTy::init() {
"No devices supported in this RTL\n");
}
+ if (char *EnvStr = getenv("LIBOMPTARGET_LAZY_DEVICE_INIT")) {
+ LazyDeviceInitialization = std::stoi(EnvStr);
+ DP("Using lazy device initialization!\n");
+ }
+
DP("Registered '%s' with %d plugin visible devices!\n", Name.c_str(),
NumberOfPluginDevices);
return Error::success();
}
void PluginAdaptorTy::addOffloadEntries(DeviceImageTy &DI) {
- for (int32_t I = 0, E = getNumberOfUserDevices(); I < E; ++I) {
- auto DeviceOrErr = PM->getDevice(DeviceOffset + I);
+ for (int32_t I = 0, E = getNumberOfPluginDevices(); I < E; ++I) {
+ auto DeviceOrErr = PM->getDevice(DeviceOffset + I, /*WithoutInit*/ true);
if (!DeviceOrErr)
FATAL_MESSAGE(DeviceOffset + I, "%s",
toString(DeviceOrErr.takeError()).c_str());
@@ -142,21 +148,28 @@ void PluginAdaptorTy::initDevices(PluginManager &PM) {
if (set_device_offset)
set_device_offset(DeviceOffset);
+ // Perform all the actions we normally perform even under lazy initialization.
+ // The only exception is that we postpone initializing the device itself until
+ // it is being used.
int32_t NumPD = getNumberOfPluginDevices();
ExclusiveDevicesAccessor->reserve(DeviceOffset + NumPD);
for (int32_t PDevI = 0, UserDevId = DeviceOffset; PDevI < NumPD; PDevI++) {
auto Device = std::make_unique<DeviceTy>(this, UserDevId, PDevI);
- if (auto Err = Device->init()) {
- DP("Skip plugin known device %d: %s\n", PDevI,
- toString(std::move(Err)).c_str());
- continue;
+ if (!LazyDeviceInitialization) {
+ if (auto Err = Device->init()) {
+ DP("Skip plugin known device %d: %s\n", PDevI,
+ toString(std::move(Err)).c_str());
+ continue;
+ }
+ ++NumberOfUserDevices;
}
ExclusiveDevicesAccessor->push_back(std::move(Device));
- ++NumberOfUserDevices;
++UserDevId;
}
+ // Note: when the devices are lazily initialized, the number of exposed
+ // devices below is zero.
DP("Plugin adaptor " DPxMOD " has index %d, exposes %d out of %d devices!\n",
DPxPTR(LibraryHandler.get()), DeviceOffset, NumberOfUserDevices,
NumberOfPluginDevices);
@@ -178,15 +191,15 @@ static void registerImageIntoTranslationTable(TranslationTable &TT,
// Resize the Targets Table and Images to accommodate the new targets if
// required
unsigned TargetsTableMinimumSize =
- RTL.DeviceOffset + RTL.getNumberOfUserDevices();
+ RTL.DeviceOffset + RTL.getNumberOfPluginDevices();
if (TT.TargetsTable.size() < TargetsTableMinimumSize) {
TT.TargetsImages.resize(TargetsTableMinimumSize, 0);
TT.TargetsTable.resize(TargetsTableMinimumSize, 0);
}
- // Register the image in all devices for this target type.
- for (int32_t I = 0; I < RTL.getNumberOfUserDevices(); ++I) {
+ // Register the image in all possible devices for this target type.
+ for (int32_t I = 0; I < RTL.getNumberOfPluginDevices(); ++I) {
// If we are changing the image we are also invalidating the target table.
if (TT.TargetsImages[RTL.DeviceOffset + I] != Image) {
TT.TargetsImages[RTL.DeviceOffset + I] = Image;
@@ -291,8 +304,9 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
// Execute dtors for static objects if the device has been used, i.e.
// if its PendingCtors list has been emptied.
- for (int32_t I = 0; I < FoundRTL->getNumberOfUserDevices(); ++I) {
- auto DeviceOrErr = PM->getDevice(FoundRTL->DeviceOffset + I);
+ for (int32_t I = 0; I < FoundRTL->getNumberOfPluginDevices(); ++I) {
+ auto DeviceOrErr =
+ PM->getDevice(FoundRTL->DeviceOffset + I, /*WithoutInit*/ true);
if (!DeviceOrErr)
FATAL_MESSAGE(FoundRTL->DeviceOffset + I, "%s",
toString(DeviceOrErr.takeError()).c_str());
@@ -357,7 +371,8 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
DP("Done unregistering library!\n");
}
-Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo) {
+Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo,
+ bool WithoutInit) {
auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
if (DeviceNo >= ExclusiveDevicesAccessor->size())
return createStringError(
@@ -365,5 +380,22 @@ Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo) {
"Device number '%i' out of range, only %i devices available", DeviceNo,
ExclusiveDevicesAccessor->size());
- return *(*ExclusiveDevicesAccessor)[DeviceNo];
+ DeviceTy &Device = *(*ExclusiveDevicesAccessor)[DeviceNo];
+
+ // If the device is initialized eagerly then IsInit will be true already and
+ // the whole initialization of the device will be skipped. In some cases, such
+ // as when we register the offload entries, we also want to make sure that the
+ // device is fetched without the initialization being even considered. For
+ // this we set WithoutInit to true.
+ if (!WithoutInit && !Device.IsInit) {
+ if (auto Err = Device.init()) {
+ DP("Failed to init device %d: %s\n", DeviceNo,
+ toString(std::move(Err)).c_str());
+ return createStringError(inconvertibleErrorCode(),
+ "Failed to init device %d\n", DeviceNo);
+ }
+ DP("Device %d (local ID %d) has been lazily initialized! (IsInit = %d)\n",
+ DeviceNo, Device.RTLDeviceID, Device.IsInit);
+ }
+ return Device;
}
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index dbad13b92bcc14..480146039708fb 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -65,7 +65,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
}
DeviceTy::DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
- : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID),
+ : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID), IsInit(false),
PendingCtorsDtors(), PendingGlobalsMtx(), MappingInfo(*this) {}
DeviceTy::~DeviceTy() {
@@ -77,6 +77,10 @@ DeviceTy::~DeviceTy() {
}
llvm::Error DeviceTy::init() {
+ // If device is already initialized then return success:
+ if (IsInit)
+ return llvm::Error::success();
+
// Make call to init_requires if it exists for this plugin.
int32_t Ret = 0;
if (RTL->init_requires)
@@ -103,6 +107,7 @@ llvm::Error DeviceTy::init() {
OMPX_ReplaySaveOutput, ReqPtrArgOffset);
}
+ IsInit = true;
return llvm::Error::success();
}
diff --git a/openmp/libomptarget/test/offloading/lazy_device_init.cpp b/openmp/libomptarget/test/offloading/lazy_device_init.cpp
new file mode 100644
index 00000000000000..474bf6ad4261b9
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/lazy_device_init.cpp
@@ -0,0 +1,33 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic && env LIBOMPTARGET_LAZY_DEVICE_INIT=1 LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
+// clang-format on
+
+// REQUIRES: libomptarget-debug
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+ int *a = (int *)malloc(sizeof(int) * 10);
+
+ // clang-format off
+// CHECK: omptarget --> Plugin adaptor {{.*}} has index 0, exposes 0 out of {{.*}} devices!
+// CHECK: omptarget --> Done registering entries!
+// CHECK: omptarget --> Use default device id [[DEVICE_ID:.*]]
+// CHECK: omptarget --> Device [[DEVICE_ID]] (local ID 0) has been lazily initialized! (IsInit = 1)
+ // clang-format on
+
+#pragma omp target map(from : a[ : 10])
+ { a[5] = 4; }
+
+ // CHECK: a[5] = 4
+
+ printf("a[5] = %d\n", a[5]);
+
+ free(a);
+
+ return 0;
+}
More information about the Openmp-commits
mailing list