[Openmp-commits] [openmp] [Libomptarget] Replace global `PluginTy::get` interface with references (PR #86595)
via Openmp-commits
openmp-commits at lists.llvm.org
Mon Mar 25 15:40:08 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Summary:
We have a plugin singleton that implements the Plugin interface. This
then spawns separate device and kernels. Previously when these needed to
reach into the global singleton they would use the `PluginTy::get`
routine to get access to it. In the future we will move away from this
as the lifetime of the plugin will be handled by `libomptarget`
directly. This patch removes uses of this inside of the plugin
implementaion themselves by simply keeping a reference to the plugin
inside of the device.
The external `__tgt_rtl` functions still use the global method, but will
be removed later.
---
Full diff: https://github.com/llvm/llvm-project/pull/86595.diff
5 Files Affected:
- (modified) openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp (+26-17)
- (modified) openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h (+6-2)
- (modified) openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp (+5-4)
- (modified) openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp (+7-7)
- (modified) openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp (+9-9)
``````````diff
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 14461f14dd6706..2dd08dd5d0b4ea 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -371,7 +371,8 @@ struct AMDGPUMemoryPoolTy {
struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
/// Create an empty memory manager.
- AMDGPUMemoryManagerTy() : MemoryPool(nullptr), MemoryManager(nullptr) {}
+ AMDGPUMemoryManagerTy(AMDGPUPluginTy &Plugin)
+ : Plugin(Plugin), MemoryPool(nullptr), MemoryManager(nullptr) {}
/// Initialize the memory manager from a memory pool.
Error init(AMDGPUMemoryPoolTy &MemoryPool) {
@@ -429,6 +430,9 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
return OFFLOAD_SUCCESS;
}
+ /// The underlying plugin that owns this memory manager.
+ AMDGPUPluginTy &Plugin;
+
/// The memory pool used to allocate memory.
AMDGPUMemoryPoolTy *MemoryPool;
@@ -1744,9 +1748,10 @@ struct AMDGenericDeviceTy {
/// HSA host agent. We aggregate all its resources into the same instance.
struct AMDHostDeviceTy : public AMDGenericDeviceTy {
/// Create a host device from an array of host agents.
- AMDHostDeviceTy(const llvm::SmallVector<hsa_agent_t> &HostAgents)
- : AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(),
- PinnedMemoryManager() {
+ AMDHostDeviceTy(AMDGPUPluginTy &Plugin,
+ const llvm::SmallVector<hsa_agent_t> &HostAgents)
+ : AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(Plugin),
+ PinnedMemoryManager(Plugin) {
assert(HostAgents.size() && "No host agent found");
}
@@ -1840,9 +1845,10 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
/// generic device class.
struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// Create an AMDGPU device with a device id and default AMDGPU grid values.
- AMDGPUDeviceTy(int32_t DeviceId, int32_t NumDevices,
+ AMDGPUDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
AMDHostDeviceTy &HostDevice, hsa_agent_t Agent)
- : GenericDeviceTy(DeviceId, NumDevices, {0}), AMDGenericDeviceTy(),
+ : GenericDeviceTy(Plugin, DeviceId, NumDevices, {0}),
+ AMDGenericDeviceTy(),
OMPX_NumQueues("LIBOMPTARGET_AMDGPU_NUM_HSA_QUEUES", 4),
OMPX_QueueSize("LIBOMPTARGET_AMDGPU_HSA_QUEUE_SIZE", 512),
OMPX_DefaultTeamsPerCU("LIBOMPTARGET_AMDGPU_TEAMS_PER_CU", 4),
@@ -2088,7 +2094,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Allocate and construct an AMDGPU kernel.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the AMDGPU kernel.
- AMDGPUKernelTy *AMDGPUKernel = PluginTy::get().allocate<AMDGPUKernelTy>();
+ AMDGPUKernelTy *AMDGPUKernel = Plugin.allocate<AMDGPUKernelTy>();
if (!AMDGPUKernel)
return Plugin::error("Failed to allocate memory for AMDGPU kernel");
@@ -2138,8 +2144,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
int32_t ImageId) override {
// Allocate and initialize the image object.
- AMDGPUDeviceImageTy *AMDImage =
- PluginTy::get().allocate<AMDGPUDeviceImageTy>();
+ AMDGPUDeviceImageTy *AMDImage = Plugin.allocate<AMDGPUDeviceImageTy>();
new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage);
// Load the HSA executable.
@@ -2697,7 +2702,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
Error setDeviceHeapSize(uint64_t Value) override {
for (DeviceImageTy *Image : LoadedImages)
- if (auto Err = setupDeviceMemoryPool(PluginTy::get(), *Image, Value))
+ if (auto Err = setupDeviceMemoryPool(Plugin, *Image, Value))
return Err;
DeviceMemoryPoolSize = Value;
return Plugin::success();
@@ -2737,7 +2742,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return utils::iterateAgentMemoryPools(
Agent, [&](hsa_amd_memory_pool_t HSAMemoryPool) {
AMDGPUMemoryPoolTy *MemoryPool =
- PluginTy::get().allocate<AMDGPUMemoryPoolTy>();
+ Plugin.allocate<AMDGPUMemoryPoolTy>();
new (MemoryPool) AMDGPUMemoryPoolTy(HSAMemoryPool);
AllMemoryPools.push_back(MemoryPool);
return HSA_STATUS_SUCCESS;
@@ -3090,7 +3095,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
// Initialize the host device using host agents.
HostDevice = allocate<AMDHostDeviceTy>();
- new (HostDevice) AMDHostDeviceTy(HostAgents);
+ new (HostDevice) AMDHostDeviceTy(*this, HostAgents);
// Setup the memory pools of available for the host.
if (auto Err = HostDevice->init())
@@ -3116,8 +3121,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
}
/// Creates an AMDGPU device.
- GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
- return new AMDGPUDeviceTy(DeviceId, NumDevices, getHostDevice(),
+ GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices) override {
+ return new AMDGPUDeviceTy(Plugin, DeviceId, NumDevices, getHostDevice(),
getKernelAgent(DeviceId));
}
@@ -3248,7 +3254,9 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
// 56 bytes per allocation.
uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;
- AMDHostDeviceTy &HostDevice = PluginTy::get<AMDGPUPluginTy>().getHostDevice();
+ AMDGPUPluginTy &AMDGPUPlugin =
+ static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
+ AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
void *AllArgs = nullptr;
@@ -3385,7 +3393,7 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
}
assert(Ptr && "Invalid pointer");
- auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents();
+ auto &KernelAgents = Plugin.getKernelAgents();
// Allow all kernel agents to access the allocation.
if (auto Err = MemoryPool->enableAccess(Ptr, Size, KernelAgents)) {
@@ -3428,7 +3436,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
}
if (Alloc) {
- auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents();
+ auto &KernelAgents =
+ static_cast<AMDGPUPluginTy &>(Plugin).getKernelAgents();
// Inherently necessary for host or shared allocations
// Also enabled for device memory to allow device to device memcpy
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 1440f8f678d820..b16b081e5effd1 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -610,7 +610,7 @@ class PinnedAllocationMapTy {
struct GenericDeviceTy : public DeviceAllocatorTy {
/// Construct a device with its device id within the plugin, the number of
/// devices in the plugin and the grid values for that kind of device.
- GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
+ GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
const llvm::omp::GV &GridValues);
/// Get the device identifier within the corresponding plugin. Notice that
@@ -860,6 +860,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Allocate and construct a kernel object.
virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
+ /// Reference to the underlying plugin that created this device.
+ GenericPluginTy &Plugin;
+
private:
/// Get and set the stack size and heap size for the device. If not used, the
/// plugin can implement the setters as no-op and setting the output
@@ -977,7 +980,8 @@ struct GenericPluginTy {
virtual Error deinitImpl() = 0;
/// Create a new device for the underlying plugin.
- virtual GenericDeviceTy *createDevice(int32_t DeviceID,
+ virtual GenericDeviceTy *createDevice(GenericPluginTy &Plugin,
+ int32_t DeviceID,
int32_t NumDevices) = 0;
/// Create a new global handler for the underlying plugin.
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 4db787a2ea9185..c32bd089d8a995 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -438,7 +438,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
// Retrieve kernel environment object for the kernel.
GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
sizeof(KernelEnvironment), &KernelEnvironment);
- GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
+ GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler();
if (auto Err =
GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
[[maybe_unused]] std::string ErrStr = toString(std::move(Err));
@@ -710,9 +710,10 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
}
-GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
+GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices,
const llvm::omp::GV &OMPGridValues)
- : MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
+ : Plugin(Plugin), MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
OMP_NumTeams("OMP_NUM_TEAMS"),
OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
@@ -1522,7 +1523,7 @@ Error GenericPluginTy::initDevice(int32_t DeviceId) {
assert(!Devices[DeviceId] && "Device already initialized");
// Create the device and save the reference.
- GenericDeviceTy *Device = createDevice(DeviceId, NumDevices);
+ GenericDeviceTy *Device = createDevice(*this, DeviceId, NumDevices);
assert(Device && "Invalid device");
// Save the device reference into the list.
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index d43c723c350287..fc74c6aa23fddd 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -255,8 +255,8 @@ struct CUDAEventRef final : public GenericDeviceResourceRef {
/// generic device class.
struct CUDADeviceTy : public GenericDeviceTy {
// Create a CUDA device with a device id and the default CUDA grid values.
- CUDADeviceTy(int32_t DeviceId, int32_t NumDevices)
- : GenericDeviceTy(DeviceId, NumDevices, NVPTXGridValues),
+ CUDADeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices)
+ : GenericDeviceTy(Plugin, DeviceId, NumDevices, NVPTXGridValues),
CUDAStreamManager(*this), CUDAEventManager(*this) {}
~CUDADeviceTy() {}
@@ -471,7 +471,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
/// Allocate and construct a CUDA kernel.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the CUDA kernel.
- CUDAKernelTy *CUDAKernel = PluginTy::get().allocate<CUDAKernelTy>();
+ CUDAKernelTy *CUDAKernel = Plugin.allocate<CUDAKernelTy>();
if (!CUDAKernel)
return Plugin::error("Failed to allocate memory for CUDA kernel");
@@ -529,8 +529,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
return std::move(Err);
// Allocate and initialize the image object.
- CUDADeviceImageTy *CUDAImage =
- PluginTy::get().allocate<CUDADeviceImageTy>();
+ CUDADeviceImageTy *CUDAImage = Plugin.allocate<CUDADeviceImageTy>();
new (CUDAImage) CUDADeviceImageTy(ImageId, *this, TgtImage);
// Load the CUDA module.
@@ -1373,8 +1372,9 @@ struct CUDAPluginTy final : public GenericPluginTy {
Error deinitImpl() override { return Plugin::success(); }
/// Creates a CUDA device to use for offloading.
- GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
- return new CUDADeviceTy(DeviceId, NumDevices);
+ GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices) override {
+ return new CUDADeviceTy(Plugin, DeviceId, NumDevices);
}
/// Creates a CUDA global handler.
diff --git a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
index ea8ed8d6a8569e..f0ce24249301af 100644
--- a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
@@ -66,7 +66,7 @@ struct GenELF64KernelTy : public GenericKernelTy {
GlobalTy Global(getName(), 0);
// Get the metadata (address) of the kernel function.
- GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
+ GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler();
if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global))
return Err;
@@ -132,8 +132,9 @@ struct GenELF64DeviceImageTy : public DeviceImageTy {
/// Class implementing the device functionalities for GenELF64.
struct GenELF64DeviceTy : public GenericDeviceTy {
/// Create the device with a specific id.
- GenELF64DeviceTy(int32_t DeviceId, int32_t NumDevices)
- : GenericDeviceTy(DeviceId, NumDevices, GenELF64GridValues) {}
+ GenELF64DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices)
+ : GenericDeviceTy(Plugin, DeviceId, NumDevices, GenELF64GridValues) {}
~GenELF64DeviceTy() {}
@@ -149,8 +150,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
/// Construct the kernel for a specific image on the device.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the kernel.
- GenELF64KernelTy *GenELF64Kernel =
- PluginTy::get().allocate<GenELF64KernelTy>();
+ GenELF64KernelTy *GenELF64Kernel = Plugin.allocate<GenELF64KernelTy>();
if (!GenELF64Kernel)
return Plugin::error("Failed to allocate memory for GenELF64 kernel");
@@ -166,8 +166,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
int32_t ImageId) override {
// Allocate and initialize the image object.
- GenELF64DeviceImageTy *Image =
- PluginTy::get().allocate<GenELF64DeviceImageTy>();
+ GenELF64DeviceImageTy *Image = Plugin.allocate<GenELF64DeviceImageTy>();
new (Image) GenELF64DeviceImageTy(ImageId, *this, TgtImage);
// Create a temporary file.
@@ -400,8 +399,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
Error deinitImpl() override { return Plugin::success(); }
/// Creates a generic ELF device.
- GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
- return new GenELF64DeviceTy(DeviceId, NumDevices);
+ GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices) override {
+ return new GenELF64DeviceTy(Plugin, DeviceId, NumDevices);
}
/// Creates a generic global handler.
``````````
</details>
https://github.com/llvm/llvm-project/pull/86595
More information about the Openmp-commits
mailing list