[Openmp-commits] [openmp] [libomptarget][nextgen-plugin] Always use a signal to trigger completion of H2D data transfers (PR #83475)
Gheorghe-Teodor Bercea via Openmp-commits
openmp-commits at lists.llvm.org
Thu Feb 29 12:21:19 PST 2024
https://github.com/doru1004 created https://github.com/llvm/llvm-project/pull/83475
Always use a signal to trigger completion of H2D data transfers.
From 548cab7375be3391c77dee23b3dbcc3ccf797f9a Mon Sep 17 00:00:00 2001
From: Doru Bercea <doru.bercea at amd.com>
Date: Thu, 29 Feb 2024 15:18:48 -0500
Subject: [PATCH] Always use a signal to trigger completion of H2D data transfers.
---
.../plugins-nextgen/amdgpu/src/rtl.cpp | 23 +++++++------------
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 81634ae1edc490..55794d8bbf2264 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1354,12 +1354,10 @@ struct AMDGPUStreamTy {
Signal->increaseUseCount();
}
- AMDGPUSignalTy *OutputSignal = OutputSignals[0];
-
std::lock_guard<std::mutex> Lock(Mutex);
// Consume stream slot and compute dependencies.
- auto [Curr, InputSignal] = consume(OutputSignal);
+ auto [Curr, InputSignal] = consume(OutputSignals[0]);
// Avoid defining the input dependency if already satisfied.
if (InputSignal && !InputSignal->load())
@@ -1383,22 +1381,17 @@ struct AMDGPUStreamTy {
if (auto Err = Plugin::check(Status,
"Error in hsa_amd_signal_async_handler: %s"))
return Err;
-
- // Let's use now the second output signal.
- OutputSignal = OutputSignals[1];
-
- // Consume another stream slot and compute dependencies.
- std::tie(Curr, InputSignal) = consume(OutputSignal);
} else {
// All preceding operations completed, copy the memory synchronously.
std::memcpy(Inter, Src, CopySize);
- // Return the second signal because it will not be used.
- OutputSignals[1]->decreaseUseCount();
- if (auto Err = SignalManager.returnResource(OutputSignals[1]))
- return Err;
+ // Signal the end of the operation.
+ Slots[Curr].Signal->signal();
}
+ // Consume another stream slot and compute dependencies.
+ std::tie(Curr, InputSignal) = consume(OutputSignals[1]);
+
// Setup the post action to release the intermediate pinned buffer.
if (auto Err = Slots[Curr].schedReleaseBuffer(Inter, MemoryManager))
return Err;
@@ -1409,10 +1402,10 @@ struct AMDGPUStreamTy {
hsa_signal_t InputSignalRaw = InputSignal->get();
return utils::asyncMemCopy(UseMultipleSdmaEngines, Dst, Agent, Inter,
Agent, CopySize, 1, &InputSignalRaw,
- OutputSignal->get());
+ OutputSignals[1]->get());
}
return utils::asyncMemCopy(UseMultipleSdmaEngines, Dst, Agent, Inter, Agent,
- CopySize, 0, nullptr, OutputSignal->get());
+ CopySize, 0, nullptr, OutputSignals[1]->get());
}
// AMDGPUDeviceTy is incomplete here, passing the underlying agent instead
More information about the Openmp-commits mailing list