[Openmp-commits] [openmp] [libomptarget][nextgen-plugin][NFC] Clean-up InputSignal checks (PR #83458)

Gheorghe-Teodor Bercea via Openmp-commits openmp-commits at lists.llvm.org
Thu Feb 29 10:22:10 PST 2024


https://github.com/doru1004 created https://github.com/llvm/llvm-project/pull/83458

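Clean up the InputSignal checks: remove the separate blocks that reset `InputSignal` to `nullptr` when the signal is already satisfied, and test `InputSignal->load()` directly in the conditions that use the signal.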

>From 6d4fd2770d804eb0d22e01af1fcb1a7079fbcf38 Mon Sep 17 00:00:00 2001
From: Doru Bercea <doru.bercea at amd.com>
Date: Thu, 29 Feb 2024 13:21:12 -0500
Subject: [PATCH] Clean-up InputSignal checks

---
 .../plugins-nextgen/amdgpu/src/rtl.cpp        | 28 +++----------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 81634ae1edc490..fce7454bf2800d 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -715,16 +715,12 @@ struct AMDGPUQueueTy {
     std::lock_guard<std::mutex> Lock(Mutex);
     assert(Queue && "Interacted with a non-initialized queue!");
 
-    // Avoid defining the input dependency if already satisfied.
-    if (InputSignal && !InputSignal->load())
-      InputSignal = nullptr;
-
     // Add a barrier packet before the kernel packet in case there is a pending
     // preceding operation. The barrier packet will delay the processing of
     // subsequent queue's packets until the barrier input signal are satisfied.
     // No need output signal needed because the dependency is already guaranteed
     // by the queue barrier itself.
-    if (InputSignal)
+    if (InputSignal && InputSignal->load())
       if (auto Err = pushBarrierImpl(nullptr, InputSignal))
         return Err;
 
@@ -1254,12 +1250,8 @@ struct AMDGPUStreamTy {
     // Consume stream slot and compute dependencies.
     auto [Curr, InputSignal] = consume(OutputSignal);
 
-    // Avoid defining the input dependency if already satisfied.
-    if (InputSignal && !InputSignal->load())
-      InputSignal = nullptr;
-
     // Issue the async memory copy.
-    if (InputSignal) {
+    if (InputSignal && InputSignal->load()) {
       hsa_signal_t InputSignalRaw = InputSignal->get();
       return utils::asyncMemCopy(UseMultipleSdmaEngines, Dst, Agent, Src, Agent,
                                  CopySize, 1, &InputSignalRaw,
@@ -1293,17 +1285,13 @@ struct AMDGPUStreamTy {
     // Consume stream slot and compute dependencies.
     auto [Curr, InputSignal] = consume(OutputSignals[0]);
 
-    // Avoid defining the input dependency if already satisfied.
-    if (InputSignal && !InputSignal->load())
-      InputSignal = nullptr;
-
     // Setup the post action for releasing the intermediate buffer.
     if (auto Err = Slots[Curr].schedReleaseBuffer(Inter, MemoryManager))
       return Err;
 
     // Issue the first step: device to host transfer. Avoid defining the input
     // dependency if already satisfied.
-    if (InputSignal) {
+    if (InputSignal && InputSignal->load()) {
       hsa_signal_t InputSignalRaw = InputSignal->get();
       if (auto Err = utils::asyncMemCopy(
               UseMultipleSdmaEngines, Inter, Agent, Src, Agent, CopySize, 1,
@@ -1361,12 +1349,8 @@ struct AMDGPUStreamTy {
     // Consume stream slot and compute dependencies.
     auto [Curr, InputSignal] = consume(OutputSignal);
 
-    // Avoid defining the input dependency if already satisfied.
-    if (InputSignal && !InputSignal->load())
-      InputSignal = nullptr;
-
     // Issue the first step: host to host transfer.
-    if (InputSignal) {
+    if (InputSignal && InputSignal->load()) {
       // The std::memcpy is done asynchronously using an async handler. We store
       // the function's information in the action but it is not actually a
       // post action.
@@ -1429,10 +1413,6 @@ struct AMDGPUStreamTy {
     // Consume stream slot and compute dependencies.
     auto [Curr, InputSignal] = consume(OutputSignal);
 
-    // Avoid defining the input dependency if already satisfied.
-    if (InputSignal && !InputSignal->load())
-      InputSignal = nullptr;
-
     // The agents need to have access to the corresponding memory
     // This is presently only true if the pointers were originally
     // allocated by this runtime or the caller made the appropriate



More information about the Openmp-commits mailing list