[Openmp-commits] [openmp] [libomptarget][nextgen-plugin] Always use a signal to trigger completion of H2D data transfers (PR #83475)

Thu Feb 29 12:22:01 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Gheorghe-Teodor Bercea (doru1004)

<details>
<summary>Changes</summary>

Always use a signal to trigger completion of host-to-device (H2D) data transfers.

---
Full diff: https://github.com/llvm/llvm-project/pull/83475.diff


1 File Affected:

- (modified) openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp (+8-15) 


``````````diff

diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 81634ae1edc490..55794d8bbf2264 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1354,12 +1354,10 @@ struct AMDGPUStreamTy {
       Signal->increaseUseCount();
     }
 
-    AMDGPUSignalTy *OutputSignal = OutputSignals[0];
-
     std::lock_guard<std::mutex> Lock(Mutex);
 
     // Consume stream slot and compute dependencies.
-    auto [Curr, InputSignal] = consume(OutputSignal);
+    auto [Curr, InputSignal] = consume(OutputSignals[0]);
 
     // Avoid defining the input dependency if already satisfied.
     if (InputSignal && !InputSignal->load())
@@ -1383,22 +1381,17 @@ struct AMDGPUStreamTy {
       if (auto Err = Plugin::check(Status,
                                    "Error in hsa_amd_signal_async_handler: %s"))
         return Err;
-
-      // Let's use now the second output signal.
-      OutputSignal = OutputSignals[1];
-
-      // Consume another stream slot and compute dependencies.
-      std::tie(Curr, InputSignal) = consume(OutputSignal);
     } else {
       // All preceding operations completed, copy the memory synchronously.
       std::memcpy(Inter, Src, CopySize);
 
-      // Return the second signal because it will not be used.
-      OutputSignals[1]->decreaseUseCount();
-      if (auto Err = SignalManager.returnResource(OutputSignals[1]))
-        return Err;
+      // Signal the end of the operation.
+      Slots[Curr].Signal->signal();
     }
 
+    // Consume another stream slot and compute dependencies.
+    std::tie(Curr, InputSignal) = consume(OutputSignals[1]);
+
     // Setup the post action to release the intermediate pinned buffer.
     if (auto Err = Slots[Curr].schedReleaseBuffer(Inter, MemoryManager))
       return Err;
@@ -1409,10 +1402,10 @@ struct AMDGPUStreamTy {
       hsa_signal_t InputSignalRaw = InputSignal->get();
       return utils::asyncMemCopy(UseMultipleSdmaEngines, Dst, Agent, Inter,
                                  Agent, CopySize, 1, &InputSignalRaw,
-                                 OutputSignal->get());
+                                 OutputSignals[1]->get());
     }
     return utils::asyncMemCopy(UseMultipleSdmaEngines, Dst, Agent, Inter, Agent,
-                               CopySize, 0, nullptr, OutputSignal->get());
+                               CopySize, 0, nullptr, OutputSignals[1]->get());
   }
 
   // AMDGPUDeviceTy is incomplete here, passing the underlying agent instead

``````````

</details>


https://github.com/llvm/llvm-project/pull/83475