[Openmp-commits] [PATCH] D153837: Synchronize after each GPU action in the nextgen plugin

Eric Wright via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Tue Jun 27 00:12:23 PDT 2023


efwright created this revision.
efwright added a reviewer: jdoerfert.
Herald added a project: All.
efwright requested review of this revision.
Herald added a project: OpenMP.
Herald added a subscriber: openmp-commits.

This patch adds a debug option that forces a synchronization immediately after each GPU kernel launch and data transfer. The option is enabled by setting the LIBOMPTARGET_FORCE_SYNCHRONIZE environment variable. The synchronization is currently performed in the common plugin interface, so it should hopefully apply to all architectures. Alternatively, we could do it inside the individual architecture implementations, for example by placing a "cudaStreamSynchronize" call immediately after the "cudaLaunchKernel" call. However, I think the current approach is basically equivalent and avoids having to repeat the change for each architecture.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153837

Files:
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp


Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -463,6 +463,7 @@
 
   /// Synchronize current thread with the pending operations on the async info.
   Error synchronizeImpl(__tgt_async_info &AsyncInfo) override {
+
     CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo.Queue);
     CUresult Res = cuStreamSynchronize(Stream);
 
@@ -847,6 +848,10 @@
                      /* gridDimZ */ 1, NumThreads,
                      /* blockDimY */ 1, /* blockDimZ */ 1, MaxDynCGroupMem,
                      Stream, (void **)Args, nullptr);
+
+  //Plugin::check(cuStreamSynchronize(Stream), "Error in stream synchronize for '%s': %s", getName());
+  //cuStreamSynchronize(Stream);
+
   return Plugin::check(Res, "Error in cuLaunchKernel for '%s': %s", getName());
 }
 
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -529,6 +529,11 @@
   /// Register the offload entries for a specific image on the device.
   Error registerOffloadEntries(DeviceImageTy &Image);
 
+  /// Force a synchronization if the 'LIBOMPTARGET_FORCE_SYNCHRONIZE'
+  /// environment variable is set.
+  // Error forceSynchronize(__tgt_async_info *AsyncInfo);
+  void checkForForceSynchronize(__tgt_async_info *AsyncInfo);
+
   /// Synchronize the current thread with the pending operations on the
   /// __tgt_async_info structure.
   Error synchronize(__tgt_async_info *AsyncInfo);
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -247,6 +247,18 @@
   return Plugin::success();
 }
 
+void GenericDeviceTy::checkForForceSynchronize(__tgt_async_info *AsyncInfo) {
+  if (std::getenv("LIBOMPTARGET_FORCE_SYNCHRONIZE")) {
+    if (AsyncInfo) {
+      auto SyncErr = synchronize(AsyncInfo);
+      if (SyncErr) {
+        REPORT("Failure to synchronize stream %p: %s\n", AsyncInfo->Queue,
+               toString(std::move(SyncErr)).data());
+      }
+    }
+  }
+}
+
 Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                               ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs,
                               AsyncInfoWrapperTy &AsyncInfoWrapper) const {
@@ -926,6 +938,8 @@
   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
 
   auto Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper);
+  checkForForceSynchronize(AsyncInfo);
+
   AsyncInfoWrapper.finalize(Err);
   return Err;
 }
@@ -935,6 +949,8 @@
   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
 
   auto Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper);
+  checkForForceSynchronize(AsyncInfo);
+
   AsyncInfoWrapper.finalize(Err);
   return Err;
 }
@@ -945,6 +961,8 @@
   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
 
   auto Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper);
+  checkForForceSynchronize(AsyncInfo);
+
   AsyncInfoWrapper.finalize(Err);
   return Err;
 }
@@ -966,6 +984,7 @@
 
   auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs,
                                   AsyncInfoWrapper);
+  checkForForceSynchronize(AsyncInfo);
 
   if (RecordReplay.isRecordingOrReplaying() &&
       RecordReplay.isSaveOutputEnabled())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D153837.534851.patch
Type: text/x-patch
Size: 3841 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20230627/59a8c635/attachment.bin>


More information about the Openmp-commits mailing list