[Openmp-commits] [openmp] 41566fb - [OpenMP][FIX] Ensure recording works properly w/ late allocations

Johannes Doerfert via Openmp-commits openmp-commits at lists.llvm.org
Mon Nov 20 17:24:35 PST 2023


Author: Johannes Doerfert
Date: 2023-11-20T17:15:33-08:00
New Revision: 41566fb8520a1284678d5a344632930b0d3d1fbf

URL: https://github.com/llvm/llvm-project/commit/41566fb8520a1284678d5a344632930b0d3d1fbf
DIFF: https://github.com/llvm/llvm-project/commit/41566fb8520a1284678d5a344632930b0d3d1fbf.diff

LOG: [OpenMP][FIX] Ensure recording works properly w/ late allocations

Added: 
    

Modified: 
    openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 663e4bfa3536b5a..08946e21035014e 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -231,9 +231,9 @@ struct RecordReplayTy {
     OS.close();
   }
 
-  void saveKernelDescr(const char *Name, void **ArgPtrs, ptrdiff_t *ArgOffsets,
-                       int32_t NumArgs, uint64_t NumTeamsClause,
-                       uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
+  void saveKernelDescr(const char *Name, void **ArgPtrs, int32_t NumArgs,
+                       uint64_t NumTeamsClause, uint32_t ThreadLimitClause,
+                       uint64_t LoopTripCount) {
     json::Object JsonKernelInfo;
     JsonKernelInfo["Name"] = Name;
     JsonKernelInfo["NumArgs"] = NumArgs;
@@ -251,7 +251,7 @@ struct RecordReplayTy {
 
     json::Array JsonArgOffsets;
     for (int I = 0; I < NumArgs; ++I)
-      JsonArgOffsets.push_back(ArgOffsets[I]);
+      JsonArgOffsets.push_back(0);
     JsonKernelInfo["ArgOffsets"] = json::Value(std::move(JsonArgOffsets));
 
     SmallString<128> JsonFilename = {Name, ".json"};
@@ -427,6 +427,11 @@ Expected<KernelLaunchEnvironmentTy *>
 GenericKernelTy::getKernelLaunchEnvironment(
     GenericDeviceTy &GenericDevice,
     AsyncInfoWrapperTy &AsyncInfoWrapper) const {
+  // Ctor/Dtor have no arguments, replaying uses the original kernel launch
+  // environment.
+  if (isCtorOrDtor() || RecordReplay.isReplaying())
+    return nullptr;
+
   // TODO: Check if the kernel needs a launch environment.
   auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy),
                                             /*HostPtr=*/nullptr,
@@ -501,6 +506,15 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
       getNumBlocks(GenericDevice, KernelArgs.NumTeams, KernelArgs.Tripcount,
                    NumThreads, KernelArgs.ThreadLimit[0] > 0);
 
+  // Record the kernel description after we modified the argument count and num
+  // blocks/threads.
+  if (RecordReplay.isRecording()) {
+    RecordReplay.saveImage(getName(), getImage());
+    RecordReplay.saveKernelInput(getName(), getImage());
+    RecordReplay.saveKernelDescr(getName(), Ptrs.data(), KernelArgs.NumArgs,
+                                 NumBlocks, NumThreads, KernelArgs.Tripcount);
+  }
+
   if (auto Err =
           printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
     return Err;
@@ -517,16 +531,20 @@ void *GenericKernelTy::prepareArgs(
   if (isCtorOrDtor())
     return nullptr;
 
-  NumArgs += 1;
+  uint32_t KLEOffset = !!KernelLaunchEnvironment;
+  NumArgs += KLEOffset;
 
   Args.resize(NumArgs);
   Ptrs.resize(NumArgs);
 
-  Ptrs[0] = KernelLaunchEnvironment;
-  Args[0] = &Ptrs[0];
+  if (KernelLaunchEnvironment) {
+    Ptrs[0] = KernelLaunchEnvironment;
+    Args[0] = &Ptrs[0];
+  }
 
-  for (int I = 1; I < NumArgs; ++I) {
-    Ptrs[I] = (void *)((intptr_t)ArgPtrs[I - 1] + ArgOffsets[I - 1]);
+  for (int I = KLEOffset; I < NumArgs; ++I) {
+    Ptrs[I] =
+        (void *)((intptr_t)ArgPtrs[I - KLEOffset] + ArgOffsets[I - KLEOffset]);
     Args[I] = &Ptrs[I];
   }
   return &Args[0];
@@ -808,7 +826,7 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
     return std::move(Err);
 
   // Setup the global device memory pool if needed.
-  if (shouldSetupDeviceMemoryPool()) {
+  if (!RecordReplay.isReplaying() && shouldSetupDeviceMemoryPool()) {
     uint64_t HeapSize;
     auto SizeOrErr = getDeviceHeapSize(HeapSize);
     if (SizeOrErr) {
@@ -1413,21 +1431,9 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
   GenericKernelTy &GenericKernel =
       *reinterpret_cast<GenericKernelTy *>(EntryPtr);
 
-  if (RecordReplay.isRecording()) {
-    RecordReplay.saveImage(GenericKernel.getName(), GenericKernel.getImage());
-    RecordReplay.saveKernelInput(GenericKernel.getName(),
-                                 GenericKernel.getImage());
-  }
-
   auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs,
                                   AsyncInfoWrapper);
 
-  if (RecordReplay.isRecording())
-    RecordReplay.saveKernelDescr(GenericKernel.getName(), ArgPtrs, ArgOffsets,
-                                 KernelArgs.NumArgs, KernelArgs.NumTeams[0],
-                                 KernelArgs.ThreadLimit[0],
-                                 KernelArgs.Tripcount);
-
   // 'finalize' here to guarantee next record-replay actions are in-sync
   AsyncInfoWrapper.finalize(Err);
 


        


More information about the Openmp-commits mailing list