[Openmp-commits] [openmp] [OpenMP] Fix record-replay allocation order for kernel environment (PR #71863)

Konstantinos Parasyris via Openmp-commits openmp-commits at lists.llvm.org
Thu Nov 9 12:39:01 PST 2023


https://github.com/koparasy created https://github.com/llvm/llvm-project/pull/71863

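(No description was provided with this pull request.)

Summary of the two patches below: patch 1/2 reverts commit 726ee40f524918f9a6a6bba5a73e4d88c02a2cc3, moving the record-replay `saveKernelInputInfo` call out of `GenericKernelTy::launch` and back into `GenericDeviceTy::launchKernel`. Patch 2/2 splits `saveKernelInputInfo` into `saveKernelInput` (dumps the globals and device memory, called before the kernel launch) and `saveKernelDescr` (writes the JSON kernel descriptor, called after the launch), and makes `llvm-omp-kernel-replay` copy the full size of the recorded memory buffer instead of the size read from the JSON file.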

>From 94bd3497cf70bafa75b474a408140cca0850d5c9 Mon Sep 17 00:00:00 2001
From: koparasy <parasyris1 at llnl.gov>
Date: Thu, 9 Nov 2023 11:59:40 -0800
Subject: [PATCH 1/2] Revert "[OpenMP] Move the recording code to account for
 KernelLaunchEnvironment"

This reverts commit 726ee40f524918f9a6a6bba5a73e4d88c02a2cc3.
---
 .../common/PluginInterface/PluginInterface.cpp       | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 117ed94a1da6ffa..a47481104430f36 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -504,12 +504,6 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
           printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
     return Err;
 
-  if (RecordReplay.isRecording())
-    RecordReplay.saveKernelInputInfo(
-        getName(), getImage(), ArgPtrs, ArgOffsets,
-        KernelArgs.NumArgs - /* KernelLaunchEnvironment */ 1, NumBlocks,
-        NumThreads, KernelArgs.Tripcount);
-
   return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs,
                     KernelArgsPtr, AsyncInfoWrapper);
 }
@@ -1411,6 +1405,12 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
   GenericKernelTy &GenericKernel =
       *reinterpret_cast<GenericKernelTy *>(EntryPtr);
 
+  if (RecordReplay.isRecording())
+    RecordReplay.saveKernelInputInfo(
+        GenericKernel.getName(), GenericKernel.getImage(), ArgPtrs, ArgOffsets,
+        KernelArgs.NumArgs, KernelArgs.NumTeams[0], KernelArgs.ThreadLimit[0],
+        KernelArgs.Tripcount);
+
   if (RecordReplay.isRecording())
     RecordReplay.saveImage(GenericKernel.getName(), GenericKernel.getImage());
 

>From 4e494f4fd848fbe0647172a0b300a7028f281497 Mon Sep 17 00:00:00 2001
From: koparasy <parasyris1 at llnl.gov>
Date: Thu, 9 Nov 2023 12:37:50 -0800
Subject: [PATCH 2/2] [OpenMP] Fix record-replay allocation order for kernel
 environment

---
 .../PluginInterface/PluginInterface.cpp       | 41 +++++++++++--------
 .../kernelreplay/llvm-omp-kernel-replay.cpp   |  2 +-
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index a47481104430f36..b55509c2f28fffc 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -231,10 +231,9 @@ struct RecordReplayTy {
     OS.close();
   }
 
-  void saveKernelInputInfo(const char *Name, DeviceImageTy &Image,
-                           void **ArgPtrs, ptrdiff_t *ArgOffsets,
-                           int32_t NumArgs, uint64_t NumTeamsClause,
-                           uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
+  void saveKernelDescr(const char *Name, void **ArgPtrs, ptrdiff_t *ArgOffsets,
+                       int32_t NumArgs, uint64_t NumTeamsClause,
+                       uint32_t ThreadLimitClause, uint64_t LoopTripCount) {
     json::Object JsonKernelInfo;
     JsonKernelInfo["Name"] = Name;
     JsonKernelInfo["NumArgs"] = NumArgs;
@@ -255,12 +254,6 @@ struct RecordReplayTy {
       JsonArgOffsets.push_back(ArgOffsets[I]);
     JsonKernelInfo["ArgOffsets"] = json::Value(std::move(JsonArgOffsets));
 
-    SmallString<128> MemoryFilename = {Name, ".memory"};
-    dumpDeviceMemory(MemoryFilename);
-
-    SmallString<128> GlobalsFilename = {Name, ".globals"};
-    dumpGlobals(GlobalsFilename, Image);
-
     SmallString<128> JsonFilename = {Name, ".json"};
     std::error_code EC;
     raw_fd_ostream JsonOS(JsonFilename.str(), EC);
@@ -271,6 +264,14 @@ struct RecordReplayTy {
     JsonOS.close();
   }
 
+  void saveKernelInput(const char *Name, DeviceImageTy &Image) {
+    SmallString<128> GlobalsFilename = {Name, ".globals"};
+    dumpGlobals(GlobalsFilename, Image);
+
+    SmallString<128> MemoryFilename = {Name, ".memory"};
+    dumpDeviceMemory(MemoryFilename);
+  }
+
   void saveKernelOutputInfo(const char *Name) {
     SmallString<128> OutputFilename = {
         Name, (isRecording() ? ".original.output" : ".replay.output")};
@@ -1405,18 +1406,21 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
   GenericKernelTy &GenericKernel =
       *reinterpret_cast<GenericKernelTy *>(EntryPtr);
 
-  if (RecordReplay.isRecording())
-    RecordReplay.saveKernelInputInfo(
-        GenericKernel.getName(), GenericKernel.getImage(), ArgPtrs, ArgOffsets,
-        KernelArgs.NumArgs, KernelArgs.NumTeams[0], KernelArgs.ThreadLimit[0],
-        KernelArgs.Tripcount);
-
-  if (RecordReplay.isRecording())
+  if (RecordReplay.isRecording()) {
     RecordReplay.saveImage(GenericKernel.getName(), GenericKernel.getImage());
+    RecordReplay.saveKernelInput(GenericKernel.getName(),
+                                 GenericKernel.getImage());
+  }
 
   auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs,
                                   AsyncInfoWrapper);
 
+  if (RecordReplay.isRecording())
+    RecordReplay.saveKernelDescr(GenericKernel.getName(), ArgPtrs, ArgOffsets,
+                                 KernelArgs.NumArgs, KernelArgs.NumTeams[0],
+                                 KernelArgs.ThreadLimit[0],
+                                 KernelArgs.Tripcount);
+
   // 'finalize' here to guarantee next record-replay actions are in-sync
   AsyncInfoWrapper.finalize(Err);
 
@@ -1845,7 +1849,8 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
                                 int32_t DstDeviceId, void *DstPtr,
                                 int64_t Size) {
   return __tgt_rtl_data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
-                                       Size, /* AsyncInfoPtr */ nullptr);
+                                       Size,
+                                       /* AsyncInfoPtr */ nullptr);
 }
 
 int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
diff --git a/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
index 93fc3e7853f8e9c..254be7db6e01a41 100644
--- a/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
+++ b/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -147,7 +147,7 @@ int main(int argc, char **argv) {
   uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()];
   std::memcpy(recored_data,
               const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()),
-              DeviceMemorySizeJson.value() * sizeof(uint8_t));
+              DeviceMemoryMB.get()->getBufferSize());
 
   __tgt_target_kernel_replay(
       /* Loc */ nullptr, DeviceId, KernelEntry.addr, (char *)recored_data,



More information about the Openmp-commits mailing list