[llvm-branch-commits] [llvm] [offload][OpenMP] Fix record replay when no memory is used (PR #201771)

Kevin Sala Penades via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jun 18 00:13:09 PDT 2026


https://github.com/kevinsala updated https://github.com/llvm/llvm-project/pull/201771

>From f8a0c401f0abdf7a4f9f05934e13b6e490bef62d Mon Sep 17 00:00:00 2001
From: Kevin Sala <salapenades1 at llnl.gov>
Date: Fri, 5 Jun 2026 00:27:07 -0700
Subject: [PATCH] [offload][OpenMP] Fix record replay when no memory is used

Progams that do not use any memory (e.g., no mappings) were failing because
we were trying to execute zero size transfers.
---
 offload/libomptarget/omptarget.cpp            | 30 +++++++++------
 .../common/src/RecordReplay.cpp               | 37 +++++++++----------
 .../record-replay-empty-memory.cpp            | 26 +++++++++++++
 .../kernelreplay/llvm-omp-kernel-replay.cpp   |  3 +-
 4 files changed, 64 insertions(+), 32 deletions(-)
 create mode 100644 offload/test/tools/omp-kernel-replay/record-replay-empty-memory.cpp

diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 4d7a01853c2e1..facc8a0a7b030 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -2440,6 +2440,7 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
   // Initialize the device memory of each global.
   for (int32_t I = 0; I < NumGlobals; ++I) {
     assert(Globals[I].AuxAddr && "Global has no AuxAddr.");
+    assert(Globals[I].Size && "Global has Size zero.");
 
     // Initialize the value of the global in the device.
     int Ret = Device.submitData(Symbols[I + 1].DevPtr, Globals[I].AuxAddr,
@@ -2450,25 +2451,30 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
     }
   }
 
-  // Reuse a previous device allocation or allocate a new device buffer.
+  // Reuse a previous device allocation or allocate a new device buffer. Do not
+  // allocate anything if the size is zero.
   void *&TgtPtr = ReuseDeviceAlloc;
-  if (!TgtPtr)
+  if (!TgtPtr && DeviceMemorySize) {
     TgtPtr = Device.allocData(DeviceMemorySize, /*HstPtr=*/nullptr,
                               TARGET_ALLOC_DEFAULT);
-  if (!TgtPtr) {
-    REPORT() << "Failed to allocate device memory.";
-    return OFFLOAD_FAIL;
+    if (!TgtPtr) {
+      REPORT() << "Failed to allocate device memory.";
+      return OFFLOAD_FAIL;
+    }
   }
 
   // Save the device allocation for future replays of the same kernel.
   if (ReplayOutcome)
     ReplayOutcome->ReplayDeviceAlloc = TgtPtr;
 
-  int Ret =
-      Device.submitData(TgtPtr, DeviceMemory, DeviceMemorySize, AsyncInfo);
-  if (Ret != OFFLOAD_SUCCESS) {
-    REPORT() << "Failed to submit data to a global.";
-    return OFFLOAD_FAIL;
+  // Initialize the device memory.
+  if (DeviceMemorySize) {
+    int Ret =
+        Device.submitData(TgtPtr, DeviceMemory, DeviceMemorySize, AsyncInfo);
+    if (Ret != OFFLOAD_SUCCESS) {
+      REPORT() << "Failed to submit data to the device memory.";
+      return OFFLOAD_FAIL;
+    }
   }
 
   KernelArgsTy KernelArgs{};
@@ -2487,8 +2493,8 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
   KernelExtraArgsTy KernelExtraArgs{};
   KernelExtraArgs.ReplayOutcome = ReplayOutcome;
 
-  Ret = Device.launchKernel(Symbols[0].DevPtr, TgtArgs, TgtOffsets, KernelArgs,
-                            &KernelExtraArgs, AsyncInfo);
+  int Ret = Device.launchKernel(Symbols[0].DevPtr, TgtArgs, TgtOffsets,
+                                KernelArgs, &KernelExtraArgs, AsyncInfo);
   if (Ret != OFFLOAD_SUCCESS) {
     REPORT() << "Failed to launch kernel replay.";
     return OFFLOAD_FAIL;
diff --git a/offload/plugins-nextgen/common/src/RecordReplay.cpp b/offload/plugins-nextgen/common/src/RecordReplay.cpp
index a09a127c43ca4..30539454120b8 100644
--- a/offload/plugins-nextgen/common/src/RecordReplay.cpp
+++ b/offload/plugins-nextgen/common/src/RecordReplay.cpp
@@ -338,23 +338,24 @@ Error NativeRecordReplayTy::recordSnapshot(StringRef Filename) {
   uint64_t RecordSize = CurrentSize;
   AllocationLock.unlock();
 
-  ErrorOr<std::unique_ptr<WritableMemoryBuffer>> DeviceMemoryMB =
-      WritableMemoryBuffer::getNewUninitMemBuffer(RecordSize);
-  if (!DeviceMemoryMB)
-    return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
-                         "creating MemoryBuffer for device memory");
-
-  if (auto Err = Device.dataRetrieve(DeviceMemoryMB.get()->getBufferStart(),
-                                     StartAddr, RecordSize, nullptr))
-    return Err;
-
-  StringRef DeviceMemory(DeviceMemoryMB.get()->getBufferStart(), RecordSize);
+  std::unique_ptr<WritableMemoryBuffer> DeviceMB;
+  if (RecordSize) {
+    DeviceMB = WritableMemoryBuffer::getNewUninitMemBuffer(RecordSize);
+    if (!DeviceMB)
+      return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
+                           "creating MemoryBuffer for device memory");
+
+    if (auto Err = Device.dataRetrieve(DeviceMB->getBufferStart(), StartAddr,
+                                       RecordSize, nullptr))
+      return Err;
+  }
 
   std::error_code EC;
   raw_fd_ostream OS(Filename, EC);
   if (EC)
     return Plugin::error(ErrorCode::HOST_IO, "saving memory snapshot file");
-  OS << DeviceMemory;
+  if (DeviceMB)
+    OS.write(DeviceMB->getBufferStart(), RecordSize);
   OS.close();
   return Plugin::success();
 }
@@ -389,13 +390,12 @@ Error NativeRecordReplayTy::recordGlobals(StringRef Filename) {
     NumGlobals++;
   }
 
-  ErrorOr<std::unique_ptr<WritableMemoryBuffer>> GlobalsMB =
-      WritableMemoryBuffer::getNewUninitMemBuffer(TotalSize);
+  auto GlobalsMB = WritableMemoryBuffer::getNewUninitMemBuffer(TotalSize);
   if (!GlobalsMB)
     return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
                          "creating MemoryBuffer for globals memory");
 
-  void *BufferPtr = GlobalsMB.get()->getBufferStart();
+  void *BufferPtr = GlobalsMB->getBufferStart();
   *((uint32_t *)(BufferPtr)) = NumGlobals;
   BufferPtr = utils::advancePtr(BufferPtr, sizeof(uint32_t));
 
@@ -418,16 +418,15 @@ Error NativeRecordReplayTy::recordGlobals(StringRef Filename) {
       return Err;
     BufferPtr = utils::advancePtr(BufferPtr, Global.Size);
   }
-  assert(BufferPtr == GlobalsMB->get()->getBufferEnd() &&
+  assert(BufferPtr == GlobalsMB->getBufferEnd() &&
          "Buffer over or under-filled.");
   assert(TotalSize == (uint64_t)utils::getPtrDiff(
-                          BufferPtr, GlobalsMB->get()->getBufferStart()) &&
+                          BufferPtr, GlobalsMB->getBufferStart()) &&
          "Buffer size mismatch.");
 
-  StringRef GlobalsMemory(GlobalsMB.get()->getBufferStart(), TotalSize);
   std::error_code EC;
   raw_fd_ostream OS(Filename, EC);
-  OS << GlobalsMemory;
+  OS.write(GlobalsMB->getBufferStart(), TotalSize);
   OS.close();
   return Plugin::success();
 }
diff --git a/offload/test/tools/omp-kernel-replay/record-replay-empty-memory.cpp b/offload/test/tools/omp-kernel-replay/record-replay-empty-memory.cpp
new file mode 100644
index 0000000000000..0705c6d66ac8e
--- /dev/null
+++ b/offload/test/tools/omp-kernel-replay/record-replay-empty-memory.cpp
@@ -0,0 +1,26 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic
+// RUN: rm -rf %t.testdir
+// RUN: mkdir -p %t.testdir
+// RUN: env LIBOMPTARGET_RECORD=1 LIBOMPTARGET_RECORD_MEMSIZE=536870912 LIBOMPTARGET_RECORD_DIR=%t.testdir %libomptarget-run-generic 2>&1 | %fcheck-generic
+// RUN: ls -t %t.testdir/*.json | sed -n '1p' | grep . | xargs -I {} %omp-kernel-replay --verify {}
+// clang-format on
+
+// REQUIRES: gpu
+
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: x86_64-unknown-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: intelgpu
+
+#include <cstdint>
+#include <cstdio>
+
+int main() {
+#pragma omp target teams num_teams(256)
+  {
+  }
+
+  // CHECK: PASS
+  printf("PASS\n");
+}
diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
index a5bda7a0f0444..4335002fd8c77 100644
--- a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
+++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -130,7 +130,8 @@ Error verifyReplayOutput(StringRef RecordOutputFilename,
   if (!ReplayOutputBufferOrErr)
     return createErr("failed to read the kernel replay output file");
 
-  // Compare record and replay outputs to verify they match.
+  // Compare record and replay outputs to verify they match. If they are both
+  // empty, the verification is successful.
   StringRef RecordOutput = RecordOutputBufferOrErr.get()->getBuffer();
   StringRef ReplayOutput = ReplayOutputBufferOrErr.get()->getBuffer();
   if (RecordOutput != ReplayOutput)



More information about the llvm-branch-commits mailing list