[llvm-branch-commits] [llvm] [offload][OpenMP] Fix record replay when no memory is used (PR #201771)
Kevin Sala Penades via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jun 18 00:13:09 PDT 2026
https://github.com/kevinsala updated https://github.com/llvm/llvm-project/pull/201771
>From f8a0c401f0abdf7a4f9f05934e13b6e490bef62d Mon Sep 17 00:00:00 2001
From: Kevin Sala <salapenades1 at llnl.gov>
Date: Fri, 5 Jun 2026 00:27:07 -0700
Subject: [PATCH] [offload][OpenMP] Fix record replay when no memory is used
Progams that do not use any memory (e.g., no mappings) were failing because
we were trying to execute zero size transfers.
---
offload/libomptarget/omptarget.cpp | 30 +++++++++------
.../common/src/RecordReplay.cpp | 37 +++++++++----------
.../record-replay-empty-memory.cpp | 26 +++++++++++++
.../kernelreplay/llvm-omp-kernel-replay.cpp | 3 +-
4 files changed, 64 insertions(+), 32 deletions(-)
create mode 100644 offload/test/tools/omp-kernel-replay/record-replay-empty-memory.cpp
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 4d7a01853c2e1..facc8a0a7b030 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -2440,6 +2440,7 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
// Initialize the device memory of each global.
for (int32_t I = 0; I < NumGlobals; ++I) {
assert(Globals[I].AuxAddr && "Global has no AuxAddr.");
+ assert(Globals[I].Size && "Global has Size zero.");
// Initialize the value of the global in the device.
int Ret = Device.submitData(Symbols[I + 1].DevPtr, Globals[I].AuxAddr,
@@ -2450,25 +2451,30 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
}
}
- // Reuse a previous device allocation or allocate a new device buffer.
+ // Reuse a previous device allocation or allocate a new device buffer. Do not
+ // allocate anything if the size is zero.
void *&TgtPtr = ReuseDeviceAlloc;
- if (!TgtPtr)
+ if (!TgtPtr && DeviceMemorySize) {
TgtPtr = Device.allocData(DeviceMemorySize, /*HstPtr=*/nullptr,
TARGET_ALLOC_DEFAULT);
- if (!TgtPtr) {
- REPORT() << "Failed to allocate device memory.";
- return OFFLOAD_FAIL;
+ if (!TgtPtr) {
+ REPORT() << "Failed to allocate device memory.";
+ return OFFLOAD_FAIL;
+ }
}
// Save the device allocation for future replays of the same kernel.
if (ReplayOutcome)
ReplayOutcome->ReplayDeviceAlloc = TgtPtr;
- int Ret =
- Device.submitData(TgtPtr, DeviceMemory, DeviceMemorySize, AsyncInfo);
- if (Ret != OFFLOAD_SUCCESS) {
- REPORT() << "Failed to submit data to a global.";
- return OFFLOAD_FAIL;
+ // Initialize the device memory.
+ if (DeviceMemorySize) {
+ int Ret =
+ Device.submitData(TgtPtr, DeviceMemory, DeviceMemorySize, AsyncInfo);
+ if (Ret != OFFLOAD_SUCCESS) {
+ REPORT() << "Failed to submit data to the device memory.";
+ return OFFLOAD_FAIL;
+ }
}
KernelArgsTy KernelArgs{};
@@ -2487,8 +2493,8 @@ int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
KernelExtraArgsTy KernelExtraArgs{};
KernelExtraArgs.ReplayOutcome = ReplayOutcome;
- Ret = Device.launchKernel(Symbols[0].DevPtr, TgtArgs, TgtOffsets, KernelArgs,
- &KernelExtraArgs, AsyncInfo);
+ int Ret = Device.launchKernel(Symbols[0].DevPtr, TgtArgs, TgtOffsets,
+ KernelArgs, &KernelExtraArgs, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT() << "Failed to launch kernel replay.";
return OFFLOAD_FAIL;
diff --git a/offload/plugins-nextgen/common/src/RecordReplay.cpp b/offload/plugins-nextgen/common/src/RecordReplay.cpp
index a09a127c43ca4..30539454120b8 100644
--- a/offload/plugins-nextgen/common/src/RecordReplay.cpp
+++ b/offload/plugins-nextgen/common/src/RecordReplay.cpp
@@ -338,23 +338,24 @@ Error NativeRecordReplayTy::recordSnapshot(StringRef Filename) {
uint64_t RecordSize = CurrentSize;
AllocationLock.unlock();
- ErrorOr<std::unique_ptr<WritableMemoryBuffer>> DeviceMemoryMB =
- WritableMemoryBuffer::getNewUninitMemBuffer(RecordSize);
- if (!DeviceMemoryMB)
- return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
- "creating MemoryBuffer for device memory");
-
- if (auto Err = Device.dataRetrieve(DeviceMemoryMB.get()->getBufferStart(),
- StartAddr, RecordSize, nullptr))
- return Err;
-
- StringRef DeviceMemory(DeviceMemoryMB.get()->getBufferStart(), RecordSize);
+ std::unique_ptr<WritableMemoryBuffer> DeviceMB;
+ if (RecordSize) {
+ DeviceMB = WritableMemoryBuffer::getNewUninitMemBuffer(RecordSize);
+ if (!DeviceMB)
+ return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
+ "creating MemoryBuffer for device memory");
+
+ if (auto Err = Device.dataRetrieve(DeviceMB->getBufferStart(), StartAddr,
+ RecordSize, nullptr))
+ return Err;
+ }
std::error_code EC;
raw_fd_ostream OS(Filename, EC);
if (EC)
return Plugin::error(ErrorCode::HOST_IO, "saving memory snapshot file");
- OS << DeviceMemory;
+ if (DeviceMB)
+ OS.write(DeviceMB->getBufferStart(), RecordSize);
OS.close();
return Plugin::success();
}
@@ -389,13 +390,12 @@ Error NativeRecordReplayTy::recordGlobals(StringRef Filename) {
NumGlobals++;
}
- ErrorOr<std::unique_ptr<WritableMemoryBuffer>> GlobalsMB =
- WritableMemoryBuffer::getNewUninitMemBuffer(TotalSize);
+ auto GlobalsMB = WritableMemoryBuffer::getNewUninitMemBuffer(TotalSize);
if (!GlobalsMB)
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
"creating MemoryBuffer for globals memory");
- void *BufferPtr = GlobalsMB.get()->getBufferStart();
+ void *BufferPtr = GlobalsMB->getBufferStart();
*((uint32_t *)(BufferPtr)) = NumGlobals;
BufferPtr = utils::advancePtr(BufferPtr, sizeof(uint32_t));
@@ -418,16 +418,15 @@ Error NativeRecordReplayTy::recordGlobals(StringRef Filename) {
return Err;
BufferPtr = utils::advancePtr(BufferPtr, Global.Size);
}
- assert(BufferPtr == GlobalsMB->get()->getBufferEnd() &&
+ assert(BufferPtr == GlobalsMB->getBufferEnd() &&
"Buffer over or under-filled.");
assert(TotalSize == (uint64_t)utils::getPtrDiff(
- BufferPtr, GlobalsMB->get()->getBufferStart()) &&
+ BufferPtr, GlobalsMB->getBufferStart()) &&
"Buffer size mismatch.");
- StringRef GlobalsMemory(GlobalsMB.get()->getBufferStart(), TotalSize);
std::error_code EC;
raw_fd_ostream OS(Filename, EC);
- OS << GlobalsMemory;
+ OS.write(GlobalsMB->getBufferStart(), TotalSize);
OS.close();
return Plugin::success();
}
diff --git a/offload/test/tools/omp-kernel-replay/record-replay-empty-memory.cpp b/offload/test/tools/omp-kernel-replay/record-replay-empty-memory.cpp
new file mode 100644
index 0000000000000..0705c6d66ac8e
--- /dev/null
+++ b/offload/test/tools/omp-kernel-replay/record-replay-empty-memory.cpp
@@ -0,0 +1,26 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic
+// RUN: rm -rf %t.testdir
+// RUN: mkdir -p %t.testdir
+// RUN: env LIBOMPTARGET_RECORD=1 LIBOMPTARGET_RECORD_MEMSIZE=536870912 LIBOMPTARGET_RECORD_DIR=%t.testdir %libomptarget-run-generic 2>&1 | %fcheck-generic
+// RUN: ls -t %t.testdir/*.json | sed -n '1p' | grep . | xargs -I {} %omp-kernel-replay --verify {}
+// clang-format on
+
+// REQUIRES: gpu
+
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: x86_64-unknown-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: intelgpu
+
+#include <cstdint>
+#include <cstdio>
+
+int main() {
+#pragma omp target teams num_teams(256)
+ {
+ }
+
+ // CHECK: PASS
+ printf("PASS\n");
+}
diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
index a5bda7a0f0444..4335002fd8c77 100644
--- a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
+++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -130,7 +130,8 @@ Error verifyReplayOutput(StringRef RecordOutputFilename,
if (!ReplayOutputBufferOrErr)
return createErr("failed to read the kernel replay output file");
- // Compare record and replay outputs to verify they match.
+ // Compare record and replay outputs to verify they match. If they are both
+ // empty, the verification is successful.
StringRef RecordOutput = RecordOutputBufferOrErr.get()->getBuffer();
StringRef ReplayOutput = ReplayOutputBufferOrErr.get()->getBuffer();
if (RecordOutput != ReplayOutput)
More information about the llvm-branch-commits
mailing list