[Openmp-commits] [llvm] [openmp] [offload] Improve report printing for kernel recording (PR #204505)
Kevin Sala Penades via Openmp-commits
openmp-commits at lists.llvm.org
Thu Jun 18 12:49:37 PDT 2026
https://github.com/kevinsala updated https://github.com/llvm/llvm-project/pull/204505
>From c02f7b1d05dc137dbafa8c601bd14b8936e658a4 Mon Sep 17 00:00:00 2001
From: Kevin Sala <salapenades1 at llnl.gov>
Date: Wed, 17 Jun 2026 18:10:56 -0700
Subject: [PATCH 1/2] [offload] Improve report printing for kernel recording
---
offload/libomptarget/device.cpp | 10 +++-
offload/libomptarget/omptarget.cpp | 3 +-
.../common/include/PluginInterface.h | 5 +-
.../common/include/RecordReplay.h | 14 ++++--
.../common/src/PluginInterface.cpp | 8 +--
.../common/src/RecordReplay.cpp | 50 +++++++++++++------
openmp/docs/design/Runtimes.rst | 17 ++++++-
7 files changed, 76 insertions(+), 31 deletions(-)
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index 546f679353544..c977c369aef43 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -95,13 +95,19 @@ llvm::Error DeviceTy::init() {
Int32Envar OMPX_RecordDevice("LIBOMPTARGET_RECORD_DEVICE", 0);
StringEnvar OMPX_RecordOutputDir("LIBOMPTARGET_RECORD_DIR", "");
BoolEnvar OMPX_EmitRecordReport("LIBOMPTARGET_RECORD_REPORT", false);
+ StringEnvar OMPX_RecordReportFile("LIBOMPTARGET_RECORD_REPORT_FILE", "");
if (OMPX_RecordDevice != RTLDeviceID)
return llvm::Error::success();
+ // Print report if it was enabled explicitly or a report file was indicated.
+ bool EmitReport =
+ OMPX_EmitRecordReport || !OMPX_RecordReportFile.get().empty();
+
Ret = RTL->initialize_record_replay(
RTLDeviceID, OMPX_RecordMemSize, nullptr,
- /*IsRecord=*/true, /*IsNative=*/true, OMPX_RecordOutput,
- OMPX_EmitRecordReport, OMPX_RecordOutputDir.get().c_str());
+ /*IsRecord=*/true, /*IsNative=*/true, OMPX_RecordOutput, EmitReport,
+ OMPX_RecordReportFile.get().c_str(),
+ OMPX_RecordOutputDir.get().c_str());
if (Ret != OFFLOAD_SUCCESS)
return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE,
"failed to initialize RR in device %d\n",
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 17b215732d51b..4d7a01853c2e1 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -2383,7 +2383,8 @@ int target_activate_rr(DeviceTy &Device, uint64_t MemorySize, void *VAddr,
const char *OutputDirPath) {
return Device.RTL->initialize_record_replay(
Device.DeviceID, MemorySize, VAddr, IsRecord,
- /*IsNative=*/true, SaveOutput, EmitReport, OutputDirPath);
+ /*IsNative=*/true, SaveOutput, EmitReport, /*ReportFile=*/"",
+ OutputDirPath);
}
/// Executes a kernel using pre-recorded information for loading to
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index f50b07aad0209..6e208bbc7e056 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -1252,7 +1252,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error initRecordReplay(int64_t Size, void *VAddr, bool IsRecord,
bool IsNative, bool SaveOutput, bool EmitReport,
- const char *OutputDirPath) {
+ const char *ReportFile, const char *OutputDirPath) {
if (RecordReplay)
return Plugin::error(error::ErrorCode::INVALID_ARGUMENT,
"RR already initialized");
@@ -1267,7 +1267,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
RecordReplay =
new NativeRecordReplayTy(Status, OutputDirPath ? OutputDirPath : "",
- SaveOutput, EmitReport, *this);
+ SaveOutput, EmitReport, ReportFile, *this);
return RecordReplay->init(Size, VAddr);
}
@@ -1587,6 +1587,7 @@ struct GenericPluginTy {
int32_t initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
void *VAddr, bool IsRecord, bool IsNative,
bool SaveOutput, bool EmitReport,
+ const char *ReportFile,
const char *OutputDirPath);
/// Loads the associated binary into the plugin and returns a handle to it.
diff --git a/offload/plugins-nextgen/common/include/RecordReplay.h b/offload/plugins-nextgen/common/include/RecordReplay.h
index 65a861cc8a0cc..6f083943131d2 100644
--- a/offload/plugins-nextgen/common/include/RecordReplay.h
+++ b/offload/plugins-nextgen/common/include/RecordReplay.h
@@ -80,9 +80,12 @@ struct RecordReplayTy {
/// Whether a memory snapshot should be recorded a kernel execution.
bool SaveOutput;
- /// Whether a report should be emitted afther the recording.
+ /// Whether a report should be emitted after the recording.
bool EmitReport;
+ /// The name of the file where to emit the record report.
+ std::string ReportFile;
+
/// Reference to the corresponding device.
GenericDeviceTy &Device;
@@ -157,13 +160,14 @@ struct RecordReplayTy {
/// Tracker of record replay instances.
std::unordered_set<InstanceTy, InstanceHasher> Instances;
+ SmallVector<const InstanceTy *> OrderedInstances;
std::mutex InstancesLock;
public:
RecordReplayTy(StatusTy Status, StringRef OutputDirectoryStr, bool SaveOutput,
- bool EmitReport, GenericDeviceTy &Device)
+ bool EmitReport, StringRef ReportFile, GenericDeviceTy &Device)
: Status(Status), SaveOutput(SaveOutput), EmitReport(EmitReport),
- Device(Device) {
+ ReportFile(ReportFile.str()), Device(Device) {
if (OutputDirectoryStr == "")
OutputDirectory = std::filesystem::current_path();
else
@@ -260,10 +264,10 @@ struct RecordReplayTy {
/// The native kernel record replay support.
struct NativeRecordReplayTy : public RecordReplayTy {
NativeRecordReplayTy(StatusTy Status, StringRef OutputDirectoryStr,
- bool SaveOutput, bool EmitReport,
+ bool SaveOutput, bool EmitReport, StringRef ReportFile,
GenericDeviceTy &Device)
: RecordReplayTy(Status, OutputDirectoryStr, SaveOutput, EmitReport,
- Device) {}
+ ReportFile, Device) {}
private:
Error recordPrologueImpl(const GenericKernelTy &Kernel,
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index f8f362cf2b4ed..a17a7a9d3e7e3 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1498,13 +1498,13 @@ int32_t GenericPluginTy::is_data_exchangable(int32_t SrcDeviceId,
int32_t GenericPluginTy::initialize_record_replay(
int32_t DeviceId, int64_t MemorySize, void *VAddr, bool IsRecord,
- bool IsNative, bool SaveOutput, bool EmitReport,
+ bool IsNative, bool SaveOutput, bool EmitReport, const char *ReportFile,
const char *OutputDirPath) {
GenericDeviceTy &Device = getDevice(DeviceId);
- if (auto Err =
- Device.initRecordReplay(MemorySize, VAddr, IsRecord, IsNative,
- SaveOutput, EmitReport, OutputDirPath)) {
+ if (auto Err = Device.initRecordReplay(MemorySize, VAddr, IsRecord, IsNative,
+ SaveOutput, EmitReport, ReportFile,
+ OutputDirPath)) {
REPORT() << "Failure to initialize RR with " << MemorySize
<< " bytes on device " << DeviceId << ": "
<< toString(std::move(Err));
diff --git a/offload/plugins-nextgen/common/src/RecordReplay.cpp b/offload/plugins-nextgen/common/src/RecordReplay.cpp
index e89b516983e41..a09a127c43ca4 100644
--- a/offload/plugins-nextgen/common/src/RecordReplay.cpp
+++ b/offload/plugins-nextgen/common/src/RecordReplay.cpp
@@ -88,20 +88,34 @@ Error RecordReplayTy::deinit() {
}
Error RecordReplayTy::emitInstanceReport() {
+ llvm::raw_ostream *OutStream = &llvm::outs();
+ std::unique_ptr<llvm::raw_fd_ostream> FileOut;
+
+ if (!ReportFile.empty()) {
+ // The report file is emitted in the output directory.
+ std::string ReportFilePath =
+ (std::filesystem::absolute(OutputDirectory) / ReportFile).string();
+ std::error_code EC;
+ FileOut = std::make_unique<llvm::raw_fd_ostream>(ReportFilePath, EC);
+ if (EC)
+ return Plugin::error(ErrorCode::HOST_IO, "saving report file");
+ OutStream = FileOut.get();
+ }
+
std::lock_guard<std::mutex> LG(InstancesLock);
- llvm::outs() << "=== Kernel Record Report ===\n";
- llvm::outs() << "Directory: "
- << std::filesystem::absolute(OutputDirectory).string() << "\n";
- llvm::outs() << "Total Instances: " << Instances.size() << "\n";
- llvm::outs() << "JSON Filename, Kernel Name, Time (ns), Occurrences:\n";
+ *OutStream << "=== Kernel Record Report ===\n";
+ *OutStream << "Directory: "
+ << std::filesystem::absolute(OutputDirectory).string() << "\n";
+ *OutStream << "Total Instances: " << OrderedInstances.size() << "\n";
+ *OutStream << "JSON Filename, Kernel Name, Time (ns), Occurrences:\n";
SmallString<128> Filename;
- for (const auto &Inst : Instances)
- llvm::outs()
- << getFilename(Inst, FileTy::Descriptor, /*IncludeDir=*/false).c_str()
- << ", " << Inst.Kernel.getName() << ", " << Inst.getRecordedTimeNs()
- << ", " << Inst.Occurrences << "\n";
- llvm::outs() << "=== End Kernel Record Report ===\n";
+ for (const auto *Inst : OrderedInstances)
+ *OutStream
+ << getFilename(*Inst, FileTy::Descriptor, /*IncludeDir=*/false).c_str()
+ << ", " << Inst->Kernel.getName() << ", " << Inst->getRecordedTimeNs()
+ << ", " << Inst->Occurrences << "\n";
+ *OutStream << "=== End Kernel Record Report ===\n";
return Plugin::success();
}
@@ -114,18 +128,24 @@ RecordReplayTy::registerInstance(const GenericKernelTy &Kernel,
std::lock_guard<std::mutex> LG(InstancesLock);
auto [It, Inserted] = Instances.emplace(Kernel, NumTeams, NumThreads,
SharedMemorySize, ReplayOutcome);
+ // Keep insertion order.
+ if (Inserted)
+ OrderedInstances.push_back(&(*It));
+
// Increase the number of occurrences.
It->Occurrences += 1;
- return {*It, Inserted};
+
+ // Return reference and whether it was registered for the first time. Notice
+ // that registering an unregistered instance counts as a new registration.
+ return {*It, (It->Occurrences == 1)};
}
Error RecordReplayTy::unregisterInstance(const InstanceTy &Instance) {
assert(isReplaying() && "Cannot unregister instance when recording.");
+ // Do not remove it, it may be reused in the future.
std::lock_guard<std::mutex> LG(InstancesLock);
- size_t Erased = Instances.erase(Instance);
- if (Erased != 1)
- return Plugin::error(ErrorCode::INVALID_ARGUMENT, "invalid instance");
+ Instance.Occurrences = 0;
return Plugin::success();
}
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 8b3b7e9bed0c6..098946b2743d6 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -1279,6 +1279,7 @@ is provided below.
* ``LIBOMPTARGET_RECORD=[TRUE/FALSE] (default FALSE)``
* ``LIBOMPTARGET_RECORD_DIR=<Filepath>``
* ``LIBOMPTARGET_RECORD_REPORT=[TRUE/FALSE] (default FALSE)``
+* ``LIBOMPTARGET_RECORD_REPORT_FILE=<Filename>``
* ``LIBOMPTARGET_RECORD_MEMSIZE=<Num> (default 8*1024*1024*1024)``
* ``LIBOMPTARGET_RECORD_DEVICE=<Num> (default 0)``
* ``LIBOMPTARGET_RECORD_OUTPUT=[TRUE/FALSE] (default TRUE)``
@@ -1309,8 +1310,20 @@ LIBOMPTARGET_RECORD_REPORT
""""""""""""""""""""""""""
This environment variable is used to instruct the runtime to emit a summary of
-the recorded kernel instances and their associated JSON files. By default, no
-report is emitted.
+the recorded kernel instances and their associated JSON files. When enabled, the
+report is emitted in the standard output. The environment variable
+:ref:`LIBOMPTARGET_RECORD_REPORT_FILE` can be used to indicate a file where to
+emit the report. By default, no report is emitted.
+
+.. _libomptarget_record_report_file:
+
+LIBOMPTARGET_RECORD_REPORT_FILE
+"""""""""""""""""""""""""""""""
+
+This environment variable is used to instruct the runtime to emit the recording
+report to a specific output file. The file will be created within the recording
+directory. Note that it is not needed to use :ref:`LIBOMPTARGET_RECORD_REPORT`
+when setting this enviornment variable.
LIBOMPTARGET_RECORD_MEMSIZE
"""""""""""""""""""""""""""
>From ca9675a9409060a54774b93ca25f0bbcf773735c Mon Sep 17 00:00:00 2001
From: Kevin Sala <salapenades1 at llnl.gov>
Date: Thu, 18 Jun 2026 12:48:08 -0700
Subject: [PATCH 2/2] Fix documentation
---
openmp/docs/design/Runtimes.rst | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 098946b2743d6..29636c6ef8c99 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -1312,8 +1312,8 @@ LIBOMPTARGET_RECORD_REPORT
This environment variable is used to instruct the runtime to emit a summary of
the recorded kernel instances and their associated JSON files. When enabled, the
report is emitted in the standard output. The environment variable
-:ref:`LIBOMPTARGET_RECORD_REPORT_FILE` can be used to indicate a file where to
-emit the report. By default, no report is emitted.
+:ref:`LIBOMPTARGET_RECORD_REPORT_FILE` can be used to indicate the file name of
+the file where to emit the report. By default, no report is emitted.
.. _libomptarget_record_report_file:
@@ -1321,9 +1321,10 @@ LIBOMPTARGET_RECORD_REPORT_FILE
"""""""""""""""""""""""""""""""
This environment variable is used to instruct the runtime to emit the recording
-report to a specific output file. The file will be created within the recording
-directory. Note that it is not needed to use :ref:`LIBOMPTARGET_RECORD_REPORT`
-when setting this enviornment variable.
+report to an output file with a specific file name. The file will be created
+within the recording directory (see :ref:`LIBOMPTARGET_RECORD_DIR`). Note that
+it is not needed to use :ref:`LIBOMPTARGET_RECORD_REPORT` when setting this
+environment variable.
LIBOMPTARGET_RECORD_MEMSIZE
"""""""""""""""""""""""""""
More information about the Openmp-commits
mailing list