[Openmp-commits] [openmp] 79349e5 - [offload] Improve report printing for kernel recording (#204505)
via Openmp-commits
openmp-commits at lists.llvm.org
Thu Jun 18 21:57:27 PDT 2026
Author: Kevin Sala Penades
Date: 2026-06-18T21:57:23-07:00
New Revision: 79349e509ab569636647829945e20a6f244d6c36
URL: https://github.com/llvm/llvm-project/commit/79349e509ab569636647829945e20a6f244d6c36
DIFF: https://github.com/llvm/llvm-project/commit/79349e509ab569636647829945e20a6f244d6c36.diff
LOG: [offload] Improve report printing for kernel recording (#204505)
This commit extends the record reporting mechanism:
- `LIBOMPTARGET_RECORD_REPORT_FILENAME` enables the reporting mechanism
and allows specifying the name of the output file.
- The report of recorded kernels is ordered in recording order. This is
really useful for tests that need to record and replay more than one
kernel.
Added:
Modified:
offload/libomptarget/device.cpp
offload/libomptarget/omptarget.cpp
offload/plugins-nextgen/common/include/PluginInterface.h
offload/plugins-nextgen/common/include/RecordReplay.h
offload/plugins-nextgen/common/src/PluginInterface.cpp
offload/plugins-nextgen/common/src/RecordReplay.cpp
openmp/docs/design/Runtimes.rst
Removed:
################################################################################
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index 546f679353544..18339e9afe975 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -95,13 +95,20 @@ llvm::Error DeviceTy::init() {
Int32Envar OMPX_RecordDevice("LIBOMPTARGET_RECORD_DEVICE", 0);
StringEnvar OMPX_RecordOutputDir("LIBOMPTARGET_RECORD_DIR", "");
BoolEnvar OMPX_EmitRecordReport("LIBOMPTARGET_RECORD_REPORT", false);
+ StringEnvar OMPX_RecordReportFilename("LIBOMPTARGET_RECORD_REPORT_FILENAME",
+ "");
if (OMPX_RecordDevice != RTLDeviceID)
return llvm::Error::success();
+ // Print report if it was enabled explicitly or a report file was indicated.
+ bool EmitReport =
+ OMPX_EmitRecordReport || !OMPX_RecordReportFilename.get().empty();
+
Ret = RTL->initialize_record_replay(
RTLDeviceID, OMPX_RecordMemSize, nullptr,
- /*IsRecord=*/true, /*IsNative=*/true, OMPX_RecordOutput,
- OMPX_EmitRecordReport, OMPX_RecordOutputDir.get().c_str());
+ /*IsRecord=*/true, /*IsNative=*/true, OMPX_RecordOutput, EmitReport,
+ OMPX_RecordReportFilename.get().c_str(),
+ OMPX_RecordOutputDir.get().c_str());
if (Ret != OFFLOAD_SUCCESS)
return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE,
"failed to initialize RR in device %d\n",
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 17b215732d51b..d18b8e38b7808 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -2383,7 +2383,8 @@ int target_activate_rr(DeviceTy &Device, uint64_t MemorySize, void *VAddr,
const char *OutputDirPath) {
return Device.RTL->initialize_record_replay(
Device.DeviceID, MemorySize, VAddr, IsRecord,
- /*IsNative=*/true, SaveOutput, EmitReport, OutputDirPath);
+ /*IsNative=*/true, SaveOutput, EmitReport, /*ReportFilename=*/"",
+ OutputDirPath);
}
/// Executes a kernel using pre-recorded information for loading to
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index f50b07aad0209..cd7e1981435ea 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -1252,6 +1252,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error initRecordReplay(int64_t Size, void *VAddr, bool IsRecord,
bool IsNative, bool SaveOutput, bool EmitReport,
+ const char *ReportFilename,
const char *OutputDirPath) {
if (RecordReplay)
return Plugin::error(error::ErrorCode::INVALID_ARGUMENT,
@@ -1267,7 +1268,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
RecordReplay =
new NativeRecordReplayTy(Status, OutputDirPath ? OutputDirPath : "",
- SaveOutput, EmitReport, *this);
+ SaveOutput, EmitReport, ReportFilename, *this);
return RecordReplay->init(Size, VAddr);
}
@@ -1587,6 +1588,7 @@ struct GenericPluginTy {
int32_t initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
void *VAddr, bool IsRecord, bool IsNative,
bool SaveOutput, bool EmitReport,
+ const char *ReportFilename,
const char *OutputDirPath);
/// Loads the associated binary into the plugin and returns a handle to it.
diff --git a/offload/plugins-nextgen/common/include/RecordReplay.h b/offload/plugins-nextgen/common/include/RecordReplay.h
index 65a861cc8a0cc..ef48b55ff998d 100644
--- a/offload/plugins-nextgen/common/include/RecordReplay.h
+++ b/offload/plugins-nextgen/common/include/RecordReplay.h
@@ -80,9 +80,12 @@ struct RecordReplayTy {
/// Whether a memory snapshot should be recorded a kernel execution.
bool SaveOutput;
- /// Whether a report should be emitted afther the recording.
+ /// Whether a report should be emitted after the recording.
bool EmitReport;
+ /// The name of the file where to emit the record report.
+ std::string ReportFilename;
+
/// Reference to the corresponding device.
GenericDeviceTy &Device;
@@ -157,13 +160,15 @@ struct RecordReplayTy {
/// Tracker of record replay instances.
std::unordered_set<InstanceTy, InstanceHasher> Instances;
+ SmallVector<const InstanceTy *> OrderedInstances;
std::mutex InstancesLock;
public:
RecordReplayTy(StatusTy Status, StringRef OutputDirectoryStr, bool SaveOutput,
- bool EmitReport, GenericDeviceTy &Device)
+ bool EmitReport, StringRef ReportFilename,
+ GenericDeviceTy &Device)
: Status(Status), SaveOutput(SaveOutput), EmitReport(EmitReport),
- Device(Device) {
+ ReportFilename(ReportFilename.str()), Device(Device) {
if (OutputDirectoryStr == "")
OutputDirectory = std::filesystem::current_path();
else
@@ -261,9 +266,9 @@ struct RecordReplayTy {
struct NativeRecordReplayTy : public RecordReplayTy {
NativeRecordReplayTy(StatusTy Status, StringRef OutputDirectoryStr,
bool SaveOutput, bool EmitReport,
- GenericDeviceTy &Device)
+ StringRef ReportFilename, GenericDeviceTy &Device)
: RecordReplayTy(Status, OutputDirectoryStr, SaveOutput, EmitReport,
- Device) {}
+ ReportFilename, Device) {}
private:
Error recordPrologueImpl(const GenericKernelTy &Kernel,
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index f8f362cf2b4ed..0c345d84fa907 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1498,13 +1498,13 @@ int32_t GenericPluginTy::is_data_exchangable(int32_t SrcDeviceId,
int32_t GenericPluginTy::initialize_record_replay(
int32_t DeviceId, int64_t MemorySize, void *VAddr, bool IsRecord,
- bool IsNative, bool SaveOutput, bool EmitReport,
+ bool IsNative, bool SaveOutput, bool EmitReport, const char *ReportFilename,
const char *OutputDirPath) {
GenericDeviceTy &Device = getDevice(DeviceId);
- if (auto Err =
- Device.initRecordReplay(MemorySize, VAddr, IsRecord, IsNative,
- SaveOutput, EmitReport, OutputDirPath)) {
+ if (auto Err = Device.initRecordReplay(MemorySize, VAddr, IsRecord, IsNative,
+ SaveOutput, EmitReport, ReportFilename,
+ OutputDirPath)) {
REPORT() << "Failure to initialize RR with " << MemorySize
<< " bytes on device " << DeviceId << ": "
<< toString(std::move(Err));
diff --git a/offload/plugins-nextgen/common/src/RecordReplay.cpp b/offload/plugins-nextgen/common/src/RecordReplay.cpp
index e89b516983e41..7cfd39288307b 100644
--- a/offload/plugins-nextgen/common/src/RecordReplay.cpp
+++ b/offload/plugins-nextgen/common/src/RecordReplay.cpp
@@ -88,20 +88,34 @@ Error RecordReplayTy::deinit() {
}
Error RecordReplayTy::emitInstanceReport() {
+ llvm::raw_ostream *OutStream = &llvm::outs();
+ std::unique_ptr<llvm::raw_fd_ostream> FileOut;
+
+ if (!ReportFilename.empty()) {
+ // The report file is emitted in the output directory.
+ std::string ReportFilepath =
+ (std::filesystem::absolute(OutputDirectory) / ReportFilename).string();
+ std::error_code EC;
+ FileOut = std::make_unique<llvm::raw_fd_ostream>(ReportFilepath, EC);
+ if (EC)
+ return Plugin::error(ErrorCode::HOST_IO, "saving report file");
+ OutStream = FileOut.get();
+ }
+
std::lock_guard<std::mutex> LG(InstancesLock);
- llvm::outs() << "=== Kernel Record Report ===\n";
- llvm::outs() << "Directory: "
- << std::filesystem::absolute(OutputDirectory).string() << "\n";
- llvm::outs() << "Total Instances: " << Instances.size() << "\n";
- llvm::outs() << "JSON Filename, Kernel Name, Time (ns), Occurrences:\n";
+ *OutStream << "=== Kernel Record Report ===\n";
+ *OutStream << "Directory: "
+ << std::filesystem::absolute(OutputDirectory).string() << "\n";
+ *OutStream << "Total Instances: " << OrderedInstances.size() << "\n";
+ *OutStream << "JSON Filename, Kernel Name, Time (ns), Occurrences:\n";
SmallString<128> Filename;
- for (const auto &Inst : Instances)
- llvm::outs()
- << getFilename(Inst, FileTy::Descriptor, /*IncludeDir=*/false).c_str()
- << ", " << Inst.Kernel.getName() << ", " << Inst.getRecordedTimeNs()
- << ", " << Inst.Occurrences << "\n";
- llvm::outs() << "=== End Kernel Record Report ===\n";
+ for (const auto *Inst : OrderedInstances)
+ *OutStream
+ << getFilename(*Inst, FileTy::Descriptor, /*IncludeDir=*/false).c_str()
+ << ", " << Inst->Kernel.getName() << ", " << Inst->getRecordedTimeNs()
+ << ", " << Inst->Occurrences << "\n";
+ *OutStream << "=== End Kernel Record Report ===\n";
return Plugin::success();
}
@@ -114,18 +128,24 @@ RecordReplayTy::registerInstance(const GenericKernelTy &Kernel,
std::lock_guard<std::mutex> LG(InstancesLock);
auto [It, Inserted] = Instances.emplace(Kernel, NumTeams, NumThreads,
SharedMemorySize, ReplayOutcome);
+ // Keep insertion order.
+ if (Inserted)
+ OrderedInstances.push_back(&(*It));
+
// Increase the number of occurrences.
It->Occurrences += 1;
- return {*It, Inserted};
+
+ // Return reference and whether it was registered for the first time. Notice
+ // that registering an unregistered instance counts as a new registration.
+ return {*It, (It->Occurrences == 1)};
}
Error RecordReplayTy::unregisterInstance(const InstanceTy &Instance) {
assert(isReplaying() && "Cannot unregister instance when recording.");
+ // Do not remove it, it may be reused in the future.
std::lock_guard<std::mutex> LG(InstancesLock);
- size_t Erased = Instances.erase(Instance);
- if (Erased != 1)
- return Plugin::error(ErrorCode::INVALID_ARGUMENT, "invalid instance");
+ Instance.Occurrences = 0;
return Plugin::success();
}
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 8b3b7e9bed0c6..4e3137abd6fb7 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -1279,6 +1279,7 @@ is provided below.
* ``LIBOMPTARGET_RECORD=[TRUE/FALSE] (default FALSE)``
* ``LIBOMPTARGET_RECORD_DIR=<Filepath>``
* ``LIBOMPTARGET_RECORD_REPORT=[TRUE/FALSE] (default FALSE)``
+* ``LIBOMPTARGET_RECORD_REPORT_FILENAME=<Filename>``
* ``LIBOMPTARGET_RECORD_MEMSIZE=<Num> (default 8*1024*1024*1024)``
* ``LIBOMPTARGET_RECORD_DEVICE=<Num> (default 0)``
* ``LIBOMPTARGET_RECORD_OUTPUT=[TRUE/FALSE] (default TRUE)``
@@ -1309,8 +1310,20 @@ LIBOMPTARGET_RECORD_REPORT
""""""""""""""""""""""""""
This environment variable is used to instruct the runtime to emit a summary of
-the recorded kernel instances and their associated JSON files. By default, no
-report is emitted.
+the recorded kernel instances and their associated JSON files. When enabled, the
+report is emitted to the standard output. See
+:ref:`LIBOMPTARGET_RECORD_REPORT_FILENAME` to emit the report to a file. By
+default, no report is emitted.
+
+.. _libomptarget_record_report_filename:
+
+LIBOMPTARGET_RECORD_REPORT_FILENAME
+"""""""""""""""""""""""""""""""""""
+
+This environment variable is used to instruct the runtime to emit the recording
+report to a file with a specific name. The file is written in the recording
+directory (see :ref:`LIBOMPTARGET_RECORD_DIR`). Note that it is not necessary to
+set :ref:`LIBOMPTARGET_RECORD_REPORT` when setting this environment variable.
LIBOMPTARGET_RECORD_MEMSIZE
"""""""""""""""""""""""""""
More information about the Openmp-commits
mailing list