[Openmp-commits] [openmp] 79349e5 - [offload] Improve report printing for kernel recording (#204505)

via Openmp-commits openmp-commits at lists.llvm.org
Thu Jun 18 21:57:27 PDT 2026


Author: Kevin Sala Penades
Date: 2026-06-18T21:57:23-07:00
New Revision: 79349e509ab569636647829945e20a6f244d6c36

URL: https://github.com/llvm/llvm-project/commit/79349e509ab569636647829945e20a6f244d6c36
DIFF: https://github.com/llvm/llvm-project/commit/79349e509ab569636647829945e20a6f244d6c36.diff

LOG: [offload] Improve report printing for kernel recording (#204505)

This commit extends the record reporting mechanism:
- `LIBOMPTARGET_RECORD_REPORT_FILENAME` enables the reporting mechanism
and allows specifying the name of the output file.
- The report lists the recorded kernels in recording order. This is
really useful for tests that need to record and replay more than one
kernel.

Added: 
    

Modified: 
    offload/libomptarget/device.cpp
    offload/libomptarget/omptarget.cpp
    offload/plugins-nextgen/common/include/PluginInterface.h
    offload/plugins-nextgen/common/include/RecordReplay.h
    offload/plugins-nextgen/common/src/PluginInterface.cpp
    offload/plugins-nextgen/common/src/RecordReplay.cpp
    openmp/docs/design/Runtimes.rst

Removed: 
    


################################################################################
diff  --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index 546f679353544..18339e9afe975 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -95,13 +95,20 @@ llvm::Error DeviceTy::init() {
     Int32Envar OMPX_RecordDevice("LIBOMPTARGET_RECORD_DEVICE", 0);
     StringEnvar OMPX_RecordOutputDir("LIBOMPTARGET_RECORD_DIR", "");
     BoolEnvar OMPX_EmitRecordReport("LIBOMPTARGET_RECORD_REPORT", false);
+    StringEnvar OMPX_RecordReportFilename("LIBOMPTARGET_RECORD_REPORT_FILENAME",
+                                          "");
     if (OMPX_RecordDevice != RTLDeviceID)
       return llvm::Error::success();
 
+    // Print report if it was enabled explicitly or a report file was indicated.
+    bool EmitReport =
+        OMPX_EmitRecordReport || !OMPX_RecordReportFilename.get().empty();
+
     Ret = RTL->initialize_record_replay(
         RTLDeviceID, OMPX_RecordMemSize, nullptr,
-        /*IsRecord=*/true, /*IsNative=*/true, OMPX_RecordOutput,
-        OMPX_EmitRecordReport, OMPX_RecordOutputDir.get().c_str());
+        /*IsRecord=*/true, /*IsNative=*/true, OMPX_RecordOutput, EmitReport,
+        OMPX_RecordReportFilename.get().c_str(),
+        OMPX_RecordOutputDir.get().c_str());
     if (Ret != OFFLOAD_SUCCESS)
       return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE,
                                        "failed to initialize RR in device %d\n",

diff  --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 17b215732d51b..d18b8e38b7808 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -2383,7 +2383,8 @@ int target_activate_rr(DeviceTy &Device, uint64_t MemorySize, void *VAddr,
                        const char *OutputDirPath) {
   return Device.RTL->initialize_record_replay(
       Device.DeviceID, MemorySize, VAddr, IsRecord,
-      /*IsNative=*/true, SaveOutput, EmitReport, OutputDirPath);
+      /*IsNative=*/true, SaveOutput, EmitReport, /*ReportFilename=*/"",
+      OutputDirPath);
 }
 
 /// Executes a kernel using pre-recorded information for loading to

diff  --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index f50b07aad0209..cd7e1981435ea 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -1252,6 +1252,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
 
   Error initRecordReplay(int64_t Size, void *VAddr, bool IsRecord,
                          bool IsNative, bool SaveOutput, bool EmitReport,
+                         const char *ReportFilename,
                          const char *OutputDirPath) {
     if (RecordReplay)
       return Plugin::error(error::ErrorCode::INVALID_ARGUMENT,
@@ -1267,7 +1268,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
 
     RecordReplay =
         new NativeRecordReplayTy(Status, OutputDirPath ? OutputDirPath : "",
-                                 SaveOutput, EmitReport, *this);
+                                 SaveOutput, EmitReport, ReportFilename, *this);
     return RecordReplay->init(Size, VAddr);
   }
 
@@ -1587,6 +1588,7 @@ struct GenericPluginTy {
   int32_t initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
                                    void *VAddr, bool IsRecord, bool IsNative,
                                    bool SaveOutput, bool EmitReport,
+                                   const char *ReportFilename,
                                    const char *OutputDirPath);
 
   /// Loads the associated binary into the plugin and returns a handle to it.

diff  --git a/offload/plugins-nextgen/common/include/RecordReplay.h b/offload/plugins-nextgen/common/include/RecordReplay.h
index 65a861cc8a0cc..ef48b55ff998d 100644
--- a/offload/plugins-nextgen/common/include/RecordReplay.h
+++ b/offload/plugins-nextgen/common/include/RecordReplay.h
@@ -80,9 +80,12 @@ struct RecordReplayTy {
   /// Whether a memory snapshot should be recorded a kernel execution.
   bool SaveOutput;
 
-  /// Whether a report should be emitted afther the recording.
+  /// Whether a report should be emitted after the recording.
   bool EmitReport;
 
+  /// The name of the file where to emit the record report.
+  std::string ReportFilename;
+
   /// Reference to the corresponding device.
   GenericDeviceTy &Device;
 
@@ -157,13 +160,15 @@ struct RecordReplayTy {
 
   /// Tracker of record replay instances.
   std::unordered_set<InstanceTy, InstanceHasher> Instances;
+  SmallVector<const InstanceTy *> OrderedInstances;
   std::mutex InstancesLock;
 
 public:
   RecordReplayTy(StatusTy Status, StringRef OutputDirectoryStr, bool SaveOutput,
-                 bool EmitReport, GenericDeviceTy &Device)
+                 bool EmitReport, StringRef ReportFilename,
+                 GenericDeviceTy &Device)
       : Status(Status), SaveOutput(SaveOutput), EmitReport(EmitReport),
-        Device(Device) {
+        ReportFilename(ReportFilename.str()), Device(Device) {
     if (OutputDirectoryStr == "")
       OutputDirectory = std::filesystem::current_path();
     else
@@ -261,9 +266,9 @@ struct RecordReplayTy {
 struct NativeRecordReplayTy : public RecordReplayTy {
   NativeRecordReplayTy(StatusTy Status, StringRef OutputDirectoryStr,
                        bool SaveOutput, bool EmitReport,
-                       GenericDeviceTy &Device)
+                       StringRef ReportFilename, GenericDeviceTy &Device)
       : RecordReplayTy(Status, OutputDirectoryStr, SaveOutput, EmitReport,
-                       Device) {}
+                       ReportFilename, Device) {}
 
 private:
   Error recordPrologueImpl(const GenericKernelTy &Kernel,

diff  --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index f8f362cf2b4ed..0c345d84fa907 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1498,13 +1498,13 @@ int32_t GenericPluginTy::is_data_exchangable(int32_t SrcDeviceId,
 
 int32_t GenericPluginTy::initialize_record_replay(
     int32_t DeviceId, int64_t MemorySize, void *VAddr, bool IsRecord,
-    bool IsNative, bool SaveOutput, bool EmitReport,
+    bool IsNative, bool SaveOutput, bool EmitReport, const char *ReportFilename,
     const char *OutputDirPath) {
   GenericDeviceTy &Device = getDevice(DeviceId);
 
-  if (auto Err =
-          Device.initRecordReplay(MemorySize, VAddr, IsRecord, IsNative,
-                                  SaveOutput, EmitReport, OutputDirPath)) {
+  if (auto Err = Device.initRecordReplay(MemorySize, VAddr, IsRecord, IsNative,
+                                         SaveOutput, EmitReport, ReportFilename,
+                                         OutputDirPath)) {
     REPORT() << "Failure to initialize RR with " << MemorySize
              << " bytes on device " << DeviceId << ": "
              << toString(std::move(Err));

diff  --git a/offload/plugins-nextgen/common/src/RecordReplay.cpp b/offload/plugins-nextgen/common/src/RecordReplay.cpp
index e89b516983e41..7cfd39288307b 100644
--- a/offload/plugins-nextgen/common/src/RecordReplay.cpp
+++ b/offload/plugins-nextgen/common/src/RecordReplay.cpp
@@ -88,20 +88,34 @@ Error RecordReplayTy::deinit() {
 }
 
 Error RecordReplayTy::emitInstanceReport() {
+  llvm::raw_ostream *OutStream = &llvm::outs();
+  std::unique_ptr<llvm::raw_fd_ostream> FileOut;
+
+  if (!ReportFilename.empty()) {
+    // The report file is emitted in the output directory.
+    std::string ReportFilepath =
+        (std::filesystem::absolute(OutputDirectory) / ReportFilename).string();
+    std::error_code EC;
+    FileOut = std::make_unique<llvm::raw_fd_ostream>(ReportFilepath, EC);
+    if (EC)
+      return Plugin::error(ErrorCode::HOST_IO, "saving report file");
+    OutStream = FileOut.get();
+  }
+
   std::lock_guard<std::mutex> LG(InstancesLock);
-  llvm::outs() << "=== Kernel Record Report ===\n";
-  llvm::outs() << "Directory: "
-               << std::filesystem::absolute(OutputDirectory).string() << "\n";
-  llvm::outs() << "Total Instances: " << Instances.size() << "\n";
-  llvm::outs() << "JSON Filename, Kernel Name, Time (ns), Occurrences:\n";
+  *OutStream << "=== Kernel Record Report ===\n";
+  *OutStream << "Directory: "
+             << std::filesystem::absolute(OutputDirectory).string() << "\n";
+  *OutStream << "Total Instances: " << OrderedInstances.size() << "\n";
+  *OutStream << "JSON Filename, Kernel Name, Time (ns), Occurrences:\n";
 
   SmallString<128> Filename;
-  for (const auto &Inst : Instances)
-    llvm::outs()
-        << getFilename(Inst, FileTy::Descriptor, /*IncludeDir=*/false).c_str()
-        << ", " << Inst.Kernel.getName() << ", " << Inst.getRecordedTimeNs()
-        << ", " << Inst.Occurrences << "\n";
-  llvm::outs() << "=== End Kernel Record Report ===\n";
+  for (const auto *Inst : OrderedInstances)
+    *OutStream
+        << getFilename(*Inst, FileTy::Descriptor, /*IncludeDir=*/false).c_str()
+        << ", " << Inst->Kernel.getName() << ", " << Inst->getRecordedTimeNs()
+        << ", " << Inst->Occurrences << "\n";
+  *OutStream << "=== End Kernel Record Report ===\n";
 
   return Plugin::success();
 }
@@ -114,18 +128,24 @@ RecordReplayTy::registerInstance(const GenericKernelTy &Kernel,
   std::lock_guard<std::mutex> LG(InstancesLock);
   auto [It, Inserted] = Instances.emplace(Kernel, NumTeams, NumThreads,
                                           SharedMemorySize, ReplayOutcome);
+  // Keep insertion order.
+  if (Inserted)
+    OrderedInstances.push_back(&(*It));
+
   // Increase the number of occurrences.
   It->Occurrences += 1;
-  return {*It, Inserted};
+
+  // Return reference and whether it was registered for the first time. Notice
+  // that registering an unregistered instance counts as a new registration.
+  return {*It, (It->Occurrences == 1)};
 }
 
 Error RecordReplayTy::unregisterInstance(const InstanceTy &Instance) {
   assert(isReplaying() && "Cannot unregister instance when recording.");
 
+  // Do not remove it, it may be reused in the future.
   std::lock_guard<std::mutex> LG(InstancesLock);
-  size_t Erased = Instances.erase(Instance);
-  if (Erased != 1)
-    return Plugin::error(ErrorCode::INVALID_ARGUMENT, "invalid instance");
+  Instance.Occurrences = 0;
   return Plugin::success();
 }
 

diff  --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 8b3b7e9bed0c6..4e3137abd6fb7 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -1279,6 +1279,7 @@ is provided below.
 * ``LIBOMPTARGET_RECORD=[TRUE/FALSE] (default FALSE)``
 * ``LIBOMPTARGET_RECORD_DIR=<Filepath>``
 * ``LIBOMPTARGET_RECORD_REPORT=[TRUE/FALSE] (default FALSE)``
+* ``LIBOMPTARGET_RECORD_REPORT_FILENAME=<Filename>``
 * ``LIBOMPTARGET_RECORD_MEMSIZE=<Num> (default 8*1024*1024*1024)``
 * ``LIBOMPTARGET_RECORD_DEVICE=<Num> (default 0)``
 * ``LIBOMPTARGET_RECORD_OUTPUT=[TRUE/FALSE] (default TRUE)``
@@ -1309,8 +1310,20 @@ LIBOMPTARGET_RECORD_REPORT
 """"""""""""""""""""""""""
 
 This environment variable is used to instruct the runtime to emit a summary of
-the recorded kernel instances and their associated JSON files. By default, no
-report is emitted.
+the recorded kernel instances and their associated JSON files. When enabled, the
+report is emitted to the standard output. See
+:ref:`LIBOMPTARGET_RECORD_REPORT_FILENAME` to emit the report to a file. By
+default, no report is emitted.
+
+.. _libomptarget_record_report_filename:
+
+LIBOMPTARGET_RECORD_REPORT_FILENAME
+"""""""""""""""""""""""""""""""""""
+
+This environment variable is used to instruct the runtime to emit the recording
+report to a file with a specific name. The file is written in the recording
+directory (see :ref:`LIBOMPTARGET_RECORD_DIR`). Note that it is not needed to
+use :ref:`LIBOMPTARGET_RECORD_REPORT` when setting this environment variable.
 
 LIBOMPTARGET_RECORD_MEMSIZE
 """""""""""""""""""""""""""


        


More information about the Openmp-commits mailing list