[llvm] Add option to dump IR to files intstead of stderr (PR #66412)

Nuri Amari via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 11:39:18 PDT 2023


https://github.com/NuriAmari created https://github.com/llvm/llvm-project/pull/66412:

This patch adds a flag to LLVM such that the output generated by the `-print-(before|after|all)`
family of flags is written to files in a directory rather than to stderr.

This new flag is `-ir-dump-directory` and is used to specify where to write the files. No other flags are added, it just modifies the behavior of the print flags.

This is a second simplified version of the changes proposed in https://github.com/llvm/llvm-project/pull/65179.

This patch only adds support for the new pass manager. If this patch is accepted, similar support can be added to the legacy pass manager.

>From da5da863d20cd8bef88066bba931c068042833cf Mon Sep 17 00:00:00 2001
From: Nuri Amari <nuriamari at fb.com>
Date: Thu, 7 Sep 2023 12:34:15 -0700
Subject: [PATCH] Add option to dump IR to files intstead of stderr

This patch adds a flag to LLVM such that the output
generated by the `-print-(before|after|all)`
family of flags is written to files in a directory rather
than to stderr.

This new flag is `-ir-dump-directory` and is used to
specify where to write the files. No other flags are
added, it just modifies the behavior of the print flags.
---
 llvm/include/llvm/IR/PrintPasses.h            |   4 +
 .../llvm/Passes/StandardInstrumentations.h    |  20 ++-
 llvm/lib/IR/PrintPasses.cpp                   |   9 +
 llvm/lib/Passes/StandardInstrumentations.cpp  | 167 +++++++++++++++---
 llvm/test/Other/dump-before-after.ll          |  57 ++++++
 5 files changed, 231 insertions(+), 26 deletions(-)
 create mode 100644 llvm/test/Other/dump-before-after.ll

diff --git a/llvm/include/llvm/IR/PrintPasses.h b/llvm/include/llvm/IR/PrintPasses.h
index 95b97e76c867cb2..c4baadfa3975531 100644
--- a/llvm/include/llvm/IR/PrintPasses.h
+++ b/llvm/include/llvm/IR/PrintPasses.h
@@ -55,6 +55,10 @@ bool forcePrintModuleIR();
 bool isPassInPrintList(StringRef PassName);
 bool isFilterPassesEmpty();
 
+// Returns a non-empty string if printing before/after passes is to be
+// dumped into files in the returned directory instead of written to stderr.
+std::string irDumpDirectory();
+
 // Returns true if we should print the function.
 bool isFunctionInPrintList(StringRef FunctionName);
 
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 331130c6b22d990..fd512635356e5dd 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -46,6 +46,16 @@ class PrintIRInstrumentation {
   void registerCallbacks(PassInstrumentationCallbacks &PIC);
 
 private:
+  enum SuffixType {
+    before,
+    after,
+    invalidated,
+  };
+
+  using PrintModuleDesc = std::tuple<const Module *, std::string /* IRName */,
+                                     StringRef /* StoredPassID */,
+                                     SmallString<128> /* DumpFilename */>;
+
   void printBeforePass(StringRef PassID, Any IR);
   void printAfterPass(StringRef PassID, Any IR);
   void printAfterPassInvalidated(StringRef PassID);
@@ -55,11 +65,15 @@ class PrintIRInstrumentation {
   bool shouldPrintPassNumbers();
   bool shouldPrintAtPassNumber();
 
-  using PrintModuleDesc = std::tuple<const Module *, std::string, StringRef>;
-
-  void pushModuleDesc(StringRef PassID, Any IR);
+  void pushModuleDesc(StringRef PassID, Any IR,
+                      SmallString<128> DumpIRFilename);
   PrintModuleDesc popModuleDesc(StringRef PassID);
 
+  SmallString<128> fetchDumpFilename(StringRef PassId, Any IR);
+  StringRef getFileSuffix(SuffixType);
+
+  static constexpr std::array FileSuffixes = {"-before.ll", "-after.ll",
+                                              "-invalidated.ll"};
   PassInstrumentationCallbacks *PIC;
   /// Stack of Module description, enough to print the module after a given
   /// pass.
diff --git a/llvm/lib/IR/PrintPasses.cpp b/llvm/lib/IR/PrintPasses.cpp
index e2ef20bb81ba7d7..406af4a0a5e004e 100644
--- a/llvm/lib/IR/PrintPasses.cpp
+++ b/llvm/lib/IR/PrintPasses.cpp
@@ -103,6 +103,13 @@ static cl::list<std::string>
                             "options"),
                    cl::CommaSeparated, cl::Hidden);
 
+static cl::opt<std::string> IRDumpDirectory(
+    "ir-dump-directory",
+    llvm::cl::desc("If specified, IR printed using the "
+                   "-print-[before|after]{-all} options will be dumped into "
+                   "files in this directory rather than written to stderr"),
+    cl::init(""), cl::Hidden, cl::value_desc("filename"));
+
 /// This is a helper to determine whether to print IR before or
 /// after a pass.
 
@@ -139,6 +146,8 @@ std::vector<std::string> llvm::printAfterPasses() {
   return std::vector<std::string>(PrintAfter);
 }
 
+std::string llvm::irDumpDirectory() { return IRDumpDirectory; }
+
 bool llvm::forcePrintModuleIR() { return PrintModuleScope; }
 
 bool llvm::isPassInPrintList(StringRef PassName) {
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 6244c0a5a949ba1..64bf306438d99d2 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Passes/StandardInstrumentations.h"
 #include "llvm/ADT/Any.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/LazyCallGraph.h"
@@ -33,6 +34,7 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/Signals.h"
@@ -684,9 +686,64 @@ PrintIRInstrumentation::~PrintIRInstrumentation() {
   assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit");
 }
 
-void PrintIRInstrumentation::pushModuleDesc(StringRef PassID, Any IR) {
+static SmallString<32> getIRDisplayName(Any IR) {
+
+  auto hashName = [](StringRef name) {
+    const size_t hashValue = hash_value(name);
+    return std::to_string(hashValue);
+  };
+
+  SmallString<32> Result;
+  const Module *M = unwrapModule(IR);
+  std::string ModuleName = hashName(M->getName());
+  SmallString<32> IRName;
+  if (any_cast<const Module *>(&IR)) {
+    IRName += "-module";
+  } else if (const Function **F = any_cast<const Function *>(&IR)) {
+    IRName += "-function-";
+    IRName += hashName((*F)->getName());
+  } else if (const LazyCallGraph::SCC **C =
+                 any_cast<const LazyCallGraph::SCC *>(&IR)) {
+    IRName += "-scc-";
+    IRName += hashName((*C)->getName());
+  } else if (const Loop **L = any_cast<const Loop *>(&IR)) {
+    IRName += "-loop-";
+    IRName += hashName((*L)->getName());
+  } else {
+    llvm_unreachable("Unknown wrapped IR type");
+  }
+
+  Result += ModuleName;
+  Result += IRName;
+  return Result;
+}
+
+SmallString<128> PrintIRInstrumentation::fetchDumpFilename(StringRef PassID,
+                                                           Any IR) {
+  const std::string &RootDirectory = irDumpDirectory();
+  assert(!RootDirectory.empty() &&
+         "The flag -ir-dump-directory must be passed to dump IR to files");
+  SmallString<128> ResultPath;
+  ResultPath += RootDirectory;
+  SmallString<16> Filename;
+  Filename += std::to_string(CurrentPassNumber);
+  Filename += "-";
+  Filename += getIRDisplayName(IR);
+  Filename += "-";
+  Filename += PassID;
+  sys::path::append(ResultPath, Filename);
+  return ResultPath;
+}
+
+StringRef
+PrintIRInstrumentation::getFileSuffix(PrintIRInstrumentation::SuffixType Type) {
+  return FileSuffixes[Type];
+}
+
+void PrintIRInstrumentation::pushModuleDesc(StringRef PassID, Any IR,
+                                            SmallString<128> DumpIRFilename) {
   const Module *M = unwrapModule(IR);
-  ModuleDescStack.emplace_back(M, getIRName(IR), PassID);
+  ModuleDescStack.emplace_back(M, getIRName(IR), PassID, DumpIRFilename);
 }
 
 PrintIRInstrumentation::PrintModuleDesc
@@ -697,17 +754,42 @@ PrintIRInstrumentation::popModuleDesc(StringRef PassID) {
   return ModuleDesc;
 }
 
+// Callers are responsible for closing the returned file descriptor
+static int prepareDumpIRFileDescriptor(StringRef DumpIRFilename) {
+  std::error_code EC;
+  auto ParentPath = llvm::sys::path::parent_path(DumpIRFilename);
+  if (!ParentPath.empty()) {
+    std::error_code EC = llvm::sys::fs::create_directories(ParentPath);
+    if (EC)
+      report_fatal_error(Twine("Failed to create directory ") + ParentPath +
+                         " to support -ir-dump-directory: " + EC.message());
+  }
+  int Result = 0;
+  EC =
+      sys::fs::openFile(DumpIRFilename, Result, sys::fs::CD_OpenAlways,
+                        sys::fs::FA_Write | sys::fs::FA_Read, sys::fs::OF_None);
+  if (EC)
+    report_fatal_error(Twine("Failed to open ") + DumpIRFilename +
+                       " to support -ir-dump-directory: " + EC.message());
+  return Result;
+}
+
 void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
   if (isIgnored(PassID))
     return;
 
+  SmallString<128> DumpIRFilename;
+  if (!irDumpDirectory().empty() &&
+      (shouldPrintBeforePass(PassID) || shouldPrintAfterPass(PassID)))
+    DumpIRFilename = fetchDumpFilename(PassID, IR);
+
   // Saving Module for AfterPassInvalidated operations.
   // Note: here we rely on a fact that we do not change modules while
   // traversing the pipeline, so the latest captured module is good
   // for all print operations that has not happen yet.
   if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() ||
       shouldPrintAfterPass(PassID))
-    pushModuleDesc(PassID, IR);
+    pushModuleDesc(PassID, IR, DumpIRFilename);
 
   if (!shouldPrintIR(IR))
     return;
@@ -720,9 +802,20 @@ void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
   if (!shouldPrintBeforePass(PassID))
     return;
 
-  dbgs() << "*** IR Dump Before " << PassID << " on " << getIRName(IR)
-         << " ***\n";
-  unwrapAndPrint(dbgs(), IR);
+  auto WriteIRToStream = [&](raw_ostream &Stream) {
+    Stream << "*** IR Dump Before " << PassID << " on " << getIRName(IR)
+           << " ***\n";
+    unwrapAndPrint(Stream, IR);
+  };
+
+  if (!DumpIRFilename.empty()) {
+    DumpIRFilename += getFileSuffix(SuffixType::before);
+    llvm::raw_fd_ostream DumpIRFileStream{
+        prepareDumpIRFileDescriptor(DumpIRFilename), /* shouldClose */ true};
+    WriteIRToStream(DumpIRFileStream);
+  } else {
+    WriteIRToStream(dbgs());
+  }
 }
 
 void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
@@ -736,18 +829,32 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
   const Module *M;
   std::string IRName;
   StringRef StoredPassID;
-  std::tie(M, IRName, StoredPassID) = popModuleDesc(PassID);
+  SmallString<128> DumpIRFilename;
+  std::tie(M, IRName, StoredPassID, DumpIRFilename) = popModuleDesc(PassID);
   assert(StoredPassID == PassID && "mismatched PassID");
 
   if (!shouldPrintIR(IR) || !shouldPrintAfterPass(PassID))
     return;
 
-  dbgs() << "*** IR Dump "
-         << (shouldPrintAtPassNumber()
-                 ? StringRef(formatv("At {0}-{1}", CurrentPassNumber, PassID))
-                 : StringRef(formatv("After {0}", PassID)))
-         << " on " << IRName << " ***\n";
-  unwrapAndPrint(dbgs(), IR);
+  auto WriteIRToStream = [&](raw_ostream &Stream) {
+    Stream << "*** IR Dump "
+           << (shouldPrintAtPassNumber()
+                   ? StringRef(formatv("At {0}-{1}", CurrentPassNumber, PassID))
+                   : StringRef(formatv("After {0}", PassID)))
+           << " on " << IRName << " ***\n";
+    unwrapAndPrint(Stream, IR);
+  };
+
+  if (!irDumpDirectory().empty()) {
+    assert(!DumpIRFilename.empty() && "DumpIRFilename must not be empty and "
+                                      "should be set in printBeforePass");
+    DumpIRFilename += getFileSuffix(SuffixType::after);
+    llvm::raw_fd_ostream DumpIRFileStream{
+        prepareDumpIRFileDescriptor(DumpIRFilename), /* shouldClose */ true};
+    WriteIRToStream(DumpIRFileStream);
+  } else {
+    WriteIRToStream(dbgs());
+  }
 }
 
 void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
@@ -761,22 +868,36 @@ void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
   const Module *M;
   std::string IRName;
   StringRef StoredPassID;
-  std::tie(M, IRName, StoredPassID) = popModuleDesc(PassID);
+  SmallString<128> DumpIRFilename;
+  std::tie(M, IRName, StoredPassID, DumpIRFilename) = popModuleDesc(PassID);
   assert(StoredPassID == PassID && "mismatched PassID");
   // Additional filtering (e.g. -filter-print-func) can lead to module
   // printing being skipped.
   if (!M || !shouldPrintAfterPass(PassID))
     return;
 
-  SmallString<20> Banner;
-  if (shouldPrintAtPassNumber())
-    Banner = formatv("*** IR Dump At {0}-{1} on {2} (invalidated) ***",
-                     CurrentPassNumber, PassID, IRName);
-  else 
-    Banner = formatv("*** IR Dump After {0} on {1} (invalidated) ***", 
-                     PassID, IRName);
-  dbgs() << Banner << "\n";
-  printIR(dbgs(), M);
+  auto WriteIRToStream = [&](raw_ostream &Stream) {
+    SmallString<20> Banner;
+    if (shouldPrintAtPassNumber())
+      Banner = formatv("*** IR Dump At {0}-{1} on {2} (invalidated) ***",
+                       CurrentPassNumber, PassID, IRName);
+    else
+      Banner = formatv("*** IR Dump After {0} on {1} (invalidated) ***", PassID,
+                       IRName);
+    Stream << Banner << "\n";
+    printIR(Stream, M);
+  };
+
+  if (!irDumpDirectory().empty()) {
+    assert(!DumpIRFilename.empty() && "DumpIRFilename must not be empty and "
+                                      "should be set in printBeforePass");
+    DumpIRFilename += getFileSuffix(SuffixType::invalidated);
+    llvm::raw_fd_ostream DumpIRFileStream{
+        prepareDumpIRFileDescriptor(DumpIRFilename), /* shouldClose */ true};
+    WriteIRToStream(DumpIRFileStream);
+  } else {
+    WriteIRToStream(dbgs());
+  }
 }
 
 bool PrintIRInstrumentation::shouldPrintBeforePass(StringRef PassID) {
diff --git a/llvm/test/Other/dump-before-after.ll b/llvm/test/Other/dump-before-after.ll
new file mode 100644
index 000000000000000..1f0d01022551123
--- /dev/null
+++ b/llvm/test/Other/dump-before-after.ll
@@ -0,0 +1,57 @@
+; RUN: mkdir -p %t/logs
+; RUN: rm -rf %t/logs
+
+; Basic dump before and after a single module pass
+
+; RUN: opt %s -disable-output -passes='no-op-module' -ir-dump-directory %t/logs -print-after=no-op-module -print-before=no-op-module
+; RUN: find %t/logs -type f -print | sort | FileCheck %s --check-prefix=SINGLE-PASS
+; SINGLE-PASS: {{[0-9]+}}-[[MODULE_NAME_HASH:[0-9]+]]-module-NoOpModulePass-after.ll
+; SINGLE-PASS: {{[0-9]+}}-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; RUN: rm -rf %t/logs
+
+
+; Dump before and after multiple runs of the same module pass
+; The integers preceeding log files represent relative pass execution order,
+; but they are not necessarily continuous. That is passes which are run
+; but not printed, still increment the count -- leading to gaps in the printed
+; integers.
+
+; RUN: opt %s -disable-output -passes='no-op-module,no-op-module,no-op-module' -ir-dump-directory %t/logs -print-after=no-op-module -print-before=no-op-module
+; RUN: find %t/logs -type f -print | sort | FileCheck %s --check-prefix=MULTIPLE-PASSES
+; MULTIPLE-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH:[0-9]+]]-module-NoOpModulePass-after.ll
+; MULTIPLE-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; MULTIPLE-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-module-NoOpModulePass-after.ll
+; MULTIPLE-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; MULTIPLE-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-module-NoOpModulePass-after.ll
+; MULTIPLE-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; RUN: rm -rf %t/logs
+
+; Dump before and after multiple passes, of various levels of granularity
+
+; RUN: opt %s -disable-output -passes='no-op-module,cgscc(no-op-cgscc),function(no-op-function),function(loop(no-op-loop))' -ir-dump-directory %t/logs -print-after=no-op-module,no-op-cgscc,no-op-function,no-op-loop -print-before=no-op-module,no-op-cgscc,no-op-function,no-op-loop
+; RUN: find %t/logs -type f -print | sort | FileCheck %s --check-prefix=MULTIPLE-GRANULAR-PASSES
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH:[0-9]+]]-module-NoOpModulePass-after.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-module-NoOpModulePass-before.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-scc-[[SCC_FOO_HASH:[0-9]+]]-NoOpCGSCCPass-after.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-scc-[[SCC_FOO_HASH]]-NoOpCGSCCPass-before.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-scc-[[SCC_BAR_HASH:[0-9]+]]-NoOpCGSCCPass-after.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-scc-[[SCC_BAR_HASH]]-NoOpCGSCCPass-before.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-function-[[FUNCTION_FOO_HASH:[0-9]+]]-NoOpFunctionPass-after.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-function-[[FUNCTION_FOO_HASH]]-NoOpFunctionPass-before.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-function-[[FUNCTION_BAR_HASH:[0-9]+]]-NoOpFunctionPass-after.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-function-[[FUNCTION_BAR_HASH]]-NoOpFunctionPass-before.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-loop-[[LOOP_NAME_HASH:[0-9]+]]-NoOpLoopPass-after.ll
+; MULTIPLE-GRANULAR-PASSES: {{[0-9]+}}-[[MODULE_NAME_HASH]]-loop-[[LOOP_NAME_HASH]]-NoOpLoopPass-before.ll
+
+; RUN: rm -rf %t/logs
+
+define void @foo() {
+    ret void
+}
+
+define void @bar() {
+entry:
+    br label %my-loop
+my-loop:
+    br label %my-loop
+}



More information about the llvm-commits mailing list