[llvm] [SandboxIR] Add debug checker to compare IR before/after a revert (PR #115968)

Jorge Gorbe Moya via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 15 17:01:50 PST 2024


https://github.com/slackito updated https://github.com/llvm/llvm-project/pull/115968

>From f1916b4c2c0c856790e6490b26c28f45af3d5e18 Mon Sep 17 00:00:00 2001
From: Jorge Gorbe Moya <jgorbe at google.com>
Date: Fri, 8 Nov 2024 13:42:38 -0800
Subject: [PATCH 1/3] [SandboxIR] Add debug checker to compare IR before/after
 a revert

This will help us catch mistakes in change tracking. It's only enabled
when EXPENSIVE_CHECKS are enabled.
---
 llvm/include/llvm/SandboxIR/Context.h    | 11 ++--
 llvm/include/llvm/SandboxIR/Tracker.h    | 70 ++++++++++++++++++++-
 llvm/lib/SandboxIR/Tracker.cpp           | 79 +++++++++++++++++++++++-
 llvm/unittests/SandboxIR/TrackerTest.cpp | 38 ++++++++++++
 4 files changed, 190 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/SandboxIR/Context.h b/llvm/include/llvm/SandboxIR/Context.h
index f2056de87cb946..b0d6f8335d9e0e 100644
--- a/llvm/include/llvm/SandboxIR/Context.h
+++ b/llvm/include/llvm/SandboxIR/Context.h
@@ -44,11 +44,12 @@ class Context {
 
 protected:
   LLVMContext &LLVMCtx;
-  friend class Type;        // For LLVMCtx.
-  friend class PointerType; // For LLVMCtx.
-  friend class IntegerType; // For LLVMCtx.
-  friend class StructType;  // For LLVMCtx.
-  friend class Region;      // For LLVMCtx.
+  friend class Type;              // For LLVMCtx.
+  friend class PointerType;       // For LLVMCtx.
+  friend class IntegerType;       // For LLVMCtx.
+  friend class StructType;        // For LLVMCtx.
+  friend class Region;            // For LLVMCtx.
+  friend class IRSnapshotChecker; // To snapshot LLVMModuleToModuleMap.
 
   Tracker IRTracker;
 
diff --git a/llvm/include/llvm/SandboxIR/Tracker.h b/llvm/include/llvm/SandboxIR/Tracker.h
index dab20eb809ba08..3600b3956e22cc 100644
--- a/llvm/include/llvm/SandboxIR/Tracker.h
+++ b/llvm/include/llvm/SandboxIR/Tracker.h
@@ -42,13 +42,13 @@
 
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StableHashing.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Module.h"
 #include "llvm/SandboxIR/Use.h"
 #include "llvm/Support/Debug.h"
 #include <memory>
-#include <regex>
 
 namespace llvm::sandboxir {
 
@@ -67,6 +67,60 @@ class CmpInst;
 class Module;
 class GlobalVariable;
 
+#ifndef NDEBUG
+
+/// A class that saves hashes and textual IR snapshots of modules in a
+/// SandboxIR Context, and does hash comparison when `expectNoDiff` is called.
+/// If hashes differ, it prints textual IR for both old and new versions to
+/// aid debugging.
+///
+/// This is used as an additional debug check when reverting changes to
+/// SandboxIR, to verify the reverted state matches the initial state.
+class IRSnapshotChecker {
+  Context &Ctx;
+
+  // A snapshot of textual IR for a module, with a hash for quick comparison.
+  struct ModuleSnapshot {
+    llvm::stable_hash Hash;
+    std::string TextualIR;
+  };
+
+  // A snapshot for each llvm::Module found in the SandboxIR Context. In
+  // practice there will always be one module, but sandbox IR save/restore ops
+  // work at the Context level, so we must take the full state into account.
+  using ContextSnapshot = DenseMap<llvm::Module *, ModuleSnapshot>;
+
+  ContextSnapshot OrigContextSnapshot;
+
+  // True if save() was previously called. This helps us distinguish between
+  // "expectNoDiff was called without calling save" and "save was called but
+  // the saved snapshot is empty".
+  bool HasSavedState = false;
+
+  // Dumps to a string the textual IR for a single Module.
+  std::string dumpIR(llvm::Module *F) const;
+
+  // Returns a snapshot of all the modules in the sandbox IR context.
+  ContextSnapshot takeSnapshot() const;
+
+  // Compares two snapshots and returns true if they differ.
+  bool diff(const ContextSnapshot &Orig, const ContextSnapshot &Curr) const;
+
+public:
+  IRSnapshotChecker(Context &Ctx) : Ctx(Ctx) {}
+
+  /// Saves the current state.
+  void save();
+
+  /// Checks current state against saved state, crashes if different.
+  void expectNoDiff();
+
+  /// Empties saved state.
+  void reset();
+};
+
+#endif // NDEBUG
+
 /// The base class for IR Change classes.
 class IRChangeBase {
 protected:
@@ -405,6 +459,10 @@ class Tracker {
   TrackerState State = TrackerState::Disabled;
   Context &Ctx;
 
+#ifndef NDEBUG
+  IRSnapshotChecker SnapshotChecker;
+#endif
+
 public:
 #ifndef NDEBUG
   /// Helps catch bugs where we are creating new change objects while in the
@@ -412,7 +470,15 @@ class Tracker {
   bool InMiddleOfCreatingChange = false;
 #endif // NDEBUG
 
-  explicit Tracker(Context &Ctx) : Ctx(Ctx) {}
+  explicit Tracker(Context &Ctx)
+      : Ctx(Ctx)
+#ifndef NDEBUG
+        ,
+        SnapshotChecker(Ctx)
+#endif
+  {
+  }
+
   ~Tracker();
   Context &getContext() const { return Ctx; }
   /// Record \p Change and take ownership. This is the main function used to
diff --git a/llvm/lib/SandboxIR/Tracker.cpp b/llvm/lib/SandboxIR/Tracker.cpp
index d35e3ba84990f3..b35c01d6c92ecd 100644
--- a/llvm/lib/SandboxIR/Tracker.cpp
+++ b/llvm/lib/SandboxIR/Tracker.cpp
@@ -10,12 +10,81 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/StructuralHash.h"
 #include "llvm/SandboxIR/Instruction.h"
 #include <sstream>
 
 using namespace llvm::sandboxir;
 
 #ifndef NDEBUG
+
+std::string IRSnapshotChecker::dumpIR(llvm::Module *M) const {
+  std::string Result;
+  raw_string_ostream SS(Result);
+  M->print(SS, /*AssemblyAnnotationWriter=*/nullptr);
+  return Result;
+}
+
+IRSnapshotChecker::ContextSnapshot IRSnapshotChecker::takeSnapshot() const {
+  ContextSnapshot Result;
+  for (const auto &Entry : Ctx.LLVMModuleToModuleMap) {
+    llvm::Module *M = Entry.first;
+    ModuleSnapshot MS;
+    MS.Hash = StructuralHash(*M, /*DetailedHash=*/true);
+    MS.TextualIR = dumpIR(M);
+    Result[M] = MS;
+  }
+  return Result;
+}
+
+bool IRSnapshotChecker::diff(const ContextSnapshot &Orig,
+                             const ContextSnapshot &Curr) const {
+  bool DifferenceFound = false;
+  for (const auto &[M, OrigMS] : Orig) {
+    auto CurrMSIt = Curr.find(M);
+    if (CurrMSIt == Curr.end()) {
+      DifferenceFound = true;
+      dbgs() << "Module " << M->getName() << " not found in current IR.\n";
+      continue;
+    }
+    const ModuleSnapshot &CurrMS = CurrMSIt->second;
+    if (OrigMS.Hash != CurrMS.Hash) {
+      DifferenceFound = true;
+      dbgs() << "Found IR difference in module " << M->getName() << ".\n";
+      dbgs() << "Original:\n" << OrigMS.TextualIR << "\n";
+      dbgs() << "Current:\n" << CurrMS.TextualIR << "\n";
+    }
+  }
+  // Check that Curr doesn't somehow contain any new modules.
+  for (const auto &[M, CurrMS] : Curr) {
+    if (!Orig.contains(M)) {
+      DifferenceFound = true;
+      dbgs() << "Module " << M->getName()
+             << " found in current IR but not in original snapshot.\n";
+    }
+  }
+  return DifferenceFound;
+}
+
+void IRSnapshotChecker::save() {
+  HasSavedState = true;
+  OrigContextSnapshot = takeSnapshot();
+}
+
+void IRSnapshotChecker::expectNoDiff() {
+  assert(HasSavedState && "expectNoDiff called with no saved state");
+  ContextSnapshot CurrContextSnapshot = takeSnapshot();
+  if (diff(OrigContextSnapshot, CurrContextSnapshot)) {
+    llvm_unreachable(
+        "Original and current IR differ! Probably a checkpointing bug.");
+  }
+}
+
+void IRSnapshotChecker::reset() {
+  OrigContextSnapshot.clear();
+  HasSavedState = false;
+}
+
 void UseSet::dump() const {
   dump(dbgs());
   dbgs() << "\n";
@@ -275,7 +344,12 @@ void CmpSwapOperands::dump() const {
 }
 #endif
 
-void Tracker::save() { State = TrackerState::Record; }
+void Tracker::save() {
+  State = TrackerState::Record;
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+  SnapshotChecker.save();
+#endif
+}
 
 void Tracker::revert() {
   assert(State == TrackerState::Record && "Forgot to save()!");
@@ -283,6 +357,9 @@ void Tracker::revert() {
   for (auto &Change : reverse(Changes))
     Change->revert(*this);
   Changes.clear();
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+  SnapshotChecker.expectNoDiff();
+#endif
 }
 
 void Tracker::accept() {
diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp
index 4f2cfa6b06ecd2..7c0d4573801184 100644
--- a/llvm/unittests/SandboxIR/TrackerTest.cpp
+++ b/llvm/unittests/SandboxIR/TrackerTest.cpp
@@ -1844,3 +1844,41 @@ define void @foo(i32 %arg, float %farg) {
   Ctx.revert();
   EXPECT_FALSE(FAdd->getFastMathFlags() != OrigFMF);
 }
+
+TEST_F(TrackerTest, IRSnapshotCheckerNoChanges) {
+  parseIR(C, R"IR(
+define i32 @foo(i32 %arg) {
+  %add0 = add i32 %arg, %arg
+  ret i32 %add0
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+
+  [[maybe_unused]] auto *F = Ctx.createFunction(&LLVMF);
+  sandboxir::IRSnapshotChecker Checker(Ctx);
+  Checker.save();
+  Checker.expectNoDiff();
+}
+
+TEST_F(TrackerTest, IRSnapshotCheckerDiesWithUnexpectedChanges) {
+  parseIR(C, R"IR(
+define i32 @foo(i32 %arg) {
+  %add0 = add i32 %arg, %arg
+  %add1 = add i32 %add0, %arg
+  ret i32 %add1
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+
+  auto *F = Ctx.createFunction(&LLVMF);
+  auto *BB = &*F->begin();
+  auto It = BB->begin();
+  sandboxir::Instruction *Add0 = &*It++;
+  sandboxir::Instruction *Add1 = &*It++;
+  sandboxir::IRSnapshotChecker Checker(Ctx);
+  Checker.save();
+  Add1->setOperand(1, Add0);
+  EXPECT_DEATH(Checker.expectNoDiff(), "Found IR difference");
+}

>From 0a4840249388dbdd612046239126f2be17b0b234 Mon Sep 17 00:00:00 2001
From: Jorge Gorbe Moya <jgorbe at google.com>
Date: Fri, 15 Nov 2024 16:57:27 -0800
Subject: [PATCH 2/3] address review feedback

---
 llvm/include/llvm/SandboxIR/Instruction.h |  1 +
 llvm/include/llvm/SandboxIR/Tracker.h     | 32 ++++++--------
 llvm/lib/SandboxIR/Tracker.cpp            | 52 +++++++++++------------
 llvm/unittests/SandboxIR/TrackerTest.cpp  | 25 +++++++++++
 4 files changed, 62 insertions(+), 48 deletions(-)

diff --git a/llvm/include/llvm/SandboxIR/Instruction.h b/llvm/include/llvm/SandboxIR/Instruction.h
index d9642365908d28..2a59d72e285527 100644
--- a/llvm/include/llvm/SandboxIR/Instruction.h
+++ b/llvm/include/llvm/SandboxIR/Instruction.h
@@ -11,6 +11,7 @@
 
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/SandboxIR/BasicBlock.h"
 #include "llvm/SandboxIR/Constant.h"
diff --git a/llvm/include/llvm/SandboxIR/Tracker.h b/llvm/include/llvm/SandboxIR/Tracker.h
index 3600b3956e22cc..9a031f32708374 100644
--- a/llvm/include/llvm/SandboxIR/Tracker.h
+++ b/llvm/include/llvm/SandboxIR/Tracker.h
@@ -45,7 +45,6 @@
 #include "llvm/ADT/StableHashing.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
-#include "llvm/IR/Module.h"
 #include "llvm/SandboxIR/Use.h"
 #include "llvm/Support/Debug.h"
 #include <memory>
@@ -64,12 +63,11 @@ class SwitchInst;
 class ConstantInt;
 class ShuffleVectorInst;
 class CmpInst;
-class Module;
 class GlobalVariable;
 
 #ifndef NDEBUG
 
-/// A class that saves hashes and textual IR snapshots of modules in a
+/// A class that saves hashes and textual IR snapshots of functions in a
 /// SandboxIR Context, and does hash comparison when `expectNoDiff` is called.
 /// If hashes differ, it prints textual IR for both old and new versions to
 /// aid debugging.
@@ -79,26 +77,22 @@ class GlobalVariable;
 class IRSnapshotChecker {
   Context &Ctx;
 
-  // A snapshot of textual IR for a module, with a hash for quick comparison.
-  struct ModuleSnapshot {
+  // A snapshot of textual IR for a function, with a hash for quick comparison.
+  struct FunctionSnapshot {
     llvm::stable_hash Hash;
     std::string TextualIR;
   };
 
-  // A snapshot for each llvm::Module found in the SandboxIR Context. In
-  // practice there will always be one module, but sandbox IR save/restore ops
-  // work at the Context level, so we must take the full state into account.
-  using ContextSnapshot = DenseMap<llvm::Module *, ModuleSnapshot>;
+  // A snapshot for each llvm::Function found in every module in the SandboxIR
+  // Context. In practice there will always be one module, but sandbox IR
+  // save/restore ops work at the Context level, so we must take the full state
+  // into account.
+  using ContextSnapshot = DenseMap<const llvm::Function *, FunctionSnapshot>;
 
   ContextSnapshot OrigContextSnapshot;
 
-  // True if save() was previously called. This helps us distinguish between
-  // "expectNoDiff was called without calling save" and "save was called but
-  // the saved snapshot is empty".
-  bool HasSavedState = false;
-
-  // Dumps to a string the textual IR for a single Module.
-  std::string dumpIR(llvm::Module *F) const;
+  // Dumps to a string the textual IR for a single Function.
+  std::string dumpIR(const llvm::Function &F) const;
 
   // Returns a snapshot of all the modules in the sandbox IR context.
   ContextSnapshot takeSnapshot() const;
@@ -109,14 +103,12 @@ class IRSnapshotChecker {
 public:
   IRSnapshotChecker(Context &Ctx) : Ctx(Ctx) {}
 
-  /// Saves the current state.
+  /// Saves a snapshot of the current state. If there was any previous snapshot,
+  /// it will be replaced with the new one.
   void save();
 
   /// Checks current state against saved state, crashes if different.
   void expectNoDiff();
-
-  /// Empties saved state.
-  void reset();
 };
 
 #endif // NDEBUG
diff --git a/llvm/lib/SandboxIR/Tracker.cpp b/llvm/lib/SandboxIR/Tracker.cpp
index b35c01d6c92ecd..76f8b3e1b896c7 100644
--- a/llvm/lib/SandboxIR/Tracker.cpp
+++ b/llvm/lib/SandboxIR/Tracker.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/StructuralHash.h"
 #include "llvm/SandboxIR/Instruction.h"
 #include <sstream>
@@ -18,61 +19,61 @@ using namespace llvm::sandboxir;
 
 #ifndef NDEBUG
 
-std::string IRSnapshotChecker::dumpIR(llvm::Module *M) const {
+std::string IRSnapshotChecker::dumpIR(const llvm::Function &F) const {
   std::string Result;
   raw_string_ostream SS(Result);
-  M->print(SS, /*AssemblyAnnotationWriter=*/nullptr);
+  F.print(SS, /*AssemblyAnnotationWriter=*/nullptr);
   return Result;
 }
 
 IRSnapshotChecker::ContextSnapshot IRSnapshotChecker::takeSnapshot() const {
   ContextSnapshot Result;
-  for (const auto &Entry : Ctx.LLVMModuleToModuleMap) {
-    llvm::Module *M = Entry.first;
-    ModuleSnapshot MS;
-    MS.Hash = StructuralHash(*M, /*DetailedHash=*/true);
-    MS.TextualIR = dumpIR(M);
-    Result[M] = MS;
-  }
+  for (const auto &Entry : Ctx.LLVMModuleToModuleMap)
+    for (const auto &F : *Entry.first) {
+      FunctionSnapshot Snapshot;
+      Snapshot.Hash = StructuralHash(F, /*DetailedHash=*/true);
+      Snapshot.TextualIR = dumpIR(F);
+      Result[&F] = Snapshot;
+    }
   return Result;
 }
 
 bool IRSnapshotChecker::diff(const ContextSnapshot &Orig,
                              const ContextSnapshot &Curr) const {
   bool DifferenceFound = false;
-  for (const auto &[M, OrigMS] : Orig) {
-    auto CurrMSIt = Curr.find(M);
-    if (CurrMSIt == Curr.end()) {
+  for (const auto &[F, OrigFS] : Orig) {
+    auto CurrFSIt = Curr.find(F);
+    if (CurrFSIt == Curr.end()) {
       DifferenceFound = true;
-      dbgs() << "Module " << M->getName() << " not found in current IR.\n";
+      dbgs() << "Function " << F->getName() << " not found in current IR.\n";
+      dbgs() << OrigFS.TextualIR << "\n";
       continue;
     }
-    const ModuleSnapshot &CurrMS = CurrMSIt->second;
-    if (OrigMS.Hash != CurrMS.Hash) {
+    const FunctionSnapshot &CurrFS = CurrFSIt->second;
+    if (OrigFS.Hash != CurrFS.Hash) {
       DifferenceFound = true;
-      dbgs() << "Found IR difference in module " << M->getName() << ".\n";
-      dbgs() << "Original:\n" << OrigMS.TextualIR << "\n";
-      dbgs() << "Current:\n" << CurrMS.TextualIR << "\n";
+      dbgs() << "Found IR difference in Function " << F->getName() << "\n";
+      dbgs() << "Original:\n" << OrigFS.TextualIR << "\n";
+      dbgs() << "Current:\n" << CurrFS.TextualIR << "\n";
     }
   }
   // Check that Curr doesn't somehow contain any new modules.
-  for (const auto &[M, CurrMS] : Curr) {
-    if (!Orig.contains(M)) {
+  for (const auto &[F, CurrFS] : Curr) {
+    if (!Orig.contains(F)) {
       DifferenceFound = true;
-      dbgs() << "Module " << M->getName()
+      dbgs() << "Function " << F->getName()
              << " found in current IR but not in original snapshot.\n";
+      dbgs() << CurrFS.TextualIR << "\n";
     }
   }
   return DifferenceFound;
 }
 
 void IRSnapshotChecker::save() {
-  HasSavedState = true;
   OrigContextSnapshot = takeSnapshot();
 }
 
 void IRSnapshotChecker::expectNoDiff() {
-  assert(HasSavedState && "expectNoDiff called with no saved state");
   ContextSnapshot CurrContextSnapshot = takeSnapshot();
   if (diff(OrigContextSnapshot, CurrContextSnapshot)) {
     llvm_unreachable(
@@ -80,11 +81,6 @@ void IRSnapshotChecker::expectNoDiff() {
   }
 }
 
-void IRSnapshotChecker::reset() {
-  OrigContextSnapshot.clear();
-  HasSavedState = false;
-}
-
 void UseSet::dump() const {
   dump(dbgs());
   dbgs() << "\n";
diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp
index 7c0d4573801184..cee13222179dce 100644
--- a/llvm/unittests/SandboxIR/TrackerTest.cpp
+++ b/llvm/unittests/SandboxIR/TrackerTest.cpp
@@ -1882,3 +1882,28 @@ define i32 @foo(i32 %arg) {
   Add1->setOperand(1, Add0);
   EXPECT_DEATH(Checker.expectNoDiff(), "Found IR difference");
 }
+
+TEST_F(TrackerTest, IRSnapshotCheckerSaveMultipleTimes) {
+  parseIR(C, R"IR(
+define i32 @foo(i32 %arg) {
+  %add0 = add i32 %arg, %arg
+  %add1 = add i32 %add0, %arg
+  ret i32 %add1
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+
+  auto *F = Ctx.createFunction(&LLVMF);
+  auto *BB = &*F->begin();
+  auto It = BB->begin();
+  sandboxir::Instruction *Add0 = &*It++;
+  sandboxir::Instruction *Add1 = &*It++;
+  sandboxir::IRSnapshotChecker Checker(Ctx);
+  Checker.save();
+  Add1->setOperand(1, Add0);
+  // Now IR differs from the last snapshot. Let's take a new snapshot.
+  Checker.save();
+  // The new snapshot should have replaced the old one, so this should succeed.
+  Checker.expectNoDiff();
+}

>From 908a13db88a4652f341ae60e710d5a61480d8289 Mon Sep 17 00:00:00 2001
From: Jorge Gorbe Moya <jgorbe at google.com>
Date: Fri, 15 Nov 2024 17:01:36 -0800
Subject: [PATCH 3/3] clang-format

---
 llvm/lib/SandboxIR/Tracker.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/SandboxIR/Tracker.cpp b/llvm/lib/SandboxIR/Tracker.cpp
index 76f8b3e1b896c7..686ef01409edf3 100644
--- a/llvm/lib/SandboxIR/Tracker.cpp
+++ b/llvm/lib/SandboxIR/Tracker.cpp
@@ -69,9 +69,7 @@ bool IRSnapshotChecker::diff(const ContextSnapshot &Orig,
   return DifferenceFound;
 }
 
-void IRSnapshotChecker::save() {
-  OrigContextSnapshot = takeSnapshot();
-}
+void IRSnapshotChecker::save() { OrigContextSnapshot = takeSnapshot(); }
 
 void IRSnapshotChecker::expectNoDiff() {
   ContextSnapshot CurrContextSnapshot = takeSnapshot();



More information about the llvm-commits mailing list