[llvm] [RemoveDIs][DebugInfo] Add DPValue checks to the verifier, prepare DPValue for parsing support (PR #79810)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 29 03:26:12 PST 2024


https://github.com/SLTozer created https://github.com/llvm/llvm-project/pull/79810

As part of the RemoveDIs project, this patch adds support for checking DPValues in the verifier. Although this is not strictly parsing-related, and we currently automatically convert back to the old debug info format immediately after parsing, we are approaching the point where the we can operate end-to-end in the new debug info format, at which point it is appropriate that we can actually validate modules in the new format.
This patch also contains some changes that aren't strictly parsing-related, but are necessary class refactors for parsing support, and are used in the verifier checks (i.e. changing the DILocalVariable field to be a tracking MD reference, and adding a Verifier check to confirm that it is a DILocalVariable).
This patch is intended to follow the printing patch (https://github.com/llvm/llvm-project/pull/79281), but is presented here as a standalone patch for ease-of-review.

>From 42d810a85d43bd7c9c555d016cacd582fb06785e Mon Sep 17 00:00:00 2001
From: Stephen Tozer <Stephen.Tozer at Sony.com>
Date: Mon, 29 Jan 2024 09:48:55 +0000
Subject: [PATCH] Add pre-parse changes (verifier, DPValue class interface)

---
 .../include/llvm/IR/DebugProgramInstruction.h |  7 +-
 llvm/lib/IR/AsmWriter.cpp                     |  6 +-
 llvm/lib/IR/DebugProgramInstruction.cpp       |  6 ++
 llvm/lib/IR/Verifier.cpp                      | 97 +++++++++++++++++++
 4 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h
index 737417fb9b9a542..74b42d1e81f938a 100644
--- a/llvm/include/llvm/IR/DebugProgramInstruction.h
+++ b/llvm/include/llvm/IR/DebugProgramInstruction.h
@@ -81,7 +81,7 @@ class DPValue : public ilist_node<DPValue>, private DebugValueUser {
   // DebugValueUser superclass instead. The referred to Value can either be a
   // ValueAsMetadata or a DIArgList.
 
-  DILocalVariable *Variable;
+  TrackingMDNodeRef Variable;
   DIExpression *Expression;
   DebugLoc DbgLoc;
   DIExpression *AddressExpression;
@@ -219,7 +219,7 @@ class DPValue : public ilist_node<DPValue>, private DebugValueUser {
   void addVariableLocationOps(ArrayRef<Value *> NewValues,
                               DIExpression *NewExpr);
 
-  void setVariable(DILocalVariable *NewVar) { Variable = NewVar; }
+  void setVariable(DILocalVariable *NewVar);
 
   void setExpression(DIExpression *NewExpr) { Expression = NewExpr; }
 
@@ -240,7 +240,8 @@ class DPValue : public ilist_node<DPValue>, private DebugValueUser {
   void setKillLocation();
   bool isKillLocation() const;
 
-  DILocalVariable *getVariable() const { return Variable; }
+  DILocalVariable *getVariable() const;
+  MDNode *getRawVariable() const { return Variable; }
 
   DIExpression *getExpression() const { return Expression; }
 
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 3c15784a0ed5eba..80e089c8caa13d2 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1138,10 +1138,10 @@ void SlotTracker::processFunctionMetadata(const Function &F) {
 }
 
 void SlotTracker::processDPValueMetadata(const DPValue &DPV) {
-  CreateMetadataSlot(DPV.getVariable());
-  CreateMetadataSlot(DPV.getDebugLoc());
+  CreateMetadataSlot(DPV.getRawVariable());
+  CreateMetadataSlot(DPV.getDebugLoc().getAsMDNode());
   if (DPV.isDbgAssign()) {
-    CreateMetadataSlot(DPV.getAssignID());
+    CreateMetadataSlot(cast<MDNode>(DPV.getRawAssignID()));
   }
 }
 
diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp
index fd234685d5fd4bd..38faf90064e3cb3 100644
--- a/llvm/lib/IR/DebugProgramInstruction.cpp
+++ b/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -111,6 +111,8 @@ DPValue *DPValue::createLinkedDPVAssign(Instruction *LinkedInstr, Value *Val,
   return NewDPVAssign;
 }
 
+void DPValue::setVariable(DILocalVariable *NewVar) { Variable.reset(NewVar); }
+
 iterator_range<DPValue::location_op_iterator> DPValue::location_ops() const {
   auto *MD = getRawLocation();
   // If a Value has been deleted, the "location" for this DPValue will be
@@ -249,6 +251,10 @@ bool DPValue::isKillLocation() const {
          any_of(location_ops(), [](Value *V) { return isa<UndefValue>(V); });
 }
 
+DILocalVariable *DPValue::getVariable() const {
+  return cast<DILocalVariable>(Variable.get());
+}
+
 std::optional<uint64_t> DPValue::getFragmentSizeInBits() const {
   if (auto Fragment = getExpression()->getFragmentInfo())
     return Fragment->SizeInBits;
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 91cf91fbc788bd9..5f0b04d425898d1 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -178,6 +178,26 @@ struct VerifierSupport {
       V->print(*OS, MST, false);
   }
 
+  void Write(DPValue::LocationType Type) {
+    switch (Type) {
+    case DPValue::LocationType::Value:
+      *OS << "value";
+      break;
+    case DPValue::LocationType::Declare:
+      *OS << "declare";
+      break;
+    case DPValue::LocationType::Assign:
+      *OS << "assign";
+      break;
+    case DPValue::LocationType::End:
+      *OS << "end";
+      break;
+    case DPValue::LocationType::Any:
+      *OS << "any";
+      break;
+    };
+  }
+
   void Write(const Metadata *MD) {
     if (!MD)
       return;
@@ -522,6 +542,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
 
   void visitTemplateParams(const MDNode &N, const Metadata &RawParams);
 
+  void visit(DPValue &DPV);
   // InstVisitor overrides...
   using InstVisitor<Verifier>::visit;
   void visit(Instruction &I);
@@ -650,6 +671,8 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   } while (false)
 
 void Verifier::visit(Instruction &I) {
+  for (auto &DPV : I.getDbgValueRange())
+    visit(DPV);
   for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
     Check(I.getOperand(i) != nullptr, "Operand is null", &I);
   InstVisitor<Verifier>::visit(I);
@@ -6152,6 +6175,80 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
   return nullptr;
 }
 
+void Verifier::visit(DPValue &DPV) {
+  CheckDI(DPV.getType() == DPValue::LocationType::Value ||
+              DPV.getType() == DPValue::LocationType::Declare ||
+              DPV.getType() == DPValue::LocationType::Assign,
+          "invalid #dbg record type", &DPV, DPV.getType());
+  StringRef Kind;
+  switch (DPV.getType()) {
+  case DPValue::LocationType::Value:
+    Kind = "value";
+    break;
+  case DPValue::LocationType::Declare:
+    Kind = "declare";
+    break;
+  case DPValue::LocationType::Assign:
+    Kind = "assign";
+    break;
+  default:
+    llvm_unreachable("Tried to print a DPValue with an invalid LocationType!");
+  };
+  auto *MD = DPV.getRawLocation();
+  CheckDI(isa<ValueAsMetadata>(MD) || isa<DIArgList>(MD) ||
+              (isa<MDNode>(MD) && !cast<MDNode>(MD)->getNumOperands()),
+          "invalid #dbg_" + Kind + " address/value", &DPV, MD);
+  CheckDI(isa<DILocalVariable>(DPV.getRawVariable()),
+          "invalid #dbg_" + Kind + " variable", &DPV, DPV.getRawVariable());
+  CheckDI(DPV.getExpression(), "missing #dbg_" + Kind + " expression", &DPV,
+          DPV.getExpression());
+
+  if (DPV.isDbgAssign()) {
+    CheckDI(isa<DIAssignID>(DPV.getRawAssignID()),
+            "invalid #dbg_assign DIAssignID", &DPV, DPV.getRawAssignID());
+    const auto *RawAddr = DPV.getRawAddress();
+    CheckDI(
+        isa<ValueAsMetadata>(RawAddr) ||
+            (isa<MDNode>(RawAddr) && !cast<MDNode>(RawAddr)->getNumOperands()),
+        "invalid #dbg_assign address", &DPV, DPV.getRawAddress());
+    CheckDI(DPV.getAddressExpression(),
+            "missing #dbg_assign address expression", &DPV,
+            DPV.getAddressExpression());
+    // All of the linked instructions should be in the same function as DPV.
+    for (Instruction *I : at::getAssignmentInsts(&DPV))
+      CheckDI(DPV.getFunction() == I->getFunction(),
+              "inst not in same function as #dbg_assign", I, &DPV);
+  }
+
+  if (MDNode *N = DPV.getDebugLoc().getAsMDNode()) {
+    CheckDI(isa<DILocation>(N), "invalid #dbg_" + Kind + " location", &DPV, N);
+    visitDILocation(*cast<DILocation>(N));
+  }
+
+  BasicBlock *BB = DPV.getParent();
+  Function *F = BB ? BB->getParent() : nullptr;
+
+  // The scopes for variables and !dbg attachments must agree.
+  DILocalVariable *Var = DPV.getVariable();
+  DILocation *Loc = DPV.getDebugLoc();
+  CheckDI(Loc, "missing #dbg_" + Kind + " DILocation", &DPV, BB, F);
+
+  DISubprogram *VarSP = getSubprogram(Var->getRawScope());
+  DISubprogram *LocSP = getSubprogram(Loc->getRawScope());
+  if (!VarSP || !LocSP)
+    return; // Broken scope chains are checked elsewhere.
+
+  CheckDI(VarSP == LocSP,
+          "mismatched subprogram between #dbg_" + Kind +
+              " variable and DILocation",
+          &DPV, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
+          Loc->getScope()->getSubprogram());
+
+  // This check is redundant with one in visitLocalVariable().
+  CheckDI(isType(Var->getRawType()), "invalid type ref", Var,
+          Var->getRawType());
+}
+
 void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
   if (auto *VPCast = dyn_cast<VPCastIntrinsic>(&VPI)) {
     auto *RetTy = cast<VectorType>(VPCast->getType());



More information about the llvm-commits mailing list