[llvm] [DebugInfo] Don't apply is_stmt on MBB branches that preserve lines (PR #108251)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 12 08:17:51 PST 2024


https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/108251

>From a708db9f14506e33c4358b6e7435e1561f1df3f4 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 11 Sep 2024 17:52:08 +0100
Subject: [PATCH 1/6] [DebugInfo] Don't apply is_stmt on MBB branches that
 preserve lines

This patch follows on from the changes made in 5fef40c, by adding an
additional heuristic that prevents us from applying the start-of-MBB
is_stmt flag when we can see that, for all direct branches to the MBB,
the last line stepped on before the branch is the same as the first line
of the MBB. This is mainly to prevent certain pathological cases, such as
macros that expand to multiple basic blocks that all have the same source
location, from giving us repeated steps on the same line. This approach
is not comprehensive, since it relies on analyzeBranch to read edges.
---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp    | 123 +++++++++++++++++-
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h      |   2 +
 .../DebugInfo/X86/is_stmt-at-block-start.ll   |  62 +++++++++
 3 files changed, 182 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/DebugInfo/X86/is_stmt-at-block-start.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7ccd0bc9828bf1..b04566fa32c257 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2072,10 +2072,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
     return;
   }
 
-  bool PrevInstInSameSection =
-      (!PrevInstBB ||
-       PrevInstBB->getSectionID() == MI->getParent()->getSectionID());
-  if (DL == PrevInstLoc && PrevInstInSameSection) {
+  bool PrevInstInDiffBB = PrevInstBB && PrevInstBB != MI->getParent();
+  bool ForceIsStmt =
+      PrevInstInDiffBB && MBBsStartingWithIsStmt.contains(MI->getParent());
+  if (DL == PrevInstLoc && !ForceIsStmt) {
     // If we have an ongoing unspecified location, nothing to do here.
     if (!DL)
       return;
@@ -2132,7 +2132,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   // If the line changed, we call that a new statement; unless we went to
   // line 0 and came back, in which case it is not a new statement.
   unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
-  if (DL.getLine() && DL.getLine() != OldLine)
+  if (DL.getLine() && (DL.getLine() != OldLine || ForceIsStmt))
     Flags |= DWARF2_FLAG_IS_STMT;
 
   const MDNode *Scope = DL.getScope();
@@ -2326,6 +2326,119 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   // Record beginning of function.
   PrologEndLoc = emitInitialLocDirective(
       *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+
+  // Try to determine what line we would be stepped on before entering each MBB.
+  // This happens in the following ways:
+  // - If this block has a single predecessor, we determine the last line in
+  //   the pred block and we have stepped from that.
+  // - If this block has multiple predecessors, we determine the last line in
+  //   each of them; if they all agree then we have stepped from that line,
+  //   otherwise we do not know what line we have stepped from.
+  // The last line in each MBB is determined as follows:
+  // - If the block contains at least one DebugLoc, we have stepped from the
+  //   last one.
+  // - Otherwise, the last line is line 0.
+  // There is one extra rule however: if a predecessor branches to the current
+  // basic block, we only count DebugLocs on or before that branch, so if we're
+  // looking at the MBB %bb.0, which ends with:
+  //   JCC_1 %bb.1, 0, !debug-location !1
+  //   JMP_1 %bb.2, !debug-location !2
+  // We would consider !1 to be the last loc before %bb.1, and !2 before %bb.2.
+  // We also don't need to make this calculation for any block that doesn't have
+  // a non-0 line number on its first instruction, as we will always emit line 0
+  // without is_stmt for that block regardless.
+  MBBsStartingWithIsStmt.clear();
+
+  // First, collect the last stepped line for each MBB.
+  SmallDenseMap<std::pair<const MachineBasicBlock *, const MachineBasicBlock *>,
+                unsigned>
+      MBBExitLines;
+  const auto *TII = MF->getSubtarget().getInstrInfo();
+
+  // We only need to examine MBBs that could have is_stmt set by this logic.
+  // We use const_cast even though we won't actually modify MF, because some
+  // methods we need take a non-const MBB.
+  SetVector<MachineBasicBlock *> PredMBBsToExamine;
+  SmallVector<MachineBasicBlock *> PotentialIsStmtMBBs;
+  for (auto &MBB : *const_cast<MachineFunction *>(MF)) {
+    if (MBB.pred_empty() || !MBB.begin()->getDebugLoc())
+      continue;
+    unsigned StartLine = MBB.begin()->getDebugLoc()->getLine();
+    if (StartLine == 0)
+      continue;
+    PotentialIsStmtMBBs.push_back(&MBB);
+    for (auto Pred : MBB.predecessors())
+      PredMBBsToExamine.insert(&*Pred);
+  }
+
+  // For each predecessor MBB, we examine the last DebugLoc seen before each
+  // branch or logical fallthrough. We're generous with applying is_stmt; if
+  // there's an edge that TargetInstrInfo::analyzeBranch can't understand, we
+  // simply assume that is_stmt ought to be applied to the successors, since
+  // this rule is mainly intended to avoid excessive stepping on lines that
+  // expand to many lines of code.
+  for (auto *MBB : PredMBBsToExamine) {
+    assert(!MBB->empty() && "Shouldn't be processing empty blocks here.");
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+    SmallVector<MachineOperand, 4> Cond;
+    if (TII->analyzeBranch(*MBB, TBB, FBB, Cond)) {
+      // We can't determine what DLs this branch's successors use, so skip it.
+      continue;
+    }
+    assert(TBB && "Bad result from analyzeBranch?");
+    auto MI = MBB->rbegin();
+    // For a conditional branch followed by unconditional branch where the
+    // unconditional branch has a DebugLoc, that loc is the outgoing loc to the
+    // false destination only; otherwise, both destinations share an outgoing
+    // loc.
+    if (!Cond.empty() && FBB != nullptr && MBB->back().getDebugLoc()) {
+      assert(MI->isBranch() && "Bad result from analyzeBranch?");
+      MBBExitLines.insert({{MBB, FBB}, MBB->back().getDebugLoc()->getLine()});
+      FBB = nullptr;
+      ++MI;
+    } else if (!Cond.empty() && !FBB) {
+      // For a conditional branch that falls through to the next block, the
+      // fallthrough block is the false branch.
+      FBB = MBB->getLogicalFallThrough();
+      assert(FBB &&
+             "Block ending with just a conditional branch should fallthrough.");
+    }
+
+    // If we don't find an outgoing loc, this block will start with a line 0.
+    unsigned LastLine = 0;
+    while (MI != MBB->rend()) {
+      if (auto DL = MI->getDebugLoc()) {
+        LastLine = DL->getLine();
+        break;
+      }
+      ++MI;
+    }
+    MBBExitLines.insert({{MBB, TBB}, LastLine});
+    if (FBB)
+      MBBExitLines.insert({{MBB, FBB}, LastLine});
+  }
+
+  // Now use the outgoing values to determine the incoming values for each
+  // block.
+  MBBsStartingWithIsStmt.insert(&*MF->begin());
+  for (auto *MBB : PotentialIsStmtMBBs) {
+    assert(!MBB->empty() && "Shouldn't be processing empty blocks here.");
+    if (!MBB->begin()->getDebugLoc())
+      continue;
+    unsigned StartLine = MBB->begin()->getDebugLoc()->getLine();
+    if (StartLine == 0)
+      continue;
+    for (auto Pred : MBB->predecessors()) {
+      auto LineIt = MBBExitLines.find({&*Pred, MBB});
+      // If there is no entry, it means there's a branch here that we couldn't
+      // track, so we can't be sure about what line we're arriving from;
+      // therefore assume that we should use is_stmt.
+      if (LineIt == MBBExitLines.end() || LineIt->second != StartLine) {
+        MBBsStartingWithIsStmt.insert(MBB);
+        break;
+      }
+    }
+  }
 }
 
 unsigned
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6ed03124a2626f..25c3587fd89a43 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -382,6 +382,8 @@ class DwarfDebug : public DebugHandlerBase {
                               SmallPtrSet<const MDNode *, 2>>;
   DenseMap<const DILocalScope *, MDNodeSet> LocalDeclsPerLS;
 
+  SmallDenseSet<const MachineBasicBlock *> MBBsStartingWithIsStmt;
+
   /// If nonnull, stores the current machine function we're processing.
   const MachineFunction *CurFn = nullptr;
 
diff --git a/llvm/test/DebugInfo/X86/is_stmt-at-block-start.ll b/llvm/test/DebugInfo/X86/is_stmt-at-block-start.ll
new file mode 100644
index 00000000000000..ca9305d476d01f
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/is_stmt-at-block-start.ll
@@ -0,0 +1,62 @@
+;; Checks that when an instruction at the start of a BasicBlock has the same
+;; DebugLoc as the instruction at the end of the previous BasicBlock, we add
+;; is_stmt to the new line, to ensure that we still step on it if we arrive from
+;; a BasicBlock other than the immediately preceding one, unless all known
+;; predecessor BasicBlocks end with the same line.
+
+; RUN: %llc_dwarf -mtriple=x86_64-unknown-linux -O0 -filetype=obj < %s | llvm-dwarfdump --debug-line - | FileCheck %s
+
+; CHECK:      {{0x[0-9a-f]+}}     13      5 {{.+}} is_stmt
+; CHECK-NEXT: {{0x[0-9a-f]+}}     13      5 {{.+}} is_stmt
+
+define void @_Z1fi(i1 %cond) !dbg !21 {
+entry:
+  br i1 %cond, label %if.then2, label %if.else4
+
+if.then2:                                         ; preds = %entry
+  br label %if.end8, !dbg !24
+
+if.else4:                                         ; preds = %entry
+  %0 = load i32, ptr null, align 4, !dbg !24
+  %call5 = call i1 null(i32 %0)
+  ret void
+
+if.end8:                                          ; preds = %if.then2
+  ret void
+}
+
+; CHECK:      {{0x[0-9a-f]+}}    113      5 {{.+}} is_stmt
+; CHECK-NOT:  {{0x[0-9a-f]+}}    113      5
+
+define void @_Z1gi(i1 %cond) !dbg !31 {
+entry:
+  br i1 %cond, label %if.then2, label %if.else4, !dbg !34
+
+if.then2:                                         ; preds = %entry
+  br label %if.end8, !dbg !34
+
+if.else4:                                         ; preds = %entry
+  %0 = load i32, ptr null, align 4, !dbg !34
+  %call5 = call i1 null(i32 %0)
+  ret void
+
+if.end8:                                          ; preds = %if.then2
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!20}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.cpp", directory: "/home/gbtozers/dev/upstream-llvm")
+!20 = !{i32 2, !"Debug Info Version", i32 3}
+!21 = distinct !DISubprogram(name: "f", linkageName: "_Z1fi", scope: !1, file: !1, line: 7, type: !22, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!22 = distinct !DISubroutineType(types: !23)
+!23 = !{null}
+!24 = !DILocation(line: 13, column: 5, scope: !25)
+!25 = distinct !DILexicalBlock(scope: !21, file: !1, line: 11, column: 27)
+!31 = distinct !DISubprogram(name: "g", linkageName: "_Z1gi", scope: !1, file: !1, line: 107, type: !32, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!32 = distinct !DISubroutineType(types: !33)
+!33 = !{null}
+!34 = !DILocation(line: 113, column: 5, scope: !35)
+!35 = distinct !DILexicalBlock(scope: !31, file: !1, line: 111, column: 27)

>From 37323cfe25436a96a73da359f02d38f586f8be2e Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Mon, 16 Sep 2024 13:05:50 +0100
Subject: [PATCH 2/6] Fixes and simplification

---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 135 +++++++++++----------
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h   |   2 +-
 2 files changed, 71 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index b04566fa32c257..ee8fb47f6f9af0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2072,9 +2072,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
     return;
   }
 
-  bool PrevInstInDiffBB = PrevInstBB && PrevInstBB != MI->getParent();
-  bool ForceIsStmt =
-      PrevInstInDiffBB && MBBsStartingWithIsStmt.contains(MI->getParent());
+  bool ForceIsStmt = ForceIsStmtInstrs.contains(MI);
   if (DL == PrevInstLoc && !ForceIsStmt) {
     // If we have an ongoing unspecified location, nothing to do here.
     if (!DL)
@@ -2104,8 +2102,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
     //   possibly debug information; we want it to have a source location.
     // - Instruction is at the top of a block; we don't want to inherit the
     //   location from the physically previous (maybe unrelated) block.
-    if (UnknownLocations == Enable || PrevLabel ||
-        (PrevInstBB && PrevInstBB != MI->getParent())) {
+    bool PrevInstInDiffBB = PrevInstBB && PrevInstBB != MI->getParent();
+    if (UnknownLocations == Enable || PrevLabel || PrevInstInDiffBB) {
       // Preserve the file and column numbers, if we can, to save space in
       // the encoded line table.
       // Do not update PrevInstLoc, it remembers the last non-0 line.
@@ -2347,7 +2345,7 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   // We also don't need to make this calculation for any block that doesn't have
   // a non-0 line number on its first instruction, as we will always emit line 0
   // without is_stmt for that block regardless.
-  MBBsStartingWithIsStmt.clear();
+  ForceIsStmtInstrs.clear();
 
   // First, collect the last stepped line for each MBB.
   SmallDenseMap<std::pair<const MachineBasicBlock *, const MachineBasicBlock *>,
@@ -2359,16 +2357,18 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   // We use const_cast even though we won't actually modify MF, because some
   // methods we need take a non-const MBB.
   SetVector<MachineBasicBlock *> PredMBBsToExamine;
-  SmallVector<MachineBasicBlock *> PotentialIsStmtMBBs;
+  SmallDenseMap<MachineBasicBlock *, MachineInstr *> PotentialIsStmtMBBInstrs;
   for (auto &MBB : *const_cast<MachineFunction *>(MF)) {
-    if (MBB.pred_empty() || !MBB.begin()->getDebugLoc())
+    if (MBB.empty() || MBB.pred_empty())
       continue;
-    unsigned StartLine = MBB.begin()->getDebugLoc()->getLine();
-    if (StartLine == 0)
-      continue;
-    PotentialIsStmtMBBs.push_back(&MBB);
-    for (auto Pred : MBB.predecessors())
-      PredMBBsToExamine.insert(&*Pred);
+    for (auto &MI : MBB) {
+      if (MI.getDebugLoc() && MI.getDebugLoc()->getLine()) {
+        for (auto Pred : MBB.predecessors())
+          PredMBBsToExamine.insert(&*Pred);
+        PotentialIsStmtMBBInstrs.insert({&MBB, &MI});
+        break;
+      }
+    }
   }
 
   // For each predecessor MBB, we examine the last DebugLoc seen before each
@@ -2378,66 +2378,71 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   // this rule is mainly intended to avoid excessive stepping on lines that
   // expand to many lines of code.
   for (auto *MBB : PredMBBsToExamine) {
-    assert(!MBB->empty() && "Shouldn't be processing empty blocks here.");
-    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
-    SmallVector<MachineOperand, 4> Cond;
-    if (TII->analyzeBranch(*MBB, TBB, FBB, Cond)) {
-      // We can't determine what DLs this branch's successors use, so skip it.
+    auto CheckMBBEdge = [&](MachineBasicBlock *Succ, unsigned OutgoingLine) {
+      auto MBBInstrIt = PotentialIsStmtMBBInstrs.find(Succ);
+      if (MBBInstrIt == PotentialIsStmtMBBInstrs.end())
+        return;
+      MachineInstr *MI = MBBInstrIt->second;
+      if (MI->getDebugLoc()->getLine() == OutgoingLine)
+        return;
+      PotentialIsStmtMBBInstrs.erase(MBBInstrIt);
+      ForceIsStmtInstrs.insert(MI);
+    };
+    // If this block is empty, it technically has a fall through but we should
+    // assume it is irrelevant for the purposes of calculating is_stmt.
+    if (MBB->empty())
       continue;
-    }
-    assert(TBB && "Bad result from analyzeBranch?");
-    auto MI = MBB->rbegin();
-    // For a conditional branch followed by unconditional branch where the
-    // unconditional branch has a DebugLoc, that loc is the outgoing loc to the
-    // false destination only; otherwise, both destinations share an outgoing
-    // loc.
-    if (!Cond.empty() && FBB != nullptr && MBB->back().getDebugLoc()) {
-      assert(MI->isBranch() && "Bad result from analyzeBranch?");
-      MBBExitLines.insert({{MBB, FBB}, MBB->back().getDebugLoc()->getLine()});
-      FBB = nullptr;
-      ++MI;
-    } else if (!Cond.empty() && !FBB) {
-      // For a conditional branch that falls through to the next block, the
-      // fallthrough block is the false branch.
-      FBB = MBB->getLogicalFallThrough();
-      assert(FBB &&
-             "Block ending with just a conditional branch should fallthrough.");
+    // If we can't determine what DLs this branch's successors use, just treat
+    // all the successors as coming from the last DebugLoc.
+    SmallVector<MachineBasicBlock *, 2> SuccessorBBs;
+    auto MIIt = MBB->rbegin();
+    {
+      MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+      SmallVector<MachineOperand, 4> Cond;
+      bool AnalyzeFailed = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
+      // For a conditional branch followed by unconditional branch where the
+      // unconditional branch has a DebugLoc, that loc is the outgoing loc to
+      // the the false destination only; otherwise, both destinations share an
+      // outgoing loc.
+      if (!AnalyzeFailed && !Cond.empty() && FBB != nullptr &&
+          MBB->back().getDebugLoc() && MBB->back().getDebugLoc()->getLine()) {
+        unsigned FBBLine = MBB->back().getDebugLoc()->getLine();
+        assert(MIIt->isBranch() && "Bad result from analyzeBranch?");
+        CheckMBBEdge(FBB, FBBLine);
+        FBB = nullptr;
+        ++MIIt;
+        SuccessorBBs.push_back(TBB);
+      } else {
+        // For all other cases, all successors share the last outgoing DebugLoc.
+        SuccessorBBs.assign(MBB->succ_begin(), MBB->succ_end());
+      }
     }
 
     // If we don't find an outgoing loc, this block will start with a line 0.
+    // It is possible that we have a block that has no DebugLoc, but acts as a
+    // simple passthrough between two blocks that end and start with the same
+    // line, e.g.:
+    //   bb.1:
+    //     JMP %bb.2, debug-location !10
+    //   bb.2:
+    //     JMP %bb.3
+    //   bb.3:
+    //     $r1 = ADD $r2, $r3, debug-location !10
+    // If these blocks were merged into a single block, we would not attach
+    // is_stmt to the ADD, but with this logic that only checks the immediate
+    // predecessor, we will; we make this tradeoff because doing a full dataflow
+    // analysis would be expensive, and these situations are probably not common
+    // enough for this to be worthwhile.
     unsigned LastLine = 0;
-    while (MI != MBB->rend()) {
-      if (auto DL = MI->getDebugLoc()) {
+    while (MIIt != MBB->rend()) {
+      if (auto DL = MIIt->getDebugLoc(); DL && DL->getLine()) {
         LastLine = DL->getLine();
         break;
       }
-      ++MI;
-    }
-    MBBExitLines.insert({{MBB, TBB}, LastLine});
-    if (FBB)
-      MBBExitLines.insert({{MBB, FBB}, LastLine});
-  }
-
-  // Now use the outgoing values to determine the incoming values for each
-  // block.
-  MBBsStartingWithIsStmt.insert(&*MF->begin());
-  for (auto *MBB : PotentialIsStmtMBBs) {
-    assert(!MBB->empty() && "Shouldn't be processing empty blocks here.");
-    if (!MBB->begin()->getDebugLoc())
-      continue;
-    unsigned StartLine = MBB->begin()->getDebugLoc()->getLine();
-    if (StartLine == 0)
-      continue;
-    for (auto Pred : MBB->predecessors()) {
-      auto LineIt = MBBExitLines.find({&*Pred, MBB});
-      // If there is no entry, it means there's a branch here that we couldn't
-      // track, so we can't be sure about what line we're arriving from;
-      // therefore assume that we should use is_stmt.
-      if (LineIt == MBBExitLines.end() || LineIt->second != StartLine) {
-        MBBsStartingWithIsStmt.insert(MBB);
-        break;
-      }
+      ++MIIt;
     }
+    for (auto Succ : SuccessorBBs)
+      CheckMBBEdge(Succ, LastLine);
   }
 }
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 25c3587fd89a43..aad76ab616254d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -382,7 +382,7 @@ class DwarfDebug : public DebugHandlerBase {
                               SmallPtrSet<const MDNode *, 2>>;
   DenseMap<const DILocalScope *, MDNodeSet> LocalDeclsPerLS;
 
-  SmallDenseSet<const MachineBasicBlock *> MBBsStartingWithIsStmt;
+  SmallDenseSet<const MachineInstr *> ForceIsStmtInstrs;
 
   /// If nonnull, stores the current machine function we're processing.
   const MachineFunction *CurFn = nullptr;

>From eacfb86ee7d78411741f86a5f4a51d81bfefc740 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Thu, 3 Oct 2024 15:24:25 +0100
Subject: [PATCH 3/6] Minor fixups, comment adjustments

---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 42 ++++++++--------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ee8fb47f6f9af0..d2928bc0838ab8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2325,26 +2325,15 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   PrologEndLoc = emitInitialLocDirective(
       *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
 
-  // Try to determine what line we would be stepped on before entering each MBB.
-  // This happens in the following ways:
-  // - If this block has a single predecessor, we determine the last line in
-  //   the pred block and we have stepped from that.
-  // - If this block has multiple predecessors, we determine the last line in
-  //   each of them; if they all agree then we have stepped from that line,
-  //   otherwise we do not know what line we have stepped from.
-  // The last line in each MBB is determined as follows:
-  // - If the block contains at least one DebugLoc, we have stepped from the
-  //   last one.
-  // - Otherwise, the last line is line 0.
-  // There is one extra rule however: if a predecessor branches to the current
-  // basic block, we only count DebugLocs on or before that branch, so if we're
-  // looking at the MBB %bb.0, which ends with:
-  //   JCC_1 %bb.1, 0, !debug-location !1
-  //   JMP_1 %bb.2, !debug-location !2
-  // We would consider !1 to be the last loc before %bb.1, and !2 before %bb.2.
-  // We also don't need to make this calculation for any block that doesn't have
-  // a non-0 line number on its first instruction, as we will always emit line 0
-  // without is_stmt for that block regardless.
+  // For each MBB we try to determine whether and what source line is *always*
+  // the last stepped-on line before entering MBB. Such a line exists if every
+  // predecessor has an instruction with source line N, which is the last valid
+  // source line to be seen before entering MBB, and if N is the same for all
+  // predecessors. If this is true, and the first instruction with a valid
+  // source line in MBB also has source line N, then that instruction should
+  // *not* use is_stmt. Otherwise, the first instruction with a valid source
+  // line in MBB should always use is_stmt, since we may step to it from a
+  // different line.
   ForceIsStmtInstrs.clear();
 
   // First, collect the last stepped line for each MBB.
@@ -2363,8 +2352,8 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
       continue;
     for (auto &MI : MBB) {
       if (MI.getDebugLoc() && MI.getDebugLoc()->getLine()) {
-        for (auto Pred : MBB.predecessors())
-          PredMBBsToExamine.insert(&*Pred);
+        for (auto *Pred : MBB.predecessors())
+          PredMBBsToExamine.insert(Pred);
         PotentialIsStmtMBBInstrs.insert({&MBB, &MI});
         break;
       }
@@ -2372,11 +2361,9 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   }
 
   // For each predecessor MBB, we examine the last DebugLoc seen before each
-  // branch or logical fallthrough. We're generous with applying is_stmt; if
+  // branch or logical fallthrough. We're generous with applying is_stmt: if
   // there's an edge that TargetInstrInfo::analyzeBranch can't understand, we
-  // simply assume that is_stmt ought to be applied to the successors, since
-  // this rule is mainly intended to avoid excessive stepping on lines that
-  // expand to many lines of code.
+  // assume that all successor MBBs use the last DebugLoc seen.
   for (auto *MBB : PredMBBsToExamine) {
     auto CheckMBBEdge = [&](MachineBasicBlock *Succ, unsigned OutgoingLine) {
       auto MBBInstrIt = PotentialIsStmtMBBInstrs.find(Succ);
@@ -2409,7 +2396,6 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
         unsigned FBBLine = MBB->back().getDebugLoc()->getLine();
         assert(MIIt->isBranch() && "Bad result from analyzeBranch?");
         CheckMBBEdge(FBB, FBBLine);
-        FBB = nullptr;
         ++MIIt;
         SuccessorBBs.push_back(TBB);
       } else {
@@ -2441,7 +2427,7 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
       }
       ++MIIt;
     }
-    for (auto Succ : SuccessorBBs)
+    for (auto *Succ : SuccessorBBs)
       CheckMBBEdge(Succ, LastLine);
   }
 }

>From 05ec29145a666013dee0acf8b84b1f0f8e130c73 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Thu, 3 Oct 2024 16:49:11 +0100
Subject: [PATCH 4/6] Re-add unnecessarily removed code

---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index d2928bc0838ab8..abcc98dfad962f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2062,6 +2062,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   unsigned LastAsmLine =
       Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
 
+<<<<<<< HEAD
   if (!DL && MI == PrologEndLoc) {
     // In rare situations, we might want to place the end of the prologue
     // somewhere that doesn't have a source location already. It should be in
@@ -2072,8 +2073,13 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
     return;
   }
 
+=======
+  bool PrevInstInSameSection =
+      (!PrevInstBB ||
+       PrevInstBB->getSectionID() == MI->getParent()->getSectionID());
+>>>>>>> ac5931e8f979 (Re-add unnecessarily removed code)
   bool ForceIsStmt = ForceIsStmtInstrs.contains(MI);
-  if (DL == PrevInstLoc && !ForceIsStmt) {
+  if (DL == PrevInstLoc && PrevInstInSameSection && !ForceIsStmt) {
     // If we have an ongoing unspecified location, nothing to do here.
     if (!DL)
       return;
@@ -2102,8 +2108,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
     //   possibly debug information; we want it to have a source location.
     // - Instruction is at the top of a block; we don't want to inherit the
     //   location from the physically previous (maybe unrelated) block.
-    bool PrevInstInDiffBB = PrevInstBB && PrevInstBB != MI->getParent();
-    if (UnknownLocations == Enable || PrevLabel || PrevInstInDiffBB) {
+    if (UnknownLocations == Enable || PrevLabel ||
+        (PrevInstBB && PrevInstBB != MI->getParent())) {
       // Preserve the file and column numbers, if we can, to save space in
       // the encoded line table.
       // Do not update PrevInstLoc, it remembers the last non-0 line.

>From 7813158514e44bd6c14326c8fbb4f5e26d61dbb5 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 5 Nov 2024 18:17:32 +0000
Subject: [PATCH 5/6] Address review comments

---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 71 ++++++++++++++--------
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h   |  2 +
 2 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index abcc98dfad962f..5c8aa2971e6636 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2312,27 +2312,15 @@ DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, unsigned CUID) {
   return PrologEndLoc;
 }
 
-// Gather pre-function debug information.  Assumes being called immediately
-// after the function entry point has been emitted.
-void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
-  CurFn = MF;
-
-  auto *SP = MF->getFunction().getSubprogram();
-  assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
-  if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
-    return;
-
-  DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
-
-  Asm->OutStreamer->getContext().setDwarfCompileUnitID(
-      getDwarfCompileUnitIDForLineTable(CU));
-
-  // Record beginning of function.
-  PrologEndLoc = emitInitialLocDirective(
-      *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+/// For the function \p MF, finds the set of instructions which may represent a
+/// change in line number from one or more of the preceding MBBs. Stores the
+/// resulting set of instructions, which should have is_stmt set, in
+/// ForceIsStmtInstrs.
+void DwarfDebug::findForceIsStmtInstrs(const MachineFunction *MF) {
+  ForceIsStmtInstrs.clear();
 
-  // For each MBB we try to determine whether and what source line is *always*
-  // the last stepped-on line before entering MBB. Such a line exists if every
+  // Here we try to find MBBs where the last line stepped on before entering
+  // it is always the same, and which line that is. Such a line exists if every
   // predecessor has an instruction with source line N, which is the last valid
   // source line to be seen before entering MBB, and if N is the same for all
   // predecessors. If this is true, and the first instruction with a valid
@@ -2340,8 +2328,6 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   // *not* use is_stmt. Otherwise, the first instruction with a valid source
   // line in MBB should always use is_stmt, since we may step to it from a
   // different line.
-  ForceIsStmtInstrs.clear();
-
   // First, collect the last stepped line for each MBB.
   SmallDenseMap<std::pair<const MachineBasicBlock *, const MachineBasicBlock *>,
                 unsigned>
@@ -2351,7 +2337,7 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   // We only need to examine MBBs that could have is_stmt set by this logic.
   // We use const_cast even though we won't actually modify MF, because some
   // methods we need take a non-const MBB.
-  SetVector<MachineBasicBlock *> PredMBBsToExamine;
+  SmallSetVector<MachineBasicBlock *, 4> PredMBBsToExamine;
   SmallDenseMap<MachineBasicBlock *, MachineInstr *> PotentialIsStmtMBBInstrs;
   for (auto &MBB : *const_cast<MachineFunction *>(MF)) {
     if (MBB.empty() || MBB.pred_empty())
@@ -2381,9 +2367,19 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
       PotentialIsStmtMBBInstrs.erase(MBBInstrIt);
       ForceIsStmtInstrs.insert(MI);
     };
-    // If this block is empty, it technically has a fall through but we should
-    // assume it is irrelevant for the purposes of calculating is_stmt.
-    if (MBB->empty())
+    // If this block is empty, we conservatively assume that its fallthrough
+    // successor needs is_stmt; we could check MBB's predecessors to see if it
+    // has a consistent entry line, but this seems unlikely to be worthwhile.
+    if (MBB->empty()) {
+      for (auto *Succ : MBB->successors())
+        CheckMBBEdge(Succ, 0);
+      continue;
+    }
+    // If MBB has no successors that are in the "potential" set, due to one or
+    // more of them having confirmed is_stmt, we can skip this check early.
+    if (none_of(MBB->successors(), [&](auto *SuccMBB) {
+          return PotentialIsStmtMBBInstrs.contains(SuccMBB);
+        }))
       continue;
     // If we can't determine what DLs this branch's successors use, just treat
     // all the successors as coming from the last DebugLoc.
@@ -2438,6 +2434,29 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   }
 }
 
+// Gather pre-function debug information.  Assumes being called immediately
+// after the function entry point has been emitted.
+void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
+  CurFn = MF;
+
+  auto *SP = MF->getFunction().getSubprogram();
+  assert(LScopes.empty() ||
+         SP == LScopes.getCurrentFunctionScope()->getScopeNode());
+  if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
+    return;
+
+  DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+
+  Asm->OutStreamer->getContext().setDwarfCompileUnitID(
+      getDwarfCompileUnitIDForLineTable(CU));
+
+  // Record beginning of function.
+  PrologEndLoc = emitInitialLocDirective(
+      *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+
+  findForceIsStmtInstrs(MF);
+}
+
 unsigned
 DwarfDebug::getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU) {
   // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index aad76ab616254d..1ed20cb8ba57af 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -696,6 +696,8 @@ class DwarfDebug : public DebugHandlerBase {
   /// Emit the reference to the section.
   void emitSectionReference(const DwarfCompileUnit &CU);
 
+  void findForceIsStmtInstrs(const MachineFunction *MF);
+
 protected:
   /// Gather pre-function debug information.
   void beginFunctionImpl(const MachineFunction *MF) override;

>From 843dae8e4f71875effeed81a03b2cddb4d14711c Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 12 Nov 2024 15:37:45 +0000
Subject: [PATCH 6/6] Update comments, remove unnecessary formatted code

---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 61 +++++++++++++---------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5c8aa2971e6636..65be55b2840c32 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2062,7 +2062,6 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   unsigned LastAsmLine =
       Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
 
-<<<<<<< HEAD
   if (!DL && MI == PrologEndLoc) {
     // In rare situations, we might want to place the end of the prologue
     // somewhere that doesn't have a source location already. It should be in
@@ -2073,11 +2072,9 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
     return;
   }
 
-=======
   bool PrevInstInSameSection =
       (!PrevInstBB ||
        PrevInstBB->getSectionID() == MI->getParent()->getSectionID());
->>>>>>> ac5931e8f979 (Re-add unnecessarily removed code)
   bool ForceIsStmt = ForceIsStmtInstrs.contains(MI);
   if (DL == PrevInstLoc && PrevInstInSameSection && !ForceIsStmt) {
     // If we have an ongoing unspecified location, nothing to do here.
@@ -2319,26 +2316,40 @@ DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, unsigned CUID) {
 void DwarfDebug::findForceIsStmtInstrs(const MachineFunction *MF) {
   ForceIsStmtInstrs.clear();
 
-  // Here we try to find MBBs where the last line stepped on before entering
-  // it is always the same, and which line that is. Such a line exists if every
-  // predecessor has an instruction with source line N, which is the last valid
-  // source line to be seen before entering MBB, and if N is the same for all
-  // predecessors. If this is true, and the first instruction with a valid
-  // source line in MBB also has source line N, then that instruction should
-  // *not* use is_stmt. Otherwise, the first instruction with a valid source
-  // line in MBB should always use is_stmt, since we may step to it from a
-  // different line.
-  // First, collect the last stepped line for each MBB.
-  SmallDenseMap<std::pair<const MachineBasicBlock *, const MachineBasicBlock *>,
-                unsigned>
-      MBBExitLines;
+  // For this function, we try to find MBBs where the last source line in every
+  // block predecessor matches the first line seen in the block itself; for
+  // every such MBB, we set is_stmt=false on the first line in the block, and
+  // for every other block we set is_stmt=true on the first line.
+  // For example, if we have the block %bb.3, which has 2 predecesors %bb.1 and
+  // %bb.2:
+  //   bb.1:
+  //     $r3 = MOV64ri 12, debug-location !DILocation(line: 4)
+  //     JMP %bb.3, debug-location !DILocation(line: 5)
+  //   bb.2:
+  //     $r3 = MOV64ri 24, debug-location !DILocation(line: 5)
+  //     JMP %bb.3
+  //   bb.3:
+  //     $r2 = MOV64ri 1
+  //     $r1 = ADD $r2, $r3, debug-location !DILocation(line: 5)
+  // When we examine %bb.3, we first check to see if it contains any
+  // instructions with debug locations, and select the first such instruction;
+  // in this case, the ADD, with line=5. We then examine both of its
+  // predecessors to see what the last debug-location in them is. For each
+  // predecessor, if they do not contain any debug-locations, or if the last
+  // debug-location before jumping to %bb.3 does not have line=5, then the ADD
+  // in %bb.3 must use IsStmt. In this case, all predecessors have a
+  // debug-location with line=5 as the last debug-location before jumping to
+  // %bb.3, so we do not set is_stmt for the ADD instruction - we know that
+  // whichever MBB we have arrived from, the line has not changed.
+
   const auto *TII = MF->getSubtarget().getInstrInfo();
 
-  // We only need to examine MBBs that could have is_stmt set by this logic.
+  // We only need to the predecessors of MBBs that could have is_stmt set by
+  // this logic.
+  SmallDenseSet<MachineBasicBlock *, 4> PredMBBsToExamine;
+  SmallDenseMap<MachineBasicBlock *, MachineInstr *> PotentialIsStmtMBBInstrs;
   // We use const_cast even though we won't actually modify MF, because some
   // methods we need take a non-const MBB.
-  SmallSetVector<MachineBasicBlock *, 4> PredMBBsToExamine;
-  SmallDenseMap<MachineBasicBlock *, MachineInstr *> PotentialIsStmtMBBInstrs;
   for (auto &MBB : *const_cast<MachineFunction *>(MF)) {
     if (MBB.empty() || MBB.pred_empty())
       continue;
@@ -2352,10 +2363,11 @@ void DwarfDebug::findForceIsStmtInstrs(const MachineFunction *MF) {
     }
   }
 
-  // For each predecessor MBB, we examine the last DebugLoc seen before each
-  // branch or logical fallthrough. We're generous with applying is_stmt: if
-  // there's an edge that TargetInstrInfo::analyzeBranch can't understand, we
-  // assume that all successor MBBs use the last DebugLoc seen.
+  // For each predecessor MBB, we examine the last line seen before each branch
+  // or logical fallthrough. We use analyzeBranch to handle cases where
+  // different branches have different outgoing lines (i.e. if there are
+  // multiple branches that each have their own source location); otherwise we
+  // just use the last line in the block.
   for (auto *MBB : PredMBBsToExamine) {
     auto CheckMBBEdge = [&](MachineBasicBlock *Succ, unsigned OutgoingLine) {
       auto MBBInstrIt = PotentialIsStmtMBBInstrs.find(Succ);
@@ -2440,8 +2452,7 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   CurFn = MF;
 
   auto *SP = MF->getFunction().getSubprogram();
-  assert(LScopes.empty() ||
-         SP == LScopes.getCurrentFunctionScope()->getScopeNode());
+  assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
   if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
     return;
 



More information about the llvm-commits mailing list