[llvm] [DebugInfo] Don't apply is_stmt on MBB branches that preserve lines (PR #108251)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 11 10:06:32 PDT 2024


https://github.com/SLTozer created https://github.com/llvm/llvm-project/pull/108251

This patch follows on from the changes made in 5fef40c, by adding an additional heuristic that prevents us from applying the start-of-MBB is_stmt flag when we can see that, for all direct branches to the MBB, the last line stepped on before the branch is the same as the first line of the MBB. This is mainly to prevent certain pathological cases, such as macros that expand to multiple basic blocks that all have the same source location, from giving us repeated steps on the same line. This approach is not comprehensive, since it relies on analyzeBranch to read edges.

>From 9d19634206a6c9a958cdefb9492f20dd38700359 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 11 Sep 2024 17:52:08 +0100
Subject: [PATCH] [DebugInfo] Don't apply is_stmt on MBB branches that preserve
 lines

This patch follows on from the changes made in 5fef40c, by adding an
additional heuristic that prevents us from applying the start-of-MBB
is_stmt flag when we can see that, for all direct branches to the MBB,
the last line stepped on before the branch is the same as the first line
of the MBB. This is mainly to prevent certain pathological cases, such as
macros that expand to multiple basic blocks that all have the same source
location, from giving us repeated steps on the same line. This approach
is not comprehensive, since it relies on analyzeBranch to read edges.
---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp    | 119 +++++++++++++++++-
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h      |   2 +
 .../DebugInfo/X86/is_stmt-at-block-start.ll   |  27 +++-
 3 files changed, 145 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 148b620c2b62b7..b0139e19a8498c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2062,7 +2062,9 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
       Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
 
   bool PrevInstInDiffBB = PrevInstBB && PrevInstBB != MI->getParent();
-  if (DL == PrevInstLoc && !PrevInstInDiffBB) {
+  bool ForceIsStmt =
+      PrevInstInDiffBB && MBBsStartingWithIsStmt.contains(MI->getParent());
+  if (DL == PrevInstLoc && !ForceIsStmt) {
     // If we have an ongoing unspecified location, nothing to do here.
     if (!DL)
       return;
@@ -2120,7 +2122,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   // mark is_stmt for the first non-0 line in each BB, in case a predecessor BB
   // ends with a different line.
   unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
-  if (DL.getLine() && (DL.getLine() != OldLine || PrevInstInDiffBB))
+  if (DL.getLine() && (DL.getLine() != OldLine || ForceIsStmt))
     Flags |= DWARF2_FLAG_IS_STMT;
 
   const MDNode *Scope = DL.getScope();
@@ -2228,6 +2230,119 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   // Record beginning of function.
   PrologEndLoc = emitInitialLocDirective(
       *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+
+  // Try to determine what line we would be stepped on before entering each MBB.
+  // This happens in the following ways:
+  // - If this block has a single predecessor, we determine the last line in
+  //   the pred block and we have stepped from that.
+  // - If this block has multiple predecessors, we determine the last line in
+  //   each of them; if they all agree then we have stepped from that line,
+  //   otherwise we do not know what line we have stepped from.
+  // The last line in each MBB is determined as follows:
+  // - If the block contains at least one DebugLoc, we have stepped from the
+  //   last one.
+  // - Otherwise, the last line is line 0.
+  // There is one extra rule however: if a predecessor branches to the current
+  // basic block, we only count DebugLocs on or before that branch, so if we're
+  // looking at the MBB %bb.0, which ends with:
+  //   JCC_1 %bb.1, 0, !debug-location !1
+  //   JMP_1 %bb.2, !debug-location !2
+  // We would consider !1 to be the last loc before %bb.1, and !2 before %bb.2.
+  // We also don't need to make this calculation for any block that doesn't have
+  // a non-0 line number on its first instruction, as we will always emit line 0
+  // without is_stmt for that block regardless.
+  MBBsStartingWithIsStmt.clear();
+
+  // First, collect the last stepped line for each MBB.
+  SmallDenseMap<std::pair<const MachineBasicBlock *, const MachineBasicBlock *>,
+                unsigned>
+      MBBExitLines;
+  const auto *TII = MF->getSubtarget().getInstrInfo();
+
+  // We only need to examine MBBs that could have is_stmt set by this logic.
+  // We use const_cast even though we won't actually modify MF, because some
+  // methods we need take a non-const MBB.
+  SetVector<MachineBasicBlock *> PredMBBsToExamine;
+  SmallVector<MachineBasicBlock *> PotentialIsStmtMBBs;
+  for (auto &MBB : *const_cast<MachineFunction *>(MF)) {
+    if (MBB.pred_empty() || !MBB.begin()->getDebugLoc())
+      continue;
+    unsigned StartLine = MBB.begin()->getDebugLoc()->getLine();
+    if (StartLine == 0)
+      continue;
+    PotentialIsStmtMBBs.push_back(&MBB);
+    for (auto Pred : MBB.predecessors())
+      PredMBBsToExamine.insert(&*Pred);
+  }
+
+  // For each predecessor MBB, we examine the last DebugLoc seen before each
+  // branch or logical fallthrough. We're generous with applying is_stmt; if
+  // there's an edge that TargetInstrInfo::analyzeBranch can't understand, we
+  // simply assume that is_stmt ought to be applied to the successors, since
+  // this rule is mainly intended to avoid excessive stepping on lines that
+  // expand to many lines of code.
+  for (auto *MBB : PredMBBsToExamine) {
+    assert(!MBB->empty() && "Shouldn't be processing empty blocks here.");
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+    SmallVector<MachineOperand, 4> Cond;
+    if (TII->analyzeBranch(*MBB, TBB, FBB, Cond)) {
+      // We can't determine what DLs this branch's successors use, so skip it.
+      continue;
+    }
+    assert(TBB && "Bad result from analyzeBranch?");
+    auto MI = MBB->rbegin();
+    // For a conditional branch followed by unconditional branch where the
+    // unconditional branch has a DebugLoc, that loc is the outgoing loc to the
+    // false destination only; otherwise, both destinations share an outgoing
+    // loc.
+    if (!Cond.empty() && FBB != nullptr && MBB->back().getDebugLoc()) {
+      assert(MI->isBranch() && "Bad result from analyzeBranch?");
+      MBBExitLines.insert({{MBB, FBB}, MBB->back().getDebugLoc()->getLine()});
+      FBB = nullptr;
+      ++MI;
+    } else if (!Cond.empty() && !FBB) {
+      // For a conditional branch that falls through to the next block, the
+      // fallthrough block is the false branch.
+      FBB = MBB->getLogicalFallThrough();
+      assert(FBB &&
+             "Block ending with just a conditional branch should fallthrough.");
+    }
+
+    // If we don't find an outgoing loc, this block will start with a line 0.
+    unsigned LastLine = 0;
+    while (MI != MBB->rend()) {
+      if (auto DL = MI->getDebugLoc()) {
+        LastLine = DL->getLine();
+        break;
+      }
+      ++MI;
+    }
+    MBBExitLines.insert({{MBB, TBB}, LastLine});
+    if (FBB)
+      MBBExitLines.insert({{MBB, FBB}, LastLine});
+  }
+
+  // Now use the outgoing values to determine the incoming values for each
+  // block.
+  MBBsStartingWithIsStmt.insert(&*MF->begin());
+  for (auto *MBB : PotentialIsStmtMBBs) {
+    assert(!MBB->empty() && "Shouldn't be processing empty blocks here.");
+    if (!MBB->begin()->getDebugLoc())
+      continue;
+    unsigned StartLine = MBB->begin()->getDebugLoc()->getLine();
+    if (StartLine == 0)
+      continue;
+    for (auto Pred : MBB->predecessors()) {
+      auto LineIt = MBBExitLines.find({&*Pred, MBB});
+      // If there is no entry, it means there's a branch here that we couldn't
+      // track, so we can't be sure about what line we're arriving from;
+      // therefore assume that we should use is_stmt.
+      if (LineIt == MBBExitLines.end() || LineIt->second != StartLine) {
+        MBBsStartingWithIsStmt.insert(MBB);
+        break;
+      }
+    }
+  }
 }
 
 unsigned
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 19f5b677bb8d06..80a6c7bd34d914 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -382,6 +382,8 @@ class DwarfDebug : public DebugHandlerBase {
                               SmallPtrSet<const MDNode *, 2>>;
   DenseMap<const DILocalScope *, MDNodeSet> LocalDeclsPerLS;
 
+  SmallDenseSet<const MachineBasicBlock *> MBBsStartingWithIsStmt;
+
   /// If nonnull, stores the current machine function we're processing.
   const MachineFunction *CurFn = nullptr;
 
diff --git a/llvm/test/DebugInfo/X86/is_stmt-at-block-start.ll b/llvm/test/DebugInfo/X86/is_stmt-at-block-start.ll
index f4e98512675d9d..ca9305d476d01f 100644
--- a/llvm/test/DebugInfo/X86/is_stmt-at-block-start.ll
+++ b/llvm/test/DebugInfo/X86/is_stmt-at-block-start.ll
@@ -1,7 +1,8 @@
 ;; Checks that when an instruction at the start of a BasicBlock has the same
 ;; DebugLoc as the instruction at the end of the previous BasicBlock, we add
 ;; is_stmt to the new line, to ensure that we still step on it if we arrive from
-;; a BasicBlock other than the immediately preceding one.
+;; a BasicBlock other than the immediately preceding one, unless all known
+;; predecessor BasicBlocks end with the same line.
 
 ; RUN: %llc_dwarf -mtriple=x86_64-unknown-linux -O0 -filetype=obj < %s | llvm-dwarfdump --debug-line - | FileCheck %s
 
@@ -24,6 +25,25 @@ if.end8:                                          ; preds = %if.then2
   ret void
 }
 
+; CHECK:      {{0x[0-9a-f]+}}    113      5 {{.+}} is_stmt
+; CHECK-NOT:  {{0x[0-9a-f]+}}    113      5
+
+define void @_Z1gi(i1 %cond) !dbg !31 {
+entry:
+  br i1 %cond, label %if.then2, label %if.else4, !dbg !34
+
+if.then2:                                         ; preds = %entry
+  br label %if.end8, !dbg !34
+
+if.else4:                                         ; preds = %entry
+  %0 = load i32, ptr null, align 4, !dbg !34
+  %call5 = call i1 null(i32 %0)
+  ret void
+
+if.end8:                                          ; preds = %if.then2
+  ret void
+}
+
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!20}
 
@@ -35,3 +55,8 @@ if.end8:                                          ; preds = %if.then2
 !23 = !{null}
 !24 = !DILocation(line: 13, column: 5, scope: !25)
 !25 = distinct !DILexicalBlock(scope: !21, file: !1, line: 11, column: 27)
+!31 = distinct !DISubprogram(name: "g", linkageName: "_Z1gi", scope: !1, file: !1, line: 107, type: !32, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!32 = distinct !DISubroutineType(types: !33)
+!33 = !{null}
+!34 = !DILocation(line: 113, column: 5, scope: !35)
+!35 = distinct !DILexicalBlock(scope: !31, file: !1, line: 111, column: 27)



More information about the llvm-commits mailing list