[llvm] 19b65a9 - [DebugInfo][RemoveDIs] Add a DPValue implementation for instcombine sinking (#77930)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 25 15:29:01 PST 2024


Author: Jeremy Morse
Date: 2024-01-25T23:28:56Z
New Revision: 19b65a9c0284c12556512e8136352a7e1eb8dfa3

URL: https://github.com/llvm/llvm-project/commit/19b65a9c0284c12556512e8136352a7e1eb8dfa3
DIFF: https://github.com/llvm/llvm-project/commit/19b65a9c0284c12556512e8136352a7e1eb8dfa3.diff

LOG: [DebugInfo][RemoveDIs] Add a DPValue implementation for instcombine sinking (#77930)

In instcombine, when we sink an instruction into a successor block, we try
to clone and salvage all the variable assignments that use that Value. This
is a behaviour that's (IMO) flawed, but there are important use cases where
we want to avoid regressions, thus we're implementing this for the
non-instruction debug-info representation.

This patch refactors the dbg.value sinking code into it's own function, and
installs a parallel implementation for DPValues, the non-instruction
debug-info container. This is mostly identical to the dbg.value
implementation, except that we don't have an easy-to-access ordering
between DPValues, and have to jump through extra hoops to establish one in
the (rare) cases where that ordering is required.

The test added represents a common use-case in LLVM where these behaviours
are important: a loop has been completely optimised away, leaving several
dbg.values in a row referring to an instruction that's going to sink. The
dbg.values should sink in both dbg.value and RemoveDIs mode, and
additionally only the last assignment should sink.

Added: 
    llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll

Modified: 
    llvm/include/llvm/IR/DebugProgramInstruction.h
    llvm/lib/IR/DebugProgramInstruction.cpp
    llvm/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h
index 0524a657bce4b60..a983280d2c4b9a2 100644
--- a/llvm/include/llvm/IR/DebugProgramInstruction.h
+++ b/llvm/include/llvm/IR/DebugProgramInstruction.h
@@ -89,6 +89,7 @@ class DPValue : public ilist_node<DPValue>, private DebugValueUser {
 public:
   void deleteInstr();
 
+  const Instruction *getInstruction() const;
   const BasicBlock *getParent() const;
   BasicBlock *getParent();
   void dump() const;

diff  --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp
index fd234685d5fd4bd..03085b3bfd1279c 100644
--- a/llvm/lib/IR/DebugProgramInstruction.cpp
+++ b/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -337,6 +337,10 @@ bool DPValue::isKillAddress() const {
   return !Addr || isa<UndefValue>(Addr);
 }
 
+const Instruction *DPValue::getInstruction() const {
+  return Marker->MarkedInstr;
+}
+
 const BasicBlock *DPValue::getParent() const {
   return Marker->MarkedInstr->getParent();
 }

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index c24b6e3a5b33c0b..b75ab9ad30142d3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -736,6 +736,13 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
 
   bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock);
+  void tryToSinkInstructionDbgValues(
+      Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
+      BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers);
+  void tryToSinkInstructionDPValues(Instruction *I,
+                                    BasicBlock::iterator InsertPos,
+                                    BasicBlock *SrcBlock, BasicBlock *DestBlock,
+                                    SmallVectorImpl<DPValue *> &DPUsers);
 
   bool removeInstructionsBeforeUnreachable(Instruction &I);
   void addDeadEdge(BasicBlock *From, BasicBlock *To,

diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 738d35c5b52e384..dd168917f4dc9cb 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4313,12 +4313,31 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
   // mark the location undef: we know it was supposed to receive a new location
   // here, but that computation has been sunk.
   SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
-  findDbgUsers(DbgUsers, I);
+  SmallVector<DPValue *, 2> DPValues;
+  findDbgUsers(DbgUsers, I, &DPValues);
+  if (!DbgUsers.empty())
+    tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
+  if (!DPValues.empty())
+    tryToSinkInstructionDPValues(I, InsertPos, SrcBlock, DestBlock, DPValues);
+
+  // PS: there are numerous flaws with this behaviour, not least that right now
+  // assignments can be re-ordered past other assignments to the same variable
+  // if they use 
diff erent Values. Creating more undef assignements can never be
+  // undone. And salvaging all users outside of this block can un-necessarily
+  // alter the lifetime of the live-value that the variable refers to.
+  // Some of these things can be resolved by tolerating debug use-before-defs in
+  // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
+  // being used for more architectures.
 
+  return true;
+}
+
+void InstCombinerImpl::tryToSinkInstructionDbgValues(
+    Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
+    BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) {
   // For all debug values in the destination block, the sunk instruction
   // will still be available, so they do not need to be dropped.
   SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
-  SmallVector<DPValue *, 2> DPValuesToSalvage;
   for (auto &DbgUser : DbgUsers)
     if (DbgUser->getParent() != DestBlock)
       DbgUsersToSalvage.push_back(DbgUser);
@@ -4362,10 +4381,7 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
 
   // Perform salvaging without the clones, then sink the clones.
   if (!DIIClones.empty()) {
-    // RemoveDIs: pass in empty vector of DPValues until we get to instrumenting
-    // this pass.
-    SmallVector<DPValue *, 1> DummyDPValues;
-    salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, DummyDPValues);
+    salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, {});
     // The clones are in reverse order of original appearance, reverse again to
     // maintain the original order.
     for (auto &DIIClone : llvm::reverse(DIIClones)) {
@@ -4373,8 +4389,132 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
       LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
     }
   }
+}
 
-  return true;
+void InstCombinerImpl::tryToSinkInstructionDPValues(
+    Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
+    BasicBlock *DestBlock, SmallVectorImpl<DPValue *> &DPValues) {
+  // Implementation of tryToSinkInstructionDbgValues, but for the DPValue of
+  // variable assignments rather than dbg.values.
+
+  // Fetch all DPValues not already in the destination.
+  SmallVector<DPValue *, 2> DPValuesToSalvage;
+  for (auto &DPV : DPValues)
+    if (DPV->getParent() != DestBlock)
+      DPValuesToSalvage.push_back(DPV);
+
+  // Fetch a second collection, of DPValues in the source block that we're going
+  // to sink.
+  SmallVector<DPValue *> DPValuesToSink;
+  for (DPValue *DPV : DPValuesToSalvage)
+    if (DPV->getParent() == SrcBlock)
+      DPValuesToSink.push_back(DPV);
+
+  // Sort DPValues according to their position in the block. This is a partial
+  // order: DPValues attached to 
diff erent instructions will be ordered by the
+  // instruction order, but DPValues attached to the same instruction won't
+  // have an order.
+  auto Order = [](DPValue *A, DPValue *B) -> bool {
+    return B->getInstruction()->comesBefore(A->getInstruction());
+  };
+  llvm::stable_sort(DPValuesToSink, Order);
+
+  // If there are two assignments to the same variable attached to the same
+  // instruction, the ordering between the two assignments is important. Scan
+  // for this (rare) case and establish which is the last assignment.
+  using InstVarPair = std::pair<const Instruction *, DebugVariable>;
+  SmallDenseMap<InstVarPair, DPValue *> FilterOutMap;
+  if (DPValuesToSink.size() > 1) {
+    SmallDenseMap<InstVarPair, unsigned> CountMap;
+    // Count how many assignments to each variable there is per instruction.
+    for (DPValue *DPV : DPValuesToSink) {
+      DebugVariable DbgUserVariable =
+          DebugVariable(DPV->getVariable(), DPV->getExpression(),
+                        DPV->getDebugLoc()->getInlinedAt());
+      CountMap[std::make_pair(DPV->getInstruction(), DbgUserVariable)] += 1;
+    }
+
+    // If there are any instructions with two assignments, add them to the
+    // FilterOutMap to record that they need extra filtering.
+    SmallPtrSet<const Instruction *, 4> DupSet;
+    for (auto It : CountMap) {
+      if (It.second > 1) {
+        FilterOutMap[It.first] = nullptr;
+        DupSet.insert(It.first.first);
+      }
+    }
+
+    // For all instruction/variable pairs needing extra filtering, find the
+    // latest assignment.
+    for (const Instruction *Inst : DupSet) {
+      for (DPValue &DPV : llvm::reverse(Inst->getDbgValueRange())) {
+        DebugVariable DbgUserVariable =
+            DebugVariable(DPV.getVariable(), DPV.getExpression(),
+                          DPV.getDebugLoc()->getInlinedAt());
+        auto FilterIt =
+            FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
+        if (FilterIt == FilterOutMap.end())
+          continue;
+        if (FilterIt->second != nullptr)
+          continue;
+        FilterIt->second = &DPV;
+      }
+    }
+  }
+
+  // Perform cloning of the DPValues that we plan on sinking, filter out any
+  // duplicate assignments identified above.
+  SmallVector<DPValue *, 2> DPVClones;
+  SmallSet<DebugVariable, 4> SunkVariables;
+  for (DPValue *DPV : DPValuesToSink) {
+    if (DPV->Type == DPValue::LocationType::Declare)
+      continue;
+
+    DebugVariable DbgUserVariable =
+        DebugVariable(DPV->getVariable(), DPV->getExpression(),
+                      DPV->getDebugLoc()->getInlinedAt());
+
+    // For any variable where there were multiple assignments in the same place,
+    // ignore all but the last assignment.
+    if (!FilterOutMap.empty()) {
+      InstVarPair IVP = std::make_pair(DPV->getInstruction(), DbgUserVariable);
+      auto It = FilterOutMap.find(IVP);
+
+      // Filter out.
+      if (It != FilterOutMap.end() && It->second != DPV)
+        continue;
+    }
+
+    if (!SunkVariables.insert(DbgUserVariable).second)
+      continue;
+
+    if (DPV->isDbgAssign())
+      continue;
+
+    DPVClones.emplace_back(DPV->clone());
+    LLVM_DEBUG(dbgs() << "CLONE: " << *DPVClones.back() << '\n');
+  }
+
+  // Perform salvaging without the clones, then sink the clones.
+  if (DPVClones.empty())
+    return;
+
+  salvageDebugInfoForDbgValues(*I, {}, DPValuesToSalvage);
+
+  // The clones are in reverse order of original appearance. Assert that the
+  // head bit is set on the iterator as we _should_ have received it via
+  // getFirstInsertionPt. Inserting like this will reverse the clone order as
+  // we'll repeatedly insert at the head, such as:
+  //   DPV-3 (third insertion goes here)
+  //   DPV-2 (second insertion goes here)
+  //   DPV-1 (first insertion goes here)
+  //   Any-Prior-DPVs
+  //   InsertPtInst
+  assert(InsertPos.getHeadBit());
+  for (DPValue *DPVClone : DPVClones) {
+    InsertPos->getParent()->insertDPValueBefore(DPVClone, InsertPos);
+    LLVM_DEBUG(dbgs() << "SINK: " << *DPVClone << '\n');
+  }
 }
 
 bool InstCombinerImpl::run() {

diff  --git a/llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll b/llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll
new file mode 100644
index 000000000000000..bcdcfef8a431219
--- /dev/null
+++ b/llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll
@@ -0,0 +1,74 @@
+; RUN: opt %s -o - -S --passes=instcombine | FileCheck %s
+; RUN: opt %s -o - -S --passes=instcombine --try-experimental-debuginfo-iterators | FileCheck %s
+;
+; CHECK-LABEL: for.body:
+; CHECK-NEXT:  %sub.ptr.rhs.cast.i.i = ptrtoint ptr %call2.i.i to i64,
+; CHECK-NEXT:  tail call void @llvm.dbg.value(metadata i64 %sub.ptr.rhs.cast.i.i, metadata !{{[0-9]*}}, metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value)
+;
+;; The code below is representative of a common situation: where we've had a
+;; loop be completely optimised out, leaving dbg.values representing the
+;; assignments for it, including a rotated assignment to the loop counter.
+;; Below, it's the two dbg.values in the entry block, assigning first the
+;; value of %conv.i, then the value of %conv.i minus one.
+;;
+;; When instcombine sinks %conv.i, it's critical that if it sinks a dbg.value
+;; with it, it sinks the most recent assignment. Otherwise it will re-order the
+;; assignments below, and a fall counter assignment will continue on from the
+;; end of the optimised-out loop.
+;;
+;; The check lines test that when the trunc sinks (along with the ptrtoint),
+;; we get the dbg.value with a DW_OP_minus in it.
+
+; ModuleID = 'tmp.ll'
+source_filename = "tmp.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+define void @_ZN4llvm16MCObjectStreamer15EmitInstructionERKNS_6MCInstE(i1 %tobool.not) local_unnamed_addr {
+entry:
+  %call2.i.i = load volatile ptr, ptr null, align 8, !dbg !4
+  %sub.ptr.rhs.cast.i.i = ptrtoint ptr %call2.i.i to i64, !dbg !4
+  %conv.i = trunc i64 %sub.ptr.rhs.cast.i.i to i32, !dbg !4
+  tail call void @llvm.dbg.value(metadata i32 %conv.i, metadata !16, metadata !DIExpression()), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 %conv.i, metadata !16, metadata !DIExpression(DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value)), !dbg !18
+  br i1 %tobool.not, label %common.ret, label %for.body
+
+common.ret:                                       ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry
+  %call2 = call ptr @_ZNK4llvm6MCInst10getOperandEj(i32 %conv.i)
+  br label %common.ret
+}
+
+declare i32 @_ZNK4llvm6MCInst14getNumOperandsEv()
+
+declare ptr @_ZNK4llvm6MCInst10getOperandEj(i32) local_unnamed_addr
+
+declare i64 @_ZNK4llvm25SmallVectorTemplateCommonINS_9MCOperandEvE4sizeEv()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "foo.cpp", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !DILocation(line: 197, column: 26, scope: !5)
+!5 = distinct !DILexicalBlock(scope: !7, file: !6, line: 197, column: 3)
+!6 = !DIFile(filename: "foo.cpp", directory: ".")
+!7 = distinct !DISubprogram(name: "EmitInstruction", linkageName: "_ZN4llvm16MCObjectStreamer15EmitInstructionERKNS_6MCInstE", scope: !8, file: !6, line: 195, type: !13, scopeLine: 195, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, declaration: !15, retainedNodes: !2)
+!8 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "MCObjectStreamer", scope: !10, file: !9, line: 33, size: 2432, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !2, vtableHolder: !11)
+!9 = !DIFile(filename: "bar.h", directory: ".")
+!10 = !DINamespace(name: "llvm", scope: null)
+!11 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "MCStreamer", scope: !10, file: !12, line: 108, size: 2240, flags: DIFlagFwdDecl | DIFlagNonTrivial, identifier: "_ZTSN4llvm10MCStreamerE")
+!12 = !DIFile(filename: "baz.h", directory: ".")
+!13 = distinct !DISubroutineType(types: !14)
+!14 = !{null}
+!15 = !DISubprogram(name: "EmitInstruction", linkageName: "_ZN4llvm16MCObjectStreamer15EmitInstructionERKNS_6MCInstE", scope: !8, file: !9, line: 88, type: !13, scopeLine: 88, containingType: !8, virtualIndex: 86, flags: DIFlagPublic | DIFlagPrototyped, spFlags: DISPFlagVirtual | DISPFlagOptimized)
+!16 = !DILocalVariable(name: "i", scope: !5, file: !6, line: 197, type: !17)
+!17 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!18 = !DILocation(line: 0, scope: !5)


        


More information about the llvm-commits mailing list