[llvm] [DebugInfo][RemoveDIs] Support maintaining DPValues in CodeGenPrepare (PR #73660)

Jeremy Morse via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 10:04:39 PST 2023


https://github.com/jmorse updated https://github.com/llvm/llvm-project/pull/73660

>From 9684e29ccb95527ad242f3f513766f7e85cef5b5 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Mon, 3 Jul 2023 18:08:48 +0100
Subject: [PATCH 1/6] [DebugInfo][RemoveDIs] Support maintaining DPValues in
 CodeGenPrepare

CodeGenPrepare needs to support the maintenence of DPValues, the
non-instruction replacement for dbg.value intrinsics. This means there are
a few functions we need to duplicate or replicate the functionality of:
 * fixupDbgValue for setting users of sunk addr GEPs,
 * The remains of placeDbgValues needs a DPValue implementation for sinking
 * Rollback of RAUWs needs to update DPValues
 * Rollback of instruction removal needs supporting (see github #73350)
 * A few places where we have to use iterators rather than instructions.

There are three places where we have to use the setHeadBit call on
iterators to indicate which portion of debug-info records we're about to
splice around. This is because CodeGenPrepare, unlike other optimisation
passes, is very much concerned with which block an operation occurs in and
where in the block instructions are because it's preparing things to be in
a format that's good for SelectionDAG.

There isn't a large amount of test coverage for debuginfo behaviours in
this pass, hence I've added some more.
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp           | 171 +++++++++++++++---
 .../DebugInfo/X86/codegenprep-addrsink.ll     |   1 +
 .../DebugInfo/X86/codegenprepare-rollback.ll  |  32 ++++
 .../CodeGenPrepare/X86/catchpad-phi-cast.ll   |   1 +
 .../CodeGenPrepare/X86/cttz-ctlz.ll           |  29 +++
 .../Transforms/CodeGenPrepare/X86/select.ll   |   3 +-
 .../CodeGenPrepare/sink-shift-and-trunc.ll    |   1 +
 7 files changed, 210 insertions(+), 28 deletions(-)
 create mode 100644 llvm/test/DebugInfo/X86/codegenprepare-rollback.ll

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 07dc718ee3a38c3..0ea01087e46875f 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -459,7 +459,9 @@ class CodeGenPrepare : public FunctionPass {
   bool optimizeExtractElementInst(Instruction *Inst);
   bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
   bool fixupDbgValue(Instruction *I);
+  bool fixupDPValue(DPValue &I);
   bool placeDbgValues(Function &F);
+  bool placeDPValues(Instruction &I, DominatorTree &DT);
   bool placePseudoProbes(Function &F);
   bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
                     LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -1753,8 +1755,8 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
       assert(InsertPt != UserBB->end());
       InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
-                                    Cmp->getOperand(0), Cmp->getOperand(1), "",
-                                    &*InsertPt);
+                                    Cmp->getOperand(0), Cmp->getOperand(1), "");
+      InsertedCmp->insertBefore(*UserBB, InsertPt);
       // Propagate the debug info.
       InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
     }
@@ -2066,10 +2068,13 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
       // Sink the trunc
       BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
       TruncInsertPt++;
+      // It will go ahead of any debug-info.
+      TruncInsertPt.setHeadBit(true); // not covered by any test at all.
       assert(TruncInsertPt != TruncUserBB->end());
 
       InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
-                                       TruncI->getType(), "", &*TruncInsertPt);
+                                       TruncI->getType(), "");
+      InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
       InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
 
       MadeChange = true;
@@ -2234,7 +2239,9 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
   // Create another block after the count zero intrinsic. A PHI will be added
   // in this block to select the result of the intrinsic or the bit-width
   // constant if the input to the intrinsic is zero.
-  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
+  BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
+  // Any debug-info after CountZeros should not be included.
+  SplitPt.setHeadBit(true); // coveredy by Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
   BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
   if (IsHugeFunc)
     FreshBBs.insert(EndBlock);
@@ -2829,6 +2836,7 @@ class TypePromotionTransaction {
       Instruction *PrevInst;
       BasicBlock *BB;
     } Point;
+    std::optional<DPValue::self_iterator> beforeDPValue = std::nullopt;
 
     /// Remember whether or not the instruction had a previous instruction.
     bool HasPrevInstruction;
@@ -2836,12 +2844,21 @@ class TypePromotionTransaction {
   public:
     /// Record the position of \p Inst.
     InsertionHandler(Instruction *Inst) {
-      BasicBlock::iterator It = Inst->getIterator();
-      HasPrevInstruction = (It != (Inst->getParent()->begin()));
-      if (HasPrevInstruction)
-        Point.PrevInst = &*--It;
-      else
-        Point.BB = Inst->getParent();
+      HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
+      BasicBlock *BB = Inst->getParent();
+
+      // Record where we would have to re-insert the instruction in the sequence
+      // of DPValues, if we ended up reinserting.
+      if (BB->IsNewDbgInfoFormat) {
+        DPMarker *DPM = BB->createMarker(Inst);
+        beforeDPValue = DPM->getReinsertionPosition();
+      }
+
+      if (HasPrevInstruction) {
+        Point.PrevInst = &*std::prev(Inst->getIterator());
+      } else {
+        Point.BB = BB;
+      }
     }
 
     /// Insert \p Inst at the recorded position.
@@ -2849,14 +2866,16 @@ class TypePromotionTransaction {
       if (HasPrevInstruction) {
         if (Inst->getParent())
           Inst->removeFromParent();
-        Inst->insertAfter(Point.PrevInst);
+        Inst->insertAfter(&*Point.PrevInst);
       } else {
-        Instruction *Position = &*Point.BB->getFirstInsertionPt();
+        BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
         if (Inst->getParent())
-          Inst->moveBefore(Position);
+          Inst->moveBefore(*Point.BB, Position);
         else
-          Inst->insertBefore(Position);
+          Inst->insertBefore(*Point.BB, Position);
       }
+
+      Inst->getParent()->reinsertInstInDPValues(Inst, beforeDPValue);
     }
   };
 
@@ -2882,6 +2901,7 @@ class TypePromotionTransaction {
   };
 
   /// Set the operand of an instruction with a new value.
+  // XXX jmorse -- what about for dbg.values?
   class OperandSetter : public TypePromotionAction {
     /// Original operand of the instruction.
     Value *Origin;
@@ -3059,6 +3079,8 @@ class TypePromotionTransaction {
     SmallVector<InstructionAndIdx, 4> OriginalUses;
     /// Keep track of the debug users.
     SmallVector<DbgValueInst *, 1> DbgValues;
+    /// And non-instruction debug-users too.
+    SmallVector<DPValue *, 1> DPValues;
 
     /// Keep track of the new value so that we can undo it by replacing
     /// instances of the new value with the original value.
@@ -3079,7 +3101,7 @@ class TypePromotionTransaction {
       }
       // Record the debug uses separately. They are not in the instruction's
       // use list, but they are replaced by RAUW.
-      findDbgValues(DbgValues, Inst);
+      findDbgValues(DbgValues, Inst, &DPValues);
 
       // Now, we can replace the uses.
       Inst->replaceAllUsesWith(New);
@@ -3096,6 +3118,10 @@ class TypePromotionTransaction {
       // correctness and utility of debug value instructions.
       for (auto *DVI : DbgValues)
         DVI->replaceVariableLocationOp(New, Inst);
+      // Similar story with DPValues, the non-instruction representation of
+      // dbg.values.
+      for (DPValue *DPV : DPValues) // tested by transaction-test I'm adding
+        DPV->replaceVariableLocationOp(New, Inst);
     }
   };
 
@@ -6749,7 +6775,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
       !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
     return false;
 
-  IRBuilder<> Builder(Load->getNextNode());
+  IRBuilder<> Builder(Load->getNextNonDebugInstruction());
   auto *NewAnd = cast<Instruction>(
       Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
   // Mark this instruction as "inserted by CGP", so that other
@@ -7005,7 +7031,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
   // Split the select block, according to how many (if any) values go on each
   // side.
   BasicBlock *StartBlock = SI->getParent();
-  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
+  BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
+  // We should split before any debug-info.
+  SplitPt.setHeadBit(true); // covered by X86/select.ll
 
   IRBuilder<> IB(SI);
   auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
@@ -7017,18 +7045,18 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
   BranchInst *FalseBranch = nullptr;
   if (TrueInstrs.size() == 0) {
     FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
-        CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+        CondFr, SplitPt, false, nullptr, nullptr, LI));
     FalseBlock = FalseBranch->getParent();
     EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
   } else if (FalseInstrs.size() == 0) {
     TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
-        CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+        CondFr, SplitPt, false, nullptr, nullptr, LI));
     TrueBlock = TrueBranch->getParent();
     EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
   } else {
     Instruction *ThenTerm = nullptr;
     Instruction *ElseTerm = nullptr;
-    SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
+    SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
                                   nullptr, nullptr, LI);
     TrueBranch = cast<BranchInst>(ThenTerm);
     FalseBranch = cast<BranchInst>(ElseTerm);
@@ -8095,10 +8123,14 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
 }
 
 bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
+  bool AnyChange = false;
+  for (DPValue &DPV : I->getDbgValueRange())
+    AnyChange |= fixupDPValue(DPV);
+
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
   if (InsertedInsts.count(I))
-    return false;
+    return AnyChange;
 
   // TODO: Move into the switch on opcode below here.
   if (PHINode *P = dyn_cast<PHINode>(I)) {
@@ -8112,7 +8144,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
       ++NumPHIsElim;
       return true;
     }
-    return false;
+    return AnyChange;
   }
 
   if (CastInst *CI = dyn_cast<CastInst>(I)) {
@@ -8123,7 +8155,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
     // the address of globals out of a loop).  If this is the case, we don't
     // want to forward-subst the cast.
     if (isa<Constant>(CI->getOperand(0)))
-      return false;
+      return AnyChange;
 
     if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
       return true;
@@ -8149,7 +8181,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
         return MadeChange | optimizeExtUses(I);
       }
     }
-    return false;
+    return AnyChange;
   }
 
   if (auto *Cmp = dyn_cast<CmpInst>(I))
@@ -8244,7 +8276,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
         return true;
       }
     }
-    return false;
+    return AnyChange;
   }
 
   if (tryToSinkFreeOperands(I))
@@ -8269,7 +8301,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
     return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
   }
 
-  return false;
+  return AnyChange;
 }
 
 /// Given an OR instruction, check to see if this is a bitreverse
@@ -8359,6 +8391,30 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
   return AnyChange;
 }
 
+bool CodeGenPrepare::fixupDPValue(DPValue &DPV) {
+  if (DPV.Type != DPValue::LocationType::Value)
+    return false;
+
+  // Does this DPValue refer to a sunk address calculation?
+  bool AnyChange = false;
+  SmallDenseSet<Value *> LocationOps(DPV.location_ops().begin(),
+                                     DPV.location_ops().end());
+  for (Value *Location : LocationOps) {
+    WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
+    Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+    if (SunkAddr) {
+      // Point dbg.value at locally computed address, which should give the best
+      // opportunity to be accurately lowered. This update may change the type
+      // of pointer being referred to; however this makes no difference to
+      // debugging information, and we can't generate bitcasts that may affect
+      // codegen.
+      DPV.replaceVariableLocationOp(Location, SunkAddr);
+      AnyChange = true;
+    }
+  }
+  return AnyChange;
+}
+
 // A llvm.dbg.value may be using a value before its definition, due to
 // optimizations in this pass and others. Scan for such dbg.values, and rescue
 // them by moving the dbg.value to immediately after the value definition.
@@ -8371,8 +8427,10 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
   for (BasicBlock &BB : F) {
     for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
       DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
-      if (!DVI)
+      if (!DVI) {
+        MadeChange |= placeDPValues(Insn, DT);
         continue;
+      }
 
       SmallVector<Instruction *, 4> VIs;
       for (Value *V : DVI->getValues())
@@ -8421,6 +8479,65 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
       }
     }
   }
+
+  return MadeChange;
+}
+
+bool CodeGenPrepare::placeDPValues(Instruction &I, DominatorTree &DT) {
+  bool MadeChange = false;
+
+  for (DPValue &DPV : llvm::make_early_inc_range(I.getDbgValueRange())) {
+    if (DPV.Type != DPValue::LocationType::Value)
+      continue;
+
+    SmallVector<Instruction *, 4> VIs;
+    for (Value *V : DPV.location_ops())
+      if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
+        VIs.push_back(VI);
+
+    // This DPV may depend on multiple instructions, complicating any
+    // potential sink. This block takes the defensive approach, opting to
+    // "undef" the DVI if it has more than one instruction and any of them do
+    // not dominate DVI.
+    for (Instruction *VI : VIs) {
+      if (VI->isTerminator())
+        continue;
+
+      // If VI is a phi in a block with an EHPad terminator, we can't insert
+      // after it.
+      if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+        continue;
+
+      // If the defining instruction dominates the position, we do not need
+      // to move the dbg.value.
+      if (DT.dominates(VI, &I))
+        continue;
+
+      // If we depend on multiple instructions and any of them doesn't
+      // dominate this DPV, we probably can't salvage it: moving it to
+      // after any of the instructions could cause us to lose the others.
+      if (VIs.size() > 1) {
+        LLVM_DEBUG(
+            dbgs()
+            << "Unable to find valid location for Debug Value, undefing:\n"
+            << DPV);
+        DPV.setKillLocation();
+        break;
+      }
+
+      LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
+                        << DPV << ' ' << *VI);
+      DPV.removeFromParent();
+      BasicBlock *VIBB = VI->getParent();
+      if (isa<PHINode>(VI)) {
+        VIBB->insertDPValueBefore(&DPV, VIBB->getFirstInsertionPt());
+      } else {
+        VIBB->insertDPValueAfter(&DPV, VI);
+      }
+      MadeChange = true;
+      ++NumDbgValueMoved;
+    }
+  }
   return MadeChange;
 }
 
diff --git a/llvm/test/DebugInfo/X86/codegenprep-addrsink.ll b/llvm/test/DebugInfo/X86/codegenprep-addrsink.ll
index 794efccca575bae..3fe4b085525afa0 100644
--- a/llvm/test/DebugInfo/X86/codegenprep-addrsink.ll
+++ b/llvm/test/DebugInfo/X86/codegenprep-addrsink.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -start-before=codegenprepare -stop-after=codegenprepare -mtriple=x86_64-unknown-unknown %s -o - | FileCheck %s
+; RUN: llc -start-before=codegenprepare -stop-after=codegenprepare -mtriple=x86_64-unknown-unknown %s -o - --try-experimental-debuginfo-iterators | FileCheck %s
 ;
 ; CGP duplicates address calculation into each basic block that contains loads
 ; or stores, so that they can be folded into instruction memory operands for
diff --git a/llvm/test/DebugInfo/X86/codegenprepare-rollback.ll b/llvm/test/DebugInfo/X86/codegenprepare-rollback.ll
new file mode 100644
index 000000000000000..e0cb22fbd57661f
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/codegenprepare-rollback.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -debugify -codegenprepare %s -o - | FileCheck %s --check-prefix=DEBUGIFY
+; RUN: opt -S -debugify -codegenprepare %s -o - --try-experimental-debuginfo-iterators | FileCheck %s --check-prefix=DEBUGIFY
+;
+; Copied from codegen-prepare-addrmode-sext.ll -- for the twoArgsNoPromotion
+; function, CGP attempts a type promotion transaction on the sext to replace
+; it with %add, but then rolls it back. This involves re-inserting the sext
+; instruction between two dbg.value intrinsics, and un-RAUWing the users of
+; the sext.
+; This test checks that this works correctly in both dbg.value mode, but also
+; RemoveDIs non-intrinsic debug-info mode.
+
+target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; DEBUGIFY-LABEL: @twoArgsNoPromotion
+; DEBUGIFY-NEXT: %add = add
+; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata i32 %add,
+; DEBUGIFY-NEXT: %sextadd = sext
+; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata i64 %sextadd,
+; DEBUGIFY-NEXT: %arrayidx = getelementptr
+; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata ptr %arrayidx,
+; DEBUGIFY-NEXT: %res = load i8,
+; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata i8 %res,
+; DEBUGIFY-NEXT: ret i8 %res,
+define i8 @twoArgsNoPromotion(i32 %arg1, i32 %arg2, ptr %base) {
+  %add = add nsw i32 %arg1, %arg2
+  %sextadd = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i8, ptr %base, i64 %sextadd
+  %res = load i8, ptr %arrayidx
+  ret i8 %res
+}
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
index d23dd8c792e0e64..614e80e3e89c22f 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -codegenprepare -S < %s | FileCheck %s
+; RUN: opt -codegenprepare -S < %s --try-experimental-debuginfo-iterators | FileCheck %s
 
 ; The following target lines are needed for the test to exercise what it should.
 ; Without these lines, CodeGenPrepare does not try to sink the bitcasts.
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
index 440afdeff10a3e8..5f368faf46ee31e 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
@@ -3,6 +3,9 @@
 ; RUN: opt -S -codegenprepare -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ
 ; RUN: opt -S -codegenprepare -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ
 
+; RUN: opt -S -debugify -codegenprepare < %s | FileCheck %s --check-prefix=DEBUGINFO
+; RUN: opt -S -debugify -codegenprepare --try-experimental-debuginfo-iterators < %s | FileCheck %s --check-prefix=DEBUGINFO
+
 target triple = "x86_64-unknown-unknown"
 target datalayout = "e-n32:64"
 
@@ -40,6 +43,19 @@ define i64 @cttz(i64 %A) {
 ; FAST_LZ-NEXT:    [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ]
 ; FAST_LZ-NEXT:    ret i64 [[CTZ]]
 ;
+; DEBUGINFO-LABEL: @cttz(
+; DEBUGINFO-NEXT:  entry:
+; DEBUGINFO-NEXT:    [[A_FR:%.*]] = freeze i64 [[A:%.*]], !dbg [[DBG11:![0-9]+]]
+; DEBUGINFO-NEXT:    [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0, !dbg [[DBG11]]
+; DEBUGINFO-NEXT:    br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG11]]
+; DEBUGINFO:       cond.false:
+; DEBUGINFO-NEXT:    [[Z:%.*]] = call i64 @llvm.cttz.i64(i64 [[A_FR]], i1 true), !dbg [[DBG11]]
+; DEBUGINFO-NEXT:    br label [[COND_END]], !dbg [[DBG12:![0-9]+]]
+; DEBUGINFO:       cond.end:
+; DEBUGINFO-NEXT:    [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ], !dbg [[DBG12]]
+; DEBUGINFO-NEXT:    tail call void @llvm.dbg.value(metadata i64 [[CTZ]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG11]]
+; DEBUGINFO-NEXT:    ret i64 [[CTZ]], !dbg [[DBG12]]
+;
 entry:
   %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
   ret i64 %z
@@ -75,6 +91,19 @@ define i64 @ctlz(i64 %A) {
 ; FAST_LZ-NEXT:    [[Z:%.*]] = call i64 @llvm.ctlz.i64(i64 [[A:%.*]], i1 false)
 ; FAST_LZ-NEXT:    ret i64 [[Z]]
 ;
+; DEBUGINFO-LABEL: @ctlz(
+; DEBUGINFO-NEXT:  entry:
+; DEBUGINFO-NEXT:    [[A_FR:%.*]] = freeze i64 [[A:%.*]], !dbg [[DBG16:![0-9]+]]
+; DEBUGINFO-NEXT:    [[CMPZ:%.*]] = icmp eq i64 [[A_FR]], 0, !dbg [[DBG16]]
+; DEBUGINFO-NEXT:    br i1 [[CMPZ]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG16]]
+; DEBUGINFO:       cond.false:
+; DEBUGINFO-NEXT:    [[Z:%.*]] = call i64 @llvm.ctlz.i64(i64 [[A_FR]], i1 true), !dbg [[DBG16]]
+; DEBUGINFO-NEXT:    br label [[COND_END]], !dbg [[DBG17:![0-9]+]]
+; DEBUGINFO:       cond.end:
+; DEBUGINFO-NEXT:    [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ], !dbg [[DBG17]]
+; DEBUGINFO-NEXT:    tail call void @llvm.dbg.value(metadata i64 [[CTZ]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16]]
+; DEBUGINFO-NEXT:    ret i64 [[CTZ]], !dbg [[DBG17]]
+;
 entry:
   %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
   ret i64 %z
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll
index d5eda2065144d19..a0f34a882d3063f 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -codegenprepare -S < %s | FileCheck %s
 ; RUN: opt -debugify -codegenprepare -S < %s | FileCheck %s -check-prefix=DEBUG
+; RUN: opt -debugify -codegenprepare -S < %s --try-experimental-debuginfo-iterators | FileCheck %s -check-prefix=DEBUG
 
 target triple = "x86_64-unknown-unknown"
 
@@ -40,7 +41,7 @@ define float @fdiv_true_sink(float %a, float %b) {
 ; DEBUG-LABEL: @fdiv_true_sink(
 ; DEBUG-NEXT:  entry:
 ; DEBUG-NEXT:    [[SEL_FR:%.*]] = freeze float [[A:%.*]]
-; DEBUG-NEXT:    [[CMP:%.*]] = fcmp ogt float [[SEL_FR]], 1.000000e+00, !dbg !24
+; DEBUG-NEXT:    [[CMP:%.*]] = fcmp ogt float [[SEL_FR]], 1.000000e+00, !dbg
 ; DEBUG-NEXT:    call void @llvm.dbg.value(metadata i1 [[CMP]]
 ; DEBUG-NEXT:    br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !dbg
 ; DEBUG:       select.true.sink:
diff --git a/llvm/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll b/llvm/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll
index 85512e64336db1a..e46f70c2c112225 100644
--- a/llvm/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: aarch64-registered-target
 ; RUN: opt < %s -codegenprepare -mtriple=arm64-apple-ios -S | FileCheck %s
+; RUN: opt < %s -codegenprepare -mtriple=arm64-apple-ios -S --try-experimental-debuginfo-iterators | FileCheck %s
 
 @first_ones = external global [65536 x i8]
 

>From 85c857169330865672da18b84762886fb8d62602 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Tue, 28 Nov 2023 10:50:35 +0000
Subject: [PATCH 2/6] getstableloc needed in bb

---
 llvm/lib/IR/BasicBlock.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 7037f5f524ee069..79dffa6428181dc 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -617,7 +617,7 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName,
                                        this->getNextNode());
 
   // Save DebugLoc of split point before invalidating iterator.
-  DebugLoc Loc = I->getDebugLoc();
+  DebugLoc Loc = I->getStableDebugLoc();
   // Move all of the specified instructions from the original basic block into
   // the new basic block.
   New->splice(New->end(), this, I, end());

>From 111e753bda06b6391674f267ba0deacaca96c848 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Wed, 5 Jul 2023 15:06:25 +0100
Subject: [PATCH 3/6] [DebugInfo][RemoveDIs] Emulate inserting insts in
 dbg.value sequences

Here's a problem for the RemoveDIs project to make debug-info not be stored
in instructions -- in the following sequence:
    dbg.value(foo
    %bar = add i32 ...
    dbg.value(baz
It's possible for rare passes (only CodeGenPrepare) to remove the add
instruction, and then re-insert it back in the same place. When debug-info
is stored in instructions and there's a total order on "when" things happen
this is easy, but by moving that information out of the instruction stream
we start having to do manual maintenance.

This patch adds some utilities for re-inserting an instruction into a
sequence of DPValue objects. Someday we hope to design this away, but for
now it's necessary to support all the things you can do with dbg.values.
The two unit tests show how DPValues get shuffled around using the relevant
function calls. A follow-up patch adds instrumentation to CodeGenPrepare.
---
 llvm/include/llvm/IR/BasicBlock.h             |   8 +
 .../include/llvm/IR/DebugProgramInstruction.h |  10 ++
 llvm/lib/IR/BasicBlock.cpp                    |  49 ++++++
 llvm/lib/IR/DebugProgramInstruction.cpp       |  25 +++
 llvm/unittests/IR/BasicBlockDbgInfoTest.cpp   | 144 ++++++++++++++++++
 5 files changed, 236 insertions(+)

diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index ec916acc25151c8..45e4b577c8a992d 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -141,6 +141,14 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
   /// move such DPValues back to the right place (ahead of the terminator).
   void flushTerminatorDbgValues();
 
+  /// In rare circumstances instructions can be speculatively removed from
+  /// blocks, and then be re-inserted back into that position later. When this
+  /// happens in RemoveDIs debug-info mode, some special patching-up needs to
+  /// occur: inserting into the middle of a sequence of dbg.value intrinsics
+  /// does not have an equivalent with DPValues.
+  void reinsertInstInDPValues(Instruction *I,
+                              std::optional<DPValue::self_iterator> Pos);
+
 private:
   void setParent(Function *parent);
 
diff --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h
index cfee2a87b75c7b5..93adf866054de4f 100644
--- a/llvm/include/llvm/IR/DebugProgramInstruction.h
+++ b/llvm/include/llvm/IR/DebugProgramInstruction.h
@@ -308,6 +308,11 @@ class DPMarker {
   /// Transfer any DPValues from \p Src into this DPMarker. If \p InsertAtHead
   /// is true, place them before existing DPValues, otherwise afterwards.
   void absorbDebugValues(DPMarker &Src, bool InsertAtHead);
+  /// Transfer the DPValues in \p Range from \p Src into this DPMarker. If
+  /// \p InsertAtHead is true, place them before existing DPValues, otherwise
+  // afterwards.
+  void absorbDebugValues(iterator_range<DPValue::self_iterator> Range,
+                         DPMarker &Src, bool InsertAtHead);
   /// Insert a DPValue into this DPMarker, at the end of the list. If
   /// \p InsertAtHead is true, at the start.
   void insertDPValue(DPValue *New, bool InsertAtHead);
@@ -328,6 +333,11 @@ class DPMarker {
   /// erasing a dbg.value from a block.
   void dropOneDPValue(DPValue *DPV);
 
+  /// Return an iterator to the position of the "Next" DPValue after this
+  /// marker, or std::nullopt. This is the position to pass to
+  /// BasicBlock::reinsertInstInDPValues when re-inserting an instruction.
+  std::optional<DPValue::self_iterator> getReinsertionPosition();
+
   /// We generally act like all llvm Instructions have a range of DPValues
   /// attached to them, but in reality sometimes we don't allocate the DPMarker
   /// to save time and memory, but still have to return ranges of DPValues. When
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 79dffa6428181dc..4c47a93435c3395 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -1013,6 +1013,55 @@ DPMarker *BasicBlock::getMarker(InstListType::iterator It) {
   return It->DbgMarker;
 }
 
+void BasicBlock::reinsertInstInDPValues(
+    Instruction *I, std::optional<DPValue::self_iterator> Pos) {
+  // "I" was originally removed from a position where it was immediately in
+  // front of Pos. Any DPValues on that position then "fell down" onto Pos.
+  // "I" has been re-inserted after that wedge of DPValues, shuffle them around
+  // to represent the original positioning. To illustrate:
+  //
+  //   Instructions:  I1---I---I0
+  //       DPValues:    DDD DDD
+  //
+  // Instruction "I" removed,
+  //
+  //   Instructions:  I1------I0
+  //       DPValues:    DDDDDD
+  //                       ^Pos
+  //
+  // Instruction "I" re-inserted (now):
+  //
+  //   Instructions:  I1------I-I0
+  //       DPValues:    DDDDDD
+  //                       ^Pos
+  //
+  // After this method completes:
+  //
+  //   Instructions:  I1---I---I0
+  //       DPValues:    DDD DDD
+  //
+  // In a fantastic future we would a) ban passes from doing this at all, but
+  // in lieu of that b) we could have more fine grained control over how
+  // debug-info records coalesce together. This will probably happen if/when we
+  // address the matter of all debug-info records having to have a total order.
+
+  // If there were no DPValues on I0, Pos will be empty. We also don't need to
+  // do any further maintanence.
+  if (!Pos)
+    return;
+
+  // Construct the range of DPMarkers to move.
+  DPMarker *DPM = (*Pos)->getMarker();
+  auto Range = make_range(*Pos, DPM->StoredDPValues.end());
+  assert(Range.begin() != Range.end());
+
+  // These are DPValues that used to be attached to I0 but are now attached to I
+  // after the re-insertion. Move them back onto I0.
+  DPMarker *NextMarker = createMarker(std::next(I->getIterator()));
+  assert(NextMarker->StoredDPValues.empty());
+  NextMarker->absorbDebugValues(Range, *DPM, true);
+}
+
 #ifndef NDEBUG
 /// In asserts builds, this checks the numbering. In non-asserts builds, it
 /// is defined as a no-op inline function in BasicBlock.h.
diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp
index 581d77a26acb80a..f0e00b0d7a40a5f 100644
--- a/llvm/lib/IR/DebugProgramInstruction.cpp
+++ b/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -267,6 +267,19 @@ void DPMarker::dropOneDPValue(DPValue *DPV) {
   DPV->deleteInstr();
 }
 
+std::optional<DPValue::self_iterator> DPMarker::getReinsertionPosition() {
+  // Is there a marker on the next instruction?
+  DPMarker *NextMarker = getParent()->getNextMarker(MarkedInstr);
+  if (!NextMarker)
+    return std::nullopt;
+
+  // Are there any DPValues in the next marker?
+  if (NextMarker->StoredDPValues.empty())
+    return std::nullopt;
+
+  return NextMarker->StoredDPValues.begin();
+}
+
 const BasicBlock *DPMarker::getParent() const {
   return MarkedInstr->getParent();
 }
@@ -334,6 +347,18 @@ void DPMarker::absorbDebugValues(DPMarker &Src, bool InsertAtHead) {
   StoredDPValues.splice(It, Src.StoredDPValues);
 }
 
+void DPMarker::absorbDebugValues(iterator_range<DPValue::self_iterator> Range,
+                                 DPMarker &Src, bool InsertAtHead) {
+  for (DPValue &DPV : Range)
+    DPV.setMarker(this);
+
+  auto InsertPos =
+      (InsertAtHead) ? StoredDPValues.begin() : StoredDPValues.end();
+
+  StoredDPValues.splice(InsertPos, Src.StoredDPValues, Range.begin(),
+                        Range.end());
+}
+
 iterator_range<simple_ilist<DPValue>::iterator> DPMarker::cloneDebugInfoFrom(
     DPMarker *From, std::optional<simple_ilist<DPValue>::iterator> from_here,
     bool InsertAtHead) {
diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
index 481cd181d3848e7..190a9d772c7889e 100644
--- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
+++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
@@ -1110,5 +1110,149 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) {
   UseNewDbgInfoFormat = false;
 }
 
+// When we remove instructions from the program, adjacent DPValues coalesce
+// together into one DPMarker. In "old" dbg.value mode you could re-insert
+// the removed instruction back into the middle of a sequence of dbg.values.
+// Test that this can be replicated correctly by DPValues
+TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) {
+  LLVMContext C;
+  UseNewDbgInfoFormat = true;
+
+  std::unique_ptr<Module> M = parseIR(C, R"(
+    define i16 @f(i16 %a) !dbg !6 {
+    entry:
+      call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11
+      %foo = add i16 %a, %a
+      call void @llvm.dbg.value(metadata i16 0, metadata !9, metadata !DIExpression()), !dbg !11
+      ret i16 1
+    }
+    declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+    !llvm.dbg.cu = !{!0}
+    !llvm.module.flags = !{!5}
+
+    !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+    !1 = !DIFile(filename: "t.ll", directory: "/")
+    !2 = !{}
+    !5 = !{i32 2, !"Debug Info Version", i32 3}
+    !6 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
+    !7 = !DISubroutineType(types: !2)
+    !8 = !{!9}
+    !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
+    !10 = !DIBasicType(name: "ty16", size: 16, encoding: DW_ATE_unsigned)
+    !11 = !DILocation(line: 1, column: 1, scope: !6)
+)");
+
+  BasicBlock &Entry = M->getFunction("f")->getEntryBlock();
+  M->convertToNewDbgValues();
+
+  // Fetch the relevant instructions from the converted function.
+  Instruction *AddInst = &*Entry.begin();
+  ASSERT_TRUE(isa<BinaryOperator>(AddInst));
+  Instruction *RetInst = AddInst->getNextNode();
+  ASSERT_TRUE(isa<ReturnInst>(RetInst));
+
+  // They should both have one DPValue on each.
+  EXPECT_TRUE(AddInst->hasDbgValues());
+  EXPECT_TRUE(RetInst->hasDbgValues());
+  auto R1 = AddInst->getDbgValueRange();
+  EXPECT_EQ(std::distance(R1.begin(), R1.end()), 1u);
+  auto R2 = RetInst->getDbgValueRange();
+  EXPECT_EQ(std::distance(R2.begin(), R2.end()), 1u);
+
+  // The Supported (TM) code sequence for removing then reinserting insts:
+  std::optional<DPValue::self_iterator> Pos =
+      AddInst->DbgMarker->getReinsertionPosition();
+  AddInst->removeFromParent();
+
+  // We should have a re-insertion position.
+  ASSERT_TRUE(Pos);
+  // Both DPValues should now be attached to the ret inst.
+  auto R3 = RetInst->getDbgValueRange();
+  EXPECT_EQ(std::distance(R3.begin(), R3.end()), 2u);
+
+  // Re-insert and re-insert.
+  AddInst->insertBefore(RetInst);
+  Entry.reinsertInstInDPValues(AddInst, Pos);
+  // We should be back into a position of having one DPValue on each inst.
+  EXPECT_TRUE(AddInst->hasDbgValues());
+  EXPECT_TRUE(RetInst->hasDbgValues());
+  auto R4 = AddInst->getDbgValueRange();
+  EXPECT_EQ(std::distance(R4.begin(), R4.end()), 1u);
+  auto R5 = RetInst->getDbgValueRange();
+  EXPECT_EQ(std::distance(R5.begin(), R5.end()), 1u);
+
+  UseNewDbgInfoFormat = false;
+}
+
+// Test instruction removal and re-insertion, this time with one DPValue that
+// should hop up one instruction.
+TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDPValue) {
+  LLVMContext C;
+  UseNewDbgInfoFormat = true;
+
+  std::unique_ptr<Module> M = parseIR(C, R"(
+    define i16 @f(i16 %a) !dbg !6 {
+    entry:
+      call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11
+      %foo = add i16 %a, %a
+      ret i16 1
+    }
+    declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+    !llvm.dbg.cu = !{!0}
+    !llvm.module.flags = !{!5}
+
+    !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+    !1 = !DIFile(filename: "t.ll", directory: "/")
+    !2 = !{}
+    !5 = !{i32 2, !"Debug Info Version", i32 3}
+    !6 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
+    !7 = !DISubroutineType(types: !2)
+    !8 = !{!9}
+    !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
+    !10 = !DIBasicType(name: "ty16", size: 16, encoding: DW_ATE_unsigned)
+    !11 = !DILocation(line: 1, column: 1, scope: !6)
+)");
+
+  BasicBlock &Entry = M->getFunction("f")->getEntryBlock();
+  M->convertToNewDbgValues();
+
+  // Fetch the relevant instructions from the converted function.
+  Instruction *AddInst = &*Entry.begin();
+  ASSERT_TRUE(isa<BinaryOperator>(AddInst));
+  Instruction *RetInst = AddInst->getNextNode();
+  ASSERT_TRUE(isa<ReturnInst>(RetInst));
+
+  // There should be one DPValue.
+  EXPECT_TRUE(AddInst->hasDbgValues());
+  EXPECT_FALSE(RetInst->hasDbgValues());
+  auto R1 = AddInst->getDbgValueRange();
+  EXPECT_EQ(std::distance(R1.begin(), R1.end()), 1u);
+
+  // The Supported (TM) code sequence for removing then reinserting insts:
+  std::optional<DPValue::self_iterator> Pos =
+      AddInst->DbgMarker->getReinsertionPosition();
+  AddInst->removeFromParent();
+
+  // No re-insertion position as there were no DPValues on the ret.
+  ASSERT_FALSE(Pos);
+  // The single DPValue should now be attached to the ret inst.
+  EXPECT_TRUE(RetInst->hasDbgValues());
+  auto R2 = RetInst->getDbgValueRange();
+  EXPECT_EQ(std::distance(R2.begin(), R2.end()), 1u);
+
+  // Re-insert and re-insert.
+  AddInst->insertBefore(RetInst);
+  Entry.reinsertInstInDPValues(AddInst, Pos);
+  // We should be back into a position of having one DPValue on the AddInst.
+  EXPECT_TRUE(AddInst->hasDbgValues());
+  EXPECT_FALSE(RetInst->hasDbgValues());
+  auto R3 = AddInst->getDbgValueRange();
+  EXPECT_EQ(std::distance(R3.begin(), R3.end()), 1u);
+
+  UseNewDbgInfoFormat = false;
+}
+
 } // End anonymous namespace.
 #endif // EXPERIMENTAL_DEBUGINFO_ITERATORS

>From 3f4625010e63b47562a4f7115b57925c16971097 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Tue, 28 Nov 2023 10:51:25 +0000
Subject: [PATCH 4/6] Fixup reinsert to be for the "after" case.

Fix tests for insertAfter faff
---
 llvm/lib/IR/BasicBlock.cpp                  | 53 +++++++++++----------
 llvm/unittests/IR/BasicBlockDbgInfoTest.cpp | 25 +++++++---
 2 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 4c47a93435c3395..fa56431e85a85a1 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -1015,10 +1015,10 @@ DPMarker *BasicBlock::getMarker(InstListType::iterator It) {
 
 void BasicBlock::reinsertInstInDPValues(
     Instruction *I, std::optional<DPValue::self_iterator> Pos) {
-  // "I" was originally removed from a position where it was immediately in
-  // front of Pos. Any DPValues on that position then "fell down" onto Pos.
-  // "I" has been re-inserted after that wedge of DPValues, shuffle them around
-  // to represent the original positioning. To illustrate:
+  // "I" was originally removed from a position where it was
+  // immediately in front of Pos. Any DPValues on that position then "fell down"
+  // onto Pos. "I" has been re-inserted at the front of that wedge of DPValues,
+  // shuffle them around to represent the original positioning. To illustrate:
   //
   //   Instructions:  I1---I---I0
   //       DPValues:    DDD DDD
@@ -1031,35 +1031,38 @@ void BasicBlock::reinsertInstInDPValues(
   //
   // Instruction "I" re-inserted (now):
   //
-  //   Instructions:  I1------I-I0
-  //       DPValues:    DDDDDD
-  //                       ^Pos
+  //   Instructions:  I1---I------I0
+  //       DPValues:        DDDDDD
+  //                           ^Pos
   //
   // After this method completes:
   //
   //   Instructions:  I1---I---I0
   //       DPValues:    DDD DDD
-  //
-  // In a fantastic future we would a) ban passes from doing this at all, but
-  // in lieu of that b) we could have more fine grained control over how
-  // debug-info records coalesce together. This will probably happen if/when we
-  // address the matter of all debug-info records having to have a total order.
-
-  // If there were no DPValues on I0, Pos will be empty. We also don't need to
-  // do any further maintanence.
-  if (!Pos)
+
+  // This happens if there were no DPValues on I0. Are there now DPValues there?
+  if (!Pos) {
+    DPMarker *NextMarker = getNextMarker(I);
+    if (!NextMarker)
+      return;
+    if (NextMarker->StoredDPValues.empty())
+      return;
+    // There are DPMarkers there now -- they fell down from "I".
+    DPMarker *ThisMarker = createMarker(I);
+    ThisMarker->absorbDebugValues(*NextMarker, false);
     return;
+  }
 
-  // Construct the range of DPMarkers to move.
+  // Is there even a range of DPValues to move?
   DPMarker *DPM = (*Pos)->getMarker();
-  auto Range = make_range(*Pos, DPM->StoredDPValues.end());
-  assert(Range.begin() != Range.end());
-
-  // These are DPValues that used to be attached to I0 but are now attached to I
-  // after the re-insertion. Move them back onto I0.
-  DPMarker *NextMarker = createMarker(std::next(I->getIterator()));
-  assert(NextMarker->StoredDPValues.empty());
-  NextMarker->absorbDebugValues(Range, *DPM, true);
+  auto Range = make_range(DPM->StoredDPValues.begin(), (*Pos));
+  if (Range.begin() == Range.end())
+    return;
+
+  // Otherwise: splice.
+  DPMarker *ThisMarker = createMarker(I);
+  assert(ThisMarker->StoredDPValues.empty());
+  ThisMarker->absorbDebugValues(Range, *DPM, true);
 }
 
 #ifndef NDEBUG
diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
index 190a9d772c7889e..66c793001715529 100644
--- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
+++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
@@ -1121,6 +1121,7 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) {
   std::unique_ptr<Module> M = parseIR(C, R"(
     define i16 @f(i16 %a) !dbg !6 {
     entry:
+      %qux = sub i16 %a, 0
       call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11
       %foo = add i16 %a, %a
       call void @llvm.dbg.value(metadata i16 0, metadata !9, metadata !DIExpression()), !dbg !11
@@ -1147,12 +1148,15 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) {
   M->convertToNewDbgValues();
 
   // Fetch the relevant instructions from the converted function.
-  Instruction *AddInst = &*Entry.begin();
+  Instruction *SubInst = &*Entry.begin();
+  ASSERT_TRUE(isa<BinaryOperator>(SubInst));
+  Instruction *AddInst = SubInst->getNextNode();
   ASSERT_TRUE(isa<BinaryOperator>(AddInst));
   Instruction *RetInst = AddInst->getNextNode();
   ASSERT_TRUE(isa<ReturnInst>(RetInst));
 
-  // They should both have one DPValue on each.
+  // add and sub should both have one DPValue on add and ret.
+  EXPECT_FALSE(SubInst->hasDbgValues());
   EXPECT_TRUE(AddInst->hasDbgValues());
   EXPECT_TRUE(RetInst->hasDbgValues());
   auto R1 = AddInst->getDbgValueRange();
@@ -1160,7 +1164,8 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) {
   auto R2 = RetInst->getDbgValueRange();
   EXPECT_EQ(std::distance(R2.begin(), R2.end()), 1u);
 
-  // The Supported (TM) code sequence for removing then reinserting insts:
+  // The Supported (TM) code sequence for removing then reinserting insts
+  // after another instruction:
   std::optional<DPValue::self_iterator> Pos =
       AddInst->DbgMarker->getReinsertionPosition();
   AddInst->removeFromParent();
@@ -1172,9 +1177,10 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) {
   EXPECT_EQ(std::distance(R3.begin(), R3.end()), 2u);
 
   // Re-insert and re-insert.
-  AddInst->insertBefore(RetInst);
+  AddInst->insertAfter(SubInst);
   Entry.reinsertInstInDPValues(AddInst, Pos);
-  // We should be back into a position of having one DPValue on each inst.
+  // We should be back into a position of having one DPValue on add and ret.
+  EXPECT_FALSE(SubInst->hasDbgValues());
   EXPECT_TRUE(AddInst->hasDbgValues());
   EXPECT_TRUE(RetInst->hasDbgValues());
   auto R4 = AddInst->getDbgValueRange();
@@ -1194,6 +1200,7 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDPValue) {
   std::unique_ptr<Module> M = parseIR(C, R"(
     define i16 @f(i16 %a) !dbg !6 {
     entry:
+      %qux = sub i16 %a, 0
       call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11
       %foo = add i16 %a, %a
       ret i16 1
@@ -1219,12 +1226,15 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDPValue) {
   M->convertToNewDbgValues();
 
   // Fetch the relevant instructions from the converted function.
-  Instruction *AddInst = &*Entry.begin();
+  Instruction *SubInst = &*Entry.begin();
+  ASSERT_TRUE(isa<BinaryOperator>(SubInst));
+  Instruction *AddInst = SubInst->getNextNode();
   ASSERT_TRUE(isa<BinaryOperator>(AddInst));
   Instruction *RetInst = AddInst->getNextNode();
   ASSERT_TRUE(isa<ReturnInst>(RetInst));
 
   // There should be one DPValue.
+  EXPECT_FALSE(SubInst->hasDbgValues());
   EXPECT_TRUE(AddInst->hasDbgValues());
   EXPECT_FALSE(RetInst->hasDbgValues());
   auto R1 = AddInst->getDbgValueRange();
@@ -1243,9 +1253,10 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDPValue) {
   EXPECT_EQ(std::distance(R2.begin(), R2.end()), 1u);
 
   // Re-insert and re-insert.
-  AddInst->insertBefore(RetInst);
+  AddInst->insertAfter(SubInst);
   Entry.reinsertInstInDPValues(AddInst, Pos);
   // We should be back into a position of having one DPValue on the AddInst.
+  EXPECT_FALSE(SubInst->hasDbgValues());
   EXPECT_TRUE(AddInst->hasDbgValues());
   EXPECT_FALSE(RetInst->hasDbgValues());
   auto R3 = AddInst->getDbgValueRange();

>From 05e250c490141c0953b77912bc98de03ef701721 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Wed, 29 Nov 2023 17:21:14 +0000
Subject: [PATCH 5/6] Review feedback

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 0ea01087e46875f..4df719f34c8b547 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2069,7 +2069,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
       BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
       TruncInsertPt++;
       // It will go ahead of any debug-info.
-      TruncInsertPt.setHeadBit(true); // not covered by any test at all.
+      TruncInsertPt.setHeadBit(true);
       assert(TruncInsertPt != TruncUserBB->end());
 
       InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
@@ -2241,7 +2241,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
   // constant if the input to the intrinsic is zero.
   BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
   // Any debug-info after CountZeros should not be included.
-  SplitPt.setHeadBit(true); // coveredy by Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
+  SplitPt.setHeadBit(true);
   BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
   if (IsHugeFunc)
     FreshBBs.insert(EndBlock);
@@ -2836,7 +2836,7 @@ class TypePromotionTransaction {
       Instruction *PrevInst;
       BasicBlock *BB;
     } Point;
-    std::optional<DPValue::self_iterator> beforeDPValue = std::nullopt;
+    std::optional<DPValue::self_iterator> BeforeDPValue = std::nullopt;
 
     /// Remember whether or not the instruction had a previous instruction.
     bool HasPrevInstruction;
@@ -2851,7 +2851,7 @@ class TypePromotionTransaction {
       // of DPValues, if we ended up reinserting.
       if (BB->IsNewDbgInfoFormat) {
         DPMarker *DPM = BB->createMarker(Inst);
-        beforeDPValue = DPM->getReinsertionPosition();
+        BeforeDPValue = DPM->getReinsertionPosition();
       }
 
       if (HasPrevInstruction) {
@@ -2875,7 +2875,7 @@ class TypePromotionTransaction {
           Inst->insertBefore(*Point.BB, Position);
       }
 
-      Inst->getParent()->reinsertInstInDPValues(Inst, beforeDPValue);
+      Inst->getParent()->reinsertInstInDPValues(Inst, BeforeDPValue);
     }
   };
 
@@ -2901,7 +2901,6 @@ class TypePromotionTransaction {
   };
 
   /// Set the operand of an instruction with a new value.
-  // XXX jmorse -- what about for dbg.values?
   class OperandSetter : public TypePromotionAction {
     /// Original operand of the instruction.
     Value *Origin;
@@ -7033,7 +7032,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
   BasicBlock *StartBlock = SI->getParent();
   BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
   // We should split before any debug-info.
-  SplitPt.setHeadBit(true); // covered by X86/select.ll
+  SplitPt.setHeadBit(true);
 
   IRBuilder<> IB(SI);
   auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
@@ -8391,6 +8390,8 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
   return AnyChange;
 }
 
+// FIXME: should updating debug-info really cause the "changed" flag to fire,
+// which can cause a function to be reprocessed?
 bool CodeGenPrepare::fixupDPValue(DPValue &DPV) {
   if (DPV.Type != DPValue::LocationType::Value)
     return false;

>From 6abf00a49fc18488ee978d85dd9d825e1dc8619a Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Wed, 29 Nov 2023 17:21:22 +0000
Subject: [PATCH 6/6] Replumb placeDbgValues to largely use a generic lambda

I can't believe that after deleting it 4 years ago I'm still working on
this function X_X
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 133 ++++++++++------------------
 1 file changed, 49 insertions(+), 84 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4df719f34c8b547..aebf235f128c669 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -461,7 +461,6 @@ class CodeGenPrepare : public FunctionPass {
   bool fixupDbgValue(Instruction *I);
   bool fixupDPValue(DPValue &I);
   bool placeDbgValues(Function &F);
-  bool placeDPValues(Instruction &I, DominatorTree &DT);
   bool placePseudoProbes(Function &F);
   bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
                     LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -8416,6 +8415,23 @@ bool CodeGenPrepare::fixupDPValue(DPValue &DPV) {
   return AnyChange;
 }
 
+static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
+  DVI->removeFromParent();
+  if (isa<PHINode>(VI))
+    DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
+  else
+    DVI->insertAfter(VI);
+}
+
+static void DbgInserterHelper(DPValue *DPV, Instruction *VI) {
+  DPV->removeFromParent();
+  BasicBlock *VIBB = VI->getParent();
+  if (isa<PHINode>(VI))
+    VIBB->insertDPValueBefore(DPV, VIBB->getFirstInsertionPt());
+  else
+    VIBB->insertDPValueAfter(DPV, VI);
+}
+
 // A llvm.dbg.value may be using a value before its definition, due to
 // optimizations in this pass and others. Scan for such dbg.values, and rescue
 // them by moving the dbg.value to immediately after the value definition.
@@ -8425,81 +8441,16 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
   bool MadeChange = false;
   DominatorTree DT(F);
 
-  for (BasicBlock &BB : F) {
-    for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
-      DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
-      if (!DVI) {
-        MadeChange |= placeDPValues(Insn, DT);
-        continue;
-      }
-
-      SmallVector<Instruction *, 4> VIs;
-      for (Value *V : DVI->getValues())
-        if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
-          VIs.push_back(VI);
-
-      // This DVI may depend on multiple instructions, complicating any
-      // potential sink. This block takes the defensive approach, opting to
-      // "undef" the DVI if it has more than one instruction and any of them do
-      // not dominate DVI.
-      for (Instruction *VI : VIs) {
-        if (VI->isTerminator())
-          continue;
-
-        // If VI is a phi in a block with an EHPad terminator, we can't insert
-        // after it.
-        if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
-          continue;
-
-        // If the defining instruction dominates the dbg.value, we do not need
-        // to move the dbg.value.
-        if (DT.dominates(VI, DVI))
-          continue;
-
-        // If we depend on multiple instructions and any of them doesn't
-        // dominate this DVI, we probably can't salvage it: moving it to
-        // after any of the instructions could cause us to lose the others.
-        if (VIs.size() > 1) {
-          LLVM_DEBUG(
-              dbgs()
-              << "Unable to find valid location for Debug Value, undefing:\n"
-              << *DVI);
-          DVI->setKillLocation();
-          break;
-        }
-
-        LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
-                          << *DVI << ' ' << *VI);
-        DVI->removeFromParent();
-        if (isa<PHINode>(VI))
-          DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
-        else
-          DVI->insertAfter(VI);
-        MadeChange = true;
-        ++NumDbgValueMoved;
-      }
-    }
-  }
-
-  return MadeChange;
-}
-
-bool CodeGenPrepare::placeDPValues(Instruction &I, DominatorTree &DT) {
-  bool MadeChange = false;
-
-  for (DPValue &DPV : llvm::make_early_inc_range(I.getDbgValueRange())) {
-    if (DPV.Type != DPValue::LocationType::Value)
-      continue;
-
+  auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
     SmallVector<Instruction *, 4> VIs;
-    for (Value *V : DPV.location_ops())
+    for (Value *V : DbgItem->location_ops())
       if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
         VIs.push_back(VI);
 
-    // This DPV may depend on multiple instructions, complicating any
+    // This item may depend on multiple instructions, complicating any
     // potential sink. This block takes the defensive approach, opting to
-    // "undef" the DVI if it has more than one instruction and any of them do
-    // not dominate DVI.
+    // "undef" the item if it has more than one instruction and any of them do
+    // not dominate iem.
     for (Instruction *VI : VIs) {
       if (VI->isTerminator())
         continue;
@@ -8509,36 +8460,50 @@ bool CodeGenPrepare::placeDPValues(Instruction &I, DominatorTree &DT) {
       if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
         continue;
 
-      // If the defining instruction dominates the position, we do not need
+      // If the defining instruction dominates the dbg.value, we do not need
       // to move the dbg.value.
-      if (DT.dominates(VI, &I))
+      if (DT.dominates(VI, Position))
         continue;
 
       // If we depend on multiple instructions and any of them doesn't
-      // dominate this DPV, we probably can't salvage it: moving it to
+      // dominate this DVI, we probably can't salvage it: moving it to
       // after any of the instructions could cause us to lose the others.
       if (VIs.size() > 1) {
         LLVM_DEBUG(
             dbgs()
             << "Unable to find valid location for Debug Value, undefing:\n"
-            << DPV);
-        DPV.setKillLocation();
+            << *DbgItem);
+        DbgItem->setKillLocation();
         break;
       }
 
       LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
-                        << DPV << ' ' << *VI);
-      DPV.removeFromParent();
-      BasicBlock *VIBB = VI->getParent();
-      if (isa<PHINode>(VI)) {
-        VIBB->insertDPValueBefore(&DPV, VIBB->getFirstInsertionPt());
-      } else {
-        VIBB->insertDPValueAfter(&DPV, VI);
-      }
+                        << *DbgItem << ' ' << *VI);
+      DbgInserterHelper(DbgItem, VI);
       MadeChange = true;
       ++NumDbgValueMoved;
     }
+  };
+
+  for (BasicBlock &BB : F) {
+    for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
+      // Process dbg.value intrinsics.
+      DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
+      if (DVI) {
+        DbgProcessor(DVI, DVI);
+        continue;
+      }
+
+      // If this isn't a dbg.value, process any attached DPValue records
+      // attached to this instruction.
+      for (DPValue &DPV : llvm::make_early_inc_range(Insn.getDbgValueRange())) {
+        if (DPV.Type != DPValue::LocationType::Value)
+          continue;
+        DbgProcessor(&DPV, &Insn);
+      }
+    }
   }
+
   return MadeChange;
 }
 



More information about the llvm-commits mailing list