[llvm] 0b1d1cd - [CodeGenPrepare][NFC] Update the dominator tree instead of rebuilding it

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 10:07:59 PDT 2023


Author: Momchil Velikov
Date: 2023-08-01T18:07:03+01:00
New Revision: 0b1d1cdb89322c277baf5221218a830195fef9d4

URL: https://github.com/llvm/llvm-project/commit/0b1d1cdb89322c277baf5221218a830195fef9d4
DIFF: https://github.com/llvm/llvm-project/commit/0b1d1cdb89322c277baf5221218a830195fef9d4.diff

LOG: [CodeGenPrepare][NFC] Update the dominator tree instead of rebuilding it

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D153638

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
    llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info-inseltpoison.ll
    llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h b/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
index bd98c902d1ab42..eceb131d8c60cc 100644
--- a/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -25,6 +25,8 @@
 namespace llvm {
 
 class BasicBlock;
+class DomTreeUpdater;
+class LoopInfo;
 class Value;
 
 struct DivRemMapKey {
@@ -66,8 +68,9 @@ template <> struct DenseMapInfo<DivRemMapKey> {
 ///
 /// This optimization may add basic blocks immediately after BB; for obvious
 /// reasons, you shouldn't pass those blocks to bypassSlowDivision.
-bool bypassSlowDivision(
-    BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidth);
+bool bypassSlowDivision(BasicBlock *BB,
+                        const DenseMap<unsigned int, unsigned int> &BypassWidth,
+                        DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr);
 
 } // end namespace llvm
 

diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b00df0b6c6cbdf..073e8e273a333c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -251,6 +252,15 @@ static cl::opt<bool> EnableICMP_EQToICMP_ST(
     "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
     cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
 
+#ifdef EXPENSIVE_CHECKS
+static bool VerifyDT = true;
+#else
+static bool VerifyDT = false;
+#endif
+static cl::opt<bool, true> VerifyDTUpdates(
+    "cgp-verify-dt-updates", cl::location(VerifyDT), cl::Hidden,
+    cl::desc("Verify dominator tree updates in CodeGenPrepare"));
+
 static cl::opt<bool>
     VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
                      cl::desc("Enable BFI update verification for "
@@ -304,6 +314,8 @@ class CodeGenPrepare : public FunctionPass {
   const TargetTransformInfo *TTI = nullptr;
   const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
   const TargetLibraryInfo *TLInfo = nullptr;
+  DominatorTree *DT = nullptr;
+  DomTreeUpdater *DTU = nullptr;
   LoopInfo *LI = nullptr;
   std::unique_ptr<BlockFrequencyInfo> BFI;
   std::unique_ptr<BranchProbabilityInfo> BPI;
@@ -355,10 +367,6 @@ class CodeGenPrepare : public FunctionPass {
   /// DataLayout for the Function being processed.
   const DataLayout *DL = nullptr;
 
-  /// Building the dominator tree can be expensive, so we only build it
-  /// lazily and update it when required.
-  std::unique_ptr<DominatorTree> DT;
-
 public:
   /// If encounter huge function, we need to limit the build time.
   bool IsHugeFunc = false;
@@ -394,6 +402,7 @@ class CodeGenPrepare : public FunctionPass {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetPassConfig>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>();
   }
@@ -417,20 +426,16 @@ class CodeGenPrepare : public FunctionPass {
     }
   }
 
-  // Get the DominatorTree, building if necessary.
-  DominatorTree &getDT(Function &F) {
-    if (!DT)
-      DT = std::make_unique<DominatorTree>(F);
-    return *DT;
-  }
+  // Get the DominatorTree, updating it if necessary.
+  DominatorTree &getDT() { return DTU->getDomTree(); }
 
   void removeAllAssertingVHReferences(Value *V);
   bool eliminateAssumptions(Function &F);
-  bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
-  bool eliminateMostlyEmptyBlocks(Function &F);
+  bool eliminateFallThrough(Function &F);
+  bool eliminateMostlyEmptyBlocks(Function &F, ModifyDT &ModifiedDT);
   BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
   bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
-  void eliminateMostlyEmptyBlock(BasicBlock *BB);
+  void eliminateMostlyEmptyBlock(BasicBlock *BB, ModifyDT &ModifiedDT);
   bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
                                      bool isPreheader);
   bool makeBitReverse(Instruction &I);
@@ -471,7 +476,7 @@ class CodeGenPrepare : public FunctionPass {
       Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
       bool HasPromoted, TypePromotionTransaction &TPT,
       SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
-  bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
+  bool splitBranchCondition(Function &F);
   bool simplifyOffsetableRelocate(GCStatepointInst &I);
 
   bool tryToSinkFreeOperands(Instruction *I);
@@ -490,6 +495,7 @@ char CodeGenPrepare::ID = 0;
 INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
                       "Optimize for code generation", false, false)
 INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
@@ -514,6 +520,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   TRI = SubtargetInfo->getRegisterInfo();
   TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   BPI.reset(new BranchProbabilityInfo(F, *LI));
   BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
@@ -544,6 +551,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
       F.setSectionPrefix("unknown");
   }
 
+  DomTreeUpdater DTUpdater(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+  DTU = &DTUpdater;
+  auto resetDTAndLI = [&]() {
+    DTU->recalculate(F);
+    LI->releaseMemory();
+    LI->analyze(*DT);
+  };
+
   /// This optimization identifies DIV instructions that can be
   /// profitably bypassed and carried out with a shorter, faster divide.
   if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
@@ -556,7 +571,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
       BasicBlock *Next = BB->getNextNode();
       // F.hasOptSize is already checked in the outer if statement.
       if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
-        EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
+        EverMadeChange |= bypassSlowDivision(BB, BypassWidths, DTU, LI);
       BB = Next;
     }
   }
@@ -568,26 +583,37 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
 
   // Eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
-  EverMadeChange |= eliminateMostlyEmptyBlocks(F);
-
   ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
+  EverMadeChange |= eliminateMostlyEmptyBlocks(F, ModifiedDT);
+  if (ModifiedDT != ModifyDT::NotModifyDT) {
+    // Rebuild the dom tree if the transformation above did change the CFG, but
+    // did not update the DT.
+    resetDTAndLI();
+  }
+
   if (!DisableBranchOpts)
-    EverMadeChange |= splitBranchCondition(F, ModifiedDT);
+    EverMadeChange |= splitBranchCondition(F);
 
   // Split some critical edges where one of the sources is an indirect branch,
   // to help generate sane code for PHIs involving such edges.
-  EverMadeChange |=
-      SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
+  if (SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true)) {
+    EverMadeChange = true;
+    resetDTAndLI();
+  }
+
+#ifndef NDEBUG
+  if (VerifyDT)
+    assert(getDT().verify(DominatorTree::VerificationLevel::Fast) &&
+           "Incorrect DominatorTree updates in CGP");
+
+  if (VerifyLoopInfo)
+    LI->verify(getDT());
+#endif
 
   // If we are optimzing huge function, we need to consider the build time.
   // Because the basic algorithm's complex is near O(N!).
   IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
 
-  // Transformations above may invalidate dominator tree and/or loop info.
-  DT.reset();
-  LI->releaseMemory();
-  LI->analyze(getDT(F));
-
   bool MadeChange = true;
   bool FuncIterated = false;
   while (MadeChange) {
@@ -600,9 +626,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
       ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
       bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
 
-      if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
-        DT.reset();
-
       MadeChange |= Changed;
       if (IsHugeFunc) {
         // If the BB is updated, it may still has chance to be optimized.
@@ -635,11 +658,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
     MadeChange |= optimizePhiTypes(F);
 
     if (MadeChange)
-      eliminateFallThrough(F, DT.get());
+      eliminateFallThrough(F);
 
 #ifndef NDEBUG
-    if (MadeChange && VerifyLoopInfo)
-      LI->verify(getDT(F));
+    if (VerifyDT)
+      assert(getDT().verify(DominatorTree::VerificationLevel::Fast) &&
+             "Incorrect DominatorTree updates in CGP");
+
+    if (VerifyLoopInfo)
+      LI->verify(getDT());
 #endif
 
     // Really free removed instructions during promotion.
@@ -657,6 +684,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   NewGEPBases.clear();
   SunkAddrs.clear();
 
+  // LoopInfo is not needed anymore and ConstantFoldTerminator can break it.
+  LI = nullptr;
+
   if (!DisableBranchOpts) {
     MadeChange = false;
     // Use a set vector to get deterministic iteration order. The order the
@@ -665,7 +695,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
     SmallSetVector<BasicBlock *, 8> WorkList;
     for (BasicBlock &BB : F) {
       SmallVector<BasicBlock *, 2> Successors(successors(&BB));
-      MadeChange |= ConstantFoldTerminator(&BB, true);
+      MadeChange |= ConstantFoldTerminator(&BB, true, nullptr, DTU);
       if (!MadeChange)
         continue;
 
@@ -680,13 +710,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
       BasicBlock *BB = WorkList.pop_back_val();
       SmallVector<BasicBlock *, 2> Successors(successors(BB));
 
-      DeleteDeadBlock(BB);
+      DeleteDeadBlock(BB, DTU);
 
       for (BasicBlock *Succ : Successors)
         if (pred_empty(Succ))
           WorkList.insert(Succ);
     }
 
+    // Flush pending DT updates in order to finalise deletion of dead blocks.
+    DTU->flush();
+
     // Merge pairs of basic blocks with unconditional branches, connected by
     // a single edge.
     if (EverMadeChange || MadeChange)
@@ -773,7 +806,7 @@ void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
 /// Merge basic blocks which are connected by a single edge, where one of the
 /// basic blocks has a single successor pointing to the other basic block,
 /// which has a single predecessor.
-bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
+bool CodeGenPrepare::eliminateFallThrough(Function &F) {
   bool Changed = false;
   // Scan all of the blocks in the function, except for the entry block.
   // Use a temporary array to avoid iterator being invalidated when
@@ -795,19 +828,13 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
     if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
       continue;
 
-    // Make an effort to skip unreachable blocks.
-    if (DT && !DT->isReachableFromEntry(BB))
-      continue;
-
     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
     if (Term && !Term->isConditional()) {
       Changed = true;
       LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
 
       // Merge BB into SinglePred and delete it.
-      MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
-                                /* MemDep */ nullptr,
-                                /* PredecessorWithTwoSuccessors */ false, DT);
+      MergeBlockIntoPredecessor(BB, DTU, LI);
       Preds.insert(SinglePred);
 
       if (IsHugeFunc) {
@@ -863,7 +890,8 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
 /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
 /// edges in ways that are non-optimal for isel. Start by eliminating these
 /// blocks so we can split them the way we want them.
-bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
+bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F,
+                                                ModifyDT &ModifiedDT) {
   SmallPtrSet<BasicBlock *, 16> Preheaders;
   SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
   while (!LoopList.empty()) {
@@ -890,7 +918,7 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
         !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
       continue;
 
-    eliminateMostlyEmptyBlock(BB);
+    eliminateMostlyEmptyBlock(BB, ModifiedDT);
     MadeChange = true;
   }
   return MadeChange;
@@ -1067,7 +1095,9 @@ static void replaceAllUsesWith(Value *Old, Value *New,
 
 /// Eliminate a basic block that has only phi's and an unconditional branch in
 /// it.
-void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
+/// Indicate that the DT was modified only if the DT wasn't updated.
+void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB,
+                                               ModifyDT &ModifiedDT) {
   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
   BasicBlock *DestBB = BI->getSuccessor(0);
 
@@ -1081,7 +1111,7 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
       assert(SinglePred == BB &&
              "Single predecessor not the same as predecessor");
       // Merge DestBB into SinglePred/BB and delete it.
-      MergeBlockIntoPredecessor(DestBB);
+      MergeBlockIntoPredecessor(DestBB, DTU, LI);
       // Note: BB(=SinglePred) will not be deleted on this path.
       // DestBB(=its single successor) is the one that was deleted.
       LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
@@ -1128,6 +1158,7 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
   BB->eraseFromParent();
   ++NumBlocksElim;
 
+  ModifiedDT = ModifyDT::ModifyBBDT;
   LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
 
@@ -1504,7 +1535,7 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
     // Finally, we need to ensure that the insert point will dominate all
     // existing uses of the increment.
 
-    auto &DT = getDT(*BO->getParent()->getParent());
+    auto &DT = getDT();
     if (DT.dominates(Cmp->getParent(), BO->getParent()))
       // If we're moving up the dom tree, all uses are trivially dominated.
       // (This is the common case for code produced by LSR.)
@@ -2189,7 +2220,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
 ///
 /// If the transform is performed, return true and set ModifiedDT to true.
 static bool despeculateCountZeros(IntrinsicInst *CountZeros,
-                                  LoopInfo &LI,
+                                  DomTreeUpdater &DTU, LoopInfo &LI,
                                   const TargetLowering *TLI,
                                   const DataLayout *DL, ModifyDT &ModifiedDT,
                                   SmallSet<BasicBlock *, 32> &FreshBBs,
@@ -2217,7 +2248,8 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
 
   // The intrinsic will be sunk behind a compare against zero and branch.
   BasicBlock *StartBlock = CountZeros->getParent();
-  BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
+  BasicBlock *CallBlock = SplitBlock(StartBlock, CountZeros, &DTU, &LI,
+                                     /* MSSAU */ nullptr, "cond.false");
   if (IsHugeFunc)
     FreshBBs.insert(CallBlock);
 
@@ -2225,17 +2257,11 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
   // in this block to select the result of the intrinsic or the bit-width
   // constant if the input to the intrinsic is zero.
   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
-  BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
+  BasicBlock *EndBlock = SplitBlock(CallBlock, &*SplitPt, &DTU, &LI,
+                                    /* MSSAU */ nullptr, "cond.end");
   if (IsHugeFunc)
     FreshBBs.insert(EndBlock);
 
-  // Update the LoopInfo. The new blocks are in the same loop as the start
-  // block.
-  if (Loop *L = LI.getLoopFor(StartBlock)) {
-    L->addBasicBlockToLoop(CallBlock, LI);
-    L->addBasicBlockToLoop(EndBlock, LI);
-  }
-
   // Set up a builder to create a compare, conditional branch, and PHI.
   IRBuilder<> Builder(CountZeros->getContext());
   Builder.SetInsertPoint(StartBlock->getTerminator());
@@ -2250,6 +2276,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
   Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
   Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
   StartBlock->getTerminator()->eraseFromParent();
+  DTU.applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}});
 
   // Create a PHI in the end block to select either the output of the intrinsic
   // or the bit width of the operand.
@@ -2410,7 +2437,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
     case Intrinsic::cttz:
     case Intrinsic::ctlz:
       // If counting zeros is expensive, try to avoid it.
-      return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
+      return despeculateCountZeros(II, *DTU, *LI, TLI, DL, ModifiedDT, FreshBBs,
                                    IsHugeFunc);
     case Intrinsic::fshl:
     case Intrinsic::fshr:
@@ -2578,7 +2605,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
       continue;
 
     // Duplicate the return into TailCallBB.
-    (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
+    (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB, DTU);
     assert(!VerifyBFIUpdates ||
            BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
     BFI->setBlockFreq(
@@ -2591,7 +2618,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
 
   // If we eliminated all predecessors of the block, delete the block now.
   if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
-    BB->eraseFromParent();
+    DTU->deleteBB(BB);
 
   return Changed;
 }
@@ -5346,10 +5373,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     // Defer the query (and possible computation of) the dom tree to point of
     // actual use.  It's expected that most address matches don't actually need
     // the domtree.
-    auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
-      Function *F = MemoryInst->getParent()->getParent();
-      return this->getDT(*F);
-    };
+    auto getDTFn = [this]() -> const DominatorTree & { return getDT(); };
     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
         V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
         *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
@@ -6014,7 +6038,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
         continue;
       bool inserted = false;
       for (auto &Pt : CurPts) {
-        if (getDT(F).dominates(Inst, Pt)) {
+        if (getDT().dominates(Inst, Pt)) {
           replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
           RemovedInsts.insert(Pt);
           Pt->removeFromParent();
@@ -6023,7 +6047,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
           Changed = true;
           break;
         }
-        if (!getDT(F).dominates(Pt, Inst))
+        if (!getDT().dominates(Pt, Inst))
           // Give up if we need to merge in a common dominator as the
           // experiments show it is not profitable.
           continue;
@@ -6145,8 +6169,8 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
           if (isa<PHINode>(BaseI))
             NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
           else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
-            NewBaseInsertBB =
-                SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
+            NewBaseInsertBB = SplitEdge(NewBaseInsertBB,
+                                        Invoke->getNormalDest(), &getDT(), LI);
             NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
           } else
             NewBaseInsertPt = std::next(BaseI->getIterator());
@@ -6966,12 +6990,6 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
        llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
     return false;
 
-  // The DominatorTree needs to be rebuilt by any consumers after this
-  // transformation. We simply reset here rather than setting the ModifiedDT
-  // flag to avoid restarting the function walk in runOnFunction for each
-  // select optimized.
-  DT.reset();
-
   // Transform a sequence like this:
   //    start:
   //       %cmp = cmp uge i32 %a, %b
@@ -6996,6 +7014,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
   // block and its branch may be optimized away. In that case, one side of the
   // first branch will point directly to select.end, and the corresponding PHI
   // predecessor block will be the start block.
+  // The CFG is altered here and we update the DominatorTree and the LoopInfo,
+  // but we don't set a ModifiedDT flag to avoid restarting the function walk in
+  // runOnFunction for each select optimized.
 
   // Collect values that go on the true side and the values that go on the false
   // side.
@@ -7021,20 +7042,20 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
   BranchInst *TrueBranch = nullptr;
   BranchInst *FalseBranch = nullptr;
   if (TrueInstrs.size() == 0) {
-    FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
-        CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+    FalseBranch = cast<BranchInst>(
+        SplitBlockAndInsertIfElse(CondFr, &*SplitPt, false, nullptr, DTU, LI));
     FalseBlock = FalseBranch->getParent();
     EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
   } else if (FalseInstrs.size() == 0) {
-    TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
-        CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+    TrueBranch = cast<BranchInst>(
+        SplitBlockAndInsertIfThen(CondFr, &*SplitPt, false, nullptr, DTU, LI));
     TrueBlock = TrueBranch->getParent();
     EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
   } else {
     Instruction *ThenTerm = nullptr;
     Instruction *ElseTerm = nullptr;
     SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
-                                  nullptr, nullptr, LI);
+                                  nullptr, DTU, LI);
     TrueBranch = cast<BranchInst>(ThenTerm);
     FalseBranch = cast<BranchInst>(ElseTerm);
     TrueBlock = TrueBranch->getParent();
@@ -8312,13 +8333,9 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
         // For huge function we tend to quickly go though the inner optmization
         // opportunities in the BB. So we go back to the BB head to re-optimize
         // each instruction instead of go back to the function head.
-        if (IsHugeFunc) {
-          DT.reset();
-          getDT(*BB.getParent());
+        if (IsHugeFunc)
           break;
-        } else {
-          return true;
-        }
+        return true;
       }
     }
   } while (ModifiedDT == ModifyDT::ModifyInstDT);
@@ -8371,7 +8388,7 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
 // to re-order dbg.value intrinsics.
 bool CodeGenPrepare::placeDbgValues(Function &F) {
   bool MadeChange = false;
-  DominatorTree DT(F);
+  DominatorTree &DT = getDT();
 
   for (BasicBlock &BB : F) {
     for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
@@ -8481,7 +8498,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
 ///
 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
 ///
-bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
+bool CodeGenPrepare::splitBranchCondition(Function &F) {
   if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
     return false;
 
@@ -8575,6 +8592,20 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
       PN.addIncoming(Val, TmpBB);
     }
 
+    if (LI) {
+      if (Loop *L = LI->getLoopFor(&BB))
+        L->addBasicBlockToLoop(TmpBB, *LI);
+    }
+
+    if (DTU) {
+      // The edge we need to delete starts at BB and ends at whatever TBB ends
+      // up pointing to.
+      DTU->applyUpdates({{DominatorTree::Insert, &BB, TmpBB},
+                         {DominatorTree::Insert, TmpBB, TBB},
+                         {DominatorTree::Insert, TmpBB, FBB},
+                         {DominatorTree::Delete, &BB, TBB}});
+    }
+
     // Update the branch weights (from SelectionDAGBuilder::
     // FindMergedConditions).
     if (Opc == Instruction::Or) {
@@ -8650,7 +8681,6 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
       }
     }
 
-    ModifiedDT = ModifyDT::ModifyBBDT;
     MadeChange = true;
 
     LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();

diff  --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 73a50b793e6d2e..ed5c6a7e6c086e 100644
--- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -18,7 +18,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
@@ -32,6 +33,8 @@
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
 
@@ -77,6 +80,8 @@ class FastDivInsertionTask {
   Instruction *SlowDivOrRem = nullptr;
   IntegerType *BypassType = nullptr;
   BasicBlock *MainBB = nullptr;
+  DomTreeUpdater *DTU = nullptr;
+  LoopInfo *LI = nullptr;
 
   bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
   ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
@@ -100,7 +105,8 @@ class FastDivInsertionTask {
   Type *getSlowType() { return SlowDivOrRem->getType(); }
 
 public:
-  FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+  FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths,
+                       DomTreeUpdater *DTU, LoopInfo *LI);
 
   Value *getReplacement(DivCacheTy &Cache);
 };
@@ -108,7 +114,9 @@ class FastDivInsertionTask {
 } // end anonymous namespace
 
 FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
-                                           const BypassWidthsTy &BypassWidths) {
+                                           const BypassWidthsTy &BypassWidths,
+                                           DomTreeUpdater *DTU, LoopInfo *LI)
+    : DTU(DTU), LI(LI) {
   switch (I->getOpcode()) {
   case Instruction::UDiv:
   case Instruction::SDiv:
@@ -413,7 +421,7 @@ std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
     // lets us entirely avoid a long div.
 
     // Split the basic block before the div/rem.
-    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+    BasicBlock *SuccessorBB = SplitBlock(MainBB, SlowDivOrRem, DTU, LI);
     // Remove the unconditional branch from MainBB to SuccessorBB.
     MainBB->back().eraseFromParent();
     QuotRemWithBB Long;
@@ -424,13 +432,23 @@ std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
     QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
     Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
     Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
+
+    if (DTU)
+      DTU->applyUpdates({{DominatorTree::Insert, MainBB, Fast.BB},
+                         {DominatorTree::Insert, Fast.BB, SuccessorBB}});
+
+    if (LI) {
+      if (Loop *L = LI->getLoopFor(MainBB))
+        L->addBasicBlockToLoop(Fast.BB, *LI);
+    }
+
     return Result;
   } else {
     // General case. Create both slow and fast div/rem pairs and choose one of
     // them at runtime.
 
     // Split the basic block before the div/rem.
-    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+    BasicBlock *SuccessorBB = SplitBlock(MainBB, SlowDivOrRem, DTU, LI);
     // Remove the unconditional branch from MainBB to SuccessorBB.
     MainBB->back().eraseFromParent();
     QuotRemWithBB Fast = createFastBB(SuccessorBB);
@@ -439,6 +457,21 @@ std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
     Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
                                             DivisorShort ? nullptr : Divisor);
     Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
+
+    if (DTU)
+      DTU->applyUpdates({{DominatorTree::Insert, MainBB, Fast.BB},
+                         {DominatorTree::Insert, MainBB, Slow.BB},
+                         {DominatorTree::Insert, Fast.BB, SuccessorBB},
+                         {DominatorTree::Insert, Slow.BB, SuccessorBB},
+                         {DominatorTree::Delete, MainBB, SuccessorBB}});
+
+    if (LI) {
+      if (Loop *L = LI->getLoopFor(MainBB)) {
+        L->addBasicBlockToLoop(Fast.BB, *LI);
+        L->addBasicBlockToLoop(Slow.BB, *LI);
+      }
+    }
+
     return Result;
   }
 }
@@ -446,7 +479,8 @@ std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
 /// This optimization identifies DIV/REM instructions in a BB that can be
 /// profitably bypassed and carried out with a shorter, faster divide.
 bool llvm::bypassSlowDivision(BasicBlock *BB,
-                              const BypassWidthsTy &BypassWidths) {
+                              const BypassWidthsTy &BypassWidths,
+                              DomTreeUpdater *DTU, LoopInfo *LI) {
   DivCacheTy PerBBDivCache;
 
   bool MadeChange = false;
@@ -461,7 +495,7 @@ bool llvm::bypassSlowDivision(BasicBlock *BB,
     if (I->hasNUses(0))
       continue;
 
-    FastDivInsertionTask Task(I, BypassWidths);
+    FastDivInsertionTask Task(I, BypassWidths, DTU, LI);
     if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
       I->replaceAllUsesWith(Replacement);
       I->eraseFromParent();

diff  --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info-inseltpoison.ll
index 8c198a3c7d607b..8a37bd34f6c98e 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info-inseltpoison.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info-inseltpoison.ll
@@ -4,22 +4,22 @@
 
 define i64 @sdiv64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @sdiv64(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], [[DBG6:!dbg !.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, [[DBG6]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, [[DBG6]]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]], [[DBG6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], !dbg [[DBG6:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, !dbg [[DBG6]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]], !dbg [[DBG6]]
 ; CHECK:       4:
-; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[B]] to i32, [[DBG6]]
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[A]] to i32, [[DBG6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], [[DBG6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64, [[DBG6]]
-; CHECK-NEXT:    br label [[TMP11:%.*]], [[DBG6]]
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[B]] to i32, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[A]] to i32, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64, !dbg [[DBG6]]
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]], !dbg [[DBG6]]
 ; CHECK:       9:
-; CHECK-NEXT:    [[TMP10:%.*]] = sdiv i64 [[A]], [[B]], [[DBG6]]
-; CHECK-NEXT:    br label [[TMP11]], [[DBG6]]
-; CHECK:       11:
-; CHECK-NEXT:    [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ], [[DBG6]]
-; CHECK-NEXT:    ret i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP10:%.*]] = sdiv i64 [[A]], [[B]], !dbg [[DBG6]]
+; CHECK-NEXT:    br label [[DOTSPLIT]], !dbg [[DBG6]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ], !dbg [[DBG6]]
+; CHECK-NEXT:    ret i64 [[TMP11]]
 ;
   %d = sdiv i64 %a, %b, !dbg !6
   ret i64 %d
@@ -29,27 +29,27 @@ define i64 @sdiv64(i64 %a, i64 %b) {
 ; division.
 define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @sdivrem64(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], [[DBG6]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, [[DBG6]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, [[DBG6]]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP11:%.*]], [[DBG6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, !dbg [[DBG6]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP11:%.*]], !dbg [[DBG6]]
 ; CHECK:       4:
-; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[B]] to i32, [[DBG6]]
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[A]] to i32, [[DBG6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], [[DBG6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = urem i32 [[TMP6]], [[TMP5]], [[DBG6]]
-; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP7]] to i64, [[DBG6]]
-; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP8]] to i64, [[DBG6]]
-; CHECK-NEXT:    br label [[TMP14:%.*]], [[DBG6]]
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[B]] to i32, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[A]] to i32, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = urem i32 [[TMP6]], [[TMP5]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP7]] to i64, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP8]] to i64, !dbg [[DBG6]]
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]], !dbg [[DBG6]]
 ; CHECK:       11:
-; CHECK-NEXT:    [[TMP12:%.*]] = sdiv i64 [[A]], [[B]], [[DBG6]]
-; CHECK-NEXT:    [[TMP13:%.*]] = srem i64 [[A]], [[B]], [[DBG6]]
-; CHECK-NEXT:    br label [[TMP14]], [[DBG6]]
-; CHECK:       14:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi i64 [ [[TMP9]], [[TMP4]] ], [ [[TMP12]], [[TMP11]] ], [[DBG6]]
-; CHECK-NEXT:    [[TMP16:%.*]] = phi i64 [ [[TMP10]], [[TMP4]] ], [ [[TMP13]], [[TMP11]] ], [[DBG6]]
-; CHECK-NEXT:    [[INS0:%.*]] = insertelement <2 x i64> poison, i64 [[TMP15]], i32 0
-; CHECK-NEXT:    [[INS1:%.*]] = insertelement <2 x i64> [[INS0]], i64 [[TMP16]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = sdiv i64 [[A]], [[B]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP13:%.*]] = srem i64 [[A]], [[B]], !dbg [[DBG6]]
+; CHECK-NEXT:    br label [[DOTSPLIT]], !dbg [[DBG6]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i64 [ [[TMP9]], [[TMP4]] ], [ [[TMP12]], [[TMP11]] ], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i64 [ [[TMP10]], [[TMP4]] ], [ [[TMP13]], [[TMP11]] ], !dbg [[DBG6]]
+; CHECK-NEXT:    [[INS0:%.*]] = insertelement <2 x i64> poison, i64 [[TMP14]], i32 0
+; CHECK-NEXT:    [[INS1:%.*]] = insertelement <2 x i64> [[INS0]], i64 [[TMP15]], i32 1
 ; CHECK-NEXT:    ret <2 x i64> [[INS1]]
 ;
   %d = sdiv i64 %a, %b, !dbg !6

diff  --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info.ll
index cf2d96214a54c2..0d097202fe80fe 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/bypass-slow-div-debug-info.ll
@@ -4,22 +4,22 @@
 
 define i64 @sdiv64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @sdiv64(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], !dbg !6
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, !dbg !6
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, !dbg !6
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]], !dbg !6
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], !dbg [[DBG6:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, !dbg [[DBG6]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]], !dbg [[DBG6]]
 ; CHECK:       4:
-; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[B]] to i32, !dbg !6
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[A]] to i32, !dbg !6
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], !dbg !6
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64, !dbg !6
-; CHECK-NEXT:    br label [[TMP11:%.*]], !dbg !6
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[B]] to i32, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[A]] to i32, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64, !dbg [[DBG6]]
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]], !dbg [[DBG6]]
 ; CHECK:       9:
-; CHECK-NEXT:    [[TMP10:%.*]] = sdiv i64 [[A]], [[B]], !dbg !6
-; CHECK-NEXT:    br label [[TMP11]], !dbg !6
-; CHECK:       11:
-; CHECK-NEXT:    [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ], !dbg !6
-; CHECK-NEXT:    ret i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP10:%.*]] = sdiv i64 [[A]], [[B]], !dbg [[DBG6]]
+; CHECK-NEXT:    br label [[DOTSPLIT]], !dbg [[DBG6]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ], !dbg [[DBG6]]
+; CHECK-NEXT:    ret i64 [[TMP11]]
 ;
   %d = sdiv i64 %a, %b, !dbg !6
   ret i64 %d
@@ -29,27 +29,27 @@ define i64 @sdiv64(i64 %a, i64 %b) {
 ; division.
 define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @sdivrem64(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], !dbg !6
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, !dbg !6
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, !dbg !6
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP11:%.*]], !dbg !6
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0, !dbg [[DBG6]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP11:%.*]], !dbg [[DBG6]]
 ; CHECK:       4:
-; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[B]] to i32, !dbg !6
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[A]] to i32, !dbg !6
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], !dbg !6
-; CHECK-NEXT:    [[TMP8:%.*]] = urem i32 [[TMP6]], [[TMP5]], !dbg !6
-; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP7]] to i64, !dbg !6
-; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP8]] to i64, !dbg !6
-; CHECK-NEXT:    br label [[TMP14:%.*]], !dbg !6
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[B]] to i32, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[A]] to i32, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[TMP5]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = urem i32 [[TMP6]], [[TMP5]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP7]] to i64, !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP8]] to i64, !dbg [[DBG6]]
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]], !dbg [[DBG6]]
 ; CHECK:       11:
-; CHECK-NEXT:    [[TMP12:%.*]] = sdiv i64 [[A]], [[B]], !dbg !6
-; CHECK-NEXT:    [[TMP13:%.*]] = srem i64 [[A]], [[B]], !dbg !6
-; CHECK-NEXT:    br label [[TMP14]], !dbg !6
-; CHECK:       14:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi i64 [ [[TMP9]], [[TMP4]] ], [ [[TMP12]], [[TMP11]] ], !dbg !6
-; CHECK-NEXT:    [[TMP16:%.*]] = phi i64 [ [[TMP10]], [[TMP4]] ], [ [[TMP13]], [[TMP11]] ], !dbg !6
-; CHECK-NEXT:    [[INS0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP15]], i32 0
-; CHECK-NEXT:    [[INS1:%.*]] = insertelement <2 x i64> [[INS0]], i64 [[TMP16]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = sdiv i64 [[A]], [[B]], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP13:%.*]] = srem i64 [[A]], [[B]], !dbg [[DBG6]]
+; CHECK-NEXT:    br label [[DOTSPLIT]], !dbg [[DBG6]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i64 [ [[TMP9]], [[TMP4]] ], [ [[TMP12]], [[TMP11]] ], !dbg [[DBG6]]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i64 [ [[TMP10]], [[TMP4]] ], [ [[TMP13]], [[TMP11]] ], !dbg [[DBG6]]
+; CHECK-NEXT:    [[INS0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP14]], i32 0
+; CHECK-NEXT:    [[INS1:%.*]] = insertelement <2 x i64> [[INS0]], i64 [[TMP15]], i32 1
 ; CHECK-NEXT:    ret <2 x i64> [[INS1]]
 ;
   %d = sdiv i64 %a, %b, !dbg !6


        


More information about the llvm-commits mailing list