[llvm] [SimplifyCFG] Simplify identical predecessors (PR #173022)

Kunqiu Chen via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 22 01:02:12 PST 2025


https://github.com/Camsyn updated https://github.com/llvm/llvm-project/pull/173022

>From abf72043c8a5416cbb48fcdd43a9eb0681409296 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Fri, 19 Dec 2025 23:27:13 +0800
Subject: [PATCH 1/8] before-commit test

---
 llvm/test/Transforms/SimplifyCFG/dup-preds.ll | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/dup-preds.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/dup-preds.ll b/llvm/test/Transforms/SimplifyCFG/dup-preds.ll
new file mode 100644
index 0000000000000..859ef2a64e394
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/dup-preds.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s -check-prefix=SIMPLIFY-CFG
+
+define i8 @foo(i8 %v1, i8 %v2) {
+; SIMPLIFY-CFG-LABEL: @foo(
+; SIMPLIFY-CFG-NEXT:  entry:
+; SIMPLIFY-CFG-NEXT:    switch i8 [[V1:%.*]], label [[EXIT:%.*]] [
+; SIMPLIFY-CFG-NEXT:      i8 0, label [[THEN:%.*]]
+; SIMPLIFY-CFG-NEXT:      i8 1, label [[ELSE:%.*]]
+; SIMPLIFY-CFG-NEXT:    ]
+; SIMPLIFY-CFG:       then:
+; SIMPLIFY-CFG-NEXT:    switch i8 [[V2:%.*]], label [[EXIT]] [
+; SIMPLIFY-CFG-NEXT:      i8 0, label [[SWITCH_CASE_0:%.*]]
+; SIMPLIFY-CFG-NEXT:      i8 1, label [[SWITCH_CASE_1:%.*]]
+; SIMPLIFY-CFG-NEXT:      i8 2, label [[SWITCH_CASE_1]]
+; SIMPLIFY-CFG-NEXT:    ]
+; SIMPLIFY-CFG:       switch.case.0:
+; SIMPLIFY-CFG-NEXT:    br label [[EXIT]]
+; SIMPLIFY-CFG:       switch.case.1:
+; SIMPLIFY-CFG-NEXT:    br label [[EXIT]]
+; SIMPLIFY-CFG:       else:
+; SIMPLIFY-CFG-NEXT:    br label [[EXIT]]
+; SIMPLIFY-CFG:       exit:
+; SIMPLIFY-CFG-NEXT:    [[RET:%.*]] = phi i8 [ 0, [[ELSE]] ], [ 0, [[SWITCH_CASE_0]] ], [ 1, [[SWITCH_CASE_1]] ], [ 2, [[THEN]] ], [ 3, [[ENTRY:%.*]] ]
+; SIMPLIFY-CFG-NEXT:    ret i8 [[RET]]
+;
+entry:
+  switch i8 %v1, label %exit [
+  i8 0, label %then
+  i8 1, label %else
+  ]
+
+then:                                             ; preds = %entry
+  switch i8 %v2, label %exit [
+  i8 0, label %switch.case.0
+  i8 1, label %switch.case.1
+  i8 2, label %switch.case.2
+  ]
+
+switch.case.0:                                    ; preds = %then
+  br label %exit
+
+switch.case.1:                                    ; preds = %then
+  br label %exit
+
+switch.case.2:                                    ; preds = %then
+  br label %exit
+
+else:                                             ; preds = %entry
+  br label %exit
+
+exit:                                             ; preds = %else, %switch.case.2, %switch.case.1, %switch.case.0, %then, %entry
+  %ret = phi i8 [ 0, %else ], [ 0, %switch.case.0 ], [ 1, %switch.case.1 ], [ 1, %switch.case.2 ], [ 2, %then ], [ 3, %entry ]
+  ret i8 %ret
+}

>From 2a76b29e86099ff67ef827b46ea64c1bc24e61ee Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Fri, 19 Dec 2025 23:20:33 +0800
Subject: [PATCH 2/8] feat: impl generic dup preds merging

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 221 +++++++++++++++++++++-
 1 file changed, 219 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2a957737697c3..ca7e394e99b3c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -319,6 +319,7 @@ class SimplifyCFGOpt {
   bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
   bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
   bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
+  bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);
 
 public:
   SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
@@ -8666,6 +8667,219 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
 
   return false;
 }
+/// Checking whether two BBs are equal depends on the contents of the
+/// BasicBlock and the incoming values of their successor PHINodes.
+/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
+/// calling this function on each BasicBlock every time isEqual is called,
+/// especially since the same BasicBlock may be passed as an argument multiple
+/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
+/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
+/// of the incoming values.
+struct EqualBBWrapper {
+  BasicBlock *BB;
+
+  // One Phi usually has < 8 incoming values.
+  using BB2ValueMap = SmallDenseMap<BasicBlock *, Value *, 8>;
+  using Phi2IVsMap = DenseMap<PHINode *, BB2ValueMap>;
+  Phi2IVsMap *PhiPredIVs;
+};
+
+template <> struct llvm::DenseMapInfo<const EqualBBWrapper *> {
+  static const EqualBBWrapper *getEmptyKey() {
+    return static_cast<EqualBBWrapper *>(DenseMapInfo<void *>::getEmptyKey());
+  }
+  static const EqualBBWrapper *getTombstoneKey() {
+    return static_cast<EqualBBWrapper *>(
+        DenseMapInfo<void *>::getTombstoneKey());
+  }
+  static unsigned getHashValue(const EqualBBWrapper *SSW) {
+    BasicBlock *BB = SSW->BB;
+    BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+    assert(BI->isUnconditional() &&
+           "Only supporting unconditional branches for now");
+    assert(BI->getNumSuccessors() == 1 &&
+           "Expected unconditional branches to have one successor");
+    assert(BB->size() == 1 && "Expected just a single branch in the BB");
+
+    // Since we assume the BB is just a single BranchInst with a single
+    // successor, we hash as the BB and the incoming Values of its successor
+    // PHIs. Initially, we tried to just use the successor BB as the hash, but
+    // including the incoming PHI values leads to better performance.
+    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
+    // time and passing it in SwitchSuccWrapper, but this slowed down the
+    // average compile time without having any impact on the worst case compile
+    // time.
+    BasicBlock *Succ = BI->getSuccessor(0);
+    auto PhiValsForBB = map_range(
+        BB->phis(), [BB, &PhiPredIVs = *SSW->PhiPredIVs](PHINode &Phi) {
+          return PhiPredIVs[&Phi][BB];
+        });
+    return hash_combine(Succ, hash_combine_range(PhiValsForBB));
+  }
+  static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
+    auto *EKey = DenseMapInfo<EqualBBWrapper *>::getEmptyKey();
+    auto *TKey = DenseMapInfo<EqualBBWrapper *>::getTombstoneKey();
+    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
+      return LHS == RHS;
+
+    BasicBlock *A = LHS->BB;
+    BasicBlock *B = RHS->BB;
+
+    // FIXME: we checked that the size of A and B are both 1 in
+    // simplifyDuplicateSwitchArms to make the Case list smaller to
+    // improve performance. If we decide to support BasicBlocks with more
+    // than just a single instruction, we need to check that A.size() ==
+    // B.size() here, and we need to check more than just the BranchInsts
+    // for equality.
+
+    BranchInst *ABI = cast<BranchInst>(A->getTerminator());
+    BranchInst *BBI = cast<BranchInst>(B->getTerminator());
+    assert(ABI->isUnconditional() && BBI->isUnconditional() &&
+           "Only supporting unconditional branches for now");
+    if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
+      return false;
+
+    // Need to check that PHIs in successor have matching values
+    BasicBlock *Succ = ABI->getSuccessor(0);
+    auto IfPhiIVMatch = [A, B, &PhiPredIVs = *LHS->PhiPredIVs](PHINode &Phi) {
+      // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
+      // query
+      auto &PredIVs = PhiPredIVs[&Phi];
+      return PredIVs[A] == PredIVs[B];
+    };
+    return all_of(Succ->phis(), IfPhiIVMatch);
+  }
+};
+
+bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
+                                                   DomTreeUpdater *DTU) {
+  // Need at least 2 predecessors to do anything.
+  if (!BB || pred_empty(BB))
+    return false;
+  // Precompute PHI incoming values in BB for all candidate preds.
+  // PhiPredIVs[Phi][Pred] = incoming value
+  EqualBBWrapper::Phi2IVsMap PhiPredIVs;
+
+  // Collect candidate non-entry predecessors P with:
+  // - terminator unconditional br to Succ,
+  // - does not have address taken / weird control.
+  auto Filter = [BB](BasicBlock *Pred) {
+    // Entry block cannot be eliminated or have predecessors.
+    if (Pred->isEntryBlock())
+      return false;
+
+    // Single successor and must be Succ.
+    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
+    if (!BI || !BI->isUnconditional())
+      return false;
+
+    // Avoid blocks that are "address-taken" (blockaddress) or have unusual
+    // uses.
+    if (Pred->hasAddressTaken())
+      return false;
+    if (Pred->isLandingPad())
+      return false;
+
+    // TODO: should we support Pred with >1 instructions?
+    if (Pred->size() != 1)
+      return false;
+
+    // Avoid self-loop predecessor merging for now.
+    if (Pred == BB)
+      return false;
+
+    return true;
+  };
+
+  auto FilteredPreds = make_filter_range(predecessors(BB), Filter);
+
+  SmallVector<EqualBBWrapper> Preds(
+      map_range(FilteredPreds, [&PhiPredIVs](BasicBlock *Pred) {
+        return EqualBBWrapper{Pred, &PhiPredIVs};
+      }));
+
+  if (Preds.size() < 2)
+    return false;
+
+  SmallVector<PHINode *, 8> Phis(make_pointer_range(BB->phis()));
+
+  PhiPredIVs.reserve(Phis.size());
+  for (PHINode *Phi : Phis) {
+    auto &IVs =
+        PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
+    // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
+    // O(|Pred|).
+    for (auto &IV : Phi->incoming_values())
+      IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
+  }
+
+  // Group duplicates using DenseSet with custom equality/hashing.
+  DenseSet<const EqualBBWrapper *> Keep;
+  Keep.reserve(Preds.size());
+
+  SmallVector<DominatorTree::UpdateType> Updates;
+  Updates.reserve(Preds.size() * 2);
+
+  bool MadeChange = false;
+
+  // Helper: redirect all edges X -> DeadPred to X -> LivePred.
+  auto RedirectIncomingEdges = [&](BasicBlock *DeadPred, BasicBlock *LivePred) {
+    // Replace successors in all predecessors of DeadPred.
+    SmallSetVector<BasicBlock *, 8> DeadPredPreds(llvm::from_range,
+                                                  predecessors(DeadPred));
+    if (DTU) {
+      // All predecessors of DeadPred (except the common predecessor) will be
+      // moved to LivePred.
+      Updates.reserve(Updates.size() + DeadPredPreds.size() * 2);
+      SmallPtrSet<BasicBlock *, 16> LivePredPreds(llvm::from_range,
+                                                  predecessors(LivePred));
+      for (BasicBlock *PP : DeadPredPreds) {
+        // Do not modify those common predecessors of DeadPred and LivePred
+        if (!LivePredPreds.contains(PP))
+          Updates.push_back({DominatorTree::Insert, PP, LivePred});
+        Updates.push_back({DominatorTree::Delete, PP, DeadPred});
+      }
+    }
+    LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
+               DeadPred->printAsOperand(dbgs()); dbgs() << " with pred ";
+               LivePred->printAsOperand(dbgs()); dbgs() << " for ";
+               BB->printAsOperand(dbgs()); dbgs() << "\n");
+    for (BasicBlock *PP : DeadPredPreds) {
+      Instruction *T = PP->getTerminator();
+      T->replaceSuccessorWith(DeadPred, LivePred);
+    }
+  };
+
+  // Try to canonicalize duplicates.
+  for (const auto &Pred : Preds) {
+    // Pred is a candidate for simplification. If we find a duplicate BB,
+    // replace it.
+    const auto [It, Inserted] = Keep.insert(&Pred);
+    if (Inserted)
+      continue;
+
+    // Found duplicate: merge P into canonical predecessor It->Pred.
+    BasicBlock *KeepPred = (*It)->BB;
+    BasicBlock *DeadPred = Pred.BB;
+
+    // Avoid merging if either is the other's predecessor in weird ways.
+    if (KeepPred == DeadPred)
+      continue;
+
+    // Redirect all edges into DeadPred to KeepPred.
+    RedirectIncomingEdges(DeadPred, KeepPred);
+
+    // Now DeadPred should become unreachable; leave DCE to later,
+    // but we can try to simplify it if it only branches to Succ.
+    // (We won't erase here to keep the routine simple and DT-safe.)
+    MadeChange = true;
+  }
+
+  if (DTU && !Updates.empty())
+    DTU->applyUpdates(Updates);
+
+  return MadeChange;
+}
 
 /// Check if passing a value to an instruction will cause undefined behavior.
 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
@@ -8912,8 +9126,6 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
       return true;
     }
 
-  IRBuilder<> Builder(BB);
-
   if (Options.SpeculateBlocks &&
       !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
     // If there is a trivial two-entry PHI node in this basic block, and we can
@@ -8925,6 +9137,11 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
           return true;
   }
 
+  // Merge identical predecessors of this block
+  if (simplifyDuplicatePredecessors(BB, DTU))
+    return true;
+
+  IRBuilder<> Builder(BB);
   Instruction *Terminator = BB->getTerminator();
   Builder.SetInsertPoint(Terminator);
   switch (Terminator->getOpcode()) {

>From 825bb77627cb9e921cee42d5e7ae95be4087d6f2 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Sat, 20 Dec 2025 17:44:31 +0800
Subject: [PATCH 3/8] after-commit test

---
 llvm/test/Transforms/SimplifyCFG/dup-preds.ll | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/dup-preds.ll b/llvm/test/Transforms/SimplifyCFG/dup-preds.ll
index 859ef2a64e394..cd6592a45be40 100644
--- a/llvm/test/Transforms/SimplifyCFG/dup-preds.ll
+++ b/llvm/test/Transforms/SimplifyCFG/dup-preds.ll
@@ -10,18 +10,16 @@ define i8 @foo(i8 %v1, i8 %v2) {
 ; SIMPLIFY-CFG-NEXT:    ]
 ; SIMPLIFY-CFG:       then:
 ; SIMPLIFY-CFG-NEXT:    switch i8 [[V2:%.*]], label [[EXIT]] [
-; SIMPLIFY-CFG-NEXT:      i8 0, label [[SWITCH_CASE_0:%.*]]
+; SIMPLIFY-CFG-NEXT:      i8 0, label [[ELSE]]
 ; SIMPLIFY-CFG-NEXT:      i8 1, label [[SWITCH_CASE_1:%.*]]
 ; SIMPLIFY-CFG-NEXT:      i8 2, label [[SWITCH_CASE_1]]
 ; SIMPLIFY-CFG-NEXT:    ]
-; SIMPLIFY-CFG:       switch.case.0:
-; SIMPLIFY-CFG-NEXT:    br label [[EXIT]]
 ; SIMPLIFY-CFG:       switch.case.1:
 ; SIMPLIFY-CFG-NEXT:    br label [[EXIT]]
 ; SIMPLIFY-CFG:       else:
 ; SIMPLIFY-CFG-NEXT:    br label [[EXIT]]
 ; SIMPLIFY-CFG:       exit:
-; SIMPLIFY-CFG-NEXT:    [[RET:%.*]] = phi i8 [ 0, [[ELSE]] ], [ 0, [[SWITCH_CASE_0]] ], [ 1, [[SWITCH_CASE_1]] ], [ 2, [[THEN]] ], [ 3, [[ENTRY:%.*]] ]
+; SIMPLIFY-CFG-NEXT:    [[RET:%.*]] = phi i8 [ 0, [[ELSE]] ], [ 2, [[THEN]] ], [ 1, [[SWITCH_CASE_1]] ], [ 3, [[ENTRY:%.*]] ]
 ; SIMPLIFY-CFG-NEXT:    ret i8 [[RET]]
 ;
 entry:

>From 185e648f26a90f177a57d39918174e761e276d03 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Sat, 20 Dec 2025 18:13:48 +0800
Subject: [PATCH 4/8] Regenerate some existing tests

---
 .../Transforms/LoopDeletion/simplify-then-delete.ll |  5 +++++
 .../SimplifyCFG/X86/switch-to-lookup-globals.ll     | 13 +++----------
 llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll  | 10 ++++------
 3 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/llvm/test/Transforms/LoopDeletion/simplify-then-delete.ll b/llvm/test/Transforms/LoopDeletion/simplify-then-delete.ll
index 529ee8919bdb3..869fea650f49f 100644
--- a/llvm/test/Transforms/LoopDeletion/simplify-then-delete.ll
+++ b/llvm/test/Transforms/LoopDeletion/simplify-then-delete.ll
@@ -11,6 +11,11 @@ define i32 @pmat(i32 %m, i32 %n, ptr %y, i1 %arg) nounwind {
 ; CHECK-LABEL: @pmat(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[M:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP4]], label [[BB_N10:%.*]], label [[W_E12:%.*]]
+; CHECK:       bb.n10:
+; CHECK-NEXT:    [[CMP51:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br label [[W_E12]]
+; CHECK:       w.e12:
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-to-lookup-globals.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-to-lookup-globals.ll
index bfcc8344264ea..4745cd0f7ea3a 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-to-lookup-globals.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-to-lookup-globals.ll
@@ -10,16 +10,9 @@ target triple = "x86_64-unknown-linux-gnu"
 define i1 @zot(i32 %arg) {
 ; CHECK-LABEL: @zot(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    %0 = icmp ult i32 %arg, 3
-; CHECK-NEXT:    br i1 %0, label %switch.lookup, label %bb6
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    %1 = zext nneg i32 %arg to i64
-; CHECK-NEXT:    %switch.gep = getelementptr inbounds [3 x ptr], ptr @switch.table.zot, i64 0, i64 %1
-; CHECK-NEXT:    %switch.load = load ptr, ptr %switch.gep, align 8
-; CHECK-NEXT:    br label %bb6
-; CHECK:       bb6:
-; CHECK-NEXT:    %tmp7 = phi ptr [ null, %bb ], [ %switch.load, %switch.lookup ]
-; CHECK-NEXT:    %tmp8 = icmp eq ptr %tmp7, getelementptr inbounds ([75 x { i32, i32, i32, i8, i8 }], ptr @global, i64 1, i64 0, i32 0)
+; CHECK-NEXT:    %cond = icmp eq i32 %arg, 1
+; CHECK-NEXT:    %spec.select = select i1 %cond, ptr getelementptr inbounds ([75 x { i32, i32, i32, i8, i8 }], ptr @global, i64 0, i64 6, i32 0), ptr null
+; CHECK-NEXT:    %tmp8 = icmp eq ptr %spec.select, getelementptr inbounds ([75 x { i32, i32, i32, i8, i8 }], ptr @global, i64 1, i64 0, i32 0)
 ; CHECK-NEXT:    ret i1 %tmp8
 ;
 bb:
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
index ae7baeb970689..e9edb33b9420c 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
@@ -72,16 +72,14 @@ define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 %
 ; SIMPLIFY-CFG-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
 ; SIMPLIFY-CFG-NEXT:    br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]]
 ; SIMPLIFY-CFG:       [[BB6]]:
-; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP2]], label %[[BB9:.*]] [
+; SIMPLIFY-CFG-NEXT:    switch i32 [[TMP2]], label %[[BB8:.*]] [
 ; SIMPLIFY-CFG-NEXT:      i32 0, label %[[BB7]]
-; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB8:.*]]
+; SIMPLIFY-CFG-NEXT:      i32 1, label %[[BB7]]
 ; SIMPLIFY-CFG-NEXT:    ]
 ; SIMPLIFY-CFG:       [[BB7]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
+; SIMPLIFY-CFG-NEXT:    br label %[[BB8]]
 ; SIMPLIFY-CFG:       [[BB8]]:
-; SIMPLIFY-CFG-NEXT:    br label %[[BB9]]
-; SIMPLIFY-CFG:       [[BB9]]:
-; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ]
+; SIMPLIFY-CFG-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB7]] ]
 ; SIMPLIFY-CFG-NEXT:    ret i32 [[TMP10]]
 ;
   br i1 %0, label %6, label %7

>From f0142cf1e0893c85aaba6160753050f9e094d50b Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Sat, 20 Dec 2025 21:33:48 +0800
Subject: [PATCH 5/8] Update amd gpu testcase

---
 llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index d6cc833c8c73f..9850e285ac66a 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -682,12 +682,15 @@ divergent.ret:
 }
 
 ; IR-LABEL: @multi_divergent_unreachable_exit(
+; IR: Flow5:
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64
+; IR-NEXT: br label %UnifiedReturnBlock
+
 ; IR: UnifiedUnreachableBlock:
 ; IR-NEXT: call void @llvm.amdgcn.unreachable()
-; IR-NEXT: br label %UnifiedReturnBlock
+; IR-NEXT: br label %Flow5
 
 ; IR: UnifiedReturnBlock:
-; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64
 ; IR-NEXT: ret void
 define amdgpu_kernel void @multi_divergent_unreachable_exit(i32 %switch) #0 {
 bb:

>From 8531a9dfe621c9da0865220d1c9b5e812a0d0622 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Mon, 22 Dec 2025 16:58:42 +0800
Subject: [PATCH 6/8] Generalize simplifyDuplicateSwitchArms

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 464 +++++++---------------
 1 file changed, 149 insertions(+), 315 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index ca7e394e99b3c..b56348b0ba9ac 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -293,7 +293,6 @@ class SimplifyCFGOpt {
   bool simplifyCleanupReturn(CleanupReturnInst *RI);
   bool simplifyUnreachable(UnreachableInst *UI);
   bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
-  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
   bool simplifyIndirectBr(IndirectBrInst *IBI);
   bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
   bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
@@ -7984,7 +7983,7 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
   return true;
 }
 
-/// Checking whether two cases of SI are equal depends on the contents of the
+/// Checking whether two BBs are equal depends on the contents of the
 /// BasicBlock and the incoming values of their successor PHINodes.
 /// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
 /// calling this function on each BasicBlock every time isEqual is called,
@@ -7992,28 +7991,31 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
 /// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
 /// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
 /// of the incoming values.
-struct SwitchSuccWrapper {
-  BasicBlock *Dest;
-  DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> *PhiPredIVs;
+struct EqualBBWrapper {
+  BasicBlock *BB;
+
+  // One Phi usually has < 8 incoming values.
+  using BB2ValueMap = SmallDenseMap<BasicBlock *, Value *, 8>;
+  using Phi2IVsMap = DenseMap<PHINode *, BB2ValueMap>;
+  Phi2IVsMap *PhiPredIVs;
 };
 
-template <> struct llvm::DenseMapInfo<const SwitchSuccWrapper *> {
-  static const SwitchSuccWrapper *getEmptyKey() {
-    return static_cast<SwitchSuccWrapper *>(
-        DenseMapInfo<void *>::getEmptyKey());
+template <> struct llvm::DenseMapInfo<const EqualBBWrapper *> {
+  static const EqualBBWrapper *getEmptyKey() {
+    return static_cast<EqualBBWrapper *>(DenseMapInfo<void *>::getEmptyKey());
   }
-  static const SwitchSuccWrapper *getTombstoneKey() {
-    return static_cast<SwitchSuccWrapper *>(
+  static const EqualBBWrapper *getTombstoneKey() {
+    return static_cast<EqualBBWrapper *>(
         DenseMapInfo<void *>::getTombstoneKey());
   }
-  static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
-    BasicBlock *Succ = SSW->Dest;
-    BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
+  static unsigned getHashValue(const EqualBBWrapper *SSW) {
+    BasicBlock *BB = SSW->BB;
+    BranchInst *BI = cast<BranchInst>(BB->getTerminator());
     assert(BI->isUnconditional() &&
            "Only supporting unconditional branches for now");
     assert(BI->getNumSuccessors() == 1 &&
            "Expected unconditional branches to have one successor");
-    assert(Succ->size() == 1 && "Expected just a single branch in the BB");
+    assert(BB->size() == 1 && "Expected just a single branch in the BB");
 
     // Since we assume the BB is just a single BranchInst with a single
     // successor, we hash as the BB and the incoming Values of its successor
@@ -8023,25 +8025,25 @@ template <> struct llvm::DenseMapInfo<const SwitchSuccWrapper *> {
     // time and passing it in SwitchSuccWrapper, but this slowed down the
     // average compile time without having any impact on the worst case compile
     // time.
-    BasicBlock *BB = BI->getSuccessor(0);
-    SmallVector<Value *> PhiValsForBB;
-    for (PHINode &Phi : BB->phis())
-      PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
-
-    return hash_combine(BB, hash_combine_range(PhiValsForBB));
+    // BB is a lone branch with no PHIs; hash its values in Succ's PHIs.
+    BasicBlock *Succ = BI->getSuccessor(0);
+    auto &IVs = *SSW->PhiPredIVs;
+    auto PhiValsForBB = map_range(
+        Succ->phis(), [&](PHINode &Phi) { return IVs[&Phi][BB]; });
+    return hash_combine(Succ, hash_combine_range(PhiValsForBB));
   }
-  static bool isEqual(const SwitchSuccWrapper *LHS,
-                      const SwitchSuccWrapper *RHS) {
-    auto EKey = DenseMapInfo<SwitchSuccWrapper *>::getEmptyKey();
-    auto TKey = DenseMapInfo<SwitchSuccWrapper *>::getTombstoneKey();
+  static bool isEqual(const EqualBBWrapper *LHS,
+                      const EqualBBWrapper *RHS) {
+    auto *EKey = DenseMapInfo<EqualBBWrapper *>::getEmptyKey();
+    auto *TKey = DenseMapInfo<EqualBBWrapper *>::getTombstoneKey();
     if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
       return LHS == RHS;
 
-    BasicBlock *A = LHS->Dest;
-    BasicBlock *B = RHS->Dest;
+    BasicBlock *A = LHS->BB;
+    BasicBlock *B = RHS->BB;
 
     // FIXME: we checked that the size of A and B are both 1 in
-    // simplifyDuplicateSwitchArms to make the Case list smaller to
+    // simplifyDuplicatePredecessors to make the Pred list smaller to
     // improve performance. If we decide to support BasicBlocks with more
     // than just a single instruction, we need to check that A.size() ==
     // B.size() here, and we need to check more than just the BranchInsts
@@ -8056,106 +8058,154 @@ template <> struct llvm::DenseMapInfo<const SwitchSuccWrapper *> {
 
     // Need to check that PHIs in successor have matching values
     BasicBlock *Succ = ABI->getSuccessor(0);
-    for (PHINode &Phi : Succ->phis()) {
-      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
-      if (PredIVs[A] != PredIVs[B])
-        return false;
-    }
-
-    return true;
+    auto IfPhiIVMatch = [A, B, &PhiPredIVs = *LHS->PhiPredIVs](PHINode &Phi) {
+      // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
+      // query
+      auto &PredIVs = PhiPredIVs[&Phi];
+      return PredIVs[A] == PredIVs[B];
+    };
+    return all_of(Succ->phis(), IfPhiIVMatch);
   }
 };
 
-bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
-                                                 DomTreeUpdater *DTU) {
+bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
+                                                   DomTreeUpdater *DTU) {
+  // Nothing to do for a null block or a block without predecessors.
+  if (!BB || pred_empty(BB))
+    return false;
+
   // Build Cases. Skip BBs that are not candidates for simplification. Mark
   // PHINodes which need to be processed into PhiPredIVs. We decide to process
   // an entire PHI at once after the loop, opposed to calling
   // getIncomingValueForBlock inside this loop, since each call to
   // getIncomingValueForBlock is O(|Preds|).
-  SmallPtrSet<PHINode *, 8> Phis;
-  SmallPtrSet<BasicBlock *, 8> Seen;
-  DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
-  DenseMap<BasicBlock *, SmallVector<unsigned, 32>> BBToSuccessorIndexes;
-  SmallVector<SwitchSuccWrapper> Cases;
-  Cases.reserve(SI->getNumSuccessors());
-
-  for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
-    BasicBlock *BB = SI->getSuccessor(I);
-
-    // FIXME: Support more than just a single BranchInst. One way we could do
-    // this is by taking a hashing approach of all insts in BB.
-    if (BB->size() != 1)
-      continue;
+  EqualBBWrapper::Phi2IVsMap PhiPredIVs;
 
-    // FIXME: Relax that the terminator is a BranchInst by checking for equality
-    // on other kinds of terminators. We decide to only support unconditional
-    // branches for now for compile time reasons.
-    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
-    if (!BI || BI->isConditional())
-      continue;
+  // Collect candidate non-entry predecessors P with:
+  // - terminator unconditional br to Succ,
+  // - does not have address taken / weird control.
+  auto Filter = [BB](BasicBlock *Pred) {
+    // Entry block cannot be eliminated or have predecessors.
+    if (Pred->isEntryBlock())
+      return false;
 
-    if (!Seen.insert(BB).second) {
-      auto It = BBToSuccessorIndexes.find(BB);
-      if (It != BBToSuccessorIndexes.end())
-        It->second.emplace_back(I);
-      continue;
-    }
+    // Must end in an unconditional branch, whose only successor is then BB.
+    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
+    if (!BI || !BI->isUnconditional())
+      return false;
 
-    // FIXME: This case needs some extra care because the terminators other than
-    // SI need to be updated. For now, consider only backedges to the SI.
-    if (BB->getUniquePredecessor() != SI->getParent())
-      continue;
+    // Avoid blocks that are "address-taken" (blockaddress) or have unusual
+    // uses.
+    if (Pred->hasAddressTaken())
+      return false;
+    if (Pred->isLandingPad())
+      return false;
 
-    // Keep track of which PHIs we need as keys in PhiPredIVs below.
-    for (BasicBlock *Succ : BI->successors())
-      Phis.insert_range(llvm::make_pointer_range(Succ->phis()));
+    // TODO: should we support Pred with >1 instructions?
+    if (Pred->size() != 1)
+      return false;
 
-    // Add the successor only if not previously visited.
-    Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
-    BBToSuccessorIndexes[BB].emplace_back(I);
-  }
+    // Avoid self-loop predecessor merging for now.
+    if (Pred == BB)
+      return false;
+
+    return true;
+  };
+
+  auto FilteredPreds = make_filter_range(predecessors(BB), Filter);
+  auto MappedPreds = map_range(FilteredPreds, [&PhiPredIVs](BasicBlock *Pred) {
+    return EqualBBWrapper{Pred, &PhiPredIVs};
+  });
+
+  SmallVector<EqualBBWrapper> Preds(MappedPreds);
+
+  if (Preds.size() < 2)
+    return false;
 
   // Precompute a data structure to improve performance of isEqual for
-  // SwitchSuccWrapper.
+  // EqualBBWrapper.
+  SmallVector<PHINode *, 8> Phis(make_pointer_range(BB->phis()));
   PhiPredIVs.reserve(Phis.size());
   for (PHINode *Phi : Phis) {
     auto &IVs =
         PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
+    // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
+    // O(|Pred|).
     for (auto &IV : Phi->incoming_values())
       IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
   }
 
-  // Build a set such that if the SwitchSuccWrapper exists in the set and
-  // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
-  // which is not in the set should be replaced with the one in the set. If the
-  // SwitchSuccWrapper is not in the set, then it should be added to the set so
-  // other SwitchSuccWrappers can check against it in the same manner. We use
-  // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
-  // around information to isEquality, getHashValue, and when doing the
-  // replacement with better performance.
-  DenseSet<const SwitchSuccWrapper *> ReplaceWith;
-  ReplaceWith.reserve(Cases.size());
+  // Group duplicates using DenseSet with custom equality/hashing.
+  // Build a set such that if the EqualBBWrapper exists in the set and another
+  // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
+  // the set should be replaced with the one in the set. If the EqualBBWrapper
+  // is not in the set, then it should be added to the set so other
+  // EqualBBWrapper can check against it in the same manner. We use
+  // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
+  // information to isEqual, getHashValue, and when doing the replacement
+  // with better performance.
+  DenseSet<const EqualBBWrapper *> Keep;
+  Keep.reserve(Preds.size());
 
   SmallVector<DominatorTree::UpdateType> Updates;
-  Updates.reserve(ReplaceWith.size());
+  Updates.reserve(Preds.size() * 2);
+
   bool MadeChange = false;
-  for (auto &SSW : Cases) {
-    // SSW is a candidate for simplification. If we find a duplicate BB,
-    // replace it.
-    const auto [It, Inserted] = ReplaceWith.insert(&SSW);
-    if (!Inserted) {
-      // We know that SI's parent BB no longer dominates the old case successor
-      // since we are making it dead.
-      Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
-      const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
-      for (unsigned Idx : Successors)
-        SI->setSuccessor(Idx, (*It)->Dest);
-      MadeChange = true;
+
+  // Helper: redirect all edges X -> DeadPred to X -> LivePred.
+  auto RedirectIncomingEdges = [&](BasicBlock *DeadPred, BasicBlock *LivePred) {
+    SmallSetVector<BasicBlock *, 8> DeadPredPreds(llvm::from_range,
+                                                  predecessors(DeadPred));
+    if (DTU) {
+      // All predecessors of DeadPred (except the common predecessor) will be
+      // moved to LivePred.
+      Updates.reserve(Updates.size() + DeadPredPreds.size() * 2);
+      SmallPtrSet<BasicBlock *, 16> LivePredPreds(llvm::from_range,
+                                                  predecessors(LivePred));
+      for (BasicBlock *PP : DeadPredPreds) {
+        // Do not modify those common predecessors of DeadPred and LivePred
+        if (!LivePredPreds.contains(PP))
+          Updates.push_back({DominatorTree::Insert, PP, LivePred});
+        Updates.push_back({DominatorTree::Delete, PP, DeadPred});
+      }
     }
+    LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
+               DeadPred->printAsOperand(dbgs()); dbgs() << " with pred ";
+               LivePred->printAsOperand(dbgs()); dbgs() << " for ";
+               BB->printAsOperand(dbgs()); dbgs() << "\n");
+    // Replace successors in all predecessors of DeadPred.
+    for (BasicBlock *PP : DeadPredPreds) {
+      Instruction *T = PP->getTerminator();
+      T->replaceSuccessorWith(DeadPred, LivePred);
+    }
+  };
+
+  // Try to eliminate duplicate predecessors.
+  for (const auto &Pred : Preds) {
+    // Pred is a candidate for simplification. If we find a duplicate BB,
+    // replace it.
+    const auto [It, Inserted] = Keep.insert(&Pred);
+    if (Inserted)
+      continue;
+
+    // Found duplicate: merge Pred.BB into the canonical block (*It)->BB.
+    BasicBlock *KeepPred = (*It)->BB;
+    BasicBlock *DeadPred = Pred.BB;
+
+    // Never merge a block into itself.
+    if (KeepPred == DeadPred)
+      continue;
+
+    // Redirect all edges into DeadPred to KeepPred.
+    RedirectIncomingEdges(DeadPred, KeepPred);
+
+    // Now DeadPred should become unreachable; leave DCE to later,
+    // but we can try to simplify it if it only branches to Succ.
+    // (We won't erase here to keep the routine simple and DT-safe.)
+    MadeChange = true;
   }
 
-  if (DTU)
+  if (DTU && !Updates.empty())
     DTU->applyUpdates(Updates);
 
   return MadeChange;
@@ -8220,9 +8270,6 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
       hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
     return requestResimplify();
 
-  if (simplifyDuplicateSwitchArms(SI, DTU))
-    return requestResimplify();
-
   if (simplifySwitchWhenUMin(SI, DTU))
     return requestResimplify();
 
@@ -8667,219 +8714,6 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
 
   return false;
 }
-/// Checking whether two BBs are equal depends on the contents of the
-/// BasicBlock and the incoming values of their successor PHINodes.
-/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
-/// calling this function on each BasicBlock every time isEqual is called,
-/// especially since the same BasicBlock may be passed as an argument multiple
-/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
-/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
-/// of the incoming values.
-struct EqualBBWrapper {
-  BasicBlock *BB;
-
-  // One Phi usually has < 8 incoming values.
-  using BB2ValueMap = SmallDenseMap<BasicBlock *, Value *, 8>;
-  using Phi2IVsMap = DenseMap<PHINode *, BB2ValueMap>;
-  Phi2IVsMap *PhiPredIVs;
-};
-
-template <> struct llvm::DenseMapInfo<const EqualBBWrapper *> {
-  static const EqualBBWrapper *getEmptyKey() {
-    return static_cast<EqualBBWrapper *>(DenseMapInfo<void *>::getEmptyKey());
-  }
-  static const EqualBBWrapper *getTombstoneKey() {
-    return static_cast<EqualBBWrapper *>(
-        DenseMapInfo<void *>::getTombstoneKey());
-  }
-  static unsigned getHashValue(const EqualBBWrapper *SSW) {
-    BasicBlock *BB = SSW->BB;
-    BranchInst *BI = cast<BranchInst>(BB->getTerminator());
-    assert(BI->isUnconditional() &&
-           "Only supporting unconditional branches for now");
-    assert(BI->getNumSuccessors() == 1 &&
-           "Expected unconditional branches to have one successor");
-    assert(BB->size() == 1 && "Expected just a single branch in the BB");
-
-    // Since we assume the BB is just a single BranchInst with a single
-    // successor, we hash as the BB and the incoming Values of its successor
-    // PHIs. Initially, we tried to just use the successor BB as the hash, but
-    // including the incoming PHI values leads to better performance.
-    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
-    // time and passing it in SwitchSuccWrapper, but this slowed down the
-    // average compile time without having any impact on the worst case compile
-    // time.
-    BasicBlock *Succ = BI->getSuccessor(0);
-    auto PhiValsForBB = map_range(
-        BB->phis(), [BB, &PhiPredIVs = *SSW->PhiPredIVs](PHINode &Phi) {
-          return PhiPredIVs[&Phi][BB];
-        });
-    return hash_combine(Succ, hash_combine_range(PhiValsForBB));
-  }
-  static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
-    auto *EKey = DenseMapInfo<EqualBBWrapper *>::getEmptyKey();
-    auto *TKey = DenseMapInfo<EqualBBWrapper *>::getTombstoneKey();
-    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
-      return LHS == RHS;
-
-    BasicBlock *A = LHS->BB;
-    BasicBlock *B = RHS->BB;
-
-    // FIXME: we checked that the size of A and B are both 1 in
-    // simplifyDuplicateSwitchArms to make the Case list smaller to
-    // improve performance. If we decide to support BasicBlocks with more
-    // than just a single instruction, we need to check that A.size() ==
-    // B.size() here, and we need to check more than just the BranchInsts
-    // for equality.
-
-    BranchInst *ABI = cast<BranchInst>(A->getTerminator());
-    BranchInst *BBI = cast<BranchInst>(B->getTerminator());
-    assert(ABI->isUnconditional() && BBI->isUnconditional() &&
-           "Only supporting unconditional branches for now");
-    if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
-      return false;
-
-    // Need to check that PHIs in successor have matching values
-    BasicBlock *Succ = ABI->getSuccessor(0);
-    auto IfPhiIVMatch = [A, B, &PhiPredIVs = *LHS->PhiPredIVs](PHINode &Phi) {
-      // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
-      // query
-      auto &PredIVs = PhiPredIVs[&Phi];
-      return PredIVs[A] == PredIVs[B];
-    };
-    return all_of(Succ->phis(), IfPhiIVMatch);
-  }
-};
-
-bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
-                                                   DomTreeUpdater *DTU) {
-  // Need at least 2 predecessors to do anything.
-  if (!BB || pred_empty(BB))
-    return false;
-  // Precompute PHI incoming values in BB for all candidate preds.
-  // PhiPredIVs[Phi][Pred] = incoming value
-  EqualBBWrapper::Phi2IVsMap PhiPredIVs;
-
-  // Collect candidate non-entry predecessors P with:
-  // - terminator unconditional br to Succ,
-  // - does not have address taken / weird control.
-  auto Filter = [BB](BasicBlock *Pred) {
-    // Entry block cannot be eliminated or have predecessors.
-    if (Pred->isEntryBlock())
-      return false;
-
-    // Single successor and must be Succ.
-    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
-    if (!BI || !BI->isUnconditional())
-      return false;
-
-    // Avoid blocks that are "address-taken" (blockaddress) or have unusual
-    // uses.
-    if (Pred->hasAddressTaken())
-      return false;
-    if (Pred->isLandingPad())
-      return false;
-
-    // TODO: should we support Pred with >1 instructions?
-    if (Pred->size() != 1)
-      return false;
-
-    // Avoid self-loop predecessor merging for now.
-    if (Pred == BB)
-      return false;
-
-    return true;
-  };
-
-  auto FilteredPreds = make_filter_range(predecessors(BB), Filter);
-
-  SmallVector<EqualBBWrapper> Preds(
-      map_range(FilteredPreds, [&PhiPredIVs](BasicBlock *Pred) {
-        return EqualBBWrapper{Pred, &PhiPredIVs};
-      }));
-
-  if (Preds.size() < 2)
-    return false;
-
-  SmallVector<PHINode *, 8> Phis(make_pointer_range(BB->phis()));
-
-  PhiPredIVs.reserve(Phis.size());
-  for (PHINode *Phi : Phis) {
-    auto &IVs =
-        PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
-    // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
-    // O(|Pred|).
-    for (auto &IV : Phi->incoming_values())
-      IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
-  }
-
-  // Group duplicates using DenseSet with custom equality/hashing.
-  DenseSet<const EqualBBWrapper *> Keep;
-  Keep.reserve(Preds.size());
-
-  SmallVector<DominatorTree::UpdateType> Updates;
-  Updates.reserve(Preds.size() * 2);
-
-  bool MadeChange = false;
-
-  // Helper: redirect all edges X -> DeadPred to X -> LivePred.
-  auto RedirectIncomingEdges = [&](BasicBlock *DeadPred, BasicBlock *LivePred) {
-    // Replace successors in all predecessors of DeadPred.
-    SmallSetVector<BasicBlock *, 8> DeadPredPreds(llvm::from_range,
-                                                  predecessors(DeadPred));
-    if (DTU) {
-      // All predecessors of DeadPred (except the common predecessor) will be
-      // moved to LivePred.
-      Updates.reserve(Updates.size() + DeadPredPreds.size() * 2);
-      SmallPtrSet<BasicBlock *, 16> LivePredPreds(llvm::from_range,
-                                                  predecessors(LivePred));
-      for (BasicBlock *PP : DeadPredPreds) {
-        // Do not modify those common predecessors of DeadPred and LivePred
-        if (!LivePredPreds.contains(PP))
-          Updates.push_back({DominatorTree::Insert, PP, LivePred});
-        Updates.push_back({DominatorTree::Delete, PP, DeadPred});
-      }
-    }
-    LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
-               DeadPred->printAsOperand(dbgs()); dbgs() << " with pred ";
-               LivePred->printAsOperand(dbgs()); dbgs() << " for ";
-               BB->printAsOperand(dbgs()); dbgs() << "\n");
-    for (BasicBlock *PP : DeadPredPreds) {
-      Instruction *T = PP->getTerminator();
-      T->replaceSuccessorWith(DeadPred, LivePred);
-    }
-  };
-
-  // Try to canonicalize duplicates.
-  for (const auto &Pred : Preds) {
-    // Pred is a candidate for simplification. If we find a duplicate BB,
-    // replace it.
-    const auto [It, Inserted] = Keep.insert(&Pred);
-    if (Inserted)
-      continue;
-
-    // Found duplicate: merge P into canonical predecessor It->Pred.
-    BasicBlock *KeepPred = (*It)->BB;
-    BasicBlock *DeadPred = Pred.BB;
-
-    // Avoid merging if either is the other's predecessor in weird ways.
-    if (KeepPred == DeadPred)
-      continue;
-
-    // Redirect all edges into DeadPred to KeepPred.
-    RedirectIncomingEdges(DeadPred, KeepPred);
-
-    // Now DeadPred should become unreachable; leave DCE to later,
-    // but we can try to simplify it if it only branches to Succ.
-    // (We won't erase here to keep the routine simple and DT-safe.)
-    MadeChange = true;
-  }
-
-  if (DTU && !Updates.empty())
-    DTU->applyUpdates(Updates);
-
-  return MadeChange;
-}
 
 /// Check if passing a value to an instruction will cause undefined behavior.
 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {

>From 6725bd3bad8c18055c33e92d697910797c042e86 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Mon, 22 Dec 2025 17:01:23 +0800
Subject: [PATCH 7/8] CompTime: retain canonical loop

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b56348b0ba9ac..80abcdfbb20e4 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -8074,6 +8074,11 @@ bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
   if (!BB || pred_empty(BB))
     return false;
 
+  // Compile-time consideration: keep loops in canonical form; otherwise,
+  // more time is spent in the later loop canonicalization.
+  if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BB))
+    return false;
+
   // Build Cases. Skip BBs that are not candidates for simplification. Mark
   // PHINodes which need to be processed into PhiPredIVs. We decide to process
   // an entire PHI at once after the loop, opposed to calling

>From 77105cacd6fa7537134d15ace5e3934d860b1a17 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Mon, 22 Dec 2025 17:01:54 +0800
Subject: [PATCH 8/8] Update tests after generalizing simplifyDuplicateSwitchArms

---
 .../Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll     | 4 ++--
 llvm/test/Transforms/SimplifyCFG/HoistCode.ll                 | 4 ++--
 llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
index ea81e0e4fa48c..0adedd297a58c 100644
--- a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
@@ -141,12 +141,12 @@ define i32 @PR34471(i32 %x) {
 ; NO_FWD-NEXT:      i32 19, label [[IF19:%.*]]
 ; NO_FWD-NEXT:      i32 42, label [[IF19]]
 ; NO_FWD-NEXT:    ]
-; NO_FWD:       if19:
+; NO_FWD:       if42:
 ; NO_FWD-NEXT:    br label [[RETURN]]
 ; NO_FWD:       else3:
 ; NO_FWD-NEXT:    br label [[RETURN]]
 ; NO_FWD:       return:
-; NO_FWD-NEXT:    [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 17, [[ENTRY:%.*]] ], [ 0, [[ELSE3]] ]
+; NO_FWD-NEXT:    [[R:%.*]] = phi i32 [ 17, [[ENTRY:%.*]] ], [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ]
 ; NO_FWD-NEXT:    ret i32 [[R]]
 ;
 ; FWD-LABEL: @PR34471(
diff --git a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
index f17652cc5e471..ca1792e281a92 100644
--- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
+++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll
@@ -67,10 +67,10 @@ define float @PR39535min_switch(i64 %i, float %x) {
 ; CHECK-NEXT:      i64 1, label [[BB1:%.*]]
 ; CHECK-NEXT:      i64 2, label [[BB1]]
 ; CHECK-NEXT:    ]
-; CHECK:       bb1:
+; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi fast float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[X:%.*]], [[BB1]] ]
+; CHECK-NEXT:    [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret float [[COND]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
index e642cd264416e..1e37b42334e8d 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
@@ -274,12 +274,12 @@ define i8 @switch_to_select_two_case_results_no_default(i32 %i) !prof !0 {
 ; CHECK-NEXT:      i32 4, label [[CASE3:%.*]]
 ; CHECK-NEXT:      i32 6, label [[CASE3]]
 ; CHECK-NEXT:    ], !prof [[PROF5]]
-; CHECK:       case3:
+; CHECK:       case4:
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       default:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       end:
-; CHECK-NEXT:    [[T0:%.*]] = phi i8 [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ], [ 44, [[CASE3]] ]
+; CHECK-NEXT:    [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
 ; CHECK-NEXT:    ret i8 [[T0]]
 ;
 entry:



More information about the llvm-commits mailing list