[llvm] [SimplifyCFG] Sink common code from cond preds (PR #173353)

Kunqiu Chen via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 24 23:29:52 PST 2025


https://github.com/Camsyn updated https://github.com/llvm/llvm-project/pull/173353

>From 733fac2aba22e6a367889760f4c0d4667a965100 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 23 Dec 2025 18:29:37 +0800
Subject: [PATCH 1/6] Before-commit test

---
 .../Transforms/SimplifyCFG/sink-cond-preds.ll | 153 ++++++++++++++++++
 1 file changed, 153 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll b/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
new file mode 100644
index 0000000000000..93f9ab6daaf70
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -sink-common-insts -S | FileCheck %s
+; RUN: opt < %s -passes='simplifycfg<sink-common-insts>' -S | FileCheck %s
+
+
+; Simplified from node/libnode.crypto_context.ll
+define i32 @foo(ptr %ssl, ptr %name, ptr %iv, ptr %ectx, ptr %hctx, i32 %enc) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @fn1(ptr [[SSL:%.*]])
+; CHECK-NEXT:    [[CALL1:%.*]] = call ptr @fn2(ptr [[CALL]], i32 0)
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[ENC:%.*]], 0
+; CHECK-NEXT:    [[TICKET_KEY_NAME_13:%.*]] = getelementptr i8, ptr [[CALL1]], i64 80
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[NAME:%.*]], ptr [[TICKET_KEY_NAME_13]], i64 16, i1 false)
+; CHECK-NEXT:    [[CALL2:%.*]] = call i8 @fn3(ptr [[IV:%.*]], i64 16)
+; CHECK-NEXT:    [[TOBOOL_I:%.*]] = trunc i8 [[CALL2]] to i1
+; CHECK-NEXT:    br i1 [[TOBOOL_I]], label [[COND_PRED1:%.*]], label [[RETURN:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[BCMP:%.*]] = call i32 @bcmp(ptr [[NAME]], ptr [[TICKET_KEY_NAME_13]], i64 16)
+; CHECK-NEXT:    [[CMP16_NOT:%.*]] = icmp eq i32 [[BCMP]], 0
+; CHECK-NEXT:    br i1 [[CMP16_NOT]], label [[COND_PRED2:%.*]], label [[RETURN]]
+; CHECK:       cond_pred1:
+; CHECK-NEXT:    [[CALL4:%.*]] = call ptr @fn4()
+; CHECK-NEXT:    [[TICKET_KEY_AES_:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
+; CHECK-NEXT:    [[CALL6:%.*]] = call i32 @fn5(ptr [[ECTX:%.*]], ptr [[CALL4]], ptr null, ptr [[TICKET_KEY_AES_]], ptr [[IV]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[CALL6]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[RETURN]], label [[COMMON:%.*]]
+; CHECK:       cond_pred2:
+; CHECK-NEXT:    [[CALL19:%.*]] = call ptr @fn4()
+; CHECK-NEXT:    [[TICKET_KEY_AES_20:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
+; CHECK-NEXT:    [[CALL22:%.*]] = call i32 @fn5(ptr [[ECTX]], ptr [[CALL19]], ptr null, ptr [[TICKET_KEY_AES_20]], ptr [[IV]])
+; CHECK-NEXT:    [[CMP23:%.*]] = icmp slt i32 [[CALL22]], 1
+; CHECK-NEXT:    br i1 [[CMP23]], label [[RETURN]], label [[COMMON]]
+; CHECK:       common:
+; CHECK-NEXT:    [[TICKET_KEY_HMAC_25:%.*]] = getelementptr i8, ptr [[CALL1]], i64 112
+; CHECK-NEXT:    [[CALL27:%.*]] = call ptr @fn6()
+; CHECK-NEXT:    [[CALL28:%.*]] = call i32 @fn7(ptr [[HCTX:%.*]], ptr [[TICKET_KEY_HMAC_25]], i32 16, ptr [[CALL27]], ptr null)
+; CHECK-NEXT:    [[CMP29:%.*]] = icmp slt i32 [[CALL28]], 1
+; CHECK-NEXT:    [[SPEC_SELECT11:%.*]] = select i1 [[CMP29]], i32 -1, i32 1
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ -1, [[COND_PRED1]] ], [ -1, [[COND_PRED2]] ], [ -1, [[IF_THEN]] ], [ 0, [[IF_END]] ], [ [[SPEC_SELECT11]], [[COMMON]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  %call = call ptr @fn1(ptr %ssl)
+  %call1 = call ptr @fn2(ptr %call, i32 0)
+  %tobool.not = icmp eq i32 %enc, 0
+  %ticket_key_name_13 = getelementptr i8, ptr %call1, i64 80
+  br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  call void @llvm.memcpy.p0.p0.i64(ptr %name, ptr %ticket_key_name_13, i64 16, i1 false)
+  %call2 = call i8 @fn3(ptr %iv, i64 16)
+  %tobool.i = trunc i8 %call2 to i1
+  br i1 %tobool.i, label %cond_pred1, label %return
+
+if.end:                                           ; preds = %entry
+  %bcmp = call i32 @bcmp(ptr %name, ptr %ticket_key_name_13, i64 16)
+  %cmp16.not = icmp eq i32 %bcmp, 0
+  br i1 %cmp16.not, label %cond_pred2, label %return
+
+cond_pred1:                                       ; preds = %if.then
+  %call4 = call ptr @fn4()
+  %ticket_key_aes_ = getelementptr i8, ptr %call1, i64 96
+  %call6 = call i32 @fn5(ptr %ectx, ptr %call4, ptr null, ptr %ticket_key_aes_, ptr %iv)
+  %cmp = icmp slt i32 %call6, 1
+  br i1 %cmp, label %return, label %common
+
+cond_pred2:                                       ; preds = %if.end
+  %call19 = call ptr @fn4()
+  %ticket_key_aes_20 = getelementptr i8, ptr %call1, i64 96
+  %call22 = call i32 @fn5(ptr %ectx, ptr %call19, ptr null, ptr %ticket_key_aes_20, ptr %iv)
+  %cmp23 = icmp slt i32 %call22, 1
+  br i1 %cmp23, label %return, label %common
+
+common:                                           ; preds = %cond_pred2, %cond_pred1
+  %ticket_key_hmac_25 = getelementptr i8, ptr %call1, i64 112
+  %call27 = call ptr @fn6()
+  %call28 = call i32 @fn7(ptr %hctx, ptr %ticket_key_hmac_25, i32 16, ptr %call27, ptr null)
+  %cmp29 = icmp slt i32 %call28, 1
+  %spec.select11 = select i1 %cmp29, i32 -1, i32 1
+  br label %return
+
+return:                                           ; preds = %common, %cond_pred2, %cond_pred1, %if.end, %if.then
+  %retval.0 = phi i32 [ -1, %cond_pred1 ], [ -1, %cond_pred2 ], [ -1, %if.then ], [ 0, %if.end ], [ %spec.select11, %common ]
+  ret i32 %retval.0
+}
+
+; Simplified from compiler-rt/x86.c.ll
+define ptr @getAMDProcessorTypeAndSubtype(i32 %Family, ptr %CpuModel, i1 %cmp35) {
+; CHECK-LABEL: @getAMDProcessorTypeAndSubtype(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[FAMILY:%.*]], label [[END:%.*]] [
+; CHECK-NEXT:      i32 26, label [[COND_PRED1:%.*]]
+; CHECK-NEXT:      i32 21, label [[COND_PRED2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       cond_pred1:
+; CHECK-NEXT:    store i32 0, ptr [[CPUMODEL:%.*]], align 4
+; CHECK-NEXT:    br i1 [[CMP35:%.*]], label [[END]], label [[COMMON:%.*]]
+; CHECK:       cond_pred2:
+; CHECK-NEXT:    store i32 0, ptr [[CPUMODEL]], align 4
+; CHECK-NEXT:    br i1 [[CMP35]], label [[END]], label [[COMMON]]
+; CHECK:       end:
+; CHECK-NEXT:    ret ptr null
+; CHECK:       common:
+; CHECK-NEXT:    [[PHI1:%.*]] = phi i32 [ 0, [[COND_PRED1]] ], [ 0, [[COND_PRED2]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 1, [[COND_PRED1]] ], [ 2, [[COND_PRED2]] ]
+; CHECK-NEXT:    call void @use(i32 [[PHI1]])
+; CHECK-NEXT:    call void @use(i32 [[PHI2]])
+; CHECK-NEXT:    br label [[END]]
+;
+entry:
+  switch i32 %Family, label %end [
+  i32 26, label %cond_pred1
+  i32 21, label %cond_pred2
+  ]
+
+cond_pred1:                                       ; preds = %entry
+  store i32 0, ptr %CpuModel, align 4
+  br i1 %cmp35, label %end, label %common
+
+cond_pred2:                                       ; preds = %entry
+  store i32 0, ptr %CpuModel, align 4
+  br i1 %cmp35, label %end, label %common
+
+end:                                              ; preds = %common, %cond_pred2, %cond_pred1, %entry
+  ret ptr null
+
+common:                                           ; preds = %cond_pred2, %cond_pred1
+  %phi1 = phi i32 [ 0, %cond_pred1 ], [ 0, %cond_pred2 ]
+  %phi2 = phi i32 [ 1, %cond_pred1 ], [ 2, %cond_pred2 ]
+  call void @use(i32 %phi1)
+  call void @use(i32 %phi2)
+  br label %end
+}
+
+
+declare void @use(i32)
+
+declare ptr @fn1(ptr)
+declare ptr @fn2(ptr, i32)
+declare i8 @fn3(ptr, i64)
+declare ptr @fn4()
+declare i32 @fn5(ptr, ptr, ptr, ptr, ptr)
+declare ptr @fn6()
+declare i32 @fn7(ptr, ptr, i32, ptr, ptr)
+
+declare i32 @bcmp(ptr, ptr, i64)
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

>From 91bc4ca82e16a01c30ab2866b5b493b29b90f989 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 23 Dec 2025 18:46:57 +0800
Subject: [PATCH 2/6] feat: sink common code from cond preds

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 373 +++++++++++++++++-----
 1 file changed, 291 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2a957737697c3..1738be44ca33a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2297,15 +2297,23 @@ static bool canSinkInstructions(
 // Assuming canSinkInstructions(Blocks) has returned true, sink the last
 // instruction of every block in Blocks to their common successor, commoning
 // into one instruction.
-static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
-  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
+// If CondBrMoveTo is not null, move the conditional branch in Blocks to
+// CondBrMoveTo.
+static void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
+                                BasicBlock *CondBrMoveTo = nullptr) {
+  auto *BBEnd =
+      CondBrMoveTo ? CondBrMoveTo : Blocks[0]->getTerminator()->getSuccessor(0);
 
   // canSinkInstructions returning true guarantees that every block has at
   // least one non-terminator instruction.
   SmallVector<Instruction*,4> Insts;
   for (auto *BB : Blocks) {
     Instruction *I = BB->getTerminator();
-    I = I->getPrevNode();
+    if (CondBrMoveTo)
+      assert(isa<BranchInst>(I) && cast<BranchInst>(I)->isConditional() &&
+             "Expected a conditional branch");
+    else
+      I = I->getPrevNode();
     Insts.push_back(I);
   }
 
@@ -2368,14 +2376,15 @@ static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
       }
     }
 
-  for (User *U : make_early_inc_range(I0->users())) {
-    // canSinkLastInstruction checked that all instructions are only used by
-    // phi nodes in a way that allows replacing the phi node with the common
-    // instruction.
-    auto *PN = cast<PHINode>(U);
-    PN->replaceAllUsesWith(I0);
-    PN->eraseFromParent();
-  }
+  if (!CondBrMoveTo)
+    for (User *U : make_early_inc_range(I0->users())) {
+      // canSinkLastInstruction checked that all instructions are only used by
+      // phi nodes in a way that allows replacing the phi node with the common
+      // instruction.
+      auto *PN = cast<PHINode>(U);
+      PN->replaceAllUsesWith(I0);
+      PN->eraseFromParent();
+    }
 
   // Finally nuke all instructions apart from the common instruction.
   for (auto *I : Insts) {
@@ -2389,85 +2398,131 @@ static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   }
 }
 
-/// Check whether BB's predecessors end with unconditional branches. If it is
-/// true, sink any common code from the predecessors to BB.
-static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
-                                           DomTreeUpdater *DTU) {
-  // We support two situations:
-  //   (1) all incoming arcs are unconditional
-  //   (2) there are non-unconditional incoming arcs
-  //
-  // (2) is very common in switch defaults and
-  // else-if patterns;
-  //
-  //   if (a) f(1);
-  //   else if (b) f(2);
-  //
-  // produces:
-  //
-  //       [if]
-  //      /    \
-  //    [f(1)] [if]
-  //      |     | \
-  //      |     |  |
-  //      |  [f(2)]|
-  //       \    | /
-  //        [ end ]
-  //
-  // [end] has two unconditional predecessor arcs and one conditional. The
-  // conditional refers to the implicit empty 'else' arc. This conditional
-  // arc can also be caused by an empty default block in a switch.
-  //
-  // In this case, we attempt to sink code from all *unconditional* arcs.
-  // If we can sink instructions from these arcs (determined during the scan
-  // phase below) we insert a common successor for all unconditional arcs and
-  // connect that to [end], to enable sinking:
-  //
-  //       [if]
-  //      /    \
-  //    [x(1)] [if]
-  //      |     | \
-  //      |     |  \
-  //      |  [x(2)] |
-  //       \   /    |
-  //   [sink.split] |
-  //         \     /
-  //         [ end ]
-  //
-  SmallVector<BasicBlock*,4> UnconditionalPreds;
-  bool HaveNonUnconditionalPredecessors = false;
-  for (auto *PredBB : predecessors(BB)) {
-    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
-    if (PredBr && PredBr->isUnconditional())
-      UnconditionalPreds.push_back(PredBB);
-    else
-      HaveNonUnconditionalPredecessors = true;
+/// Modified from UpdatePHINodes in BasicBlockUtils.cpp.
+/// Update the PHI nodes in OrigBB to include the values coming from NewBB.
+/// This also updates AliasAnalysis, if available.
+///           phi [ ..., %pred0 ], [ ..., %pred1 ], ...
+///                               to
+///           phi [ %phi, %sink.split ], ...
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+                           ArrayRef<BasicBlock *> Preds) {
+  // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
+  SmallPtrSet<BasicBlock *, 16> PredSet(llvm::from_range, Preds);
+  for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I);) {
+    PHINode *PN = cast<PHINode>(I++);
+
+    // Check to see if all of the values coming in are the same.  If so, we
+    // don't need to create a new PHI node, unless it's needed for LCSSA.
+    Value *InVal = nullptr;
+    InVal = PN->getIncomingValueForBlock(Preds[0]);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      if (!PredSet.count(PN->getIncomingBlock(i)))
+        continue;
+      if (!InVal)
+        InVal = PN->getIncomingValue(i);
+      else if (InVal != PN->getIncomingValue(i)) {
+        InVal = nullptr;
+        break;
+      }
+    }
+
+    if (InVal) {
+      // If all incoming values for the new PHI would be the same, just don't
+      // make a new PHI.  Instead, just remove the incoming values from the old
+      // PHI.
+      PN->removeIncomingValueIf(
+          [&](unsigned Idx) {
+            return PredSet.contains(PN->getIncomingBlock(Idx));
+          },
+          /* DeletePHIIfEmpty */ false);
+
+      // Add an incoming value to the PHI node in the loop for the preheader
+      // edge.
+      PN->addIncoming(InVal, NewBB);
+      continue;
+    }
+
+    // If the values coming into the block are not the same, we need a new
+    // PHI.
+    // Create the new PHI node, insert it into NewBB at the end of the block
+    PHINode *NewPHI =
+        PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph",
+                        NewBB->getFirstInsertionPt());
+
+    // NOTE! This loop walks backwards for a reason! First off, this minimizes
+    // the cost of removal if we end up removing a large number of values, and
+    // second off, this ensures that the indices for the incoming values aren't
+    // invalidated when we remove one.
+    for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
+      BasicBlock *IncomingBB = PN->getIncomingBlock(i);
+      if (PredSet.count(IncomingBB)) {
+        Value *V = PN->removeIncomingValue(i, false);
+        NewPHI->addIncoming(V, IncomingBB);
+      }
+    }
+
+    PN->addIncoming(NewPHI, NewBB);
   }
-  if (UnconditionalPreds.size() < 2)
+}
+
+/// Try to sink common instructions from predecessors of SinkBB into SinkBB.
+static bool trySinkFromPredsIntoBB(BasicBlock *SinkBB,
+                                   SmallVectorImpl<BasicBlock *> &Preds,
+                                   bool HaveNonUnconditionalPredecessors,
+                                   DomTreeUpdater *DTU) {
+  if (Preds.size() < 2)
     return false;
 
+  const bool IsConditionalPreds =
+      cast<BranchInst>(Preds[0]->getTerminator())->isConditional();
+
+  assert((!IsConditionalPreds || HaveNonUnconditionalPredecessors) &&
+         "HaveNonUnconditionalPredecessors must be true if IsConditionalPreds");
+
   // We take a two-step approach to tail sinking. First we scan from the end of
-  // each block upwards in lockstep. If the n'th instruction from the end of each
-  // block can be sunk, those instructions are added to ValuesToSink and we
+  // each block upwards in lockstep. If the n'th instruction from the end of
+  // each block can be sunk, those instructions are added to ValuesToSink and we
   // carry on. If we can sink an instruction but need to PHI-merge some operands
   // (because they're not identical in each instruction) we add these to
   // PHIOperands.
-  // We prepopulate PHIOperands with the phis that already exist in BB.
+  // We prepopulate PHIOperands with the phis that already exist in SinkBB.
   DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
-  for (PHINode &PN : BB->phis()) {
+  for (PHINode &PN : SinkBB->phis()) {
     SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
     for (const Use &U : PN.incoming_values())
       IncomingVals.insert({PN.getIncomingBlock(U), &U});
-    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
-    for (BasicBlock *Pred : UnconditionalPreds)
+    auto &Ops = PHIOperands[IncomingVals[Preds[0]]];
+    for (BasicBlock *Pred : Preds)
       Ops.push_back(*IncomingVals[Pred]);
   }
 
+  if (IsConditionalPreds) {
+    // If Preds are conditional, there might be no phi-uses for the last
+    // instruction, we need to add cond-br uses.
+    BranchInst *T0 = cast<BranchInst>(Preds[0]->getTerminator());
+    assert(T0->isConditional() && "Expected a conditional branch");
+    Use *Pred0Use = &T0->getOperandUse(0);
+    BasicBlock *ThenBB = T0->getSuccessor(0);
+    for (BasicBlock *Pred : Preds) {
+      BranchInst *BI = cast<BranchInst>(Pred->getTerminator());
+      assert(BI->isConditional() && "Expected a conditional branch");
+      if (BI->getSuccessor(0) != ThenBB) {
+        // br %cond1, label %common, label %end
+        //            mismatch
+        // br %cond2, label %end, label %common
+        // TODO: if %cond comes from outer block, we can rewrite the cond brs to
+        // make them match.
+        return false;
+      }
+      Use &U = BI->getOperandUse(0);
+      PHIOperands[Pred0Use].push_back(U);
+    }
+  }
+
   int ScanIdx = 0;
-  SmallPtrSet<Value*,4> InstructionsToSink;
-  LockstepReverseIterator<true> LRI(UnconditionalPreds);
-  while (LRI.isValid() &&
-         canSinkInstructions(*LRI, PHIOperands)) {
+  SmallPtrSet<Value *, 4> InstructionsToSink;
+  LockstepReverseIterator<true> LRI(Preds);
+  while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) {
     LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                       << "\n");
     InstructionsToSink.insert_range(*LRI);
@@ -2479,7 +2534,10 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
   if (ScanIdx == 0)
     return false;
 
-  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
+  // NOTE: profitability is still based on the original BB (the final merge
+  // point), not the synthetic SinkBB we might introduce for (3).
+  bool followedByDeoptOrUnreachable =
+      IsBlockFollowedByDeoptOrUnreachable(SinkBB);
 
   if (!followedByDeoptOrUnreachable) {
     // Check whether this is the pointer operand of a load/store.
@@ -2612,9 +2670,47 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
     LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
     // We have a conditional edge and we're going to sink some instructions.
     // Insert a new block postdominating all blocks we're going to sink from.
-    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
+    BasicBlock *SinkSplit =
+        SplitBlockPredecessors(SinkBB, Preds, ".sink.split", DTU);
+    if (!SinkSplit)
       // Edges couldn't be split.
       return false;
+    // If Preds are conditional, we need to preprocess them to make them
+    // unconditional branching to SinkSplit.
+    if (IsConditionalPreds) {
+      // Sink the cond br from Preds into SinkSplit
+      sinkLastInstruction(Preds, SinkSplit);
+      // Erase the legacy uncond br from SinkSplit
+      SinkSplit->getTerminator()->eraseFromParent();
+      BranchInst *NewBr = cast<BranchInst>(SinkSplit->getTerminator());
+      // Correct `br %cond, %sink.split, %common` to `br %cond, %end, %common`
+      NewBr->replaceSuccessorWith(SinkSplit, SinkBB);
+      // Connect Pred to SinkSplit with unconditional branches.
+      for (BasicBlock *Pred : Preds) {
+        assert(!Pred->getTerminator() &&
+               "Pred's terminator should be moved to SinkSplit");
+        BranchInst::Create(SinkSplit, Pred);
+      }
+      BasicBlock *CommonBlock = NewBr->getSuccessor(0) == SinkBB
+                                    ? NewBr->getSuccessor(1)
+                                    : NewBr->getSuccessor(0);
+      // Check if CommonBlock contains any PHIs.
+      if (!CommonBlock->phis().empty()) {
+        // Handle PHIs in Common
+        // phi [ ..., %pred0 ], [ ..., %pred1 ], ...
+        //                     to
+        // phi [ ..., %sink.split ], ...
+        UpdatePHINodes(CommonBlock, SinkSplit, Preds);
+      }
+      if (DTU) {
+        SmallVector<DominatorTree::UpdateType, 8> Updates;
+        Updates.reserve(1 + Preds.size());
+        Updates.push_back({DominatorTree::Insert, SinkSplit, CommonBlock});
+        for (BasicBlock *Pred : Preds)
+          Updates.push_back({DominatorTree::Delete, Pred, CommonBlock});
+        DTU->applyUpdates(Updates);
+      }
+    }
     Changed = true;
   }
 
@@ -2624,8 +2720,8 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
   // many PHI instructions to be generated (currently only one PHI is allowed
   // per sunk instruction).
   //
-  // We can use InstructionsToSink to discount values needing PHI-merging that will
-  // actually be sunk in a later iteration. This allows us to be more
+  // We can use InstructionsToSink to discount values needing PHI-merging that
+  // will actually be sunk in a later iteration. This allows us to be more
   // aggressive in what we sink. This does allow a false positive where we
   // sink presuming a later value will also be sunk, but stop half way through
   // and never actually sink it which means we produce more PHIs than intended.
@@ -2633,14 +2729,13 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
   int SinkIdx = 0;
   for (; SinkIdx != ScanIdx; ++SinkIdx) {
     LLVM_DEBUG(dbgs() << "SINK: Sink: "
-                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
-                      << "\n");
+                      << *Preds[0]->getTerminator()->getPrevNode() << "\n");
 
     // Because we've sunk every instruction in turn, the current instruction to
     // sink is always at index 0.
     LRI.reset();
 
-    sinkLastInstruction(UnconditionalPreds);
+    sinkLastInstruction(Preds);
     NumSinkCommonInstrs++;
     Changed = true;
   }
@@ -2649,6 +2744,120 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
   return Changed;
 }
 
+/// Check whether BB's predecessors end with branches. If it is
+/// true, sink any common code from the predecessors to BB.
+static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
+                                           DomTreeUpdater *DTU) {
+  // We support three situations:
+  //   (1) all incoming arcs are unconditional
+  //   (2) there are non-unconditional incoming arcs
+  //   (3) there are incoming conditional arcs P->BB, where P's other successor
+  //       unconditionally branches to BB
+  //
+  // (2) is very common in switch defaults and
+  // else-if patterns;
+  //
+  //   if (a) f(1);
+  //   else if (b) f(2);
+  //
+  // produces:
+  //
+  //       [if]
+  //      /    \
+  //    [f(1)] [if]
+  //      |     | \
+  //      |     |  |
+  //      |  [f(2)]|
+  //       \    | /
+  //        [ end ]
+  //
+  // [end] has two unconditional predecessor arcs and one conditional. The
+  // conditional refers to the implicit empty 'else' arc. This conditional
+  // arc can also be caused by an empty default block in a switch.
+  //
+  // In this case, we attempt to sink code from all *unconditional* arcs.
+  // If we can sink instructions from these arcs (determined during the scan
+  // phase below) we insert a common successor for all unconditional arcs and
+  // connect that to [end], to enable sinking:
+  //
+  //       [if]
+  //      /    \
+  //    [x(1)] [if]
+  //      |     | \
+  //      |     |  \
+  //      |  [x(2)] |
+  //       \   /    |
+  //   [sink.split] |
+  //         \     /
+  //         [ end ]
+  //
+  // (3) is common after SimplifyCFG merging some uncond blocks, e.g.:
+  //
+  //    [f(1)] [f(2)]
+  //      /\    /\
+  //     /  \  /  \
+  //    |  [comm]  |
+  //     \   |    /
+  //      \  |   /
+  //       [ end ]
+  //
+  // Here, [end] has incoming *conditional* arcs from [f(1)] and [f(2)], but the
+  // other successor ([comm]) unconditionally branches to [end].
+  // In this case, we attempt to sink from all such conditional predecessors by
+  // inserting a common successor for them (also named ".sink.split") and moving
+  // the conditional branch into that block:
+  //
+  //    [f(1)] [f(2)]
+  //       \    /
+  //     [sink.split]
+  //        /   \
+  //     [comm]  |
+  //        \   /
+  //       [ end ]
+  //
+  // This enables sinking from the (now) unconditional predecessors into
+  // [sink.split].
+
+  SmallVector<BasicBlock *, 4> UnconditionalPreds;
+  bool HaveNonUnconditionalPredecessors = false;
+
+  // For (3). Common successor -> related conditional predecessors.
+  SmallDenseMap<BasicBlock *, SmallVector<BasicBlock *, 4>, 2>
+      ConditionalPredsMap;
+
+  for (auto *PredBB : predecessors(BB)) {
+    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
+    if (PredBr && PredBr->isUnconditional()) {
+      UnconditionalPreds.push_back(PredBB);
+      continue;
+    }
+
+    HaveNonUnconditionalPredecessors = true;
+    if (!PredBr || PredBr->getNumSuccessors() != 2)
+      continue;
+    assert(PredBr->isConditional() && "Expected conditional branch");
+
+    // Try to match (3): PredBB has a conditional edge to BB, and the other
+    // successor unconditionally branches to BB.
+    BasicBlock *S0 = PredBr->getSuccessor(0), *S1 = PredBr->getSuccessor(1);
+    BasicBlock *Common = S0 == BB ? S1 : S0;
+
+    if (auto *CommonBr = dyn_cast<BranchInst>(Common->getTerminator()))
+      if (CommonBr->isUnconditional() && CommonBr->getSuccessor(0) == BB)
+        ConditionalPredsMap[Common].push_back(PredBB);
+  }
+
+  // First only unconditional preds (1) and (2).
+  bool Changed = trySinkFromPredsIntoBB(BB, UnconditionalPreds,
+                                        HaveNonUnconditionalPredecessors, DTU);
+
+  // Then attempt conditional preds (3).
+  for (auto &[_, ConditionalPreds] : ConditionalPredsMap)
+    Changed |= trySinkFromPredsIntoBB(BB, ConditionalPreds, true, DTU);
+
+  return Changed;
+}
+
 namespace {
 
 struct CompatibleSets {

>From 22e5c21549aad84322cc52e31bc6ccf59faee8af Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 23 Dec 2025 18:47:15 +0800
Subject: [PATCH 3/6] After-commit test

---
 .../Transforms/SimplifyCFG/sink-cond-preds.ll | 34 ++++++++-----------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll b/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
index 93f9ab6daaf70..b28ae042b1165 100644
--- a/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
+++ b/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
@@ -20,19 +20,7 @@ define i32 @foo(ptr %ssl, ptr %name, ptr %iv, ptr %ectx, ptr %hctx, i32 %enc) {
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[BCMP:%.*]] = call i32 @bcmp(ptr [[NAME]], ptr [[TICKET_KEY_NAME_13]], i64 16)
 ; CHECK-NEXT:    [[CMP16_NOT:%.*]] = icmp eq i32 [[BCMP]], 0
-; CHECK-NEXT:    br i1 [[CMP16_NOT]], label [[COND_PRED2:%.*]], label [[RETURN]]
-; CHECK:       cond_pred1:
-; CHECK-NEXT:    [[CALL4:%.*]] = call ptr @fn4()
-; CHECK-NEXT:    [[TICKET_KEY_AES_:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
-; CHECK-NEXT:    [[CALL6:%.*]] = call i32 @fn5(ptr [[ECTX:%.*]], ptr [[CALL4]], ptr null, ptr [[TICKET_KEY_AES_]], ptr [[IV]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[CALL6]], 1
-; CHECK-NEXT:    br i1 [[CMP]], label [[RETURN]], label [[COMMON:%.*]]
-; CHECK:       cond_pred2:
-; CHECK-NEXT:    [[CALL19:%.*]] = call ptr @fn4()
-; CHECK-NEXT:    [[TICKET_KEY_AES_20:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
-; CHECK-NEXT:    [[CALL22:%.*]] = call i32 @fn5(ptr [[ECTX]], ptr [[CALL19]], ptr null, ptr [[TICKET_KEY_AES_20]], ptr [[IV]])
-; CHECK-NEXT:    [[CMP23:%.*]] = icmp slt i32 [[CALL22]], 1
-; CHECK-NEXT:    br i1 [[CMP23]], label [[RETURN]], label [[COMMON]]
+; CHECK-NEXT:    br i1 [[CMP16_NOT]], label [[COND_PRED1]], label [[RETURN]]
 ; CHECK:       common:
 ; CHECK-NEXT:    [[TICKET_KEY_HMAC_25:%.*]] = getelementptr i8, ptr [[CALL1]], i64 112
 ; CHECK-NEXT:    [[CALL27:%.*]] = call ptr @fn6()
@@ -40,8 +28,14 @@ define i32 @foo(ptr %ssl, ptr %name, ptr %iv, ptr %ectx, ptr %hctx, i32 %enc) {
 ; CHECK-NEXT:    [[CMP29:%.*]] = icmp slt i32 [[CALL28]], 1
 ; CHECK-NEXT:    [[SPEC_SELECT11:%.*]] = select i1 [[CMP29]], i32 -1, i32 1
 ; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return.sink.split:
+; CHECK-NEXT:    [[CALL19:%.*]] = call ptr @fn4()
+; CHECK-NEXT:    [[TICKET_KEY_AES_20:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
+; CHECK-NEXT:    [[CALL22:%.*]] = call i32 @fn5(ptr [[ECTX:%.*]], ptr [[CALL19]], ptr null, ptr [[TICKET_KEY_AES_20]], ptr [[IV]])
+; CHECK-NEXT:    [[CMP23:%.*]] = icmp slt i32 [[CALL22]], 1
+; CHECK-NEXT:    br i1 [[CMP23]], label [[RETURN]], label [[COMMON:%.*]]
 ; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ -1, [[COND_PRED1]] ], [ -1, [[COND_PRED2]] ], [ -1, [[IF_THEN]] ], [ 0, [[IF_END]] ], [ [[SPEC_SELECT11]], [[COMMON]] ]
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[SPEC_SELECT11]], [[COMMON]] ], [ 0, [[IF_END]] ], [ -1, [[IF_THEN]] ], [ -1, [[COND_PRED1]] ]
 ; CHECK-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
@@ -97,17 +91,17 @@ define ptr @getAMDProcessorTypeAndSubtype(i32 %Family, ptr %CpuModel, i1 %cmp35)
 ; CHECK-NEXT:      i32 26, label [[COND_PRED1:%.*]]
 ; CHECK-NEXT:      i32 21, label [[COND_PRED2:%.*]]
 ; CHECK-NEXT:    ]
-; CHECK:       cond_pred1:
+; CHECK:       cond_pred2:
+; CHECK-NEXT:    br label [[COND_PRED1]]
+; CHECK:       end.sink.split:
+; CHECK-NEXT:    [[PHI2_PH:%.*]] = phi i32 [ 2, [[COND_PRED2]] ], [ 1, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    store i32 0, ptr [[CPUMODEL:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[CMP35:%.*]], label [[END]], label [[COMMON:%.*]]
-; CHECK:       cond_pred2:
-; CHECK-NEXT:    store i32 0, ptr [[CPUMODEL]], align 4
-; CHECK-NEXT:    br i1 [[CMP35]], label [[END]], label [[COMMON]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret ptr null
 ; CHECK:       common:
-; CHECK-NEXT:    [[PHI1:%.*]] = phi i32 [ 0, [[COND_PRED1]] ], [ 0, [[COND_PRED2]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 1, [[COND_PRED1]] ], [ 2, [[COND_PRED2]] ]
+; CHECK-NEXT:    [[PHI1:%.*]] = phi i32 [ 0, [[COND_PRED1]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ [[PHI2_PH]], [[COND_PRED1]] ]
 ; CHECK-NEXT:    call void @use(i32 [[PHI1]])
 ; CHECK-NEXT:    call void @use(i32 [[PHI2]])
 ; CHECK-NEXT:    br label [[END]]

>From 62ec50d1fc7ce568f0d16549b2b0828c1052da3c Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Wed, 24 Dec 2025 22:15:08 +0800
Subject: [PATCH 4/6] Cleanup outdated comments

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 1738be44ca33a..329307129531b 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2534,8 +2534,6 @@ static bool trySinkFromPredsIntoBB(BasicBlock *SinkBB,
   if (ScanIdx == 0)
     return false;
 
-  // NOTE: profitability is still based on the original BB (the final merge
-  // point), not the synthetic SinkBB we might introduce for (3).
   bool followedByDeoptOrUnreachable =
       IsBlockFollowedByDeoptOrUnreachable(SinkBB);
 

>From cd2106e906e04386abced555fc9e074c26912ad5 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 25 Dec 2025 15:26:38 +0800
Subject: [PATCH 5/6] fix crash triggered by fake conditional branch

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 329307129531b..81f50c28083a4 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2838,6 +2838,9 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
     // Try to match (3): PredBB has a conditional edge to BB, and the other
     // successor unconditionally branches to BB.
     BasicBlock *S0 = PredBr->getSuccessor(0), *S1 = PredBr->getSuccessor(1);
+    // If S0 == S1, it is a fake conditional branch
+    if (S0 == S1)
+      continue;
     BasicBlock *Common = S0 == BB ? S1 : S0;
 
     if (auto *CommonBr = dyn_cast<BranchInst>(Common->getTerminator()))
@@ -8952,6 +8955,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
         return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
       }
 
+
     // Look through return.
     if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
       bool HasNoUndefAttr =

>From 4d3b879ca1daa77f19bc3ca9f47e137e45df89c5 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 25 Dec 2025 15:29:33 +0800
Subject: [PATCH 6/6] Make formatter happy

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 81f50c28083a4..5ee78f070ffda 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -8955,7 +8955,6 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
         return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
       }
 
-
     // Look through return.
     if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
       bool HasNoUndefAttr =



More information about the llvm-commits mailing list