[llvm] [SimplifyCFG] Sink common code from cond preds (PR #173353)
Kunqiu Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 24 23:29:52 PST 2025
https://github.com/Camsyn updated https://github.com/llvm/llvm-project/pull/173353
>From 733fac2aba22e6a367889760f4c0d4667a965100 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 23 Dec 2025 18:29:37 +0800
Subject: [PATCH 1/6] Before-commit test
---
.../Transforms/SimplifyCFG/sink-cond-preds.ll | 153 ++++++++++++++++++
1 file changed, 153 insertions(+)
create mode 100644 llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
diff --git a/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll b/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
new file mode 100644
index 0000000000000..93f9ab6daaf70
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -sink-common-insts -S | FileCheck %s
+; RUN: opt < %s -passes='simplifycfg<sink-common-insts>' -S | FileCheck %s
+
+
+; Simplified from node/libnode.crypto_context.ll
+define i32 @foo(ptr %ssl, ptr %name, ptr %iv, ptr %ectx, ptr %hctx, i32 %enc) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @fn1(ptr [[SSL:%.*]])
+; CHECK-NEXT: [[CALL1:%.*]] = call ptr @fn2(ptr [[CALL]], i32 0)
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[ENC:%.*]], 0
+; CHECK-NEXT: [[TICKET_KEY_NAME_13:%.*]] = getelementptr i8, ptr [[CALL1]], i64 80
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[NAME:%.*]], ptr [[TICKET_KEY_NAME_13]], i64 16, i1 false)
+; CHECK-NEXT: [[CALL2:%.*]] = call i8 @fn3(ptr [[IV:%.*]], i64 16)
+; CHECK-NEXT: [[TOBOOL_I:%.*]] = trunc i8 [[CALL2]] to i1
+; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[COND_PRED1:%.*]], label [[RETURN:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[BCMP:%.*]] = call i32 @bcmp(ptr [[NAME]], ptr [[TICKET_KEY_NAME_13]], i64 16)
+; CHECK-NEXT: [[CMP16_NOT:%.*]] = icmp eq i32 [[BCMP]], 0
+; CHECK-NEXT: br i1 [[CMP16_NOT]], label [[COND_PRED2:%.*]], label [[RETURN]]
+; CHECK: cond_pred1:
+; CHECK-NEXT: [[CALL4:%.*]] = call ptr @fn4()
+; CHECK-NEXT: [[TICKET_KEY_AES_:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
+; CHECK-NEXT: [[CALL6:%.*]] = call i32 @fn5(ptr [[ECTX:%.*]], ptr [[CALL4]], ptr null, ptr [[TICKET_KEY_AES_]], ptr [[IV]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL6]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[RETURN]], label [[COMMON:%.*]]
+; CHECK: cond_pred2:
+; CHECK-NEXT: [[CALL19:%.*]] = call ptr @fn4()
+; CHECK-NEXT: [[TICKET_KEY_AES_20:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
+; CHECK-NEXT: [[CALL22:%.*]] = call i32 @fn5(ptr [[ECTX]], ptr [[CALL19]], ptr null, ptr [[TICKET_KEY_AES_20]], ptr [[IV]])
+; CHECK-NEXT: [[CMP23:%.*]] = icmp slt i32 [[CALL22]], 1
+; CHECK-NEXT: br i1 [[CMP23]], label [[RETURN]], label [[COMMON]]
+; CHECK: common:
+; CHECK-NEXT: [[TICKET_KEY_HMAC_25:%.*]] = getelementptr i8, ptr [[CALL1]], i64 112
+; CHECK-NEXT: [[CALL27:%.*]] = call ptr @fn6()
+; CHECK-NEXT: [[CALL28:%.*]] = call i32 @fn7(ptr [[HCTX:%.*]], ptr [[TICKET_KEY_HMAC_25]], i32 16, ptr [[CALL27]], ptr null)
+; CHECK-NEXT: [[CMP29:%.*]] = icmp slt i32 [[CALL28]], 1
+; CHECK-NEXT: [[SPEC_SELECT11:%.*]] = select i1 [[CMP29]], i32 -1, i32 1
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ -1, [[COND_PRED1]] ], [ -1, [[COND_PRED2]] ], [ -1, [[IF_THEN]] ], [ 0, [[IF_END]] ], [ [[SPEC_SELECT11]], [[COMMON]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %call = call ptr @fn1(ptr %ssl)
+ %call1 = call ptr @fn2(ptr %call, i32 0)
+ %tobool.not = icmp eq i32 %enc, 0
+ %ticket_key_name_13 = getelementptr i8, ptr %call1, i64 80
+ br i1 %tobool.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ call void @llvm.memcpy.p0.p0.i64(ptr %name, ptr %ticket_key_name_13, i64 16, i1 false)
+ %call2 = call i8 @fn3(ptr %iv, i64 16)
+ %tobool.i = trunc i8 %call2 to i1
+ br i1 %tobool.i, label %cond_pred1, label %return
+
+if.end: ; preds = %entry
+ %bcmp = call i32 @bcmp(ptr %name, ptr %ticket_key_name_13, i64 16)
+ %cmp16.not = icmp eq i32 %bcmp, 0
+ br i1 %cmp16.not, label %cond_pred2, label %return
+
+cond_pred1: ; preds = %if.then
+ %call4 = call ptr @fn4()
+ %ticket_key_aes_ = getelementptr i8, ptr %call1, i64 96
+ %call6 = call i32 @fn5(ptr %ectx, ptr %call4, ptr null, ptr %ticket_key_aes_, ptr %iv)
+ %cmp = icmp slt i32 %call6, 1
+ br i1 %cmp, label %return, label %common
+
+cond_pred2: ; preds = %if.end
+ %call19 = call ptr @fn4()
+ %ticket_key_aes_20 = getelementptr i8, ptr %call1, i64 96
+ %call22 = call i32 @fn5(ptr %ectx, ptr %call19, ptr null, ptr %ticket_key_aes_20, ptr %iv)
+ %cmp23 = icmp slt i32 %call22, 1
+ br i1 %cmp23, label %return, label %common
+
+common: ; preds = %cond_pred2, %cond_pred1
+ %ticket_key_hmac_25 = getelementptr i8, ptr %call1, i64 112
+ %call27 = call ptr @fn6()
+ %call28 = call i32 @fn7(ptr %hctx, ptr %ticket_key_hmac_25, i32 16, ptr %call27, ptr null)
+ %cmp29 = icmp slt i32 %call28, 1
+ %spec.select11 = select i1 %cmp29, i32 -1, i32 1
+ br label %return
+
+return: ; preds = %common, %cond_pred2, %cond_pred1, %if.end, %if.then
+ %retval.0 = phi i32 [ -1, %cond_pred1 ], [ -1, %cond_pred2 ], [ -1, %if.then ], [ 0, %if.end ], [ %spec.select11, %common ]
+ ret i32 %retval.0
+}
+
+; Simplified from compiler-rt/x86.c.ll
+define ptr @getAMDProcessorTypeAndSubtype(i32 %Family, ptr %CpuModel, i1 %cmp35) {
+; CHECK-LABEL: @getAMDProcessorTypeAndSubtype(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 [[FAMILY:%.*]], label [[END:%.*]] [
+; CHECK-NEXT: i32 26, label [[COND_PRED1:%.*]]
+; CHECK-NEXT: i32 21, label [[COND_PRED2:%.*]]
+; CHECK-NEXT: ]
+; CHECK: cond_pred1:
+; CHECK-NEXT: store i32 0, ptr [[CPUMODEL:%.*]], align 4
+; CHECK-NEXT: br i1 [[CMP35:%.*]], label [[END]], label [[COMMON:%.*]]
+; CHECK: cond_pred2:
+; CHECK-NEXT: store i32 0, ptr [[CPUMODEL]], align 4
+; CHECK-NEXT: br i1 [[CMP35]], label [[END]], label [[COMMON]]
+; CHECK: end:
+; CHECK-NEXT: ret ptr null
+; CHECK: common:
+; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ 0, [[COND_PRED1]] ], [ 0, [[COND_PRED2]] ]
+; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 1, [[COND_PRED1]] ], [ 2, [[COND_PRED2]] ]
+; CHECK-NEXT: call void @use(i32 [[PHI1]])
+; CHECK-NEXT: call void @use(i32 [[PHI2]])
+; CHECK-NEXT: br label [[END]]
+;
+entry:
+ switch i32 %Family, label %end [
+ i32 26, label %cond_pred1
+ i32 21, label %cond_pred2
+ ]
+
+cond_pred1: ; preds = %entry
+ store i32 0, ptr %CpuModel, align 4
+ br i1 %cmp35, label %end, label %common
+
+cond_pred2: ; preds = %entry
+ store i32 0, ptr %CpuModel, align 4
+ br i1 %cmp35, label %end, label %common
+
+end: ; preds = %common, %cond_pred2, %cond_pred1, %entry
+ ret ptr null
+
+common: ; preds = %cond_pred2, %cond_pred1
+ %phi1 = phi i32 [ 0, %cond_pred1 ], [ 0, %cond_pred2 ]
+ %phi2 = phi i32 [ 1, %cond_pred1 ], [ 2, %cond_pred2 ]
+ call void @use(i32 %phi1)
+ call void @use(i32 %phi2)
+ br label %end
+}
+
+
+declare void @use(i32)
+
+declare ptr @fn1(ptr)
+declare ptr @fn2(ptr, i32)
+declare i8 @fn3(ptr, i64)
+declare ptr @fn4()
+declare i32 @fn5(ptr, ptr, ptr, ptr, ptr)
+declare ptr @fn6()
+declare i32 @fn7(ptr, ptr, i32, ptr, ptr)
+
+declare i32 @bcmp(ptr, ptr, i64)
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
>From 91bc4ca82e16a01c30ab2866b5b493b29b90f989 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 23 Dec 2025 18:46:57 +0800
Subject: [PATCH 2/6] feat: sink common code from cond preds
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 373 +++++++++++++++++-----
1 file changed, 291 insertions(+), 82 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2a957737697c3..1738be44ca33a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2297,15 +2297,23 @@ static bool canSinkInstructions(
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
-static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
- auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
+// If CondBrMoveTo is not null, move the conditional branch in Blocks to
+// CondBrMoveTo.
+static void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
+ BasicBlock *CondBrMoveTo = nullptr) {
+ auto *BBEnd =
+ CondBrMoveTo ? CondBrMoveTo : Blocks[0]->getTerminator()->getSuccessor(0);
// canSinkInstructions returning true guarantees that every block has at
// least one non-terminator instruction.
SmallVector<Instruction*,4> Insts;
for (auto *BB : Blocks) {
Instruction *I = BB->getTerminator();
- I = I->getPrevNode();
+ if (CondBrMoveTo)
+ assert(isa<BranchInst>(I) && cast<BranchInst>(I)->isConditional() &&
+ "Expected a conditional branch");
+ else
+ I = I->getPrevNode();
Insts.push_back(I);
}
@@ -2368,14 +2376,15 @@ static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
}
}
- for (User *U : make_early_inc_range(I0->users())) {
- // canSinkLastInstruction checked that all instructions are only used by
- // phi nodes in a way that allows replacing the phi node with the common
- // instruction.
- auto *PN = cast<PHINode>(U);
- PN->replaceAllUsesWith(I0);
- PN->eraseFromParent();
- }
+ if (!CondBrMoveTo)
+ for (User *U : make_early_inc_range(I0->users())) {
+ // canSinkLastInstruction checked that all instructions are only used by
+ // phi nodes in a way that allows replacing the phi node with the common
+ // instruction.
+ auto *PN = cast<PHINode>(U);
+ PN->replaceAllUsesWith(I0);
+ PN->eraseFromParent();
+ }
// Finally nuke all instructions apart from the common instruction.
for (auto *I : Insts) {
@@ -2389,85 +2398,131 @@ static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
}
}
-/// Check whether BB's predecessors end with unconditional branches. If it is
-/// true, sink any common code from the predecessors to BB.
-static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
- DomTreeUpdater *DTU) {
- // We support two situations:
- // (1) all incoming arcs are unconditional
- // (2) there are non-unconditional incoming arcs
- //
- // (2) is very common in switch defaults and
- // else-if patterns;
- //
- // if (a) f(1);
- // else if (b) f(2);
- //
- // produces:
- //
- // [if]
- // / \
- // [f(1)] [if]
- // | | \
- // | | |
- // | [f(2)]|
- // \ | /
- // [ end ]
- //
- // [end] has two unconditional predecessor arcs and one conditional. The
- // conditional refers to the implicit empty 'else' arc. This conditional
- // arc can also be caused by an empty default block in a switch.
- //
- // In this case, we attempt to sink code from all *unconditional* arcs.
- // If we can sink instructions from these arcs (determined during the scan
- // phase below) we insert a common successor for all unconditional arcs and
- // connect that to [end], to enable sinking:
- //
- // [if]
- // / \
- // [x(1)] [if]
- // | | \
- // | | \
- // | [x(2)] |
- // \ / |
- // [sink.split] |
- // \ /
- // [ end ]
- //
- SmallVector<BasicBlock*,4> UnconditionalPreds;
- bool HaveNonUnconditionalPredecessors = false;
- for (auto *PredBB : predecessors(BB)) {
- auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
- if (PredBr && PredBr->isUnconditional())
- UnconditionalPreds.push_back(PredBB);
- else
- HaveNonUnconditionalPredecessors = true;
+/// Modified from UpdatePHINodes in BasicBlockUtils.cpp.
+/// Update the PHI nodes in OrigBB to include the values coming from NewBB.
+/// This also updates AliasAnalysis, if available.
+/// phi [ ..., %pred0 ], [ ..., %pred1 ], ...
+/// to
+/// phi [ %phi, %sink.split ], ...
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds) {
+ // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
+ SmallPtrSet<BasicBlock *, 16> PredSet(llvm::from_range, Preds);
+ for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I);) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = nullptr;
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (!PredSet.count(PN->getIncomingBlock(i)))
+ continue;
+ if (!InVal)
+ InVal = PN->getIncomingValue(i);
+ else if (InVal != PN->getIncomingValue(i)) {
+ InVal = nullptr;
+ break;
+ }
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+ PN->removeIncomingValueIf(
+ [&](unsigned Idx) {
+ return PredSet.contains(PN->getIncomingBlock(Idx));
+ },
+ /* DeletePHIIfEmpty */ false);
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+ continue;
+ }
+
+ // If the values coming into the block are not the same, we need a new
+ // PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph",
+ NewBB->getFirstInsertionPt());
+
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values aren't
+ // invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
+ BasicBlock *IncomingBB = PN->getIncomingBlock(i);
+ if (PredSet.count(IncomingBB)) {
+ Value *V = PN->removeIncomingValue(i, false);
+ NewPHI->addIncoming(V, IncomingBB);
+ }
+ }
+
+ PN->addIncoming(NewPHI, NewBB);
}
- if (UnconditionalPreds.size() < 2)
+}
+
+/// Try to sink common instructions from predecessors of SinkBB into SinkBB.
+static bool trySinkFromPredsIntoBB(BasicBlock *SinkBB,
+ SmallVectorImpl<BasicBlock *> &Preds,
+ bool HaveNonUnconditionalPredecessors,
+ DomTreeUpdater *DTU) {
+ if (Preds.size() < 2)
return false;
+ const bool IsConditionalPreds =
+ cast<BranchInst>(Preds[0]->getTerminator())->isConditional();
+
+ assert((!IsConditionalPreds || HaveNonUnconditionalPredecessors) &&
+ "HaveNonUnconditionalPredecessors must be true if IsConditionalPreds");
+
// We take a two-step approach to tail sinking. First we scan from the end of
- // each block upwards in lockstep. If the n'th instruction from the end of each
- // block can be sunk, those instructions are added to ValuesToSink and we
+ // each block upwards in lockstep. If the n'th instruction from the end of
+ // each block can be sunk, those instructions are added to ValuesToSink and we
// carry on. If we can sink an instruction but need to PHI-merge some operands
// (because they're not identical in each instruction) we add these to
// PHIOperands.
- // We prepopulate PHIOperands with the phis that already exist in BB.
+ // We prepopulate PHIOperands with the phis that already exist in SinkBB.
DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
- for (PHINode &PN : BB->phis()) {
+ for (PHINode &PN : SinkBB->phis()) {
SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
for (const Use &U : PN.incoming_values())
IncomingVals.insert({PN.getIncomingBlock(U), &U});
- auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
- for (BasicBlock *Pred : UnconditionalPreds)
+ auto &Ops = PHIOperands[IncomingVals[Preds[0]]];
+ for (BasicBlock *Pred : Preds)
Ops.push_back(*IncomingVals[Pred]);
}
+ if (IsConditionalPreds) {
+ // If Preds are conditional, there might be no phi-uses for the last
+ // instruction, we need to add cond-br uses.
+ BranchInst *T0 = cast<BranchInst>(Preds[0]->getTerminator());
+ assert(T0->isConditional() && "Expected a conditional branch");
+ Use *Pred0Use = &T0->getOperandUse(0);
+ BasicBlock *ThenBB = T0->getSuccessor(0);
+ for (BasicBlock *Pred : Preds) {
+ BranchInst *BI = cast<BranchInst>(Pred->getTerminator());
+ assert(BI->isConditional() && "Expected a conditional branch");
+ if (BI->getSuccessor(0) != ThenBB) {
+ // br %cond1, label %common, label %end
+ // mismatch
+ // br %cond2, label %end, label %common
+ // TODO: if %cond comes from outer block, we can rewrite the cond brs to
+ // make them match.
+ return false;
+ }
+ Use &U = BI->getOperandUse(0);
+ PHIOperands[Pred0Use].push_back(U);
+ }
+ }
+
int ScanIdx = 0;
- SmallPtrSet<Value*,4> InstructionsToSink;
- LockstepReverseIterator<true> LRI(UnconditionalPreds);
- while (LRI.isValid() &&
- canSinkInstructions(*LRI, PHIOperands)) {
+ SmallPtrSet<Value *, 4> InstructionsToSink;
+ LockstepReverseIterator<true> LRI(Preds);
+ while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) {
LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
<< "\n");
InstructionsToSink.insert_range(*LRI);
@@ -2479,7 +2534,10 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
if (ScanIdx == 0)
return false;
- bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
+ // NOTE: profitability is still based on the original BB (the final merge
+ // point), not the synthetic SinkBB we might introduce for (3).
+ bool followedByDeoptOrUnreachable =
+ IsBlockFollowedByDeoptOrUnreachable(SinkBB);
if (!followedByDeoptOrUnreachable) {
// Check whether this is the pointer operand of a load/store.
@@ -2612,9 +2670,47 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
// We have a conditional edge and we're going to sink some instructions.
// Insert a new block postdominating all blocks we're going to sink from.
- if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
+ BasicBlock *SinkSplit =
+ SplitBlockPredecessors(SinkBB, Preds, ".sink.split", DTU);
+ if (!SinkSplit)
// Edges couldn't be split.
return false;
+ // If Preds are conditional, we need to preprocess them to make them
+ // unconditional branching to SinkSplit.
+ if (IsConditionalPreds) {
+ // Sink the cond br from Preds into SinkSplit
+ sinkLastInstruction(Preds, SinkSplit);
+ // Erase the legacy uncond br from SinkSplit
+ SinkSplit->getTerminator()->eraseFromParent();
+ BranchInst *NewBr = cast<BranchInst>(SinkSplit->getTerminator());
+ // Correct `br %cond, %sink.split, %common` to `br %cond, %end, %common`
+ NewBr->replaceSuccessorWith(SinkSplit, SinkBB);
+ // Connect Pred to SinkSplit with unconditional branches.
+ for (BasicBlock *Pred : Preds) {
+ assert(!Pred->getTerminator() &&
+ "Pred's terminator should be moved to SinkSplit");
+ BranchInst::Create(SinkSplit, Pred);
+ }
+ BasicBlock *CommonBlock = NewBr->getSuccessor(0) == SinkBB
+ ? NewBr->getSuccessor(1)
+ : NewBr->getSuccessor(0);
+ // Check if CommonBlock contains any PHIs.
+ if (!CommonBlock->phis().empty()) {
+ // Handle PHIs in Common
+ // phi [ ..., %pred0 ], [ ..., %pred1 ], ...
+ // to
+ // phi [ ..., %sink.split ], ...
+ UpdatePHINodes(CommonBlock, SinkSplit, Preds);
+ }
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ Updates.reserve(1 + Preds.size());
+ Updates.push_back({DominatorTree::Insert, SinkSplit, CommonBlock});
+ for (BasicBlock *Pred : Preds)
+ Updates.push_back({DominatorTree::Delete, Pred, CommonBlock});
+ DTU->applyUpdates(Updates);
+ }
+ }
Changed = true;
}
@@ -2624,8 +2720,8 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
// many PHI instructions to be generated (currently only one PHI is allowed
// per sunk instruction).
//
- // We can use InstructionsToSink to discount values needing PHI-merging that will
- // actually be sunk in a later iteration. This allows us to be more
+ // We can use InstructionsToSink to discount values needing PHI-merging that
+ // will actually be sunk in a later iteration. This allows us to be more
// aggressive in what we sink. This does allow a false positive where we
// sink presuming a later value will also be sunk, but stop half way through
// and never actually sink it which means we produce more PHIs than intended.
@@ -2633,14 +2729,13 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
int SinkIdx = 0;
for (; SinkIdx != ScanIdx; ++SinkIdx) {
LLVM_DEBUG(dbgs() << "SINK: Sink: "
- << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
- << "\n");
+ << *Preds[0]->getTerminator()->getPrevNode() << "\n");
// Because we've sunk every instruction in turn, the current instruction to
// sink is always at index 0.
LRI.reset();
- sinkLastInstruction(UnconditionalPreds);
+ sinkLastInstruction(Preds);
NumSinkCommonInstrs++;
Changed = true;
}
@@ -2649,6 +2744,120 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
return Changed;
}
+/// Check whether BB's predecessors end with branches. If it is
+/// true, sink any common code from the predecessors to BB.
+static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
+ DomTreeUpdater *DTU) {
+ // We support three situations:
+ // (1) all incoming arcs are unconditional
+ // (2) there are non-unconditional incoming arcs
+ // (3) there are incoming conditional arcs P->BB, where P's other successor
+ // unconditionally branches to BB
+ //
+ // (2) is very common in switch defaults and
+ // else-if patterns;
+ //
+ // if (a) f(1);
+ // else if (b) f(2);
+ //
+ // produces:
+ //
+ // [if]
+ // / \
+ // [f(1)] [if]
+ // | | \
+ // | | |
+ // | [f(2)]|
+ // \ | /
+ // [ end ]
+ //
+ // [end] has two unconditional predecessor arcs and one conditional. The
+ // conditional refers to the implicit empty 'else' arc. This conditional
+ // arc can also be caused by an empty default block in a switch.
+ //
+ // In this case, we attempt to sink code from all *unconditional* arcs.
+ // If we can sink instructions from these arcs (determined during the scan
+ // phase below) we insert a common successor for all unconditional arcs and
+ // connect that to [end], to enable sinking:
+ //
+ // [if]
+ // / \
+ // [x(1)] [if]
+ // | | \
+ // | | \
+ // | [x(2)] |
+ // \ / |
+ // [sink.split] |
+ // \ /
+ // [ end ]
+ //
+ // (3) is common after SimplifyCFG merging some uncond blocks, e.g.:
+ //
+ // [f(1)] [f(2)]
+ // /\ /\
+ // / \ / \
+ // | [comm] |
+ // \ | /
+ // \ | /
+ // [ end ]
+ //
+ // Here, [end] has incoming *conditional* arcs from [f(1)] and [f(2)], but the
+ // other successor ([comm]) unconditionally branches to [end].
+ // In this case, we attempt to sink from all such conditional predecessors by
+ // inserting a common successor for them (also named ".sink.split") and moving
+ // the conditional branch into that block:
+ //
+ // [f(1)] [f(2)]
+ // \ /
+ // [sink.split]
+ // / \
+ // [comm] |
+ // \ /
+ // [ end ]
+ //
+ // This enables sinking from the (now) unconditional predecessors into
+ // [sink.split].
+
+ SmallVector<BasicBlock *, 4> UnconditionalPreds;
+ bool HaveNonUnconditionalPredecessors = false;
+
+ // For (3). Common successor -> related conditional predecessors.
+ SmallDenseMap<BasicBlock *, SmallVector<BasicBlock *, 4>, 2>
+ ConditionalPredsMap;
+
+ for (auto *PredBB : predecessors(BB)) {
+ auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (PredBr && PredBr->isUnconditional()) {
+ UnconditionalPreds.push_back(PredBB);
+ continue;
+ }
+
+ HaveNonUnconditionalPredecessors = true;
+ if (!PredBr || PredBr->getNumSuccessors() != 2)
+ continue;
+ assert(PredBr->isConditional() && "Expected conditional branch");
+
+ // Try to match (3): PredBB has a conditional edge to BB, and the other
+ // successor unconditionally branches to BB.
+ BasicBlock *S0 = PredBr->getSuccessor(0), *S1 = PredBr->getSuccessor(1);
+ BasicBlock *Common = S0 == BB ? S1 : S0;
+
+ if (auto *CommonBr = dyn_cast<BranchInst>(Common->getTerminator()))
+ if (CommonBr->isUnconditional() && CommonBr->getSuccessor(0) == BB)
+ ConditionalPredsMap[Common].push_back(PredBB);
+ }
+
+ // First only unconditional preds (1) and (2).
+ bool Changed = trySinkFromPredsIntoBB(BB, UnconditionalPreds,
+ HaveNonUnconditionalPredecessors, DTU);
+
+ // Then attempt conditional preds (3).
+ for (auto &[_, ConditionalPreds] : ConditionalPredsMap)
+ Changed |= trySinkFromPredsIntoBB(BB, ConditionalPreds, true, DTU);
+
+ return Changed;
+}
+
namespace {
struct CompatibleSets {
>From 22e5c21549aad84322cc52e31bc6ccf59faee8af Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 23 Dec 2025 18:47:15 +0800
Subject: [PATCH 3/6] After-commit test
---
.../Transforms/SimplifyCFG/sink-cond-preds.ll | 34 ++++++++-----------
1 file changed, 14 insertions(+), 20 deletions(-)
diff --git a/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll b/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
index 93f9ab6daaf70..b28ae042b1165 100644
--- a/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
+++ b/llvm/test/Transforms/SimplifyCFG/sink-cond-preds.ll
@@ -20,19 +20,7 @@ define i32 @foo(ptr %ssl, ptr %name, ptr %iv, ptr %ectx, ptr %hctx, i32 %enc) {
; CHECK: if.end:
; CHECK-NEXT: [[BCMP:%.*]] = call i32 @bcmp(ptr [[NAME]], ptr [[TICKET_KEY_NAME_13]], i64 16)
; CHECK-NEXT: [[CMP16_NOT:%.*]] = icmp eq i32 [[BCMP]], 0
-; CHECK-NEXT: br i1 [[CMP16_NOT]], label [[COND_PRED2:%.*]], label [[RETURN]]
-; CHECK: cond_pred1:
-; CHECK-NEXT: [[CALL4:%.*]] = call ptr @fn4()
-; CHECK-NEXT: [[TICKET_KEY_AES_:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
-; CHECK-NEXT: [[CALL6:%.*]] = call i32 @fn5(ptr [[ECTX:%.*]], ptr [[CALL4]], ptr null, ptr [[TICKET_KEY_AES_]], ptr [[IV]])
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL6]], 1
-; CHECK-NEXT: br i1 [[CMP]], label [[RETURN]], label [[COMMON:%.*]]
-; CHECK: cond_pred2:
-; CHECK-NEXT: [[CALL19:%.*]] = call ptr @fn4()
-; CHECK-NEXT: [[TICKET_KEY_AES_20:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
-; CHECK-NEXT: [[CALL22:%.*]] = call i32 @fn5(ptr [[ECTX]], ptr [[CALL19]], ptr null, ptr [[TICKET_KEY_AES_20]], ptr [[IV]])
-; CHECK-NEXT: [[CMP23:%.*]] = icmp slt i32 [[CALL22]], 1
-; CHECK-NEXT: br i1 [[CMP23]], label [[RETURN]], label [[COMMON]]
+; CHECK-NEXT: br i1 [[CMP16_NOT]], label [[COND_PRED1]], label [[RETURN]]
; CHECK: common:
; CHECK-NEXT: [[TICKET_KEY_HMAC_25:%.*]] = getelementptr i8, ptr [[CALL1]], i64 112
; CHECK-NEXT: [[CALL27:%.*]] = call ptr @fn6()
@@ -40,8 +28,14 @@ define i32 @foo(ptr %ssl, ptr %name, ptr %iv, ptr %ectx, ptr %hctx, i32 %enc) {
; CHECK-NEXT: [[CMP29:%.*]] = icmp slt i32 [[CALL28]], 1
; CHECK-NEXT: [[SPEC_SELECT11:%.*]] = select i1 [[CMP29]], i32 -1, i32 1
; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return.sink.split:
+; CHECK-NEXT: [[CALL19:%.*]] = call ptr @fn4()
+; CHECK-NEXT: [[TICKET_KEY_AES_20:%.*]] = getelementptr i8, ptr [[CALL1]], i64 96
+; CHECK-NEXT: [[CALL22:%.*]] = call i32 @fn5(ptr [[ECTX:%.*]], ptr [[CALL19]], ptr null, ptr [[TICKET_KEY_AES_20]], ptr [[IV]])
+; CHECK-NEXT: [[CMP23:%.*]] = icmp slt i32 [[CALL22]], 1
+; CHECK-NEXT: br i1 [[CMP23]], label [[RETURN]], label [[COMMON:%.*]]
; CHECK: return:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ -1, [[COND_PRED1]] ], [ -1, [[COND_PRED2]] ], [ -1, [[IF_THEN]] ], [ 0, [[IF_END]] ], [ [[SPEC_SELECT11]], [[COMMON]] ]
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SPEC_SELECT11]], [[COMMON]] ], [ 0, [[IF_END]] ], [ -1, [[IF_THEN]] ], [ -1, [[COND_PRED1]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
@@ -97,17 +91,17 @@ define ptr @getAMDProcessorTypeAndSubtype(i32 %Family, ptr %CpuModel, i1 %cmp35)
; CHECK-NEXT: i32 26, label [[COND_PRED1:%.*]]
; CHECK-NEXT: i32 21, label [[COND_PRED2:%.*]]
; CHECK-NEXT: ]
-; CHECK: cond_pred1:
+; CHECK: cond_pred2:
+; CHECK-NEXT: br label [[COND_PRED1]]
+; CHECK: end.sink.split:
+; CHECK-NEXT: [[PHI2_PH:%.*]] = phi i32 [ 2, [[COND_PRED2]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: store i32 0, ptr [[CPUMODEL:%.*]], align 4
; CHECK-NEXT: br i1 [[CMP35:%.*]], label [[END]], label [[COMMON:%.*]]
-; CHECK: cond_pred2:
-; CHECK-NEXT: store i32 0, ptr [[CPUMODEL]], align 4
-; CHECK-NEXT: br i1 [[CMP35]], label [[END]], label [[COMMON]]
; CHECK: end:
; CHECK-NEXT: ret ptr null
; CHECK: common:
-; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ 0, [[COND_PRED1]] ], [ 0, [[COND_PRED2]] ]
-; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 1, [[COND_PRED1]] ], [ 2, [[COND_PRED2]] ]
+; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ 0, [[COND_PRED1]] ]
+; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[PHI2_PH]], [[COND_PRED1]] ]
; CHECK-NEXT: call void @use(i32 [[PHI1]])
; CHECK-NEXT: call void @use(i32 [[PHI2]])
; CHECK-NEXT: br label [[END]]
>From 62ec50d1fc7ce568f0d16549b2b0828c1052da3c Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Wed, 24 Dec 2025 22:15:08 +0800
Subject: [PATCH 4/6] Cleanup outdated comments
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 1738be44ca33a..329307129531b 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2534,8 +2534,6 @@ static bool trySinkFromPredsIntoBB(BasicBlock *SinkBB,
if (ScanIdx == 0)
return false;
- // NOTE: profitability is still based on the original BB (the final merge
- // point), not the synthetic SinkBB we might introduce for (3).
bool followedByDeoptOrUnreachable =
IsBlockFollowedByDeoptOrUnreachable(SinkBB);
>From cd2106e906e04386abced555fc9e074c26912ad5 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 25 Dec 2025 15:26:38 +0800
Subject: [PATCH 5/6] fix crash triggered by fake conditional branch
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 329307129531b..81f50c28083a4 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2838,6 +2838,9 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
// Try to match (3): PredBB has a conditional edge to BB, and the other
// successor unconditionally branches to BB.
BasicBlock *S0 = PredBr->getSuccessor(0), *S1 = PredBr->getSuccessor(1);
+ // If S0 == S1, it is a fake conditional branch
+ if (S0 == S1)
+ continue;
BasicBlock *Common = S0 == BB ? S1 : S0;
if (auto *CommonBr = dyn_cast<BranchInst>(Common->getTerminator()))
@@ -8952,6 +8955,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
}
+
// Look through return.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
bool HasNoUndefAttr =
>From 4d3b879ca1daa77f19bc3ca9f47e137e45df89c5 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 25 Dec 2025 15:29:33 +0800
Subject: [PATCH 6/6] Make formatter happy
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 81f50c28083a4..5ee78f070ffda 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -8955,7 +8955,6 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
}
-
// Look through return.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
bool HasNoUndefAttr =
More information about the llvm-commits
mailing list