[llvm] [SimplifyCFG] Speculatively execute empty BBs with multiple predecessors (PR #120905)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 16:17:32 PST 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/120905
>From 003eee940576f965702c25dbfde6b1bba0e4d99d Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 22 Dec 2024 22:14:11 +0800
Subject: [PATCH 1/3] [SimplifyCFG] Add pre-commit tests. NFC.
---
.../SimplifyCFG/speculate-blocks.ll | 247 ++++++++++++++++++
1 file changed, 247 insertions(+)
diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll b/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll
index 07eac90186ef62..1f85460d85fbbe 100644
--- a/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll
+++ b/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll
@@ -78,3 +78,250 @@ final_right:
declare void @sideeffect0()
declare void @sideeffect1()
+
+define i1 @speculate_empty_bb(i32 %x, i32 %y) {
+; YES-LABEL: define i1 @speculate_empty_bb
+; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; YES-NEXT: start:
+; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
+; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
+; YES: bb6:
+; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; YES-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; YES: bb5:
+; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
+; YES-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
+; YES: bb2:
+; YES-NEXT: br label [[BB3]]
+; YES: bb3:
+; YES-NEXT: [[RET:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ]
+; YES-NEXT: ret i1 [[RET]]
+;
+; NO-LABEL: define i1 @speculate_empty_bb
+; NO-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; NO-NEXT: start:
+; NO-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
+; NO-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
+; NO: bb6:
+; NO-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; NO-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; NO: bb5:
+; NO-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
+; NO-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
+; NO: bb2:
+; NO-NEXT: br label [[BB3]]
+; NO: bb3:
+; NO-NEXT: [[RET:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ]
+; NO-NEXT: ret i1 [[RET]]
+;
+start:
+ %cmp1 = icmp eq i32 %x, 0
+ br i1 %cmp1, label %bb6, label %bb5
+
+bb6:
+ %cmp2 = icmp eq i32 %y, 0
+ br i1 %cmp2, label %bb2, label %bb3
+
+bb5:
+ %cmp3 = icmp ult i32 %x, %y
+ br i1 %cmp3, label %bb3, label %bb2
+
+bb2:
+ br label %bb3
+
+bb3:
+ %ret = phi i1 [ true, %bb2 ], [ false, %bb6 ], [ false, %bb5 ]
+ ret i1 %ret
+}
+
+define i32 @speculate_empty_bb_not_simplifiable(i32 %x, i32 %y) {
+; YES-LABEL: define i32 @speculate_empty_bb_not_simplifiable
+; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; YES-NEXT: start:
+; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
+; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
+; YES: bb6:
+; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; YES-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; YES: bb5:
+; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
+; YES-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
+; YES: bb2:
+; YES-NEXT: br label [[BB3]]
+; YES: bb3:
+; YES-NEXT: [[RET:%.*]] = phi i32 [ 10, [[BB2]] ], [ 20, [[BB6]] ], [ 30, [[BB5]] ]
+; YES-NEXT: ret i32 [[RET]]
+;
+; NO-LABEL: define i32 @speculate_empty_bb_not_simplifiable
+; NO-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; NO-NEXT: start:
+; NO-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
+; NO-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
+; NO: bb6:
+; NO-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; NO-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; NO: bb5:
+; NO-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
+; NO-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
+; NO: bb2:
+; NO-NEXT: br label [[BB3]]
+; NO: bb3:
+; NO-NEXT: [[RET:%.*]] = phi i32 [ 10, [[BB2]] ], [ 20, [[BB6]] ], [ 30, [[BB5]] ]
+; NO-NEXT: ret i32 [[RET]]
+;
+start:
+ %cmp1 = icmp eq i32 %x, 0
+ br i1 %cmp1, label %bb6, label %bb5
+
+bb6:
+ %cmp2 = icmp eq i32 %y, 0
+ br i1 %cmp2, label %bb2, label %bb3
+
+bb5:
+ %cmp3 = icmp ult i32 %x, %y
+ br i1 %cmp3, label %bb3, label %bb2
+
+bb2:
+ br label %bb3
+
+bb3:
+ %ret = phi i32 [ 10, %bb2 ], [ 20, %bb6 ], [ 30, %bb5 ]
+ ret i32 %ret
+}
+
+define i1 @speculate_nonempty_bb(i32 %x, i32 %y) {
+; YES-LABEL: define i1 @speculate_nonempty_bb
+; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; YES-NEXT: start:
+; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
+; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
+; YES: bb6:
+; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; YES-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; YES: bb5:
+; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
+; YES-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
+; YES: bb2:
+; YES-NEXT: [[PHI:%.*]] = phi i32 [ [[X]], [[BB6]] ], [ [[Y]], [[BB5]] ]
+; YES-NEXT: [[CMP4:%.*]] = icmp eq i32 [[PHI]], 0
+; YES-NEXT: br label [[BB3]]
+; YES: bb3:
+; YES-NEXT: [[RET:%.*]] = phi i1 [ [[CMP4]], [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ]
+; YES-NEXT: ret i1 [[RET]]
+;
+; NO-LABEL: define i1 @speculate_nonempty_bb
+; NO-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; NO-NEXT: start:
+; NO-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
+; NO-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
+; NO: bb6:
+; NO-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; NO-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; NO: bb5:
+; NO-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
+; NO-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
+; NO: bb2:
+; NO-NEXT: [[PHI:%.*]] = phi i32 [ [[X]], [[BB6]] ], [ [[Y]], [[BB5]] ]
+; NO-NEXT: [[CMP4:%.*]] = icmp eq i32 [[PHI]], 0
+; NO-NEXT: br label [[BB3]]
+; NO: bb3:
+; NO-NEXT: [[RET:%.*]] = phi i1 [ [[CMP4]], [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ]
+; NO-NEXT: ret i1 [[RET]]
+;
+start:
+ %cmp1 = icmp eq i32 %x, 0
+ br i1 %cmp1, label %bb6, label %bb5
+
+bb6:
+ %cmp2 = icmp eq i32 %y, 0
+ br i1 %cmp2, label %bb2, label %bb3
+
+bb5:
+ %cmp3 = icmp ult i32 %x, %y
+ br i1 %cmp3, label %bb3, label %bb2
+
+bb2:
+ %phi = phi i32 [ %x, %bb6 ], [ %y, %bb5 ]
+ %cmp4 = icmp eq i32 %phi, 0
+ br label %bb3
+
+bb3:
+ %ret = phi i1 [ %cmp4, %bb2 ], [ false, %bb6 ], [ false, %bb5 ]
+ ret i1 %ret
+}
+
+define i1 @speculate_empty_bb_too_many_select(i32 %x, i32 %y) {
+; YES-LABEL: define i1 @speculate_empty_bb_too_many_select
+; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; YES-NEXT: start:
+; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
+; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
+; YES: bb6:
+; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; YES-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; YES: bb5:
+; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
+; YES-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
+; YES: bb2:
+; YES-NEXT: br label [[BB3]]
+; YES: bb3:
+; YES-NEXT: [[RET:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ]
+; YES-NEXT: [[RET2:%.*]] = phi i32 [ [[X]], [[BB2]] ], [ [[Y]], [[BB6]] ], [ [[X]], [[BB5]] ]
+; YES-NEXT: [[RET3:%.*]] = phi i32 [ [[Y]], [[BB2]] ], [ [[X]], [[BB6]] ], [ [[X]], [[BB5]] ]
+; YES-NEXT: [[RET4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 3, [[BB6]] ], [ 5, [[BB5]] ]
+; YES-NEXT: [[ADD:%.*]] = add i32 [[RET2]], [[RET3]]
+; YES-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[RET4]]
+; YES-NEXT: [[CMP4:%.*]] = icmp eq i32 [[ADD2]], 0
+; YES-NEXT: [[AND:%.*]] = and i1 [[RET]], [[CMP4]]
+; YES-NEXT: ret i1 [[AND]]
+;
+; NO-LABEL: define i1 @speculate_empty_bb_too_many_select
+; NO-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; NO-NEXT: start:
+; NO-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
+; NO-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
+; NO: bb6:
+; NO-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; NO-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; NO: bb5:
+; NO-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
+; NO-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
+; NO: bb2:
+; NO-NEXT: br label [[BB3]]
+; NO: bb3:
+; NO-NEXT: [[RET:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ]
+; NO-NEXT: [[RET2:%.*]] = phi i32 [ [[X]], [[BB2]] ], [ [[Y]], [[BB6]] ], [ [[X]], [[BB5]] ]
+; NO-NEXT: [[RET3:%.*]] = phi i32 [ [[Y]], [[BB2]] ], [ [[X]], [[BB6]] ], [ [[X]], [[BB5]] ]
+; NO-NEXT: [[RET4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 3, [[BB6]] ], [ 5, [[BB5]] ]
+; NO-NEXT: [[ADD:%.*]] = add i32 [[RET2]], [[RET3]]
+; NO-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[RET4]]
+; NO-NEXT: [[CMP4:%.*]] = icmp eq i32 [[ADD2]], 0
+; NO-NEXT: [[AND:%.*]] = and i1 [[RET]], [[CMP4]]
+; NO-NEXT: ret i1 [[AND]]
+;
+start:
+ %cmp1 = icmp eq i32 %x, 0
+ br i1 %cmp1, label %bb6, label %bb5
+
+bb6:
+ %cmp2 = icmp eq i32 %y, 0
+ br i1 %cmp2, label %bb2, label %bb3
+
+bb5:
+ %cmp3 = icmp ult i32 %x, %y
+ br i1 %cmp3, label %bb3, label %bb2
+
+bb2:
+ br label %bb3
+
+bb3:
+ %ret = phi i1 [ true, %bb2 ], [ false, %bb6 ], [ false, %bb5 ]
+ %ret2 = phi i32 [ %x, %bb2 ], [ %y, %bb6 ], [ %x, %bb5 ]
+ %ret3 = phi i32 [ %y, %bb2 ], [ %x, %bb6 ], [ %x, %bb5 ]
+ %ret4 = phi i32 [ 0, %bb2 ], [ 3, %bb6 ], [ 5, %bb5 ]
+ %add = add i32 %ret2, %ret3
+ %add2 = add i32 %add, %ret4
+ %cmp4 = icmp eq i32 %add2, 0
+ %and = and i1 %ret, %cmp4
+ ret i1 %and
+}
>From fe8f09804447631d2a99a626ed582f1398e838ef Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 22 Dec 2024 23:17:24 +0800
Subject: [PATCH 2/3] [SimplifyCFG] Speculatively execute empty BB with
multiple predecessors
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 97 ++++++++
llvm/test/CodeGen/AArch64/and-sink.ll | 9 +-
.../block-placement-optimize-branches.ll | 2 +-
.../AArch64/combine-comparisons-by-cse.ll | 213 +++++++-----------
llvm/test/CodeGen/AArch64/machine_cse.ll | 2 +-
.../wineh-catchret-label-generation.ll | 2 +-
llvm/test/CodeGen/ARM/and-cmp0-sink.ll | 118 ++++------
.../test/CodeGen/Thumb2/mve-blockplacement.ll | 2 +-
.../X86/vector-reductions-expanded.ll | 2 +-
.../SimplifyCFG/2008-05-16-PHIBlockMerge.ll | 7 +-
.../SimplifyCFG/EqualPHIEdgeBlockMerge.ll | 7 +-
.../SimplifyCFG/speculate-blocks.ll | 20 +-
12 files changed, 248 insertions(+), 233 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 17f4b396f753b4..17e339d1d9f251 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3506,6 +3506,96 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
return true;
}
+/// Speculatively execute an empty conditional block, flattening the CFG.
+/// Unlike speculativelyExecuteBB, the block ThenBB (the successor of \p BI
+/// selected by \p Invert) may have predecessors other than BI's block. An
+/// illustration of this transform (here with \p Invert == true) is turning:
+/// \code
+/// BB:
+/// %cmp = icmp ult i32 %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// br label %EndBB
+/// EndBB:
+/// %phi = phi i1 [ true, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
+/// ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult i32 %x, %y
+/// %sel = select i1 %cmp, i1 false, i1 true
+/// br label %EndBB
+/// ThenBB:
+/// br label %EndBB
+/// EndBB:
+/// %phi = phi i1 [ true, %ThenBB ], [ %sel, %BB ], [ false, %OtherBB ]
+/// ...
+/// \endcode
+/// \returns true if the branch edge from BI's block to ThenBB is removed.
+static bool speculativelyExecuteEmptyBB(BranchInst *BI, bool Invert,
+ DomTreeUpdater *DTU,
+ const TargetTransformInfo &TTI) {
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *ThenBB = BI->getSuccessor(Invert);
+ BasicBlock *EndBB = BI->getSuccessor(!Invert);
+
+ BranchInst *SuccBI = dyn_cast<BranchInst>(ThenBB->getTerminator());
+ if (!SuccBI || !SuccBI->isUnconditional() || SuccBI->getSuccessor(0) != EndBB)
+ return false;
+ if (&ThenBB->front() != SuccBI)
+ return false;
+ if (!isProfitableToSpeculate(BI, Invert, TTI))
+ return false;
+
+ InstructionCost Budget =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ InstructionCost Cost = 0;
+ unsigned SpeculatedInstructions = 0;
+ if (!validateAndCostRequiredSelects(BB, ThenBB, EndBB, SpeculatedInstructions,
+ Cost, TTI) ||
+ Cost > Budget)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+
+ // Insert selects before BI and rewrite the PHI operands for the BB edge.
+ Value *BrCond = BI->getCondition();
+ IRBuilder<NoFolder> Builder(BI);
+ for (PHINode &PN : EndBB->phis()) {
+ unsigned OrigI = PN.getBasicBlockIndex(BB);
+ unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN.getIncomingValue(OrigI);
+ Value *ThenV = PN.getIncomingValue(ThenI);
+
+ // No select is needed when both edges carry the same value.
+ if (OrigV == ThenV)
+ continue;
+
+ // Create a select whose true value is the value incoming from ThenBB and
+ // whose false value is the value incoming from BB. Swap them if the branch
+ // destinations were inverted (ThenBB is the false successor of BI).
+ Value *TrueV = ThenV, *FalseV = OrigV;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
+ PN.setIncomingValue(OrigI, V);
+ }
+
+ // Modify the CFG: BB now branches unconditionally to EndBB.
+ ThenBB->removePredecessor(BB);
+ BranchInst *NewBI = Builder.CreateBr(EndBB);
+ // Transfer the loop, debug and annotation metadata to the new branch.
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+ BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, ThenBB}});
+
+ return true;
+}
+
/// Return true if we can thread a branch across this block.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
int Size = 0;
@@ -8125,6 +8215,13 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return requestResimplify();
}
+ if (Options.SpeculateBlocks) {
+ if (speculativelyExecuteEmptyBB(BI, /*Invert=*/false, DTU, TTI))
+ return true;
+ if (speculativelyExecuteEmptyBB(BI, /*Invert=*/true, DTU, TTI))
+ return true;
+ }
+
// If this is a branch on something for which we know the constant value in
// predecessors (e.g. a phi node in the current block), thread control
// through this block.
diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll
index c84310629e5fda..277091a65e862b 100644
--- a/llvm/test/CodeGen/AArch64/and-sink.ll
+++ b/llvm/test/CodeGen/AArch64/and-sink.ll
@@ -11,15 +11,14 @@
define dso_local i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-LABEL: and_sink1:
; CHECK: // %bb.0:
-; CHECK-NEXT: tbz w1, #0, .LBB0_3
+; CHECK-NEXT: tbz w1, #0, .LBB0_2
; CHECK-NEXT: // %bb.1: // %bb0
+; CHECK-NEXT: tst w0, #0x4
; CHECK-NEXT: adrp x8, A
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: str wzr, [x8, :lo12:A]
-; CHECK-NEXT: tbnz w0, #2, .LBB0_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_3: // %bb2
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
index 3645718968f9e3..c1932f055a4f79 100644
--- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
+++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -phi-node-folding-threshold=0 | FileCheck %s
; When consuming profile data we sometimes flip a branch to improve runtime
; performance. If we are optimizing for size, we avoid changing the branch to
diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index 6449c3e11d6672..dc23ae0af31572 100644
--- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -13,10 +13,10 @@ define i32 @combine_gt_ge_10() #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: cmp w8, #10
+; CHECK-NEXT: ldr w9, [x8]
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
+; CHECK-NEXT: cmp w9, #10
; CHECK-NEXT: b.le .LBB0_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x9, :got:c
@@ -29,18 +29,17 @@ define i32 @combine_gt_ge_10() #0 {
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %lor.lhs.false
-; CHECK-NEXT: b.lt .LBB0_6
+; CHECK-NEXT: cmp w9, #10
+; CHECK-NEXT: b.lt .LBB0_5
; CHECK-NEXT: .LBB0_4: // %land.lhs.true3
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB0_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_6: // %if.end
+; CHECK-NEXT: .LBB0_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -80,34 +79,27 @@ define i32 @combine_gt_lt_5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #5
-; CHECK-NEXT: b.le .LBB1_3
+; CHECK-NEXT: b.le .LBB1_2
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB1_6
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_3: // %lor.lhs.false
-; CHECK-NEXT: b.ge .LBB1_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true3
+; CHECK-NEXT: b .LBB1_4
+; CHECK-NEXT: .LBB1_2: // %lor.lhs.false
+; CHECK-NEXT: b.ge .LBB1_5
+; CHECK-NEXT: // %bb.3: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: .LBB1_4: // %return
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB1_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_6: // %if.end
+; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -145,10 +137,10 @@ define i32 @combine_lt_ge_5() #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: cmp w8, #5
+; CHECK-NEXT: ldr w9, [x8]
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
+; CHECK-NEXT: cmp w9, #5
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x9, :got:c
@@ -161,18 +153,17 @@ define i32 @combine_lt_ge_5() #0 {
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_3: // %lor.lhs.false
-; CHECK-NEXT: b.gt .LBB2_6
+; CHECK-NEXT: cmp w9, #5
+; CHECK-NEXT: b.gt .LBB2_5
; CHECK-NEXT: .LBB2_4: // %land.lhs.true3
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB2_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB2_6: // %if.end
+; CHECK-NEXT: .LBB2_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -212,34 +203,27 @@ define i32 @combine_lt_gt_5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #5
-; CHECK-NEXT: b.ge .LBB3_3
+; CHECK-NEXT: b.ge .LBB3_2
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB3_6
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB3_3: // %lor.lhs.false
-; CHECK-NEXT: b.le .LBB3_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true3
+; CHECK-NEXT: b .LBB3_4
+; CHECK-NEXT: .LBB3_2: // %lor.lhs.false
+; CHECK-NEXT: b.le .LBB3_5
+; CHECK-NEXT: // %bb.3: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: .LBB3_4: // %return
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB3_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB3_6: // %if.end
+; CHECK-NEXT: .LBB3_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -279,34 +263,27 @@ define i32 @combine_gt_lt_n5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #5
-; CHECK-NEXT: b.le .LBB4_3
+; CHECK-NEXT: b.le .LBB4_2
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB4_6
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB4_3: // %lor.lhs.false
-; CHECK-NEXT: b.ge .LBB4_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true3
+; CHECK-NEXT: b .LBB4_4
+; CHECK-NEXT: .LBB4_2: // %lor.lhs.false
+; CHECK-NEXT: b.ge .LBB4_5
+; CHECK-NEXT: // %bb.3: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: .LBB4_4: // %return
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB4_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB4_6: // %if.end
+; CHECK-NEXT: .LBB4_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -346,34 +323,27 @@ define i32 @combine_lt_gt_n5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #5
-; CHECK-NEXT: b.ge .LBB5_3
+; CHECK-NEXT: b.ge .LBB5_2
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB5_6
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_3: // %lor.lhs.false
-; CHECK-NEXT: b.le .LBB5_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true3
+; CHECK-NEXT: b .LBB5_4
+; CHECK-NEXT: .LBB5_2: // %lor.lhs.false
+; CHECK-NEXT: b.le .LBB5_5
+; CHECK-NEXT: // %bb.3: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: .LBB5_4: // %return
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB5_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_6: // %if.end
+; CHECK-NEXT: .LBB5_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -499,24 +469,17 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
; CHECK-NEXT: // %bb.3: // %while.cond.while.end_crit_edge
; CHECK-NEXT: ldr w8, [x19]
; CHECK-NEXT: .LBB7_4: // %while.end
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: b.gt .LBB7_7
-; CHECK-NEXT: // %bb.5: // %land.lhs.true
-; CHECK-NEXT: adrp x8, :got:b
-; CHECK-NEXT: adrp x9, :got:d
-; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
-; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB7_7
-; CHECK-NEXT: // %bb.6:
-; CHECK-NEXT: mov w0, #123 // =0x7b
-; CHECK-NEXT: b .LBB7_8
-; CHECK-NEXT: .LBB7_7: // %if.end
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: .LBB7_8: // %return
+; CHECK-NEXT: adrp x9, :got:b
+; CHECK-NEXT: adrp x10, :got:d
+; CHECK-NEXT: ldr x9, [x9, :got_lo12:b]
+; CHECK-NEXT: ldr x10, [x10, :got_lo12:d]
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr w9, [x9]
+; CHECK-NEXT: ldr w10, [x10]
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: ccmp w8, #2, #0, eq
+; CHECK-NEXT: mov w8, #123 // =0x7b
+; CHECK-NEXT: csel w0, w8, wzr, lt
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
@@ -564,52 +527,41 @@ return: ; preds = %if.end, %land.lhs.t
define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: b.gt .LBB8_3
+; CHECK-NEXT: b.gt .LBB8_4
; CHECK-NEXT: // %bb.1: // %while.body.preheader
+; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: sub w19, w8, #1
; CHECK-NEXT: .LBB8_2: // %while.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: bl do_something
; CHECK-NEXT: adds w19, w19, #1
; CHECK-NEXT: b.mi .LBB8_2
-; CHECK-NEXT: .LBB8_3: // %while.end
-; CHECK-NEXT: adrp x8, :got:c
-; CHECK-NEXT: ldr x8, [x8, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: cmn w8, #2
-; CHECK-NEXT: b.lt .LBB8_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w19
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .LBB8_4: // %while.end
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
+; CHECK-NEXT: adrp x10, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: ldr x10, [x10, :got_lo12:c]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
+; CHECK-NEXT: ldr w10, [x10]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB8_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #123 // =0x7b
-; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w19
-; CHECK-NEXT: .cfi_restore w30
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB8_6: // %if.end
-; CHECK-NEXT: .cfi_restore_state
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w19
-; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: mov w8, #123 // =0x7b
+; CHECK-NEXT: ccmn w10, #3, #4, eq
+; CHECK-NEXT: csel w0, w8, wzr, gt
; CHECK-NEXT: ret
entry:
%0 = load i32, ptr @a, align 4
@@ -782,12 +734,14 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: csel x9, x0, xzr, gt
; CHECK-NEXT: str x9, [x1]
-; CHECK-NEXT: b.le .LBB11_2
+; CHECK-NEXT: b.le .LBB11_3
; CHECK-NEXT: // %bb.1: // %lor.lhs.false
; CHECK-NEXT: cmp w8, #2
-; CHECK-NEXT: b.ge .LBB11_4
-; CHECK-NEXT: b .LBB11_6
-; CHECK-NEXT: .LBB11_2: // %land.lhs.true
+; CHECK-NEXT: b.ge .LBB11_5
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB11_3: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
@@ -795,11 +749,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB11_4
-; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: b.ne .LBB11_5
+; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB11_4: // %land.lhs.true3
+; CHECK-NEXT: .LBB11_5: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
@@ -807,12 +761,7 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB11_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB11_6: // %if.end
-; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = load i32, ptr @a, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine_cse.ll b/llvm/test/CodeGen/AArch64/machine_cse.ll
index 6478f5a37f7826..df82e11441cb63 100644
--- a/llvm/test/CodeGen/AArch64/machine_cse.ll
+++ b/llvm/test/CodeGen/AArch64/machine_cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -phi-node-folding-threshold=0 -tail-dup-placement=0 | FileCheck %s
; -tail-dup-placement causes tail duplication during layout. This breaks the
; assumptions of the test case as written (specifically, it creates an
; additional cmp instruction, creating a false positive), so we pass
diff --git a/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll b/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll
index 1f30865c98e192..b235dd9f56815d 100644
--- a/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll
+++ b/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple aarch64-unknown-windows-msvc %s -o - | FileCheck %s
+; RUN: llc -mtriple aarch64-unknown-windows-msvc -phi-node-folding-threshold=0 %s -o - | FileCheck %s
declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll
index fb9139c0d1285e..8718d80220c30c 100644
--- a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll
+++ b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll
@@ -192,42 +192,35 @@ define i32 @f0(i1 %c0, i32 %v, ptr %p) {
; V7M-NEXT: lsls r0, r0, #31
; V7M-NEXT: beq .LBB1_2
; V7M-NEXT: @ %bb.1: @ %A
-; V7M-NEXT: tst.w r1, #16843009
-; V7M-NEXT: itt eq
-; V7M-NEXT: moveq r0, #0
-; V7M-NEXT: bxeq lr
-; V7M-NEXT: b .LBB1_3
+; V7M-NEXT: bics r0, r1, #-16843010
+; V7M-NEXT: it ne
+; V7M-NEXT: movne r0, #1
+; V7M-NEXT: bx lr
; V7M-NEXT: .LBB1_2: @ %B
; V7M-NEXT: movs r0, #1
-; V7M-NEXT: tst.w r1, #16843009
; V7M-NEXT: str r0, [r2]
-; V7M-NEXT: itt ne
-; V7M-NEXT: movne r0, #0
-; V7M-NEXT: bxne lr
-; V7M-NEXT: .LBB1_3: @ %D
-; V7M-NEXT: movs r0, #1
+; V7M-NEXT: bic r0, r1, #-16843010
+; V7M-NEXT: clz r0, r0
+; V7M-NEXT: lsrs r0, r0, #5
; V7M-NEXT: bx lr
;
; V7A-LABEL: f0:
; V7A: @ %bb.0: @ %E
-; V7A-NEXT: movw r3, #257
-; V7A-NEXT: tst r0, #1
-; V7A-NEXT: movt r3, #257
-; V7A-NEXT: and r1, r1, r3
-; V7A-NEXT: beq .LBB1_3
+; V7A-NEXT: mov r3, r0
+; V7A-NEXT: movw r0, #257
+; V7A-NEXT: movt r0, #257
+; V7A-NEXT: tst r3, #1
+; V7A-NEXT: and r0, r1, r0
+; V7A-NEXT: beq .LBB1_2
; V7A-NEXT: @ %bb.1: @ %A
-; V7A-NEXT: cmp r1, #0
-; V7A-NEXT: moveq r0, #0
-; V7A-NEXT: bxeq lr
-; V7A-NEXT: .LBB1_2: @ %D
-; V7A-NEXT: mov r0, #1
+; V7A-NEXT: cmp r0, #0
+; V7A-NEXT: movwne r0, #1
; V7A-NEXT: bx lr
-; V7A-NEXT: .LBB1_3: @ %B
-; V7A-NEXT: mov r0, #1
-; V7A-NEXT: cmp r1, #0
-; V7A-NEXT: str r0, [r2]
-; V7A-NEXT: mov r0, #0
-; V7A-NEXT: moveq r0, #1
+; V7A-NEXT: .LBB1_2: @ %B
+; V7A-NEXT: clz r0, r0
+; V7A-NEXT: mov r1, #1
+; V7A-NEXT: str r1, [r2]
+; V7A-NEXT: lsr r0, r0, #5
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: f0:
@@ -235,20 +228,16 @@ define i32 @f0(i1 %c0, i32 %v, ptr %p) {
; V7A-T-NEXT: lsls r0, r0, #31
; V7A-T-NEXT: beq .LBB1_2
; V7A-T-NEXT: @ %bb.1: @ %A
-; V7A-T-NEXT: tst.w r1, #16843009
-; V7A-T-NEXT: itt eq
-; V7A-T-NEXT: moveq r0, #0
-; V7A-T-NEXT: bxeq lr
-; V7A-T-NEXT: b .LBB1_3
+; V7A-T-NEXT: bics r0, r1, #-16843010
+; V7A-T-NEXT: it ne
+; V7A-T-NEXT: movne r0, #1
+; V7A-T-NEXT: bx lr
; V7A-T-NEXT: .LBB1_2: @ %B
; V7A-T-NEXT: movs r0, #1
-; V7A-T-NEXT: tst.w r1, #16843009
; V7A-T-NEXT: str r0, [r2]
-; V7A-T-NEXT: itt ne
-; V7A-T-NEXT: movne r0, #0
-; V7A-T-NEXT: bxne lr
-; V7A-T-NEXT: .LBB1_3: @ %D
-; V7A-T-NEXT: movs r0, #1
+; V7A-T-NEXT: bic r0, r1, #-16843010
+; V7A-T-NEXT: clz r0, r0
+; V7A-T-NEXT: lsrs r0, r0, #5
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: f0:
@@ -309,39 +298,32 @@ define i32 @f1(i1 %c0, i32 %v, ptr %p) {
; V7M-NEXT: lsls r0, r0, #31
; V7M-NEXT: beq .LBB2_2
; V7M-NEXT: @ %bb.1: @ %A
-; V7M-NEXT: tst.w r1, #100663296
-; V7M-NEXT: itt eq
-; V7M-NEXT: moveq r0, #0
-; V7M-NEXT: bxeq lr
-; V7M-NEXT: b .LBB2_3
+; V7M-NEXT: ands r0, r1, #100663296
+; V7M-NEXT: it ne
+; V7M-NEXT: movne r0, #1
+; V7M-NEXT: bx lr
; V7M-NEXT: .LBB2_2: @ %B
; V7M-NEXT: movs r0, #1
-; V7M-NEXT: tst.w r1, #100663296
; V7M-NEXT: str r0, [r2]
-; V7M-NEXT: itt ne
-; V7M-NEXT: movne r0, #0
-; V7M-NEXT: bxne lr
-; V7M-NEXT: .LBB2_3: @ %D
-; V7M-NEXT: movs r0, #1
+; V7M-NEXT: and r0, r1, #100663296
+; V7M-NEXT: clz r0, r0
+; V7M-NEXT: lsrs r0, r0, #5
; V7M-NEXT: bx lr
;
; V7A-LABEL: f1:
; V7A: @ %bb.0: @ %E
; V7A-NEXT: tst r0, #1
-; V7A-NEXT: beq .LBB2_3
+; V7A-NEXT: beq .LBB2_2
; V7A-NEXT: @ %bb.1: @ %A
-; V7A-NEXT: tst r1, #100663296
-; V7A-NEXT: moveq r0, #0
-; V7A-NEXT: bxeq lr
-; V7A-NEXT: .LBB2_2: @ %D
-; V7A-NEXT: mov r0, #1
+; V7A-NEXT: ands r0, r1, #100663296
+; V7A-NEXT: movwne r0, #1
; V7A-NEXT: bx lr
-; V7A-NEXT: .LBB2_3: @ %B
+; V7A-NEXT: .LBB2_2: @ %B
; V7A-NEXT: mov r0, #1
-; V7A-NEXT: tst r1, #100663296
; V7A-NEXT: str r0, [r2]
-; V7A-NEXT: mov r0, #0
-; V7A-NEXT: moveq r0, #1
+; V7A-NEXT: and r0, r1, #100663296
+; V7A-NEXT: clz r0, r0
+; V7A-NEXT: lsr r0, r0, #5
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: f1:
@@ -349,20 +331,16 @@ define i32 @f1(i1 %c0, i32 %v, ptr %p) {
; V7A-T-NEXT: lsls r0, r0, #31
; V7A-T-NEXT: beq .LBB2_2
; V7A-T-NEXT: @ %bb.1: @ %A
-; V7A-T-NEXT: tst.w r1, #100663296
-; V7A-T-NEXT: itt eq
-; V7A-T-NEXT: moveq r0, #0
-; V7A-T-NEXT: bxeq lr
-; V7A-T-NEXT: b .LBB2_3
+; V7A-T-NEXT: ands r0, r1, #100663296
+; V7A-T-NEXT: it ne
+; V7A-T-NEXT: movne r0, #1
+; V7A-T-NEXT: bx lr
; V7A-T-NEXT: .LBB2_2: @ %B
; V7A-T-NEXT: movs r0, #1
-; V7A-T-NEXT: tst.w r1, #100663296
; V7A-T-NEXT: str r0, [r2]
-; V7A-T-NEXT: itt ne
-; V7A-T-NEXT: movne r0, #0
-; V7A-T-NEXT: bxne lr
-; V7A-T-NEXT: .LBB2_3: @ %D
-; V7A-T-NEXT: movs r0, #1
+; V7A-T-NEXT: and r0, r1, #100663296
+; V7A-T-NEXT: clz r0, r0
+; V7A-T-NEXT: lsrs r0, r0, #5
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: f1:
diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
index d076cb00ad7e0e..858a5b6d0039da 100644
--- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -phi-node-folding-threshold=0 -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s
@var_36 = hidden local_unnamed_addr global i8 0, align 1
@arr_61 = hidden local_unnamed_addr global [1 x i32] zeroinitializer, align 4
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
index dd5ff12fda6132..333480e0eb95f0 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -O2 -mattr=avx < %s | opt -expand-reductions -mattr=avx -S | FileCheck %s
+; RUN: opt -O2 -phi-node-folding-threshold=0 -mattr=avx < %s | opt -expand-reductions -mattr=avx -S | FileCheck %s
; Test if SLP vector reduction patterns are recognized
; and optionally converted to reduction intrinsics and
diff --git a/llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll b/llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
index aa4fca8da1470e..648a52259429ba 100644
--- a/llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
@@ -50,16 +50,15 @@ Exit: ; preds = %Succ
define void @b() {
; CHECK-LABEL: @b(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[BB_NOMERGE:%.*]]
-; CHECK: BB.nomerge:
; CHECK-NEXT: br label [[SUCC:%.*]]
; CHECK: Succ:
-; CHECK-NEXT: [[B:%.*]] = phi i32 [ 1, [[BB_NOMERGE]] ], [ 2, [[COMMON:%.*]] ]
+; CHECK-NEXT: [[B:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[SPEC_SELECT:%.*]], [[COMMON:%.*]] ]
; CHECK-NEXT: [[CONDE:%.*]] = call i1 @foo()
; CHECK-NEXT: br i1 [[CONDE]], label [[COMMON]], label [[EXIT:%.*]]
; CHECK: Common:
; CHECK-NEXT: [[COND:%.*]] = call i1 @foo()
-; CHECK-NEXT: br i1 [[COND]], label [[BB_NOMERGE]], label [[SUCC]]
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[COND]], i32 1, i32 2
+; CHECK-NEXT: br label [[SUCC]]
; CHECK: Exit:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll b/llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
index 6831102955a724..1c97ee7c5a330a 100644
--- a/llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
+++ b/llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
@@ -224,16 +224,15 @@ Exit: ; preds = %Succ
define void @b() {
; CHECK-LABEL: @b(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[BB_NOMERGE:%.*]]
-; CHECK: BB.nomerge:
; CHECK-NEXT: br label [[SUCC:%.*]]
; CHECK: Succ:
-; CHECK-NEXT: [[B:%.*]] = phi i32 [ 1, [[BB_NOMERGE]] ], [ 2, [[COMMON:%.*]] ]
+; CHECK-NEXT: [[B:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[SPEC_SELECT:%.*]], [[COMMON:%.*]] ]
; CHECK-NEXT: [[CONDE:%.*]] = call i1 @foo()
; CHECK-NEXT: br i1 [[CONDE]], label [[COMMON]], label [[EXIT:%.*]]
; CHECK: Common:
; CHECK-NEXT: [[COND:%.*]] = call i1 @foo()
-; CHECK-NEXT: br i1 [[COND]], label [[BB_NOMERGE]], label [[SUCC]]
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[COND]], i32 1, i32 2
+; CHECK-NEXT: br label [[SUCC]]
; CHECK: Exit:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll b/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll
index 1f85460d85fbbe..b835c115a6c90f 100644
--- a/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll
+++ b/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll
@@ -87,14 +87,14 @@ define i1 @speculate_empty_bb(i32 %x, i32 %y) {
; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
; YES: bb6:
; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
-; YES-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
+; YES-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP2]], i1 true, i1 false
+; YES-NEXT: br label [[BB3:%.*]]
; YES: bb5:
; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
-; YES-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
-; YES: bb2:
+; YES-NEXT: [[SPEC_SELECT1:%.*]] = select i1 [[CMP3]], i1 false, i1 true
; YES-NEXT: br label [[BB3]]
; YES: bb3:
-; YES-NEXT: [[RET:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ]
+; YES-NEXT: [[RET:%.*]] = phi i1 [ [[SPEC_SELECT]], [[BB6]] ], [ [[SPEC_SELECT1]], [[BB5]] ]
; YES-NEXT: ret i1 [[RET]]
;
; NO-LABEL: define i1 @speculate_empty_bb
@@ -139,17 +139,11 @@ define i32 @speculate_empty_bb_not_simplifiable(i32 %x, i32 %y) {
; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
; YES-NEXT: start:
; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0
-; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]]
-; YES: bb6:
; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
-; YES-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]]
-; YES: bb5:
+; YES-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP2]], i32 10, i32 20
; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]]
-; YES-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]]
-; YES: bb2:
-; YES-NEXT: br label [[BB3]]
-; YES: bb3:
-; YES-NEXT: [[RET:%.*]] = phi i32 [ 10, [[BB2]] ], [ 20, [[BB6]] ], [ 30, [[BB5]] ]
+; YES-NEXT: [[SPEC_SELECT1:%.*]] = select i1 [[CMP3]], i32 30, i32 10
+; YES-NEXT: [[RET:%.*]] = select i1 [[CMP1]], i32 [[SPEC_SELECT]], i32 [[SPEC_SELECT1]]
; YES-NEXT: ret i32 [[RET]]
;
; NO-LABEL: define i32 @speculate_empty_bb_not_simplifiable
>From 9c51c23b68f842fb0a3fbfba0d99fd8f1d57b8e3 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 24 Dec 2024 08:17:12 +0800
Subject: [PATCH 3/3] [SimplifyCFG] Address review comments.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 17e339d1d9f251..42d8a7854039d3 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3515,7 +3515,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
/// %cmp = icmp ult %x, %y
/// br i1 %cmp, label %EndBB, label %ThenBB
/// ThenBB:
-/// br label BB2
+/// br label %EndBB
/// EndBB:
/// %phi = phi i1 [ true, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
/// ...
@@ -3525,12 +3525,12 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
/// \code
/// BB:
/// %cmp = icmp ult %x, %y
-/// %sel = select i1 %cmp, i1 true, i1 false
+/// %sel = select i1 %cmp, i1 false, i1 true
/// br label %EndBB
/// ThenBB:
-/// br label BB2
+/// br label %EndBB
/// EndBB:
-/// %phi = phi i1 [ %sel, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
+/// %phi = phi i1 [ true, %ThenBB ], [ %sel, %BB ], [ false, %OtherBB ]
/// ...
/// \endcode
/// \returns true if the branch edge is removed.
@@ -3593,6 +3593,7 @@ static bool speculativelyExecuteEmptyBB(BranchInst *BI, bool Invert,
if (DTU)
DTU->applyUpdates({{DominatorTree::Delete, BB, ThenBB}});
+ ++NumSpeculations;
return true;
}
More information about the llvm-commits mailing list