[llvm] [SimplifyCFG] Speculatively execute empty BBs with multiple predecessors (PR #120905)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 22 07:41:33 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
`SimplifyCFGOpt::speculativelyExecuteBB` flattens CFG by hoisting instructions of `ThenBB` and creating select instructions for phi nodes in `EndBB`. However, it doesn't work when there are other BBs that jumps into `ThenBB`.
This patch allows SimplifyCFG to speculate empty `ThenBB` with multiple predecessors. It will reduce the number of branches and enable more folding opportunities.
Closes https://github.com/llvm/llvm-project/issues/120539.
---
Patch is 38.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120905.diff
12 Files Affected:
- (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+97)
- (modified) llvm/test/CodeGen/AArch64/and-sink.ll (+4-5)
- (modified) llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll (+81-132)
- (modified) llvm/test/CodeGen/AArch64/machine_cse.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll (+1-1)
- (modified) llvm/test/CodeGen/ARM/and-cmp0-sink.ll (+48-70)
- (modified) llvm/test/CodeGen/Thumb2/mve-blockplacement.ll (+1-1)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll (+1-1)
- (modified) llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll (+3-4)
- (modified) llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll (+3-4)
- (modified) llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll (+241)
``````````diff
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 17f4b396f753b4..17e339d1d9f251 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3506,6 +3506,96 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
return true;
}
+/// Speculate a conditional basic block flattening the CFG.
+/// Compared to speculativelyExecuteBB, it allows \p ThenBB to have multiple
+/// predecessors other than the current BB. An illustration of this transform is
+/// turning this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// br label BB2
+/// EndBB:
+/// %phi = phi i1 [ true, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
+/// ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// %sel = select i1 %cmp, i1 true, i1 false
+/// br label %EndBB
+/// ThenBB:
+/// br label BB2
+/// EndBB:
+/// %phi = phi i1 [ %sel, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
+/// ...
+/// \endcode
+/// \returns true if the branch edge is removed.
+static bool speculativelyExecuteEmptyBB(BranchInst *BI, bool Invert,
+ DomTreeUpdater *DTU,
+ const TargetTransformInfo &TTI) {
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *ThenBB = BI->getSuccessor(Invert);
+ BasicBlock *EndBB = BI->getSuccessor(!Invert);
+
+ BranchInst *SuccBI = dyn_cast<BranchInst>(ThenBB->getTerminator());
+ if (!SuccBI || !SuccBI->isUnconditional() || SuccBI->getSuccessor(0) != EndBB)
+ return false;
+ if (&ThenBB->front() != SuccBI)
+ return false;
+ if (!isProfitableToSpeculate(BI, Invert, TTI))
+ return false;
+
+ InstructionCost Budget =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ InstructionCost Cost = 0;
+ unsigned SpeculatedInstructions = 0;
+ if (!validateAndCostRequiredSelects(BB, ThenBB, EndBB, SpeculatedInstructions,
+ Cost, TTI) ||
+ Cost > Budget)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+
+ // Insert selects and rewrite the PHI operands.
+ Value *BrCond = BI->getCondition();
+ IRBuilder<NoFolder> Builder(BI);
+ for (PHINode &PN : EndBB->phis()) {
+ unsigned OrigI = PN.getBasicBlockIndex(BB);
+ unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN.getIncomingValue(OrigI);
+ Value *ThenV = PN.getIncomingValue(ThenI);
+
+ // Skip PHIs which are trivial.
+ if (OrigV == ThenV)
+ continue;
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the pre-existing value. Swap them if the branch
+ // destinations were inverted.
+ Value *TrueV = ThenV, *FalseV = OrigV;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
+ PN.setIncomingValue(OrigI, V);
+ }
+
+ // Modify CFG
+ ThenBB->removePredecessor(BB);
+ BranchInst *NewBI = Builder.CreateBr(EndBB);
+ // Transfer the metadata to the new branch instruction.
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+ BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, ThenBB}});
+
+ return true;
+}
+
/// Return true if we can thread a branch across this block.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
int Size = 0;
@@ -8125,6 +8215,13 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return requestResimplify();
}
+ if (Options.SpeculateBlocks) {
+ if (speculativelyExecuteEmptyBB(BI, /*Invert=*/false, DTU, TTI))
+ return true;
+ if (speculativelyExecuteEmptyBB(BI, /*Invert=*/true, DTU, TTI))
+ return true;
+ }
+
// If this is a branch on something for which we know the constant value in
// predecessors (e.g. a phi node in the current block), thread control
// through this block.
diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll
index c84310629e5fda..277091a65e862b 100644
--- a/llvm/test/CodeGen/AArch64/and-sink.ll
+++ b/llvm/test/CodeGen/AArch64/and-sink.ll
@@ -11,15 +11,14 @@
define dso_local i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-LABEL: and_sink1:
; CHECK: // %bb.0:
-; CHECK-NEXT: tbz w1, #0, .LBB0_3
+; CHECK-NEXT: tbz w1, #0, .LBB0_2
; CHECK-NEXT: // %bb.1: // %bb0
+; CHECK-NEXT: tst w0, #0x4
; CHECK-NEXT: adrp x8, A
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: str wzr, [x8, :lo12:A]
-; CHECK-NEXT: tbnz w0, #2, .LBB0_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_3: // %bb2
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
index 3645718968f9e3..c1932f055a4f79 100644
--- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
+++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -phi-node-folding-threshold=0 | FileCheck %s
; When consuming profile data we sometimes flip a branch to improve runtime
; performance. If we are optimizing for size, we avoid changing the branch to
diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index 6449c3e11d6672..dc23ae0af31572 100644
--- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -13,10 +13,10 @@ define i32 @combine_gt_ge_10() #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: cmp w8, #10
+; CHECK-NEXT: ldr w9, [x8]
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
+; CHECK-NEXT: cmp w9, #10
; CHECK-NEXT: b.le .LBB0_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x9, :got:c
@@ -29,18 +29,17 @@ define i32 @combine_gt_ge_10() #0 {
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %lor.lhs.false
-; CHECK-NEXT: b.lt .LBB0_6
+; CHECK-NEXT: cmp w9, #10
+; CHECK-NEXT: b.lt .LBB0_5
; CHECK-NEXT: .LBB0_4: // %land.lhs.true3
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB0_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_6: // %if.end
+; CHECK-NEXT: .LBB0_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -80,34 +79,27 @@ define i32 @combine_gt_lt_5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #5
-; CHECK-NEXT: b.le .LBB1_3
+; CHECK-NEXT: b.le .LBB1_2
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB1_6
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_3: // %lor.lhs.false
-; CHECK-NEXT: b.ge .LBB1_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true3
+; CHECK-NEXT: b .LBB1_4
+; CHECK-NEXT: .LBB1_2: // %lor.lhs.false
+; CHECK-NEXT: b.ge .LBB1_5
+; CHECK-NEXT: // %bb.3: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: .LBB1_4: // %return
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB1_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_6: // %if.end
+; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -145,10 +137,10 @@ define i32 @combine_lt_ge_5() #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: cmp w8, #5
+; CHECK-NEXT: ldr w9, [x8]
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
+; CHECK-NEXT: cmp w9, #5
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x9, :got:c
@@ -161,18 +153,17 @@ define i32 @combine_lt_ge_5() #0 {
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_3: // %lor.lhs.false
-; CHECK-NEXT: b.gt .LBB2_6
+; CHECK-NEXT: cmp w9, #5
+; CHECK-NEXT: b.gt .LBB2_5
; CHECK-NEXT: .LBB2_4: // %land.lhs.true3
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB2_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB2_6: // %if.end
+; CHECK-NEXT: .LBB2_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -212,34 +203,27 @@ define i32 @combine_lt_gt_5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #5
-; CHECK-NEXT: b.ge .LBB3_3
+; CHECK-NEXT: b.ge .LBB3_2
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB3_6
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB3_3: // %lor.lhs.false
-; CHECK-NEXT: b.le .LBB3_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true3
+; CHECK-NEXT: b .LBB3_4
+; CHECK-NEXT: .LBB3_2: // %lor.lhs.false
+; CHECK-NEXT: b.le .LBB3_5
+; CHECK-NEXT: // %bb.3: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: .LBB3_4: // %return
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB3_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB3_6: // %if.end
+; CHECK-NEXT: .LBB3_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -279,34 +263,27 @@ define i32 @combine_gt_lt_n5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #5
-; CHECK-NEXT: b.le .LBB4_3
+; CHECK-NEXT: b.le .LBB4_2
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB4_6
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB4_3: // %lor.lhs.false
-; CHECK-NEXT: b.ge .LBB4_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true3
+; CHECK-NEXT: b .LBB4_4
+; CHECK-NEXT: .LBB4_2: // %lor.lhs.false
+; CHECK-NEXT: b.ge .LBB4_5
+; CHECK-NEXT: // %bb.3: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: .LBB4_4: // %return
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB4_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB4_6: // %if.end
+; CHECK-NEXT: .LBB4_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -346,34 +323,27 @@ define i32 @combine_lt_gt_n5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #5
-; CHECK-NEXT: b.ge .LBB5_3
+; CHECK-NEXT: b.ge .LBB5_2
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB5_6
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_3: // %lor.lhs.false
-; CHECK-NEXT: b.le .LBB5_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true3
+; CHECK-NEXT: b .LBB5_4
+; CHECK-NEXT: .LBB5_2: // %lor.lhs.false
+; CHECK-NEXT: b.le .LBB5_5
+; CHECK-NEXT: // %bb.3: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: .LBB5_4: // %return
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB5_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_6: // %if.end
+; CHECK-NEXT: .LBB5_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@@ -499,24 +469,17 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
; CHECK-NEXT: // %bb.3: // %while.cond.while.end_crit_edge
; CHECK-NEXT: ldr w8, [x19]
; CHECK-NEXT: .LBB7_4: // %while.end
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: b.gt .LBB7_7
-; CHECK-NEXT: // %bb.5: // %land.lhs.true
-; CHECK-NEXT: adrp x8, :got:b
-; CHECK-NEXT: adrp x9, :got:d
-; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
-; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: ldr w9, [x9]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB7_7
-; CHECK-NEXT: // %bb.6:
-; CHECK-NEXT: mov w0, #123 // =0x7b
-; CHECK-NEXT: b .LBB7_8
-; CHECK-NEXT: .LBB7_7: // %if.end
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: .LBB7_8: // %return
+; CHECK-NEXT: adrp x9, :got:b
+; CHECK-NEXT: adrp x10, :got:d
+; CHECK-NEXT: ldr x9, [x9, :got_lo12:b]
+; CHECK-NEXT: ldr x10, [x10, :got_lo12:d]
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr w9, [x9]
+; CHECK-NEXT: ldr w10, [x10]
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: ccmp w8, #2, #0, eq
+; CHECK-NEXT: mov w8, #123 // =0x7b
+; CHECK-NEXT: csel w0, w8, wzr, lt
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
@@ -564,52 +527,41 @@ return: ; preds = %if.end, %land.lhs.t
define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: b.gt .LBB8_3
+; CHECK-NEXT: b.gt .LBB8_4
; CHECK-NEXT: // %bb.1: // %while.body.preheader
+; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: sub w19, w8, #1
; CHECK-NEXT: .LBB8_2: // %while.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: bl do_something
; CHECK-NEXT: adds w19, w19, #1
; CHECK-NEXT: b.mi .LBB8_2
-; CHECK-NEXT: .LBB8_3: // %while.end
-; CHECK-NEXT: adrp x8, :got:c
-; CHECK-NEXT: ldr x8, [x8, :got_lo12:c]
-; CHECK-NEXT: ldr w8, [x8]
-; CHECK-NEXT: cmn w8, #2
-; CHECK-NEXT: b.lt .LBB8_6
-; CHECK-NEXT: // %bb.4: // %land.lhs.true
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w19
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .LBB8_4: // %while.end
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
+; CHECK-NEXT: adrp x10, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT: ldr x10, [x10, :got_lo12:c]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
+; CHECK-NEXT: ldr w10, [x10]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB8_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #123 // =0x7b
-; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w19
-; CHECK-NEXT: .cfi_restore w30
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB8_6: // %if.end
-; CHECK-NEXT: .cfi_restore_state
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w19
-; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: mov w8, #123 // =0x7b
+; CHECK-NEXT: ccmn w10, #3, #4, eq
+; CHECK-NEXT: csel w0, w8, wzr, gt
; CHECK-NEXT: ret
entry:
%0 = load i32, ptr @a, align 4
@@ -782,12 +734,14 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: csel x9, x0, xzr, gt
; CHECK-NEXT: str x9, [x1]
-; CHECK-NEXT: b.le .LBB11_2
+; CHECK-NEXT: b.le .LBB11_3
; CHECK-NEXT: // %bb.1: // %lor.lhs.false
; CHECK-NEXT: cmp w8, #2
-; CHECK-NEXT: b.ge .LBB11_4
-; CHECK-NEXT: b .LBB11_6
-; CHECK-NEXT: .LBB11_2: // %land.lhs.true
+; CHECK-NEXT: b.ge .LBB11_5
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB11_3: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
@@ -795,11 +749,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB11_4
-; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: b.ne .LBB11_5
+; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB11_4: // %land.lhs.true3
+; CHECK-NEXT: .LBB11_5: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
@@ -807,12 +761,7 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB11_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB11_6: // %if.end
-; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = load i32, ptr @a, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine_cse.ll b/llvm/test/CodeGen/AArch64/machine_cse.ll
index 6478f5a37f7826..df82e11441cb63 100644
--- a/llvm/test/CodeGen/AArch64/machine_cse.ll
+++ b/llvm/test/CodeGen/AArch64/machine_cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/120905
More information about the llvm-commits
mailing list