[llvm] [SimplifyCFG] Speculatively execute empty BBs with multiple predecessors (PR #120905)

via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 22 07:41:33 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Yingwei Zheng (dtcxzyw)

<details>
<summary>Changes</summary>

`SimplifyCFGOpt::speculativelyExecuteBB` flattens CFG by hoisting instructions of `ThenBB` and creating select instructions for phi nodes in `EndBB`. However, it doesn't work when there are other BBs that jumps into `ThenBB`.

This patch allows SimplifyCFG to speculate empty `ThenBB` with multiple predecessors. It will reduce the number of branches and enable more folding opportunities.

Closes https://github.com/llvm/llvm-project/issues/120539.


---

Patch is 38.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120905.diff


12 Files Affected:

- (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+97) 
- (modified) llvm/test/CodeGen/AArch64/and-sink.ll (+4-5) 
- (modified) llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll (+81-132) 
- (modified) llvm/test/CodeGen/AArch64/machine_cse.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll (+1-1) 
- (modified) llvm/test/CodeGen/ARM/and-cmp0-sink.ll (+48-70) 
- (modified) llvm/test/CodeGen/Thumb2/mve-blockplacement.ll (+1-1) 
- (modified) llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll (+1-1) 
- (modified) llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll (+3-4) 
- (modified) llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll (+3-4) 
- (modified) llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll (+241) 


``````````diff
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 17f4b396f753b4..17e339d1d9f251 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3506,6 +3506,96 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   return true;
 }
 
+/// Speculate a conditional basic block flattening the CFG.
+/// Compared to speculativelyExecuteBB, it allows \p ThenBB to have multiple
+/// predecessors other than the current BB. An illustration of this transform is
+/// turning this IR:
+/// \code
+///   BB:
+///     %cmp = icmp ult %x, %y
+///     br i1 %cmp, label %EndBB, label %ThenBB
+///   ThenBB:
+///     br label BB2
+///   EndBB:
+///     %phi = phi i1 [ true, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
+///     ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+///   BB:
+///     %cmp = icmp ult %x, %y
+///     %sel = select i1 %cmp, i1 true, i1 false
+///     br label %EndBB
+///   ThenBB:
+///     br label BB2
+///   EndBB:
+///     %phi = phi i1 [ %sel, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
+///     ...
+/// \endcode
+/// \returns true if the branch edge is removed.
+static bool speculativelyExecuteEmptyBB(BranchInst *BI, bool Invert,
+                                        DomTreeUpdater *DTU,
+                                        const TargetTransformInfo &TTI) {
+  BasicBlock *BB = BI->getParent();
+  BasicBlock *ThenBB = BI->getSuccessor(Invert);
+  BasicBlock *EndBB = BI->getSuccessor(!Invert);
+
+  BranchInst *SuccBI = dyn_cast<BranchInst>(ThenBB->getTerminator());
+  if (!SuccBI || !SuccBI->isUnconditional() || SuccBI->getSuccessor(0) != EndBB)
+    return false;
+  if (&ThenBB->front() != SuccBI)
+    return false;
+  if (!isProfitableToSpeculate(BI, Invert, TTI))
+    return false;
+
+  InstructionCost Budget =
+      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+  InstructionCost Cost = 0;
+  unsigned SpeculatedInstructions = 0;
+  if (!validateAndCostRequiredSelects(BB, ThenBB, EndBB, SpeculatedInstructions,
+                                      Cost, TTI) ||
+      Cost > Budget)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+
+  // Insert selects and rewrite the PHI operands.
+  Value *BrCond = BI->getCondition();
+  IRBuilder<NoFolder> Builder(BI);
+  for (PHINode &PN : EndBB->phis()) {
+    unsigned OrigI = PN.getBasicBlockIndex(BB);
+    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
+    Value *OrigV = PN.getIncomingValue(OrigI);
+    Value *ThenV = PN.getIncomingValue(ThenI);
+
+    // Skip PHIs which are trivial.
+    if (OrigV == ThenV)
+      continue;
+
+    // Create a select whose true value is the speculatively executed value and
+    // false value is the pre-existing value. Swap them if the branch
+    // destinations were inverted.
+    Value *TrueV = ThenV, *FalseV = OrigV;
+    if (Invert)
+      std::swap(TrueV, FalseV);
+    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
+    PN.setIncomingValue(OrigI, V);
+  }
+
+  // Modify CFG
+  ThenBB->removePredecessor(BB);
+  BranchInst *NewBI = Builder.CreateBr(EndBB);
+  // Transfer the metadata to the new branch instruction.
+  NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
+                            LLVMContext::MD_annotation});
+  BI->eraseFromParent();
+  if (DTU)
+    DTU->applyUpdates({{DominatorTree::Delete, BB, ThenBB}});
+
+  return true;
+}
+
 /// Return true if we can thread a branch across this block.
 static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
   int Size = 0;
@@ -8125,6 +8215,13 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
         return requestResimplify();
   }
 
+  if (Options.SpeculateBlocks) {
+    if (speculativelyExecuteEmptyBB(BI, /*Invert=*/false, DTU, TTI))
+      return true;
+    if (speculativelyExecuteEmptyBB(BI, /*Invert=*/true, DTU, TTI))
+      return true;
+  }
+
   // If this is a branch on something for which we know the constant value in
   // predecessors (e.g. a phi node in the current block), thread control
   // through this block.
diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll
index c84310629e5fda..277091a65e862b 100644
--- a/llvm/test/CodeGen/AArch64/and-sink.ll
+++ b/llvm/test/CodeGen/AArch64/and-sink.ll
@@ -11,15 +11,14 @@
 define dso_local i32 @and_sink1(i32 %a, i1 %c) {
 ; CHECK-LABEL: and_sink1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w1, #0, .LBB0_3
+; CHECK-NEXT:    tbz w1, #0, .LBB0_2
 ; CHECK-NEXT:  // %bb.1: // %bb0
+; CHECK-NEXT:    tst w0, #0x4
 ; CHECK-NEXT:    adrp x8, A
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    str wzr, [x8, :lo12:A]
-; CHECK-NEXT:    tbnz w0, #2, .LBB0_3
-; CHECK-NEXT:  // %bb.2:
-; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_3: // %bb2
+; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 
diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
index 3645718968f9e3..c1932f055a4f79 100644
--- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
+++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -phi-node-folding-threshold=0 | FileCheck %s
 
 ; When consuming profile data we sometimes flip a branch to improve runtime
 ; performance. If we are optimizing for size, we avoid changing the branch to
diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index 6449c3e11d6672..dc23ae0af31572 100644
--- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -13,10 +13,10 @@ define i32 @combine_gt_ge_10() #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, :got:a
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT:    ldr w8, [x8]
-; CHECK-NEXT:    cmp w8, #10
+; CHECK-NEXT:    ldr w9, [x8]
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
+; CHECK-NEXT:    cmp w9, #10
 ; CHECK-NEXT:    b.le .LBB0_3
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    adrp x9, :got:c
@@ -29,18 +29,17 @@ define i32 @combine_gt_ge_10() #0 {
 ; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_3: // %lor.lhs.false
-; CHECK-NEXT:    b.lt .LBB0_6
+; CHECK-NEXT:    cmp w9, #10
+; CHECK-NEXT:    b.lt .LBB0_5
 ; CHECK-NEXT:  .LBB0_4: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB0_6
-; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_6: // %if.end
+; CHECK-NEXT:  .LBB0_5:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
@@ -80,34 +79,27 @@ define i32 @combine_gt_lt_5() #0 {
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    cmp w8, #5
-; CHECK-NEXT:    b.le .LBB1_3
+; CHECK-NEXT:    b.le .LBB1_2
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:c
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT:    ldr w8, [x8]
-; CHECK-NEXT:    ldr w9, [x9]
-; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB1_6
-; CHECK-NEXT:  // %bb.2:
-; CHECK-NEXT:    mov w0, #1 // =0x1
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB1_3: // %lor.lhs.false
-; CHECK-NEXT:    b.ge .LBB1_6
-; CHECK-NEXT:  // %bb.4: // %land.lhs.true3
+; CHECK-NEXT:    b .LBB1_4
+; CHECK-NEXT:  .LBB1_2: // %lor.lhs.false
+; CHECK-NEXT:    b.ge .LBB1_5
+; CHECK-NEXT:  // %bb.3: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT:  .LBB1_4: // %return
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB1_6
-; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB1_6: // %if.end
+; CHECK-NEXT:  .LBB1_5:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
@@ -145,10 +137,10 @@ define i32 @combine_lt_ge_5() #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, :got:a
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT:    ldr w8, [x8]
-; CHECK-NEXT:    cmp w8, #5
+; CHECK-NEXT:    ldr w9, [x8]
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
+; CHECK-NEXT:    cmp w9, #5
 ; CHECK-NEXT:    b.ge .LBB2_3
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    adrp x9, :got:c
@@ -161,18 +153,17 @@ define i32 @combine_lt_ge_5() #0 {
 ; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB2_3: // %lor.lhs.false
-; CHECK-NEXT:    b.gt .LBB2_6
+; CHECK-NEXT:    cmp w9, #5
+; CHECK-NEXT:    b.gt .LBB2_5
 ; CHECK-NEXT:  .LBB2_4: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB2_6
-; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB2_6: // %if.end
+; CHECK-NEXT:  .LBB2_5:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
@@ -212,34 +203,27 @@ define i32 @combine_lt_gt_5() #0 {
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    cmp w8, #5
-; CHECK-NEXT:    b.ge .LBB3_3
+; CHECK-NEXT:    b.ge .LBB3_2
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:c
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT:    ldr w8, [x8]
-; CHECK-NEXT:    ldr w9, [x9]
-; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB3_6
-; CHECK-NEXT:  // %bb.2:
-; CHECK-NEXT:    mov w0, #1 // =0x1
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB3_3: // %lor.lhs.false
-; CHECK-NEXT:    b.le .LBB3_6
-; CHECK-NEXT:  // %bb.4: // %land.lhs.true3
+; CHECK-NEXT:    b .LBB3_4
+; CHECK-NEXT:  .LBB3_2: // %lor.lhs.false
+; CHECK-NEXT:    b.le .LBB3_5
+; CHECK-NEXT:  // %bb.3: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT:  .LBB3_4: // %return
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB3_6
-; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB3_6: // %if.end
+; CHECK-NEXT:  .LBB3_5:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
@@ -279,34 +263,27 @@ define i32 @combine_gt_lt_n5() #0 {
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    cmn w8, #5
-; CHECK-NEXT:    b.le .LBB4_3
+; CHECK-NEXT:    b.le .LBB4_2
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:c
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT:    ldr w8, [x8]
-; CHECK-NEXT:    ldr w9, [x9]
-; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB4_6
-; CHECK-NEXT:  // %bb.2:
-; CHECK-NEXT:    mov w0, #1 // =0x1
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB4_3: // %lor.lhs.false
-; CHECK-NEXT:    b.ge .LBB4_6
-; CHECK-NEXT:  // %bb.4: // %land.lhs.true3
+; CHECK-NEXT:    b .LBB4_4
+; CHECK-NEXT:  .LBB4_2: // %lor.lhs.false
+; CHECK-NEXT:    b.ge .LBB4_5
+; CHECK-NEXT:  // %bb.3: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT:  .LBB4_4: // %return
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB4_6
-; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB4_6: // %if.end
+; CHECK-NEXT:  .LBB4_5:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
@@ -346,34 +323,27 @@ define i32 @combine_lt_gt_n5() #0 {
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    cmn w8, #5
-; CHECK-NEXT:    b.ge .LBB5_3
+; CHECK-NEXT:    b.ge .LBB5_2
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:c
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:c]
-; CHECK-NEXT:    ldr w8, [x8]
-; CHECK-NEXT:    ldr w9, [x9]
-; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB5_6
-; CHECK-NEXT:  // %bb.2:
-; CHECK-NEXT:    mov w0, #1 // =0x1
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB5_3: // %lor.lhs.false
-; CHECK-NEXT:    b.le .LBB5_6
-; CHECK-NEXT:  // %bb.4: // %land.lhs.true3
+; CHECK-NEXT:    b .LBB5_4
+; CHECK-NEXT:  .LBB5_2: // %lor.lhs.false
+; CHECK-NEXT:    b.le .LBB5_5
+; CHECK-NEXT:  // %bb.3: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT:  .LBB5_4: // %return
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB5_6
-; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB5_6: // %if.end
+; CHECK-NEXT:  .LBB5_5:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
@@ -499,24 +469,17 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
 ; CHECK-NEXT:  // %bb.3: // %while.cond.while.end_crit_edge
 ; CHECK-NEXT:    ldr w8, [x19]
 ; CHECK-NEXT:  .LBB7_4: // %while.end
-; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    b.gt .LBB7_7
-; CHECK-NEXT:  // %bb.5: // %land.lhs.true
-; CHECK-NEXT:    adrp x8, :got:b
-; CHECK-NEXT:    adrp x9, :got:d
-; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
-; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
-; CHECK-NEXT:    ldr w8, [x8]
-; CHECK-NEXT:    ldr w9, [x9]
-; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB7_7
-; CHECK-NEXT:  // %bb.6:
-; CHECK-NEXT:    mov w0, #123 // =0x7b
-; CHECK-NEXT:    b .LBB7_8
-; CHECK-NEXT:  .LBB7_7: // %if.end
-; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:  .LBB7_8: // %return
+; CHECK-NEXT:    adrp x9, :got:b
+; CHECK-NEXT:    adrp x10, :got:d
+; CHECK-NEXT:    ldr x9, [x9, :got_lo12:b]
+; CHECK-NEXT:    ldr x10, [x10, :got_lo12:d]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr w9, [x9]
+; CHECK-NEXT:    ldr w10, [x10]
+; CHECK-NEXT:    cmp w9, w10
+; CHECK-NEXT:    ccmp w8, #2, #0, eq
+; CHECK-NEXT:    mov w8, #123 // =0x7b
+; CHECK-NEXT:    csel w0, w8, wzr, lt
 ; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    .cfi_restore w19
@@ -564,52 +527,41 @@ return:                                           ; preds = %if.end, %land.lhs.t
 define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
 ; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    .cfi_remember_state
 ; CHECK-NEXT:    adrp x8, :got:a
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    b.gt .LBB8_3
+; CHECK-NEXT:    b.gt .LBB8_4
 ; CHECK-NEXT:  // %bb.1: // %while.body.preheader
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    sub w19, w8, #1
 ; CHECK-NEXT:  .LBB8_2: // %while.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    bl do_something
 ; CHECK-NEXT:    adds w19, w19, #1
 ; CHECK-NEXT:    b.mi .LBB8_2
-; CHECK-NEXT:  .LBB8_3: // %while.end
-; CHECK-NEXT:    adrp x8, :got:c
-; CHECK-NEXT:    ldr x8, [x8, :got_lo12:c]
-; CHECK-NEXT:    ldr w8, [x8]
-; CHECK-NEXT:    cmn w8, #2
-; CHECK-NEXT:    b.lt .LBB8_6
-; CHECK-NEXT:  // %bb.4: // %land.lhs.true
+; CHECK-NEXT:  // %bb.3:
+; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w19
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:  .LBB8_4: // %while.end
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:d
+; CHECK-NEXT:    adrp x10, :got:c
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT:    ldr x10, [x10, :got_lo12:c]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
+; CHECK-NEXT:    ldr w10, [x10]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB8_6
-; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    mov w0, #123 // =0x7b
-; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    .cfi_restore w19
-; CHECK-NEXT:    .cfi_restore w30
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB8_6: // %if.end
-; CHECK-NEXT:    .cfi_restore_state
-; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    .cfi_restore w19
-; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    mov w8, #123 // =0x7b
+; CHECK-NEXT:    ccmn w10, #3, #4, eq
+; CHECK-NEXT:    csel w0, w8, wzr, gt
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @a, align 4
@@ -782,12 +734,14 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    csel x9, x0, xzr, gt
 ; CHECK-NEXT:    str x9, [x1]
-; CHECK-NEXT:    b.le .LBB11_2
+; CHECK-NEXT:    b.le .LBB11_3
 ; CHECK-NEXT:  // %bb.1: // %lor.lhs.false
 ; CHECK-NEXT:    cmp w8, #2
-; CHECK-NEXT:    b.ge .LBB11_4
-; CHECK-NEXT:    b .LBB11_6
-; CHECK-NEXT:  .LBB11_2: // %land.lhs.true
+; CHECK-NEXT:    b.ge .LBB11_5
+; CHECK-NEXT:  // %bb.2:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB11_3: // %land.lhs.true
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:c
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
@@ -795,11 +749,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB11_4
-; CHECK-NEXT:  // %bb.3:
+; CHECK-NEXT:    b.ne .LBB11_5
+; CHECK-NEXT:  // %bb.4:
 ; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB11_4: // %land.lhs.true3
+; CHECK-NEXT:  .LBB11_5: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
@@ -807,12 +761,7 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB11_6
-; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB11_6: // %if.end
-; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @a, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine_cse.ll b/llvm/test/CodeGen/AArch64/machine_cse.ll
index 6478f5a37f7826..df82e11441cb63 100644
--- a/llvm/test/CodeGen/AArch64/machine_cse.ll
+++ b/llvm/test/CodeGen/AArch64/machine_cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/120905


More information about the llvm-commits mailing list