[llvm] [RISCV] Optimize conditional branches that can be statically evaluated (PR #131684)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 11:15:09 PDT 2025
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/131684
>From b77baa43ac212feb55c77826d2ceeff7c750912e Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 17 Mar 2025 18:56:50 -0700
Subject: [PATCH 1/7] precommit test case
---
llvm/test/CodeGen/RISCV/branch_zero.ll | 1 +
llvm/test/CodeGen/RISCV/simplify-condbr.ll | 108 +++++++++++++++++++++
2 files changed, 109 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/simplify-condbr.ll
diff --git a/llvm/test/CodeGen/RISCV/branch_zero.ll b/llvm/test/CodeGen/RISCV/branch_zero.ll
index fd0979977ba3b..0f50adec35a34 100644
--- a/llvm/test/CodeGen/RISCV/branch_zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch_zero.ll
@@ -83,3 +83,4 @@ exit1:
if.then:
br label %for.body
}
+
diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
new file mode 100644
index 0000000000000..4dff2ac53f9ed
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
+
+declare fastcc i1 @S_reginclass()
+
+define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) nounwind {
+; CHECK-LABEL: S_regrepeat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: andi a2, a2, 255
+; CHECK-NEXT: addi a4, a2, -19
+; CHECK-NEXT: li a5, 2
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: bltu a4, a5, .LBB0_4
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: bltu a1, a2, .LBB0_8
+; CHECK-NEXT: # %bb.2: # %do_exactf
+; CHECK-NEXT: andi a3, a3, 1
+; CHECK-NEXT: beqz a3, .LBB0_10
+; CHECK-NEXT: # %bb.3: # %land.rhs251
+; CHECK-NEXT: lw zero, 0(zero)
+; CHECK-NEXT: li s0, 1
+; CHECK-NEXT: bnez s0, .LBB0_9
+; CHECK-NEXT: j .LBB0_8
+; CHECK-NEXT: .LBB0_4: # %sw.bb336
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: andi s2, a3, 1
+; CHECK-NEXT: .LBB0_5: # %land.rhs345
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: call S_reginclass
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: beqz a0, .LBB0_7
+; CHECK-NEXT: # %bb.6: # %while.body350
+; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT: addiw s0, s0, 1
+; CHECK-NEXT: bnez s2, .LBB0_5
+; CHECK-NEXT: j .LBB0_8
+; CHECK-NEXT: .LBB0_7:
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: bnez s0, .LBB0_9
+; CHECK-NEXT: .LBB0_8: # %if.else1492
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB0_9: # %if.end1497
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_10:
+; CHECK-NEXT: bnez zero, .LBB0_9
+; CHECK-NEXT: j .LBB0_8
+entry:
+ switch i8 %0, label %if.else1492 [
+ i8 19, label %sw.bb336
+ i8 20, label %sw.bb336
+ i8 1, label %do_exactf
+ i8 0, label %do_exactf
+ ]
+
+do_exactf: ; preds = %entry, %entry
+ br i1 %cmp343, label %land.rhs251, label %if.end334
+
+land.rhs251: ; preds = %do_exactf
+ %bcmp414 = load volatile i32, ptr null, align 4
+ br label %if.end334
+
+if.end334: ; preds = %land.rhs251, %do_exactf
+ %hardcount.7 = phi i32 [ 0, %do_exactf ], [ 1, %land.rhs251 ]
+ call void @llvm.lifetime.end.p0(i64 0, ptr null)
+ br label %sw.epilog1489
+
+sw.bb336: ; preds = %entry, %entry
+ br label %land.rhs345
+
+land.rhs345: ; preds = %while.body350, %sw.bb336
+ %hardcount.8634 = phi i32 [ %inc356, %while.body350 ], [ 0, %sw.bb336 ]
+ %call347 = call fastcc i1 @S_reginclass()
+ br i1 %call347, label %while.body350, label %sw.epilog1489
+
+while.body350: ; preds = %land.rhs345
+ %inc356 = add i32 %hardcount.8634, 1
+ br i1 %cmp343, label %land.rhs345, label %if.end1497
+
+sw.epilog1489: ; preds = %land.rhs345, %if.end334
+ %hardcount.20 = phi i32 [ %hardcount.7, %if.end334 ], [ %hardcount.8634, %land.rhs345 ]
+ %tobool1490.not = icmp eq i32 %hardcount.20, 0
+ br i1 %tobool1490.not, label %if.else1492, label %if.end1497
+
+if.else1492: ; preds = %sw.epilog1489, %entry
+ br label %if.end1497
+
+if.end1497: ; preds = %if.else1492, %sw.epilog1489, %while.body350
+ %c.0 = phi i32 [ 0, %if.else1492 ], [ %max, %sw.epilog1489 ], [ 0, %while.body350 ]
+ ret i32 %c.0
+}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
>From a058ee5da0e777aab311d3fc4ea19d636fa11101 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 17 Mar 2025 11:36:13 -0700
Subject: [PATCH 2/7] [BranchFolding][RISCV] Optimize branches NE/EQ to zero or
one
There are two changes here.
The first is that we teach analyzeBranch how to
evaluate a conditional branch followed by an unconditional branch such that
we can evaluate the conditional branch statically. Often, we will see comparison
to one or zero since SelectionDAG often uses i1 for the conditional comparison.
As a result, we handle this specific case. We handle only EQ and NE for now,
but this can be expanded in the future. We can also expand on handling arbitrary
constants in the future.
The second change is that we pass AllowModify=false to analyzeBranch in the
tail merging code. We do this because that code performs some
clever tricks on the branch code that it will restore later. Now that we are
actually optimizing branches in analyzeBranch, we have to be careful not to
mess up this canonical form that the tail merging code expects.
---
llvm/lib/CodeGen/BranchFolding.cpp | 7 +-
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 110 ++++++++++++++++++
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 20 ++++
llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll | 20 ----
llvm/test/CodeGen/RISCV/branch_zero.ll | 23 +---
.../RISCV/rvv/vxrm-insert-out-of-loop.ll | 66 +++++------
llvm/test/CodeGen/RISCV/simplify-condbr.ll | 1 -
7 files changed, 169 insertions(+), 78 deletions(-)
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 6f5afbd2a996a..989ae6bce8881 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -492,7 +492,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
DebugLoc dl = CurMBB->findBranchDebugLoc();
if (!dl)
dl = BranchDL;
- if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->reverseBranchCondition(Cond)) {
@@ -1132,7 +1132,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond)) {
// Failing case: IBB is the target of a cbr, and we cannot reverse the
// branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
@@ -1589,7 +1589,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Loop: xxx; jcc Out; jmp Loop
// we want:
// Loop: xxx; jncc Loop; jmp Out
- if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB &&
+ !CurCond.empty()) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->reverseBranchCondition(NewCond)) {
DebugLoc Dl = MBB->findBranchDebugLoc();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 3a04344f8237f..2d249c1d14e30 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1014,6 +1014,109 @@ RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
}
}
+bool RISCVInstrInfo::trySimplifyCondBr(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ SmallVectorImpl<MachineOperand> &Cond) const {
+
+ if (!TBB || Cond.size() != 3)
+ return false;
+
+ RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
+ auto LHS = Cond[1];
+ auto RHS = Cond[2];
+
+ // Return true if MO definitely contains the value one.
+ auto isOne = [](MachineOperand &MO) -> bool {
+ if (MO.isImm() && MO.getImm() == 1)
+ return true;
+
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ return false;
+
+ MachineRegisterInfo &MRI =
+ MO.getParent()->getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
+ if (!DefMI)
+ return false;
+
+ // For now, just check the canonical one value.
+ if (DefMI->getOpcode() == RISCV::ADDI &&
+ DefMI->getOperand(1).getReg() == RISCV::X0 &&
+ DefMI->getOperand(2).getImm() == 1)
+ return true;
+
+ return false;
+ };
+
+ // Return true if MO definitely contains the value zero.
+ auto isZero = [](MachineOperand &MO) -> bool {
+ if (MO.isImm() && MO.getImm() == 0)
+ return true;
+ if (MO.isReg() && MO.getReg() == RISCV::X0)
+ return true;
+ return false;
+ };
+
+ MachineBasicBlock *Folded = nullptr;
+ switch (CC) {
+ default:
+ // TODO: Implement for more CCs
+ return false;
+ case RISCVCC::COND_EQ: {
+ // We can statically evaluate that we take the first branch
+ if ((isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS))) {
+ Folded = TBB;
+ break;
+ }
+ // We can statically evaluate that we take the second branch
+ if ((isZero(LHS) && isOne(RHS)) || (isOne(LHS) && isZero(RHS))) {
+ Folded = FBB;
+ break;
+ }
+ return false;
+ }
+ case RISCVCC::COND_NE: {
+ // We can statically evaluate that we take the first branch
+ if ((isOne(LHS) && isZero(RHS)) || (isZero(LHS) && isOne(RHS))) {
+ Folded = TBB;
+ break;
+ }
+ // We can statically evaluate that we take the second branch
+ if ((isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS))) {
+ Folded = FBB;
+ break;
+ }
+ return false;
+ }
+ }
+
+ // At this point, its legal to optimize.
+ removeBranch(MBB);
+ Cond.clear();
+
+ // Only need to insert a branch if we're not falling through.
+ if (Folded) {
+ DebugLoc DL = MBB.findBranchDebugLoc();
+ insertBranch(MBB, Folded, nullptr, {}, DL);
+ }
+
+ // Update the successors. Remove them all and add back the correct one.
+ while (!MBB.succ_empty())
+ MBB.removeSuccessor(MBB.succ_end() - 1);
+
+ // If it's a fallthrough, we need to figure out where MBB is going.
+ if (!Folded) {
+ MachineFunction::iterator Fallthrough = ++MBB.getIterator();
+ if (Fallthrough != MBB.getParent()->end())
+ MBB.addSuccessor(&*Fallthrough);
+ } else
+ MBB.addSuccessor(Folded);
+
+ TBB = Folded;
+ FBB = nullptr;
+ return true;
+}
+
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -1071,6 +1174,9 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Handle a single conditional branch.
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
parseCondBranch(*I, TBB, Cond);
+ // Try to fold the branch of the conditional branch into a the fallthru.
+ if (AllowModify)
+ trySimplifyCondBr(MBB, TBB, FBB, Cond);
return false;
}
@@ -1079,6 +1185,10 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
I->getDesc().isUnconditionalBranch()) {
parseCondBranch(*std::prev(I), TBB, Cond);
FBB = getBranchDestBlock(*I);
+ // Try to fold the branch of the conditional branch into an unconditional
+ // branch.
+ if (AllowModify)
+ trySimplifyCondBr(MBB, TBB, FBB, Cond);
return false;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index d68bd58885873..e8f6403a93a55 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -319,6 +319,26 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
const MachineInstr &MI2) const;
bool hasReassociableVectorSibling(const MachineInstr &Inst,
bool &Commuted) const;
+ /// Return true if the branch represented by the conditional branch with
+ /// components TBB, FBB, and CurCond was folded into an unconditional branch.
+ ///
+ /// If FBB is nullptr, then the the input represents a conditional branch with
+ /// a fallthrough.
+ ///
+ /// For example:
+ /// BRCOND EQ 0, 0, BB1
+ /// BR BB2
+ ///
+ /// can be simplified to BR BB1 since 0 == 0 statically. On the other hand,
+ ///
+ ///
+ /// BRCOND EQ 0, 1, BB1
+ /// BR BB2
+ ///
+ /// can be simplified to BR BB2 because 0 != 1 statically.
+ bool trySimplifyCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ SmallVectorImpl<MachineOperand> &Cond) const;
};
namespace RISCV {
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
index 338925059862c..74ec7308cb646 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
@@ -357,11 +357,6 @@ define i64 @ctpop_i64(i64 %a) nounwind {
define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_two:
; RV32I: # %bb.0:
-; RV32I-NEXT: beqz zero, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltiu a0, zero, 0
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
@@ -404,11 +399,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
;
; RV32ZBB-LABEL: ctpop_i64_ugt_two:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: beqz zero, .LBB6_2
-; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltiu a0, zero, 0
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB6_2:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a0, a1, a0
@@ -422,11 +412,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_one:
; RV32I: # %bb.0:
-; RV32I-NEXT: beqz zero, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: snez a0, zero
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
@@ -470,11 +455,6 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
;
; RV32ZBB-LABEL: ctpop_i64_ugt_one:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: beqz zero, .LBB7_2
-; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: snez a0, zero
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB7_2:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a0, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/branch_zero.ll b/llvm/test/CodeGen/RISCV/branch_zero.ll
index 0f50adec35a34..2c13c28647516 100644
--- a/llvm/test/CodeGen/RISCV/branch_zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch_zero.ll
@@ -5,16 +5,11 @@
define void @foo(i16 %finder_idx) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: .LBB0_1: # %for.body
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: # %bb.1: # %for.body
; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: bltz a0, .LBB0_4
+; CHECK-NEXT: bltz a0, .LBB0_3
; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: bnez zero, .LBB0_1
-; CHECK-NEXT: # %bb.3: # %while.body
-; CHECK-NEXT: .LBB0_4: # %while.cond1.preheader.i
+; CHECK-NEXT: .LBB0_3: # %while.cond1.preheader.i
entry:
br label %for.body
@@ -46,16 +41,11 @@ if.then:
define void @bar(i16 %finder_idx) {
; CHECK-LABEL: bar:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: .LBB1_1: # %for.body
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: # %bb.1: # %for.body
; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: bgez a0, .LBB1_4
+; CHECK-NEXT: bgez a0, .LBB1_3
; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i
-; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: bnez zero, .LBB1_1
-; CHECK-NEXT: # %bb.3: # %while.body
-; CHECK-NEXT: .LBB1_4: # %while.cond1.preheader.i
+; CHECK-NEXT: .LBB1_3: # %while.cond1.preheader.i
entry:
br label %for.body
@@ -83,4 +73,3 @@ exit1:
if.then:
br label %for.body
}
-
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index c35f05be304cc..5251074717c93 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -14,9 +14,9 @@
define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_dst_stride, ptr nocapture noundef readonly %src1, i32 noundef signext %i_src1_stride, ptr nocapture noundef readonly %src2, i32 noundef signext %i_src2_stride, i32 noundef signext %i_width, i32 noundef signext %i_height) {
; RV32-LABEL: test1:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: blez a7, .LBB0_17
+; RV32-NEXT: blez a7, .LBB0_13
; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
-; RV32-NEXT: blez a6, .LBB0_17
+; RV32-NEXT: blez a6, .LBB0_13
; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader
; RV32-NEXT: addi t0, a7, -1
; RV32-NEXT: csrr t2, vlenb
@@ -25,11 +25,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: mul t5, a5, t0
; RV32-NEXT: slli t1, t2, 1
; RV32-NEXT: li t6, 32
-; RV32-NEXT: mv t0, t1
-; RV32-NEXT: bnez zero, .LBB0_4
-; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t0, 32
-; RV32-NEXT: .LBB0_4: # %for.cond1.preheader.us.preheader
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -41,17 +37,13 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: add t3, a0, t3
; RV32-NEXT: add t4, a2, t4
; RV32-NEXT: add s0, a4, t5
-; RV32-NEXT: bltu t6, t1, .LBB0_6
-; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader
+; RV32-NEXT: bltu t6, t1, .LBB0_4
+; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t1, 32
-; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader
+; RV32-NEXT: .LBB0_4: # %for.cond1.preheader.us.preheader
; RV32-NEXT: add t3, t3, a6
; RV32-NEXT: add t5, t4, a6
; RV32-NEXT: add t4, s0, a6
-; RV32-NEXT: beqz zero, .LBB0_8
-; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader
-; RV32-NEXT: mv t1, t0
-; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t0, 0
; RV32-NEXT: sltu t5, a0, t5
; RV32-NEXT: sltu t6, a2, t3
@@ -70,25 +62,25 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: or t1, t1, t3
; RV32-NEXT: andi t1, t1, 1
; RV32-NEXT: slli t2, t2, 1
-; RV32-NEXT: j .LBB0_10
-; RV32-NEXT: .LBB0_9: # %for.cond1.for.cond.cleanup3_crit_edge.us
-; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT: j .LBB0_6
+; RV32-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
+; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: addi t0, t0, 1
; RV32-NEXT: add a4, a4, a5
-; RV32-NEXT: beq t0, a7, .LBB0_16
-; RV32-NEXT: .LBB0_10: # %for.cond1.preheader.us
+; RV32-NEXT: beq t0, a7, .LBB0_12
+; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us
; RV32-NEXT: # =>This Loop Header: Depth=1
-; RV32-NEXT: # Child Loop BB0_13 Depth 2
-; RV32-NEXT: # Child Loop BB0_15 Depth 2
-; RV32-NEXT: beqz t1, .LBB0_12
-; RV32-NEXT: # %bb.11: # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT: # Child Loop BB0_9 Depth 2
+; RV32-NEXT: # Child Loop BB0_11 Depth 2
+; RV32-NEXT: beqz t1, .LBB0_8
+; RV32-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1
; RV32-NEXT: li t4, 0
; RV32-NEXT: li t3, 0
-; RV32-NEXT: j .LBB0_15
-; RV32-NEXT: .LBB0_12: # %vector.ph
-; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT: j .LBB0_11
+; RV32-NEXT: .LBB0_8: # %vector.ph
+; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1
; RV32-NEXT: li t3, 0
; RV32-NEXT: neg t4, t2
; RV32-NEXT: and t4, t4, a6
@@ -96,8 +88,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: li t6, 0
; RV32-NEXT: li t5, 0
; RV32-NEXT: vsetvli s0, zero, e8, m2, ta, ma
-; RV32-NEXT: .LBB0_13: # %vector.body
-; RV32-NEXT: # Parent Loop BB0_10 Depth=1
+; RV32-NEXT: .LBB0_9: # %vector.body
+; RV32-NEXT: # Parent Loop BB0_6 Depth=1
; RV32-NEXT: # => This Inner Loop Header: Depth=2
; RV32-NEXT: add s0, a2, t6
; RV32-NEXT: add s1, a4, t6
@@ -112,12 +104,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: or s2, t6, t5
; RV32-NEXT: vs2r.v v8, (s0)
; RV32-NEXT: mv t6, s1
-; RV32-NEXT: bnez s2, .LBB0_13
-; RV32-NEXT: # %bb.14: # %middle.block
-; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1
-; RV32-NEXT: beq t4, a6, .LBB0_9
-; RV32-NEXT: .LBB0_15: # %for.body4.us
-; RV32-NEXT: # Parent Loop BB0_10 Depth=1
+; RV32-NEXT: bnez s2, .LBB0_9
+; RV32-NEXT: # %bb.10: # %middle.block
+; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1
+; RV32-NEXT: beq t4, a6, .LBB0_5
+; RV32-NEXT: .LBB0_11: # %for.body4.us
+; RV32-NEXT: # Parent Loop BB0_6 Depth=1
; RV32-NEXT: # => This Inner Loop Header: Depth=2
; RV32-NEXT: add t5, a2, t4
; RV32-NEXT: add t6, a4, t4
@@ -133,9 +125,9 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: srli t5, t5, 1
; RV32-NEXT: or t6, t6, t3
; RV32-NEXT: sb t5, 0(s0)
-; RV32-NEXT: bnez t6, .LBB0_15
-; RV32-NEXT: j .LBB0_9
-; RV32-NEXT: .LBB0_16:
+; RV32-NEXT: bnez t6, .LBB0_11
+; RV32-NEXT: j .LBB0_5
+; RV32-NEXT: .LBB0_12:
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
@@ -144,7 +136,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: .cfi_restore s2
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
-; RV32-NEXT: .LBB0_17: # %for.cond.cleanup
+; RV32-NEXT: .LBB0_13: # %for.cond.cleanup
; RV32-NEXT: ret
;
; RV64P670-LABEL: test1:
diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
index 4dff2ac53f9ed..8e8c5fe2976af 100644
--- a/llvm/test/CodeGen/RISCV/simplify-condbr.ll
+++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
@@ -58,7 +58,6 @@ define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) noun
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_10:
-; CHECK-NEXT: bnez zero, .LBB0_9
; CHECK-NEXT: j .LBB0_8
entry:
switch i8 %0, label %if.else1492 [
>From 8fab8b18fee88d7170077434a6eb2bebef89b653 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 17 Mar 2025 19:37:13 -0700
Subject: [PATCH 3/7] fixup! convert lambda to static func
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 64 ++++++++++++------------
1 file changed, 32 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2d249c1d14e30..8590a82f94f2e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1014,48 +1014,48 @@ RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
}
}
-bool RISCVInstrInfo::trySimplifyCondBr(
- MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- SmallVectorImpl<MachineOperand> &Cond) const {
+// Return true if MO definitely contains the value one.
+static bool isOne(MachineOperand &MO) {
+ if (MO.isImm() && MO.getImm() == 1)
+ return true;
- if (!TBB || Cond.size() != 3)
+ if (!MO.isReg() || !MO.getReg().isVirtual())
return false;
- RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
- auto LHS = Cond[1];
- auto RHS = Cond[2];
+ MachineRegisterInfo &MRI =
+ MO.getParent()->getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
+ if (!DefMI)
+ return false;
- // Return true if MO definitely contains the value one.
- auto isOne = [](MachineOperand &MO) -> bool {
- if (MO.isImm() && MO.getImm() == 1)
- return true;
+ // For now, just check the canonical one value.
+ if (DefMI->getOpcode() == RISCV::ADDI &&
+ DefMI->getOperand(1).getReg() == RISCV::X0 &&
+ DefMI->getOperand(2).getImm() == 1)
+ return true;
- if (!MO.isReg() || !MO.getReg().isVirtual())
- return false;
+ return false;
+}
- MachineRegisterInfo &MRI =
- MO.getParent()->getParent()->getParent()->getRegInfo();
- MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
- if (!DefMI)
- return false;
+// Return true if MO definitely contains the value zero.
+static bool isZero(MachineOperand &MO) {
+ if (MO.isImm() && MO.getImm() == 0)
+ return true;
+ if (MO.isReg() && MO.getReg() == RISCV::X0)
+ return true;
+ return false;
+}
- // For now, just check the canonical one value.
- if (DefMI->getOpcode() == RISCV::ADDI &&
- DefMI->getOperand(1).getReg() == RISCV::X0 &&
- DefMI->getOperand(2).getImm() == 1)
- return true;
+bool RISCVInstrInfo::trySimplifyCondBr(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (!TBB || Cond.size() != 3)
return false;
- };
- // Return true if MO definitely contains the value zero.
- auto isZero = [](MachineOperand &MO) -> bool {
- if (MO.isImm() && MO.getImm() == 0)
- return true;
- if (MO.isReg() && MO.getReg() == RISCV::X0)
- return true;
- return false;
- };
+ RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
+ auto LHS = Cond[1];
+ auto RHS = Cond[2];
MachineBasicBlock *Folded = nullptr;
switch (CC) {
>From 728079bf982a0a1ccc107f1ed49fd3cf90a0f3bd Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 19 Mar 2025 08:51:05 -0700
Subject: [PATCH 4/7] fixup! combine with optimizeCondBr
---
llvm/lib/CodeGen/PeepholeOptimizer.cpp | 34 +--
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 238 ++++++++----------
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 20 --
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 +
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 +
llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll | 20 --
llvm/test/CodeGen/RISCV/double-br-fcmp.ll | 40 ---
llvm/test/CodeGen/RISCV/float-br-fcmp.ll | 40 ---
llvm/test/CodeGen/RISCV/half-br-fcmp.ll | 80 ------
.../RISCV/machine-sink-load-immediate.ll | 103 +-------
10 files changed, 138 insertions(+), 439 deletions(-)
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 6f44837c5441c..da7cd5e877060 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -566,14 +566,9 @@ class PeepholeOptimizerLegacy : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineLoopInfoWrapperPass>();
- AU.addPreserved<MachineLoopInfoWrapperPass>();
- if (Aggressive) {
- AU.addRequired<MachineDominatorTreeWrapperPass>();
- AU.addPreserved<MachineDominatorTreeWrapperPass>();
- }
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
}
MachineFunctionProperties getRequiredProperties() const override {
@@ -1650,27 +1645,20 @@ PreservedAnalyses
PeepholeOptimizerPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
MFPropsModifier _(*this, MF);
- auto *DT =
- Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
+ auto *DT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
PeepholeOptimizer Impl(DT, MLI);
bool Changed = Impl.run(MF);
if (!Changed)
return PreservedAnalyses::all();
- auto PA = getMachineFunctionPassPreservedAnalyses();
- PA.preserve<MachineDominatorTreeAnalysis>();
- PA.preserve<MachineLoopAnalysis>();
- PA.preserveSet<CFGAnalyses>();
- return PA;
+ return getMachineFunctionPassPreservedAnalyses();
}
bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- auto *DT = Aggressive
- ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
- : nullptr;
+ auto *DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
PeepholeOptimizer Impl(DT, MLI);
return Impl.run(MF);
@@ -1791,6 +1779,20 @@ bool PeepholeOptimizer::run(MachineFunction &MF) {
}
if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
+ // optimizeCondBranch might have converted a conditional branch to
+ // an unconditional branch. If there is a branch instruction after it,
+ // delete it.
+ MachineInstr *NewBr = &*std::prev(MII);
+ if (NewBr->isUnconditionalBranch()) {
+ if (MII != MBB.end()) {
+ MachineInstr *Dead = &*MII;
+ ++MII;
+ MachineBasicBlock *DeadDest = TII->getBranchDestBlock(*Dead);
+ if (TII->getBranchDestBlock(*NewBr)!= DeadDest)
+ DT->deleteEdge(&MBB, DeadDest);
+ Dead->eraseFromParent();
+ }
+ }
Changed = true;
continue;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 8590a82f94f2e..cc71df71517de 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1014,109 +1014,6 @@ RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
}
}
-// Return true if MO definitely contains the value one.
-static bool isOne(MachineOperand &MO) {
- if (MO.isImm() && MO.getImm() == 1)
- return true;
-
- if (!MO.isReg() || !MO.getReg().isVirtual())
- return false;
-
- MachineRegisterInfo &MRI =
- MO.getParent()->getParent()->getParent()->getRegInfo();
- MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
- if (!DefMI)
- return false;
-
- // For now, just check the canonical one value.
- if (DefMI->getOpcode() == RISCV::ADDI &&
- DefMI->getOperand(1).getReg() == RISCV::X0 &&
- DefMI->getOperand(2).getImm() == 1)
- return true;
-
- return false;
-}
-
-// Return true if MO definitely contains the value zero.
-static bool isZero(MachineOperand &MO) {
- if (MO.isImm() && MO.getImm() == 0)
- return true;
- if (MO.isReg() && MO.getReg() == RISCV::X0)
- return true;
- return false;
-}
-
-bool RISCVInstrInfo::trySimplifyCondBr(
- MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- SmallVectorImpl<MachineOperand> &Cond) const {
-
- if (!TBB || Cond.size() != 3)
- return false;
-
- RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
- auto LHS = Cond[1];
- auto RHS = Cond[2];
-
- MachineBasicBlock *Folded = nullptr;
- switch (CC) {
- default:
- // TODO: Implement for more CCs
- return false;
- case RISCVCC::COND_EQ: {
- // We can statically evaluate that we take the first branch
- if ((isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS))) {
- Folded = TBB;
- break;
- }
- // We can statically evaluate that we take the second branch
- if ((isZero(LHS) && isOne(RHS)) || (isOne(LHS) && isZero(RHS))) {
- Folded = FBB;
- break;
- }
- return false;
- }
- case RISCVCC::COND_NE: {
- // We can statically evaluate that we take the first branch
- if ((isOne(LHS) && isZero(RHS)) || (isZero(LHS) && isOne(RHS))) {
- Folded = TBB;
- break;
- }
- // We can statically evaluate that we take the second branch
- if ((isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS))) {
- Folded = FBB;
- break;
- }
- return false;
- }
- }
-
- // At this point, its legal to optimize.
- removeBranch(MBB);
- Cond.clear();
-
- // Only need to insert a branch if we're not falling through.
- if (Folded) {
- DebugLoc DL = MBB.findBranchDebugLoc();
- insertBranch(MBB, Folded, nullptr, {}, DL);
- }
-
- // Update the successors. Remove them all and add back the correct one.
- while (!MBB.succ_empty())
- MBB.removeSuccessor(MBB.succ_end() - 1);
-
- // If it's a fallthrough, we need to figure out where MBB is going.
- if (!Folded) {
- MachineFunction::iterator Fallthrough = ++MBB.getIterator();
- if (Fallthrough != MBB.getParent()->end())
- MBB.addSuccessor(&*Fallthrough);
- } else
- MBB.addSuccessor(Folded);
-
- TBB = Folded;
- FBB = nullptr;
- return true;
-}
-
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -1174,9 +1071,12 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Handle a single conditional branch.
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
parseCondBranch(*I, TBB, Cond);
- // Try to fold the branch of the conditional branch into a the fallthru.
- if (AllowModify)
- trySimplifyCondBr(MBB, TBB, FBB, Cond);
+ // Try and optimize the conditional branch.
+ if (AllowModify) {
+ optimizeCondBranch(*I);
+ // The branch might have changed, reanalyze it.
+ return analyzeBranch(MBB, TBB, FBB, Cond, false);
+ }
return false;
}
@@ -1185,10 +1085,14 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
I->getDesc().isUnconditionalBranch()) {
parseCondBranch(*std::prev(I), TBB, Cond);
FBB = getBranchDestBlock(*I);
- // Try to fold the branch of the conditional branch into an unconditional
- // branch.
- if (AllowModify)
- trySimplifyCondBr(MBB, TBB, FBB, Cond);
+ // Try and optimize the pair.
+ if (AllowModify) {
+ if (optimizeCondBranch(*std::prev(I)))
+ I->eraseFromParent();
+
+ // The branch might have changed, reanalyze it.
+ return analyzeBranch(MBB, TBB, FBB, Cond, false);
+ }
return false;
}
@@ -1344,7 +1248,8 @@ bool RISCVInstrInfo::reverseBranchCondition(
bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ if (!MBB)
+ return false;
MachineBasicBlock *TBB, *FBB;
SmallVector<MachineOperand, 3> Cond;
@@ -1354,8 +1259,98 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
assert(CC != RISCVCC::COND_INVALID);
- if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
+ // Right now we only care about LI (i.e. ADDI x0, imm)
+ auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
+ if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).getReg() == RISCV::X0) {
+ Imm = MI->getOperand(2).getImm();
+ return true;
+ }
return false;
+ };
+
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ // Either a load from immediate instruction or X0.
+ auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
+ if (!Op.isReg())
+ return false;
+ Register Reg = Op.getReg();
+ if (Reg == RISCV::X0) {
+ Imm = 0;
+ return true;
+ }
+ return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
+ };
+
+ // Try and convert a conditional branch that can be evaluated statically
+ // into an unconditional branch.
+ MachineBasicBlock *Folded = nullptr;
+ int64_t C0, C1;
+ if (isFromLoadImm(Cond[1], C0) && isFromLoadImm(Cond[2], C1)) {
+ switch (CC) {
+ default:
+ // TODO: Implement for more CCs
+ break;
+ case RISCVCC::COND_EQ: {
+ Folded = (C0 == C1) ? TBB : FBB;
+ break;
+ }
+ case RISCVCC::COND_NE: {
+ Folded = (C0 != C1) ? TBB : FBB;
+ break;
+ }
+ case RISCVCC::COND_LT: {
+ Folded = (C0 < C1) ? TBB : FBB;
+ break;
+ }
+ case RISCVCC::COND_GE: {
+ Folded = (C0 >= C1) ? TBB : FBB;
+ break;
+ }
+ case RISCVCC::COND_LTU: {
+ Folded = ((uint64_t)C0 < (uint64_t)C1) ? TBB : FBB;
+ break;
+ }
+ case RISCVCC::COND_GEU: {
+ Folded = ((uint64_t)C0 >= (uint64_t)C1) ? TBB : FBB;
+ break;
+ }
+ }
+
+ // Do the conversion
+ // Build the new unconditional branch
+ DebugLoc DL = MBB->findBranchDebugLoc();
+ if (Folded) {
+ BuildMI(*MBB, MI, DL, get(RISCV::PseudoBR)).addMBB(Folded);
+ } else {
+ MachineFunction::iterator Fallthrough = ++MBB->getIterator();
+ if (Fallthrough == MBB->getParent()->end())
+ return false;
+ BuildMI(*MBB, MI, DL, get(RISCV::PseudoBR)).addMBB(&*Fallthrough);
+ }
+
+ // Update successors of MBB.
+ if (Folded == TBB) {
+ // If we're taking TBB, then the succ to delete is the fallthrough (if
+ // it was a succ in the first place), or it's the MBB from the
+ // unconditional branch.
+ if (!FBB) {
+ MachineFunction::iterator Fallthrough = ++MBB->getIterator();
+ if (Fallthrough != MBB->getParent()->end() &&
+ MBB->isSuccessor(&*Fallthrough))
+ MBB->removeSuccessor(&*Fallthrough, true);
+ } else {
+ MBB->removeSuccessor(FBB, true);
+ }
+ } else if (Folded == FBB) {
+ // If we're taking the fallthrough or unconditional branch, then the
+ // succ to remove is the one from the conditional branch.
+ MBB->removeSuccessor(TBB, true);
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
// For two constants C0 and C1 from
// ```
@@ -1374,24 +1369,6 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
//
// To make sure this optimization is really beneficial, we only
// optimize for cases where Y had only one use (i.e. only used by the branch).
-
- // Right now we only care about LI (i.e. ADDI x0, imm)
- auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
- if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
- MI->getOperand(1).getReg() == RISCV::X0) {
- Imm = MI->getOperand(2).getImm();
- return true;
- }
- return false;
- };
- // Either a load from immediate instruction or X0.
- auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
- if (!Op.isReg())
- return false;
- Register Reg = Op.getReg();
- return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
- };
-
MachineOperand &LHS = MI.getOperand(0);
MachineOperand &RHS = MI.getOperand(1);
// Try to find the register for constant Z; return
@@ -1410,7 +1387,6 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
};
bool Modify = false;
- int64_t C0;
if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
// Might be case 1.
// Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index e8f6403a93a55..d68bd58885873 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -319,26 +319,6 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
const MachineInstr &MI2) const;
bool hasReassociableVectorSibling(const MachineInstr &Inst,
bool &Commuted) const;
- /// Return true if the branch represented by the conditional branch with
- /// components TBB, FBB, and CurCond was folded into an unconditional branch.
- ///
- /// If FBB is nullptr, then the the input represents a conditional branch with
- /// a fallthrough.
- ///
- /// For example:
- /// BRCOND EQ 0, 0, BB1
- /// BR BB2
- ///
- /// can be simplified to BR BB1 since 0 == 0 statically. On the other hand,
- ///
- ///
- /// BRCOND EQ 0, 1, BB1
- /// BR BB2
- ///
- /// can be simplified to BR BB2 because 0 != 1 statically.
- bool trySimplifyCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- SmallVectorImpl<MachineOperand> &Cond) const;
};
namespace RISCV {
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index e1481667a4ab7..209e1125b3c2e 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -158,6 +158,7 @@
; CHECK-NEXT: MachinePostDominator Tree Construction
; CHECK-NEXT: Machine Cycle Info Analysis
; CHECK-NEXT: Machine code sinking
+; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: AArch64 MI Peephole Optimization pass
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index beef7a574dc4f..e3ba2f7559c53 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -115,6 +115,7 @@
; CHECK-NEXT: MachinePostDominator Tree Construction
; CHECK-NEXT: Machine Cycle Info Analysis
; CHECK-NEXT: Machine code sinking
+; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: Machine Trace Metrics
diff --git a/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll b/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll
index 51ea8873d8c03..41049195360fc 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll
@@ -11,22 +11,12 @@ declare bfloat @dummy(bfloat)
define void @br_fcmp_false(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: br_fcmp_false:
; RV32IZFBFMIN: # %bb.0:
-; RV32IZFBFMIN-NEXT: li a0, 1
-; RV32IZFBFMIN-NEXT: bnez a0, .LBB0_2
-; RV32IZFBFMIN-NEXT: # %bb.1: # %if.then
-; RV32IZFBFMIN-NEXT: ret
-; RV32IZFBFMIN-NEXT: .LBB0_2: # %if.else
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: call abort
;
; RV64IZFBFMIN-LABEL: br_fcmp_false:
; RV64IZFBFMIN: # %bb.0:
-; RV64IZFBFMIN-NEXT: li a0, 1
-; RV64IZFBFMIN-NEXT: bnez a0, .LBB0_2
-; RV64IZFBFMIN-NEXT: # %bb.1: # %if.then
-; RV64IZFBFMIN-NEXT: ret
-; RV64IZFBFMIN-NEXT: .LBB0_2: # %if.else
; RV64IZFBFMIN-NEXT: addi sp, sp, -16
; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT: call abort
@@ -583,22 +573,12 @@ if.then:
define void @br_fcmp_true(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: br_fcmp_true:
; RV32IZFBFMIN: # %bb.0:
-; RV32IZFBFMIN-NEXT: li a0, 1
-; RV32IZFBFMIN-NEXT: bnez a0, .LBB16_2
-; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else
-; RV32IZFBFMIN-NEXT: ret
-; RV32IZFBFMIN-NEXT: .LBB16_2: # %if.then
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: call abort
;
; RV64IZFBFMIN-LABEL: br_fcmp_true:
; RV64IZFBFMIN: # %bb.0:
-; RV64IZFBFMIN-NEXT: li a0, 1
-; RV64IZFBFMIN-NEXT: bnez a0, .LBB16_2
-; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else
-; RV64IZFBFMIN-NEXT: ret
-; RV64IZFBFMIN-NEXT: .LBB16_2: # %if.then
; RV64IZFBFMIN-NEXT: addi sp, sp, -16
; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT: call abort
diff --git a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll
index 035228e73c707..bea51fc4322d0 100644
--- a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll
@@ -14,44 +14,24 @@ declare void @exit(i32)
define void @br_fcmp_false(double %a, double %b) nounwind {
; RV32IFD-LABEL: br_fcmp_false:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: li a0, 1
-; RV32IFD-NEXT: bnez a0, .LBB0_2
-; RV32IFD-NEXT: # %bb.1: # %if.then
-; RV32IFD-NEXT: ret
-; RV32IFD-NEXT: .LBB0_2: # %if.else
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: call abort
;
; RV64IFD-LABEL: br_fcmp_false:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: li a0, 1
-; RV64IFD-NEXT: bnez a0, .LBB0_2
-; RV64IFD-NEXT: # %bb.1: # %if.then
-; RV64IFD-NEXT: ret
-; RV64IFD-NEXT: .LBB0_2: # %if.else
; RV64IFD-NEXT: addi sp, sp, -16
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: call abort
;
; RV32IZFINXZDINX-LABEL: br_fcmp_false:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: li a0, 1
-; RV32IZFINXZDINX-NEXT: bnez a0, .LBB0_2
-; RV32IZFINXZDINX-NEXT: # %bb.1: # %if.then
-; RV32IZFINXZDINX-NEXT: ret
-; RV32IZFINXZDINX-NEXT: .LBB0_2: # %if.else
; RV32IZFINXZDINX-NEXT: addi sp, sp, -16
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call abort
;
; RV64IZFINXZDINX-LABEL: br_fcmp_false:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: li a0, 1
-; RV64IZFINXZDINX-NEXT: bnez a0, .LBB0_2
-; RV64IZFINXZDINX-NEXT: # %bb.1: # %if.then
-; RV64IZFINXZDINX-NEXT: ret
-; RV64IZFINXZDINX-NEXT: .LBB0_2: # %if.else
; RV64IZFINXZDINX-NEXT: addi sp, sp, -16
; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINXZDINX-NEXT: call abort
@@ -897,44 +877,24 @@ if.then:
define void @br_fcmp_true(double %a, double %b) nounwind {
; RV32IFD-LABEL: br_fcmp_true:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: li a0, 1
-; RV32IFD-NEXT: bnez a0, .LBB16_2
-; RV32IFD-NEXT: # %bb.1: # %if.else
-; RV32IFD-NEXT: ret
-; RV32IFD-NEXT: .LBB16_2: # %if.then
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: call abort
;
; RV64IFD-LABEL: br_fcmp_true:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: li a0, 1
-; RV64IFD-NEXT: bnez a0, .LBB16_2
-; RV64IFD-NEXT: # %bb.1: # %if.else
-; RV64IFD-NEXT: ret
-; RV64IFD-NEXT: .LBB16_2: # %if.then
; RV64IFD-NEXT: addi sp, sp, -16
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: call abort
;
; RV32IZFINXZDINX-LABEL: br_fcmp_true:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: li a0, 1
-; RV32IZFINXZDINX-NEXT: bnez a0, .LBB16_2
-; RV32IZFINXZDINX-NEXT: # %bb.1: # %if.else
-; RV32IZFINXZDINX-NEXT: ret
-; RV32IZFINXZDINX-NEXT: .LBB16_2: # %if.then
; RV32IZFINXZDINX-NEXT: addi sp, sp, -16
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call abort
;
; RV64IZFINXZDINX-LABEL: br_fcmp_true:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: li a0, 1
-; RV64IZFINXZDINX-NEXT: bnez a0, .LBB16_2
-; RV64IZFINXZDINX-NEXT: # %bb.1: # %if.else
-; RV64IZFINXZDINX-NEXT: ret
-; RV64IZFINXZDINX-NEXT: .LBB16_2: # %if.then
; RV64IZFINXZDINX-NEXT: addi sp, sp, -16
; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINXZDINX-NEXT: call abort
diff --git a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
index 35caa627b57bc..c4f23f251c535 100644
--- a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
@@ -15,44 +15,24 @@ declare float @dummy(float)
define void @br_fcmp_false(float %a, float %b) nounwind {
; RV32IF-LABEL: br_fcmp_false:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: li a0, 1
-; RV32IF-NEXT: bnez a0, .LBB0_2
-; RV32IF-NEXT: # %bb.1: # %if.then
-; RV32IF-NEXT: ret
-; RV32IF-NEXT: .LBB0_2: # %if.else
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: call abort
;
; RV64IF-LABEL: br_fcmp_false:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: li a0, 1
-; RV64IF-NEXT: bnez a0, .LBB0_2
-; RV64IF-NEXT: # %bb.1: # %if.then
-; RV64IF-NEXT: ret
-; RV64IF-NEXT: .LBB0_2: # %if.else
; RV64IF-NEXT: addi sp, sp, -16
; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-NEXT: call abort
;
; RV32IZFINX-LABEL: br_fcmp_false:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: li a0, 1
-; RV32IZFINX-NEXT: bnez a0, .LBB0_2
-; RV32IZFINX-NEXT: # %bb.1: # %if.then
-; RV32IZFINX-NEXT: ret
-; RV32IZFINX-NEXT: .LBB0_2: # %if.else
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: call abort
;
; RV64IZFINX-LABEL: br_fcmp_false:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: li a0, 1
-; RV64IZFINX-NEXT: bnez a0, .LBB0_2
-; RV64IZFINX-NEXT: # %bb.1: # %if.then
-; RV64IZFINX-NEXT: ret
-; RV64IZFINX-NEXT: .LBB0_2: # %if.else
; RV64IZFINX-NEXT: addi sp, sp, -16
; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINX-NEXT: call abort
@@ -898,44 +878,24 @@ if.then:
define void @br_fcmp_true(float %a, float %b) nounwind {
; RV32IF-LABEL: br_fcmp_true:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: li a0, 1
-; RV32IF-NEXT: bnez a0, .LBB16_2
-; RV32IF-NEXT: # %bb.1: # %if.else
-; RV32IF-NEXT: ret
-; RV32IF-NEXT: .LBB16_2: # %if.then
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: call abort
;
; RV64IF-LABEL: br_fcmp_true:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: li a0, 1
-; RV64IF-NEXT: bnez a0, .LBB16_2
-; RV64IF-NEXT: # %bb.1: # %if.else
-; RV64IF-NEXT: ret
-; RV64IF-NEXT: .LBB16_2: # %if.then
; RV64IF-NEXT: addi sp, sp, -16
; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-NEXT: call abort
;
; RV32IZFINX-LABEL: br_fcmp_true:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: li a0, 1
-; RV32IZFINX-NEXT: bnez a0, .LBB16_2
-; RV32IZFINX-NEXT: # %bb.1: # %if.else
-; RV32IZFINX-NEXT: ret
-; RV32IZFINX-NEXT: .LBB16_2: # %if.then
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: call abort
;
; RV64IZFINX-LABEL: br_fcmp_true:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: li a0, 1
-; RV64IZFINX-NEXT: bnez a0, .LBB16_2
-; RV64IZFINX-NEXT: # %bb.1: # %if.else
-; RV64IZFINX-NEXT: ret
-; RV64IZFINX-NEXT: .LBB16_2: # %if.then
; RV64IZFINX-NEXT: addi sp, sp, -16
; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINX-NEXT: call abort
diff --git a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll
index e9b142e33362f..ceb49747c543c 100644
--- a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll
@@ -23,88 +23,48 @@ declare half @dummy(half)
define void @br_fcmp_false(half %a, half %b) nounwind {
; RV32IZFH-LABEL: br_fcmp_false:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: li a0, 1
-; RV32IZFH-NEXT: bnez a0, .LBB0_2
-; RV32IZFH-NEXT: # %bb.1: # %if.then
-; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB0_2: # %if.else
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: call abort
;
; RV64IZFH-LABEL: br_fcmp_false:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: li a0, 1
-; RV64IZFH-NEXT: bnez a0, .LBB0_2
-; RV64IZFH-NEXT: # %bb.1: # %if.then
-; RV64IZFH-NEXT: ret
-; RV64IZFH-NEXT: .LBB0_2: # %if.else
; RV64IZFH-NEXT: addi sp, sp, -16
; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFH-NEXT: call abort
;
; RV32IZHINX-LABEL: br_fcmp_false:
; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: li a0, 1
-; RV32IZHINX-NEXT: bnez a0, .LBB0_2
-; RV32IZHINX-NEXT: # %bb.1: # %if.then
-; RV32IZHINX-NEXT: ret
-; RV32IZHINX-NEXT: .LBB0_2: # %if.else
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: call abort
;
; RV64IZHINX-LABEL: br_fcmp_false:
; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: li a0, 1
-; RV64IZHINX-NEXT: bnez a0, .LBB0_2
-; RV64IZHINX-NEXT: # %bb.1: # %if.then
-; RV64IZHINX-NEXT: ret
-; RV64IZHINX-NEXT: .LBB0_2: # %if.else
; RV64IZHINX-NEXT: addi sp, sp, -16
; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZHINX-NEXT: call abort
;
; RV32IZFHMIN-LABEL: br_fcmp_false:
; RV32IZFHMIN: # %bb.0:
-; RV32IZFHMIN-NEXT: li a0, 1
-; RV32IZFHMIN-NEXT: bnez a0, .LBB0_2
-; RV32IZFHMIN-NEXT: # %bb.1: # %if.then
-; RV32IZFHMIN-NEXT: ret
-; RV32IZFHMIN-NEXT: .LBB0_2: # %if.else
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: call abort
;
; RV64IZFHMIN-LABEL: br_fcmp_false:
; RV64IZFHMIN: # %bb.0:
-; RV64IZFHMIN-NEXT: li a0, 1
-; RV64IZFHMIN-NEXT: bnez a0, .LBB0_2
-; RV64IZFHMIN-NEXT: # %bb.1: # %if.then
-; RV64IZFHMIN-NEXT: ret
-; RV64IZFHMIN-NEXT: .LBB0_2: # %if.else
; RV64IZFHMIN-NEXT: addi sp, sp, -16
; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFHMIN-NEXT: call abort
;
; RV32IZHINXMIN-LABEL: br_fcmp_false:
; RV32IZHINXMIN: # %bb.0:
-; RV32IZHINXMIN-NEXT: li a0, 1
-; RV32IZHINXMIN-NEXT: bnez a0, .LBB0_2
-; RV32IZHINXMIN-NEXT: # %bb.1: # %if.then
-; RV32IZHINXMIN-NEXT: ret
-; RV32IZHINXMIN-NEXT: .LBB0_2: # %if.else
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: call abort
;
; RV64IZHINXMIN-LABEL: br_fcmp_false:
; RV64IZHINXMIN: # %bb.0:
-; RV64IZHINXMIN-NEXT: li a0, 1
-; RV64IZHINXMIN-NEXT: bnez a0, .LBB0_2
-; RV64IZHINXMIN-NEXT: # %bb.1: # %if.then
-; RV64IZHINXMIN-NEXT: ret
-; RV64IZHINXMIN-NEXT: .LBB0_2: # %if.else
; RV64IZHINXMIN-NEXT: addi sp, sp, -16
; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZHINXMIN-NEXT: call abort
@@ -1762,88 +1722,48 @@ if.then:
define void @br_fcmp_true(half %a, half %b) nounwind {
; RV32IZFH-LABEL: br_fcmp_true:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: li a0, 1
-; RV32IZFH-NEXT: bnez a0, .LBB16_2
-; RV32IZFH-NEXT: # %bb.1: # %if.else
-; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB16_2: # %if.then
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: call abort
;
; RV64IZFH-LABEL: br_fcmp_true:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: li a0, 1
-; RV64IZFH-NEXT: bnez a0, .LBB16_2
-; RV64IZFH-NEXT: # %bb.1: # %if.else
-; RV64IZFH-NEXT: ret
-; RV64IZFH-NEXT: .LBB16_2: # %if.then
; RV64IZFH-NEXT: addi sp, sp, -16
; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFH-NEXT: call abort
;
; RV32IZHINX-LABEL: br_fcmp_true:
; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: li a0, 1
-; RV32IZHINX-NEXT: bnez a0, .LBB16_2
-; RV32IZHINX-NEXT: # %bb.1: # %if.else
-; RV32IZHINX-NEXT: ret
-; RV32IZHINX-NEXT: .LBB16_2: # %if.then
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: call abort
;
; RV64IZHINX-LABEL: br_fcmp_true:
; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: li a0, 1
-; RV64IZHINX-NEXT: bnez a0, .LBB16_2
-; RV64IZHINX-NEXT: # %bb.1: # %if.else
-; RV64IZHINX-NEXT: ret
-; RV64IZHINX-NEXT: .LBB16_2: # %if.then
; RV64IZHINX-NEXT: addi sp, sp, -16
; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZHINX-NEXT: call abort
;
; RV32IZFHMIN-LABEL: br_fcmp_true:
; RV32IZFHMIN: # %bb.0:
-; RV32IZFHMIN-NEXT: li a0, 1
-; RV32IZFHMIN-NEXT: bnez a0, .LBB16_2
-; RV32IZFHMIN-NEXT: # %bb.1: # %if.else
-; RV32IZFHMIN-NEXT: ret
-; RV32IZFHMIN-NEXT: .LBB16_2: # %if.then
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: call abort
;
; RV64IZFHMIN-LABEL: br_fcmp_true:
; RV64IZFHMIN: # %bb.0:
-; RV64IZFHMIN-NEXT: li a0, 1
-; RV64IZFHMIN-NEXT: bnez a0, .LBB16_2
-; RV64IZFHMIN-NEXT: # %bb.1: # %if.else
-; RV64IZFHMIN-NEXT: ret
-; RV64IZFHMIN-NEXT: .LBB16_2: # %if.then
; RV64IZFHMIN-NEXT: addi sp, sp, -16
; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFHMIN-NEXT: call abort
;
; RV32IZHINXMIN-LABEL: br_fcmp_true:
; RV32IZHINXMIN: # %bb.0:
-; RV32IZHINXMIN-NEXT: li a0, 1
-; RV32IZHINXMIN-NEXT: bnez a0, .LBB16_2
-; RV32IZHINXMIN-NEXT: # %bb.1: # %if.else
-; RV32IZHINXMIN-NEXT: ret
-; RV32IZHINXMIN-NEXT: .LBB16_2: # %if.then
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: call abort
;
; RV64IZHINXMIN-LABEL: br_fcmp_true:
; RV64IZHINXMIN: # %bb.0:
-; RV64IZHINXMIN-NEXT: li a0, 1
-; RV64IZHINXMIN-NEXT: bnez a0, .LBB16_2
-; RV64IZHINXMIN-NEXT: # %bb.1: # %if.else
-; RV64IZHINXMIN-NEXT: ret
-; RV64IZHINXMIN-NEXT: .LBB16_2: # %if.then
; RV64IZHINXMIN-NEXT: addi sp, sp, -16
; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZHINXMIN-NEXT: call abort
diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
index eb84774014a4b..8ed7870bd2b31 100644
--- a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
+++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
@@ -4,102 +4,21 @@
define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-LABEL: sink_li:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call toupper
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: beqz s0, .LBB0_26
-; CHECK-NEXT: # %bb.1: # %while.body.preheader
-; CHECK-NEXT: li a2, 1
-; CHECK-NEXT: li a3, 9
-; CHECK-NEXT: li a4, 32
-; CHECK-NEXT: .LBB0_2: # %while.body
+; CHECK-NEXT: # kill: killed $x10
+; CHECK-NEXT: beqz s0, .LBB0_2
+; CHECK-NEXT: .LBB0_1: # %while.body.6
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: bnez a2, .LBB0_4
-; CHECK-NEXT: # %bb.3: # %while.body
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bne a2, a3, .LBB0_16
-; CHECK-NEXT: .LBB0_4: # %while.body.1
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bnez a2, .LBB0_6
-; CHECK-NEXT: # %bb.5: # %while.body.1
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bne a2, a3, .LBB0_17
-; CHECK-NEXT: .LBB0_6: # %while.body.3
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bnez a2, .LBB0_8
-; CHECK-NEXT: # %bb.7: # %while.body.3
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bne a2, a4, .LBB0_19
-; CHECK-NEXT: .LBB0_8: # %while.body.4
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bnez a2, .LBB0_10
-; CHECK-NEXT: # %bb.9: # %while.body.4
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bne a2, a4, .LBB0_21
-; CHECK-NEXT: .LBB0_10: # %while.body.5
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bnez a2, .LBB0_12
-; CHECK-NEXT: # %bb.11: # %while.body.5
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bne a2, a3, .LBB0_23
-; CHECK-NEXT: .LBB0_12: # %while.body.6
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: bnez a2, .LBB0_2
-; CHECK-NEXT: # %bb.13: # %while.body.6
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: beq a2, a3, .LBB0_2
-; CHECK-NEXT: # %bb.14: # %while.body.6
-; CHECK-NEXT: beqz a2, .LBB0_24
-; CHECK-NEXT: # %bb.15: # %strdup.exit.split.loop.exit126
-; CHECK-NEXT: addi s0, s1, 7
-; CHECK-NEXT: j .LBB0_25
-; CHECK-NEXT: .LBB0_16: # %while.body
-; CHECK-NEXT: beqz a2, .LBB0_26
-; CHECK-NEXT: j .LBB0_18
-; CHECK-NEXT: .LBB0_17: # %while.body.1
-; CHECK-NEXT: beqz a2, .LBB0_24
-; CHECK-NEXT: .LBB0_18: # %strdup.exit.loopexit
-; CHECK-NEXT: li s0, 0
-; CHECK-NEXT: j .LBB0_25
-; CHECK-NEXT: .LBB0_19: # %while.body.3
-; CHECK-NEXT: beqz a2, .LBB0_24
-; CHECK-NEXT: # %bb.20: # %strdup.exit.split.loop.exit120
-; CHECK-NEXT: addi s0, s1, 4
-; CHECK-NEXT: j .LBB0_25
-; CHECK-NEXT: .LBB0_21: # %while.body.4
-; CHECK-NEXT: beqz a2, .LBB0_24
-; CHECK-NEXT: # %bb.22: # %strdup.exit.split.loop.exit122
-; CHECK-NEXT: addi s0, s1, 5
-; CHECK-NEXT: j .LBB0_25
-; CHECK-NEXT: .LBB0_23: # %while.body.5
-; CHECK-NEXT: bnez a2, .LBB0_25
-; CHECK-NEXT: .LBB0_24:
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: j .LBB0_26
-; CHECK-NEXT: .LBB0_25: # %strdup.exit
-; CHECK-NEXT: li s1, 0
-; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: j .LBB0_1
+; CHECK-NEXT: .LBB0_2: # %return
; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: mv a1, s0
-; CHECK-NEXT: jalr s1
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: mv a1, s2
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: jalr s1
-; CHECK-NEXT: li a1, 1
-; CHECK-NEXT: .LBB0_26: # %return
-; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%call = call i32 @toupper()
>From e369d25fa20a0627f9a9897a1e9dcd249921c544 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 19 Mar 2025 09:00:47 -0700
Subject: [PATCH 5/7] fixup! only update dom tree if needed
---
llvm/lib/CodeGen/PeepholeOptimizer.cpp | 12 ++++++++----
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 -
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 -
3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index da7cd5e877060..704a280f31bae 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -568,7 +568,8 @@ class PeepholeOptimizerLegacy : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineLoopInfoWrapperPass>();
- AU.addRequired<MachineDominatorTreeWrapperPass>();
+ if (Aggressive)
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
}
MachineFunctionProperties getRequiredProperties() const override {
@@ -1645,7 +1646,8 @@ PreservedAnalyses
PeepholeOptimizerPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
MFPropsModifier _(*this, MF);
- auto *DT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
+ auto *DT =
+ Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
PeepholeOptimizer Impl(DT, MLI);
bool Changed = Impl.run(MF);
@@ -1658,7 +1660,9 @@ PeepholeOptimizerPass::run(MachineFunction &MF,
bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- auto *DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ auto *DT = Aggressive
+ ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
+ : nullptr;
auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
PeepholeOptimizer Impl(DT, MLI);
return Impl.run(MF);
@@ -1788,7 +1792,7 @@ bool PeepholeOptimizer::run(MachineFunction &MF) {
MachineInstr *Dead = &*MII;
++MII;
MachineBasicBlock *DeadDest = TII->getBranchDestBlock(*Dead);
- if (TII->getBranchDestBlock(*NewBr)!= DeadDest)
+ if (DT && TII->getBranchDestBlock(*NewBr) != DeadDest)
DT->deleteEdge(&MBB, DeadDest);
Dead->eraseFromParent();
}
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 209e1125b3c2e..e1481667a4ab7 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -158,7 +158,6 @@
; CHECK-NEXT: MachinePostDominator Tree Construction
; CHECK-NEXT: Machine Cycle Info Analysis
; CHECK-NEXT: Machine code sinking
-; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: AArch64 MI Peephole Optimization pass
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index e3ba2f7559c53..beef7a574dc4f 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -115,7 +115,6 @@
; CHECK-NEXT: MachinePostDominator Tree Construction
; CHECK-NEXT: Machine Cycle Info Analysis
; CHECK-NEXT: Machine code sinking
-; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: Machine Trace Metrics
>From 09678c40feff5d00b5707223569eed7f4388e7fa Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 19 Mar 2025 09:04:37 -0700
Subject: [PATCH 6/7] fixup! simplify switch
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index cc71df71517de..ce2c3b0f21955 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1288,9 +1288,8 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
int64_t C0, C1;
if (isFromLoadImm(Cond[1], C0) && isFromLoadImm(Cond[2], C1)) {
switch (CC) {
- default:
- // TODO: Implement for more CCs
- break;
+ case RISCVCC::COND_INVALID:
+ llvm_unreachable("Unexpected CC");
case RISCVCC::COND_EQ: {
Folded = (C0 == C1) ? TBB : FBB;
break;
>From cfb65551f148e39e33311f77d6fc364cfaca3473 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 19 Mar 2025 10:36:58 -0700
Subject: [PATCH 7/7] fixup! determine when DT and MLI need to be recalculated
---
llvm/lib/CodeGen/PeepholeOptimizer.cpp | 23 +++++++++++++++++----
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 3 ++-
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 10 ++++++---
llvm/test/CodeGen/ARM/O3-pipeline.ll | 1 +
llvm/test/CodeGen/LoongArch/opt-pipeline.ll | 3 ++-
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 3 ++-
6 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 704a280f31bae..07a57ff82208d 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -433,6 +433,8 @@ class PeepholeOptimizer : private MachineFunction::Delegate {
MachineDominatorTree *DT = nullptr; // Machine dominator tree
MachineLoopInfo *MLI = nullptr;
+ bool NeedToInvalidateMLI = false;
+
public:
PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
: DT(DT), MLI(MLI) {}
@@ -444,6 +446,7 @@ class PeepholeOptimizer : private MachineFunction::Delegate {
/// Sequence of instructions that formulate recurrence cycle.
using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
+ bool needToInvalidateMLI() const { return NeedToInvalidateMLI; }
private:
bool optimizeCmpInstr(MachineInstr &MI);
bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
@@ -568,8 +571,10 @@ class PeepholeOptimizerLegacy : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineLoopInfoWrapperPass>();
- if (Aggressive)
+ if (Aggressive) {
AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ }
}
MachineFunctionProperties getRequiredProperties() const override {
@@ -1654,7 +1659,13 @@ PeepholeOptimizerPass::run(MachineFunction &MF,
if (!Changed)
return PreservedAnalyses::all();
- return getMachineFunctionPassPreservedAnalyses();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserve<MachineDominatorTreeAnalysis>();
+ if (!Impl.needToInvalidateMLI()) {
+ PA.preserve<MachineLoopAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
+ }
+ return PA;
}
bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
@@ -1783,6 +1794,7 @@ bool PeepholeOptimizer::run(MachineFunction &MF) {
}
if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
+ NeedToInvalidateMLI = true;
// optimizeCondBranch might have converted a conditional branch to
// an unconditional branch. If there is a branch instruction after it,
// delete it.
@@ -1790,10 +1802,13 @@ bool PeepholeOptimizer::run(MachineFunction &MF) {
if (NewBr->isUnconditionalBranch()) {
if (MII != MBB.end()) {
MachineInstr *Dead = &*MII;
- ++MII;
MachineBasicBlock *DeadDest = TII->getBranchDestBlock(*Dead);
- if (DT && TII->getBranchDestBlock(*NewBr) != DeadDest)
+ if (DT && TII->getBranchDestBlock(*NewBr) != DeadDest) {
DT->deleteEdge(&MBB, DeadDest);
+ MLI->calculate(*DT);
+ NeedToInvalidateMLI = false;
+ }
+ ++MII;
Dead->eraseFromParent();
}
}
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index e1481667a4ab7..a4b6b19cdba3a 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -160,6 +160,8 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: AArch64 MI Peephole Optimization pass
; CHECK-NEXT: AArch64 Dead register definitions
; CHECK-NEXT: Detect Dead Lanes
@@ -169,7 +171,6 @@
; CHECK-NEXT: Live Variable Analysis
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
-; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Register Coalescer
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 4b6cc32522f5b..330a44ace3a89 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -343,6 +343,7 @@
; GCN-O1-NEXT: Remove unreachable machine basic blocks
; GCN-O1-NEXT: Live Variable Analysis
; GCN-O1-NEXT: MachineDominator Tree Construction
+; GCN-O1-NEXT: Machine Natural Loop Construction
; GCN-O1-NEXT: SI Optimize VGPR LiveRange
; GCN-O1-NEXT: Eliminate PHI nodes for register allocation
; GCN-O1-NEXT: SI Lower control flow pseudo instructions
@@ -639,8 +640,9 @@
; GCN-O1-OPTS-NEXT: GCN DPP Combine
; GCN-O1-OPTS-NEXT: SI Load Store Optimizer
; GCN-O1-OPTS-NEXT: SI Peephole SDWA
-; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction
+; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction
+; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Early Machine Loop Invariant Code Motion
; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction
; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
@@ -957,8 +959,9 @@
; GCN-O2-NEXT: GCN DPP Combine
; GCN-O2-NEXT: SI Load Store Optimizer
; GCN-O2-NEXT: SI Peephole SDWA
-; GCN-O2-NEXT: Machine Block Frequency Analysis
; GCN-O2-NEXT: MachineDominator Tree Construction
+; GCN-O2-NEXT: Machine Natural Loop Construction
+; GCN-O2-NEXT: Machine Block Frequency Analysis
; GCN-O2-NEXT: Early Machine Loop Invariant Code Motion
; GCN-O2-NEXT: MachineDominator Tree Construction
; GCN-O2-NEXT: Machine Block Frequency Analysis
@@ -1289,8 +1292,9 @@
; GCN-O3-NEXT: GCN DPP Combine
; GCN-O3-NEXT: SI Load Store Optimizer
; GCN-O3-NEXT: SI Peephole SDWA
-; GCN-O3-NEXT: Machine Block Frequency Analysis
; GCN-O3-NEXT: MachineDominator Tree Construction
+; GCN-O3-NEXT: Machine Natural Loop Construction
+; GCN-O3-NEXT: Machine Block Frequency Analysis
; GCN-O3-NEXT: Early Machine Loop Invariant Code Motion
; GCN-O3-NEXT: MachineDominator Tree Construction
; GCN-O3-NEXT: Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 960d7305e66f6..002e2d20e8f09 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -104,6 +104,7 @@
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 90d994909264a..d4697698c6900 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -116,9 +116,10 @@
; LAXX-NEXT: Process Implicit Definitions
; LAXX-NEXT: Remove unreachable machine basic blocks
; LAXX-NEXT: Live Variable Analysis
+; LAXX-NEXT: MachineDominator Tree Construction
+; LAXX-NEXT: Machine Natural Loop Construction
; LAXX-NEXT: Eliminate PHI nodes for register allocation
; LAXX-NEXT: Two-Address instruction pass
-; LAXX-NEXT: MachineDominator Tree Construction
; LAXX-NEXT: Slot index numbering
; LAXX-NEXT: Live Interval Analysis
; LAXX-NEXT: Register Coalescer
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index beef7a574dc4f..adea438589a31 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -117,13 +117,14 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Machine Trace Metrics
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine InstCombiner
; RV64-NEXT: RISC-V Optimize W Instructions
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V Merge Base Offset
-; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: RISC-V VL Optimizer
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
More information about the llvm-commits mailing list