[llvm] [RISCV] Add late optimization pass for RISC-V to optimize branch instructions (PR #131728)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 17 21:50:47 PDT 2025


https://github.com/michaelmaitland created https://github.com/llvm/llvm-project/pull/131728

This is an alternative to #117060, and is stacked on #131684. Marking
@mikhailramalho as co-author here because I got the idea of a late peephole pass
and the test case from #117060.

I use a late pass because we introduce the optimizable branches so late in the
pipeline.

Co-authored-by: Mikhail R. Gadelha <mikhail at igalia.com>

>From 4511b8c5250369ee47e612daac2135076d32e101 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 17 Mar 2025 18:56:50 -0700
Subject: [PATCH 1/4] precommit test case

---
 llvm/test/CodeGen/RISCV/branch_zero.ll     |   1 +
 llvm/test/CodeGen/RISCV/simplify-condbr.ll | 108 +++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/simplify-condbr.ll

diff --git a/llvm/test/CodeGen/RISCV/branch_zero.ll b/llvm/test/CodeGen/RISCV/branch_zero.ll
index fd0979977ba3b..0f50adec35a34 100644
--- a/llvm/test/CodeGen/RISCV/branch_zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch_zero.ll
@@ -83,3 +83,4 @@ exit1:
 if.then:
   br label %for.body
 }
+
diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
new file mode 100644
index 0000000000000..4dff2ac53f9ed
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
+
+declare fastcc i1 @S_reginclass()
+
+define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) nounwind {
+; CHECK-LABEL: S_regrepeat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    andi a2, a2, 255
+; CHECK-NEXT:    addi a4, a2, -19
+; CHECK-NEXT:    li a5, 2
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    bltu a4, a5, .LBB0_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    bltu a1, a2, .LBB0_8
+; CHECK-NEXT:  # %bb.2: # %do_exactf
+; CHECK-NEXT:    andi a3, a3, 1
+; CHECK-NEXT:    beqz a3, .LBB0_10
+; CHECK-NEXT:  # %bb.3: # %land.rhs251
+; CHECK-NEXT:    lw zero, 0(zero)
+; CHECK-NEXT:    li s0, 1
+; CHECK-NEXT:    bnez s0, .LBB0_9
+; CHECK-NEXT:    j .LBB0_8
+; CHECK-NEXT:  .LBB0_4: # %sw.bb336
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    li s0, 0
+; CHECK-NEXT:    andi s2, a3, 1
+; CHECK-NEXT:  .LBB0_5: # %land.rhs345
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    call S_reginclass
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    beqz a0, .LBB0_7
+; CHECK-NEXT:  # %bb.6: # %while.body350
+; CHECK-NEXT:    # in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT:    addiw s0, s0, 1
+; CHECK-NEXT:    bnez s2, .LBB0_5
+; CHECK-NEXT:    j .LBB0_8
+; CHECK-NEXT:  .LBB0_7:
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    bnez s0, .LBB0_9
+; CHECK-NEXT:  .LBB0_8: # %if.else1492
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:  .LBB0_9: # %if.end1497
+; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_10:
+; CHECK-NEXT:    bnez zero, .LBB0_9
+; CHECK-NEXT:    j .LBB0_8
+entry:
+  switch i8 %0, label %if.else1492 [
+    i8 19, label %sw.bb336
+    i8 20, label %sw.bb336
+    i8 1, label %do_exactf
+    i8 0, label %do_exactf
+  ]
+
+do_exactf:                                        ; preds = %entry, %entry
+  br i1 %cmp343, label %land.rhs251, label %if.end334
+
+land.rhs251:                                      ; preds = %do_exactf
+  %bcmp414 = load volatile i32, ptr null, align 4
+  br label %if.end334
+
+if.end334:                                        ; preds = %land.rhs251, %do_exactf
+  %hardcount.7 = phi i32 [ 0, %do_exactf ], [ 1, %land.rhs251 ]
+  call void @llvm.lifetime.end.p0(i64 0, ptr null)
+  br label %sw.epilog1489
+
+sw.bb336:                                         ; preds = %entry, %entry
+  br label %land.rhs345
+
+land.rhs345:                                      ; preds = %while.body350, %sw.bb336
+  %hardcount.8634 = phi i32 [ %inc356, %while.body350 ], [ 0, %sw.bb336 ]
+  %call347 = call fastcc i1 @S_reginclass()
+  br i1 %call347, label %while.body350, label %sw.epilog1489
+
+while.body350:                                    ; preds = %land.rhs345
+  %inc356 = add i32 %hardcount.8634, 1
+  br i1 %cmp343, label %land.rhs345, label %if.end1497
+
+sw.epilog1489:                                    ; preds = %land.rhs345, %if.end334
+  %hardcount.20 = phi i32 [ %hardcount.7, %if.end334 ], [ %hardcount.8634, %land.rhs345 ]
+  %tobool1490.not = icmp eq i32 %hardcount.20, 0
+  br i1 %tobool1490.not, label %if.else1492, label %if.end1497
+
+if.else1492:                                      ; preds = %sw.epilog1489, %entry
+  br label %if.end1497
+
+if.end1497:                                       ; preds = %if.else1492, %sw.epilog1489, %while.body350
+  %c.0 = phi i32 [ 0, %if.else1492 ], [ %max, %sw.epilog1489 ], [ 0, %while.body350 ]
+  ret i32 %c.0
+}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }

>From 33091e8e4e645a9cca5b4ff983be54456ea507ac Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 17 Mar 2025 11:36:13 -0700
Subject: [PATCH 2/4] [BranchFolding][RISCV] Optimize branches NE/EQ to zero or
 one

There are two changes here.

The first is that we teach analyzeBranch how to
evaluate a conditional branch followed by an unconditional branch such that
we can evaluate the conditional branch statically. Often, we will see comparison
to one or zero since SelectionDAG often uses i1 for the conditional comparison.
As a result, we handle this specific case. We handle only EQ and NEQ for now,
but this can be expanded in the future. We can also expand on handling arbitrary
constants in the future.

The second change is that we pass AllowModify=false to analyzeBranch in the
tail merging code. The reason we do this is because this code is doing some
clever tricks to the branch code that it will restore later. Now that we are
actually optimizing branches in analyzeBranch, we have to be careful not to
mess up this canonical form that the tail merging code expects.
---
 llvm/lib/CodeGen/BranchFolding.cpp            |   7 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      | 110 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h        |  20 ++++
 llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll |  20 ----
 llvm/test/CodeGen/RISCV/branch_zero.ll        |  23 +---
 llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll |  62 +++++-----
 .../RISCV/rvv/vxrm-insert-out-of-loop.ll      |  66 +++++------
 llvm/test/CodeGen/RISCV/simplify-condbr.ll    |   1 -
 8 files changed, 200 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 5218e39b88222..b4e0b4bf4585e 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -467,7 +467,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
   DebugLoc dl = CurMBB->findBranchDebugLoc();
   if (!dl)
     dl = BranchDL;
-  if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+  if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond)) {
     MachineBasicBlock *NextBB = &*I;
     if (TBB == NextBB && !Cond.empty() && !FBB) {
       if (!TII->reverseBranchCondition(Cond)) {
@@ -1107,7 +1107,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
 
       MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
       SmallVector<MachineOperand, 4> Cond;
-      if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+      if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond)) {
         // Failing case: IBB is the target of a cbr, and we cannot reverse the
         // branch.
         SmallVector<MachineOperand, 4> NewCond(Cond);
@@ -1564,7 +1564,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
     //    Loop: xxx; jcc Out; jmp Loop
     // we want:
     //    Loop: xxx; jncc Loop; jmp Out
-    if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+    if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB &&
+        !CurCond.empty()) {
       SmallVector<MachineOperand, 4> NewCond(CurCond);
       if (!TII->reverseBranchCondition(NewCond)) {
         DebugLoc Dl = MBB->findBranchDebugLoc();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2fdf6bd36e88f..e0d364c4a1306 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1005,6 +1005,109 @@ RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
   }
 }
 
+// Return true if MO definitely contains the value one: either the immediate
+// 1, or a virtual register uniquely defined by the canonical `ADDI X0, 1`.
+static bool isOne(MachineOperand &MO) {
+  if (MO.isImm() && MO.getImm() == 1)
+    return true;
+
+  if (!MO.isReg() || !MO.getReg().isVirtual())
+    return false;
+
+  MachineRegisterInfo &MRI =
+      MO.getParent()->getParent()->getParent()->getRegInfo();
+  MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
+  if (!DefMI || DefMI->getOpcode() != RISCV::ADDI)
+    return false;
+
+  // For now, just check the canonical one value. ADDI operands can be other
+  // kinds (e.g. a frame index or a symbol), so check before reading them.
+  const MachineOperand &Src = DefMI->getOperand(1);
+  const MachineOperand &Imm = DefMI->getOperand(2);
+  return Src.isReg() && Src.getReg() == RISCV::X0 && Imm.isImm() &&
+         Imm.getImm() == 1;
+}
+
+// Return true if MO definitely contains the value zero, i.e. it is either
+// the immediate 0 or the register X0.
+static bool isZero(MachineOperand &MO) {
+  const bool IsZeroImm = MO.isImm() && MO.getImm() == 0;
+  const bool IsZeroReg = MO.isReg() && MO.getReg() == RISCV::X0;
+
+  return IsZeroImm || IsZeroReg;
+}
+
+// Try to statically evaluate the conditional branch described by Cond and
+// rewrite MBB's terminators to the outcome.
+//
+// TBB is the taken target, FBB is the false target (null when the false path
+// is a fallthrough), and Cond holds {condition code, LHS, RHS}.
+//
+// Only EQ/NE comparisons whose operands are both recognized as the constant
+// 0 or 1 are handled. On success the existing branches are removed, an
+// unconditional branch is inserted unless the outcome is a fallthrough,
+// MBB's successor list is rewritten to hold at most the one remaining
+// successor, Cond is cleared, and true is returned. Otherwise nothing is
+// modified and false is returned.
+bool RISCVInstrInfo::trySimplifyCondBr(
+    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  // Cond must hold exactly {condition code, LHS, RHS}, and there must be a
+  // taken-branch target to reason about.
+  if (!TBB || Cond.size() != 3)
+    return false;
+
+  RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
+  MachineOperand &LHS = Cond[1];
+  MachineOperand &RHS = Cond[2];
+
+  // Only equality comparisons are folded for now.
+  // TODO: Implement for more CCs
+  if (CC != RISCVCC::COND_EQ && CC != RISCVCC::COND_NE)
+    return false;
+
+  // Only the constants 0 and 1 are recognized. The operands are statically
+  // equal when both are the same constant, and statically different when one
+  // is 0 and the other is 1.
+  bool KnownEq = (isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS));
+  bool KnownNe = (isZero(LHS) && isOne(RHS)) || (isOne(LHS) && isZero(RHS));
+  if (!KnownEq && !KnownNe)
+    return false;
+
+  // The branch goes to TBB when the comparison is statically true. Otherwise
+  // control goes to FBB, which is null when the false path is a fallthrough.
+  bool Taken = CC == RISCVCC::COND_EQ ? KnownEq : KnownNe;
+  MachineBasicBlock *Folded = Taken ? TBB : FBB;
+
+  // Grab the debug location before removeBranch() erases the branches that
+  // findBranchDebugLoc() would read it from.
+  DebugLoc DL = MBB.findBranchDebugLoc();
+
+  // At this point, it's legal to optimize.
+  removeBranch(MBB);
+  Cond.clear();
+
+  // Update the successors. Remove them all and add back the correct one.
+  while (!MBB.succ_empty())
+    MBB.removeSuccessor(MBB.succ_end() - 1);
+
+  if (Folded) {
+    // Not falling through, so an explicit unconditional branch is needed.
+    insertBranch(MBB, Folded, nullptr, {}, DL);
+    MBB.addSuccessor(Folded);
+  } else {
+    // It's a fallthrough, so MBB continues into the next block in layout
+    // order, if there is one.
+    MachineFunction::iterator Fallthrough = ++MBB.getIterator();
+    if (Fallthrough != MBB.getParent()->end())
+      MBB.addSuccessor(&*Fallthrough);
+  }
+
+  // NOTE: TBB and FBB are passed by value, so the caller's copies still name
+  // the original targets; only Cond (now empty) and MBB itself are updated.
+  return true;
+}
+
 bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
@@ -1062,6 +1165,9 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   // Handle a single conditional branch.
   if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
     parseCondBranch(*I, TBB, Cond);
+    // Try to fold the conditional branch into the fallthrough.
+    if (AllowModify)
+      trySimplifyCondBr(MBB, TBB, FBB, Cond);
     return false;
   }
 
@@ -1070,6 +1176,10 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
       I->getDesc().isUnconditionalBranch()) {
     parseCondBranch(*std::prev(I), TBB, Cond);
     FBB = getBranchDestBlock(*I);
+    // Try to fold the branch of the conditional branch into an unconditional
+    // branch.
+    if (AllowModify)
+      trySimplifyCondBr(MBB, TBB, FBB, Cond);
     return false;
   }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 656cb38e11297..926d8f1a0ba6e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -319,6 +319,26 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
                                const MachineInstr &MI2) const;
   bool hasReassociableVectorSibling(const MachineInstr &Inst,
                                     bool &Commuted) const;
+  /// Return true if the branch represented by the conditional branch with
+  /// components TBB, FBB, and Cond was folded into an unconditional branch.
+  ///
+  /// If FBB is nullptr, then the input represents a conditional branch with
+  /// a fallthrough.
+  ///
+  /// For example:
+  /// BRCOND EQ 0, 0, BB1
+  /// BR BB2
+  ///
+  /// can be simplified to BR BB1 since 0 == 0 statically. On the other hand,
+  ///
+  ///
+  /// BRCOND EQ 0, 1, BB1
+  /// BR BB2
+  ///
+  /// can be simplified to BR BB2 because 0 != 1 statically.
+  bool trySimplifyCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                         MachineBasicBlock *FBB,
+                         SmallVectorImpl<MachineOperand> &Cond) const;
 };
 
 namespace RISCV {
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
index 338925059862c..74ec7308cb646 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
@@ -357,11 +357,6 @@ define i64 @ctpop_i64(i64 %a) nounwind {
 define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
 ; RV32I-LABEL: ctpop_i64_ugt_two:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beqz zero, .LBB6_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sltiu a0, zero, 0
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB6_2:
 ; RV32I-NEXT:    srli a2, a0, 1
 ; RV32I-NEXT:    lui a3, 349525
 ; RV32I-NEXT:    lui a4, 209715
@@ -404,11 +399,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: ctpop_i64_ugt_two:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    beqz zero, .LBB6_2
-; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    sltiu a0, zero, 0
-; RV32ZBB-NEXT:    ret
-; RV32ZBB-NEXT:  .LBB6_2:
 ; RV32ZBB-NEXT:    cpop a0, a0
 ; RV32ZBB-NEXT:    cpop a1, a1
 ; RV32ZBB-NEXT:    add a0, a1, a0
@@ -422,11 +412,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
 define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
 ; RV32I-LABEL: ctpop_i64_ugt_one:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beqz zero, .LBB7_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    snez a0, zero
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB7_2:
 ; RV32I-NEXT:    srli a2, a0, 1
 ; RV32I-NEXT:    lui a3, 349525
 ; RV32I-NEXT:    lui a4, 209715
@@ -470,11 +455,6 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: ctpop_i64_ugt_one:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    beqz zero, .LBB7_2
-; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    snez a0, zero
-; RV32ZBB-NEXT:    ret
-; RV32ZBB-NEXT:  .LBB7_2:
 ; RV32ZBB-NEXT:    cpop a0, a0
 ; RV32ZBB-NEXT:    cpop a1, a1
 ; RV32ZBB-NEXT:    add a0, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/branch_zero.ll b/llvm/test/CodeGen/RISCV/branch_zero.ll
index 0f50adec35a34..2c13c28647516 100644
--- a/llvm/test/CodeGen/RISCV/branch_zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch_zero.ll
@@ -5,16 +5,11 @@
 define void @foo(i16 %finder_idx) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:  .LBB0_1: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:  # %bb.1: # %for.body
 ; CHECK-NEXT:    slli a0, a0, 48
-; CHECK-NEXT:    bltz a0, .LBB0_4
+; CHECK-NEXT:    bltz a0, .LBB0_3
 ; CHECK-NEXT:  # %bb.2: # %while.cond.preheader.i
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    bnez zero, .LBB0_1
-; CHECK-NEXT:  # %bb.3: # %while.body
-; CHECK-NEXT:  .LBB0_4: # %while.cond1.preheader.i
+; CHECK-NEXT:  .LBB0_3: # %while.cond1.preheader.i
 entry:
   br label %for.body
 
@@ -46,16 +41,11 @@ if.then:
 define void @bar(i16 %finder_idx) {
 ; CHECK-LABEL: bar:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:  .LBB1_1: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:  # %bb.1: # %for.body
 ; CHECK-NEXT:    slli a0, a0, 48
-; CHECK-NEXT:    bgez a0, .LBB1_4
+; CHECK-NEXT:    bgez a0, .LBB1_3
 ; CHECK-NEXT:  # %bb.2: # %while.cond.preheader.i
-; CHECK-NEXT:    # in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    bnez zero, .LBB1_1
-; CHECK-NEXT:  # %bb.3: # %while.body
-; CHECK-NEXT:  .LBB1_4: # %while.cond1.preheader.i
+; CHECK-NEXT:  .LBB1_3: # %while.cond1.preheader.i
 entry:
   br label %for.body
 
@@ -83,4 +73,3 @@ exit1:
 if.then:
   br label %for.body
 }
-
diff --git a/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll b/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll
index 1e72529b17f59..00689c3136517 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mattr=+zcmp -verify-machineinstrs  \
 ; RUN: -mtriple=riscv32 -target-abi=ilp32 < %s \
 ; RUN: | FileCheck %s -check-prefixes=RV32IZCMP
@@ -11,40 +12,39 @@
 
 declare dso_local void @f1() local_unnamed_addr
 declare dso_local void @f2() local_unnamed_addr
-define  dso_local void @f0() local_unnamed_addr {
+define  dso_local void @f0(i1 %c) local_unnamed_addr {
 ; RV32IZCMP-LABEL: f0:
-; RV32IZCMP: 	.cfi_startproc
-; RV32IZCMP-NEXT: # %bb.0:                                # %entry
-; RV32IZCMP-NEXT: 	bnez	zero, .LBB0_2
-; RV32IZCMP-NEXT: # %bb.1:                                # %if.T
-; RV32IZCMP-NEXT: 	cm.push	{ra}, -16
-; RV32IZCMP-NEXT: 	.cfi_def_cfa_offset 16
-; RV32IZCMP-NEXT: 	.cfi_offset ra, -4
-; RV32IZCMP-NEXT: 	call	f1
-; RV32IZCMP-NEXT: 	cm.pop	{ra}, 16
-; RV32IZCMP-NEXT:     .cfi_restore ra
-; RV32IZCMP-NEXT:     .cfi_def_cfa_offset 0
-; RV32IZCMP-NEXT: .LBB0_2:                                # %if.F
-; RV32IZCMP-NEXT: 	tail	f2
-; RV32IZCMP-NEXT: .Lfunc_end0:
-
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    andi a0, a0, 1
+; RV32IZCMP-NEXT:    beqz a0, .LBB0_2
+; RV32IZCMP-NEXT:  # %bb.1: # %if.T
+; RV32IZCMP-NEXT:    cm.push {ra}, -16
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    call f1
+; RV32IZCMP-NEXT:    cm.pop {ra}, 16
+; RV32IZCMP-NEXT:    .cfi_restore ra
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 0
+; RV32IZCMP-NEXT:  .LBB0_2: # %if.F
+; RV32IZCMP-NEXT:    tail f2
+;
 ; RV64IZCMP-LABEL: f0:
-; RV64IZCMP: 	.cfi_startproc
-; RV64IZCMP-NEXT: # %bb.0:                                # %entry
-; RV64IZCMP-NEXT: 	bnez	zero, .LBB0_2
-; RV64IZCMP-NEXT: # %bb.1:                                # %if.T
-; RV64IZCMP-NEXT: 	cm.push	{ra}, -16
-; RV64IZCMP-NEXT: 	.cfi_def_cfa_offset 16
-; RV64IZCMP-NEXT: 	.cfi_offset ra, -8
-; RV64IZCMP-NEXT: 	call	f1
-; RV64IZCMP-NEXT: 	cm.pop	{ra}, 16
-; RV64IZCMP-NEXT:     .cfi_restore ra
-; RV64IZCMP-NEXT:     .cfi_def_cfa_offset 0
-; RV64IZCMP-NEXT: .LBB0_2:                                # %if.F
-; RV64IZCMP-NEXT: 	tail	f2
-; RV64IZCMP-NEXT: .Lfunc_end0:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    andi a0, a0, 1
+; RV64IZCMP-NEXT:    beqz a0, .LBB0_2
+; RV64IZCMP-NEXT:  # %bb.1: # %if.T
+; RV64IZCMP-NEXT:    cm.push {ra}, -16
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    call f1
+; RV64IZCMP-NEXT:    cm.pop {ra}, 16
+; RV64IZCMP-NEXT:    .cfi_restore ra
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 0
+; RV64IZCMP-NEXT:  .LBB0_2: # %if.F
+; RV64IZCMP-NEXT:    tail f2
+
 entry:
-  br i1 poison, label %if.T, label %if.F
+  br i1 %c, label %if.T, label %if.F
 
 if.T:
   tail call void @f1()
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index c35f05be304cc..5251074717c93 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -14,9 +14,9 @@
 define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_dst_stride, ptr nocapture noundef readonly %src1, i32 noundef signext %i_src1_stride, ptr nocapture noundef readonly %src2, i32 noundef signext %i_src2_stride, i32 noundef signext %i_width, i32 noundef signext %i_height) {
 ; RV32-LABEL: test1:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    blez a7, .LBB0_17
+; RV32-NEXT:    blez a7, .LBB0_13
 ; RV32-NEXT:  # %bb.1: # %for.cond1.preheader.lr.ph
-; RV32-NEXT:    blez a6, .LBB0_17
+; RV32-NEXT:    blez a6, .LBB0_13
 ; RV32-NEXT:  # %bb.2: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    addi t0, a7, -1
 ; RV32-NEXT:    csrr t2, vlenb
@@ -25,11 +25,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    mul t5, a5, t0
 ; RV32-NEXT:    slli t1, t2, 1
 ; RV32-NEXT:    li t6, 32
-; RV32-NEXT:    mv t0, t1
-; RV32-NEXT:    bnez zero, .LBB0_4
-; RV32-NEXT:  # %bb.3: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    li t0, 32
-; RV32-NEXT:  .LBB0_4: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
@@ -41,17 +37,13 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    add t3, a0, t3
 ; RV32-NEXT:    add t4, a2, t4
 ; RV32-NEXT:    add s0, a4, t5
-; RV32-NEXT:    bltu t6, t1, .LBB0_6
-; RV32-NEXT:  # %bb.5: # %for.cond1.preheader.us.preheader
+; RV32-NEXT:    bltu t6, t1, .LBB0_4
+; RV32-NEXT:  # %bb.3: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    li t1, 32
-; RV32-NEXT:  .LBB0_6: # %for.cond1.preheader.us.preheader
+; RV32-NEXT:  .LBB0_4: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    add t3, t3, a6
 ; RV32-NEXT:    add t5, t4, a6
 ; RV32-NEXT:    add t4, s0, a6
-; RV32-NEXT:    beqz zero, .LBB0_8
-; RV32-NEXT:  # %bb.7: # %for.cond1.preheader.us.preheader
-; RV32-NEXT:    mv t1, t0
-; RV32-NEXT:  .LBB0_8: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    li t0, 0
 ; RV32-NEXT:    sltu t5, a0, t5
 ; RV32-NEXT:    sltu t6, a2, t3
@@ -70,25 +62,25 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    or t1, t1, t3
 ; RV32-NEXT:    andi t1, t1, 1
 ; RV32-NEXT:    slli t2, t2, 1
-; RV32-NEXT:    j .LBB0_10
-; RV32-NEXT:  .LBB0_9: # %for.cond1.for.cond.cleanup3_crit_edge.us
-; RV32-NEXT:    # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT:    j .LBB0_6
+; RV32-NEXT:  .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
+; RV32-NEXT:    # in Loop: Header=BB0_6 Depth=1
 ; RV32-NEXT:    add a0, a0, a1
 ; RV32-NEXT:    add a2, a2, a3
 ; RV32-NEXT:    addi t0, t0, 1
 ; RV32-NEXT:    add a4, a4, a5
-; RV32-NEXT:    beq t0, a7, .LBB0_16
-; RV32-NEXT:  .LBB0_10: # %for.cond1.preheader.us
+; RV32-NEXT:    beq t0, a7, .LBB0_12
+; RV32-NEXT:  .LBB0_6: # %for.cond1.preheader.us
 ; RV32-NEXT:    # =>This Loop Header: Depth=1
-; RV32-NEXT:    # Child Loop BB0_13 Depth 2
-; RV32-NEXT:    # Child Loop BB0_15 Depth 2
-; RV32-NEXT:    beqz t1, .LBB0_12
-; RV32-NEXT:  # %bb.11: # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT:    # Child Loop BB0_9 Depth 2
+; RV32-NEXT:    # Child Loop BB0_11 Depth 2
+; RV32-NEXT:    beqz t1, .LBB0_8
+; RV32-NEXT:  # %bb.7: # in Loop: Header=BB0_6 Depth=1
 ; RV32-NEXT:    li t4, 0
 ; RV32-NEXT:    li t3, 0
-; RV32-NEXT:    j .LBB0_15
-; RV32-NEXT:  .LBB0_12: # %vector.ph
-; RV32-NEXT:    # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT:    j .LBB0_11
+; RV32-NEXT:  .LBB0_8: # %vector.ph
+; RV32-NEXT:    # in Loop: Header=BB0_6 Depth=1
 ; RV32-NEXT:    li t3, 0
 ; RV32-NEXT:    neg t4, t2
 ; RV32-NEXT:    and t4, t4, a6
@@ -96,8 +88,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    li t6, 0
 ; RV32-NEXT:    li t5, 0
 ; RV32-NEXT:    vsetvli s0, zero, e8, m2, ta, ma
-; RV32-NEXT:  .LBB0_13: # %vector.body
-; RV32-NEXT:    # Parent Loop BB0_10 Depth=1
+; RV32-NEXT:  .LBB0_9: # %vector.body
+; RV32-NEXT:    # Parent Loop BB0_6 Depth=1
 ; RV32-NEXT:    # => This Inner Loop Header: Depth=2
 ; RV32-NEXT:    add s0, a2, t6
 ; RV32-NEXT:    add s1, a4, t6
@@ -112,12 +104,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    or s2, t6, t5
 ; RV32-NEXT:    vs2r.v v8, (s0)
 ; RV32-NEXT:    mv t6, s1
-; RV32-NEXT:    bnez s2, .LBB0_13
-; RV32-NEXT:  # %bb.14: # %middle.block
-; RV32-NEXT:    # in Loop: Header=BB0_10 Depth=1
-; RV32-NEXT:    beq t4, a6, .LBB0_9
-; RV32-NEXT:  .LBB0_15: # %for.body4.us
-; RV32-NEXT:    # Parent Loop BB0_10 Depth=1
+; RV32-NEXT:    bnez s2, .LBB0_9
+; RV32-NEXT:  # %bb.10: # %middle.block
+; RV32-NEXT:    # in Loop: Header=BB0_6 Depth=1
+; RV32-NEXT:    beq t4, a6, .LBB0_5
+; RV32-NEXT:  .LBB0_11: # %for.body4.us
+; RV32-NEXT:    # Parent Loop BB0_6 Depth=1
 ; RV32-NEXT:    # => This Inner Loop Header: Depth=2
 ; RV32-NEXT:    add t5, a2, t4
 ; RV32-NEXT:    add t6, a4, t4
@@ -133,9 +125,9 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    srli t5, t5, 1
 ; RV32-NEXT:    or t6, t6, t3
 ; RV32-NEXT:    sb t5, 0(s0)
-; RV32-NEXT:    bnez t6, .LBB0_15
-; RV32-NEXT:    j .LBB0_9
-; RV32-NEXT:  .LBB0_16:
+; RV32-NEXT:    bnez t6, .LBB0_11
+; RV32-NEXT:    j .LBB0_5
+; RV32-NEXT:  .LBB0_12:
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
@@ -144,7 +136,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    .cfi_restore s2
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
-; RV32-NEXT:  .LBB0_17: # %for.cond.cleanup
+; RV32-NEXT:  .LBB0_13: # %for.cond.cleanup
 ; RV32-NEXT:    ret
 ;
 ; RV64P670-LABEL: test1:
diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
index 4dff2ac53f9ed..8e8c5fe2976af 100644
--- a/llvm/test/CodeGen/RISCV/simplify-condbr.ll
+++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
@@ -58,7 +58,6 @@ define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) noun
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_10:
-; CHECK-NEXT:    bnez zero, .LBB0_9
 ; CHECK-NEXT:    j .LBB0_8
 entry:
   switch i8 %0, label %if.else1492 [

>From 1d0f096d44bac3a3f68c18448741ce0a1fe2baff Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 17 Mar 2025 20:13:50 -0700
Subject: [PATCH 3/4] precommit test case

---
 llvm/test/CodeGen/RISCV/simplify-condbr.ll | 72 ++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
index 8e8c5fe2976af..3c995c269e794 100644
--- a/llvm/test/CodeGen/RISCV/simplify-condbr.ll
+++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
@@ -5,8 +5,13 @@
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
 
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #0
+
 declare fastcc i1 @S_reginclass()
 
+declare fastcc ptr @Perl_av_store(i64)
+
 define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) nounwind {
 ; CHECK-LABEL: S_regrepeat:
 ; CHECK:       # %bb.0: # %entry
@@ -104,4 +109,71 @@ if.end1497:                                       ; preds = %if.else1492, %sw.ep
   ret i32 %c.0
 }
 
+define ptr @Perl_pp_refassign(ptr %PL_stack_sp, i1 %tobool.not, i1 %tobool3.not, i1 %cond1) nounwind {
+; CHECK-LABEL: Perl_pp_refassign:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi a1, a1, 1
+; CHECK-NEXT:    beqz a1, .LBB1_3
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    andi a2, a2, 1
+; CHECK-NEXT:    bnez a2, .LBB1_4
+; CHECK-NEXT:  .LBB1_2: # %cond.true4
+; CHECK-NEXT:    ld a0, 0(a0)
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    bnez a0, .LBB1_5
+; CHECK-NEXT:    j .LBB1_6
+; CHECK-NEXT:  .LBB1_3: # %cond.true
+; CHECK-NEXT:    ld a1, 0(a0)
+; CHECK-NEXT:    andi a2, a2, 1
+; CHECK-NEXT:    beqz a2, .LBB1_2
+; CHECK-NEXT:  .LBB1_4:
+; CHECK-NEXT:    beqz zero, .LBB1_6
+; CHECK-NEXT:  .LBB1_5: # %sw.bb85
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    ld a0, 0(a1)
+; CHECK-NEXT:    call Perl_av_store
+; CHECK-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:  .LBB1_6: # %common.ret
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+entry:
+  br i1 %tobool.not, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = load ptr, ptr %PL_stack_sp, align 8
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi ptr [ %0, %cond.true ], [ null, %entry ]
+  br i1 %tobool3.not, label %cond.end7, label %cond.true4
+
+cond.true4:                                       ; preds = %cond.end
+  %1 = load ptr, ptr %PL_stack_sp, align 8
+  %2 = icmp ne ptr %1, null
+  br label %cond.end7
+
+cond.end7:                                        ; preds = %cond.true4, %cond.end
+  %cond84 = phi i1 [ %2, %cond.true4 ], [ false, %cond.end ]
+  br i1 %cond1, label %if.end48, label %sw.bb
+
+sw.bb:                                            ; preds = %cond.end7
+  call void @llvm.assume(i1 %tobool.not)
+  br label %if.end48
+
+if.end48:                                         ; preds = %sw.bb, %cond.end7
+  br i1 %cond84, label %sw.bb85, label %common.ret
+
+common.ret:                                       ; preds = %sw.bb85, %if.end48
+  ret ptr null
+
+sw.bb85:                                          ; preds = %if.end48
+  %3 = load i64, ptr %cond, align 8
+  %call125 = call fastcc ptr @Perl_av_store(i64 %3)
+  br label %common.ret
+}
+
 attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }

>From 35e2715ca99e1cf52bd309334de55b82bd011b35 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 17 Mar 2025 20:42:38 -0700
Subject: [PATCH 4/4] [RISCV] Add late optimization pass for RISC-V to optimize
 branch instructions

This is an alternative to #117060, and is stacked on #131684. Marking
@mikhailramalho as Co-Author here because I got the idea of a late peephole pass
and the test case from #117060.

I use a late pass because we introduce the optimizable branches so late in the
pipeline.

Co-authored-by: Mikhail R. Gadelha <mikhail at igalia.com>
---
 llvm/lib/Target/RISCV/CMakeLists.txt         |  1 +
 llvm/lib/Target/RISCV/RISCV.h                |  3 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.h       | 26 +++---
 llvm/lib/Target/RISCV/RISCVLatePeephole.cpp  | 87 ++++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp |  2 +
 llvm/test/CodeGen/RISCV/O0-pipeline.ll       |  1 +
 llvm/test/CodeGen/RISCV/O3-pipeline.ll       |  1 +
 llvm/test/CodeGen/RISCV/simplify-condbr.ll   |  2 +-
 8 files changed, 109 insertions(+), 14 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVLatePeephole.cpp

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index e8d00f4df7c86..3fff898411d7a 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_target(RISCVCodeGen
   RISCVConstantPoolValue.cpp
   RISCVDeadRegisterDefinitions.cpp
   RISCVMakeCompressible.cpp
+  RISCVLatePeephole.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
   RISCVFoldMemOffset.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 641e2eb4094f9..cd6045355a9ef 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -43,6 +43,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM,
 FunctionPass *createRISCVMakeCompressibleOptPass();
 void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
 
+FunctionPass *createRISCVLatePeepholeOptPass();
+void initializeRISCVLatePeepholeOptPass(PassRegistry &);
+
 FunctionPass *createRISCVGatherScatterLoweringPass();
 void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 926d8f1a0ba6e..d00b6f57d10e0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -306,19 +306,6 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
 
   static bool isLdStSafeToPair(const MachineInstr &LdSt,
                                const TargetRegisterInfo *TRI);
-
-protected:
-  const RISCVSubtarget &STI;
-
-private:
-  unsigned getInstBundleLength(const MachineInstr &MI) const;
-
-  bool isVectorAssociativeAndCommutative(const MachineInstr &MI,
-                                         bool Invert = false) const;
-  bool areRVVInstsReassociable(const MachineInstr &MI1,
-                               const MachineInstr &MI2) const;
-  bool hasReassociableVectorSibling(const MachineInstr &Inst,
-                                    bool &Commuted) const;
   /// Return true if the branch represented by the conditional branch with
   /// components TBB, FBB, and CurCond was folded into an unconditional branch.
   ///
@@ -339,6 +326,19 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   bool trySimplifyCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                          MachineBasicBlock *FBB,
                          SmallVectorImpl<MachineOperand> &Cond) const;
+
+protected:
+  const RISCVSubtarget &STI;
+
+private:
+  unsigned getInstBundleLength(const MachineInstr &MI) const;
+
+  bool isVectorAssociativeAndCommutative(const MachineInstr &MI,
+                                         bool Invert = false) const;
+  bool areRVVInstsReassociable(const MachineInstr &MI1,
+                               const MachineInstr &MI2) const;
+  bool hasReassociableVectorSibling(const MachineInstr &Inst,
+                                    bool &Commuted) const;
 };
 
 namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp b/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp
new file mode 100644
index 0000000000000..5ecee5069d3cd
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp
@@ -0,0 +1,87 @@
+//===-- RISCVLatePeephole.cpp - Late stage peephole optimization ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file provides RISC-V late peephole optimizations
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-late-peephole"
+#define RISCV_LATE_PEEPHOLE_NAME "RISC-V Late Stage Peephole"
+
+namespace {
+
+struct RISCVLatePeepholeOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVLatePeepholeOpt() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return RISCV_LATE_PEEPHOLE_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTreeWrapperPass>();
+    AU.addPreserved<MachineDominatorTreeWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+  bool optimizeBlock(MachineBasicBlock &MBB);
+
+  const RISCVInstrInfo *TII = nullptr;
+};
+} // namespace
+
+char RISCVLatePeepholeOpt::ID = 0;
+INITIALIZE_PASS(RISCVLatePeepholeOpt, "riscv-late-peephole",
+                RISCV_LATE_PEEPHOLE_NAME, false, false)
+
+bool RISCVLatePeepholeOpt::optimizeBlock(MachineBasicBlock &MBB) {
+
+  // Use trySimplifyCondBr directly to know whether the optimization occurred.
+  MachineBasicBlock *TBB, *FBB;
+  SmallVector<MachineOperand, 4> Cond;
+  if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, false))
+    return TII->trySimplifyCondBr(MBB, TBB, FBB, Cond);
+
+  return false;
+}
+
+bool RISCVLatePeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+
+  bool MadeChange = false;
+
+  for (MachineBasicBlock &MBB : MF)
+    MadeChange |= optimizeBlock(MBB);
+
+  return MadeChange;
+}
+
+/// Returns an instance of the RISC-V Late Stage Peephole pass.
+FunctionPass *llvm::createRISCVLatePeepholeOptPass() {
+  return new RISCVLatePeepholeOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index f78e5f8147d98..a283bd02bf8fa 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -128,6 +128,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeKCFIPass(*PR);
   initializeRISCVDeadRegisterDefinitionsPass(*PR);
   initializeRISCVMakeCompressibleOptPass(*PR);
+  initializeRISCVLatePeepholeOptPass(*PR);
   initializeRISCVGatherScatterLoweringPass(*PR);
   initializeRISCVCodeGenPreparePass(*PR);
   initializeRISCVPostRAExpandPseudoPass(*PR);
@@ -567,6 +568,7 @@ void RISCVPassConfig::addPreEmitPass() {
     addPass(createMachineCopyPropagationPass(true));
   addPass(&BranchRelaxationPassID);
   addPass(createRISCVMakeCompressibleOptPass());
+  addPass(createRISCVLatePeepholeOptPass());
 }
 
 void RISCVPassConfig::addPreEmitPass2() {
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index f93cb65897210..29ec19b7e35a7 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -64,6 +64,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       Branch relaxation pass
 ; CHECK-NEXT:       RISC-V Make Compressible
+; CHECK-NEXT:       RISC-V Late Stage Peephole
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 976d1ee003a1f..b3698caf7f0f6 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -196,6 +196,7 @@
 ; CHECK-NEXT:       Machine Copy Propagation Pass
 ; CHECK-NEXT:       Branch relaxation pass
 ; CHECK-NEXT:       RISC-V Make Compressible
+; CHECK-NEXT:       RISC-V Late Stage Peephole
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       Remove Loads Into Fake Uses
 ; CHECK-NEXT:       StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
index 3c995c269e794..3f9a73607103a 100644
--- a/llvm/test/CodeGen/RISCV/simplify-condbr.ll
+++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
@@ -128,7 +128,7 @@ define ptr @Perl_pp_refassign(ptr %PL_stack_sp, i1 %tobool.not, i1 %tobool3.not,
 ; CHECK-NEXT:    andi a2, a2, 1
 ; CHECK-NEXT:    beqz a2, .LBB1_2
 ; CHECK-NEXT:  .LBB1_4:
-; CHECK-NEXT:    beqz zero, .LBB1_6
+; CHECK-NEXT:    j .LBB1_6
 ; CHECK-NEXT:  .LBB1_5: # %sw.bb85
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill



More information about the llvm-commits mailing list