[llvm] Riscv branch peephole opt (PR #90451)

Zhijin Zeng via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 4 03:20:29 PDT 2024


https://github.com/zengdage updated https://github.com/llvm/llvm-project/pull/90451

>From 6f50e0f72f5edd0b0ad6d514f2cae2bd05f5b729 Mon Sep 17 00:00:00 2001
From: Zhijin Zeng <zhijin.zeng at spacemit.com>
Date: Mon, 29 Apr 2024 15:36:01 +0800
Subject: [PATCH 1/2] [RISC-V][NFC] Add branch peephole optimization tests

---
 llvm/test/CodeGen/RISCV/peephole-branch.mir | 467 ++++++++++++++++++++
 1 file changed, 467 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/peephole-branch.mir

diff --git a/llvm/test/CodeGen/RISCV/peephole-branch.mir b/llvm/test/CodeGen/RISCV/peephole-branch.mir
new file mode 100644
index 0000000000000..8f22b42dacef3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/peephole-branch.mir
@@ -0,0 +1,467 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -o - %s -mtriple=riscv32 -run-pass=machine-cp | FileCheck %s
+
+--- |
+  define void @peephole_bne(ptr %a, ptr %b, ptr %c) {
+  entry:
+    %e = load i32, ptr %a, align 4
+    %p = icmp ne i32 %e, %e
+    br i1 %p, label %block1, label %block2
+
+  block1:                                           ; preds = %entry
+    store i32 %e, ptr %b, align 4
+    br label %end_block
+
+  block2:                                           ; preds = %entry
+    store i32 87, ptr %c, align 4
+    br label %end_block
+
+  end_block:                                        ; preds = %block2, %block1
+    ret void
+  }
+
+  define void @peephole_beq(ptr %a, ptr %b, ptr %c) {
+  entry:
+    %e = load i32, ptr %a, align 4
+    %p = icmp eq i32 %e, %e
+    br i1 %p, label %block1, label %block2
+
+  block1:                                           ; preds = %entry
+    store i32 %e, ptr %b, align 4
+    br label %end_block
+
+  block2:                                           ; preds = %entry
+    store i32 87, ptr %c, align 4
+    br label %end_block
+
+  end_block:                                        ; preds = %block2, %block1
+    ret void
+  }
+
+  define void @peephole_blt(ptr %a, ptr %b, ptr %c) {
+  entry:
+    %e = load i32, ptr %a, align 4
+    %p = icmp slt i32 %e, %e
+    br i1 %p, label %block1, label %block2
+
+  block1:                                           ; preds = %entry
+    store i32 %e, ptr %b, align 4
+    br label %end_block
+
+  block2:                                           ; preds = %entry
+    store i32 87, ptr %c, align 4
+    br label %end_block
+
+  end_block:                                        ; preds = %block2, %block1
+    ret void
+  }
+
+  define void @peephole_bltu(ptr %a, ptr %b, ptr %c) {
+  entry:
+    %e = load i32, ptr %a, align 4
+    %p = icmp ult i32 %e, %e
+    br i1 %p, label %block1, label %block2
+
+  block1:                                           ; preds = %entry
+    store i32 %e, ptr %b, align 4
+    br label %end_block
+
+  block2:                                           ; preds = %entry
+    store i32 87, ptr %c, align 4
+    br label %end_block
+
+  end_block:                                        ; preds = %block2, %block1
+    ret void
+  }
+
+  define void @peephole_bge(ptr %a, ptr %b, ptr %c) {
+  entry:
+    %e = load i32, ptr %a, align 4
+    %p = icmp sge i32 %e, %e
+    br i1 %p, label %block1, label %block2
+
+  block1:                                           ; preds = %entry
+    store i32 %e, ptr %b, align 4
+    br label %end_block
+
+  block2:                                           ; preds = %entry
+    store i32 87, ptr %c, align 4
+    br label %end_block
+
+  end_block:                                        ; preds = %block2, %block1
+    ret void
+  }
+
+  define void @peephole_bgeu(ptr %a, ptr %b, ptr %c) {
+  entry:
+    %e = load i32, ptr %a, align 4
+    %p = icmp uge i32 %e, %e
+    br i1 %p, label %block1, label %block2
+
+  block1:                                           ; preds = %entry
+    store i32 %e, ptr %b, align 4
+    br label %end_block
+
+  block2:                                           ; preds = %entry
+    store i32 87, ptr %c, align 4
+    br label %end_block
+
+  end_block:                                        ; preds = %block2, %block1
+    ret void
+  }
+
+...
+---
+name:            peephole_bne
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10' }
+  - { reg: '$x11' }
+  - { reg: '$x12' }
+body:             |
+  ; CHECK-LABEL: name: peephole_bne
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x13 = ADDI $x0, 1
+  ; CHECK-NEXT:   BNE killed renamable $x13, $x13, %bb.2
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.block1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = LW killed renamable $x10, 0
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x11, 0
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.block2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = ADDI $x0, 87
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x12, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.end_block:
+  ; CHECK-NEXT:   PseudoRET
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $x10, $x11, $x12
+
+    renamable $x13 = ADDI $x0, 1
+    BNE killed renamable $x13, $x13, %bb.2
+    PseudoBR %bb.1
+
+  bb.1.block1:
+    liveins: $x10, $x11
+
+    renamable $x10 = LW killed renamable $x10, 0
+    SW killed renamable $x10, killed renamable $x11, 0
+    PseudoBR %bb.3
+
+  bb.2.block2:
+    liveins: $x12
+
+    renamable $x10 = ADDI $x0, 87
+    SW killed renamable $x10, killed renamable $x12, 0
+
+  bb.3.end_block:
+    PseudoRET
+
+...
+---
+name:            peephole_beq
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10' }
+  - { reg: '$x11' }
+  - { reg: '$x12' }
+body:             |
+  ; CHECK-LABEL: name: peephole_beq
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BEQ $x0, $x0, %bb.2
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.block1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = LW killed renamable $x10, 0
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x11, 0
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.block2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = ADDI $x0, 87
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x12, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.end_block:
+  ; CHECK-NEXT:   PseudoRET
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $x10, $x11, $x12
+
+    BEQ $x0, $x0, %bb.2
+    PseudoBR %bb.1
+
+  bb.1.block1:
+    liveins: $x10, $x11
+
+    renamable $x10 = LW killed renamable $x10, 0
+    SW killed renamable $x10, killed renamable $x11, 0
+    PseudoBR %bb.3
+
+  bb.2.block2:
+    liveins: $x12
+
+    renamable $x10 = ADDI $x0, 87
+    SW killed renamable $x10, killed renamable $x12, 0
+
+  bb.3.end_block:
+    PseudoRET
+
+...
+---
+name:            peephole_blt
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10' }
+  - { reg: '$x11' }
+  - { reg: '$x12' }
+body:             |
+  ; CHECK-LABEL: name: peephole_blt
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x13 = ADDI $x0, 1
+  ; CHECK-NEXT:   BLT killed renamable $x13, $x13, %bb.2
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.block1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = LW killed renamable $x10, 0
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x11, 0
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.block2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = ADDI $x0, 87
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x12, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.end_block:
+  ; CHECK-NEXT:   PseudoRET
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $x10, $x11, $x12
+
+    renamable $x13 = ADDI $x0, 1
+    BLT killed renamable $x13, $x13, %bb.2
+    PseudoBR %bb.1
+
+  bb.1.block1:
+    liveins: $x10, $x11
+
+    renamable $x10 = LW killed renamable $x10, 0
+    SW killed renamable $x10, killed renamable $x11, 0
+    PseudoBR %bb.3
+
+  bb.2.block2:
+    liveins: $x12
+
+    renamable $x10 = ADDI $x0, 87
+    SW killed renamable $x10, killed renamable $x12, 0
+
+  bb.3.end_block:
+    PseudoRET
+
+...
+---
+name:            peephole_bltu
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10' }
+  - { reg: '$x11' }
+  - { reg: '$x12' }
+body:             |
+  ; CHECK-LABEL: name: peephole_bltu
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x13 = ADDI $x0, 1
+  ; CHECK-NEXT:   BLTU killed renamable $x13, $x13, %bb.2
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.block1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = LW killed renamable $x10, 0
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x11, 0
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.block2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = ADDI $x0, 87
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x12, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.end_block:
+  ; CHECK-NEXT:   PseudoRET
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $x10, $x11, $x12
+
+    renamable $x13 = ADDI $x0, 1
+    BLTU killed renamable $x13, $x13, %bb.2
+    PseudoBR %bb.1
+
+  bb.1.block1:
+    liveins: $x10, $x11
+
+    renamable $x10 = LW killed renamable $x10, 0
+    SW killed renamable $x10, killed renamable $x11, 0
+    PseudoBR %bb.3
+
+  bb.2.block2:
+    liveins: $x12
+
+    renamable $x10 = ADDI $x0, 87
+    SW killed renamable $x10, killed renamable $x12, 0
+
+  bb.3.end_block:
+    PseudoRET
+
+...
+---
+name:            peephole_bge
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10' }
+  - { reg: '$x11' }
+  - { reg: '$x12' }
+body:             |
+  ; CHECK-LABEL: name: peephole_bge
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BGE $x0, $x0, %bb.2
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.block1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = LW killed renamable $x10, 0
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x11, 0
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.block2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = ADDI $x0, 87
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x12, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.end_block:
+  ; CHECK-NEXT:   PseudoRET
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $x10, $x11, $x12
+
+    BGE $x0, $x0, %bb.2
+    PseudoBR %bb.1
+
+  bb.1.block1:
+    liveins: $x10, $x11
+
+    renamable $x10 = LW killed renamable $x10, 0
+    SW killed renamable $x10, killed renamable $x11, 0
+    PseudoBR %bb.3
+
+  bb.2.block2:
+    liveins: $x12
+
+    renamable $x10 = ADDI $x0, 87
+    SW killed renamable $x10, killed renamable $x12, 0
+
+  bb.3.end_block:
+    PseudoRET
+
+...
+---
+name:            peephole_bgeu
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10' }
+  - { reg: '$x11' }
+  - { reg: '$x12' }
+body:             |
+  ; CHECK-LABEL: name: peephole_bgeu
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BGEU $x0, $x0, %bb.2
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.block1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = LW killed renamable $x10, 0
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x11, 0
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.block2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $x10 = ADDI $x0, 87
+  ; CHECK-NEXT:   SW killed renamable $x10, killed renamable $x12, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.end_block:
+  ; CHECK-NEXT:   PseudoRET
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $x10, $x11, $x12
+
+    BGEU $x0, $x0, %bb.2
+    PseudoBR %bb.1
+
+  bb.1.block1:
+    liveins: $x10, $x11
+
+    renamable $x10 = LW killed renamable $x10, 0
+    SW killed renamable $x10, killed renamable $x11, 0
+    PseudoBR %bb.3
+
+  bb.2.block2:
+    liveins: $x12
+
+    renamable $x10 = ADDI $x0, 87
+    SW killed renamable $x10, killed renamable $x12, 0
+
+  bb.3.end_block:
+    PseudoRET
+
+...

>From 0c5843776ac6fbc2058f5f48f4cc8c5e4c4dbedc Mon Sep 17 00:00:00 2001
From: Zhijin Zeng <zhijin.zeng at spacemit.com>
Date: Mon, 29 Apr 2024 09:11:04 +0800
Subject: [PATCH 2/2] [RISC-V] Add RISC-V machine instruction peephole pass

After block-placement and machine-cp, conditional branches that compare a
register with itself may remain. They can be simplified as follows.

```
bne a0, a0, %bb.2  ->  remove it
blt a0, a0, %bb.2  ->  remove it
bltu a0, a0, %bb.2 ->  remove it

beq a0, a0, %bb.2  ->  br, %bb.2
bge a0, a0, %bb.2  ->  br, %bb.2
bgeu a0, a0, %bb.2 ->  br, %bb.2

```
---
 llvm/lib/Target/RISCV/CMakeLists.txt          |   1 +
 llvm/lib/Target/RISCV/RISCV.h                 |   3 +
 llvm/lib/Target/RISCV/RISCVMIPeepholeOpt.cpp  | 165 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |   2 +
 llvm/test/CodeGen/RISCV/O3-pipeline.ll        |   4 +-
 llvm/test/CodeGen/RISCV/branch_zero.ll        |  10 +-
 llvm/test/CodeGen/RISCV/peephole-branch.mir   |  26 ++-
 llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll |  46 +++--
 8 files changed, 207 insertions(+), 50 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVMIPeepholeOpt.cpp

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 8715403f3839a..429a22a532379 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -45,6 +45,7 @@ add_llvm_target(RISCVCodeGen
   RISCVISelLowering.cpp
   RISCVMachineFunctionInfo.cpp
   RISCVMergeBaseOffset.cpp
+  RISCVMIPeepholeOpt.cpp
   RISCVOptWInstrs.cpp
   RISCVPostRAExpandPseudoInsts.cpp
   RISCVRedundantCopyElimination.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index d405395dcf9ec..d808eca616966 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -78,6 +78,9 @@ void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
 FunctionPass *createRISCVMoveMergePass();
 void initializeRISCVMoveMergePass(PassRegistry &);
 
+FunctionPass *createRISCVMIPeepholeOptPass();
+void initializeRISCVMIPeepholeOptPass(PassRegistry &);
+
 FunctionPass *createRISCVPushPopOptimizationPass();
 void initializeRISCVPushPopOptPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVMIPeepholeOpt.cpp b/llvm/lib/Target/RISCV/RISCVMIPeepholeOpt.cpp
new file mode 100644
index 0000000000000..9d4907ad3dbab
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMIPeepholeOpt.cpp
@@ -0,0 +1,165 @@
+//===- RISCVMIPeepholeOpt.cpp - RISC-V MI peephole optimization pass ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs the following peephole optimizations at the MIR level.
+//
+// 1. Remove redundant branch instructions which may be generated by
+// block-placement.
+//
+//    bne a1, a1, %bb.1
+//    blt a1, a1, %bb.1
+//    bltu a1, a1, %bb.1
+//
+//    These instructions always fall through to the next basic block rather
+//    than branching to %bb.1, so they are removed here.
+//
+// 2. beq a1, a1, %bb.1 -> br %bb.1
+//    bge a1, a1, %bb.1 -> br %bb.1
+//    bgeu a1, a1, %bb.1 -> br %bb.1
+//
+//    These instructions always branch to %bb.1, so they are replaced by an
+//    unconditional br pseudo instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-mi-peephole-opt"
+
+namespace {
+
+class RISCVMIPeepholeOpt : public MachineFunctionPass {
+public:
+  static char ID; // Passed to the MachineFunctionPass base constructor.
+  const TargetInstrInfo *TII; // Assigned in runOnMachineFunction.
+  const MachineFunction *MFI; // Function being visited (not a FunctionInfo).
+
+  RISCVMIPeepholeOpt() : MachineFunctionPass(ID) {
+    initializeRISCVMIPeepholeOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool visitBranch(MachineInstr &MI, SmallVector<MachineInstr *> &DeadInstrs);
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "RISC-V MI Peephole Optimization pass";
+  }
+};
+} // end anonymous namespace
+
+char RISCVMIPeepholeOpt::ID = 0;
+
+INITIALIZE_PASS(RISCVMIPeepholeOpt, "riscv-mi-peephole-opt",
+                "RISC-V MI Peephole Optimization", false, false)
+
+bool RISCVMIPeepholeOpt::visitBranch(MachineInstr &MI,
+                                     SmallVector<MachineInstr *> &DeadInstrs) {
+  Register FirstReg = MI.getOperand(0).getReg(); // First compared register.
+  Register SecondReg = MI.getOperand(1).getReg(); // Second compared register.
+
+  if (FirstReg != SecondReg) // Only same-register compares are handled.
+    return false;
+
+  MachineBasicBlock *MBB = MI.getParent();
+  auto LastMI = MBB->rbegin(); // Last instruction of MI's block.
+
+  if (&MI != &*LastMI && (*LastMI).getOpcode() != RISCV::PseudoBR)
+    return false; // MI is not last and the block does not end in PseudoBR.
+
+  MachineFunction::iterator It = std::next(MachineFunction::iterator(MBB));
+
+  switch (MI.getOpcode()) {
+  default: // NOTE(review): other opcodes are still queued below - confirm.
+    break;
+  case RISCV::BEQ: // x == x and x >= x always hold: branch is always taken.
+  case RISCV::BGE:
+  case RISCV::BGEU: {
+    BuildMI(*MBB, &MI, MI.getDebugLoc(), TII->get(RISCV::PseudoBR))
+        .add(MI.getOperand(2)); // PseudoBR to MI's target, placed before MI.
+
+    MachineBasicBlock *CBTargetMBB = MI.getOperand(2).getMBB();
+
+    if ((*LastMI).getOpcode() == RISCV::PseudoBR) {
+      MachineBasicBlock *BRTargetMBB = (*LastMI).getOperand(0).getMBB();
+
+      if (BRTargetMBB != CBTargetMBB)
+        MBB->removeSuccessor(BRTargetMBB); // Old PseudoBR edge is dropped.
+      DeadInstrs.push_back(&*LastMI); // Trailing PseudoBR is queued too.
+    } else if (It != MFI->end() && CBTargetMBB != &*It)
+      MBB->removeSuccessor(&*It); // Drop edge to the next block in layout.
+    break;
+  }
+  case RISCV::BNE: // x != x and x < x never hold: branch is never taken.
+  case RISCV::BLT:
+  case RISCV::BLTU: {
+    MachineBasicBlock *CBTargetMBB = MI.getOperand(2).getMBB();
+
+    if ((*LastMI).getOpcode() == RISCV::PseudoBR) {
+      MachineBasicBlock *BRTargetMBB = (*LastMI).getOperand(0).getMBB();
+
+      if (BRTargetMBB != CBTargetMBB)
+        MBB->removeSuccessor(CBTargetMBB); // Branch-target edge is dropped.
+    } else if (It != MFI->end() && CBTargetMBB != &*It)
+      MBB->removeSuccessor(CBTargetMBB); // Same, when MI ends the block.
+    break;
+  }
+  }
+  DeadInstrs.push_back(&MI); // Queued; this function does not erase MI.
+
+  return true; // MI was queued for removal.
+}
+
+bool RISCVMIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false; // Function is skipped: nothing was modified.
+
+  SmallVector<MachineInstr *> DeadInstrs; // Filled by visitBranch.
+
+  TII = MF.getSubtarget().getInstrInfo();
+  MFI = &MF; // visitBranch compares block iterators against MFI->end().
+
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : make_early_inc_range(MBB)) {
+      switch (MI.getOpcode()) {
+      default: // Every other opcode is left untouched.
+        break;
+      case RISCV::BNE:
+      case RISCV::BEQ:
+      case RISCV::BLT:
+      case RISCV::BLTU:
+      case RISCV::BGE:
+      case RISCV::BGEU:
+        Changed |= visitBranch(MI, DeadInstrs); // Two-register branches.
+        break;
+      }
+    }
+  }
+
+  if (DeadInstrs.empty()) // Early out; the loop below would do nothing.
+    return Changed;
+
+  for (MachineInstr *Inst : DeadInstrs)
+    Inst->eraseFromParent(); // Deferred erase of everything queued above.
+
+  return Changed;
+}
+
+FunctionPass *llvm::createRISCVMIPeepholeOptPass() { // Factory for the pass.
+  return new RISCVMIPeepholeOpt(); // Raw new; ownership passes to the caller.
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 0876f46728a10..1af33aac60169 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -111,6 +111,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVCodeGenPreparePass(*PR);
   initializeRISCVPostRAExpandPseudoPass(*PR);
   initializeRISCVMergeBaseOffsetOptPass(*PR);
+  initializeRISCVMIPeepholeOptPass(*PR);
   initializeRISCVOptWInstrsPass(*PR);
   initializeRISCVPreRAExpandPseudoPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
@@ -501,6 +502,7 @@ void RISCVPassConfig::addPreEmitPass() {
 void RISCVPassConfig::addPreEmitPass2() {
   if (TM->getOptLevel() != CodeGenOptLevel::None) {
     addPass(createRISCVMoveMergePass());
+    addPass(createRISCVMIPeepholeOptPass());
     // Schedule PushPop Optimization before expansion of Pseudo instruction,
     // ensuring return instruction is detected correctly.
     addPass(createRISCVPushPopOptimizationPass());
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 4121d11109111..61ec7a79604bc 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=riscv32 -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | \
 ; RUN:   grep -v "Verify generated machine code" | \
 ; RUN:   FileCheck %s --check-prefixes=CHECK
@@ -195,7 +196,8 @@
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Stack Frame Layout Analysis
 ; CHECK-NEXT:       RISC-V Zcmp move merging pass
-; CHECK-NEXT:       RISC-V Zcmp Push/Pop optimization pass 
+; CHECK-NEXT:       RISC-V MI Peephole Optimization pass
+; CHECK-NEXT:       RISC-V Zcmp Push/Pop optimization pass
 ; CHECK-NEXT:       RISC-V pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V atomic pseudo instruction expansion pass
 ; CHECK-NEXT:       Unpack machine instruction bundles
diff --git a/llvm/test/CodeGen/RISCV/branch_zero.ll b/llvm/test/CodeGen/RISCV/branch_zero.ll
index fd0979977ba3b..9f96f0d94a27a 100644
--- a/llvm/test/CodeGen/RISCV/branch_zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch_zero.ll
@@ -5,14 +5,11 @@
 define void @foo(i16 %finder_idx) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:  .LBB0_1: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:  # %bb.1: # %for.body
 ; CHECK-NEXT:    slli a0, a0, 48
 ; CHECK-NEXT:    bltz a0, .LBB0_4
 ; CHECK-NEXT:  # %bb.2: # %while.cond.preheader.i
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    bnez zero, .LBB0_1
 ; CHECK-NEXT:  # %bb.3: # %while.body
 ; CHECK-NEXT:  .LBB0_4: # %while.cond1.preheader.i
 entry:
@@ -46,14 +43,11 @@ if.then:
 define void @bar(i16 %finder_idx) {
 ; CHECK-LABEL: bar:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:  .LBB1_1: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:  # %bb.1: # %for.body
 ; CHECK-NEXT:    slli a0, a0, 48
 ; CHECK-NEXT:    bgez a0, .LBB1_4
 ; CHECK-NEXT:  # %bb.2: # %while.cond.preheader.i
-; CHECK-NEXT:    # in Loop: Header=BB1_1 Depth=1
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    bnez zero, .LBB1_1
 ; CHECK-NEXT:  # %bb.3: # %while.body
 ; CHECK-NEXT:  .LBB1_4: # %while.cond1.preheader.i
 entry:
diff --git a/llvm/test/CodeGen/RISCV/peephole-branch.mir b/llvm/test/CodeGen/RISCV/peephole-branch.mir
index 8f22b42dacef3..267252166dfe5 100644
--- a/llvm/test/CodeGen/RISCV/peephole-branch.mir
+++ b/llvm/test/CodeGen/RISCV/peephole-branch.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -o - %s -mtriple=riscv32 -run-pass=machine-cp | FileCheck %s
+# RUN: llc -o - %s -mtriple=riscv32 -run-pass=riscv-mi-peephole-opt | FileCheck %s
 
 --- |
   define void @peephole_bne(ptr %a, ptr %b, ptr %c) {
@@ -122,11 +122,10 @@ liveins:
 body:             |
   ; CHECK-LABEL: name: peephole_bne
   ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $x10, $x11, $x12
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x13 = ADDI $x0, 1
-  ; CHECK-NEXT:   BNE killed renamable $x13, $x13, %bb.2
   ; CHECK-NEXT:   PseudoBR %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.block1:
@@ -182,11 +181,10 @@ liveins:
 body:             |
   ; CHECK-LABEL: name: peephole_beq
   ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $x10, $x11, $x12
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   BEQ $x0, $x0, %bb.2
-  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.block1:
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
@@ -240,11 +238,10 @@ liveins:
 body:             |
   ; CHECK-LABEL: name: peephole_blt
   ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $x10, $x11, $x12
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x13 = ADDI $x0, 1
-  ; CHECK-NEXT:   BLT killed renamable $x13, $x13, %bb.2
   ; CHECK-NEXT:   PseudoBR %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.block1:
@@ -300,11 +297,10 @@ liveins:
 body:             |
   ; CHECK-LABEL: name: peephole_bltu
   ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $x10, $x11, $x12
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x13 = ADDI $x0, 1
-  ; CHECK-NEXT:   BLTU killed renamable $x13, $x13, %bb.2
   ; CHECK-NEXT:   PseudoBR %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.block1:
@@ -360,11 +356,10 @@ liveins:
 body:             |
   ; CHECK-LABEL: name: peephole_bge
   ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $x10, $x11, $x12
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   BGE $x0, $x0, %bb.2
-  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.block1:
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
@@ -418,11 +413,10 @@ liveins:
 body:             |
   ; CHECK-LABEL: name: peephole_bgeu
   ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $x10, $x11, $x12
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   BGEU $x0, $x0, %bb.2
-  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.block1:
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
diff --git a/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll b/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll
index 5edf3cf49e25d..d32602a09e8c6 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mattr=+zcmp -verify-machineinstrs  \
 ; RUN: -mtriple=riscv32 -target-abi=ilp32 < %s \
 ; RUN: | FileCheck %s -check-prefixes=RV32IZCMP
@@ -13,32 +14,27 @@ declare dso_local void @f1() local_unnamed_addr
 declare dso_local void @f2() local_unnamed_addr
 define  dso_local void @f0() local_unnamed_addr {
 ; RV32IZCMP-LABEL: f0:
-; RV32IZCMP: 	.cfi_startproc
-; RV32IZCMP-NEXT: # %bb.0:                                # %entry
-; RV32IZCMP-NEXT: 	bnez	zero, .LBB0_2
-; RV32IZCMP-NEXT: # %bb.1:                                # %if.T
-; RV32IZCMP-NEXT: 	cm.push	{ra}, -16
-; RV32IZCMP-NEXT: 	.cfi_def_cfa_offset 16
-; RV32IZCMP-NEXT: 	.cfi_offset ra, -4
-; RV32IZCMP-NEXT: 	call	f1
-; RV32IZCMP-NEXT: 	cm.pop	{ra}, 16
-; RV32IZCMP-NEXT: .LBB0_2:                                # %if.F
-; RV32IZCMP-NEXT: 	tail	f2
-; RV32IZCMP-NEXT: .Lfunc_end0:
-
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:  # %bb.1: # %if.T
+; RV32IZCMP-NEXT:    cm.push {ra}, -16
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    call f1
+; RV32IZCMP-NEXT:    cm.pop {ra}, 16
+; RV32IZCMP-NEXT:  # %bb.2: # %if.F
+; RV32IZCMP-NEXT:    tail f2
+;
 ; RV64IZCMP-LABEL: f0:
-; RV64IZCMP: 	.cfi_startproc
-; RV64IZCMP-NEXT: # %bb.0:                                # %entry
-; RV64IZCMP-NEXT: 	bnez	zero, .LBB0_2
-; RV64IZCMP-NEXT: # %bb.1:                                # %if.T
-; RV64IZCMP-NEXT: 	cm.push	{ra}, -16
-; RV64IZCMP-NEXT: 	.cfi_def_cfa_offset 16
-; RV64IZCMP-NEXT: 	.cfi_offset ra, -8
-; RV64IZCMP-NEXT: 	call	f1
-; RV64IZCMP-NEXT: 	cm.pop	{ra}, 16
-; RV64IZCMP-NEXT: .LBB0_2:                                # %if.F
-; RV64IZCMP-NEXT: 	tail	f2
-; RV64IZCMP-NEXT: .Lfunc_end0:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:  # %bb.1: # %if.T
+; RV64IZCMP-NEXT:    cm.push {ra}, -16
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    call f1
+; RV64IZCMP-NEXT:    cm.pop {ra}, 16
+; RV64IZCMP-NEXT:  # %bb.2: # %if.F
+; RV64IZCMP-NEXT:    tail f2
+
 entry:
   br i1 poison, label %if.T, label %if.F
 



More information about the llvm-commits mailing list