[llvm] [RISCV] add more generic macrofusions (PR #151140)
Daniel Henrique Barboza via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 1 06:46:26 PDT 2025
https://github.com/danielhb updated https://github.com/llvm/llvm-project/pull/151140
>From 51d5c2f78e076ead6e2f43179bb3a83f6143fb32 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Tue, 4 Mar 2025 10:22:17 -0800
Subject: [PATCH 1/4] [RISCV] add more generic macrofusions
These are some macrofusions that are used internally in Ventana in a
not-yet-upstreamed processor. Figured it would be good to contribute
them ahead of the processor to allow the community to also use them in
their own processors, while also alleviating our own downstream upkeep.
The macrofusions being added are:
- add+lw
- addi+ld, addi+lw
- adduw+lw
- auipc+ld
- bfext (slli+srli)
- lui+ld
- shXadd+load, where X=1,2,3 and load=lb,lh,lw,ld
- shXadduw+load, where X=1,2,3 and load=lb,lh,lw,ld
---
llvm/lib/Target/RISCV/RISCVMacroFusion.td | 91 ++++
llvm/test/CodeGen/RISCV/features-info.ll | 9 +
llvm/test/CodeGen/RISCV/macro-fusions.mir | 560 ++++++++++++++++++++++
3 files changed, 660 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 875a93d09a2c6..34ad042c565d7 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -91,3 +91,94 @@ def TuneLDADDFusion
CheckIsImmOperand<2>,
CheckImmOperand<2, 0>
]>>;
+
+// Fuse add with lw:
+// add rd, rs1, rs2
+// lw rd, 0(rd)
+def TuneADDLWFusion
+ : SimpleFusion<"add-lw-fusion", "HasADDLWFusion", "Enable ADD+LW macrofusion",
+ CheckOpcode<[ADD]>,
+ CheckAll<[
+ CheckOpcode<[LW]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperand<2, 0>
+ ]>>;
+
+// Fuse AUIPC followed by LD:
+// auipc rd, imm20
+// ld rd, imm12(rd)
+def TuneAUIPCLDFusion
+ : SimpleFusion<"auipc-ld-fusion", "HasAUIPCLDFusion",
+ "Enable AUIPC+LD macrofusion",
+ CheckOpcode<[AUIPC]>,
+ CheckOpcode<[LD]>>;
+
+// Fuse LUI followed by LD:
+// lui rd, imm[31:12]
+// ld rd, imm12(rd)
+def TuneLUILDFusion
+ : SimpleFusion<"lui-ld-fusion", "HasLUILDFusion",
+ "Enable LUI+LD macrofusion",
+ CheckOpcode<[LUI]>,
+ CheckOpcode<[LD]>>;
+
+// Bitfield extract fusion: similar to TuneShiftedZExtWFusion
+// but without the immediate restriction
+// slli rd, rs1, imm12
+// srli rd, rd, imm12
+def TuneBFExtFusion
+ : SimpleFusion<"bfext-fusion", "HasBFExtFusion",
+ "Enable SLLI+SRLI (bitfield extract) macrofusion",
+ CheckAll<[
+ CheckOpcode<[SLLI]>,
+ CheckIsImmOperand<2>,
+ ]>,
+ CheckAll<[
+ CheckOpcode<[SRLI]>,
+ CheckIsImmOperand<2>,
+ ]>>;
+
+// Fuse ADDI followed by LD
+// addi rd, rs1, imm12
+// ld rd, imm12(rd)
+def TuneADDILDFusion
+ : SimpleFusion<"addi-ld-fusion", "HasADDILDFusion",
+ "Enable ADDI+LD macrofusion",
+ CheckOpcode<[ADDI]>,
+ CheckOpcode<[LD]>>;
+
+// Fuse ADDI followed by LW
+// addi rd, rs1, imm12
+// lw rd, imm12(rd)
+def TuneADDILWFusion
+ : SimpleFusion<"addi-lw-fusion", "HasADDILWFusion",
+ "Enable ADDI+LW macrofusion",
+ CheckOpcode<[ADDI]>,
+ CheckOpcode<[LW]>>;
+
+// Fuse ADDUW followed by LW
+// adduw rd, rs1, rs2
+// lw rd, imm12(rd)
+def TuneADDUWLWFusion
+ : SimpleFusion<"adduw-lw-fusion", "HasADDUWLWFusion",
+ "Enable ADD_UW+LW macrofusion",
+ CheckOpcode<[ADD_UW]>,
+ CheckOpcode<[LW]>>;
+
+// Fuse SHXADD followed by a load (lb, lh, lw, ld)
+// shXadd rd, rs1, rs2
+// load rd, imm12(rd)
+def TuneSHXADDLoadFusion
+ : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
+ "Enable SH(1|2|3)ADD + load macrofusion",
+ CheckOpcode<[SH1ADD, SH2ADD, SH3ADD]>,
+ CheckOpcode<[LB, LH, LW, LD]>>;
+
+// Fuse SHXADD.UW followed by a load (lb, lh, lw, ld)
+// shXadd.uw rd, rs1, rs2
+// load rd, imm12(rd)
+def TuneSHXADDUWLoadFusion
+ : SimpleFusion<"shxadduw-load-fusion", "HasSHXADDUWLoadFusion",
+ "Enable SH(1|2|3)ADDUW + load macrofusion",
+ CheckOpcode<[SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
+ CheckOpcode<[LB, LH, LW, LD]>>;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index b94665b718ae7..de14b3355ac07 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -6,9 +6,15 @@
; CHECK-NEXT: 32bit - Implements RV32.
; CHECK-NEXT: 64bit - Implements RV64.
; CHECK-NEXT: a - 'A' (Atomic Instructions).
+; CHECK-NEXT: add-lw-fusion - Enable ADD+LW macrofusion.
+; CHECK-NEXT: addi-ld-fusion - Enable ADDI+LD macrofusion.
+; CHECK-NEXT: addi-lw-fusion - Enable ADDI+LW macrofusion.
+; CHECK-NEXT: adduw-lw-fusion - Enable ADD_UW+LW macrofusion.
; CHECK-NEXT: andes45 - Andes 45-Series processors.
; CHECK-NEXT: auipc-addi-fusion - Enable AUIPC+ADDI macrofusion.
+; CHECK-NEXT: auipc-ld-fusion - Enable AUIPC+LD macrofusion.
; CHECK-NEXT: b - 'B' (the collection of the Zba, Zbb, Zbs extensions).
+; CHECK-NEXT: bfext-fusion - Enable SLLI+SRLI (bitfield extract) macrofusion.
; CHECK-NEXT: c - 'C' (Compressed Instructions).
; CHECK-NEXT: conditional-cmv-fusion - Enable branch+c.mv fusion.
; CHECK-NEXT: d - 'D' (Double-Precision Floating-Point).
@@ -58,6 +64,7 @@
; CHECK-NEXT: ld-add-fusion - Enable LD+ADD macrofusion.
; CHECK-NEXT: log-vrgather - Has vrgather.vv with LMUL*log2(LMUL) latency
; CHECK-NEXT: lui-addi-fusion - Enable LUI+ADDI macro fusion.
+; CHECK-NEXT: lui-ld-fusion - Enable LUI+LD macrofusion.
; CHECK-NEXT: m - 'M' (Integer Multiplication and Division).
; CHECK-NEXT: mips-p8700 - MIPS p8700 processor.
; CHECK-NEXT: no-default-unroll - Disable default unroll preference..
@@ -130,6 +137,8 @@
; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp).
; CHECK-NEXT: shvstvala - 'Shvstvala' (vstval provides all needed values).
; CHECK-NEXT: shvstvecd - 'Shvstvecd' (vstvec supports Direct mode).
+; CHECK-NEXT: shxadd-load-fusion - Enable SH(1|2|3)ADD + load macrofusion.
+; CHECK-NEXT: shxadduw-load-fusion - Enable SH(1|2|3)ADDUW + load macrofusion.
; CHECK-NEXT: sifive7 - SiFive 7-Series processors.
; CHECK-NEXT: smaia - 'Smaia' (Advanced Interrupt Architecture Machine Level).
; CHECK-NEXT: smcdeleg - 'Smcdeleg' (Counter Delegation Machine Level).
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index 13464141ce27e..a8e6d887d57f8 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -2,7 +2,12 @@
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
+# RUN: -mattr=+add-lw-fusion,+auipc-ld-fusion,+lui-ld-fusion,+addi-ld-fusion,+addi-lw-fusion \
+# RUN: -mattr=+zba,+adduw-lw-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
# RUN: | FileCheck %s
+# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
+# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
+# RUN: -mattr=+zba,+bfext-fusion | FileCheck --check-prefixes=CHECK-BFEXT %s
# CHECK: lui_addi:%bb.0
# CHECK: Macro fuse: {{.*}}LUI - ADDI
@@ -174,3 +179,558 @@ body: |
$x11 = COPY %5
PseudoRET
...
+
+# CHECK: add_lw
+# CHECK: Macro fuse: {{.*}}ADD - LW
+---
+name: add_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: auipc_ld
+# CHECK: Macro fuse: {{.*}}AUIPC - LD
+---
+name: auipc_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LD %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_ld
+# CHECK: Macro fuse: {{.*}}LUI - LD
+---
+name: lui_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LD %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK-BFEXT: bitfield_extract
+# CHECK-BFEXT: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: bitfield_extract
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = SLLI %1, 31
+ %3:gpr = XORI %1, 3
+ %4:gpr = SRLI %2, 48
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: addi_ld
+# CHECK: Macro fuse: {{.*}}ADDI - LD
+---
+name: addi_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lw
+# CHECK: Macro fuse: {{.*}}ADDI - LW
+---
+name: addi_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lw
+# CHECK: Macro fuse: {{.*}}ADD_UW - LW
+---
+name: adduw_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lb
+# CHECK: Macro fuse: {{.*}}SH1ADD - LB
+---
+name: sh1add_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lb
+# CHECK: Macro fuse: {{.*}}SH2ADD - LB
+---
+name: sh2add_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lb
+# CHECK: Macro fuse: {{.*}}SH3ADD - LB
+---
+name: sh3add_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lh
+# CHECK: Macro fuse: {{.*}}SH1ADD - LH
+---
+name: sh1add_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lh
+# CHECK: Macro fuse: {{.*}}SH2ADD - LH
+---
+name: sh2add_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lh
+# CHECK: Macro fuse: {{.*}}SH3ADD - LH
+---
+name: sh3add_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lw
+# CHECK: Macro fuse: {{.*}}SH1ADD - LW
+---
+name: sh1add_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lw
+# CHECK: Macro fuse: {{.*}}SH2ADD - LW
+---
+name: sh2add_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lw
+# CHECK: Macro fuse: {{.*}}SH3ADD - LW
+---
+name: sh3add_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_ld
+# CHECK: Macro fuse: {{.*}}SH1ADD - LD
+---
+name: sh1add_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_ld
+# CHECK: Macro fuse: {{.*}}SH2ADD - LD
+---
+name: sh2add_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_ld
+# CHECK: Macro fuse: {{.*}}SH3ADD - LD
+---
+name: sh3add_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lb
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LB
+---
+name: sh1adduw_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lb
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LB
+---
+name: sh2adduw_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lb
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LB
+---
+name: sh3adduw_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lh
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LH
+---
+name: sh1adduw_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lh
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LH
+---
+name: sh2adduw_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lh
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LH
+---
+name: sh3adduw_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lw
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LW
+---
+name: sh1adduw_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lw
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LW
+---
+name: sh2adduw_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lw
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LW
+---
+name: sh3adduw_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_ld
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LD
+---
+name: sh1adduw_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_ld
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LD
+---
+name: sh2adduw_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_ld
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LD
+---
+name: sh3adduw_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
>From 3a1f13c8acc1ed5fd23f35cdf8e6eaba5ed72207 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Tue, 29 Jul 2025 13:23:16 -0700
Subject: [PATCH 2/4] Add extra load macrofusion cases and other fixes
- add missing macrofusions in veyron-v1 processor def;
- fix ADD_UW/add.uw comment;
- add ADD + lb/lh/lbu/lhu/lwu macrofusions;
- add shXADD + lbu/lhu/lwu macrofusions;
- add shXADD_UW + lbu/lhu/lwu macrofusions.
---
llvm/lib/Target/RISCV/RISCVMacroFusion.td | 24 +-
llvm/lib/Target/RISCV/RISCVProcessors.td | 5 +-
llvm/test/CodeGen/RISCV/features-info.ll | 1 +
llvm/test/CodeGen/RISCV/macro-fusions.mir | 416 +++++++++++++++++++++-
4 files changed, 438 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 34ad042c565d7..8df756b44bf89 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -104,6 +104,18 @@ def TuneADDLWFusion
CheckImmOperand<2, 0>
]>>;
+// Fuse add followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
+// add rd, rs1, rs2
+// load rd, 0(rd)
+def TuneADDLoadFusion
+ : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD + load macrofusion",
+ CheckOpcode<[ADD]>,
+ CheckAll<[
+ CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperand<2, 0>
+ ]>>;
+
// Fuse AUIPC followed by LD:
// auipc rd, imm20
// ld rd, imm12(rd)
@@ -156,8 +168,8 @@ def TuneADDILWFusion
CheckOpcode<[ADDI]>,
CheckOpcode<[LW]>>;
-// Fuse ADDUW followed by LW
-// adduw rd, rs1, rs2
+// Fuse ADD_UW followed by LW
+// add.uw rd, rs1, rs2
// lw rd, imm12(rd)
def TuneADDUWLWFusion
: SimpleFusion<"adduw-lw-fusion", "HasADDUWLWFusion",
@@ -165,20 +177,20 @@ def TuneADDUWLWFusion
CheckOpcode<[ADD_UW]>,
CheckOpcode<[LW]>>;
-// Fuse SHXADD followed by a load (lb, lh, lw, ld)
+// Fuse SHXADD followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
// shXadd rd, rs1, rs2
// load rd, imm12(rd)
def TuneSHXADDLoadFusion
: SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
"Enable SH(1|2|3)ADD + load macrofusion",
CheckOpcode<[SH1ADD, SH2ADD, SH3ADD]>,
- CheckOpcode<[LB, LH, LW, LD]>>;
+ CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-// Fuse SHXADD.UW followed by a load (lb, lh, lw, ld)
+// Fuse SHXADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
// shXadd.uw rd, rs1, rs2
// load rd, imm12(rd)
def TuneSHXADDUWLoadFusion
: SimpleFusion<"shxadduw-load-fusion", "HasSHXADDUWLoadFusion",
"Enable SH(1|2|3)ADDUW + load macrofusion",
CheckOpcode<[SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
- CheckOpcode<[LB, LH, LW, LD]>>;
+ CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 838edf6c57250..ab43d3fa3a472 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -595,7 +595,10 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
TuneZExtHFusion,
TuneZExtWFusion,
TuneShiftedZExtWFusion,
- TuneLDADDFusion]> {
+ TuneLDADDFusion,
+ TuneADDLWFusion,
+ TuneAUIPCLDFusion,
+ TuneLUILDFusion]> {
let MVendorID = 0x61f;
let MArchID = 0x8000000000010000;
let MImpID = 0x111;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index de14b3355ac07..ada77e5fd2fbb 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -6,6 +6,7 @@
; CHECK-NEXT: 32bit - Implements RV32.
; CHECK-NEXT: 64bit - Implements RV64.
; CHECK-NEXT: a - 'A' (Atomic Instructions).
+; CHECK-NEXT: add-load-fusion - Enable ADD + load macrofusion.
; CHECK-NEXT: add-lw-fusion - Enable ADD+LW macrofusion.
; CHECK-NEXT: addi-ld-fusion - Enable ADDI+LD macrofusion.
; CHECK-NEXT: addi-lw-fusion - Enable ADDI+LW macrofusion.
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index a8e6d887d57f8..587830c5f2947 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
-# RUN: -mattr=+add-lw-fusion,+auipc-ld-fusion,+lui-ld-fusion,+addi-ld-fusion,+addi-lw-fusion \
+# RUN: -mattr=+add-load-fusion,+add-lw-fusion,+auipc-ld-fusion,+lui-ld-fusion,+addi-ld-fusion,+addi-lw-fusion \
# RUN: -mattr=+zba,+adduw-lw-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
# RUN: | FileCheck %s
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
@@ -180,6 +180,42 @@ body: |
PseudoRET
...
+# CHECK: add_lb
+# CHECK: Macro fuse: {{.*}}ADD - LB
+---
+name: add_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: add_lh
+# CHECK: Macro fuse: {{.*}}ADD - LH
+---
+name: add_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
# CHECK: add_lw
# CHECK: Macro fuse: {{.*}}ADD - LW
---
@@ -198,6 +234,60 @@ body: |
PseudoRET
...
+# CHECK: add_lbu
+# CHECK: Macro fuse: {{.*}}ADD - LBU
+---
+name: add_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: add_lhu
+# CHECK: Macro fuse: {{.*}}ADD - LHU
+---
+name: add_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: add_lwu
+# CHECK: Macro fuse: {{.*}}ADD - LWU
+---
+name: add_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
# CHECK: auipc_ld
# CHECK: Macro fuse: {{.*}}AUIPC - LD
---
@@ -519,6 +609,168 @@ body: |
PseudoRET
...
+# CHECK: sh1add_lbu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LBU
+---
+name: sh1add_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lbu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LBU
+---
+name: sh2add_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lbu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LBU
+---
+name: sh3add_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lhu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LHU
+---
+name: sh1add_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lhu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LHU
+---
+name: sh2add_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lhu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LHU
+---
+name: sh3add_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lwu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LWU
+---
+name: sh1add_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lwu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LWU
+---
+name: sh2add_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lwu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LWU
+---
+name: sh3add_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
# CHECK: sh1adduw_lb
# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LB
---
@@ -734,3 +986,165 @@ body: |
$x11 = COPY %5
PseudoRET
...
+
+# CHECK: sh1adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LBU
+---
+name: sh1adduw_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LBU
+---
+name: sh2adduw_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LBU
+---
+name: sh3adduw_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LHU
+---
+name: sh1adduw_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LHU
+---
+name: sh2adduw_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LHU
+---
+name: sh3adduw_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LWU
+---
+name: sh1adduw_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LWU
+---
+name: sh2adduw_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LWU
+---
+name: sh3adduw_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
>From 7f8b3a057702421426408211632d996e382216ba Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Thu, 31 Jul 2025 07:26:53 -0700
Subject: [PATCH 3/4] RISCVMacroFusion.td: review changes
- add TuneAUIPCADDIWFusion;
- turn TuneAUIPCLDFusion (auipc+ld) into TuneAUIPCLoadFusion
(auipc + lb/lh/lw/ld/lbu/lhu/lwu);
- turn TuneLUILDFusion (lui+ld) into TuneLUILoadFusion
(lui + lb/lh/lw/ld/lbu/lhu/lwu);
- turn TuneADDUWLWFusion (add.uw+lw) into TuneADD_UWLoadFusion
(add.uw + lb/lh/lw/ld/lbu/lhu/lwu);
- remove TuneADDILWFusion. Turn TuneADDILDFusion into TuneADDILoadFusion
(addi + lb/lh/lw/ld/lbu/lhu/lwu);
- remove the immediate check from TuneADDLoadFusion;
- remove the immediate check from TuneBFExtFusion: it was a copy/paste
leftover from the existing slli+srli fusions;
- renames: TuneSHXADD_UWLoadFusion and "Enable SH(1|2|3)ADD_UW ..."
---
llvm/lib/Target/RISCV/RISCVMacroFusion.td | 88 ++---
llvm/lib/Target/RISCV/RISCVProcessors.td | 4 +-
llvm/test/CodeGen/RISCV/features-info.ll | 12 +-
llvm/test/CodeGen/RISCV/macro-fusions.mir | 433 +++++++++++++++++++++-
4 files changed, 473 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 8df756b44bf89..f090b2c150c85 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -28,6 +28,15 @@ def TuneAUIPCADDIFusion
CheckOpcode<[AUIPC]>,
CheckOpcode<[ADDI]>>;
+// Fuse AUIPC followed by ADDIW:
+// auipc rd, imm20
+// addiw rd, rd, imm12
+def TuneAUIPCADDIWFusion
+ : SimpleFusion<"auipc-addiw-fusion", "HasAUIPCADDIWFusion",
+ "Enable AUIPC+ADDIW macrofusion",
+ CheckOpcode<[AUIPC]>,
+ CheckOpcode<[ADDIW]>>;
+
// Fuse zero extension of halfword:
// slli rd, rs1, 48
// srli rd, rd, 48
@@ -110,29 +119,25 @@ def TuneADDLWFusion
def TuneADDLoadFusion
: SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD + load macrofusion",
CheckOpcode<[ADD]>,
- CheckAll<[
- CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>,
- CheckIsImmOperand<2>,
- CheckImmOperand<2, 0>
- ]>>;
+ CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-// Fuse AUIPC followed by LD:
+// Fuse AUIPC followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
// auipc rd, imm20
-// ld rd, imm12(rd)
-def TuneAUIPCLDFusion
- : SimpleFusion<"auipc-ld-fusion", "HasAUIPCLDFusion",
- "Enable AUIPC+LD macrofusion",
+// load rd, imm12(rd)
+def TuneAUIPCLoadFusion
+ : SimpleFusion<"auipc-load-fusion", "HasAUIPCLoadFusion",
+ "Enable AUIPC + load macrofusion",
CheckOpcode<[AUIPC]>,
- CheckOpcode<[LD]>>;
+ CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-// Fuse LUI followed by LD:
+// Fuse LUI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
// lui rd, imm[31:12]
-// ld rd, imm12(rd)
-def TuneLUILDFusion
- : SimpleFusion<"lui-ld-fusion", "HasLUILDFusion",
- "Enable LUI+LD macrofusion",
+// load rd, imm12(rd)
+def TuneLUILoadFusion
+ : SimpleFusion<"lui-load-fusion", "HasLUILoadFusion",
+ "Enable LUI + load macrofusion",
CheckOpcode<[LUI]>,
- CheckOpcode<[LD]>>;
+ CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
// Bitfield extract fusion: similar to TuneShiftedZExtWFusion
// but without the immediate restriction
@@ -141,41 +146,26 @@ def TuneLUILDFusion
def TuneBFExtFusion
: SimpleFusion<"bfext-fusion", "HasBFExtFusion",
"Enable SLLI+SRLI (bitfield extract) macrofusion",
- CheckAll<[
- CheckOpcode<[SLLI]>,
- CheckIsImmOperand<2>,
- ]>,
- CheckAll<[
- CheckOpcode<[SRLI]>,
- CheckIsImmOperand<2>,
- ]>>;
+ CheckOpcode<[SLLI]>,
+ CheckOpcode<[SRLI]>>;
-// Fuse ADDI followed by LD
+// Fuse ADDI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
// addi rd, rs1, imm12
-// ld rd, imm12(rd)
-def TuneADDILDFusion
- : SimpleFusion<"addi-ld-fusion", "HasADDILDFusion",
- "Enable ADDI+LD macrofusion",
- CheckOpcode<[ADDI]>,
- CheckOpcode<[LD]>>;
-
-// Fuse ADDI followed by LW
-// addi rd, rs1, imm12
-// lw rd, imm12(rd)
-def TuneADDILWFusion
- : SimpleFusion<"addi-lw-fusion", "HasADDILWFusion",
- "Enable ADDI+LW macrofusion",
+// load rd, imm12(rd)
+def TuneADDILoadFusion
+ : SimpleFusion<"addi-load-fusion", "HasADDILoadFusion",
+ "Enable ADDI + load macrofusion",
CheckOpcode<[ADDI]>,
- CheckOpcode<[LW]>>;
+ CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-// Fuse ADD_UW followed by LW
+// Fuse ADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
// add.uw rd, rs1, rs2
-// lw rd, imm12(rd)
-def TuneADDUWLWFusion
- : SimpleFusion<"adduw-lw-fusion", "HasADDUWLWFusion",
- "Enable ADD_UW+LW macrofusion",
+// load rd, imm12(rd)
+def TuneADD_UWLoadFusion
+ : SimpleFusion<"adduw-load-fusion", "HasADD_UWLoadFusion",
+ "Enable ADD_UW + load macrofusion",
CheckOpcode<[ADD_UW]>,
- CheckOpcode<[LW]>>;
+ CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
// Fuse SHXADD followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
// shXadd rd, rs1, rs2
@@ -189,8 +179,8 @@ def TuneSHXADDLoadFusion
// Fuse SHXADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
// shXadd.uw rd, rs1, rs2
// load rd, imm12(rd)
-def TuneSHXADDUWLoadFusion
- : SimpleFusion<"shxadduw-load-fusion", "HasSHXADDUWLoadFusion",
- "Enable SH(1|2|3)ADDUW + load macrofusion",
+def TuneSHXADD_UWLoadFusion
+ : SimpleFusion<"shxadduw-load-fusion", "HasSHXADD_UWLoadFusion",
+ "Enable SH(1|2|3)ADD_UW + load macrofusion",
CheckOpcode<[SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index ab43d3fa3a472..b03c493b842d6 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -597,8 +597,8 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
TuneShiftedZExtWFusion,
TuneLDADDFusion,
TuneADDLWFusion,
- TuneAUIPCLDFusion,
- TuneLUILDFusion]> {
+ TuneAUIPCLoadFusion,
+ TuneLUILoadFusion]> {
let MVendorID = 0x61f;
let MArchID = 0x8000000000010000;
let MImpID = 0x111;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index ada77e5fd2fbb..1ed2c55941ec7 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -8,12 +8,12 @@
; CHECK-NEXT: a - 'A' (Atomic Instructions).
; CHECK-NEXT: add-load-fusion - Enable ADD + load macrofusion.
; CHECK-NEXT: add-lw-fusion - Enable ADD+LW macrofusion.
-; CHECK-NEXT: addi-ld-fusion - Enable ADDI+LD macrofusion.
-; CHECK-NEXT: addi-lw-fusion - Enable ADDI+LW macrofusion.
-; CHECK-NEXT: adduw-lw-fusion - Enable ADD_UW+LW macrofusion.
+; CHECK-NEXT: addi-load-fusion - Enable ADDI + load macrofusion.
+; CHECK-NEXT: adduw-load-fusion - Enable ADD_UW + load macrofusion.
; CHECK-NEXT: andes45 - Andes 45-Series processors.
; CHECK-NEXT: auipc-addi-fusion - Enable AUIPC+ADDI macrofusion.
-; CHECK-NEXT: auipc-ld-fusion - Enable AUIPC+LD macrofusion.
+; CHECK-NEXT: auipc-addiw-fusion - Enable AUIPC+ADDIW macrofusion.
+; CHECK-NEXT: auipc-load-fusion - Enable AUIPC + load macrofusion.
; CHECK-NEXT: b - 'B' (the collection of the Zba, Zbb, Zbs extensions).
; CHECK-NEXT: bfext-fusion - Enable SLLI+SRLI (bitfield extract) macrofusion.
; CHECK-NEXT: c - 'C' (Compressed Instructions).
@@ -65,7 +65,7 @@
; CHECK-NEXT: ld-add-fusion - Enable LD+ADD macrofusion.
; CHECK-NEXT: log-vrgather - Has vrgather.vv with LMUL*log2(LMUL) latency
; CHECK-NEXT: lui-addi-fusion - Enable LUI+ADDI macro fusion.
-; CHECK-NEXT: lui-ld-fusion - Enable LUI+LD macrofusion.
+; CHECK-NEXT: lui-load-fusion - Enable LUI + load macrofusion.
; CHECK-NEXT: m - 'M' (Integer Multiplication and Division).
; CHECK-NEXT: mips-p8700 - MIPS p8700 processor.
; CHECK-NEXT: no-default-unroll - Disable default unroll preference..
@@ -139,7 +139,7 @@
; CHECK-NEXT: shvstvala - 'Shvstvala' (vstval provides all needed values).
; CHECK-NEXT: shvstvecd - 'Shvstvecd' (vstvec supports Direct mode).
; CHECK-NEXT: shxadd-load-fusion - Enable SH(1|2|3)ADD + load macrofusion.
-; CHECK-NEXT: shxadduw-load-fusion - Enable SH(1|2|3)ADDUW + load macrofusion.
+; CHECK-NEXT: shxadduw-load-fusion - Enable SH(1|2|3)ADD_UW + load macrofusion.
; CHECK-NEXT: sifive7 - SiFive 7-Series processors.
; CHECK-NEXT: smaia - 'Smaia' (Advanced Interrupt Architecture Machine Level).
; CHECK-NEXT: smcdeleg - 'Smcdeleg' (Counter Delegation Machine Level).
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index 587830c5f2947..135dbb559cf9b 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -1,9 +1,9 @@
# REQUIRES: asserts
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
-# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
-# RUN: -mattr=+add-load-fusion,+add-lw-fusion,+auipc-ld-fusion,+lui-ld-fusion,+addi-ld-fusion,+addi-lw-fusion \
-# RUN: -mattr=+zba,+adduw-lw-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
+# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+auipc-addiw-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
+# RUN: -mattr=+add-load-fusion,+add-lw-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \
+# RUN: -mattr=+zba,+adduw-load-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
# RUN: | FileCheck %s
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
@@ -43,6 +43,23 @@ body: |
PseudoRET
...
+# CHECK: auipc_addiw
+# CHECK: Macro fuse: {{.*}}AUIPC - ADDIW
+---
+name: auipc_addiw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = ADDIW %2, 3
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
# CHECK: slli_srli_shifted_zext
# CHECK: Macro fuse: {{.*}}SLLI - SRLI
---
@@ -288,6 +305,57 @@ body: |
PseudoRET
...
+# CHECK: auipc_lb
+# CHECK: Macro fuse: {{.*}}AUIPC - LB
+---
+name: auipc_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LB %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lh
+# CHECK: Macro fuse: {{.*}}AUIPC - LH
+---
+name: auipc_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LH %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lw
+# CHECK: Macro fuse: {{.*}}AUIPC - LW
+---
+name: auipc_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LW %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
# CHECK: auipc_ld
# CHECK: Macro fuse: {{.*}}AUIPC - LD
---
@@ -305,6 +373,108 @@ body: |
PseudoRET
...
+# CHECK: auipc_lbu
+# CHECK: Macro fuse: {{.*}}AUIPC - LBU
+---
+name: auipc_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LBU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lhu
+# CHECK: Macro fuse: {{.*}}AUIPC - LHU
+---
+name: auipc_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LHU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lwu
+# CHECK: Macro fuse: {{.*}}AUIPC - LWU
+---
+name: auipc_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LWU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lb
+# CHECK: Macro fuse: {{.*}}LUI - LB
+---
+name: lui_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LB %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lh
+# CHECK: Macro fuse: {{.*}}LUI - LH
+---
+name: lui_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LH %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lw
+# CHECK: Macro fuse: {{.*}}LUI - LW
+---
+name: lui_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LW %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
# CHECK: lui_ld
# CHECK: Macro fuse: {{.*}}LUI - LD
---
@@ -322,6 +492,57 @@ body: |
PseudoRET
...
+# CHECK: lui_lbu
+# CHECK: Macro fuse: {{.*}}LUI - LBU
+---
+name: lui_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LBU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lhu
+# CHECK: Macro fuse: {{.*}}LUI - LHU
+---
+name: lui_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LHU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lwu
+# CHECK: Macro fuse: {{.*}}LUI - LWU
+---
+name: lui_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LWU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
# CHECK-BFEXT: bitfield_extract
# CHECK-BFEXT: Macro fuse: {{.*}}SLLI - SRLI
---
@@ -339,10 +560,10 @@ body: |
PseudoRET
...
-# CHECK: addi_ld
-# CHECK: Macro fuse: {{.*}}ADDI - LD
+# CHECK: addi_lb
+# CHECK: Macro fuse: {{.*}}ADDI - LB
---
-name: addi_ld
+name: addi_lb
tracksRegLiveness: true
body: |
bb.0.entry:
@@ -351,7 +572,25 @@ body: |
%2:gpr = COPY $x11
%3:gpr = ADDI %1, 8
%4:gpr = XORI %2, 3
- %5:gpr = LD %3, 0
+ %5:gpr = LB %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lh
+# CHECK: Macro fuse: {{.*}}ADDI - LH
+---
+name: addi_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 0
$x10 = COPY %4
$x11 = COPY %5
PseudoRET
@@ -375,6 +614,114 @@ body: |
PseudoRET
...
+# CHECK: addi_ld
+# CHECK: Macro fuse: {{.*}}ADDI - LD
+---
+name: addi_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lbu
+# CHECK: Macro fuse: {{.*}}ADDI - LBU
+---
+name: addi_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lhu
+# CHECK: Macro fuse: {{.*}}ADDI - LHU
+---
+name: addi_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lwu
+# CHECK: Macro fuse: {{.*}}ADDI - LWU
+---
+name: addi_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lb
+# CHECK: Macro fuse: {{.*}}ADD_UW - LB
+---
+name: adduw_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lh
+# CHECK: Macro fuse: {{.*}}ADD_UW - LH
+---
+name: adduw_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
# CHECK: adduw_lw
# CHECK: Macro fuse: {{.*}}ADD_UW - LW
---
@@ -393,6 +740,78 @@ body: |
PseudoRET
...
+# CHECK: adduw_ld
+# CHECK: Macro fuse: {{.*}}ADD_UW - LD
+---
+name: adduw_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lbu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LBU
+---
+name: adduw_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lhu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LHU
+---
+name: adduw_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lwu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LWU
+---
+name: adduw_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
# CHECK: sh1add_lb
# CHECK: Macro fuse: {{.*}}SH1ADD - LB
---
>From 5e3991e297b758140d7faa9b30053299da845643 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Fri, 1 Aug 2025 06:05:28 -0700
Subject: [PATCH 4/4] Remove and consolidate macrofusions
- remove auipc+addiw since it rarely, if ever, occurs;
- remove add+lw. veyron-v1 now uses add+load;
- merge add+load and add.uw+load into a single fusion;
- merge shXadd+load and shXadd.uw+load into a single fusion;
- fix immediate 0 comment in AddLoad fusion.
---
llvm/lib/Target/RISCV/RISCVMacroFusion.td | 57 ++++-------------------
llvm/lib/Target/RISCV/RISCVProcessors.td | 3 +-
llvm/test/CodeGen/RISCV/features-info.ll | 8 +---
llvm/test/CodeGen/RISCV/macro-fusions.mir | 23 ++-------
4 files changed, 15 insertions(+), 76 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index f090b2c150c85..459c8bece5bd7 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -28,15 +28,6 @@ def TuneAUIPCADDIFusion
CheckOpcode<[AUIPC]>,
CheckOpcode<[ADDI]>>;
-// Fuse AUIPC followed by ADDIW:
-// auipc rd, imm20
-// addiw rd, rd, imm12
-def TuneAUIPCADDIWFusion
- : SimpleFusion<"auipc-addiw-fusion", "HasAUIPCADDIWFusion",
- "Enable AUIPC+ADDIW macrofusion",
- CheckOpcode<[AUIPC]>,
- CheckOpcode<[ADDIW]>>;
-
// Fuse zero extension of halfword:
// slli rd, rs1, 48
// srli rd, rd, 48
@@ -101,24 +92,12 @@ def TuneLDADDFusion
CheckImmOperand<2, 0>
]>>;
-// Fuse add with lw:
-// add rd, rs1, rs2
-// lw rd, 0(rd)
-def TuneADDLWFusion
- : SimpleFusion<"add-lw-fusion", "HasADDLWFusion", "Enable ADD+LW macrofusion",
- CheckOpcode<[ADD]>,
- CheckAll<[
- CheckOpcode<[LW]>,
- CheckIsImmOperand<2>,
- CheckImmOperand<2, 0>
- ]>>;
-
-// Fuse add followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
-// add rd, rs1, rs2
-// load rd, 0(rd)
+// Fuse add(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
+// add(.uw) rd, rs1, rs2
+// load rd, imm12(rd)
def TuneADDLoadFusion
- : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD + load macrofusion",
- CheckOpcode<[ADD]>,
+ : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD(.UW) + load macrofusion",
+ CheckOpcode<[ADD, ADD_UW]>,
CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
// Fuse AUIPC followed by by a load (lb, lh, lw, ld, lbu, lhu, lwu)
@@ -158,29 +137,11 @@ def TuneADDILoadFusion
CheckOpcode<[ADDI]>,
CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-// Fuse ADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
-// add.uw rd, rs1, rs2
-// load rd, imm12(rd)
-def TuneADD_UWLoadFusion
- : SimpleFusion<"adduw-load-fusion", "HasADD_UWLoadFusion",
- "Enable ADD_UW + load macrofusion",
- CheckOpcode<[ADD_UW]>,
- CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-
-// Fuse SHXADD followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
-// shXadd rd, rs1, rs2
+// Fuse shXadd(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+// shXadd(.uw) rd, rs1, rs2
// load rd, imm12(rd)
def TuneSHXADDLoadFusion
: SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
- "Enable SH(1|2|3)ADD + load macrofusion",
- CheckOpcode<[SH1ADD, SH2ADD, SH3ADD]>,
- CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-
-// Fuse SHXADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
-// shXadd.uw rd, rs1, rs2
-// load rd, imm12(rd)
-def TuneSHXADD_UWLoadFusion
- : SimpleFusion<"shxadduw-load-fusion", "HasSHXADD_UWLoadFusion",
- "Enable SH(1|2|3)ADD_UW + load macrofusion",
- CheckOpcode<[SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
+ "Enable SH(1|2|3)ADD(.UW) + load macrofusion",
+ CheckOpcode<[SH1ADD, SH2ADD, SH3ADD, SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index b03c493b842d6..c3b3deb8c9fa8 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -595,8 +595,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
TuneZExtHFusion,
TuneZExtWFusion,
TuneShiftedZExtWFusion,
- TuneLDADDFusion,
- TuneADDLWFusion,
+ TuneADDLoadFusion,
TuneAUIPCLoadFusion,
TuneLUILoadFusion]> {
let MVendorID = 0x61f;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 1ed2c55941ec7..2a620126931a5 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -6,13 +6,10 @@
; CHECK-NEXT: 32bit - Implements RV32.
; CHECK-NEXT: 64bit - Implements RV64.
; CHECK-NEXT: a - 'A' (Atomic Instructions).
-; CHECK-NEXT: add-load-fusion - Enable ADD + load macrofusion.
-; CHECK-NEXT: add-lw-fusion - Enable ADD+LW macrofusion.
+; CHECK-NEXT: add-load-fusion - Enable ADD(.UW) + load macrofusion.
; CHECK-NEXT: addi-load-fusion - Enable ADDI + load macrofusion.
-; CHECK-NEXT: adduw-load-fusion - Enable ADD_UW + load macrofusion.
; CHECK-NEXT: andes45 - Andes 45-Series processors.
; CHECK-NEXT: auipc-addi-fusion - Enable AUIPC+ADDI macrofusion.
-; CHECK-NEXT: auipc-addiw-fusion - Enable AUIPC+ADDIW macrofusion.
; CHECK-NEXT: auipc-load-fusion - Enable AUIPC + load macrofusion.
; CHECK-NEXT: b - 'B' (the collection of the Zba, Zbb, Zbs extensions).
; CHECK-NEXT: bfext-fusion - Enable SLLI+SRLI (bitfield extract) macrofusion.
@@ -138,8 +135,7 @@
; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp).
; CHECK-NEXT: shvstvala - 'Shvstvala' (vstval provides all needed values).
; CHECK-NEXT: shvstvecd - 'Shvstvecd' (vstvec supports Direct mode).
-; CHECK-NEXT: shxadd-load-fusion - Enable SH(1|2|3)ADD + load macrofusion.
-; CHECK-NEXT: shxadduw-load-fusion - Enable SH(1|2|3)ADD_UW + load macrofusion.
+; CHECK-NEXT: shxadd-load-fusion - Enable SH(1|2|3)ADD(.UW) + load macrofusion.
; CHECK-NEXT: sifive7 - SiFive 7-Series processors.
; CHECK-NEXT: smaia - 'Smaia' (Advanced Interrupt Architecture Machine Level).
; CHECK-NEXT: smcdeleg - 'Smcdeleg' (Counter Delegation Machine Level).
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index 135dbb559cf9b..ae5b52da2ac16 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -1,9 +1,9 @@
# REQUIRES: asserts
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
-# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+auipc-addiw-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
-# RUN: -mattr=+add-load-fusion,+add-lw-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \
-# RUN: -mattr=+zba,+adduw-load-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
+# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
+# RUN: -mattr=+add-load-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \
+# RUN: -mattr=+zba,+shxadd-load-fusion \
# RUN: | FileCheck %s
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
@@ -43,23 +43,6 @@ body: |
PseudoRET
...
-# CHECK: auipc_addiw
-# CHECK: Macro fuse: {{.*}}AUIPC - ADDIW
----
-name: auipc_addiw
-tracksRegLiveness: true
-body: |
- bb.0.entry:
- liveins: $x10
- %1:gpr = COPY $x10
- %2:gpr = AUIPC 1
- %3:gpr = XORI %1, 2
- %4:gpr = ADDIW %2, 3
- $x10 = COPY %3
- $x11 = COPY %4
- PseudoRET
-...
-
# CHECK: slli_srli_shifted_zext
# CHECK: Macro fuse: {{.*}}SLLI - SRLI
---
More information about the llvm-commits
mailing list