[llvm] [RISCV] add more generic macrofusions (PR #151140)

Daniel Henrique Barboza via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 6 10:30:47 PDT 2025


https://github.com/danielhb updated https://github.com/llvm/llvm-project/pull/151140

>From bdeed1e8ac588f0e28dec046cdf8de91590466b1 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Tue, 4 Mar 2025 10:22:17 -0800
Subject: [PATCH 1/5] [RISCV] add more generic macrofusions

These are some macrofusions that are used internally in Ventana in a
not-yet-upstreamed processor. Figured it would be good to contribute
them ahead of the processor to allow the community to also use them in
their own processors, while also alleviating our own downstream upkeep.

The macrofusions being added are:

- add+lw
- addi+ld, addi+lw
- adduw+lw
- auipc+ld
- bfext (slli+srli)
- lui+ld
- shXadd+load, where X=1,2,3 and load=lb,lh,lw,ld
- shXadduw+load, where X=1,2,3 and load=lb,lh,lw,ld
---
 llvm/lib/Target/RISCV/RISCVMacroFusion.td |  91 ++++
 llvm/test/CodeGen/RISCV/features-info.ll  |   9 +
 llvm/test/CodeGen/RISCV/macro-fusions.mir | 560 ++++++++++++++++++++++
 3 files changed, 660 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 875a93d09a2c6..34ad042c565d7 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -91,3 +91,94 @@ def TuneLDADDFusion
                    CheckIsImmOperand<2>,
                    CheckImmOperand<2, 0>
                  ]>>;
+
+// Fuse add with lw:
+//   add rd, rs1, rs2
+//   lw rd, 0(rd)
+def TuneADDLWFusion
+  : SimpleFusion<"add-lw-fusion", "HasADDLWFusion", "Enable ADD+LW macrofusion",
+                 CheckOpcode<[ADD]>,
+                 CheckAll<[
+                   CheckOpcode<[LW]>,
+                   CheckIsImmOperand<2>,
+                   CheckImmOperand<2, 0>
+                 ]>>;
+
+// Fuse AUIPC followed by LD:
+//   auipc rd, imm20
+//   ld rd, imm12(rd)
+def TuneAUIPCLDFusion
+  : SimpleFusion<"auipc-ld-fusion", "HasAUIPCLDFusion",
+                 "Enable AUIPC+LD macrofusion",
+                 CheckOpcode<[AUIPC]>,
+                 CheckOpcode<[LD]>>;
+
+// Fuse LUI followed by LD:
+//   lui rd, imm[31:12]
+//   ld rd, imm12(rd)
+def TuneLUILDFusion
+  : SimpleFusion<"lui-ld-fusion", "HasLUILDFusion",
+                 "Enable LUI+LD macrofusion",
+                 CheckOpcode<[LUI]>,
+                 CheckOpcode<[LD]>>;
+
+// Bitfield extract fusion: similar to TuneShiftedZExtWFusion
+// but without the immediate restriction
+//   slli rd, rs1, imm12
+//   srli rd, rd, imm12
+def TuneBFExtFusion
+  : SimpleFusion<"bfext-fusion", "HasBFExtFusion",
+                 "Enable SLLI+SRLI (bitfield extract) macrofusion",
+                 CheckAll<[
+                   CheckOpcode<[SLLI]>,
+                   CheckIsImmOperand<2>,
+                 ]>,
+                 CheckAll<[
+                   CheckOpcode<[SRLI]>,
+                   CheckIsImmOperand<2>,
+                 ]>>;
+
+// Fuse ADDI followed by LD
+//   addi rd, rs1, imm12
+//   ld rd, imm12(rd)
+def TuneADDILDFusion
+  : SimpleFusion<"addi-ld-fusion", "HasADDILDFusion",
+                 "Enable ADDI+LD macrofusion",
+                 CheckOpcode<[ADDI]>,
+                 CheckOpcode<[LD]>>;
+
+// Fuse ADDI followed by LW
+//   addi rd, rs1, imm12
+//   lw rd, imm12(rd)
+def TuneADDILWFusion
+  : SimpleFusion<"addi-lw-fusion", "HasADDILWFusion",
+                 "Enable ADDI+LW macrofusion",
+                 CheckOpcode<[ADDI]>,
+                 CheckOpcode<[LW]>>;
+
+// Fuse ADDUW followed by LW
+//   adduw rd, rs1, rs2
+//   lw rd, imm12(rd)
+def TuneADDUWLWFusion
+  : SimpleFusion<"adduw-lw-fusion", "HasADDUWLWFusion",
+                 "Enable ADD_UW+LW macrofusion",
+                 CheckOpcode<[ADD_UW]>,
+                 CheckOpcode<[LW]>>;
+
+// Fuse SHXADD followed by a load (lb, lh, lw, ld)
+//   shXadd rd, rs1, rs2
+//   load rd, imm12(rd)
+def TuneSHXADDLoadFusion
+  : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
+                 "Enable SH(1|2|3)ADD + load macrofusion",
+                 CheckOpcode<[SH1ADD, SH2ADD, SH3ADD]>,
+                 CheckOpcode<[LB, LH, LW, LD]>>;
+
+// Fuse SHXADD.UW followed by a load (lb, lh, lw, ld)
+//   shXadd.uw rd, rs1, rs2
+//   load rd, imm12(rd)
+def TuneSHXADDUWLoadFusion
+  : SimpleFusion<"shxadduw-load-fusion", "HasSHXADDUWLoadFusion",
+                 "Enable SH(1|2|3)ADDUW + load macrofusion",
+                 CheckOpcode<[SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
+                 CheckOpcode<[LB, LH, LW, LD]>>;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index a5ee41281607b..8148412c037a1 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -6,9 +6,15 @@
 ; CHECK-NEXT:   32bit                            - Implements RV32.
 ; CHECK-NEXT:   64bit                            - Implements RV64.
 ; CHECK-NEXT:   a                                - 'A' (Atomic Instructions).
+; CHECK-NEXT:   add-lw-fusion                    - Enable ADD+LW macrofusion.
+; CHECK-NEXT:   addi-ld-fusion                   - Enable ADDI+LD macrofusion.
+; CHECK-NEXT:   addi-lw-fusion                   - Enable ADDI+LW macrofusion.
+; CHECK-NEXT:   adduw-lw-fusion                  - Enable ADD_UW+LW macrofusion.
 ; CHECK-NEXT:   andes45                          - Andes 45-Series processors.
 ; CHECK-NEXT:   auipc-addi-fusion                - Enable AUIPC+ADDI macrofusion.
+; CHECK-NEXT:   auipc-ld-fusion                  - Enable AUIPC+LD macrofusion.
 ; CHECK-NEXT:   b                                - 'B' (the collection of the Zba, Zbb, Zbs extensions).
+; CHECK-NEXT:   bfext-fusion                     - Enable SLLI+SRLI (bitfield extract) macrofusion.
 ; CHECK-NEXT:   c                                - 'C' (Compressed Instructions).
 ; CHECK-NEXT:   conditional-cmv-fusion           - Enable branch+c.mv fusion.
 ; CHECK-NEXT:   d                                - 'D' (Double-Precision Floating-Point).
@@ -62,6 +68,7 @@
 ; CHECK-NEXT:   ld-add-fusion                    - Enable LD+ADD macrofusion.
 ; CHECK-NEXT:   log-vrgather                     - Has vrgather.vv with LMUL*log2(LMUL) latency
 ; CHECK-NEXT:   lui-addi-fusion                  - Enable LUI+ADDI macro fusion.
+; CHECK-NEXT:   lui-ld-fusion                    - Enable LUI+LD macrofusion.
 ; CHECK-NEXT:   m                                - 'M' (Integer Multiplication and Division).
 ; CHECK-NEXT:   mips-p8700                       - MIPS p8700 processor.
 ; CHECK-NEXT:   no-default-unroll                - Disable default unroll preference..
@@ -134,6 +141,8 @@
 ; CHECK-NEXT:   shvsatpa                         - 'Shvsatpa' (vsatp supports all modes supported by satp).
 ; CHECK-NEXT:   shvstvala                        - 'Shvstvala' (vstval provides all needed values).
 ; CHECK-NEXT:   shvstvecd                        - 'Shvstvecd' (vstvec supports Direct mode).
+; CHECK-NEXT:   shxadd-load-fusion               - Enable SH(1|2|3)ADD + load macrofusion.
+; CHECK-NEXT:   shxadduw-load-fusion             - Enable SH(1|2|3)ADDUW + load macrofusion.
 ; CHECK-NEXT:   sifive7                          - SiFive 7-Series processors.
 ; CHECK-NEXT:   smaia                            - 'Smaia' (Advanced Interrupt Architecture Machine Level).
 ; CHECK-NEXT:   smcdeleg                         - 'Smcdeleg' (Counter Delegation Machine Level).
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index 13464141ce27e..a8e6d887d57f8 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -2,7 +2,12 @@
 # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
 # RUN:   -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
 # RUN:   -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
+# RUN:   -mattr=+add-lw-fusion,+auipc-ld-fusion,+lui-ld-fusion,+addi-ld-fusion,+addi-lw-fusion \
+# RUN:   -mattr=+zba,+adduw-lw-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
 # RUN:   | FileCheck %s
+# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
+# RUN:   -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
+# RUN:   -mattr=+zba,+bfext-fusion | FileCheck --check-prefixes=CHECK-BFEXT %s
 
 # CHECK: lui_addi:%bb.0
 # CHECK: Macro fuse: {{.*}}LUI - ADDI
@@ -174,3 +179,558 @@ body:             |
     $x11 = COPY %5
     PseudoRET
 ...
+
+# CHECK: add_lw
+# CHECK: Macro fuse: {{.*}}ADD - LW
+---
+name: add_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: auipc_ld
+# CHECK: Macro fuse: {{.*}}AUIPC - LD
+---
+name: auipc_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LD %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: lui_ld
+# CHECK: Macro fuse: {{.*}}LUI - LD
+---
+name: lui_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = LUI 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LD %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK-BFEXT: bitfield_extract
+# CHECK-BFEXT: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: bitfield_extract
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = SLLI %1, 31
+    %3:gpr = XORI %1, 3
+    %4:gpr = SRLI %2, 48
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: addi_ld
+# CHECK: Macro fuse: {{.*}}ADDI - LD
+---
+name: addi_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADDI %1, 8
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: addi_lw
+# CHECK: Macro fuse: {{.*}}ADDI - LW
+---
+name: addi_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADDI %1, 8
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: adduw_lw
+# CHECK: Macro fuse: {{.*}}ADD_UW - LW
+---
+name: adduw_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1add_lb
+# CHECK: Macro fuse: {{.*}}SH1ADD - LB
+---
+name: sh1add_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LB %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2add_lb
+# CHECK: Macro fuse: {{.*}}SH2ADD - LB
+---
+name: sh2add_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LB %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3add_lb
+# CHECK: Macro fuse: {{.*}}SH3ADD - LB
+---
+name: sh3add_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LB %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1add_lh
+# CHECK: Macro fuse: {{.*}}SH1ADD - LH
+---
+name: sh1add_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2add_lh
+# CHECK: Macro fuse: {{.*}}SH2ADD - LH
+---
+name: sh2add_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3add_lh
+# CHECK: Macro fuse: {{.*}}SH3ADD - LH
+---
+name: sh3add_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1add_lw
+# CHECK: Macro fuse: {{.*}}SH1ADD - LW
+---
+name: sh1add_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2add_lw
+# CHECK: Macro fuse: {{.*}}SH2ADD - LW
+---
+name: sh2add_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3add_lw
+# CHECK: Macro fuse: {{.*}}SH3ADD - LW
+---
+name: sh3add_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1add_ld
+# CHECK: Macro fuse: {{.*}}SH1ADD - LD
+---
+name: sh1add_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2add_ld
+# CHECK: Macro fuse: {{.*}}SH2ADD - LD
+---
+name: sh2add_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3add_ld
+# CHECK: Macro fuse: {{.*}}SH3ADD - LD
+---
+name: sh3add_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1adduw_lb
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LB
+---
+name: sh1adduw_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LB %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2adduw_lb
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LB
+---
+name: sh2adduw_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LB %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3adduw_lb
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LB
+---
+name: sh3adduw_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LB %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1adduw_lh
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LH
+---
+name: sh1adduw_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2adduw_lh
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LH
+---
+name: sh2adduw_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3adduw_lh
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LH
+---
+name: sh3adduw_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1adduw_lw
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LW
+---
+name: sh1adduw_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2adduw_lw
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LW
+---
+name: sh2adduw_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3adduw_lw
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LW
+---
+name: sh3adduw_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LW %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1adduw_ld
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LD
+---
+name: sh1adduw_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2adduw_ld
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LD
+---
+name: sh2adduw_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3adduw_ld
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LD
+---
+name: sh3adduw_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...

>From 2238a79749344bda59715288b9e11d03e13da62f Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Tue, 29 Jul 2025 13:23:16 -0700
Subject: [PATCH 2/5] Add extra load macrofusion cases and other fixes

- add missing macrofusions in veyron-v1 processor def;

- fix ADD_UW/add.uw comment;

- add ADD + lb/lh/lbu/lhu/lwu macrofusions;

- add shXADD + lbu/lhu/lwu macrofusions;

- add shXADD_UW + lbu/lhu/lwu macrofusions.
---
 llvm/lib/Target/RISCV/RISCVMacroFusion.td |  24 +-
 llvm/lib/Target/RISCV/RISCVProcessors.td  |   5 +-
 llvm/test/CodeGen/RISCV/features-info.ll  |   1 +
 llvm/test/CodeGen/RISCV/macro-fusions.mir | 416 +++++++++++++++++++++-
 4 files changed, 438 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 34ad042c565d7..8df756b44bf89 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -104,6 +104,18 @@ def TuneADDLWFusion
                    CheckImmOperand<2, 0>
                  ]>>;
 
+// Fuse add followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
+//   add rd, rs1, rs2
+//   load rd, 0(rd)
+def TuneADDLoadFusion
+  : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD + load macrofusion",
+                 CheckOpcode<[ADD]>,
+                 CheckAll<[
+                   CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>,
+                   CheckIsImmOperand<2>,
+                   CheckImmOperand<2, 0>
+                 ]>>;
+
 // Fuse AUIPC followed by LD:
 //   auipc rd, imm20
 //   ld rd, imm12(rd)
@@ -156,8 +168,8 @@ def TuneADDILWFusion
                  CheckOpcode<[ADDI]>,
                  CheckOpcode<[LW]>>;
 
-// Fuse ADDUW followed by LW
-//   adduw rd, rs1, rs2
+// Fuse ADD_UW followed by LW
+//   add.uw rd, rs1, rs2
 //   lw rd, imm12(rd)
 def TuneADDUWLWFusion
   : SimpleFusion<"adduw-lw-fusion", "HasADDUWLWFusion",
@@ -165,20 +177,20 @@ def TuneADDUWLWFusion
                  CheckOpcode<[ADD_UW]>,
                  CheckOpcode<[LW]>>;
 
-// Fuse SHXADD followed by a load (lb, lh, lw, ld)
+// Fuse SHXADD followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   shXadd rd, rs1, rs2
 //   load rd, imm12(rd)
 def TuneSHXADDLoadFusion
   : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
                  "Enable SH(1|2|3)ADD + load macrofusion",
                  CheckOpcode<[SH1ADD, SH2ADD, SH3ADD]>,
-                 CheckOpcode<[LB, LH, LW, LD]>>;
+                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
 
-// Fuse SHXADD.UW followed by a load (lb, lh, lw, ld)
+// Fuse SHXADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   shXadd.uw rd, rs1, rs2
 //   load rd, imm12(rd)
 def TuneSHXADDUWLoadFusion
   : SimpleFusion<"shxadduw-load-fusion", "HasSHXADDUWLoadFusion",
                  "Enable SH(1|2|3)ADDUW + load macrofusion",
                  CheckOpcode<[SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
-                 CheckOpcode<[LB, LH, LW, LD]>>;
+                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 8445730446dd9..c516e2e48709e 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -598,7 +598,10 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                               TuneZExtHFusion,
                                               TuneZExtWFusion,
                                               TuneShiftedZExtWFusion,
-                                              TuneLDADDFusion]> {
+                                              TuneLDADDFusion,
+                                              TuneADDLWFusion,
+                                              TuneAUIPCLDFusion,
+                                              TuneLUILDFusion]> {
   let MVendorID = 0x61f;
   let MArchID = 0x8000000000010000;
   let MImpID = 0x111;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 8148412c037a1..1dd5eebb27424 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -6,6 +6,7 @@
 ; CHECK-NEXT:   32bit                            - Implements RV32.
 ; CHECK-NEXT:   64bit                            - Implements RV64.
 ; CHECK-NEXT:   a                                - 'A' (Atomic Instructions).
+; CHECK-NEXT:   add-load-fusion                  - Enable ADD + load macrofusion.
 ; CHECK-NEXT:   add-lw-fusion                    - Enable ADD+LW macrofusion.
 ; CHECK-NEXT:   addi-ld-fusion                   - Enable ADDI+LD macrofusion.
 ; CHECK-NEXT:   addi-lw-fusion                   - Enable ADDI+LW macrofusion.
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index a8e6d887d57f8..587830c5f2947 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
 # RUN:   -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
 # RUN:   -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
-# RUN:   -mattr=+add-lw-fusion,+auipc-ld-fusion,+lui-ld-fusion,+addi-ld-fusion,+addi-lw-fusion \
+# RUN:   -mattr=+add-load-fusion,+add-lw-fusion,+auipc-ld-fusion,+lui-ld-fusion,+addi-ld-fusion,+addi-lw-fusion \
 # RUN:   -mattr=+zba,+adduw-lw-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
 # RUN:   | FileCheck %s
 # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
@@ -180,6 +180,42 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: add_lb
+# CHECK: Macro fuse: {{.*}}ADD - LB
+---
+name: add_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LB %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: add_lh
+# CHECK: Macro fuse: {{.*}}ADD - LH
+---
+name: add_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
 # CHECK: add_lw
 # CHECK: Macro fuse: {{.*}}ADD - LW
 ---
@@ -198,6 +234,60 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: add_lbu
+# CHECK: Macro fuse: {{.*}}ADD - LBU
+---
+name: add_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: add_lhu
+# CHECK: Macro fuse: {{.*}}ADD - LHU
+---
+name: add_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: add_lwu
+# CHECK: Macro fuse: {{.*}}ADD - LWU
+---
+name: add_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
 # CHECK: auipc_ld
 # CHECK: Macro fuse: {{.*}}AUIPC - LD
 ---
@@ -519,6 +609,168 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: sh1add_lbu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LBU
+---
+name: sh1add_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2add_lbu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LBU
+---
+name: sh2add_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3add_lbu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LBU
+---
+name: sh3add_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1add_lhu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LHU
+---
+name: sh1add_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2add_lhu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LHU
+---
+name: sh2add_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3add_lhu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LHU
+---
+name: sh3add_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1add_lwu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LWU
+---
+name: sh1add_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2add_lwu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LWU
+---
+name: sh2add_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3add_lwu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LWU
+---
+name: sh3add_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
 # CHECK: sh1adduw_lb
 # CHECK: Macro fuse: {{.*}}SH1ADD_UW - LB
 ---
@@ -734,3 +986,165 @@ body:             |
     $x11 = COPY %5
     PseudoRET
 ...
+
+# CHECK: sh1adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LBU
+---
+name: sh1adduw_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LBU
+---
+name: sh2adduw_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LBU
+---
+name: sh3adduw_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LHU
+---
+name: sh1adduw_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LHU
+---
+name: sh2adduw_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LHU
+---
+name: sh3adduw_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh1adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LWU
+---
+name: sh1adduw_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH1ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh2adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LWU
+---
+name: sh2adduw_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH2ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: sh3adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LWU
+---
+name: sh3adduw_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = SH3ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 8
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...

>From e5cf75c12a814b81660fae45b6e9171e509dbe40 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Thu, 31 Jul 2025 07:26:53 -0700
Subject: [PATCH 3/5] RISCVMacroFusion.td: review changes

- add TuneAUIPCADDIWFusion;

- turn TuneAUIPCLDFusion (auipc+ld) into TuneAUIPCLoadFusion
  (auipc + lb/lh/lw/ld/lbu/lhu/lwu);

- turn TuneLUILDFusion (lui+ld) into TuneLUILoadFusion
  (lui + lb/lh/lw/ld/lbu/lhu/lwu);

- turn TuneADD_UWLWFusion (add.uw+lw) into TuneADD_UWLoadFusion
  (add.uw + lb/lh/lw/ld/lbu/lhu/lwu);

- remove TuneADDILWFusion. Turn TuneADDILDFusion into TuneADDILoadFusion
  (addi + lb/lh/lw/ld/lbu/lhu/lwu);

- remove the immediate check from TuneADDLoadFusion;

- remove the immediate check from TuneBFExtFusion: it was a copy/paste
  remnant of the existing slli+srli fusions;

- renames: TuneSHXADD_UWLoadFusion and "Enable SH(1|2|3)ADD_UW ..."
---
 llvm/lib/Target/RISCV/RISCVMacroFusion.td |  88 ++---
 llvm/lib/Target/RISCV/RISCVProcessors.td  |   4 +-
 llvm/test/CodeGen/RISCV/features-info.ll  |  12 +-
 llvm/test/CodeGen/RISCV/macro-fusions.mir | 433 +++++++++++++++++++++-
 4 files changed, 473 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 8df756b44bf89..f090b2c150c85 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -28,6 +28,15 @@ def TuneAUIPCADDIFusion
                  CheckOpcode<[AUIPC]>,
                  CheckOpcode<[ADDI]>>;
 
+// Fuse AUIPC followed by ADDIW:
+//   auipc rd, imm20
+//   addiw rd, rd, imm12
+def TuneAUIPCADDIWFusion
+  : SimpleFusion<"auipc-addiw-fusion", "HasAUIPCADDIWFusion",
+                 "Enable AUIPC+ADDIW macrofusion",
+                 CheckOpcode<[AUIPC]>,
+                 CheckOpcode<[ADDIW]>>;
+
 // Fuse zero extension of halfword:
 //   slli rd, rs1, 48
 //   srli rd, rd, 48
@@ -110,29 +119,25 @@ def TuneADDLWFusion
 def TuneADDLoadFusion
   : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD + load macrofusion",
                  CheckOpcode<[ADD]>,
-                 CheckAll<[
-                   CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>,
-                   CheckIsImmOperand<2>,
-                   CheckImmOperand<2, 0>
-                 ]>>;
+                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
 
-// Fuse AUIPC followed by LD:
+// Fuse AUIPC followed by by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   auipc rd, imm20
-//   ld rd, imm12(rd)
-def TuneAUIPCLDFusion
-  : SimpleFusion<"auipc-ld-fusion", "HasAUIPCLDFusion",
-                 "Enable AUIPC+LD macrofusion",
+//   load rd, imm12(rd)
+def TuneAUIPCLoadFusion
+  : SimpleFusion<"auipc-load-fusion", "HasAUIPCLoadFusion",
+                 "Enable AUIPC + load macrofusion",
                  CheckOpcode<[AUIPC]>,
-                 CheckOpcode<[LD]>>;
+                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
 
-// Fuse LUI followed by LD:
+// Fuse LUI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   lui rd, imm[31:12]
-//   ld rd, imm12(rd)
-def TuneLUILDFusion
-  : SimpleFusion<"lui-ld-fusion", "HasLUILDFusion",
-                 "Enable LUI+LD macrofusion",
+//   load rd, imm12(rd)
+def TuneLUILoadFusion
+  : SimpleFusion<"lui-load-fusion", "HasLUILoadFusion",
+                 "Enable LUI + load macrofusion",
                  CheckOpcode<[LUI]>,
-                 CheckOpcode<[LD]>>;
+                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
 
 // Bitfield extract fusion: similar to TuneShiftedZExtWFusion
 // but without the immediate restriction
@@ -141,41 +146,26 @@ def TuneLUILDFusion
 def TuneBFExtFusion
   : SimpleFusion<"bfext-fusion", "HasBFExtFusion",
                  "Enable SLLI+SRLI (bitfield extract) macrofusion",
-                 CheckAll<[
-                   CheckOpcode<[SLLI]>,
-                   CheckIsImmOperand<2>,
-                 ]>,
-                 CheckAll<[
-                   CheckOpcode<[SRLI]>,
-                   CheckIsImmOperand<2>,
-                 ]>>;
+                 CheckOpcode<[SLLI]>,
+                 CheckOpcode<[SRLI]>>;
 
-// Fuse ADDI followed by LD
+// Fuse ADDI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   addi rd, rs1, imm12
-//   ld rd, imm12(rd)
-def TuneADDILDFusion
-  : SimpleFusion<"addi-ld-fusion", "HasADDILDFusion",
-                 "Enable ADDI+LD macrofusion",
-                 CheckOpcode<[ADDI]>,
-                 CheckOpcode<[LD]>>;
-
-// Fuse ADDI followed by LW
-//   addi rd, rs1, imm12
-//   lw rd, imm12(rd)
-def TuneADDILWFusion
-  : SimpleFusion<"addi-lw-fusion", "HasADDILWFusion",
-                 "Enable ADDI+LW macrofusion",
+//   load rd, imm12(rd)
+def TuneADDILoadFusion
+  : SimpleFusion<"addi-load-fusion", "HasADDILoadFusion",
+                 "Enable ADDI + load macrofusion",
                  CheckOpcode<[ADDI]>,
-                 CheckOpcode<[LW]>>;
+                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
 
-// Fuse ADD_UW followed by LW
+// Fuse ADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   add.uw rd, rs1, rs2
-//   lw rd, imm12(rd)
-def TuneADDUWLWFusion
-  : SimpleFusion<"adduw-lw-fusion", "HasADDUWLWFusion",
-                 "Enable ADD_UW+LW macrofusion",
+//   load rd, imm12(rd)
+def TuneADD_UWLoadFusion
+  : SimpleFusion<"adduw-load-fusion", "HasADD_UWLoadFusion",
+                 "Enable ADD_UW + load macrofusion",
                  CheckOpcode<[ADD_UW]>,
-                 CheckOpcode<[LW]>>;
+                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
 
 // Fuse SHXADD followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   shXadd rd, rs1, rs2
@@ -189,8 +179,8 @@ def TuneSHXADDLoadFusion
 // Fuse SHXADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   shXadd.uw rd, rs1, rs2
 //   load rd, imm12(rd)
-def TuneSHXADDUWLoadFusion
-  : SimpleFusion<"shxadduw-load-fusion", "HasSHXADDUWLoadFusion",
-                 "Enable SH(1|2|3)ADDUW + load macrofusion",
+def TuneSHXADD_UWLoadFusion
+  : SimpleFusion<"shxadduw-load-fusion", "HasSHXADD_UWLoadFusion",
+                 "Enable SH(1|2|3)ADD_UW + load macrofusion",
                  CheckOpcode<[SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
                  CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index c516e2e48709e..5e572375d4f05 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -600,8 +600,8 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                               TuneShiftedZExtWFusion,
                                               TuneLDADDFusion,
                                               TuneADDLWFusion,
-                                              TuneAUIPCLDFusion,
-                                              TuneLUILDFusion]> {
+                                              TuneAUIPCLoadFusion,
+                                              TuneLUILoadFusion]> {
   let MVendorID = 0x61f;
   let MArchID = 0x8000000000010000;
   let MImpID = 0x111;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 1dd5eebb27424..b74ee299ad95f 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -8,12 +8,12 @@
 ; CHECK-NEXT:   a                                - 'A' (Atomic Instructions).
 ; CHECK-NEXT:   add-load-fusion                  - Enable ADD + load macrofusion.
 ; CHECK-NEXT:   add-lw-fusion                    - Enable ADD+LW macrofusion.
-; CHECK-NEXT:   addi-ld-fusion                   - Enable ADDI+LD macrofusion.
-; CHECK-NEXT:   addi-lw-fusion                   - Enable ADDI+LW macrofusion.
-; CHECK-NEXT:   adduw-lw-fusion                  - Enable ADD_UW+LW macrofusion.
+; CHECK-NEXT:   addi-load-fusion                 - Enable ADDI + load macrofusion.
+; CHECK-NEXT:   adduw-load-fusion                - Enable ADD_UW + load macrofusion.
 ; CHECK-NEXT:   andes45                          - Andes 45-Series processors.
 ; CHECK-NEXT:   auipc-addi-fusion                - Enable AUIPC+ADDI macrofusion.
-; CHECK-NEXT:   auipc-ld-fusion                  - Enable AUIPC+LD macrofusion.
+; CHECK-NEXT:   auipc-addiw-fusion               - Enable AUIPC+ADDIW macrofusion.
+; CHECK-NEXT:   auipc-load-fusion                - Enable AUIPC + load macrofusion.
 ; CHECK-NEXT:   b                                - 'B' (the collection of the Zba, Zbb, Zbs extensions).
 ; CHECK-NEXT:   bfext-fusion                     - Enable SLLI+SRLI (bitfield extract) macrofusion.
 ; CHECK-NEXT:   c                                - 'C' (Compressed Instructions).
@@ -69,7 +69,7 @@
 ; CHECK-NEXT:   ld-add-fusion                    - Enable LD+ADD macrofusion.
 ; CHECK-NEXT:   log-vrgather                     - Has vrgather.vv with LMUL*log2(LMUL) latency
 ; CHECK-NEXT:   lui-addi-fusion                  - Enable LUI+ADDI macro fusion.
-; CHECK-NEXT:   lui-ld-fusion                    - Enable LUI+LD macrofusion.
+; CHECK-NEXT:   lui-load-fusion                  - Enable LUI + load macrofusion.
 ; CHECK-NEXT:   m                                - 'M' (Integer Multiplication and Division).
 ; CHECK-NEXT:   mips-p8700                       - MIPS p8700 processor.
 ; CHECK-NEXT:   no-default-unroll                - Disable default unroll preference..
@@ -143,7 +143,7 @@
 ; CHECK-NEXT:   shvstvala                        - 'Shvstvala' (vstval provides all needed values).
 ; CHECK-NEXT:   shvstvecd                        - 'Shvstvecd' (vstvec supports Direct mode).
 ; CHECK-NEXT:   shxadd-load-fusion               - Enable SH(1|2|3)ADD + load macrofusion.
-; CHECK-NEXT:   shxadduw-load-fusion             - Enable SH(1|2|3)ADDUW + load macrofusion.
+; CHECK-NEXT:   shxadduw-load-fusion             - Enable SH(1|2|3)ADD_UW + load macrofusion.
 ; CHECK-NEXT:   sifive7                          - SiFive 7-Series processors.
 ; CHECK-NEXT:   smaia                            - 'Smaia' (Advanced Interrupt Architecture Machine Level).
 ; CHECK-NEXT:   smcdeleg                         - 'Smcdeleg' (Counter Delegation Machine Level).
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index 587830c5f2947..135dbb559cf9b 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -1,9 +1,9 @@
 # REQUIRES: asserts
 # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
 # RUN:   -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
-# RUN:   -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
-# RUN:   -mattr=+add-load-fusion,+add-lw-fusion,+auipc-ld-fusion,+lui-ld-fusion,+addi-ld-fusion,+addi-lw-fusion \
-# RUN:   -mattr=+zba,+adduw-lw-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
+# RUN:   -mattr=+lui-addi-fusion,+auipc-addi-fusion,+auipc-addiw-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
+# RUN:   -mattr=+add-load-fusion,+add-lw-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \
+# RUN:   -mattr=+zba,+adduw-load-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
 # RUN:   | FileCheck %s
 # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
 # RUN:   -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
@@ -43,6 +43,23 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: auipc_addiw
+# CHECK: Macro fuse: {{.*}}AUIPC - ADDIW
+---
+name: auipc_addiw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = ADDIW %2, 3
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
 # CHECK: slli_srli_shifted_zext
 # CHECK: Macro fuse: {{.*}}SLLI - SRLI
 ---
@@ -288,6 +305,57 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: auipc_lb
+# CHECK: Macro fuse: {{.*}}AUIPC - LB
+---
+name: auipc_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LB %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: auipc_lh
+# CHECK: Macro fuse: {{.*}}AUIPC - LH
+---
+name: auipc_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LH %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: auipc_lw
+# CHECK: Macro fuse: {{.*}}AUIPC - LW
+---
+name: auipc_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LW %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
 # CHECK: auipc_ld
 # CHECK: Macro fuse: {{.*}}AUIPC - LD
 ---
@@ -305,6 +373,108 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: auipc_lbu
+# CHECK: Macro fuse: {{.*}}AUIPC - LBU
+---
+name: auipc_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LBU %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: auipc_lhu
+# CHECK: Macro fuse: {{.*}}AUIPC - LHU
+---
+name: auipc_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LHU %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: auipc_lwu
+# CHECK: Macro fuse: {{.*}}AUIPC - LWU
+---
+name: auipc_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LWU %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: lui_lb
+# CHECK: Macro fuse: {{.*}}LUI - LB
+---
+name: lui_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = LUI 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LB %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: lui_lh
+# CHECK: Macro fuse: {{.*}}LUI - LH
+---
+name: lui_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = LUI 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LH %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: lui_lw
+# CHECK: Macro fuse: {{.*}}LUI - LW
+---
+name: lui_lw
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = LUI 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LW %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
 # CHECK: lui_ld
 # CHECK: Macro fuse: {{.*}}LUI - LD
 ---
@@ -322,6 +492,57 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: lui_lbu
+# CHECK: Macro fuse: {{.*}}LUI - LBU
+---
+name: lui_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = LUI 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LBU %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: lui_lhu
+# CHECK: Macro fuse: {{.*}}LUI - LHU
+---
+name: lui_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = LUI 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LHU %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: lui_lwu
+# CHECK: Macro fuse: {{.*}}LUI - LWU
+---
+name: lui_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = LUI 1
+    %3:gpr = XORI %1, 2
+    %4:gpr = LWU %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
 # CHECK-BFEXT: bitfield_extract
 # CHECK-BFEXT: Macro fuse: {{.*}}SLLI - SRLI
 ---
@@ -339,10 +560,10 @@ body:             |
     PseudoRET
 ...
 
-# CHECK: addi_ld
-# CHECK: Macro fuse: {{.*}}ADDI - LD
+# CHECK: addi_lb
+# CHECK: Macro fuse: {{.*}}ADDI - LB
 ---
-name: addi_ld
+name: addi_lb
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
@@ -351,7 +572,25 @@ body:             |
     %2:gpr = COPY $x11
     %3:gpr = ADDI %1, 8
     %4:gpr = XORI %2, 3
-    %5:gpr = LD %3, 0
+    %5:gpr = LB %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: addi_lh
+# CHECK: Macro fuse: {{.*}}ADDI - LH
+---
+name: addi_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADDI %1, 8
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 0
     $x10 = COPY %4
     $x11 = COPY %5
     PseudoRET
@@ -375,6 +614,114 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: addi_ld
+# CHECK: Macro fuse: {{.*}}ADDI - LD
+---
+name: addi_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADDI %1, 8
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: addi_lbu
+# CHECK: Macro fuse: {{.*}}ADDI - LBU
+---
+name: addi_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADDI %1, 8
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: addi_lhu
+# CHECK: Macro fuse: {{.*}}ADDI - LHU
+---
+name: addi_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADDI %1, 8
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: addi_lwu
+# CHECK: Macro fuse: {{.*}}ADDI - LWU
+---
+name: addi_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADDI %1, 8
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: adduw_lb
+# CHECK: Macro fuse: {{.*}}ADD_UW - LB
+---
+name: adduw_lb
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LB %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: adduw_lh
+# CHECK: Macro fuse: {{.*}}ADD_UW - LH
+---
+name: adduw_lh
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LH %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
 # CHECK: adduw_lw
 # CHECK: Macro fuse: {{.*}}ADD_UW - LW
 ---
@@ -393,6 +740,78 @@ body:             |
     PseudoRET
 ...
 
+# CHECK: adduw_ld
+# CHECK: Macro fuse: {{.*}}ADD_UW - LD
+---
+name: adduw_ld
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LD %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: adduw_lbu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LBU
+---
+name: adduw_lbu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LBU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: adduw_lhu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LHU
+---
+name: adduw_lhu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LHU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
+# CHECK: adduw_lwu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LWU
+---
+name: adduw_lwu
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD_UW %1, %2
+    %4:gpr = XORI %2, 3
+    %5:gpr = LWU %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...
+
 # CHECK: sh1add_lb
 # CHECK: Macro fuse: {{.*}}SH1ADD - LB
 ---

>From d26f03a907a7102092d8b5f783c9e530f4465dd9 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Fri, 1 Aug 2025 06:05:28 -0700
Subject: [PATCH 4/5] Remove and consolidate macrofusions

- remove auipc+addiw since it rarely, if ever, happens;
- remove add+lw. veyron-v1 now uses add+load;
- merge add+load and add.uw+load into a single fusion;
- merge shXadd+load and shXadd.uw+load into a single fusion;
- fix immediate 0 comment in AddLoad fusion.
---
 llvm/lib/Target/RISCV/RISCVMacroFusion.td | 57 ++++-------------------
 llvm/lib/Target/RISCV/RISCVProcessors.td  |  3 +-
 llvm/test/CodeGen/RISCV/features-info.ll  |  8 +---
 llvm/test/CodeGen/RISCV/macro-fusions.mir | 23 ++-------
 4 files changed, 15 insertions(+), 76 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index f090b2c150c85..459c8bece5bd7 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -28,15 +28,6 @@ def TuneAUIPCADDIFusion
                  CheckOpcode<[AUIPC]>,
                  CheckOpcode<[ADDI]>>;
 
-// Fuse AUIPC followed by ADDIW:
-//   auipc rd, imm20
-//   addiw rd, rd, imm12
-def TuneAUIPCADDIWFusion
-  : SimpleFusion<"auipc-addiw-fusion", "HasAUIPCADDIWFusion",
-                 "Enable AUIPC+ADDIW macrofusion",
-                 CheckOpcode<[AUIPC]>,
-                 CheckOpcode<[ADDIW]>>;
-
 // Fuse zero extension of halfword:
 //   slli rd, rs1, 48
 //   srli rd, rd, 48
@@ -101,24 +92,12 @@ def TuneLDADDFusion
                    CheckImmOperand<2, 0>
                  ]>>;
 
-// Fuse add with lw:
-//   add rd, rs1, rs2
-//   lw rd, 0(rd)
-def TuneADDLWFusion
-  : SimpleFusion<"add-lw-fusion", "HasADDLWFusion", "Enable ADD+LW macrofusion",
-                 CheckOpcode<[ADD]>,
-                 CheckAll<[
-                   CheckOpcode<[LW]>,
-                   CheckIsImmOperand<2>,
-                   CheckImmOperand<2, 0>
-                 ]>>;
-
-// Fuse add followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
-//   add rd, rs1, rs2
-//   load rd, 0(rd)
+// Fuse add(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
+//   add(.uw) rd, rs1, rs2
+//   load rd, imm12(rd)
 def TuneADDLoadFusion
-  : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD + load macrofusion",
-                 CheckOpcode<[ADD]>,
+  : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD(.UW) + load macrofusion",
+                 CheckOpcode<[ADD, ADD_UW]>,
                  CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
 
 // Fuse AUIPC followed by by a load (lb, lh, lw, ld, lbu, lhu, lwu)
@@ -158,29 +137,11 @@ def TuneADDILoadFusion
                  CheckOpcode<[ADDI]>,
                  CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
 
-// Fuse ADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
-//   add.uw rd, rs1, rs2
-//   load rd, imm12(rd)
-def TuneADD_UWLoadFusion
-  : SimpleFusion<"adduw-load-fusion", "HasADD_UWLoadFusion",
-                 "Enable ADD_UW + load macrofusion",
-                 CheckOpcode<[ADD_UW]>,
-                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-
-// Fuse SHXADD followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
-//   shXadd rd, rs1, rs2
+// Fuse shXadd(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+//   shXadd(.uw) rd, rs1, rs2
 //   load rd, imm12(rd)
 def TuneSHXADDLoadFusion
   : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
-                 "Enable SH(1|2|3)ADD + load macrofusion",
-                 CheckOpcode<[SH1ADD, SH2ADD, SH3ADD]>,
-                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
-
-// Fuse SHXADD_UW followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
-//   shXadd.uw rd, rs1, rs2
-//   load rd, imm12(rd)
-def TuneSHXADD_UWLoadFusion
-  : SimpleFusion<"shxadduw-load-fusion", "HasSHXADD_UWLoadFusion",
-                 "Enable SH(1|2|3)ADD_UW + load macrofusion",
-                 CheckOpcode<[SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
+                 "Enable SH(1|2|3)ADD(.UW) + load macrofusion",
+                 CheckOpcode<[SH1ADD, SH2ADD, SH3ADD, SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
                  CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 5e572375d4f05..31d2b3a10db53 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -598,8 +598,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                               TuneZExtHFusion,
                                               TuneZExtWFusion,
                                               TuneShiftedZExtWFusion,
-                                              TuneLDADDFusion,
-                                              TuneADDLWFusion,
+                                              TuneADDLoadFusion,
                                               TuneAUIPCLoadFusion,
                                               TuneLUILoadFusion]> {
   let MVendorID = 0x61f;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index b74ee299ad95f..fb539211fcc31 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -6,13 +6,10 @@
 ; CHECK-NEXT:   32bit                            - Implements RV32.
 ; CHECK-NEXT:   64bit                            - Implements RV64.
 ; CHECK-NEXT:   a                                - 'A' (Atomic Instructions).
-; CHECK-NEXT:   add-load-fusion                  - Enable ADD + load macrofusion.
-; CHECK-NEXT:   add-lw-fusion                    - Enable ADD+LW macrofusion.
+; CHECK-NEXT:   add-load-fusion                  - Enable ADD(.UW) + load macrofusion.
 ; CHECK-NEXT:   addi-load-fusion                 - Enable ADDI + load macrofusion.
-; CHECK-NEXT:   adduw-load-fusion                - Enable ADD_UW + load macrofusion.
 ; CHECK-NEXT:   andes45                          - Andes 45-Series processors.
 ; CHECK-NEXT:   auipc-addi-fusion                - Enable AUIPC+ADDI macrofusion.
-; CHECK-NEXT:   auipc-addiw-fusion               - Enable AUIPC+ADDIW macrofusion.
 ; CHECK-NEXT:   auipc-load-fusion                - Enable AUIPC + load macrofusion.
 ; CHECK-NEXT:   b                                - 'B' (the collection of the Zba, Zbb, Zbs extensions).
 ; CHECK-NEXT:   bfext-fusion                     - Enable SLLI+SRLI (bitfield extract) macrofusion.
@@ -142,8 +139,7 @@
 ; CHECK-NEXT:   shvsatpa                         - 'Shvsatpa' (vsatp supports all modes supported by satp).
 ; CHECK-NEXT:   shvstvala                        - 'Shvstvala' (vstval provides all needed values).
 ; CHECK-NEXT:   shvstvecd                        - 'Shvstvecd' (vstvec supports Direct mode).
-; CHECK-NEXT:   shxadd-load-fusion               - Enable SH(1|2|3)ADD + load macrofusion.
-; CHECK-NEXT:   shxadduw-load-fusion             - Enable SH(1|2|3)ADD_UW + load macrofusion.
+; CHECK-NEXT:   shxadd-load-fusion               - Enable SH(1|2|3)ADD(.UW) + load macrofusion.
 ; CHECK-NEXT:   sifive7                          - SiFive 7-Series processors.
 ; CHECK-NEXT:   smaia                            - 'Smaia' (Advanced Interrupt Architecture Machine Level).
 ; CHECK-NEXT:   smcdeleg                         - 'Smcdeleg' (Counter Delegation Machine Level).
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index 135dbb559cf9b..ae5b52da2ac16 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -1,9 +1,9 @@
 # REQUIRES: asserts
 # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
 # RUN:   -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
-# RUN:   -mattr=+lui-addi-fusion,+auipc-addi-fusion,+auipc-addiw-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
-# RUN:   -mattr=+add-load-fusion,+add-lw-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \
-# RUN:   -mattr=+zba,+adduw-load-fusion,+shxadd-load-fusion,+shxadduw-load-fusion \
+# RUN:   -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
+# RUN:   -mattr=+add-load-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \
+# RUN:   -mattr=+zba,+shxadd-load-fusion \
 # RUN:   | FileCheck %s
 # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
 # RUN:   -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
@@ -43,23 +43,6 @@ body:             |
     PseudoRET
 ...
 
-# CHECK: auipc_addiw
-# CHECK: Macro fuse: {{.*}}AUIPC - ADDIW
----
-name: auipc_addiw
-tracksRegLiveness: true
-body:             |
-  bb.0.entry:
-    liveins: $x10
-    %1:gpr = COPY $x10
-    %2:gpr = AUIPC 1
-    %3:gpr = XORI %1, 2
-    %4:gpr = ADDIW %2, 3
-    $x10 = COPY %3
-    $x11 = COPY %4
-    PseudoRET
-...
-
 # CHECK: slli_srli_shifted_zext
 # CHECK: Macro fuse: {{.*}}SLLI - SRLI
 ---

>From 09ba18956a4ee9a7632eba73d1c9a5e38cb311a2 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Sat, 2 Aug 2025 05:59:07 -0700
Subject: [PATCH 5/5] Add 'Load' var to reduce repetition of load opcodes

---
 llvm/lib/Target/RISCV/RISCVMacroFusion.td | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 459c8bece5bd7..39e099bc947b2 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -92,13 +92,15 @@ def TuneLDADDFusion
                    CheckImmOperand<2, 0>
                  ]>>;
 
+defvar Load = [LB, LH, LW, LD, LBU, LHU, LWU];
+
 // Fuse add(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
 //   add(.uw) rd, rs1, rs2
 //   load rd, imm12(rd)
 def TuneADDLoadFusion
   : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD(.UW) + load macrofusion",
                  CheckOpcode<[ADD, ADD_UW]>,
-                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
+                 CheckOpcode<Load>>;
 
 // Fuse AUIPC followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   auipc rd, imm20
@@ -107,7 +109,7 @@ def TuneAUIPCLoadFusion
   : SimpleFusion<"auipc-load-fusion", "HasAUIPCLoadFusion",
                  "Enable AUIPC + load macrofusion",
                  CheckOpcode<[AUIPC]>,
-                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
+                 CheckOpcode<Load>>;
 
 // Fuse LUI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   lui rd, imm[31:12]
@@ -116,7 +118,7 @@ def TuneLUILoadFusion
   : SimpleFusion<"lui-load-fusion", "HasLUILoadFusion",
                  "Enable LUI + load macrofusion",
                  CheckOpcode<[LUI]>,
-                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
+                 CheckOpcode<Load>>;
 
 // Bitfield extract fusion: similar to TuneShiftedZExtWFusion
 // but without the immediate restriction
@@ -135,7 +137,7 @@ def TuneADDILoadFusion
   : SimpleFusion<"addi-load-fusion", "HasADDILoadFusion",
                  "Enable ADDI + load macrofusion",
                  CheckOpcode<[ADDI]>,
-                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
+                 CheckOpcode<Load>>;
 
 // Fuse shXadd(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
 //   shXadd(.uw) rd, rs1, rs2
@@ -144,4 +146,4 @@ def TuneSHXADDLoadFusion
   : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
                  "Enable SH(1|2|3)ADD(.UW) + load macrofusion",
                  CheckOpcode<[SH1ADD, SH2ADD, SH3ADD, SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
-                 CheckOpcode<[LB, LH, LW, LD, LBU, LHU, LWU]>>;
+                 CheckOpcode<Load>>;



More information about the llvm-commits mailing list