[llvm] [RISCV] Macro-fusion support for veyron-v1 CPU. (PR #70012)

Mikhail Gudim via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 10 12:21:13 PST 2023


https://github.com/mgudim updated https://github.com/llvm/llvm-project/pull/70012

>From b5b0949af3a53a0432c9793f43449aec317e3288 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at gmail.com>
Date: Wed, 11 Oct 2023 23:47:05 -0400
Subject: [PATCH 1/4] [RISCV] Macro-fusion support for veyron-v1 CPU.

Support was added for the following fusions:
  auipc-addi, slli-srli, ld-add
Some parts of the code became repetitive, so a small refactoring of the
existing lui-addi fusion was done.
---
 llvm/lib/Target/RISCV/RISCVFeatures.td     |  20 ++++
 llvm/lib/Target/RISCV/RISCVMacroFusion.cpp | 110 ++++++++++++++++++---
 llvm/lib/Target/RISCV/RISCVSubtarget.h     |   5 +-
 3 files changed, 120 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index d6f988ede7f5bf9..b5da4f801cd5854 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -960,6 +960,16 @@ def TuneLUIADDIFusion
     : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
                        "true", "Enable LUI+ADDI macrofusion">;
 
+def TuneAUIPCADDIFusion
+    : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
+                       "true", "Enable AUIPC+ADDI macrofusion">;
+def TuneSLLISRLIFusion
+    : SubtargetFeature<"slli-srli-fusion", "HasSLLISRLIFusion",
+                       "true", "Enable SLLI+SRLI macrofusion">;
+def TuneLDADDFusion
+    : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
+                       "true", "Enable fusion of load with the last instruction of the address calculation">;
+
 def TuneNoDefaultUnroll
     : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
                        "Disable default unroll preference.">;
@@ -977,9 +987,19 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                    [TuneNoDefaultUnroll,
                                     TuneShortForwardBranchOpt]>;
 
+<<<<<<< HEAD
 def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
                                          "Ventana-Veyron Series processors",
                                          [TuneLUIADDIFusion]>;
+=======
+def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
+                                         "Ventana Veyron-Series processors",
+                                         [TuneLUIADDIFusion,
+                                          TuneAUIPCADDIFusion,
+                                          TuneSLLISRLIFusion,
+                                          TuneLDADDFusion]>;
+
+>>>>>>> bbf0196a7d42 ([RISCV] Macro-fusion support for veyron-v1 CPU.)
 
 // Assume that lock-free native-width atomics are available, even if the target
 // and operating system combination would not usually provide them. The user
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index 02a8d5c18fe1a0e..c33b3503aed0f97 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -18,6 +18,90 @@
 
 using namespace llvm;
 
+static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) {
+  if (SecondMI.getOperand(1).getReg() != FirstDest)
+    return false;
+
+  // If the input is virtual make sure this is the only user.
+  if (FirstDest.isVirtual()) {
+    auto &MRI = SecondMI.getMF()->getRegInfo();
+    return MRI.hasOneNonDBGUse(FirstDest);
+  }
+
+  return SecondMI.getOperand(0).getReg() == FirstDest;
+}
+
+// Fuse ADD followed by LD from offset 0 of the ADD result.
+static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+  if (SecondMI.getOpcode() != RISCV::LD)
+    return false;
+
+  // The offset operand must be an immediate equal to zero.
+  const MachineOperand &Offset = SecondMI.getOperand(2);
+  if (!Offset.isImm() || Offset.getImm() != 0)
+    return false;
+
+  // Given SecondMI, when FirstMI is unspecified, we must return
+  // whether SecondMI may be part of a fused pair at all.
+  if (!FirstMI)
+    return true;
+
+  // Only an ADD can be the first half of the pair; reject anything else.
+  if (FirstMI->getOpcode() != RISCV::ADD)
+    return false;
+
+  return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse SLLI by 32 feeding into SRLI by 32 or less or
+// SLLI by exactly 48 feeding into SRLI by exactly 48.
+static bool isSLLISRLI(const MachineInstr *FirstMI,
+                       const MachineInstr &SecondMI) {
+  if (SecondMI.getOpcode() != RISCV::SRLI)
+    return false;
+
+  if (!SecondMI.getOperand(2).isImm())
+    return false;
+
+  unsigned SRLIImm = SecondMI.getOperand(2).getImm();
+  bool IsShiftBy48 = SRLIImm == 48;
+  if (SRLIImm > 32 && !IsShiftBy48)
+    return false;
+
+  // Given SecondMI, when FirstMI is unspecified, we must return
+  // if SecondMI may be part of a fused pair at all.
+  if (!FirstMI)
+    return true;
+
+  if (FirstMI->getOpcode() != RISCV::SLLI)
+    return false;
+
+  unsigned SLLIImm = FirstMI->getOperand(2).getImm();
+  if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm > 32))
+    return false;
+
+  return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse AUIPC followed by ADDI
+static bool isAUIPCADDI(const MachineInstr *FirstMI,
+                        const MachineInstr &SecondMI) {
+  if (SecondMI.getOpcode() != RISCV::ADDI)
+    return false;
+  // Assume the 1st instr to be a wildcard if it is unspecified.
+  if (!FirstMI)
+    return true;
+
+  if (FirstMI->getOpcode() != RISCV::AUIPC)
+    return false;
+
+  // The first operand of ADDI might be a frame index.
+  if (!SecondMI.getOperand(1).isReg())
+    return false;
+
+  return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
 // Fuse LUI followed by ADDI or ADDIW.
 // rd = imm[31:0] which decomposes to
 // lui rd, imm[31:12]
@@ -27,7 +111,6 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
   if (SecondMI.getOpcode() != RISCV::ADDI &&
       SecondMI.getOpcode() != RISCV::ADDIW)
     return false;
-
   // Assume the 1st instr to be a wildcard if it is unspecified.
   if (!FirstMI)
     return true;
@@ -35,21 +118,11 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
   if (FirstMI->getOpcode() != RISCV::LUI)
     return false;
 
-  Register FirstDest = FirstMI->getOperand(0).getReg();
-
-  // Destination of LUI should be the ADDI(W) source register.
-  if (SecondMI.getOperand(1).getReg() != FirstDest)
+  // The first operand of ADDI might be a frame index.
+  if (!SecondMI.getOperand(1).isReg())
     return false;
 
-  // If the input is virtual make sure this is the only user.
-  if (FirstDest.isVirtual()) {
-    auto &MRI = SecondMI.getMF()->getRegInfo();
-    return MRI.hasOneNonDBGUse(FirstDest);
-  }
-
-  // If the FirstMI destination is non-virtual, it should match the SecondMI
-  // destination.
-  return SecondMI.getOperand(0).getReg() == FirstDest;
+  return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
 }
 
 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -61,6 +134,15 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
     return true;
 
+  if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
+    return true;
+
+  if (ST.hasSLLISRLIFusion() && isSLLISRLI(FirstMI, SecondMI))
+    return true;
+
+  if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index c135021333acabc..00a2481c84e8b6b 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -190,7 +190,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
     return UserReservedRegister[i];
   }
 
-  bool hasMacroFusion() const { return hasLUIADDIFusion(); }
+  bool hasMacroFusion() const {
+    return hasLUIADDIFusion() || hasAUIPCADDIFusion() || hasSLLISRLIFusion() ||
+           hasLDADDFusion();
+  }
 
   // Vector codegen related methods.
   bool hasVInstructions() const { return HasStdExtZve32x; }

>From 3498c0ddc669260d01759fd296d281652da288ba Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at gmail.com>
Date: Wed, 25 Oct 2023 01:09:03 -0400
Subject: [PATCH 2/4] addressed review comments.

---
 llvm/lib/Target/RISCV/RISCVMacroFusion.cpp | 23 ++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index c33b3503aed0f97..5e64c5799fd2512 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -19,6 +19,9 @@
 using namespace llvm;
 
 static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) {
+  if (!SecondMI.getOperand(1).isReg())
+    return false;
+
   if (SecondMI.getOperand(1).getReg() != FirstDest)
     return false;
 
@@ -53,8 +56,16 @@ static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
   return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
 }
 
-// Fuse SLLI by 32 feeding into SRLI by 32 or less or
-// SLLI by exactly 48 feeding into SRLI by exactly 48.
+// Fuse these patterns:
+//
+// $rd = slli $rs0, 32
+// $rd = srli $rs1, x
+// where 0 <= x <= 32
+//
+// and
+//
+// $rd = slli $rs0, 48
+// $rd = srli $rs1, 48
 static bool isSLLISRLI(const MachineInstr *FirstMI,
                        const MachineInstr &SecondMI) {
   if (SecondMI.getOpcode() != RISCV::SRLI)
@@ -95,10 +106,6 @@ static bool isAUIPCADDI(const MachineInstr *FirstMI,
   if (FirstMI->getOpcode() != RISCV::AUIPC)
     return false;
 
-  // The first operand of ADDI might be a frame index.
-  if (!SecondMI.getOperand(1).isReg())
-    return false;
-
   return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
 }
 
@@ -118,10 +125,6 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
   if (FirstMI->getOpcode() != RISCV::LUI)
     return false;
 
-  // The first operand of ADDI might be a frame index.
-  if (!SecondMI.getOperand(1).isReg())
-    return false;
-
   return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
 }
 

>From 265da9a32a55b54525d59b5507212d8aeeee9cd7 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at gmail.com>
Date: Thu, 26 Oct 2023 16:06:10 -0400
Subject: [PATCH 3/4] Corrected slli-srli fusion - the immediate shift amounts
 have to be the same.

---
 llvm/lib/Target/RISCV/RISCVFeatures.td     | 7 -------
 llvm/lib/Target/RISCV/RISCVMacroFusion.cpp | 2 +-
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index b5da4f801cd5854..20d66717ebc2791 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -987,11 +987,6 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                    [TuneNoDefaultUnroll,
                                     TuneShortForwardBranchOpt]>;
 
-<<<<<<< HEAD
-def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
-                                         "Ventana-Veyron Series processors",
-                                         [TuneLUIADDIFusion]>;
-=======
 def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
                                          "Ventana Veyron-Series processors",
                                          [TuneLUIADDIFusion,
@@ -999,8 +994,6 @@ def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "V
                                           TuneSLLISRLIFusion,
                                           TuneLDADDFusion]>;
 
->>>>>>> bbf0196a7d42 ([RISCV] Macro-fusion support for veyron-v1 CPU.)
-
 // Assume that lock-free native-width atomics are available, even if the target
 // and operating system combination would not usually provide them. The user
 // is responsible for providing any necessary __sync implementations. Code
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index 5e64c5799fd2512..2d5094bd2122e54 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -88,7 +88,7 @@ static bool isSLLISRLI(const MachineInstr *FirstMI,
     return false;
 
   unsigned SLLIImm = FirstMI->getOperand(2).getImm();
-  if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm > 32))
+  if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32))
     return false;
 
   return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);

>From 39198f7ebef48f9085bb210b869f44acefa816b9 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at gmail.com>
Date: Fri, 10 Nov 2023 15:19:15 -0500
Subject: [PATCH 4/4] Added test.

---
 llvm/lib/Target/RISCV/RISCVProcessors.td      |   2 +-
 .../CodeGen/RISCV/macro-fusions-veyron-v1.mir | 159 ++++++++++++++++++
 2 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir

diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 95389b07e9c1cdb..3f792b913ddbe3b 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -251,7 +251,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                              FeatureStdExtZicbop,
                                              FeatureStdExtZicboz,
                                              FeatureVendorXVentanaCondOps],
-                                             [TuneVentanaVeyron]>;
+                                             [TuneVeyronFusions]>;
 
 def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
                                           NoSchedModel,
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir b/llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir
new file mode 100644
index 000000000000000..1beda6c78e1e2d1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir
@@ -0,0 +1,159 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=riscv64-linux-gnu  -mcpu=veyron-v1 -x=mir < %s \
+# RUN:   -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
+# RUN:   -mattr=+lui-addi-fusion,+auipc-addi-fusion,+slli-srli-fusion,+ld-add-fusion \
+# RUN:   | FileCheck %s
+
+# CHECK: lui_addi:%bb.0
+# CHECK: Macro fuse: {{.*}}LUI - ADDI
+---
+name: lui_addi
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = LUI 1
+    %3:gpr = ADDI %1, 2
+    %4:gpr = ADDI %2, 3
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: auipc_addi
+# CHECK: Macro fuse: {{.*}}AUIPC - ADDI
+---
+name: auipc_addi
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = AUIPC 1
+    %3:gpr = ADDI %1, 2
+    %4:gpr = ADDI %2, 3
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: slli_srli
+# CHECK: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: slli_srli
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = SLLI %1, 32
+    %3:gpr = ADDI %1, 3
+    %4:gpr = SRLI %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: slli_srli_48
+# CHECK: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: slli_srli_48
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = SLLI %1, 48
+    %3:gpr = ADDI %1, 3
+    %4:gpr = SRLI %2, 48
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: slli_srli_no_fusion_0
+# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: slli_srli_no_fusion_0
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = SLLI %1, 32
+    %3:gpr = ADDI %1, 3
+    %4:gpr = SRLI %2, 33
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: slli_srli_no_fusion_1
+# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: slli_srli_no_fusion_1
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = SLLI %1, 48
+    %3:gpr = ADDI %1, 3
+    %4:gpr = SRLI %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: slli_srli_no_fusion_2
+# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: slli_srli_no_fusion_2
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = SLLI %1, 31
+    %3:gpr = ADDI %1, 3
+    %4:gpr = SRLI %2, 4
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: slli_srli_no_fusion_3
+# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: slli_srli_no_fusion_3
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+    %1:gpr = COPY $x10
+    %2:gpr = SLLI %1, 31
+    %3:gpr = ADDI %1, 3
+    %4:gpr = SRLI %2, 48
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...
+
+# CHECK: ld_add
+# CHECK: Macro fuse: {{.*}}ADD - LD
+---
+name: ld_add
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+    %1:gpr = COPY $x10
+    %2:gpr = COPY $x11
+    %3:gpr = ADD %1, %2
+    %4:gpr = ADDI %2, 3
+    %5:gpr = LD %3, 0
+    $x10 = COPY %4
+    $x11 = COPY %5
+    PseudoRET
+...



More information about the llvm-commits mailing list