[llvm] [RISCV] Add macro fusions for Xiangshan (PR #72362)

Wang Pengcheng via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 21 00:46:35 PST 2023


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/72362

>From e910bae5eefbe42e88f7df1938a544866cac2aca Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Fri, 10 Nov 2023 17:58:00 +0800
Subject: [PATCH 1/6] [MacroFusion] Support multiple predicators

The user can provide multiple predicators to MacroFusion and the
DAG mutation will be applied if one of them is evalated to true.

`ShouldSchedulePredTy` is renamed to `MacroFusionPredTy`.
---
 llvm/include/llvm/CodeGen/MacroFusion.h       | 20 ++++++-----
 llvm/lib/CodeGen/MacroFusion.cpp              | 36 ++++++++++++-------
 .../lib/Target/AArch64/AArch64MacroFusion.cpp |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp  |  2 +-
 llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp       |  4 +--
 llvm/lib/Target/ARM/ARMMacroFusion.cpp        |  2 +-
 llvm/lib/Target/PowerPC/PPCMacroFusion.cpp    |  2 +-
 llvm/lib/Target/RISCV/RISCVMacroFusion.cpp    |  2 +-
 llvm/lib/Target/X86/X86MacroFusion.cpp        |  2 +-
 9 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MacroFusion.h b/llvm/include/llvm/CodeGen/MacroFusion.h
index ea2c7a5faae385a..c2c08c6c84bce51 100644
--- a/llvm/include/llvm/CodeGen/MacroFusion.h
+++ b/llvm/include/llvm/CodeGen/MacroFusion.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_CODEGEN_MACROFUSION_H
 #define LLVM_CODEGEN_MACROFUSION_H
 
-#include <functional>
 #include <memory>
+#include <vector>
 
 namespace llvm {
 
@@ -29,10 +29,10 @@ class SUnit;
 /// Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
-using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII,
-                                                const TargetSubtargetInfo &TSI,
-                                                const MachineInstr *FirstMI,
-                                                const MachineInstr &SecondMI)>;
+using MacroFusionPredTy = bool (*)(const TargetInstrInfo &TII,
+                                   const TargetSubtargetInfo &STI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI);
 
 /// Checks if the number of cluster edges between SU and its predecessors is
 /// less than FuseLimit
@@ -48,15 +48,17 @@ bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
 
 /// Create a DAG scheduling mutation to pair instructions back to back
 /// for instructions that benefit according to the target-specific
-/// shouldScheduleAdjacent predicate function.
+/// predicate functions. shouldScheduleAdjacent will be true if any of the
+/// provided predicates are true.
 std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent);
+createMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates);
 
 /// Create a DAG scheduling mutation to pair branch instructions with one
 /// of their predecessors back to back for instructions that benefit according
-/// to the target-specific shouldScheduleAdjacent predicate function.
+/// to the target-specific predicate functions. shouldScheduleAdjacent will be
+/// true if any of the provided predicates are true.
 std::unique_ptr<ScheduleDAGMutation>
-createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent);
+createBranchMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index fa5df68b8abcc0f..d79e9572050b066 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -137,19 +137,33 @@ namespace {
 /// Post-process the DAG to create cluster edges between instrs that may
 /// be fused by the processor into a single operation.
 class MacroFusion : public ScheduleDAGMutation {
-  ShouldSchedulePredTy shouldScheduleAdjacent;
+  std::vector<MacroFusionPredTy> Predicates;
   bool FuseBlock;
   bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);
 
 public:
-  MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
-    : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
+  MacroFusion(std::vector<MacroFusionPredTy> Predicates, bool FuseBlock)
+      : Predicates(std::move(Predicates)), FuseBlock(FuseBlock) {}
 
   void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
+  bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                              const TargetSubtargetInfo &STI,
+                              const MachineInstr *FirstMI,
+                              const MachineInstr &SecondMI);
 };
 
 } // end anonymous namespace
 
+bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                                         const TargetSubtargetInfo &STI,
+                                         const MachineInstr *FirstMI,
+                                         const MachineInstr &SecondMI) {
+  return llvm::any_of(Predicates, [&](MacroFusionPredTy Predicate) {
+    return Predicate(TII, STI, FirstMI, SecondMI);
+  });
+}
+
 void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
   if (FuseBlock)
     // For each of the SUnits in the scheduling block, try to fuse the instr in
@@ -197,17 +211,15 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
 }
 
 std::unique_ptr<ScheduleDAGMutation>
-llvm::createMacroFusionDAGMutation(
-     ShouldSchedulePredTy shouldScheduleAdjacent) {
-  if(EnableMacroFusion)
-    return std::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+llvm::createMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates) {
+  if (EnableMacroFusion)
+    return std::make_unique<MacroFusion>(std::move(Predicates), true);
   return nullptr;
 }
 
-std::unique_ptr<ScheduleDAGMutation>
-llvm::createBranchMacroFusionDAGMutation(
-     ShouldSchedulePredTy shouldScheduleAdjacent) {
-  if(EnableMacroFusion)
-    return std::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+std::unique_ptr<ScheduleDAGMutation> llvm::createBranchMacroFusionDAGMutation(
+    std::vector<MacroFusionPredTy> Predicates) {
+  if (EnableMacroFusion)
+    return std::make_unique<MacroFusion>(std::move(Predicates), false);
   return nullptr;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 05d60872bf51aca..8f46f3eabb3ef45 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -478,5 +478,5 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 
 std::unique_ptr<ScheduleDAGMutation>
 llvm::createAArch64MacroFusionDAGMutation() {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index 0cbabf3895a67ed..b2b11d661523e9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -60,7 +60,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
 namespace llvm {
 
 std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation() {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
index 29c9b9ccf27614f..0bddeeef9e9b1a3 100644
--- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -142,10 +142,10 @@ namespace {
 /// be turned into VOPD instructions
 /// Greedily pairs instruction candidates. O(n^2) algorithm.
 struct VOPDPairingMutation : ScheduleDAGMutation {
-  ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer
+  MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
 
   VOPDPairingMutation(
-      ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
+      MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
       : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
 
   void apply(ScheduleDAGInstrs *DAG) override {
diff --git a/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/llvm/lib/Target/ARM/ARMMacroFusion.cpp
index 5aeb7abe92a38c8..7de117925e464fe 100644
--- a/llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -63,7 +63,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 }
 
 std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation() {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index 7ad6ef8c39286d7..d6a4a5dd5faabae 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -287,7 +287,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 namespace llvm {
 
 std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation() {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index 02a8d5c18fe1a0e..1b82cc8b5b858f4 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -65,5 +65,5 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 }
 
 std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp
index 82667b8cdbdb87d..382cc9a71c2a184 100644
--- a/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -68,7 +68,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 namespace llvm {
 
 std::unique_ptr<ScheduleDAGMutation> createX86MacroFusionDAGMutation() {
-  return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
+  return createBranchMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
 
 } // end namespace llvm

>From 3787fedf10799ad2259a8e1229a910178bf8b40d Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Fri, 10 Nov 2023 18:18:18 +0800
Subject: [PATCH 2/6] [TableGen] Add a backend to generate MacroFusion
 predicators

`FusionPredicate` is used to predicate if target instruction matches
the requirement. The targets can be firstMI, secondMI or both.

The `Fusion` contains a list of `FusionPredicate`. The generated code
will be like:
```
bool isNAME(const TargetInstrInfo &TII,
            const TargetSubtargetInfo &STI,
            const MachineInstr *FirstMI,
            const MachineInstr &SecondMI) {
  auto &MRI = SecondMI.getMF()->getRegInfo();
  /* Predicates */
  return true;
}
```

A boilerplate class called `SimpleFusion` is added. `SimpleFusion` has
a predefined structure of predicates and accepts predicate for
`firstMI`, predicate for `secondMI` and epilog/prolog as arguments.
The generated code for `SimpleFusion` will be like:
```
bool isNAME(const TargetInstrInfo &TII,
            const TargetSubtargetInfo &STI,
            const MachineInstr *FirstMI,
            const MachineInstr &SecondMI) {
  auto &MRI = SecondMI.getMF()->getRegInfo();
  /* Prolog */
  /* Predicate for `SecondMI` */
  /* Wildcard */
  /* Predicate for `FirstMI` */
  /* Check One Use */
  /* Tie registers */
  /* Epilog */
  return true;
}
```
---
 .../llvm/Target/TargetInstrPredicate.td       |   6 +
 llvm/include/llvm/Target/TargetSchedule.td    | 113 ++++++++++
 llvm/utils/TableGen/CMakeLists.txt            |   1 +
 .../TableGen/MacroFusionPredicatorEmitter.cpp | 204 ++++++++++++++++++
 llvm/utils/TableGen/PredicateExpander.cpp     |   8 +
 llvm/utils/TableGen/PredicateExpander.h       |   1 +
 6 files changed, 333 insertions(+)
 create mode 100644 llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp

diff --git a/llvm/include/llvm/Target/TargetInstrPredicate.td b/llvm/include/llvm/Target/TargetInstrPredicate.td
index 9f2cde9d923050a..82c4c7b23a49b6a 100644
--- a/llvm/include/llvm/Target/TargetInstrPredicate.td
+++ b/llvm/include/llvm/Target/TargetInstrPredicate.td
@@ -95,6 +95,12 @@ class MCOperandPredicate<int Index> : MCInstPredicate {
 // Return true if machine operand at position `Index` is a register operand.
 class CheckIsRegOperand<int Index> : MCOperandPredicate<Index>;
 
+// Return true if machine operand at position `Index` is a virtual register operand.
+class CheckIsVRegOperand<int Index> : MCOperandPredicate<Index>;
+
+// Return true if machine operand at position `Index` is not a virtual register operand.
+class CheckIsNotVRegOperand<int Index> : CheckNot<CheckIsVRegOperand<Index>>;
+
 // Return true if machine operand at position `Index` is an immediate operand.
 class CheckIsImmOperand<int Index> : MCOperandPredicate<Index>;
 
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 949baa5d2105c45..74c9cd085b4ee2e 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -584,3 +584,116 @@ class MemoryQueue<ProcResourceKind PR> {
 
 class LoadQueue<ProcResourceKind LDQueue> : MemoryQueue<LDQueue>;
 class StoreQueue<ProcResourceKind STQueue> : MemoryQueue<STQueue>;
+
+// The target instruction that FusionPredicate will be evaluated on.
+class FusionTarget;
+def first : FusionTarget;
+def second : FusionTarget;
+def both : FusionTarget;
+
+// Base class of FusionPredicate, etc. The avaliable variables are:
+// * const TargetInstrInfo &TII
+// * const TargetSubtargetInfo &STI
+// * const MachineRegisterInfo &MRI
+// * const MachineInstr *FirstMI
+// * const MachineInstr &SecondMI
+class FusionPredicate<FusionTarget target> {
+  FusionTarget Target = target;
+}
+class FirstFusionPredicate: FusionPredicate<first>;
+class SecondFusionPredicate: FusionPredicate<second>;
+class BothFusionPredicate: FusionPredicate<both>;
+
+// FusionPredicate with raw code predicate.
+class FusionPredicateWithCode<code pred> : FusionPredicate<both> {
+  code Predicate = pred;
+}
+
+// FusionPredicate with MCInstPredicate.
+class FusionPredicateWithMCInstPredicate<FusionTarget target, MCInstPredicate pred>
+  : FusionPredicate<target> {
+  MCInstPredicate Predicate = pred;
+}
+class FirstFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
+  : FusionPredicateWithMCInstPredicate<first, pred>;
+class SecondFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
+  : FusionPredicateWithMCInstPredicate<second, pred>;
+// The pred will be applied on both firstMI and secondMI.
+class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
+  : FusionPredicateWithMCInstPredicate<second, pred>;
+
+// Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position
+// `firstOpIdx` should be the same as the operand of `SecondMI` at position
+// `secondOpIdx`.
+class TieReg<int firstOpIdx, int secondOpIdx> : BothFusionPredicate {
+  int FirstOpIdx = firstOpIdx;
+  int SecondOpIdx = secondOpIdx;
+}
+
+// A predicate for wildcard. The generated code will be like:
+// ```
+// if (!FirstMI)
+//   return ReturnValue;
+// ```
+class WildcardPred<bit ret> : FirstFusionPredicate {
+  bit ReturnValue = ret;
+}
+def WildcardFalse : WildcardPred<0>;
+def WildcardTrue : WildcardPred<1>;
+
+// Indicates that the destination register of `FirstMI` should have one use if
+// it is a virtual register.
+class OneUsePred : FirstFusionPredicate;
+def OneUse : OneUsePred;
+
+// Handled by MacroFusionPredicatorEmitter backend.
+// The generated predicator will be like:
+// ```
+// bool isNAME(const TargetInstrInfo &TII,
+//             const TargetSubtargetInfo &STI,
+//             const MachineInstr *FirstMI,
+//             const MachineInstr &SecondMI) {
+//   auto &MRI = SecondMI.getMF()->getRegInfo();
+//   /* Predicates */
+//   return true;
+// }
+// ```
+class Fusion<list<FusionPredicate> predicates> {
+  list<FusionPredicate> Predicates = predicates;
+}
+
+// The generated predicator will be like:
+// ```
+// bool isNAME(const TargetInstrInfo &TII,
+//             const TargetSubtargetInfo &STI,
+//             const MachineInstr *FirstMI,
+//             const MachineInstr &SecondMI) {
+//   auto &MRI = SecondMI.getMF()->getRegInfo();
+//   /* Prolog */
+//   /* Predicate for `SecondMI` */
+//   /* Wildcard */
+//   /* Predicate for `FirstMI` */
+//   /* Check One Use */
+//   /* Tie registers */
+//   /* Epilog */
+//   return true;
+// }
+// ```
+class SimpleFusion<MCInstPredicate firstPred, MCInstPredicate secondPred,
+                   list<FusionPredicate> prolog = [],
+                   list<FusionPredicate> epilog = []>
+  : Fusion<!listconcat(
+                  prolog,
+                  [
+                    SecondFusionPredicateWithMCInstPredicate<secondPred>,
+                    WildcardTrue,
+                    FirstFusionPredicateWithMCInstPredicate<firstPred>,
+                    SecondFusionPredicateWithMCInstPredicate<
+                      CheckAny<[
+                        CheckIsVRegOperand<0>,
+                        CheckSameRegOperand<0, 1>
+                      ]>>,
+                    OneUse,
+                    TieReg<0, 1>,
+                  ],
+                  epilog)>;
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 071ea3bc07054bb..f765cc36d3bebed 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -72,6 +72,7 @@ add_tablegen(llvm-tblgen LLVM
   PredicateExpander.cpp
   PseudoLoweringEmitter.cpp
   CompressInstEmitter.cpp
+  MacroFusionPredicatorEmitter.cpp
   RegisterBankEmitter.cpp
   RegisterInfoEmitter.cpp
   SearchableTableEmitter.cpp
diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
new file mode 100644
index 000000000000000..efca602b476f35c
--- /dev/null
+++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
@@ -0,0 +1,204 @@
+//===------ MacroFusionPredicatorEmitter.cpp - Generator for Fusion ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// MacroFusionPredicatorEmitter implements a TableGen-driven predicators
+// generator for macro-op fusions.
+//
+//===---------------------------------------------------------------------===//
+
+#include "CodeGenTarget.h"
+#include "PredicateExpander.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "macro-fusion-predicator"
+
+namespace {
+class MacroFusionPredicatorEmitter {
+  RecordKeeper &Records;
+  CodeGenTarget Target;
+
+  void emitMacroFusionDecl(std::vector<Record *> Fusions, PredicateExpander &PE,
+                           raw_ostream &OS);
+  void emitMacroFusionImpl(std::vector<Record *> Fusions, PredicateExpander &PE,
+                           raw_ostream &OS);
+  void emitPredicates(std::vector<Record *> &FirstPredicate,
+                      PredicateExpander &PE, raw_ostream &OS);
+  void emitFirstPredicate(Record *SecondPredicate, PredicateExpander &PE,
+                          raw_ostream &OS);
+  void emitSecondPredicate(Record *SecondPredicate, PredicateExpander &PE,
+                           raw_ostream &OS);
+  void emitBothPredicate(Record *Predicates, PredicateExpander &PE,
+                         raw_ostream &OS);
+
+public:
+  MacroFusionPredicatorEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+
+  void run(raw_ostream &OS);
+};
+} // End anonymous namespace.
+
+void MacroFusionPredicatorEmitter::emitMacroFusionDecl(
+    std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
+  OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n\n";
+
+  for (Record *Fusion : Fusions) {
+    OS << "bool is" << Fusion->getName() << "(const TargetInstrInfo &, "
+       << "const TargetSubtargetInfo &, "
+       << "const MachineInstr *, "
+       << "const MachineInstr &);\n";
+  }
+
+  OS << "\n#endif\n";
+  OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n";
+}
+
+void MacroFusionPredicatorEmitter::emitMacroFusionImpl(
+    std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
+  OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n";
+
+  for (Record *Fusion : Fusions) {
+    std::vector<Record *> Predicates =
+        Fusion->getValueAsListOfDefs("Predicates");
+
+    OS << "bool is" << Fusion->getName() << "(\n";
+    OS.indent(5) << "const TargetInstrInfo &TII,\n";
+    OS.indent(5) << "const TargetSubtargetInfo &STI,\n";
+    OS.indent(5) << "const MachineInstr *FirstMI,\n";
+    OS.indent(5) << "const MachineInstr &SecondMI) {\n";
+    OS.indent(2) << "auto &MRI = SecondMI.getMF()->getRegInfo();\n";
+
+    emitPredicates(Predicates, PE, OS);
+
+    OS.indent(2) << "return true;\n";
+    OS << "}\n";
+  }
+
+  OS << "\n#endif\n";
+  OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n";
+}
+
+void MacroFusionPredicatorEmitter::emitPredicates(
+    std::vector<Record *> &Predicates, PredicateExpander &PE, raw_ostream &OS) {
+  for (Record *Predicate : Predicates) {
+    Record *Target = Predicate->getValueAsDef("Target");
+    if (Target->getName() == "first")
+      emitFirstPredicate(Predicate, PE, OS);
+    else if (Target->getName() == "second")
+      emitSecondPredicate(Predicate, PE, OS);
+    else if (Target->getName() == "both")
+      emitBothPredicate(Predicate, PE, OS);
+    else
+      PrintFatalError(Target->getLoc(),
+                      "Unsupported 'FusionTarget': " + Target->getName());
+  }
+}
+
+void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate,
+                                                      PredicateExpander &PE,
+                                                      raw_ostream &OS) {
+  if (Predicate->isSubClassOf("WildcardPred")) {
+    OS.indent(2) << "if (!FirstMI)\n";
+    OS.indent(2) << "  return "
+                 << (Predicate->getValueAsBit("ReturnValue") ? "true" : "false")
+                 << ";\n";
+  } else if (Predicate->isSubClassOf("OneUsePred")) {
+    OS.indent(2) << "{\n";
+    OS.indent(4) << "Register FirstDest = FirstMI->getOperand(0).getReg();\n";
+    OS.indent(4)
+        << "if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))\n";
+    OS.indent(4) << "  return false;\n";
+    OS.indent(2) << "}\n";
+  } else if (Predicate->isSubClassOf(
+                 "FirstFusionPredicateWithMCInstPredicate")) {
+    OS.indent(2) << "{\n";
+    OS.indent(4) << "const MachineInstr *MI = FirstMI;\n";
+    OS.indent(4) << "if (";
+    PE.setNegatePredicate(true);
+    PE.setIndentLevel(3);
+    PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate"));
+    OS << ")\n";
+    OS.indent(4) << "  return false;\n";
+    OS.indent(2) << "}\n";
+  } else {
+    PrintFatalError(Predicate->getLoc(),
+                    "Unsupported predicate for first instruction: " +
+                        Predicate->getType()->getAsString());
+  }
+}
+
+void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
+                                                       PredicateExpander &PE,
+                                                       raw_ostream &OS) {
+  if (Predicate->isSubClassOf("SecondFusionPredicateWithMCInstPredicate")) {
+    OS.indent(2) << "{\n";
+    OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n";
+    OS.indent(4) << "if (";
+    PE.setNegatePredicate(true);
+    PE.setIndentLevel(3);
+    PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate"));
+    OS << ")\n";
+    OS.indent(4) << "  return false;\n";
+    OS.indent(2) << "}\n";
+  } else {
+    PrintFatalError(Predicate->getLoc(),
+                    "Unsupported predicate for first instruction: " +
+                        Predicate->getType()->getAsString());
+  }
+}
+
+void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate,
+                                                     PredicateExpander &PE,
+                                                     raw_ostream &OS) {
+  if (Predicate->isSubClassOf("FusionPredicateWithCode"))
+    OS << Predicate->getValueAsString("Predicate");
+  else if (Predicate->isSubClassOf("BothFusionPredicateWithMCInstPredicate")) {
+    Record *MCPred = Predicate->getValueAsDef("Predicate");
+    emitFirstPredicate(MCPred, PE, OS);
+    emitSecondPredicate(MCPred, PE, OS);
+  } else if (Predicate->isSubClassOf("TieReg")) {
+    int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx");
+    int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx");
+    OS.indent(2) << "if (!(FirstMI->getOperand(" << FirstOpIdx
+                 << ").isReg() &&\n";
+    OS.indent(2) << "      SecondMI.getOperand(" << SecondOpIdx
+                 << ").isReg() &&\n";
+    OS.indent(2) << "      FirstMI->getOperand(" << FirstOpIdx
+                 << ").getReg() == SecondMI.getOperand(" << SecondOpIdx
+                 << ").getReg()))\n";
+    OS.indent(2) << "  return false;\n";
+  } else
+    PrintFatalError(Predicate->getLoc(),
+                    "Unsupported predicate for both instruction: " +
+                        Predicate->getType()->getAsString());
+}
+
+void MacroFusionPredicatorEmitter::run(raw_ostream &OS) {
+  // Emit file header.
+  emitSourceFileHeader("Macro Fusion Predicators", OS);
+
+  PredicateExpander PE(Target.getName());
+  PE.setByRef(false);
+  PE.setExpandForMC(false);
+
+  std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion");
+  // Sort macro fusions by name.
+  sort(Fusions, LessRecord());
+  emitMacroFusionDecl(Fusions, PE, OS);
+  emitMacroFusionImpl(Fusions, PE, OS);
+}
+
+static TableGen::Emitter::OptClass<MacroFusionPredicatorEmitter>
+    X("gen-macro-fusion-pred", "Generate macro fusion predicators.");
diff --git a/llvm/utils/TableGen/PredicateExpander.cpp b/llvm/utils/TableGen/PredicateExpander.cpp
index 8f96d3307ded8be..d3a73e02cd916f8 100644
--- a/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/PredicateExpander.cpp
@@ -194,6 +194,11 @@ void PredicateExpander::expandCheckIsRegOperand(raw_ostream &OS, int OpIndex) {
      << "getOperand(" << OpIndex << ").isReg() ";
 }
 
+void PredicateExpander::expandCheckIsVRegOperand(raw_ostream &OS, int OpIndex) {
+  OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
+     << "getOperand(" << OpIndex << ").getReg().isVirtual()";
+}
+
 void PredicateExpander::expandCheckIsImmOperand(raw_ostream &OS, int OpIndex) {
   OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
      << "getOperand(" << OpIndex << ").isImm() ";
@@ -319,6 +324,9 @@ void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) {
   if (Rec->isSubClassOf("CheckIsRegOperand"))
     return expandCheckIsRegOperand(OS, Rec->getValueAsInt("OpIndex"));
 
+  if (Rec->isSubClassOf("CheckIsVRegOperand"))
+    return expandCheckIsVRegOperand(OS, Rec->getValueAsInt("OpIndex"));
+
   if (Rec->isSubClassOf("CheckIsImmOperand"))
     return expandCheckIsImmOperand(OS, Rec->getValueAsInt("OpIndex"));
 
diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h
index 27f049a715aad56..cfb0a3d51e67764 100644
--- a/llvm/utils/TableGen/PredicateExpander.h
+++ b/llvm/utils/TableGen/PredicateExpander.h
@@ -75,6 +75,7 @@ class PredicateExpander {
                                bool IsCheckAll);
   void expandTIIFunctionCall(raw_ostream &OS, StringRef MethodName);
   void expandCheckIsRegOperand(raw_ostream &OS, int OpIndex);
+  void expandCheckIsVRegOperand(raw_ostream &OS, int OpIndex);
   void expandCheckIsImmOperand(raw_ostream &OS, int OpIndex);
   void expandCheckInvalidRegOperand(raw_ostream &OS, int OpIndex);
   void expandCheckFunctionPredicate(raw_ostream &OS, StringRef MCInstFn,

>From f4cd61c08286c810990807b2fcbcacbd18c0fa48 Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Tue, 14 Nov 2023 10:48:27 +0800
Subject: [PATCH 3/6] [TableGen] Add MacroFusions to SchedMachineModel

A list of `MacroFusion` is added to `SchedMachineModel` to indicate
supported macro fusions of this processor model.

For visibility, C++ enums of all MacroFusion are generated and new
`hasMacroFusion` is added to Subtarget to test if a MacroFusion is
supported.
---
 .../llvm/CodeGen/TargetSubtargetInfo.h        |  4 ++
 llvm/include/llvm/MC/MCSchedule.h             |  9 ++++
 llvm/include/llvm/MC/MCSubtargetInfo.h        | 13 +++++
 llvm/include/llvm/Target/TargetSchedule.td    |  4 ++
 llvm/lib/MC/MCSchedule.cpp                    |  1 +
 llvm/utils/TableGen/CodeGenSchedule.cpp       | 15 ++++++
 llvm/utils/TableGen/CodeGenSchedule.h         | 11 +++++
 .../TableGen/MacroFusionPredicatorEmitter.cpp | 30 ++++++++++++
 llvm/utils/TableGen/SubtargetEmitter.cpp      | 47 ++++++++++++++++++-
 9 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 55ef95c28543190..7c76293f3e5eaea 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MacroFusion.h"
 #include "llvm/CodeGen/PBQPRAConstraint.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/IR/GlobalValue.h"
@@ -323,6 +324,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
   /// helps removing redundant copies generated by register allocator when
   /// handling complex eviction chains.
   virtual bool enableSpillageCopyElimination() const { return false; }
+
+  /// Get the list of MacroFusion predicates.
+  virtual std::vector<MacroFusionPredTy> getMacroFusions() const { return {}; }
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index 98ebe42cfd133b5..aa187e5cb400672 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_MC_MCSCHEDULE_H
 #define LLVM_MC_MCSCHEDULE_H
 
+#include "llvm/ADT/Bitset.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
@@ -196,6 +197,9 @@ struct MCExtraProcessorInfo {
   unsigned StoreQueueID;
 };
 
+const unsigned MaxMacroFusions = 256;
+using MacroFusionBitset = Bitset<MaxMacroFusions>;
+
 /// Machine model for scheduling, bundling, and heuristics.
 ///
 /// The machine model directly provides basic information about the
@@ -325,9 +329,14 @@ struct MCSchedModel {
   const InstrItinerary *InstrItineraries;
 
   const MCExtraProcessorInfo *ExtraProcessorInfo;
+  const MacroFusionBitset *MacroFusionBits;
 
   bool hasExtraProcessorInfo() const { return ExtraProcessorInfo; }
 
+  const MacroFusionBitset *getMacroFusionBits() const {
+    return MacroFusionBits;
+  }
+
   unsigned getProcessorID() const { return ProcID; }
 
   /// Does this machine model include instruction-level scheduling.
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index f172a799aa3331c..66fb6c9383272e1 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -120,6 +120,12 @@ class MCSubtargetInfo {
     return FeatureBits[Feature];
   }
 
+  bool hasMacroFusion(unsigned MacroFusion) const {
+    const MacroFusionBitset *MacroFusionBits =
+        CPUSchedModel->getMacroFusionBits();
+    return MacroFusionBits && MacroFusionBits->test(MacroFusion);
+  }
+
 protected:
   /// Initialize the scheduling model and feature bits.
   ///
@@ -295,6 +301,13 @@ class MCSubtargetInfo {
 
   /// \return if target want to issue a prefetch in address space \p AS.
   virtual bool shouldPrefetchAddressSpace(unsigned AS) const;
+
+  /// Enable macro fusion for this subtarget.
+  virtual bool enableMacroFusion() const {
+    const MacroFusionBitset *MacroFusionBits =
+        CPUSchedModel->getMacroFusionBits();
+    return MacroFusionBits && MacroFusionBits->any();
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 74c9cd085b4ee2e..681f367132e8b92 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -53,6 +53,7 @@
 include "llvm/Target/TargetItinerary.td"
 
 class Predicate; // Forward def
+class Fusion;
 
 // DAG operator that interprets the DAG args as Instruction defs.
 def instrs;
@@ -122,6 +123,9 @@ class SchedMachineModel {
   // using intervals via ResourceSegments (see
   // llvm/include/llvm/CodeGen/MachineScheduler.h).
   bit EnableIntervals = false;
+
+  // List of Fusion.
+  list<Fusion> MacroFusions = [];
 }
 
 def NoSchedModel : SchedMachineModel {
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index 990a693559a7776..19c36cb0e58d9c1 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -37,6 +37,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
                                             0,
                                             0,
                                             nullptr,
+                                            nullptr,
                                             nullptr};
 
 int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index 54463da19821476..7411683363d29ab 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -226,6 +226,9 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
   // (For per-operand resources mapped to itinerary classes).
   collectProcUnsupportedFeatures();
 
+  // Find MacroFusion records for each processor.
+  collectMacroFusions();
+
   // Infer new SchedClasses from SchedVariant.
   inferSchedClasses();
 
@@ -1168,6 +1171,12 @@ bool CodeGenSchedModels::hasItineraries() const {
   return false;
 }
 
+bool CodeGenSchedModels::hasMacroFusions() const {
+  return llvm::any_of(ProcModels, [](const CodeGenProcModel &PM) {
+    return PM.hasMacroFusions();
+  });
+}
+
 // Gather the processor itineraries.
 void CodeGenSchedModels::collectProcItins() {
   LLVM_DEBUG(dbgs() << "\n+++ PROBLEM ITINERARIES (collectProcItins) +++\n");
@@ -1238,6 +1247,12 @@ void CodeGenSchedModels::collectProcUnsupportedFeatures() {
         ProcModel.ModelDef->getValueAsListOfDefs("UnsupportedFeatures"));
 }
 
+void CodeGenSchedModels::collectMacroFusions() {
+  for (CodeGenProcModel &ProcModel : ProcModels)
+    append_range(ProcModel.MacroFusions,
+                 ProcModel.ModelDef->getValueAsListOfDefs("MacroFusions"));
+}
+
 /// Infer new classes from existing classes. In the process, this may create new
 /// SchedWrites from sequences of existing SchedWrites.
 void CodeGenSchedModels::inferSchedClasses() {
diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h
index 76ef1e43953078e..317558c52fbef15 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/llvm/utils/TableGen/CodeGenSchedule.h
@@ -238,6 +238,10 @@ struct CodeGenProcModel {
   // This list is empty if the Processor has no UnsupportedFeatures.
   RecVec UnsupportedFeaturesDefs;
 
+  // List of MacroFusion.
+  // This list is empty if the Processor has no MacroFusion.
+  RecVec MacroFusions;
+
   // All read/write resources associated with this processor.
   RecVec WriteResDefs;
   RecVec ReadAdvanceDefs;
@@ -260,6 +264,8 @@ struct CodeGenProcModel {
     Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef),
     RetireControlUnit(nullptr), LoadQueue(nullptr), StoreQueue(nullptr) {}
 
+  bool hasMacroFusions() const { return !MacroFusions.empty(); }
+
   bool hasItineraries() const {
     return !ItinsDef->getValueAsListOfDefs("IID").empty();
   }
@@ -508,6 +514,9 @@ class CodeGenSchedModels {
   // Return true if any processors have itineraries.
   bool hasItineraries() const;
 
+  // Return true if any processors have MacroFusions.
+  bool hasMacroFusions() const;
+
   // Get a SchedWrite from its index.
   const CodeGenSchedRW &getSchedWrite(unsigned Idx) const {
     assert(Idx < SchedWrites.size() && "bad SchedWrite index");
@@ -610,6 +619,8 @@ class CodeGenSchedModels {
 
   void collectProcUnsupportedFeatures();
 
+  void collectMacroFusions();
+
   void inferSchedClasses();
 
   void checkMCInstPredicates() const;
diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
index efca602b476f35c..46a62c8cc6d3048 100644
--- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
+++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
@@ -30,6 +30,8 @@ class MacroFusionPredicatorEmitter {
   RecordKeeper &Records;
   CodeGenTarget Target;
 
+  void emitMacroFusionEnum(std::vector<Record *> Fusions, PredicateExpander &PE,
+                           raw_ostream &OS);
   void emitMacroFusionDecl(std::vector<Record *> Fusions, PredicateExpander &PE,
                            raw_ostream &OS);
   void emitMacroFusionImpl(std::vector<Record *> Fusions, PredicateExpander &PE,
@@ -50,6 +52,33 @@ class MacroFusionPredicatorEmitter {
 };
 } // End anonymous namespace.
 
+void MacroFusionPredicatorEmitter::emitMacroFusionEnum(
+    std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
+  unsigned N = Fusions.size();
+  if (N == 0)
+    return;
+  // 256 is `MaxMacroFusions` defined in MCSchedule.h
+  if (N > 256)
+    PrintFatalError("Too many macro fusions! Please bump MaxMacroFusions!");
+
+  OS << "#ifdef GET_MACRO_FUSION_ENUM\n\n";
+  OS << "namespace llvm {\n";
+  OS << "namespace " << Target.getName() << " {\n";
+  OS << "enum {\n";
+
+  for (unsigned Index = 0; Index < N; Index++) {
+    Record *Fusion = Fusions[Index];
+    // Get and emit name
+    OS << "  " << Fusion->getName() << " = " << Index << ",\n";
+  }
+
+  OS << "};\n";
+  OS << "} // end namespace " << Target.getName() << "\n";
+  OS << "} // end namespace llvm\n\n";
+  OS << "#endif\n";
+  OS << "#undef GET_MACRO_FUSION_ENUM\n\n";
+}
+
 void MacroFusionPredicatorEmitter::emitMacroFusionDecl(
     std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
   OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n\n";
@@ -196,6 +225,7 @@ void MacroFusionPredicatorEmitter::run(raw_ostream &OS) {
   std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion");
   // Sort macro fusions by name.
   sort(Fusions, LessRecord());
+  emitMacroFusionEnum(Fusions, PE, OS);
   emitMacroFusionDecl(Fusions, PE, OS);
   emitMacroFusionImpl(Fusions, PE, OS);
 }
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index f7a7172d61fc618..86dce9527bf059c 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -133,6 +133,8 @@ class SubtargetEmitter {
   void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS);
 
   void EmitSchedModel(raw_ostream &OS);
+  void emitMacroFusionBits(const CodeGenProcModel &ProcModel, raw_ostream &OS);
+  void emitGetMacroFusions(const std::string &ClassName, raw_ostream &OS);
   void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
   void ParseFeaturesFunction(raw_ostream &OS);
 
@@ -869,6 +871,17 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
   OS << "};\n";
 }
 
+void SubtargetEmitter::emitMacroFusionBits(const CodeGenProcModel &ProcModel,
+                                           raw_ostream &OS) {
+  OS << "\nstatic const MacroFusionBitset " << ProcModel.ModelName
+     << "MacroFusionBits  = {\n";
+  std::vector<std::string> Predicates;
+  for (auto *R : ProcModel.MacroFusions)
+    Predicates.push_back("  " + Target + "::" + R->getNameInitAsString());
+  OS << llvm::join(Predicates, ",\n");
+  OS << "\n};\n";
+}
+
 // Find the WriteRes Record that defines processor resources for this
 // SchedWrite.
 Record *SubtargetEmitter::FindWriteResources(
@@ -1441,6 +1454,8 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     else if(!PM.ProcResourceDefs.empty())
       PrintFatalError(PM.ModelDef->getLoc(), "SchedMachineModel defines "
                     "ProcResources without defining WriteRes SchedWriteRes");
+    if (PM.hasMacroFusions())
+      emitMacroFusionBits(PM, OS);
 
     // Begin processor itinerary properties
     OS << "\n";
@@ -1487,7 +1502,11 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     if (PM.hasExtraProcessorInfo())
       OS << "  &" << PM.ModelName << "ExtraInfo,\n";
     else
-      OS << "  nullptr // No extra processor descriptor\n";
+      OS << "  nullptr, // No extra processor descriptor\n";
+    if (PM.hasMacroFusions()) {
+      OS << "  &" << PM.ModelName << "MacroFusionBits,\n";
+    } else
+      OS << "  nullptr, // No macro fusions\n";
     OS << "};\n";
   }
 }
@@ -1770,6 +1789,27 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
     PE.expandSTIPredicate(OS, Fn);
 }
 
+void SubtargetEmitter::emitGetMacroFusions(const std::string &ClassName,
+                                           raw_ostream &OS) {
+  OS << "std::vector<MacroFusionPredTy> " << ClassName
+     << "::getMacroFusions() const {\n";
+  OS.indent(2) << "switch(getSchedModel().getProcessorID()) {\n";
+  for (auto &Proc : TGT.getSchedModels().procModels()) {
+    if (Proc.hasMacroFusions()) {
+      OS.indent(4) << "case " << Proc.Index << ": // " << Proc.ModelName
+                   << "\n";
+      OS.indent(4) << "  return {";
+      std::vector<std::string> Predicates;
+      for (auto *R : Proc.MacroFusions)
+        Predicates.push_back("is" + R->getNameInitAsString());
+      OS << llvm::join(Predicates, ", ");
+      OS << "};\n";
+    }
+  }
+  OS.indent(2) << "}\n";
+  OS.indent(2) << "return {};\n}\n";
+}
+
 void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
                                        raw_ostream &OS) {
   const CodeGenHwModes &CGH = TGT.getHwModes();
@@ -1987,6 +2027,9 @@ void SubtargetEmitter::run(raw_ostream &OS) {
      << " const;\n";
   if (TGT.getHwModes().getNumModeIds() > 1)
     OS << "  unsigned getHwMode() const override;\n";
+  if (TGT.getSchedModels().hasMacroFusions())
+    OS << "  std::vector<MacroFusionPredTy> getMacroFusions() const "
+          "override;\n";
 
   STIPredicateExpander PE(Target);
   PE.setByRef(false);
@@ -2044,6 +2087,8 @@ void SubtargetEmitter::run(raw_ostream &OS) {
 
   EmitSchedModelHelpers(ClassName, OS);
   EmitHwModeCheck(ClassName, OS);
+  if (TGT.getSchedModels().hasMacroFusions())
+    emitGetMacroFusions(ClassName, OS);
 
   OS << "} // end namespace llvm\n\n";
 

>From 31d8a994e68c6770c77a88d73cc8b3f27a4845df Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Tue, 14 Nov 2023 11:17:44 +0800
Subject: [PATCH 4/6] [RISCV] Use TableGen-based macro fusion

We convert LUIADDI macro fusion to TableGen.

For test, I added `MacroFusions` to `SiFive7Model`.
---
 llvm/lib/Target/RISCV/CMakeLists.txt          |  2 +-
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.h    |  3 +
 .../Target/RISCV/MCTargetDesc/RISCVMatInt.cpp |  2 +-
 llvm/lib/Target/RISCV/RISCV.td                |  1 +
 llvm/lib/Target/RISCV/RISCVFeatures.td        |  7 +-
 llvm/lib/Target/RISCV/RISCVMacroFusion.cpp    | 69 -------------------
 llvm/lib/Target/RISCV/RISCVMacroFusion.h      | 28 --------
 llvm/lib/Target/RISCV/RISCVMacroFusion.td     | 12 ++++
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td    |  1 +
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp      | 14 ++--
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |  5 +-
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  | 11 +--
 .../CodeGen/RISCV/macro-fusion-lui-addi.ll    | 10 +--
 13 files changed, 44 insertions(+), 121 deletions(-)
 delete mode 100644 llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
 delete mode 100644 llvm/lib/Target/RISCV/RISCVMacroFusion.h
 create mode 100644 llvm/lib/Target/RISCV/RISCVMacroFusion.td

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index a0c3345ec1bbd7e..ac88cd49db4e4ba 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -5,6 +5,7 @@ set(LLVM_TARGET_DEFINITIONS RISCV.td)
 tablegen(LLVM RISCVGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM RISCVGenCompressInstEmitter.inc -gen-compress-inst-emitter)
+tablegen(LLVM RISCVGenMacroFusion.inc -gen-macro-fusion-pred)
 tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info)
@@ -43,7 +44,6 @@ add_llvm_target(RISCVCodeGen
   RISCVISelDAGToDAG.cpp
   RISCVISelLowering.cpp
   RISCVMachineFunctionInfo.cpp
-  RISCVMacroFusion.cpp
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
   RISCVPostRAExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index 3cfddb530cdf630..f3f27efbee95f7d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -48,6 +48,9 @@ std::unique_ptr<MCObjectTargetWriter> createRISCVELFObjectWriter(uint8_t OSABI,
 #define GET_INSTRINFO_MC_HELPER_DECLS
 #include "RISCVGenInstrInfo.inc"
 
+#define GET_MACRO_FUSION_ENUM
+#include "RISCVGenMacroFusion.inc"
+
 #define GET_SUBTARGETINFO_ENUM
 #include "RISCVGenSubtargetInfo.inc"
 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 4358a5b878e6316..dbc286288d2c897 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -236,7 +236,7 @@ InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) {
     // NOTE: We don't check for C extension to minimize differences in generated
     // code.
     bool IsShiftedCompressible =
-        isInt<6>(ShiftedVal) && !STI.hasFeature(RISCV::TuneLUIADDIFusion);
+        isInt<6>(ShiftedVal) && !STI.hasMacroFusion(RISCV::LUIADDI);
     RISCVMatInt::InstSeq TmpSeq;
     generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
 
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index be93d5933d3329e..8c7f5b4bb91e891 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -34,6 +34,7 @@ include "GISel/RISCVRegisterBanks.td"
 // RISC-V Scheduling Models
 //===----------------------------------------------------------------------===//
 
+include "RISCVMacroFusion.td"
 include "RISCVSchedRocket.td"
 include "RISCVSchedSiFive7.td"
 include "RISCVSchedSyntacoreSCR1.td"
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index c5d88ca306969f4..a3bc6218d9c86ae 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -971,10 +971,6 @@ def TuneDLenFactor2
    : SubtargetFeature<"dlen-factor-2", "DLenFactor2", "true",
                       "Vector unit DLEN(data path width) is half of VLEN">;
 
-def TuneLUIADDIFusion
-    : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
-                       "true", "Enable LUI+ADDI macrofusion">;
-
 def TuneNoDefaultUnroll
     : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
                        "Disable default unroll preference.">;
@@ -993,8 +989,7 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                     TuneShortForwardBranchOpt]>;
 
 def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
-                                         "Ventana-Veyron Series processors",
-                                         [TuneLUIADDIFusion]>;
+                                         "Ventana-Veyron Series processors">;
 
 // Assume that lock-free native-width atomics are available, even if the target
 // and operating system combination would not usually provide them. The user
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
deleted file mode 100644
index 1b82cc8b5b858f4..000000000000000
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-//===- RISCVMacroFusion.cpp - RISC-V Macro Fusion -------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This file contains the RISC-V implementation of the DAG scheduling
-/// mutation to pair instructions back to back.
-//
-//===----------------------------------------------------------------------===//
-//
-#include "RISCVMacroFusion.h"
-#include "RISCVSubtarget.h"
-#include "llvm/CodeGen/MacroFusion.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-
-using namespace llvm;
-
-// Fuse LUI followed by ADDI or ADDIW.
-// rd = imm[31:0] which decomposes to
-// lui rd, imm[31:12]
-// addi(w) rd, rd, imm[11:0]
-static bool isLUIADDI(const MachineInstr *FirstMI,
-                      const MachineInstr &SecondMI) {
-  if (SecondMI.getOpcode() != RISCV::ADDI &&
-      SecondMI.getOpcode() != RISCV::ADDIW)
-    return false;
-
-  // Assume the 1st instr to be a wildcard if it is unspecified.
-  if (!FirstMI)
-    return true;
-
-  if (FirstMI->getOpcode() != RISCV::LUI)
-    return false;
-
-  Register FirstDest = FirstMI->getOperand(0).getReg();
-
-  // Destination of LUI should be the ADDI(W) source register.
-  if (SecondMI.getOperand(1).getReg() != FirstDest)
-    return false;
-
-  // If the input is virtual make sure this is the only user.
-  if (FirstDest.isVirtual()) {
-    auto &MRI = SecondMI.getMF()->getRegInfo();
-    return MRI.hasOneNonDBGUse(FirstDest);
-  }
-
-  // If the FirstMI destination is non-virtual, it should match the SecondMI
-  // destination.
-  return SecondMI.getOperand(0).getReg() == FirstDest;
-}
-
-static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
-                                   const TargetSubtargetInfo &TSI,
-                                   const MachineInstr *FirstMI,
-                                   const MachineInstr &SecondMI) {
-  const RISCVSubtarget &ST = static_cast<const RISCVSubtarget &>(TSI);
-
-  if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
-    return true;
-
-  return false;
-}
-
-std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
-  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
-}
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.h b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
deleted file mode 100644
index 7598db3f8fe1434..000000000000000
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//===- RISCVMacroFusion.h - RISC-V Macro Fusion -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This file contains the RISC-V definition of the DAG scheduling
-/// mutation to pair instructions back to back.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
-#define LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
-
-#include "llvm/CodeGen/MachineScheduler.h"
-
-namespace llvm {
-
-/// Note that you have to add:
-///   DAG.addMutation(createRISCVMacroFusionDAGMutation());
-/// to RISCVPassConfig::createMachineScheduler() to have an effect.
-std::unique_ptr<ScheduleDAGMutation> createRISCVMacroFusionDAGMutation();
-
-} // namespace llvm
-
-#endif
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
new file mode 100644
index 000000000000000..8df0be2931d1334
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -0,0 +1,12 @@
+//==----- RISCVMacroFusion.td - Macro Fusion Definitions -----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the macro fusion predicators.
+
+def LUIADDI: SimpleFusion<CheckOpcode<[LUI]>, CheckOpcode<[ADDI, ADDIW]>>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 9da68dc9a139d32..e9a1b3d9c044239 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -200,6 +200,7 @@ def SiFive7Model : SchedMachineModel {
                              HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                              HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                              HasStdExtZkr];
+  let MacroFusions = [LUIADDI];
 }
 
 // The SiFive7 microarchitecture has three pipelines: A, B, V.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 3e6af1abc5d408b..20ee30ae377e6ae 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -16,8 +16,9 @@
 #include "GISel/RISCVRegisterBankInfo.h"
 #include "RISCV.h"
 #include "RISCVFrameLowering.h"
-#include "RISCVMacroFusion.h"
 #include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
 
@@ -29,11 +30,16 @@ using namespace llvm;
 #define GET_SUBTARGETINFO_CTOR
 #include "RISCVGenSubtargetInfo.inc"
 
-namespace llvm::RISCVTuneInfoTable {
+namespace llvm {
+#define GET_RISCV_MACRO_FUSION_PRED_IMPL
+#include "RISCVGenMacroFusion.inc"
+
+namespace RISCVTuneInfoTable {
 
 #define GET_RISCVTuneInfoTable_IMPL
 #include "RISCVGenSearchableTables.inc"
-} // namespace llvm::RISCVTuneInfoTable
+} // namespace RISCVTuneInfoTable
+} // namespace llvm
 
 static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
                                           cl::init(true), cl::Hidden);
@@ -183,7 +189,7 @@ bool RISCVSubtarget::enableSubRegLiveness() const {
 
 void RISCVSubtarget::getPostRAMutations(
     std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
-  Mutations.push_back(createRISCVMacroFusionDAGMutation());
+  Mutations.push_back(createMacroFusionDAGMutation(getMacroFusions()));
 }
 
   /// Enable use of alias analysis during code generation (during MI
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index c135021333acabc..6b5f2b812982a20 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -50,6 +50,9 @@ struct RISCVTuneInfo {
 #include "RISCVGenSearchableTables.inc"
 } // namespace RISCVTuneInfoTable
 
+#define GET_RISCV_MACRO_FUSION_PRED_DECL
+#include "RISCVGenMacroFusion.inc"
+
 class RISCVSubtarget : public RISCVGenSubtargetInfo {
 public:
   // clang-format off
@@ -190,8 +193,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
     return UserReservedRegister[i];
   }
 
-  bool hasMacroFusion() const { return hasLUIADDIFusion(); }
-
   // Vector codegen related methods.
   bool hasVInstructions() const { return HasStdExtZve32x; }
   bool hasVInstructionsI64() const { return HasStdExtZve64x; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 524c1a5ca50c5d9..e620c41f2f791ed 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -14,7 +14,6 @@
 #include "MCTargetDesc/RISCVBaseInfo.h"
 #include "RISCV.h"
 #include "RISCVMachineFunctionInfo.h"
-#include "RISCVMacroFusion.h"
 #include "RISCVTargetObjectFile.h"
 #include "RISCVTargetTransformInfo.h"
 #include "TargetInfo/RISCVTargetInfo.h"
@@ -26,6 +25,8 @@
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
 #include "llvm/CodeGen/MIRParser/MIParser.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MacroFusion.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -345,9 +346,9 @@ class RISCVPassConfig : public TargetPassConfig {
   ScheduleDAGInstrs *
   createMachineScheduler(MachineSchedContext *C) const override {
     const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
-    if (ST.hasMacroFusion()) {
+    if (ST.enableMacroFusion()) {
       ScheduleDAGMILive *DAG = createGenericSchedLive(C);
-      DAG->addMutation(createRISCVMacroFusionDAGMutation());
+      DAG->addMutation(createMacroFusionDAGMutation(ST.getMacroFusions()));
       return DAG;
     }
     return nullptr;
@@ -356,9 +357,9 @@ class RISCVPassConfig : public TargetPassConfig {
   ScheduleDAGInstrs *
   createPostMachineScheduler(MachineSchedContext *C) const override {
     const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
-    if (ST.hasMacroFusion()) {
+    if (ST.enableMacroFusion()) {
       ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
-      DAG->addMutation(createRISCVMacroFusionDAGMutation());
+      DAG->addMutation(createMacroFusionDAGMutation(ST.getMacroFusions()));
       return DAG;
     }
     return nullptr;
diff --git a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
index 498e6cf23ba3495..38b12b739c4025b 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
+++ b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-;RUN: llc < %s -mtriple=riscv64 -mattr=+f -mcpu=sifive-u74 -target-abi=lp64f \
+;RUN: llc < %s -mtriple=riscv64 -mattr=+f -mcpu=sifive-u74 -misched-fusion=false -target-abi=lp64f \
 ;RUN:   | FileCheck %s --check-prefix=NOFUSION
-;RUN: llc < %s -mtriple=riscv64 -mattr=+f,+lui-addi-fusion -mcpu=sifive-u74 \
+;RUN: llc < %s -mtriple=riscv64 -mattr=+f -mcpu=sifive-u74 \
 ;RUN:   -target-abi=lp64f | FileCheck %s --check-prefix=FUSION
-;RUN: llc < %s -mtriple=riscv64 -mattr=+f,+lui-addi-fusion,+use-postra-scheduler -mcpu=sifive-u74 \
+;RUN: llc < %s -mtriple=riscv64 -mattr=+f,+use-postra-scheduler -mcpu=sifive-u74 \
 ;RUN:   -target-abi=lp64f | FileCheck %s --check-prefixes=FUSION-POSTRA
 
 @.str = private constant [4 x i8] c"%f\0A\00", align 1
@@ -40,8 +40,8 @@ declare void @bar(ptr, float)
 define i32 @test_matint() {
 ; NOFUSION-LABEL: test_matint:
 ; NOFUSION:       # %bb.0:
-; NOFUSION-NEXT:    li a0, 1
-; NOFUSION-NEXT:    slli a0, a0, 11
+; NOFUSION-NEXT:    lui a0, 1
+; NOFUSION-NEXT:    addiw a0, a0, -2048
 ; NOFUSION-NEXT:    ret
 ;
 ; FUSION-LABEL: test_matint:

>From e2bdff18dab4cc5cc08dfae80058581e8edd348d Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Thu, 9 Nov 2023 19:11:59 +0800
Subject: [PATCH 5/6] [Sched] Auto add MacroFusion mutation if enabled

We can get the information by `enableMacroFusion`.
---
 llvm/lib/CodeGen/MachineScheduler.cpp        | 15 +++++++++++--
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 22 --------------------
 2 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4add33ba0996af0..d0cc006ff2598cc 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -3791,6 +3791,11 @@ ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
   // data and pass it to later mutations. Have a single mutation that gathers
   // the interesting nodes in one pass.
   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+
+  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+  // Add MacroFusionDAGMutation if enabled.
+  if (STI.enableMacroFusion())
+    DAG->addMutation(createMacroFusionDAGMutation(STI.getMacroFusions()));
   return DAG;
 }
 
@@ -3940,8 +3945,14 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
 }
 
 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
-  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
-                           /*RemoveKillFlags=*/true);
+  ScheduleDAGMI *DAG =
+      new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
+                        /*RemoveKillFlags=*/true);
+  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+  // Add MacroFusionDAGMutation if enabled.
+  if (STI.enableMacroFusion())
+    DAG->addMutation(createMacroFusionDAGMutation(STI.getMacroFusions()));
+  return DAG;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index e620c41f2f791ed..898ab1e37913135 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -343,28 +343,6 @@ class RISCVPassConfig : public TargetPassConfig {
     return getTM<RISCVTargetMachine>();
   }
 
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override {
-    const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
-    if (ST.enableMacroFusion()) {
-      ScheduleDAGMILive *DAG = createGenericSchedLive(C);
-      DAG->addMutation(createMacroFusionDAGMutation(ST.getMacroFusions()));
-      return DAG;
-    }
-    return nullptr;
-  }
-
-  ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const override {
-    const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
-    if (ST.enableMacroFusion()) {
-      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
-      DAG->addMutation(createMacroFusionDAGMutation(ST.getMacroFusions()));
-      return DAG;
-    }
-    return nullptr;
-  }
-
   void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;

>From 990d7358bb6ae1dba4c42c8dc76bc39b9ac8ad00 Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Wed, 15 Nov 2023 16:10:07 +0800
Subject: [PATCH 6/6] [RISCV] Add macro fusions for Xiangshan

Doc: https://xiangshan-doc.readthedocs.io/zh-cn/latest/frontend/decode/

This PR is to show the usage of TableGen-based macro fusions.

Some instrcution pairs can be folded into one MacroFusion definition
but I leave them standalone to show the different ways to define a
macro fusion.

This PR is stacked on #72219, #72222, #72223, #72224, #72227
---
 llvm/lib/Target/RISCV/RISCVMacroFusion.td | 92 ++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 8df0be2931d1334..a52c54fba8a31f8 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -9,4 +9,94 @@
 // ===---------------------------------------------------------------------===//
 // The following definitions describe the macro fusion predicators.
 
-def LUIADDI: SimpleFusion<CheckOpcode<[LUI]>, CheckOpcode<[ADDI, ADDIW]>>;
+class RISCVMacroFusion<list<Instruction> first,list<Instruction> second,
+                             list<MCInstPredicate> extraFirstPreds = [],
+                             list<MCInstPredicate> extraSecondPreds = []>
+  : SimpleFusion<CheckAll<!listconcat([CheckOpcode<first>], extraFirstPreds)>,
+                 CheckAll<!listconcat([CheckOpcode<second>], extraSecondPreds)>>;
+
+def LUIADDI: RISCVMacroFusion<[LUI], [ADDI, ADDIW]>;
+
+// clear upper 32 bits / get lower 32 bits: slli r1, r0, 32 + srli r1, r1, 32
+def ClearUpper32Bits : RISCVMacroFusion<[SLLI], [SRLI],
+                                        [CheckImmOperand<2, 32>],
+                                        [CheckImmOperand<2, 32>]>;
+
+// clear upper 48 bits / get lower 16 bits: slli r1, r0, 48 + srli r1, r1, 48
+def ClearUpper48Bits : RISCVMacroFusion<[SLLI], [SRLI],
+                                        [CheckImmOperand<2, 48>],
+                                        [CheckImmOperand<2, 48>]>;
+
+// clear upper 48 bits / get lower 16 bits: slliw r1, r0, 16 + srliw r1, r1, 16
+def GetLower16Bits : RISCVMacroFusion<[SLLIW], [SRLIW],
+                                      [CheckImmOperand<2, 16>],
+                                      [CheckImmOperand<2, 16>]>;
+
+// sign-extend a 16-bit number: slliw r1, r0, 16 + sraiw r1, r1, 16
+def SExtH : RISCVMacroFusion<[SLLIW], [SRAIW],
+                             [CheckImmOperand<2, 16>],
+                             [CheckImmOperand<2, 16>]>;
+
+// These should be covered by Zba extension?
+// shift left by one and add: slli r1, r0, 1 + add r1, r1, r2
+// shift left by two and add: slli r1, r0, 2 + add r1, r1, r2
+// shift left by three and add: slli r1, r0, 3 + add r1, r1, r2
+def ShiftNAdd : RISCVMacroFusion<[SLLI], [ADD],
+                                 [CheckAny<[CheckImmOperand<2, 1>,
+                                            CheckImmOperand<2, 2>,
+                                            CheckImmOperand<2, 3>]>]>;
+
+// shift zero-extended word left by one: slli r1, r0, 32 + srli r1, r0, 31
+// shift zero-extended word left by two: slli r1, r0, 32 + srli r1, r0, 30
+// shift zero-extended word left by three: slli r1, r0, 32 + srli r1, r0, 29
+def ShiftZExtByN : RISCVMacroFusion<[SLLI], [SRLI],
+                                    [CheckImmOperand<2, 32>],
+                                    [CheckAny<[CheckImmOperand<2, 29>,
+                                               CheckImmOperand<2, 30>,
+                                               CheckImmOperand<2, 31>]>]>;
+
+// get the second byte: srli r1, r0, 8 + andi r1, r1, 255
+def GetSecondByte : RISCVMacroFusion<[SRLI], [ANDI],
+                                     [CheckImmOperand<2, 8>],
+                                     [CheckImmOperand<2, 255>]>;
+
+// shift left by four and add: slli r1, r0, 4 + add r1, r1, r2
+def ShiftLeft4Add : RISCVMacroFusion<[SLLI], [ADD], [CheckImmOperand<2, 4>]>;
+
+// shift right by 29 and add: srli r1, r0, 29 + add r1, r1, r2
+// shift right by 30 and add: srli r1, r0, 30 + add r1, r1, r2
+// shift right by 31 and add: srli r1, r0, 31 + add r1, r1, r2
+// shift right by 32 and add: srli r1, r0, 32 + add r1, r1, r2
+def ShiftRightNAdd : RISCVMacroFusion<[SRLI], [ADD],
+                                      [CheckAny<[CheckImmOperand<2, 29>,
+                                                 CheckImmOperand<2, 30>,
+                                                 CheckImmOperand<2, 31>,
+                                                 CheckImmOperand<2, 32>]>]>;
+
+// add one if odd, otherwise unchanged: andi r1, r0, 1 + add r1, r1, r2
+// add one if odd (in word format), otherwise unchanged: andi r1, r0, 1 + addw r1, r1, r2
+def AddOneIfOdd : RISCVMacroFusion<[ANDI], [ADD, ADDW], [CheckImmOperand<2, 1>]>;
+
+// addw and extract its lower 8 bits (fused into addwbyte)
+// addw and extract its lower 1 bit (fused into addwbit)
+def AddAndExtractNBits : RISCVMacroFusion<[ADDW], [ANDI], [],
+                                          [CheckAny<[CheckImmOperand<2, 1>,
+                                                     CheckImmOperand<2, 255>]>]>;
+
+// addw and zext.h (fused into addwzexth)
+// addw and sext.h (fused into addwsexth)
+def AddwAndExt : RISCVMacroFusion<[ADDW], [ZEXT_H_RV32, ZEXT_H_RV64, SEXT_H]>;
+
+// logic operation and extract its LSB
+def LogicOpAndExtractLSB : RISCVMacroFusion<[AND, OR, XOR, ANDI, ORI, XORI, ORC_B], [ANDI], [],  
+                                            [CheckImmOperand<2, 1>]>;
+
+// logic operation and extract its lower 16 bits
+def LogicOpAndExtractLow16Bits : RISCVMacroFusion<[AND, OR, XOR, ANDI, ORI, XORI, ORC_B],
+                                                  [ZEXT_H_RV32, ZEXT_H_RV64]>;
+
+// OR(Cat(src1(63, 8), 0.U(8.W)), src2): andi r1, r0, -256 + or r1, r1, r2
+def OrCat : RISCVMacroFusion<[ANDI], [OR], [CheckImmOperand<2, -256>]>;
+
+// mul 7-bit data with 32-bit data: andi r1, r0, 127 + mulw r1, r1, r2
+def Mul7BitsWith32Bits : RISCVMacroFusion<[ANDI], [MULW], [CheckImmOperand<2, 127>]>;



More information about the llvm-commits mailing list