[llvm] [TableGen] Enhance testability of TableGen-based macro fusion (PR #73075)

Wang Pengcheng via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 21 19:44:18 PST 2023


https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/73075

We add an option `-mfusion` like `-mattr` to add/remove supported
macro fusions, so that we can test each macro fusion separately via
`llc`.

This PR is stacked on #72219, #72222, #72223



>From 4d9f597583d86061990a7c9ebb6e0d3575cdbf72 Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Fri, 10 Nov 2023 17:58:00 +0800
Subject: [PATCH 1/4] [MacroFusion] Support multiple predicators

The user can provide multiple predicators to MacroFusion and the
DAG mutation will be applied if one of them evaluates to true.

`ShouldSchedulePredTy` is renamed to `MacroFusionPredTy`.
---
 llvm/include/llvm/CodeGen/MacroFusion.h       | 18 ++++-----
 llvm/lib/CodeGen/MacroFusion.cpp              | 39 +++++++++++++------
 .../lib/Target/AArch64/AArch64MacroFusion.cpp |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp  |  4 +-
 llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp       |  4 +-
 llvm/lib/Target/ARM/ARMMacroFusion.cpp        |  4 +-
 llvm/lib/Target/PowerPC/PPCMacroFusion.cpp    |  4 +-
 llvm/lib/Target/RISCV/RISCVMacroFusion.cpp    |  2 +-
 llvm/lib/Target/X86/X86MacroFusion.cpp        |  5 +--
 9 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MacroFusion.h b/llvm/include/llvm/CodeGen/MacroFusion.h
index ea2c7a5faae385a9..a97f776335368c7c 100644
--- a/llvm/include/llvm/CodeGen/MacroFusion.h
+++ b/llvm/include/llvm/CodeGen/MacroFusion.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_CODEGEN_MACROFUSION_H
 #define LLVM_CODEGEN_MACROFUSION_H
 
-#include <functional>
 #include <memory>
+#include <vector>
 
 namespace llvm {
 
@@ -29,10 +29,10 @@ class SUnit;
 /// Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
-using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII,
-                                                const TargetSubtargetInfo &TSI,
-                                                const MachineInstr *FirstMI,
-                                                const MachineInstr &SecondMI)>;
+using MacroFusionPredTy = bool (*)(const TargetInstrInfo &TII,
+                                   const TargetSubtargetInfo &STI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI);
 
 /// Checks if the number of cluster edges between SU and its predecessors is
 /// less than FuseLimit
@@ -48,15 +48,15 @@ bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
 
 /// Create a DAG scheduling mutation to pair instructions back to back
 /// for instructions that benefit according to the target-specific
-/// shouldScheduleAdjacent predicate function.
+/// predicate functions.
 std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent);
+createMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates);
 
 /// Create a DAG scheduling mutation to pair branch instructions with one
 /// of their predecessors back to back for instructions that benefit according
-/// to the target-specific shouldScheduleAdjacent predicate function.
+/// to the target-specific predicate functions.
 std::unique_ptr<ScheduleDAGMutation>
-createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent);
+createBranchMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index fa5df68b8abcc0f6..1ce2f49763b076fa 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -137,19 +137,35 @@ namespace {
 /// Post-process the DAG to create cluster edges between instrs that may
 /// be fused by the processor into a single operation.
 class MacroFusion : public ScheduleDAGMutation {
-  ShouldSchedulePredTy shouldScheduleAdjacent;
+  std::vector<MacroFusionPredTy> Predicates;
   bool FuseBlock;
   bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);
 
 public:
-  MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
-    : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
+  MacroFusion(std::vector<MacroFusionPredTy> Predicates, bool FuseBlock)
+      : Predicates(std::move(Predicates)), FuseBlock(FuseBlock) {}
 
   void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
+  bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                              const TargetSubtargetInfo &STI,
+                              const MachineInstr *FirstMI,
+                              const MachineInstr &SecondMI);
 };
 
 } // end anonymous namespace
 
+bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                                         const TargetSubtargetInfo &STI,
+                                         const MachineInstr *FirstMI,
+                                         const MachineInstr &SecondMI) {
+  for (MacroFusionPredTy Predicate : Predicates) {
+    if (Predicate(TII, STI, FirstMI, SecondMI))
+      return true;
+  }
+  return false;
+}
+
 void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
   if (FuseBlock)
     // For each of the SUnits in the scheduling block, try to fuse the instr in
@@ -197,17 +213,16 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
 }
 
 std::unique_ptr<ScheduleDAGMutation>
-llvm::createMacroFusionDAGMutation(
-     ShouldSchedulePredTy shouldScheduleAdjacent) {
-  if(EnableMacroFusion)
-    return std::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+llvm::createMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates) {
+  if (EnableMacroFusion) { // otherwise no mutation is created
+    return std::make_unique<MacroFusion>(std::move(Predicates), true);
+  }
   return nullptr;
 }
 
-std::unique_ptr<ScheduleDAGMutation>
-llvm::createBranchMacroFusionDAGMutation(
-     ShouldSchedulePredTy shouldScheduleAdjacent) {
-  if(EnableMacroFusion)
-    return std::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+std::unique_ptr<ScheduleDAGMutation> llvm::createBranchMacroFusionDAGMutation(
+    std::vector<MacroFusionPredTy> Predicates) {
+  if (EnableMacroFusion) // otherwise no mutation is created
+    return std::make_unique<MacroFusion>(std::move(Predicates), false);
   return nullptr;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 05d60872bf51acac..8f46f3eabb3ef45b 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -478,5 +478,5 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 
 std::unique_ptr<ScheduleDAGMutation>
 llvm::createAArch64MacroFusionDAGMutation() {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index c15c94ee17f8b1dd..b2b11d661523e9c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -59,8 +59,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
 
 namespace llvm {
 
-std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation () {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
index 29c9b9ccf27614f0..0bddeeef9e9b1a3f 100644
--- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -142,10 +142,10 @@ namespace {
 /// be turned into VOPD instructions
 /// Greedily pairs instruction candidates. O(n^2) algorithm.
 struct VOPDPairingMutation : ScheduleDAGMutation {
-  ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer
+  MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
 
   VOPDPairingMutation(
-      ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
+      MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
       : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
 
   void apply(ScheduleDAGInstrs *DAG) override {
diff --git a/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/llvm/lib/Target/ARM/ARMMacroFusion.cpp
index 38bf28ba8219b901..7de117925e464fe9 100644
--- a/llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -62,8 +62,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   return false;
 }
 
-std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation () {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index bf1c39a3a3a2d477..d6a4a5dd5faabaec 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -286,8 +286,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 
 namespace llvm {
 
-std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation () {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index 02a8d5c18fe1a0ee..1b82cc8b5b858f44 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -65,5 +65,5 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 }
 
 std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
-  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+  return createMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp
index aa6e8645e0927f56..382cc9a71c2a1844 100644
--- a/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -67,9 +67,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
 
 namespace llvm {
 
-std::unique_ptr<ScheduleDAGMutation>
-createX86MacroFusionDAGMutation () {
-  return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
+std::unique_ptr<ScheduleDAGMutation> createX86MacroFusionDAGMutation() {
+  return createBranchMacroFusionDAGMutation({shouldScheduleAdjacent});
 }
 
 } // end namespace llvm

>From 3a56593b87db1e73a823b4db331c231f824f1585 Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Fri, 10 Nov 2023 18:18:18 +0800
Subject: [PATCH 2/4] [TableGen] Add a backend to generate MacroFusion
 predicators

The MacroFusion contains four predicates, which are for first MI,
second MI, prolog and epilog.

The generated code will be like:
```
bool isNAME(const TargetInstrInfo &TII,
            const TargetSubtargetInfo &STI,
            const MachineInstr *FirstMI,
            const MachineInstr &SecondMI) {
  auto &MRI = SecondMI.getMF()->getRegInfo();
  /* Predicates */
  return true;
}
```

The predicates of first/second MI can be any predicates defined in
`TargetInstrPredicate.td`
---
 .../llvm/Target/TargetInstrPredicate.td       |   6 +
 llvm/include/llvm/Target/TargetSchedule.td    | 113 ++++++++++
 llvm/utils/TableGen/CMakeLists.txt            |   1 +
 .../TableGen/MacroFusionPredicatorEmitter.cpp | 199 ++++++++++++++++++
 llvm/utils/TableGen/PredicateExpander.cpp     |   8 +
 llvm/utils/TableGen/PredicateExpander.h       |   1 +
 6 files changed, 328 insertions(+)
 create mode 100644 llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp

diff --git a/llvm/include/llvm/Target/TargetInstrPredicate.td b/llvm/include/llvm/Target/TargetInstrPredicate.td
index 9f2cde9d923050a8..82c4c7b23a49b6ac 100644
--- a/llvm/include/llvm/Target/TargetInstrPredicate.td
+++ b/llvm/include/llvm/Target/TargetInstrPredicate.td
@@ -95,6 +95,12 @@ class MCOperandPredicate<int Index> : MCInstPredicate {
 // Return true if machine operand at position `Index` is a register operand.
 class CheckIsRegOperand<int Index> : MCOperandPredicate<Index>;
 
+// Return true if machine operand at position `Index` is a virtual register operand.
+class CheckIsVRegOperand<int Index> : MCOperandPredicate<Index>;
+
+// Return true if machine operand at position `Index` is not a virtual register operand.
+class CheckIsNotVRegOperand<int Index> : CheckNot<CheckIsVRegOperand<Index>>;
+
 // Return true if machine operand at position `Index` is an immediate operand.
 class CheckIsImmOperand<int Index> : MCOperandPredicate<Index>;
 
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 949baa5d2105c451..6606c9867932bcde 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -584,3 +584,116 @@ class MemoryQueue<ProcResourceKind PR> {
 
 class LoadQueue<ProcResourceKind LDQueue> : MemoryQueue<LDQueue>;
 class StoreQueue<ProcResourceKind STQueue> : MemoryQueue<STQueue>;
+
+// The target instruction that FusionPredicate will evaluate on.
+class FusionTarget;
+def first : FusionTarget;
+def second : FusionTarget;
+def both : FusionTarget;
+
+// Base class of FusionPredicate, etc. The available variables are:
+// * const TargetInstrInfo &TII
+// * const TargetSubtargetInfo &STI
+// * const MachineRegisterInfo &MRI
+// * const MachineInstr *FirstMI
+// * const MachineInstr &SecondMI
+class FusionPredicate<FusionTarget target> {
+  FusionTarget Target = target;
+}
+class FirstFusionPredicate: FusionPredicate<first>;
+class SecondFusionPredicate: FusionPredicate<second>;
+class BothFusionPredicate: FusionPredicate<both>;
+
+// FusionPredicate with raw code predicate.
+class FusionPredicateWithCode<code pred> : FusionPredicate<both> {
+  code Predicate = pred;
+}
+
+// FusionPredicate with MCInstPredicate.
+class FusionPredicateWithMCInstPredicate<FusionTarget target, MCInstPredicate pred>
+  : FusionPredicate<target> {
+  MCInstPredicate Predicate = pred;
+}
+class FirstFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
+  : FusionPredicateWithMCInstPredicate<first, pred>;
+class SecondFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
+  : FusionPredicateWithMCInstPredicate<second, pred>;
+// The pred will be applied on both firstMI and secondMI.
+class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
+  : FusionPredicateWithMCInstPredicate<second, pred>;
+
+// Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position
+// `firstOpIdx` should be the same as the operand of `SecondMI` at position
+// `secondOpIdx`.
+class TieReg<int firstOpIdx, int secondOpIdx> : BothFusionPredicate {
+  int FirstOpIdx = firstOpIdx;
+  int SecondOpIdx = secondOpIdx;
+}
+
+// A predicate for wildcard. The generated code will be like:
+// ```
+// if (!FirstMI)
+//   return ReturnValue;
+// ```
+class WildcardPred<bit ret> : FirstFusionPredicate {
+  bit ReturnValue = ret;
+}
+def WildcardFalse : WildcardPred<0>;
+def WildcardTrue : WildcardPred<1>;
+
+// Indicates that the destination register of `FirstMI` should have one
+// use if it is a virtual register.
+class OneUsePred : FirstFusionPredicate;
+def OneUse : OneUsePred;
+
+// Handled by MacroFusionPredicatorEmitter backend.
+// The generated predicator will be like:
+// ```
+// bool isNAME(const TargetInstrInfo &TII,
+//             const TargetSubtargetInfo &STI,
+//             const MachineInstr *FirstMI,
+//             const MachineInstr &SecondMI) {
+//   auto &MRI = SecondMI.getMF()->getRegInfo();
+//   /* Predicates */
+//   return true;
+// }
+// ```
+class Fusion<list<FusionPredicate> predicates> {
+  list<FusionPredicate> Predicates = predicates;
+}
+
+// The generated predicator will be like:
+// ```
+// bool isNAME(const TargetInstrInfo &TII,
+//             const TargetSubtargetInfo &STI,
+//             const MachineInstr *FirstMI,
+//             const MachineInstr &SecondMI) {
+//   auto &MRI = SecondMI.getMF()->getRegInfo();
+//   /* Prolog */
+//   /* Predicate for `SecondMI` */
+//   /* Wildcard */
+//   /* Predicate for `FirstMI` */
+//   /* Check One Use */
+//   /* Tie registers */
+//   /* Epilog */
+//   return true;
+// }
+// ```
+class SimpleFusion<MCInstPredicate firstPred, MCInstPredicate secondPred,
+                   list<FusionPredicate> prolog = [],
+                   list<FusionPredicate> epilog = []>
+  : Fusion<!listconcat(
+                  prolog,
+                  [
+                    SecondFusionPredicateWithMCInstPredicate<secondPred>,
+                    WildcardTrue,
+                    FirstFusionPredicateWithMCInstPredicate<firstPred>,
+                    SecondFusionPredicateWithMCInstPredicate<
+                      CheckAny<[
+                        CheckIsVRegOperand<0>,
+                        CheckSameRegOperand<0, 1>
+                      ]>>,
+                    OneUse,
+                    TieReg<0, 1>,
+                  ],
+                  epilog)>;
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 071ea3bc07054bba..f765cc36d3bebed8 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -72,6 +72,7 @@ add_tablegen(llvm-tblgen LLVM
   PredicateExpander.cpp
   PseudoLoweringEmitter.cpp
   CompressInstEmitter.cpp
+  MacroFusionPredicatorEmitter.cpp
   RegisterBankEmitter.cpp
   RegisterInfoEmitter.cpp
   SearchableTableEmitter.cpp
diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
new file mode 100644
index 0000000000000000..86a4b315222ceb27
--- /dev/null
+++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
@@ -0,0 +1,199 @@
+//===--- MacroFusionPredicatorEmitter.cpp - Generator for Fusion ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// MacroFusionPredicatorEmitter implements a TableGen-driven predicators
+// generator for macro-op fusions.
+//
+//===---------------------------------------------------------------------===//
+
+#include "CodeGenTarget.h"
+#include "PredicateExpander.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "macro-fusion-predicator"
+
+namespace {
+class MacroFusionPredicatorEmitter {
+  RecordKeeper &Records; // queried for all defs derived from `Fusion`
+  CodeGenTarget Target;  // supplies the target name used in the macro guards
+
+  void emitMacroFusionDecl(std::vector<Record *> Fusions, PredicateExpander &PE,
+                           raw_ostream &OS);
+  void emitMacroFusionImpl(std::vector<Record *> Fusions, PredicateExpander &PE,
+                           raw_ostream &OS);
+  void emitPredicates(std::vector<Record *> &Predicates,
+                      PredicateExpander &PE, raw_ostream &OS);
+  void emitFirstPredicate(Record *Predicate, PredicateExpander &PE,
+                          raw_ostream &OS);
+  void emitSecondPredicate(Record *Predicate, PredicateExpander &PE,
+                           raw_ostream &OS);
+  void emitBothPredicate(Record *Predicate, PredicateExpander &PE,
+                         raw_ostream &OS);
+
+public:
+  MacroFusionPredicatorEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+
+  void run(raw_ostream &OS); // emits the declaration and implementation parts
+};
+} // End anonymous namespace.
+
+void MacroFusionPredicatorEmitter::emitMacroFusionDecl(
+    std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
+  OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n\n";
+
+  for (Record *Fusion : Fusions) {
+    OS << "bool is" << Fusion->getName() << "(const TargetInstrInfo &, "
+       << "const TargetSubtargetInfo &, " << "const MachineInstr *, "
+       << "const MachineInstr &);\n";
+  }
+
+  OS << "\n#endif\n";
+  OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n";
+}
+
+void MacroFusionPredicatorEmitter::emitMacroFusionImpl(
+    std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
+  OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n";
+
+  for (Record *Fusion : Fusions) {
+    std::vector<Record *> Predicates =
+        Fusion->getValueAsListOfDefs("Predicates");
+
+    OS << "bool is" << Fusion->getName() << "(\n";
+    OS.indent(5) << "const TargetInstrInfo &TII,\n";
+    OS.indent(5) << "const TargetSubtargetInfo &STI,\n";
+    OS.indent(5) << "const MachineInstr *FirstMI,\n";
+    OS.indent(5) << "const MachineInstr &SecondMI) {\n";
+    OS.indent(2) << "auto &MRI = SecondMI.getMF()->getRegInfo();\n";
+
+    emitPredicates(Predicates, PE, OS);
+
+    OS.indent(2) << "return true;\n";
+    OS << "}\n";
+  }
+
+  OS << "\n#endif\n";
+  OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n";
+}
+
+void MacroFusionPredicatorEmitter::emitPredicates(
+    std::vector<Record *> &Predicates, PredicateExpander &PE, raw_ostream &OS) {
+  for (Record *Predicate : Predicates) {
+    Record *Target = Predicate->getValueAsDef("Target");
+    if (Target->getName() == "first")
+      emitFirstPredicate(Predicate, PE, OS);
+    else if (Target->getName() == "second")
+      emitSecondPredicate(Predicate, PE, OS);
+    else if (Target->getName() == "both")
+      emitBothPredicate(Predicate, PE, OS);
+    else
+      PrintFatalError(Target->getLoc(),
+                      "Unsupported 'FusionTarget': " + Target->getName());
+  }
+}
+
+void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate,
+                                                      PredicateExpander &PE,
+                                                      raw_ostream &OS) {
+  if (Predicate->isSubClassOf("WildcardPred")) {
+    OS.indent(2) << "if (!FirstMI)\n";
+    OS.indent(2) << "  return "
+                 << (Predicate->getValueAsBit("ReturnValue") ? "true" : "false")
+                 << ";\n";
+  } else if (Predicate->isSubClassOf("OneUsePred")) {
+    OS.indent(2) << "{\n";
+    OS.indent(4) << "Register FirstDest = FirstMI->getOperand(0).getReg();\n";
+    OS.indent(4)
+        << "if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))\n";
+    OS.indent(4) << "  return false;\n";
+    OS.indent(2) << "}\n";
+  } else if (Predicate->isSubClassOf(
+                 "FirstFusionPredicateWithMCInstPredicate")) {
+    OS.indent(2) << "{\n";
+    OS.indent(4) << "const MachineInstr *MI = FirstMI;\n";
+    OS.indent(4) << "if (";
+    PE.setNegatePredicate(true);
+    PE.setIndentLevel(3);
+    PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate"));
+    OS << ")\n";
+    OS.indent(4) << "  return false;\n";
+    OS.indent(2) << "}\n";
+  } else
+    PrintFatalError(Predicate->getLoc(),
+                    "Unsupported predicate for first instruction: " +
+                        Predicate->getType()->getAsString());
+}
+
+void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
+                                                       PredicateExpander &PE,
+                                                       raw_ostream &OS) {
+  if (Predicate->isSubClassOf("SecondFusionPredicateWithMCInstPredicate")) {
+    OS.indent(2) << "{\n";
+    OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n";
+    OS.indent(4) << "if (";
+    PE.setNegatePredicate(true); // generated code returns false on mismatch
+    PE.setIndentLevel(3);
+    PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate"));
+    OS << ")\n";
+    OS.indent(4) << "  return false;\n";
+    OS.indent(2) << "}\n";
+  } else // only MCInstPredicate-based second predicates are handled here
+    PrintFatalError(Predicate->getLoc(),
+                    "Unsupported predicate for second instruction: " +
+                        Predicate->getType()->getAsString());
+}
+
+void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate,
+                                                     PredicateExpander &PE,
+                                                     raw_ostream &OS) {
+  if (Predicate->isSubClassOf("FusionPredicateWithCode"))
+    OS << Predicate->getValueAsString("Predicate");
+  else if (Predicate->isSubClassOf("BothFusionPredicateWithMCInstPredicate")) {
+    Record *MCPred = Predicate->getValueAsDef("Predicate");
+    emitFirstPredicate(MCPred, PE, OS);
+    emitSecondPredicate(MCPred, PE, OS);
+  } else if (Predicate->isSubClassOf("TieReg")) {
+    int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx");
+    int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx");
+    OS.indent(2) << "if (!(FirstMI->getOperand(" << FirstOpIdx
+                 << ").isReg() &&\n";
+    OS.indent(2) << "      SecondMI.getOperand(" << SecondOpIdx
+                 << ").isReg() &&\n";
+    OS.indent(2) << "      FirstMI->getOperand(" << FirstOpIdx
+                 << ").getReg() == SecondMI.getOperand(" << SecondOpIdx
+                 << ").getReg()))\n";
+    OS.indent(2) << "  return false;\n";
+  } else
+    PrintFatalError(Predicate->getLoc(),
+                    "Unsupported predicate for both instruction: " +
+                        Predicate->getType()->getAsString());
+}
+
+void MacroFusionPredicatorEmitter::run(raw_ostream &OS) {
+  // Emit file header.
+  emitSourceFileHeader("Macro Fusion Predicators", OS);
+
+  PredicateExpander PE(Target.getName());
+  PE.setByRef(false);
+  PE.setExpandForMC(false);
+
+  std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion");
+  // Sort macro fusions by name.
+  llvm::sort(Fusions, LessRecord());
+  emitMacroFusionDecl(Fusions, PE, OS);
+  emitMacroFusionImpl(Fusions, PE, OS);
+}
+
+static TableGen::Emitter::OptClass<MacroFusionPredicatorEmitter>
+    X("gen-macro-fusion-pred", "Generate macro fusion predicators.");
diff --git a/llvm/utils/TableGen/PredicateExpander.cpp b/llvm/utils/TableGen/PredicateExpander.cpp
index 8f96d3307ded8bec..d3a73e02cd916f8e 100644
--- a/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/PredicateExpander.cpp
@@ -194,6 +194,11 @@ void PredicateExpander::expandCheckIsRegOperand(raw_ostream &OS, int OpIndex) {
      << "getOperand(" << OpIndex << ").isReg() ";
 }
 
+void PredicateExpander::expandCheckIsVRegOperand(raw_ostream &OS, int OpIndex) {
+  OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
+     << "getOperand(" << OpIndex << ").getReg().isVirtual()";
+}
+
 void PredicateExpander::expandCheckIsImmOperand(raw_ostream &OS, int OpIndex) {
   OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
      << "getOperand(" << OpIndex << ").isImm() ";
@@ -319,6 +324,9 @@ void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) {
   if (Rec->isSubClassOf("CheckIsRegOperand"))
     return expandCheckIsRegOperand(OS, Rec->getValueAsInt("OpIndex"));
 
+  if (Rec->isSubClassOf("CheckIsVRegOperand"))
+    return expandCheckIsVRegOperand(OS, Rec->getValueAsInt("OpIndex"));
+
   if (Rec->isSubClassOf("CheckIsImmOperand"))
     return expandCheckIsImmOperand(OS, Rec->getValueAsInt("OpIndex"));
 
diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h
index 27f049a715aad56e..cfb0a3d51e677645 100644
--- a/llvm/utils/TableGen/PredicateExpander.h
+++ b/llvm/utils/TableGen/PredicateExpander.h
@@ -75,6 +75,7 @@ class PredicateExpander {
                                bool IsCheckAll);
   void expandTIIFunctionCall(raw_ostream &OS, StringRef MethodName);
   void expandCheckIsRegOperand(raw_ostream &OS, int OpIndex);
+  void expandCheckIsVRegOperand(raw_ostream &OS, int OpIndex);
   void expandCheckIsImmOperand(raw_ostream &OS, int OpIndex);
   void expandCheckInvalidRegOperand(raw_ostream &OS, int OpIndex);
   void expandCheckFunctionPredicate(raw_ostream &OS, StringRef MCInstFn,

>From 56e1b5a2ed568b5129966738a393f3b7dc4a3ed1 Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Tue, 14 Nov 2023 10:48:27 +0800
Subject: [PATCH 3/4] [TableGen] Add MacroFusions to SchedMachineModel

A list of `MacroFusion` is added to `SchedMachineModel` to indicate
supported macro fusions of this processor model.

For visibility, C++ enums of all MacroFusions are generated and a new
`hasMacroFusion` method is added to Subtarget to test whether a MacroFusion
is supported.
---
 .../llvm/CodeGen/TargetSubtargetInfo.h        |  4 ++
 llvm/include/llvm/MC/MCSchedule.h             |  9 ++++
 llvm/include/llvm/MC/MCSubtargetInfo.h        | 13 +++++
 llvm/include/llvm/Target/TargetSchedule.td    |  4 ++
 llvm/lib/MC/MCSchedule.cpp                    |  1 +
 llvm/utils/TableGen/CodeGenSchedule.cpp       | 15 ++++++
 llvm/utils/TableGen/CodeGenSchedule.h         | 11 +++++
 .../TableGen/MacroFusionPredicatorEmitter.cpp | 30 ++++++++++++
 llvm/utils/TableGen/SubtargetEmitter.cpp      | 47 ++++++++++++++++++-
 9 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 55ef95c285431906..7c76293f3e5eaeaf 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MacroFusion.h"
 #include "llvm/CodeGen/PBQPRAConstraint.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/IR/GlobalValue.h"
@@ -323,6 +324,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
   /// helps removing redundant copies generated by register allocator when
   /// handling complex eviction chains.
   virtual bool enableSpillageCopyElimination() const { return false; }
+
+  /// Get the list of MacroFusion predicates.
+  virtual std::vector<MacroFusionPredTy> getMacroFusions() const { return {}; }
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index 98ebe42cfd133b54..aa187e5cb400672d 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_MC_MCSCHEDULE_H
 #define LLVM_MC_MCSCHEDULE_H
 
+#include "llvm/ADT/Bitset.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
@@ -196,6 +197,9 @@ struct MCExtraProcessorInfo {
   unsigned StoreQueueID;
 };
 
+const unsigned MaxMacroFusions = 256;
+using MacroFusionBitset = Bitset<MaxMacroFusions>;
+
 /// Machine model for scheduling, bundling, and heuristics.
 ///
 /// The machine model directly provides basic information about the
@@ -325,9 +329,14 @@ struct MCSchedModel {
   const InstrItinerary *InstrItineraries;
 
   const MCExtraProcessorInfo *ExtraProcessorInfo;
+  const MacroFusionBitset *MacroFusionBits;
 
   bool hasExtraProcessorInfo() const { return ExtraProcessorInfo; }
 
+  const MacroFusionBitset *getMacroFusionBits() const {
+    return MacroFusionBits;
+  }
+
   unsigned getProcessorID() const { return ProcID; }
 
   /// Does this machine model include instruction-level scheduling.
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index f172a799aa3331c8..66fb6c9383272e18 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -120,6 +120,12 @@ class MCSubtargetInfo {
     return FeatureBits[Feature];
   }
 
+  bool hasMacroFusion(unsigned MacroFusion) const {
+    const MacroFusionBitset *MacroFusionBits =
+        CPUSchedModel->getMacroFusionBits();
+    return MacroFusionBits && MacroFusionBits->test(MacroFusion);
+  }
+
 protected:
   /// Initialize the scheduling model and feature bits.
   ///
@@ -295,6 +301,13 @@ class MCSubtargetInfo {
 
   /// \return if target want to issue a prefetch in address space \p AS.
   virtual bool shouldPrefetchAddressSpace(unsigned AS) const;
+
+  /// Enable macro fusion for this subtarget.
+  virtual bool enableMacroFusion() const {
+    const MacroFusionBitset *MacroFusionBits =
+        CPUSchedModel->getMacroFusionBits();
+    return MacroFusionBits && MacroFusionBits->any();
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 6606c9867932bcde..613e4721b0d7c2a9 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -53,6 +53,7 @@
 include "llvm/Target/TargetItinerary.td"
 
 class Predicate; // Forward def
+class Fusion;
 
 // DAG operator that interprets the DAG args as Instruction defs.
 def instrs;
@@ -122,6 +123,9 @@ class SchedMachineModel {
   // using intervals via ResourceSegments (see
   // llvm/include/llvm/CodeGen/MachineScheduler.h).
   bit EnableIntervals = false;
+
+  // List of Fusion.
+  list<Fusion> MacroFusions = [];
 }
 
 def NoSchedModel : SchedMachineModel {
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index 990a693559a77769..19c36cb0e58d9c10 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -37,6 +37,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
                                             0,
                                             0,
                                             nullptr,
+                                            nullptr,
                                             nullptr};
 
 int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index c3c5e4f8eb2d8c3b..e05b7714ab39436a 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -208,6 +208,9 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
   // (For per-operand resources mapped to itinerary classes).
   collectProcUnsupportedFeatures();
 
+  // Find MacroFusion records for each processor.
+  collectMacroFusions();
+
   // Infer new SchedClasses from SchedVariant.
   inferSchedClasses();
 
@@ -1150,6 +1153,12 @@ bool CodeGenSchedModels::hasItineraries() const {
   return false;
 }
 
+bool CodeGenSchedModels::hasMacroFusions() const {
+  // True when at least one processor model lists a macro fusion.
+  auto ListsFusions = [](const auto &PM) { return PM.hasMacroFusions(); };
+  return llvm::any_of(ProcModels, ListsFusions);
+}
+
 // Gather the processor itineraries.
 void CodeGenSchedModels::collectProcItins() {
   LLVM_DEBUG(dbgs() << "\n+++ PROBLEM ITINERARIES (collectProcItins) +++\n");
@@ -1220,6 +1229,12 @@ void CodeGenSchedModels::collectProcUnsupportedFeatures() {
         ProcModel.ModelDef->getValueAsListOfDefs("UnsupportedFeatures"));
 }
 
+void CodeGenSchedModels::collectMacroFusions() {
+  for (CodeGenProcModel &PM : ProcModels) // Gather each model's fusion records.
+    for (Record *Fusion : PM.ModelDef->getValueAsListOfDefs("MacroFusions"))
+      PM.MacroFusions.push_back(Fusion);
+}
+
 /// Infer new classes from existing classes. In the process, this may create new
 /// SchedWrites from sequences of existing SchedWrites.
 void CodeGenSchedModels::inferSchedClasses() {
diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h
index 76ef1e43953078e7..317558c52fbef15b 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/llvm/utils/TableGen/CodeGenSchedule.h
@@ -238,6 +238,10 @@ struct CodeGenProcModel {
   // This list is empty if the Processor has no UnsupportedFeatures.
   RecVec UnsupportedFeaturesDefs;
 
+  // List of MacroFusion.
+  // This list is empty if the Processor has no MacroFusion.
+  RecVec MacroFusions;
+
   // All read/write resources associated with this processor.
   RecVec WriteResDefs;
   RecVec ReadAdvanceDefs;
@@ -260,6 +264,8 @@ struct CodeGenProcModel {
     Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef),
     RetireControlUnit(nullptr), LoadQueue(nullptr), StoreQueue(nullptr) {}
 
+  bool hasMacroFusions() const { return !MacroFusions.empty(); }
+
   bool hasItineraries() const {
     return !ItinsDef->getValueAsListOfDefs("IID").empty();
   }
@@ -508,6 +514,9 @@ class CodeGenSchedModels {
   // Return true if any processors have itineraries.
   bool hasItineraries() const;
 
+  // Return true if any processors have MacroFusions.
+  bool hasMacroFusions() const;
+
   // Get a SchedWrite from its index.
   const CodeGenSchedRW &getSchedWrite(unsigned Idx) const {
     assert(Idx < SchedWrites.size() && "bad SchedWrite index");
@@ -610,6 +619,8 @@ class CodeGenSchedModels {
 
   void collectProcUnsupportedFeatures();
 
+  void collectMacroFusions();
+
   void inferSchedClasses();
 
   void checkMCInstPredicates() const;
diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
index 86a4b315222ceb27..835cd1c271f84863 100644
--- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
+++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
@@ -28,6 +28,8 @@ class MacroFusionPredicatorEmitter {
   RecordKeeper &Records;
   CodeGenTarget Target;
 
+  void emitMacroFusionEnum(std::vector<Record *> Fusions, PredicateExpander &PE,
+                           raw_ostream &OS);
   void emitMacroFusionDecl(std::vector<Record *> Fusions, PredicateExpander &PE,
                            raw_ostream &OS);
   void emitMacroFusionImpl(std::vector<Record *> Fusions, PredicateExpander &PE,
@@ -48,6 +50,33 @@ class MacroFusionPredicatorEmitter {
 };
 } // End anonymous namespace.
 
+void MacroFusionPredicatorEmitter::emitMacroFusionEnum(
+    std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
+  if (Fusions.empty())
+    return;
+  // Fusions are numbered by position; 256 is the value of `MaxMacroFusions`
+  // defined in MCSchedule.h.
+  if (Fusions.size() > 256)
+    PrintFatalError("Too many macro fusions! Please bump MaxMacroFusions!");
+
+  const auto TargetName = Target.getName();
+  OS << "#ifdef GET_MACRO_FUSION_ENUM\n\n"
+     << "namespace llvm {\n"
+     << "namespace " << TargetName << " {\n"
+     << "enum {\n";
+
+  // One enumerator per record, named after it and valued by its position.
+  unsigned Value = 0;
+  for (const Record *Fusion : Fusions)
+    OS << "  " << Fusion->getName() << " = " << Value++ << ",\n";
+
+  OS << "};\n"
+     << "} // end namespace " << TargetName << "\n"
+     << "} // end namespace llvm\n\n"
+     << "#endif\n"
+     << "#undef GET_MACRO_FUSION_ENUM\n\n";
+}
+
 void MacroFusionPredicatorEmitter::emitMacroFusionDecl(
     std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
   OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n\n";
@@ -191,6 +220,7 @@ void MacroFusionPredicatorEmitter::run(raw_ostream &OS) {
   std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion");
   // Sort macro fusions by name.
   llvm::sort(Fusions, LessRecord());
+  emitMacroFusionEnum(Fusions, PE, OS);
   emitMacroFusionDecl(Fusions, PE, OS);
   emitMacroFusionImpl(Fusions, PE, OS);
 }
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index f7a7172d61fc618b..86dce9527bf059c8 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -133,6 +133,8 @@ class SubtargetEmitter {
   void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS);
 
   void EmitSchedModel(raw_ostream &OS);
+  void emitMacroFusionBits(const CodeGenProcModel &ProcModel, raw_ostream &OS);
+  void emitGetMacroFusions(const std::string &ClassName, raw_ostream &OS);
   void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
   void ParseFeaturesFunction(raw_ostream &OS);
 
@@ -869,6 +871,17 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
   OS << "};\n";
 }
 
+void SubtargetEmitter::emitMacroFusionBits(const CodeGenProcModel &ProcModel,
+                                           raw_ostream &OS) {
+  // Emit the model's bitset, one `<Target>::<Fusion>` initializer per line.
+  OS << "\nstatic const MacroFusionBitset " << ProcModel.ModelName
+     << "MacroFusionBits  = {\n";
+  std::vector<std::string> Bits;
+  for (const Record *Fusion : ProcModel.MacroFusions)
+    Bits.push_back("  " + Target + "::" + Fusion->getNameInitAsString());
+  OS << llvm::join(Bits, ",\n") << "\n};\n";
+}
+
 // Find the WriteRes Record that defines processor resources for this
 // SchedWrite.
 Record *SubtargetEmitter::FindWriteResources(
@@ -1441,6 +1454,8 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     else if(!PM.ProcResourceDefs.empty())
       PrintFatalError(PM.ModelDef->getLoc(), "SchedMachineModel defines "
                     "ProcResources without defining WriteRes SchedWriteRes");
+    if (PM.hasMacroFusions())
+      emitMacroFusionBits(PM, OS);
 
     // Begin processor itinerary properties
     OS << "\n";
@@ -1487,7 +1502,11 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     if (PM.hasExtraProcessorInfo())
       OS << "  &" << PM.ModelName << "ExtraInfo,\n";
     else
-      OS << "  nullptr // No extra processor descriptor\n";
+      OS << "  nullptr, // No extra processor descriptor\n";
+    if (PM.hasMacroFusions()) {
+      OS << "  &" << PM.ModelName << "MacroFusionBits,\n";
+    } else
+      OS << "  nullptr, // No macro fusions\n";
     OS << "};\n";
   }
 }
@@ -1770,6 +1789,27 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
     PE.expandSTIPredicate(OS, Fn);
 }
 
+void SubtargetEmitter::emitGetMacroFusions(const std::string &ClassName,
+                                           raw_ostream &OS) {
+  OS << "std::vector<MacroFusionPredTy> " << ClassName
+     << "::getMacroFusions() const {\n";
+  OS.indent(2) << "switch(getSchedModel().getProcessorID()) {\n";
+  for (auto &Proc : TGT.getSchedModels().procModels()) {
+    if (Proc.hasMacroFusions()) {
+      OS.indent(4) << "case " << Proc.Index << ": // " << Proc.ModelName
+                   << "\n";
+      OS.indent(4) << "  return {";
+      std::vector<std::string> Predicates;
+      for (auto *R : Proc.MacroFusions)
+        Predicates.push_back("is" + R->getNameInitAsString());
+      OS << llvm::join(Predicates, ", ");
+      OS << "};\n";
+    }
+  }
+  OS.indent(2) << "}\n";
+  OS.indent(2) << "return {};\n}\n";
+}
+
 void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
                                        raw_ostream &OS) {
   const CodeGenHwModes &CGH = TGT.getHwModes();
@@ -1987,6 +2027,9 @@ void SubtargetEmitter::run(raw_ostream &OS) {
      << " const;\n";
   if (TGT.getHwModes().getNumModeIds() > 1)
     OS << "  unsigned getHwMode() const override;\n";
+  if (TGT.getSchedModels().hasMacroFusions())
+    OS << "  std::vector<MacroFusionPredTy> getMacroFusions() const "
+          "override;\n";
 
   STIPredicateExpander PE(Target);
   PE.setByRef(false);
@@ -2044,6 +2087,8 @@ void SubtargetEmitter::run(raw_ostream &OS) {
 
   EmitSchedModelHelpers(ClassName, OS);
   EmitHwModeCheck(ClassName, OS);
+  if (TGT.getSchedModels().hasMacroFusions())
+    emitGetMacroFusions(ClassName, OS);
 
   OS << "} // end namespace llvm\n\n";
 

>From 0eba0600e708ecf8b38579843aa8a210ecde01fc Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Tue, 21 Nov 2023 20:55:12 +0800
Subject: [PATCH 4/4] [TableGen] Enhance testability of TableGen-based macro
 fusion

We add an option `-mfusion` like `-mattr` to add/remove supported
macro fusions, so that we can test each macro fusion separately via
`llc`.

This PR is stacked on #72219, #72222, #72223
---
 .../llvm/CodeGen/TargetSubtargetInfo.h        | 26 +++++++-
 llvm/include/llvm/MC/MCSubtargetInfo.h        | 17 +++---
 llvm/include/llvm/Target/TargetSchedule.td    | 35 +++++------
 llvm/lib/CodeGen/TargetSubtargetInfo.cpp      | 60 +++++++++++++++++-
 llvm/lib/MC/MCSubtargetInfo.cpp               |  2 +
 llvm/unittests/CodeGen/MFCommon.inc           |  2 +-
 llvm/utils/TableGen/SubtargetEmitter.cpp      | 61 ++++++++++---------
 7 files changed, 145 insertions(+), 58 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 7c76293f3e5eaeaf..7091776b04a445a7 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -54,6 +54,22 @@ class TargetRegisterInfo;
 class TargetSchedModel;
 class Triple;
 
+//===----------------------------------------------------------------------===//
+
+/// A named macro fusion together with the predicate that recognizes it.
+struct MacroFusionEntry {
+  const char *Name;       ///< Fusion name; the key the comparisons below use.
+  MacroFusionPredTy Pred; ///< Predicate deciding whether two instrs fuse.
+
+  /// Order an entry against a bare name (heterogeneous binary search).
+  bool operator<(StringRef S) const { return StringRef(Name).compare(S) < 0; }
+
+  /// Order two entries by name (used when checking the table is sorted).
+  bool operator<(const MacroFusionEntry &Other) const {
+    return *this < StringRef(Other.Name);
+  }
+};
+
 //===----------------------------------------------------------------------===//
 ///
 /// TargetSubtargetInfo - Generic base class for all target subtargets.  All
@@ -61,6 +77,9 @@ class Triple;
 /// be exposed through a TargetSubtargetInfo-derived class.
 ///
 class TargetSubtargetInfo : public MCSubtargetInfo {
+private:
+  ArrayRef<MacroFusionEntry> MacroFusionTable;
+
 protected: // Can only create subclasses...
   TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                       StringRef FS, ArrayRef<SubtargetFeatureKV> PF,
@@ -68,7 +87,10 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
                       const MCWriteProcResEntry *WPR,
                       const MCWriteLatencyEntry *WL,
                       const MCReadAdvanceEntry *RA, const InstrStage *IS,
-                      const unsigned *OC, const unsigned *FP);
+                      const unsigned *OC, const unsigned *FP,
+                      ArrayRef<MacroFusionEntry> MF);
+
+  void overrideFusionBits();
 
 public:
   // AntiDepBreakMode - Type of anti-dependence breaking that should
@@ -326,7 +348,7 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
   virtual bool enableSpillageCopyElimination() const { return false; }
 
   /// Get the list of MacroFusion predicates.
-  virtual std::vector<MacroFusionPredTy> getMacroFusions() const { return {}; }
+  virtual std::vector<MacroFusionPredTy> getMacroFusions() const;
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index 66fb6c9383272e18..1ae5134f047198fc 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -92,6 +92,8 @@ class MCSubtargetInfo {
   FeatureBitset FeatureBits;           // Feature bits for current CPU + FS
   std::string FeatureString;           // Feature string
 
+  MacroFusionBitset FusionBits; // Fusion bits
+
 public:
   MCSubtargetInfo(const MCSubtargetInfo &) = default;
   MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU,
@@ -120,10 +122,13 @@ class MCSubtargetInfo {
     return FeatureBits[Feature];
   }
 
+  const MacroFusionBitset &getMacroFusionBits() const { return FusionBits; }
+  void enableMacroFusion(unsigned MacroFusion) { FusionBits.set(MacroFusion); }
+  void disableMacroFusion(unsigned MacroFusion) {
+    FusionBits.reset(MacroFusion);
+  }
   bool hasMacroFusion(unsigned MacroFusion) const {
-    const MacroFusionBitset *MacroFusionBits =
-        CPUSchedModel->getMacroFusionBits();
-    return MacroFusionBits && MacroFusionBits->test(MacroFusion);
+    return FusionBits.test(MacroFusion);
   }
 
 protected:
@@ -303,11 +308,7 @@ class MCSubtargetInfo {
   virtual bool shouldPrefetchAddressSpace(unsigned AS) const;
 
   /// Enable macro fusion for this subtarget.
-  virtual bool enableMacroFusion() const {
-    const MacroFusionBitset *MacroFusionBits =
-        CPUSchedModel->getMacroFusionBits();
-    return MacroFusionBits && MacroFusionBits->any();
-  }
+  virtual bool enableMacroFusion() const { return FusionBits.any(); }
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 613e4721b0d7c2a9..e81b6048b939ea59 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -662,7 +662,8 @@ def OneUse : OneUsePred;
 //   return true;
 // }
 // ```
-class Fusion<list<FusionPredicate> predicates> {
+class Fusion<string name, list<FusionPredicate> predicates> {
+  string Name = name;
   list<FusionPredicate> Predicates = predicates;
 }
 
@@ -683,21 +684,21 @@ class Fusion<list<FusionPredicate> predicates> {
 //   return true;
 // }
 // ```
-class SimpleFusion<MCInstPredicate firstPred, MCInstPredicate secondPred,
+class SimpleFusion<string name, MCInstPredicate firstPred, MCInstPredicate secondPred,
                    list<FusionPredicate> prolog = [],
                    list<FusionPredicate> epilog = []>
-  : Fusion<!listconcat(
-                  prolog,
-                  [
-                    SecondFusionPredicateWithMCInstPredicate<secondPred>,
-                    WildcardTrue,
-                    FirstFusionPredicateWithMCInstPredicate<firstPred>,
-                    SecondFusionPredicateWithMCInstPredicate<
-                      CheckAny<[
-                        CheckIsVRegOperand<0>,
-                        CheckSameRegOperand<0, 1>
-                      ]>>,
-                    OneUse,
-                    TieReg<0, 1>,
-                  ],
-                  epilog)>;
+  : Fusion<name, !listconcat(
+                    prolog,
+                    [
+                      SecondFusionPredicateWithMCInstPredicate<secondPred>,
+                      WildcardTrue,
+                      FirstFusionPredicateWithMCInstPredicate<firstPred>,
+                      SecondFusionPredicateWithMCInstPredicate<
+                        CheckAny<[
+                          CheckIsVRegOperand<0>,
+                          CheckSameRegOperand<0, 1>
+                        ]>>,
+                      OneUse,
+                      TieReg<0, 1>,
+                    ],
+                    epilog)>;
diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index 6c97bc0568bdeeee..9e3f1f4171611ec5 100644
--- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -14,16 +14,62 @@
 
 using namespace llvm;
 
+static cl::list<std::string> MFusions("mfusion", cl::CommaSeparated,
+                                      cl::desc("Target specific macro fusions"),
+                                      cl::value_desc("a1,+a2,-a3,..."));
+
 TargetSubtargetInfo::TargetSubtargetInfo(
     const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
     ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
     const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL,
     const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC,
-    const unsigned *FP)
-    : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {}
+    const unsigned *FP, ArrayRef<MacroFusionEntry> MF)
+    : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP),
+      MacroFusionTable(MF) {
+  // assert if MacroFusionTable is not sorted.
+  assert(llvm::is_sorted(MacroFusionTable));
+  overrideFusionBits();
+}
 
 TargetSubtargetInfo::~TargetSubtargetInfo() = default;
 
+/// Apply the `-mfusion` overrides to this subtarget's macro fusion bits. Each
+/// entry names a fusion from MacroFusionTable: a leading '-' disables it, a
+/// leading '+' (or no prefix) enables it. Unrecognized names, including empty
+/// ones, are reported on errs() and otherwise ignored.
+void TargetSubtargetInfo::overrideFusionBits() {
+  if (MFusions.getNumOccurrences() == 0)
+    return;
+
+  // This runs from the constructor, i.e. once for every subtarget created, so
+  // the option values must be left untouched: work on StringRef views instead
+  // of stripping the prefix from the stored strings.
+  for (StringRef Name : MFusions) {
+    const bool Disable = Name.consume_front("-");
+    if (!Disable)
+      Name.consume_front("+");
+
+    // MacroFusionTable is sorted, so binary search it. lower_bound only finds
+    // the first entry that is not less than Name; the match must be confirmed,
+    // otherwise a misspelled name would silently toggle a different fusion.
+    const auto *Pos = llvm::lower_bound(MacroFusionTable, Name);
+    if (Pos == MacroFusionTable.end() || StringRef(Pos->Name) != Name) {
+      errs() << "'" << Name
+             << "' is not a recognized macro fusion for this "
+             << "target (ignoring it)\n";
+      continue;
+    }
+
+    // The index is the same as the enum value.
+    unsigned Idx = Pos - MacroFusionTable.begin();
+    if (Disable)
+      disableMacroFusion(Idx);
+    else
+      enableMacroFusion(Idx);
+  }
+}
+
 bool TargetSubtargetInfo::enableAtomicExpand() const {
   return true;
 }
@@ -58,3 +104,13 @@ bool TargetSubtargetInfo::useAA() const {
 }
 
 void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { }
+
+std::vector<MacroFusionPredTy> TargetSubtargetInfo::getMacroFusions() const {
+  // Return the predicate of every table entry whose fusion bit is set.
+  const MacroFusionBitset &Enabled = getMacroFusionBits();
+  std::vector<MacroFusionPredTy> Preds;
+  for (unsigned Idx = 0, E = MacroFusionTable.size(); Idx != E; ++Idx)
+    if (Enabled.test(Idx))
+      Preds.push_back(MacroFusionTable[Idx].Pred);
+  return Preds;
+}
diff --git a/llvm/lib/MC/MCSubtargetInfo.cpp b/llvm/lib/MC/MCSubtargetInfo.cpp
index 8ee823e0377b730e..8ea1aca92e048df1 100644
--- a/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -215,6 +215,8 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU,
     CPUSchedModel = &getSchedModelForCPU(TuneCPU);
   else
     CPUSchedModel = &MCSchedModel::GetDefaultSchedModel();
+  if (CPUSchedModel->getMacroFusionBits())
+    FusionBits = *CPUSchedModel->getMacroFusionBits();
 }
 
 void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU,
diff --git a/llvm/unittests/CodeGen/MFCommon.inc b/llvm/unittests/CodeGen/MFCommon.inc
index 7de7eabdd1f6040b..954c7e3f9b8e2863 100644
--- a/llvm/unittests/CodeGen/MFCommon.inc
+++ b/llvm/unittests/CodeGen/MFCommon.inc
@@ -76,7 +76,7 @@ class BogusSubtarget : public TargetSubtargetInfo {
 public:
   BogusSubtarget(TargetMachine &TM)
       : TargetSubtargetInfo(Triple(""), "", "", "", {}, {}, nullptr, nullptr,
-                            nullptr, nullptr, nullptr, nullptr),
+                            nullptr, nullptr, nullptr, nullptr, {}),
         FL(), TL(TM) {}
   ~BogusSubtarget() override {}
 
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 86dce9527bf059c8..461bc67a1ab60a07 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -16,6 +16,7 @@
 #include "PredicateExpander.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCInstrItineraries.h"
@@ -134,7 +135,7 @@ class SubtargetEmitter {
 
   void EmitSchedModel(raw_ostream &OS);
   void emitMacroFusionBits(const CodeGenProcModel &ProcModel, raw_ostream &OS);
-  void emitGetMacroFusions(const std::string &ClassName, raw_ostream &OS);
+  void emitMacroFusionTable(RecVec Fusions, raw_ostream &OS);
   void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
   void ParseFeaturesFunction(raw_ostream &OS);
 
@@ -1789,25 +1790,23 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
     PE.expandSTIPredicate(OS, Fn);
 }
 
-void SubtargetEmitter::emitGetMacroFusions(const std::string &ClassName,
-                                           raw_ostream &OS) {
-  OS << "std::vector<MacroFusionPredTy> " << ClassName
-     << "::getMacroFusions() const {\n";
-  OS.indent(2) << "switch(getSchedModel().getProcessorID()) {\n";
-  for (auto &Proc : TGT.getSchedModels().procModels()) {
-    if (Proc.hasMacroFusions()) {
-      OS.indent(4) << "case " << Proc.Index << ": // " << Proc.ModelName
-                   << "\n";
-      OS.indent(4) << "  return {";
-      std::vector<std::string> Predicates;
-      for (auto *R : Proc.MacroFusions)
-        Predicates.push_back("is" + R->getNameInitAsString());
-      OS << llvm::join(Predicates, ", ");
-      OS << "};\n";
-    }
+void SubtargetEmitter::emitMacroFusionTable(RecVec Fusions, raw_ostream &OS) {
+  OS << "const llvm::MacroFusionEntry " << Target << "MacroFusionTable[] = {\n";
+  // One {name, predicate} row per record; names must be non-empty and unique.
+  SmallSet<StringRef, 32> Names;
+  for (auto &Fusion : Fusions) {
+    StringRef Name = Fusion->getValueAsString("Name");
+    if (Name.empty())
+      PrintFatalError(Fusion->getLoc(),
+                      "The name of macro fusion cannot be empty");
+    if (!Names.insert(Name).second) // Records Name; false means duplicate.
+      PrintFatalError(Fusion->getLoc(),
+                      "The name of macro fusion already exists");
+    OS.indent(2) << "{\"" << Name << "\", "
+                 << "llvm::is" + Fusion->getNameInitAsString() << "},\n";
+  }
-  OS.indent(2) << "}\n";
-  OS.indent(2) << "return {};\n}\n";
+
+  OS << "};\n\n";
 }
 
 void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
@@ -2027,9 +2026,6 @@ void SubtargetEmitter::run(raw_ostream &OS) {
      << " const;\n";
   if (TGT.getHwModes().getNumModeIds() > 1)
     OS << "  unsigned getHwMode() const override;\n";
-  if (TGT.getSchedModels().hasMacroFusions())
-    OS << "  std::vector<MacroFusionPredTy> getMacroFusions() const "
-          "override;\n";
 
   STIPredicateExpander PE(Target);
   PE.setByRef(false);
@@ -2044,6 +2040,13 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   OS << "\n#ifdef GET_SUBTARGETINFO_CTOR\n";
   OS << "#undef GET_SUBTARGETINFO_CTOR\n\n";
 
+  std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion");
+  // Sort macro fusions by name.
+  llvm::sort(Fusions, LessRecord());
+
+  if (!Fusions.empty())
+    emitMacroFusionTable(Fusions, OS);
+
   OS << "#include \"llvm/CodeGen/TargetSchedule.h\"\n\n";
   OS << "namespace llvm {\n";
   OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n";
@@ -2078,17 +2081,19 @@ void SubtargetEmitter::run(raw_ostream &OS) {
      << Target << "ReadAdvanceTable, ";
   OS << '\n'; OS.indent(24);
   if (SchedModels.hasItineraries()) {
-    OS << Target << "Stages, "
-       << Target << "OperandCycles, "
-       << Target << "ForwardingPaths";
+    OS << Target << "Stages, " << Target << "OperandCycles, " << Target
+       << "ForwardingPaths, ";
   } else
-    OS << "nullptr, nullptr, nullptr";
+    OS << "nullptr, nullptr, nullptr, ";
+  if (!Fusions.empty()) {
+    OS << "ArrayRef(" << Target << "MacroFusionTable, " << Fusions.size()
+       << ")";
+  } else
+    OS << "std::nullopt";
   OS << ") {}\n\n";
 
   EmitSchedModelHelpers(ClassName, OS);
   EmitHwModeCheck(ClassName, OS);
-  if (TGT.getSchedModels().hasMacroFusions())
-    emitGetMacroFusions(ClassName, OS);
 
   OS << "} // end namespace llvm\n\n";
 



More information about the llvm-commits mailing list