[llvm] 39525a6 - [DFAPacketizer] Allow up to 64 functional units

James Molloy via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 5 08:40:50 PST 2019


Author: jmolloy
Date: 2019-11-05T15:41:42Z
New Revision: 39525a6723854e9920b2560aadaa399cfbbd006a

URL: https://github.com/llvm/llvm-project/commit/39525a6723854e9920b2560aadaa399cfbbd006a
DIFF: https://github.com/llvm/llvm-project/commit/39525a6723854e9920b2560aadaa399cfbbd006a.diff

LOG: [DFAPacketizer] Allow up to 64 functional units

Summary:
To drive the automaton we used a uint64_t as an action type. This
contained the transition's resource requirements as a conjunction:

  (a OR b) AND (b OR c)

We encoded this conjunction as a sequence of four 16-bit bitmasks.
This limited the number of addressable functional units to 16, which
is quite low and has bitten many people in the past.

Instead, the DFAEmitter now generates a lookup table from InstrItinerary
class (index of the ItinData inside the ProcItineraries) to an internal
action index which is essentially a dense embedding of the conjunctive
form. Because we never materialize the conjunctive form, we no longer
have the 16 FU restriction.

In this patch we limit to 64 functional units due to using a uint64_t
bitmask in the DFAEmitter. Now that we've decoupled these representations
we can increase this in future.

Reviewers: ThomasRaoux, kparzysz, majnemer

Reviewed By: ThomasRaoux

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69110

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/DFAPacketizer.h
    llvm/lib/CodeGen/DFAPacketizer.cpp
    llvm/utils/TableGen/DFAPacketizerEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h
index 705465b15c4c..9cdaedc9e861 100644
--- a/llvm/include/llvm/CodeGen/DFAPacketizer.h
+++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h
@@ -46,43 +46,18 @@ class MCInstrDesc;
 class SUnit;
 class TargetInstrInfo;
 
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
-// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
-//
-// e.g. terms x resource bit combinations that fit in uint32_t:
-//      4 terms x 8  bits = 32 bits
-//      3 terms x 10 bits = 30 bits
-//      2 terms x 16 bits = 32 bits
-//
-// e.g. terms x resource bit combinations that fit in uint64_t:
-//      8 terms x 8  bits = 64 bits
-//      7 terms x 9  bits = 63 bits
-//      6 terms x 10 bits = 60 bits
-//      5 terms x 12 bits = 60 bits
-//      4 terms x 16 bits = 64 bits <--- current
-//      3 terms x 21 bits = 63 bits
-//      2 terms x 32 bits = 64 bits
-//
-#define DFA_MAX_RESTERMS        4   // The max # of AND'ed resource terms.
-#define DFA_MAX_RESOURCES       16  // The max # of resource bits in one term.
-
-using DFAInput = uint64_t;
-using DFAStateInput = int64_t;
-
-#define DFA_TBLTYPE             "int64_t" // For generating DFAStateInputTable.
-// --------------------------------------------------------------------
-
 class DFAPacketizer {
 private:
   const InstrItineraryData *InstrItins;
-  Automaton<DFAInput> A;
+  Automaton<uint64_t> A;
+  /// For every itinerary, an "action" to apply to the automaton. This removes
+  /// the redundancy in actions between itinerary classes.
+  ArrayRef<unsigned> ItinActions;
 
 public:
-  DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a) :
-      InstrItins(InstrItins), A(std::move(a)) {
+  DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a,
+                ArrayRef<unsigned> ItinActions)
+      : InstrItins(InstrItins), A(std::move(a)), ItinActions(ItinActions) {
     // Start off with resource tracking disabled.
     A.enableTranscription(false);
   }
@@ -99,12 +74,6 @@ class DFAPacketizer {
     A.enableTranscription(Track);
   }
 
-  // Return the DFAInput for an instruction class.
-  DFAInput getInsnInput(unsigned InsnClass);
-
-  // Return the DFAInput for an instruction class input vector.
-  static DFAInput getInsnInput(const std::vector<unsigned> &InsnClass);
-
   // Check if the resources occupied by a MCInstrDesc are available in
   // the current state.
   bool canReserveResources(const MCInstrDesc *MID);

diff  --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp
index a169c3cb16b2..afcf014bca40 100644
--- a/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -52,68 +52,22 @@ static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
 
 static unsigned InstrCount = 0;
 
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
-  return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
-}
-
-/// Return the DFAInput for an instruction class input vector.
-/// This function is used in both DFAPacketizer.cpp and in
-/// DFAPacketizerEmitter.cpp.
-static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
-  DFAInput InsnInput = 0;
-  assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
-         "Exceeded maximum number of DFA terms");
-  for (auto U : InsnClass)
-    InsnInput = addDFAFuncUnits(InsnInput, U);
-  return InsnInput;
-}
-
-// --------------------------------------------------------------------
-
-// Make sure DFA types are large enough for the number of terms & resources.
-static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
-              (8 * sizeof(DFAInput)),
-              "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
-static_assert(
-    (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
-    "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
-
-// Return the DFAInput for an instruction class.
-DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
-  // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
-  DFAInput InsnInput = 0;
-  unsigned i = 0;
-  (void)i;
-  for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
-       *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
-    InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
-    assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
-  }
-  return InsnInput;
-}
-
-// Return the DFAInput for an instruction class input vector.
-DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
-  return getDFAInsnInput(InsnClass);
-}
-
 // Check if the resources occupied by a MCInstrDesc are available in the
 // current state.
 bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
-  unsigned InsnClass = MID->getSchedClass();
-  DFAInput InsnInput = getInsnInput(InsnClass);
-  return A.canAdd(InsnInput);
+  unsigned Action = ItinActions[MID->getSchedClass()];
+  if (MID->getSchedClass() == 0 || Action == 0)
+    return false;
+  return A.canAdd(Action);
 }
 
 // Reserve the resources occupied by a MCInstrDesc and change the current
 // state to reflect that change.
 void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
-  unsigned InsnClass = MID->getSchedClass();
-  DFAInput InsnInput = getInsnInput(InsnClass);
-  A.add(InsnInput);
+  unsigned Action = ItinActions[MID->getSchedClass()];
+  if (MID->getSchedClass() == 0 || Action == 0)
+    return;
+  A.add(Action);
 }
 
 // Check if the resources occupied by a machine instruction are available

diff  --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index ccb4ef1b9678..018bda1b6090 100644
--- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -16,15 +16,16 @@
 
 #define DEBUG_TYPE "dfa-emitter"
 
+#include "CodeGenSchedule.h"
 #include "CodeGenTarget.h"
 #include "DFAEmitter.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
 #include <cassert>
 #include <cstdint>
 #include <map>
@@ -35,216 +36,101 @@
 
 using namespace llvm;
 
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
-// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
-//
-// e.g. terms x resource bit combinations that fit in uint32_t:
-//      4 terms x 8  bits = 32 bits
-//      3 terms x 10 bits = 30 bits
-//      2 terms x 16 bits = 32 bits
-//
-// e.g. terms x resource bit combinations that fit in uint64_t:
-//      8 terms x 8  bits = 64 bits
-//      7 terms x 9  bits = 63 bits
-//      6 terms x 10 bits = 60 bits
-//      5 terms x 12 bits = 60 bits
-//      4 terms x 16 bits = 64 bits <--- current
-//      3 terms x 21 bits = 63 bits
-//      2 terms x 32 bits = 64 bits
-//
-#define DFA_MAX_RESTERMS        4   // The max # of AND'ed resource terms.
-#define DFA_MAX_RESOURCES       16  // The max # of resource bits in one term.
-
-typedef uint64_t                DFAInput;
-typedef int64_t                 DFAStateInput;
-#define DFA_TBLTYPE             "int64_t" // For generating DFAStateInputTable.
+// We use a uint64_t to represent a resource bitmask.
+#define DFA_MAX_RESOURCES 64
 
 namespace {
+using ResourceVector = SmallVector<uint64_t, 4>;
 
-  DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
-    return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
-  }
-
-  /// Return the DFAInput for an instruction class input vector.
-  /// This function is used in both DFAPacketizer.cpp and in
-  /// DFAPacketizerEmitter.cpp.
-  DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
-    DFAInput InsnInput = 0;
-    assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
-           "Exceeded maximum number of DFA terms");
-    for (auto U : InsnClass)
-      InsnInput = addDFAFuncUnits(InsnInput, U);
-    return InsnInput;
-  }
-
-} // end anonymous namespace
+struct ScheduleClass {
+  /// The parent itinerary index (processor model ID).
+  unsigned ItineraryID;
 
-// --------------------------------------------------------------------
+  /// Index within this itinerary of the schedule class.
+  unsigned Idx;
 
-#ifndef NDEBUG
-// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
-//
-// dbgsInsnClass - When debugging, print instruction class stages.
-//
-void dbgsInsnClass(const std::vector<unsigned> &InsnClass);
-//
-// dbgsStateInfo - When debugging, print the set of state info.
-//
-void dbgsStateInfo(const std::set<unsigned> &stateInfo);
-//
-// dbgsIndent - When debugging, indent by the specified amount.
-//
-void dbgsIndent(unsigned indent);
-#endif
+  /// The index within the uniqued set of required resources of Resources.
+  unsigned ResourcesIdx;
 
-//
-// class DFAPacketizerEmitter: class that generates and prints out the DFA
-// for resource tracking.
-//
-namespace {
+  /// Conjunctive list of resource requirements:
+  ///   {a|b, b|c} => (a OR b) AND (b or c).
+  /// Resources are unique across all itineraries.
+  ResourceVector Resources;
+};
 
+// Generates and prints out the DFA for resource tracking.
 class DFAPacketizerEmitter {
 private:
   std::string TargetName;
-  //
-  // allInsnClasses is the set of all possible resources consumed by an
-  // InstrStage.
-  //
-  std::vector<std::vector<unsigned>> allInsnClasses;
   RecordKeeper &Records;
 
+  UniqueVector<ResourceVector> UniqueResources;
+  std::vector<ScheduleClass> ScheduleClasses;
+  std::map<std::string, uint64_t> FUNameToBitsMap;
+  std::map<unsigned, uint64_t> ComboBitToBitsMap;
+
 public:
   DFAPacketizerEmitter(RecordKeeper &R);
 
-  //
-  // collectAllFuncUnits - Construct a map of function unit names to bits.
-  //
-  int collectAllFuncUnits(std::vector<Record*> &ProcItinList,
-                           std::map<std::string, unsigned> &FUNameToBitsMap,
-                           int &maxResources,
-                           raw_ostream &OS);
-
-  //
-  // collectAllComboFuncs - Construct a map from a combo function unit bit to
-  //                        the bits of all included functional units.
-  //
-  int collectAllComboFuncs(std::vector<Record*> &ComboFuncList,
-                           std::map<std::string, unsigned> &FUNameToBitsMap,
-                           std::map<unsigned, unsigned> &ComboBitToBitsMap,
-                           raw_ostream &OS);
-
-  //
-  // collectOneInsnClass - Populate allInsnClasses with one instruction class.
-  //
-  int collectOneInsnClass(const std::string &ProcName,
-                           std::vector<Record*> &ProcItinList,
-                           std::map<std::string, unsigned> &FUNameToBitsMap,
-                           Record *ItinData,
-                           raw_ostream &OS);
-
-  //
-  // collectAllInsnClasses - Populate allInsnClasses which is a set of units
-  // used in each stage.
-  //
-  int collectAllInsnClasses(const std::string &ProcName,
-                           std::vector<Record*> &ProcItinList,
-                           std::map<std::string, unsigned> &FUNameToBitsMap,
-                           std::vector<Record*> &ItinDataList,
-                           int &maxStages,
-                           raw_ostream &OS);
+  // Construct a map of function unit names to bits.
+  int collectAllFuncUnits(
+      ArrayRef<const CodeGenProcModel *> ProcModels);
+
+  // Construct a map from a combo function unit bit to the bits of all included
+  // functional units.
+  int collectAllComboFuncs(ArrayRef<Record *> ComboFuncList);
+
+  ResourceVector getResourcesForItinerary(Record *Itinerary);
+  void createScheduleClasses(unsigned ItineraryIdx, const RecVec &Itineraries);
 
   // Emit code for a subset of itineraries.
   void emitForItineraries(raw_ostream &OS,
-                          std::vector<Record *> &ProcItinList,
+                          std::vector<const CodeGenProcModel *> &ProcItinList,
                           std::string DFAName);
 
   void run(raw_ostream &OS);
 };
 } // end anonymous namespace
 
-#ifndef NDEBUG
-// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
-//
-// dbgsInsnClass - When debugging, print instruction class stages.
-//
-void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
-  LLVM_DEBUG(dbgs() << "InsnClass: ");
-  for (unsigned i = 0; i < InsnClass.size(); ++i) {
-    if (i > 0) {
-      LLVM_DEBUG(dbgs() << ", ");
-    }
-    LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i]));
-  }
-  DFAInput InsnInput = getDFAInsnInput(InsnClass);
-  LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")");
-}
+DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R)
+    : TargetName(CodeGenTarget(R).getName()), Records(R) {}
 
-//
-// dbgsIndent - When debugging, indent by the specified amount.
-//
-void dbgsIndent(unsigned indent) {
-  for (unsigned i = 0; i < indent; ++i) {
-    LLVM_DEBUG(dbgs() << " ");
-  }
-}
-#endif // NDEBUG
-
-DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
-  TargetName(CodeGenTarget(R).getName()), Records(R) {}
-
-//
-// collectAllFuncUnits - Construct a map of function unit names to bits.
-//
 int DFAPacketizerEmitter::collectAllFuncUnits(
-                            std::vector<Record*> &ProcItinList,
-                            std::map<std::string, unsigned> &FUNameToBitsMap,
-                            int &maxFUs,
-                            raw_ostream &OS) {
+    ArrayRef<const CodeGenProcModel *> ProcModels) {
   LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
                        "----------------------\n");
   LLVM_DEBUG(dbgs() << "collectAllFuncUnits");
-  LLVM_DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
+  LLVM_DEBUG(dbgs() << " (" << ProcModels.size() << " itineraries)\n");
+
+  std::set<Record *> ProcItinList;
+  for (const CodeGenProcModel *Model : ProcModels)
+    ProcItinList.insert(Model->ItinsDef);
 
   int totalFUs = 0;
   // Parse functional units for all the itineraries.
-  for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
-    Record *Proc = ProcItinList[i];
-    std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
+  for (Record *Proc : ProcItinList) {
+    std::vector<Record *> FUs = Proc->getValueAsListOfDefs("FU");
 
-    LLVM_DEBUG(dbgs() << "    FU:" << i << " (" << FUs.size() << " FUs) "
-                      << Proc->getName());
+    LLVM_DEBUG(dbgs() << "    FU:"
+                      << " (" << FUs.size() << " FUs) " << Proc->getName());
 
     // Convert macros to bits for each stage.
     unsigned numFUs = FUs.size();
     for (unsigned j = 0; j < numFUs; ++j) {
-      assert ((j < DFA_MAX_RESOURCES) &&
-                      "Exceeded maximum number of representable resources");
-      unsigned FuncResources = (unsigned) (1U << j);
+      assert((j < DFA_MAX_RESOURCES) &&
+             "Exceeded maximum number of representable resources");
+      uint64_t FuncResources = 1ULL << j;
       FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
       LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x"
                         << Twine::utohexstr(FuncResources));
     }
-    if (((int) numFUs) > maxFUs) {
-      maxFUs = numFUs;
-    }
     totalFUs += numFUs;
     LLVM_DEBUG(dbgs() << "\n");
   }
   return totalFUs;
 }
 
-//
-// collectAllComboFuncs - Construct a map from a combo function unit bit to
-//                        the bits of all included functional units.
-//
-int DFAPacketizerEmitter::collectAllComboFuncs(
-                            std::vector<Record*> &ComboFuncList,
-                            std::map<std::string, unsigned> &FUNameToBitsMap,
-                            std::map<unsigned, unsigned> &ComboBitToBitsMap,
-                            raw_ostream &OS) {
+int DFAPacketizerEmitter::collectAllComboFuncs(ArrayRef<Record *> ComboFuncList) {
   LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
                        "----------------------\n");
   LLVM_DEBUG(dbgs() << "collectAllComboFuncs");
@@ -253,27 +139,27 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
   int numCombos = 0;
   for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
     Record *Func = ComboFuncList[i];
-    std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");
+    std::vector<Record *> FUs = Func->getValueAsListOfDefs("CFD");
 
     LLVM_DEBUG(dbgs() << "    CFD:" << i << " (" << FUs.size() << " combo FUs) "
                       << Func->getName() << "\n");
 
     // Convert macros to bits for each stage.
     for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
-      assert ((j < DFA_MAX_RESOURCES) &&
-                      "Exceeded maximum number of DFA resources");
+      assert((j < DFA_MAX_RESOURCES) &&
+             "Exceeded maximum number of DFA resources");
       Record *FuncData = FUs[j];
       Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
-      const std::vector<Record*> &FuncList =
-                                   FuncData->getValueAsListOfDefs("FuncList");
+      const std::vector<Record *> &FuncList =
+          FuncData->getValueAsListOfDefs("FuncList");
       const std::string &ComboFuncName = ComboFunc->getName();
-      unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
-      unsigned ComboResources = ComboBit;
+      uint64_t ComboBit = FUNameToBitsMap[ComboFuncName];
+      uint64_t ComboResources = ComboBit;
       LLVM_DEBUG(dbgs() << "      combo: " << ComboFuncName << ":0x"
                         << Twine::utohexstr(ComboResources) << "\n");
       for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
         std::string FuncName = FuncList[k]->getName();
-        unsigned FuncResources = FUNameToBitsMap[FuncName];
+        uint64_t FuncResources = FUNameToBitsMap[FuncName];
         LLVM_DEBUG(dbgs() << "        " << FuncName << ":0x"
                           << Twine::utohexstr(FuncResources) << "\n");
         ComboResources |= FuncResources;
@@ -288,101 +174,33 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
   return numCombos;
 }
 
-//
-// collectOneInsnClass - Populate allInsnClasses with one instruction class
-//
-int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
-                        std::vector<Record*> &ProcItinList,
-                        std::map<std::string, unsigned> &FUNameToBitsMap,
-                        Record *ItinData,
-                        raw_ostream &OS) {
-  const std::vector<Record*> &StageList =
-    ItinData->getValueAsListOfDefs("Stages");
-
-  // The number of stages.
-  unsigned NStages = StageList.size();
-
-  LLVM_DEBUG(dbgs() << "    " << ItinData->getValueAsDef("TheClass")->getName()
-                    << "\n");
-
-  std::vector<unsigned> UnitBits;
-
-  // Compute the bitwise or of each unit used in this stage.
-  for (unsigned i = 0; i < NStages; ++i) {
-    const Record *Stage = StageList[i];
-
-    // Get unit list.
-    const std::vector<Record*> &UnitList =
-      Stage->getValueAsListOfDefs("Units");
-
-    LLVM_DEBUG(dbgs() << "        stage:" << i << " [" << UnitList.size()
-                      << " units]:");
-    unsigned dbglen = 26;  // cursor after stage dbgs
-
-    // Compute the bitwise or of each unit used in this stage.
-    unsigned UnitBitValue = 0;
-    for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
-      // Conduct bitwise or.
-      std::string UnitName = UnitList[j]->getName();
-      LLVM_DEBUG(dbgs() << " " << j << ":" << UnitName);
-      dbglen += 3 + UnitName.length();
-      assert(FUNameToBitsMap.count(UnitName));
-      UnitBitValue |= FUNameToBitsMap[UnitName];
-    }
-
-    if (UnitBitValue != 0)
-      UnitBits.push_back(UnitBitValue);
-
-    while (dbglen <= 64) {   // line up bits dbgs
-        dbglen += 8;
-        LLVM_DEBUG(dbgs() << "\t");
+ResourceVector
+DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) {
+  ResourceVector Resources;
+  assert(Itinerary);
+  for (Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) {
+    uint64_t StageResources = 0;
+    for (Record *Unit : StageDef->getValueAsListOfDefs("Units")) {
+      StageResources |= FUNameToBitsMap[Unit->getName()];
     }
-    LLVM_DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue)
-                      << ")\n");
+    if (StageResources != 0)
+      Resources.push_back(StageResources);
   }
-
-  if (!UnitBits.empty())
-    allInsnClasses.push_back(UnitBits);
-
-  LLVM_DEBUG({
-    dbgs() << "        ";
-    dbgsInsnClass(UnitBits);
-    dbgs() << "\n";
-  });
-
-  return NStages;
+  return Resources;
 }
 
-//
-// collectAllInsnClasses - Populate allInsnClasses which is a set of units
-// used in each stage.
-//
-int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
-                            std::vector<Record*> &ProcItinList,
-                            std::map<std::string, unsigned> &FUNameToBitsMap,
-                            std::vector<Record*> &ItinDataList,
-                            int &maxStages,
-                            raw_ostream &OS) {
-  // Collect all instruction classes.
-  unsigned M = ItinDataList.size();
-
-  int numInsnClasses = 0;
-  LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
-                       "----------------------\n"
-                    << "collectAllInsnClasses " << ProcName << " (" << M
-                    << " classes)\n");
-
-  // Collect stages for each instruction class for all itinerary data
-  for (unsigned j = 0; j < M; j++) {
-    Record *ItinData = ItinDataList[j];
-    int NStages = collectOneInsnClass(ProcName, ProcItinList,
-                                      FUNameToBitsMap, ItinData, OS);
-    if (NStages > maxStages) {
-      maxStages = NStages;
+void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx,
+                                                 const RecVec &Itineraries) {
+  unsigned Idx = 0;
+  for (Record *Itinerary : Itineraries) {
+    if (!Itinerary) {
+      ScheduleClasses.push_back({ItineraryIdx, Idx++, 0, ResourceVector{}});
+      continue;
     }
-    numInsnClasses++;
+    ResourceVector Resources = getResourcesForItinerary(Itinerary);
+    ScheduleClasses.push_back(
+        {ItineraryIdx, Idx++, UniqueResources.insert(Resources), Resources});
   }
-  return numInsnClasses;
 }
 
 //
@@ -393,19 +211,17 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
      << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
   OS << "namespace llvm {\n";
 
-  OS << "\n// Input format:\n";
-  OS << "#define DFA_MAX_RESTERMS        " << DFA_MAX_RESTERMS
-     << "\t// maximum AND'ed resource terms\n";
-  OS << "#define DFA_MAX_RESOURCES       " << DFA_MAX_RESOURCES
-     << "\t// maximum resource bits in one term\n";
-
-  // Collect processor iteraries.
-  std::vector<Record*> ProcItinList =
-    Records.getAllDerivedDefinitions("ProcessorItineraries");
+  CodeGenTarget CGT(Records);
+  CodeGenSchedModels CGS(Records, CGT);
 
-  std::unordered_map<std::string, std::vector<Record*>> ItinsByNamespace;
-  for (Record *R : ProcItinList)
-    ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R);
+  std::unordered_map<std::string, std::vector<const CodeGenProcModel *>>
+      ItinsByNamespace;
+  for (const CodeGenProcModel &ProcModel : CGS.procModels()) {
+    if (ProcModel.hasItineraries()) {
+      auto NS = ProcModel.ItinsDef->getValueAsString("PacketizerNamespace");
+      ItinsByNamespace[NS].push_back(&ProcModel);
+    }
+  }
 
   for (auto &KV : ItinsByNamespace)
     emitForItineraries(OS, KV.second, KV.first);
@@ -413,80 +229,68 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
 }
 
 void DFAPacketizerEmitter::emitForItineraries(
-    raw_ostream &OS, std::vector<Record *> &ProcItinList,
+    raw_ostream &OS, std::vector<const CodeGenProcModel *> &ProcModels,
     std::string DFAName) {
-  //
-  // Collect the Functional units.
-  //
-  std::map<std::string, unsigned> FUNameToBitsMap;
-  int maxResources = 0;
-  collectAllFuncUnits(ProcItinList,
-                              FUNameToBitsMap, maxResources, OS);
-
-  //
-  // Collect the Combo Functional units.
-  //
-  std::map<unsigned, unsigned> ComboBitToBitsMap;
-  std::vector<Record*> ComboFuncList =
-    Records.getAllDerivedDefinitions("ComboFuncUnits");
-  collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS);
-
-  //
-  // Collect the itineraries.
-  //
-  int maxStages = 0;
-  int numInsnClasses = 0;
-  for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
-    Record *Proc = ProcItinList[i];
-
-    // Get processor itinerary name.
-    const std::string &ProcName = Proc->getName();
-
-    // Skip default.
-    if (ProcName == "NoItineraries")
-      continue;
-
-    // Sanity check for at least one instruction itinerary class.
-    unsigned NItinClasses =
-      Records.getAllDerivedDefinitions("InstrItinClass").size();
-    if (NItinClasses == 0)
-      return;
+  OS << "} // end namespace llvm\n\n";
+  OS << "namespace {\n";
+  collectAllFuncUnits(ProcModels);
+  collectAllComboFuncs(Records.getAllDerivedDefinitions("ComboFuncUnits"));
 
-    // Get itinerary data list.
-    std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
+  // Collect the itineraries.
+  DenseMap<const CodeGenProcModel *, unsigned> ProcModelStartIdx;
+  for (const CodeGenProcModel *Model : ProcModels) {
+    assert(Model->hasItineraries());
+    ProcModelStartIdx[Model] = ScheduleClasses.size();
+    createScheduleClasses(Model->Index, Model->ItinDefList);
+  }
 
-    // Collect all instruction classes
-    numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList,
-                          FUNameToBitsMap, ItinDataList, maxStages, OS);
+  // Output the mapping from ScheduleClass to ResourcesIdx.
+  unsigned Idx = 0;
+  OS << "unsigned " << TargetName << DFAName << "ResourceIndices[] = {";
+  for (const ScheduleClass &SC : ScheduleClasses) {
+    if (Idx++ % 32 == 0)
+      OS << "\n  ";
+    OS << SC.ResourcesIdx << ", ";
+  }
+  OS << "\n};\n\n";
+
+  // And the mapping from Itinerary index into the previous table.
+  OS << "unsigned " << TargetName << DFAName
+     << "ProcResourceIndexStart[] = {\n";
+  OS << "  0, // NoSchedModel\n";
+  for (const CodeGenProcModel *Model : ProcModels) {
+    OS << "  " << ProcModelStartIdx[Model] << ", // " << Model->ModelName
+       << "\n";
   }
+  OS << ScheduleClasses.size() << "\n};\n\n";
 
   // The type of a state in the nondeterministic automaton we're defining.
-  using NfaStateTy = unsigned;
+  using NfaStateTy = uint64_t;
 
   // Given a resource state, return all resource states by applying
   // InsnClass.
-  auto applyInsnClass = [&](ArrayRef<unsigned> InsnClass,
-                            NfaStateTy State) -> std::deque<unsigned> {
-    std::deque<unsigned> V(1, State);
+  auto applyInsnClass = [&](const ResourceVector &InsnClass,
+                            NfaStateTy State) -> std::deque<NfaStateTy> {
+    std::deque<NfaStateTy> V(1, State);
     // Apply every stage in the class individually.
-    for (unsigned Stage : InsnClass) {
+    for (NfaStateTy Stage : InsnClass) {
       // Apply this stage to every existing member of V in turn.
       size_t Sz = V.size();
       for (unsigned I = 0; I < Sz; ++I) {
-        unsigned S = V.front();
+        NfaStateTy S = V.front();
         V.pop_front();
 
         // For this stage, state combination, try all possible resources.
         for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) {
-          unsigned ResourceMask = 1U << J;
+          NfaStateTy ResourceMask = 1ULL << J;
           if ((ResourceMask & Stage) == 0)
             // This resource isn't required by this stage.
             continue;
-          unsigned Combo = ComboBitToBitsMap[ResourceMask];
+          NfaStateTy Combo = ComboBitToBitsMap[ResourceMask];
           if (Combo && ((~S & Combo) != Combo))
             // This combo units bits are not available.
             continue;
-          unsigned ResultingResourceState = S | ResourceMask | Combo;
+          NfaStateTy ResultingResourceState = S | ResourceMask | Combo;
           if (ResultingResourceState == S)
             continue;
           V.push_back(ResultingResourceState);
@@ -499,9 +303,9 @@ void DFAPacketizerEmitter::emitForItineraries(
   // Given a resource state, return a quick (conservative) guess as to whether
   // InsnClass can be applied. This is a filter for the more heavyweight
   // applyInsnClass.
-  auto canApplyInsnClass = [](ArrayRef<unsigned> InsnClass,
+  auto canApplyInsnClass = [](const ResourceVector &InsnClass,
                               NfaStateTy State) -> bool {
-    for (unsigned Resources : InsnClass) {
+    for (NfaStateTy Resources : InsnClass) {
       if ((State | Resources) == State)
         return false;
     }
@@ -515,20 +319,18 @@ void DFAPacketizerEmitter::emitForItineraries(
   while (!Worklist.empty()) {
     NfaStateTy State = Worklist.front();
     Worklist.pop_front();
-    for (unsigned i = 0; i < allInsnClasses.size(); i++) {
-      const std::vector<unsigned> &InsnClass = allInsnClasses[i];
-      if (!canApplyInsnClass(InsnClass, State))
+    for (const ResourceVector &Resources : UniqueResources) {
+      if (!canApplyInsnClass(Resources, State))
         continue;
-      for (unsigned NewState : applyInsnClass(InsnClass, State)) {
+      unsigned ResourcesID = UniqueResources.idFor(Resources);
+      for (uint64_t NewState : applyInsnClass(Resources, State)) {
         if (SeenStates.emplace(NewState).second)
           Worklist.emplace_back(NewState);
-        Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass));
+        Emitter.addTransition(State, NewState, ResourcesID);
       }
     }
   }
 
-  OS << "} // end namespace llvm\n\n";
-  OS << "namespace {\n";
   std::string TargetAndDFAName = TargetName + DFAName;
   Emitter.emit(TargetAndDFAName, OS);
   OS << "} // end anonymous namespace\n\n";
@@ -541,7 +343,13 @@ void DFAPacketizerEmitter::emitForItineraries(
      << "  static Automaton<uint64_t> A(ArrayRef<" << TargetAndDFAName
      << "Transition>(" << TargetAndDFAName << "Transitions), "
      << TargetAndDFAName << "TransitionInfo);\n"
-     << "  return new DFAPacketizer(IID, A);\n"
+     << "  unsigned ProcResIdxStart = " << TargetAndDFAName
+     << "ProcResourceIndexStart[IID->SchedModel.ProcID];\n"
+     << "  unsigned ProcResIdxNum = " << TargetAndDFAName
+     << "ProcResourceIndexStart[IID->SchedModel.ProcID + 1] - "
+        "ProcResIdxStart;\n"
+     << "  return new DFAPacketizer(IID, A, {&" << TargetAndDFAName
+     << "ResourceIndices[ProcResIdxStart], ProcResIdxNum});\n"
      << "\n}\n\n";
 }
 


        


More information about the llvm-commits mailing list