[llvm] 116838b - [MachinePipeliner] Fix the interpretation of the scheduling model

Thu Sep 15 18:26:30 PDT 2022

Author: Yuta Mukai
Date: 2022-09-16T09:51:48+09:00
New Revision: 116838b1516a0c856b476061d7d8c938281e147c

URL: https://github.com/llvm/llvm-project/commit/116838b1516a0c856b476061d7d8c938281e147c
DIFF: https://github.com/llvm/llvm-project/commit/116838b1516a0c856b476061d7d8c938281e147c.diff

LOG: [MachinePipeliner] Fix the interpretation of the scheduling model

The method of counting resource consumption is modified to be based on
"Cycles" value when DFA is not used.

The calculation of ResMII is modified to total "Cycles" and divide it
by the number of units for each resource. Previously, ResMII was
excessive because it was assumed that resources were consumed for
the cycles of "Latency" value.

The method of resource reservation is modified similarly. When a
value of "Cycles" is larger than 1, the resource is considered to be
consumed by 1 for cycles of its length from the scheduled cycle.
To realize this, ResourceManager maintains a resource table for all
slots. Previously, resource consumption was always 1 for 1 cycle
regardless of the value of "Cycles" or "Latency".

In addition, the number of micro operations per cycle is modified to
be constrained by "IssueWidth". To disable the constraint,
--pipeliner-force-issue-width=100 can be used.

For the case of using DFA, the scheduling results are unchanged.

Reviewed By: dpenry

Differential Revision: https://reviews.llvm.org/D133572

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/MachinePipeliner.h
    llvm/lib/CodeGen/MachinePipeliner.cpp
    llvm/test/CodeGen/PowerPC/sms-iterator.ll
    llvm/test/CodeGen/PowerPC/sms-phi-1.ll
    llvm/test/CodeGen/PowerPC/sms-phi-2.ll
    llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
    llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
    llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
    llvm/test/CodeGen/Thumb2/swp-fixedii.mir
    llvm/test/CodeGen/Thumb2/swp-regpressure.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index fc1cc0a879ca..24a248f8e3d5 100644

--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -58,6 +58,7 @@ class NodeSet;
 class SMSchedule;
 
 extern cl::opt<bool> SwpEnableCopyToPhi;
+extern cl::opt<int> SwpForceIssueWidth;
 
 /// The main class in the implementation of the target independent
 /// software pipeliner pass.
@@ -444,46 +445,80 @@ class ResourceManager {
 private:
   const MCSubtargetInfo *STI;
   const MCSchedModel &SM;
+  const TargetSubtargetInfo *ST;
+  const TargetInstrInfo *TII;
+  SwingSchedulerDAG *DAG;
   const bool UseDFA;
-  std::unique_ptr<DFAPacketizer> DFAResources;
+  /// DFA resources for each slot
+  llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources;
+  /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
+  /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
+  llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT;
+  /// The number of scheduled micro operations for each slot. Micro operations
+  /// are assumed to be scheduled one per cycle, starting with the cycle in
+  /// which the instruction is scheduled.
+  llvm::SmallVector<int> NumScheduledMops;
   /// Each processor resource is associated with a so-called processor resource
   /// mask. This vector allows to correlate processor resource IDs with
   /// processor resource masks. There is exactly one element per each processor
   /// resource declared by the scheduling model.
   llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
+  int InitiationInterval;
+  /// The number of micro operations that can be scheduled at a cycle.
+  int IssueWidth;
+
+  int calculateResMIIDFA() const;
+  /// Check if MRT is overbooked
+  bool isOverbooked() const;
+  /// Reserve resources on MRT
+  void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
+  /// Unreserve resources on MRT
+  void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
+
+  /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
+  /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
+  /// II).
+  int positiveModulo(int Dividend, int Divisor) const {
+    assert(Divisor > 0);
+    int R = Dividend % Divisor;
+    if (R < 0)
+      R += Divisor;
+    return R;
+  }
 
-  llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceCount;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD void dumpMRT() const;
+#endif
 
 public:
-  ResourceManager(const TargetSubtargetInfo *ST)
-      : STI(ST), SM(ST->getSchedModel()), UseDFA(ST->useDFAforSMS()),
+  ResourceManager(const TargetSubtargetInfo *ST, SwingSchedulerDAG *DAG)
+      : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
+        DAG(DAG), UseDFA(ST->useDFAforSMS()),
         ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
-        ProcResourceCount(SM.getNumProcResourceKinds(), 0) {
-    if (UseDFA)
-      DFAResources.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
+        IssueWidth(SM.IssueWidth) {
     initProcResourceVectors(SM, ProcResourceMasks);
+    if (IssueWidth <= 0)
+      // If IssueWidth is not specified, set a sufficiently large value
+      IssueWidth = 100;
+    if (SwpForceIssueWidth > 0)
+      IssueWidth = SwpForceIssueWidth;
   }
 
   void initProcResourceVectors(const MCSchedModel &SM,
                                SmallVectorImpl<uint64_t> &Masks);
-  /// Check if the resources occupied by a MCInstrDesc are available in
-  /// the current state.
-  bool canReserveResources(const MCInstrDesc *MID) const;
-
-  /// Reserve the resources occupied by a MCInstrDesc and change the current
-  /// state to reflect that change.
-  void reserveResources(const MCInstrDesc *MID);
 
   /// Check if the resources occupied by a machine instruction are available
   /// in the current state.
-  bool canReserveResources(const MachineInstr &MI) const;
+  bool canReserveResources(SUnit &SU, int Cycle);
 
   /// Reserve the resources occupied by a machine instruction and change the
   /// current state to reflect that change.
-  void reserveResources(const MachineInstr &MI);
+  void reserveResources(SUnit &SU, int Cycle);
+
+  int calculateResMII() const;
 
-  /// Reset the state
-  void clearResources();
+  /// Initialize resources with the initiation interval II.
+  void init(int II);
 };
 
 /// This class represents the scheduled code.  The main data structure is a
@@ -521,8 +556,9 @@ class SMSchedule {
   ResourceManager ProcItinResources;
 
 public:
-  SMSchedule(MachineFunction *mf)
-      : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), ProcItinResources(&ST) {}
+  SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
+      : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
+        ProcItinResources(&ST, DAG) {}
 
   void reset() {
     ScheduledInstrs.clear();
@@ -533,7 +569,10 @@ class SMSchedule {
   }
 
   /// Set the initiation interval for this schedule.
-  void setInitiationInterval(int ii) { InitiationInterval = ii; }
+  void setInitiationInterval(int ii) {
+    InitiationInterval = ii;
+    ProcItinResources.init(ii);
+  }
 
   /// Return the initiation interval for this schedule.
   int getInitiationInterval() const { return InitiationInterval; }

diff  --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 2d709949b3ab..721bd52448ac 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -43,6 +43,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CycleAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -84,9 +85,11 @@
 #include <cstdint>
 #include <deque>
 #include <functional>
+#include <iomanip>
 #include <iterator>
 #include <map>
 #include <memory>
+#include <sstream>
 #include <tuple>
 #include <utility>
 #include <vector>
@@ -121,6 +124,12 @@ static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
                               cl::desc("Size limit for the MII."),
                               cl::Hidden, cl::init(27));
 
+/// A command line argument to force pipeliner to use specified initial
+/// interval.
+static cl::opt<int> SwpForceII("pipeliner-force-ii",
+                               cl::desc("Force pipeliner to use specified II."),
+                               cl::Hidden, cl::init(-1));
+
 /// A command line argument to limit the number of stages in the pipeline.
 static cl::opt<int>
     SwpMaxStages("pipeliner-max-stages",
@@ -172,6 +181,13 @@ cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
                                  cl::init(true),
                                  cl::desc("Enable CopyToPhi DAG Mutation"));
 
+/// A command line argument to force pipeliner to use specified issue
+/// width.
+cl::opt<int> SwpForceIssueWidth(
+    "pipeliner-force-issue-width",
+    cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
+    cl::init(-1));
+
 } // end namespace llvm
 
 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
@@ -454,14 +470,18 @@ void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
-  if (II_setByPragma > 0)
+  if (SwpForceII > 0)
+    MII = SwpForceII;
+  else if (II_setByPragma > 0)
     MII = II_setByPragma;
   else
     MII = std::max(ResMII, RecMII);
 }
 
 void SwingSchedulerDAG::setMAX_II() {
-  if (II_setByPragma > 0)
+  if (SwpForceII > 0)
+    MAX_II = SwpForceII;
+  else if (II_setByPragma > 0)
     MAX_II = II_setByPragma;
   else
     MAX_II = MII + 10;
@@ -560,7 +580,7 @@ void SwingSchedulerDAG::schedule() {
   // check for node order issues
   checkValidNodeOrder(Circuits);
 
-  SMSchedule Schedule(Pass.MF);
+  SMSchedule Schedule(Pass.MF, this);
   Scheduled = schedulePipeline(Schedule);
 
   if (!Scheduled){
@@ -1093,72 +1113,9 @@ struct FuncUnitSorter {
 /// to add it to each existing DFA, until a legal space is found. If the
 /// instruction cannot be reserved in an existing DFA, we create a new one.
 unsigned SwingSchedulerDAG::calculateResMII() {
-
   LLVM_DEBUG(dbgs() << "calculateResMII:\n");
-  SmallVector<ResourceManager*, 8> Resources;
-  MachineBasicBlock *MBB = Loop.getHeader();
-  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
-
-  // Sort the instructions by the number of available choices for scheduling,
-  // least to most. Use the number of critical resources as the tie breaker.
-  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
-  for (MachineInstr &MI :
-       llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator()))
-    FUS.calcCriticalResources(MI);
-  PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
-      FuncUnitOrder(FUS);
-
-  for (MachineInstr &MI :
-       llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator()))
-    FuncUnitOrder.push(&MI);
-
-  while (!FuncUnitOrder.empty()) {
-    MachineInstr *MI = FuncUnitOrder.top();
-    FuncUnitOrder.pop();
-    if (TII->isZeroCost(MI->getOpcode()))
-      continue;
-    // Attempt to reserve the instruction in an existing DFA. At least one
-    // DFA is needed for each cycle.
-    unsigned NumCycles = getSUnit(MI)->Latency;
-    unsigned ReservedCycles = 0;
-    SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
-    SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
-    LLVM_DEBUG({
-      dbgs() << "Trying to reserve resource for " << NumCycles
-             << " cycles for \n";
-      MI->dump();
-    });
-    for (unsigned C = 0; C < NumCycles; ++C)
-      while (RI != RE) {
-        if ((*RI)->canReserveResources(*MI)) {
-          (*RI)->reserveResources(*MI);
-          ++ReservedCycles;
-          break;
-        }
-        RI++;
-      }
-    LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
-                      << ", NumCycles:" << NumCycles << "\n");
-    // Add new DFAs, if needed, to reserve resources.
-    for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
-      LLVM_DEBUG(if (SwpDebugResource) dbgs()
-                 << "NewResource created to reserve resources"
-                 << "\n");
-      ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
-      assert(NewResource->canReserveResources(*MI) && "Reserve error.");
-      NewResource->reserveResources(*MI);
-      Resources.push_back(NewResource);
-    }
-  }
-  int Resmii = Resources.size();
-  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
-  // Delete the memory for each of the DFAs that were created earlier.
-  for (ResourceManager *RI : Resources) {
-    ResourceManager *D = RI;
-    delete D;
-  }
-  Resources.clear();
-  return Resmii;
+  ResourceManager RM(&MF.getSubtarget(), this);
+  return RM.calculateResMII();
 }
 
 /// Calculate the recurrence-constrainted minimum initiation interval.
@@ -2375,28 +2332,15 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
   for (int curCycle = StartCycle; curCycle != termCycle;
        forward ? ++curCycle : --curCycle) {
 
-    // Add the already scheduled instructions at the specified cycle to the
-    // DFA.
-    ProcItinResources.clearResources();
-    for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
-         checkCycle <= LastCycle; checkCycle += II) {
-      std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
-
-      for (SUnit *CI : cycleInstrs) {
-        if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode()))
-          continue;
-        assert(ProcItinResources.canReserveResources(*CI->getInstr()) &&
-               "These instructions have already been scheduled.");
-        ProcItinResources.reserveResources(*CI->getInstr());
-      }
-    }
     if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
-        ProcItinResources.canReserveResources(*SU->getInstr())) {
+        ProcItinResources.canReserveResources(*SU, curCycle)) {
       LLVM_DEBUG({
         dbgs() << "\tinsert at cycle " << curCycle << " ";
         SU->getInstr()->dump();
       });
 
+      if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
+        ProcItinResources.reserveResources(*SU, curCycle);
       ScheduledInstrs[curCycle].push_back(SU);
       InstrToCycle.insert(std::make_pair(SU, curCycle));
       if (curCycle > LastCycle)
@@ -3025,6 +2969,26 @@ void SMSchedule::print(raw_ostream &os) const {
 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
 LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
 
+void ResourceManager::dumpMRT() const {
+  LLVM_DEBUG({
+    if (UseDFA)
+      return;
+    std::stringstream SS;
+    SS << "MRT:\n";
+    SS << std::setw(4) << "Slot";
+    for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
+      SS << std::setw(3) << I;
+    SS << std::setw(7) << "#Mops"
+       << "\n";
+    for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
+      SS << std::setw(4) << Slot;
+      for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
+        SS << std::setw(3) << MRT[Slot][I];
+      SS << std::setw(7) << NumScheduledMops[Slot] << "\n";
+    }
+    dbgs() << SS.str();
+  });
+}
 #endif
 
 void ResourceManager::initProcResourceVectors(
@@ -3069,97 +3033,244 @@ void ResourceManager::initProcResourceVectors(
   });
 }
 
-bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
-
+bool ResourceManager::canReserveResources(SUnit &SU, int Cycle) {
   LLVM_DEBUG({
     if (SwpDebugResource)
       dbgs() << "canReserveResources:\n";
   });
   if (UseDFA)
-    return DFAResources->canReserveResources(MID);
+    return DFAResources[positiveModulo(Cycle, InitiationInterval)]
+        ->canReserveResources(&SU.getInstr()->getDesc());
 
-  unsigned InsnClass = MID->getSchedClass();
-  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+  const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
   if (!SCDesc->isValid()) {
     LLVM_DEBUG({
       dbgs() << "No valid Schedule Class Desc for schedClass!\n";
-      dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
+      dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
     });
     return true;
   }
 
-  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
-  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
-  for (; I != E; ++I) {
-    if (!I->Cycles)
-      continue;
-    const MCProcResourceDesc *ProcResource =
-        SM.getProcResource(I->ProcResourceIdx);
-    unsigned NumUnits = ProcResource->NumUnits;
-    LLVM_DEBUG({
-      if (SwpDebugResource)
-        dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
-                         ProcResource->Name, I->ProcResourceIdx,
-                         ProcResourceCount[I->ProcResourceIdx], NumUnits,
-                         I->Cycles);
-    });
-    if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
-      return false;
-  }
-  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
-  return true;
+  reserveResources(SCDesc, Cycle);
+  bool Result = !isOverbooked();
+  unreserveResources(SCDesc, Cycle);
+
+  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Result << "\n\n";);
+  return Result;
 }
 
-void ResourceManager::reserveResources(const MCInstrDesc *MID) {
+void ResourceManager::reserveResources(SUnit &SU, int Cycle) {
   LLVM_DEBUG({
     if (SwpDebugResource)
       dbgs() << "reserveResources:\n";
   });
   if (UseDFA)
-    return DFAResources->reserveResources(MID);
+    return DFAResources[positiveModulo(Cycle, InitiationInterval)]
+        ->reserveResources(&SU.getInstr()->getDesc());
 
-  unsigned InsnClass = MID->getSchedClass();
-  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+  const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
   if (!SCDesc->isValid()) {
     LLVM_DEBUG({
       dbgs() << "No valid Schedule Class Desc for schedClass!\n";
-      dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
+      dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
     });
     return;
   }
-  for (const MCWriteProcResEntry &PRE :
-       make_range(STI->getWriteProcResBegin(SCDesc),
-                  STI->getWriteProcResEnd(SCDesc))) {
-    if (!PRE.Cycles)
-      continue;
-    ++ProcResourceCount[PRE.ProcResourceIdx];
-    LLVM_DEBUG({
-      if (SwpDebugResource) {
-        const MCProcResourceDesc *ProcResource =
-            SM.getProcResource(PRE.ProcResourceIdx);
-        dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
-                         ProcResource->Name, PRE.ProcResourceIdx,
-                         ProcResourceCount[PRE.ProcResourceIdx],
-                         ProcResource->NumUnits, PRE.Cycles);
-      }
-    });
-  }
+
+  reserveResources(SCDesc, Cycle);
+
   LLVM_DEBUG({
-    if (SwpDebugResource)
+    if (SwpDebugResource) {
+      dumpMRT();
       dbgs() << "reserveResources: done!\n\n";
+    }
   });
 }
 
-bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
-  return canReserveResources(&MI.getDesc());
+void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
+                                       int Cycle) {
+  assert(!UseDFA);
+  for (const MCWriteProcResEntry &PRE : make_range(
+           STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
+    for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
+      ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
+
+  for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
+    ++NumScheduledMops[positiveModulo(C, InitiationInterval)];
 }
 
-void ResourceManager::reserveResources(const MachineInstr &MI) {
-  return reserveResources(&MI.getDesc());
+void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
+                                         int Cycle) {
+  assert(!UseDFA);
+  for (const MCWriteProcResEntry &PRE : make_range(
+           STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
+    for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
+      --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
+
+  for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
+    --NumScheduledMops[positiveModulo(C, InitiationInterval)];
 }
 
-void ResourceManager::clearResources() {
+bool ResourceManager::isOverbooked() const {
+  assert(!UseDFA);
+  for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
+    for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+      const MCProcResourceDesc *Desc = SM.getProcResource(I);
+      if (MRT[Slot][I] > Desc->NumUnits)
+        return true;
+    }
+    if (NumScheduledMops[Slot] > IssueWidth)
+      return true;
+  }
+  return false;
+}
+
+int ResourceManager::calculateResMIIDFA() const {
+  assert(UseDFA);
+
+  // Sort the instructions by the number of available choices for scheduling,
+  // least to most. Use the number of critical resources as the tie breaker.
+  FuncUnitSorter FUS = FuncUnitSorter(*ST);
+  for (SUnit &SU : DAG->SUnits)
+    FUS.calcCriticalResources(*SU.getInstr());
+  PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
+      FuncUnitOrder(FUS);
+
+  for (SUnit &SU : DAG->SUnits)
+    FuncUnitOrder.push(SU.getInstr());
+
+  SmallVector<std::unique_ptr<DFAPacketizer>, 8> Resources;
+  Resources.push_back(
+      std::unique_ptr<DFAPacketizer>(TII->CreateTargetScheduleState(*ST)));
+
+  while (!FuncUnitOrder.empty()) {
+    MachineInstr *MI = FuncUnitOrder.top();
+    FuncUnitOrder.pop();
+    if (TII->isZeroCost(MI->getOpcode()))
+      continue;
+
+    // Attempt to reserve the instruction in an existing DFA. At least one
+    // DFA is needed for each cycle.
+    unsigned NumCycles = DAG->getSUnit(MI)->Latency;
+    unsigned ReservedCycles = 0;
+    auto *RI = Resources.begin();
+    auto *RE = Resources.end();
+    LLVM_DEBUG({
+      dbgs() << "Trying to reserve resource for " << NumCycles
+             << " cycles for \n";
+      MI->dump();
+    });
+    for (unsigned C = 0; C < NumCycles; ++C)
+      while (RI != RE) {
+        if ((*RI)->canReserveResources(*MI)) {
+          (*RI)->reserveResources(*MI);
+          ++ReservedCycles;
+          break;
+        }
+        RI++;
+      }
+    LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
+                      << ", NumCycles:" << NumCycles << "\n");
+    // Add new DFAs, if needed, to reserve resources.
+    for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
+      LLVM_DEBUG(if (SwpDebugResource) dbgs()
+                 << "NewResource created to reserve resources"
+                 << "\n");
+      auto *NewResource = TII->CreateTargetScheduleState(*ST);
+      assert(NewResource->canReserveResources(*MI) && "Reserve error.");
+      NewResource->reserveResources(*MI);
+      Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource));
+    }
+  }
+
+  int Resmii = Resources.size();
+  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
+  return Resmii;
+}
+
+int ResourceManager::calculateResMII() const {
   if (UseDFA)
-    return DFAResources->clearResources();
-  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
+    return calculateResMIIDFA();
+
+  // Count each resource consumption and divide it by the number of units.
+  // ResMII is the max value among them.
+
+  int NumMops = 0;
+  SmallVector<uint64_t> ResourceCount(SM.getNumProcResourceKinds());
+  for (SUnit &SU : DAG->SUnits) {
+    if (TII->isZeroCost(SU.getInstr()->getOpcode()))
+      continue;
+
+    const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
+    if (!SCDesc->isValid())
+      continue;
+
+    LLVM_DEBUG({
+      if (SwpDebugResource) {
+        DAG->dumpNode(SU);
+        dbgs() << "  #Mops: " << SCDesc->NumMicroOps << "\n"
+               << "  WriteProcRes: ";
+      }
+    });
+    NumMops += SCDesc->NumMicroOps;
+    for (const MCWriteProcResEntry &PRE :
+         make_range(STI->getWriteProcResBegin(SCDesc),
+                    STI->getWriteProcResEnd(SCDesc))) {
+      LLVM_DEBUG({
+        if (SwpDebugResource) {
+          const MCProcResourceDesc *Desc =
+              SM.getProcResource(PRE.ProcResourceIdx);
+          dbgs() << Desc->Name << ": " << PRE.Cycles << ", ";
+        }
+      });
+      ResourceCount[PRE.ProcResourceIdx] += PRE.Cycles;
+    }
+    LLVM_DEBUG(if (SwpDebugResource) dbgs() << "\n");
+  }
+
+  int Result = (NumMops + IssueWidth - 1) / IssueWidth;
+  LLVM_DEBUG({
+    if (SwpDebugResource)
+      dbgs() << "#Mops: " << NumMops << ", "
+             << "IssueWidth: " << IssueWidth << ", "
+             << "Cycles: " << Result << "\n";
+  });
+
+  LLVM_DEBUG({
+    if (SwpDebugResource) {
+      std::stringstream SS;
+      SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)
+         << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"
+         << "\n";
+      dbgs() << SS.str();
+    }
+  });
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc *Desc = SM.getProcResource(I);
+    int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;
+    LLVM_DEBUG({
+      if (SwpDebugResource) {
+        std::stringstream SS;
+        SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)
+           << Desc->NumUnits << std::setw(10) << ResourceCount[I]
+           << std::setw(10) << Cycles << "\n";
+        dbgs() << SS.str();
+      }
+    });
+    if (Cycles > Result)
+      Result = Cycles;
+  }
+  return Result;
+}
+
+void ResourceManager::init(int II) {
+  InitiationInterval = II;
+  DFAResources.clear();
+  DFAResources.resize(II);
+  for (auto &I : DFAResources)
+    I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
+  MRT.clear();
+  MRT.resize(II, SmallVector<uint64_t>(SM.getNumProcResourceKinds()));
+  NumScheduledMops.clear();
+  NumScheduledMops.resize(II);
 }

diff  --git a/llvm/test/CodeGen/PowerPC/sms-iterator.ll b/llvm/test/CodeGen/PowerPC/sms-iterator.ll
index 6d025a405726..0fcb4895b98f 100644
--- a/llvm/test/CodeGen/PowerPC/sms-iterator.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-iterator.ll
@@ -5,7 +5,7 @@
 
 %0 = type { i32, [16 x double] }
 
-; CHECK: MII = 8 MAX_II = 18
+; CHECK: MII = 3 MAX_II = 13 (rec=3, res=2)
 
 define dso_local fastcc double @_ZN3povL9polysolveEiPdS0_() unnamed_addr #0 {
   br label %1

diff  --git a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
index 1fff3e58269c..61dd310812ba 100644
--- a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
-; RUN:       -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s
+; RUN:       -mcpu=pwr9 --ppc-enable-pipeliner --pipeliner-force-ii=4 2>&1 | FileCheck %s
 
 define void @main() nounwind #0 {
 ; CHECK-LABEL: main:

diff  --git a/llvm/test/CodeGen/PowerPC/sms-phi-2.ll b/llvm/test/CodeGen/PowerPC/sms-phi-2.ll
index e44d4f760b1e..dd94ee8d2d3f 100644
--- a/llvm/test/CodeGen/PowerPC/sms-phi-2.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-phi-2.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
-; RUN:       -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s
+; RUN:       -mcpu=pwr9 --ppc-enable-pipeliner --pipeliner-force-ii=15 2>&1 | FileCheck %s
 
 define void @phi2(i32, i32, i8*) local_unnamed_addr {
 ; CHECK-LABEL: phi2:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
index f53c3e747661..450aa9b81520 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -O3 -mattr=+mve.fp,+use-mipipeliner -mcpu=cortex-m55 %s -o - -verify-machineinstrs | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -O3 -mattr=+mve.fp,+use-mipipeliner -mcpu=cortex-m55 %s -o - -verify-machineinstrs --pipeliner-force-issue-width=10 | FileCheck %s
 
 define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32 noundef %numSamples, ptr nocapture noundef writeonly %realResult, ptr nocapture noundef writeonly %imagResult) {
 ; CHECK-LABEL: arm_cmplx_dot_prod_q15:

diff  --git a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
index 3f3ff5e4bd29..fd8aa9d5d00c 100644
--- a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
+# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner --pipeliner-force-issue-width=10 -o - %s | FileCheck %s --check-prefix=CHECK
 
 --- |
   define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {

diff  --git a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
index 22f69a630e08..56a4426e7beb 100644
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mcpu=cortex-m85 -mattr=+use-mipipeliner -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
+# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mcpu=cortex-m85 -mattr=+use-mipipeliner -run-pass=pipeliner --pipeliner-force-issue-width=10 -o - %s | FileCheck %s --check-prefix=CHECK
 
 --- |
   define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {

diff  --git a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
index eabf84abd7ef..e65abb789d80 100644
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mcpu=cortex-m85 -mattr=+use-mipipeliner -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
+# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mcpu=cortex-m85 -mattr=+use-mipipeliner -run-pass=pipeliner --pipeliner-force-issue-width=10 -o - %s | FileCheck %s --check-prefix=CHECK
 
 --- |
   define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {

diff  --git a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
index 483b572ef06f..f755fbfccf9d 100644
--- a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
+# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner --pipeliner-force-issue-width=10 -o - %s | FileCheck %s --check-prefix=CHECK
 
 # This test checks that too much register pressure will cause the modulo
 # schedule to be rejected and that a test with the same resource usage