[llvm] r332652 - [llvm-mca] Make Dispatch a subclass of Stage.

Matt Davis via llvm-commits llvm-commits at lists.llvm.org
Thu May 17 12:22:30 PDT 2018


Author: mattd
Date: Thu May 17 12:22:29 2018
New Revision: 332652

URL: http://llvm.org/viewvc/llvm-project?rev=332652&view=rev
Log:
[llvm-mca] Make Dispatch a subclass of Stage.

Summary:
The logic of dispatch remains the same, but now DispatchUnit is a Stage (DispatchStage).

This change has the benefit of simplifying the backend runCycle() code.
The same logic applies, but it belongs to different components now.  This is just a start;
eventually we will need to remove the call to the DispatchStage in Scheduler.cpp, but
that will be a separate patch.  This change is mostly a renaming and moving of existing logic.

This change also encouraged me to remove the Subtarget (STI) member from the
Backend class.  That member was used to initialize the other members of Backend
and to eventually call DispatchUnit::dispatch().  Now that we have Stages, we
can eliminate this by instantiating the DispatchStage with everything it needs
at the time of construction (e.g., Subtarget).  That change allows us to call
DispatchStage::execute(IR) as we expect to call execute() for all other stages.

Once we add the Stage list (D46907) we can more cleanly call preExecute() on
all of the stages; DispatchStage will probably wrap cycleEvent() in that
case.

Made some formatting and minor cleanups to README.txt.  Some of the text
was re-flowed to stay within 80 cols.


Reviewers: andreadb, courbet, RKSimon

Reviewed By: andreadb, courbet

Subscribers: mgorny, javed.absar, tschuett, gbedwell, llvm-commits

Differential Revision: https://reviews.llvm.org/D46983

Added:
    llvm/trunk/tools/llvm-mca/DispatchStage.cpp
      - copied, changed from r332641, llvm/trunk/tools/llvm-mca/Dispatch.cpp
    llvm/trunk/tools/llvm-mca/DispatchStage.h
      - copied, changed from r332641, llvm/trunk/tools/llvm-mca/Dispatch.h
Removed:
    llvm/trunk/tools/llvm-mca/Dispatch.cpp
    llvm/trunk/tools/llvm-mca/Dispatch.h
Modified:
    llvm/trunk/tools/llvm-mca/Backend.cpp
    llvm/trunk/tools/llvm-mca/Backend.h
    llvm/trunk/tools/llvm-mca/CMakeLists.txt
    llvm/trunk/tools/llvm-mca/HWEventListener.h
    llvm/trunk/tools/llvm-mca/README.txt
    llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp
    llvm/trunk/tools/llvm-mca/RetireControlUnit.h
    llvm/trunk/tools/llvm-mca/Scheduler.cpp
    llvm/trunk/tools/llvm-mca/Scheduler.h

Modified: llvm/trunk/tools/llvm-mca/Backend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Backend.cpp?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Backend.cpp (original)
+++ llvm/trunk/tools/llvm-mca/Backend.cpp Thu May 17 12:22:29 2018
@@ -30,7 +30,7 @@ void Backend::addEventListener(HWEventLi
 }
 
 void Backend::run() {
-  while (Fetch->isReady() || !DU->isRCUEmpty())
+  while (Fetch->isReady() || !Dispatch->isReady())
     runCycle(Cycles++);
 }
 
@@ -39,10 +39,8 @@ void Backend::runCycle(unsigned Cycle) {
 
   InstRef IR;
   while (Fetch->execute(IR)) {
-    const InstrDesc &Desc = IR.getInstruction()->getDesc();
-    if (!DU->isAvailable(Desc.NumMicroOps) || !DU->canDispatch(IR))
+    if (!Dispatch->execute(IR))
       break;
-    DU->dispatch(IR, STI);
     Fetch->postExecute(IR);
   }
 
@@ -54,7 +52,7 @@ void Backend::notifyCycleBegin(unsigned
   for (HWEventListener *Listener : Listeners)
     Listener->onCycleBegin();
 
-  DU->cycleEvent();
+  Dispatch->cycleEvent();
   HWS->cycleEvent();
 }
 

Modified: llvm/trunk/tools/llvm-mca/Backend.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Backend.h?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Backend.h (original)
+++ llvm/trunk/tools/llvm-mca/Backend.h Thu May 17 12:22:29 2018
@@ -15,7 +15,7 @@
 #ifndef LLVM_TOOLS_LLVM_MCA_BACKEND_H
 #define LLVM_TOOLS_LLVM_MCA_BACKEND_H
 
-#include "Dispatch.h"
+#include "DispatchStage.h"
 #include "FetchStage.h"
 #include "InstrBuilder.h"
 #include "Scheduler.h"
@@ -51,15 +51,12 @@ class HWStallEvent;
 /// histograms. For example, it tracks how the dispatch group size changes
 /// over time.
 class Backend {
-  const llvm::MCSubtargetInfo &STI;
-
   /// This is the initial stage of the pipeline.
   /// TODO: Eventually this will become a list of unique Stage* that this
   /// backend pipeline executes.
   std::unique_ptr<FetchStage> Fetch;
-
   std::unique_ptr<Scheduler> HWS;
-  std::unique_ptr<DispatchUnit> DU;
+  std::unique_ptr<DispatchStage> Dispatch;
   std::set<HWEventListener *> Listeners;
   unsigned Cycles;
 
@@ -71,15 +68,14 @@ public:
           std::unique_ptr<FetchStage> InitialStage, unsigned DispatchWidth = 0,
           unsigned RegisterFileSize = 0, unsigned LoadQueueSize = 0,
           unsigned StoreQueueSize = 0, bool AssumeNoAlias = false)
-      : STI(Subtarget), Fetch(std::move(InitialStage)),
+      : Fetch(std::move(InitialStage)),
         HWS(llvm::make_unique<Scheduler>(this, Subtarget.getSchedModel(),
                                          LoadQueueSize, StoreQueueSize,
                                          AssumeNoAlias)),
-        DU(llvm::make_unique<DispatchUnit>(this, Subtarget.getSchedModel(), MRI,
-                                           RegisterFileSize, DispatchWidth,
-                                           HWS.get())),
+        Dispatch(llvm::make_unique<DispatchStage>(
+            this, Subtarget, MRI, RegisterFileSize, DispatchWidth, HWS.get())),
         Cycles(0) {
-    HWS->setDispatchUnit(DU.get());
+    HWS->setDispatchStage(Dispatch.get());
   }
 
   void run();

Modified: llvm/trunk/tools/llvm-mca/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/CMakeLists.txt?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/CMakeLists.txt (original)
+++ llvm/trunk/tools/llvm-mca/CMakeLists.txt Thu May 17 12:22:29 2018
@@ -13,7 +13,7 @@ add_llvm_tool(llvm-mca
   Backend.cpp
   BackendPrinter.cpp
   CodeRegion.cpp
-  Dispatch.cpp
+  DispatchStage.cpp
   DispatchStatistics.cpp
   FetchStage.cpp
   HWEventListener.cpp

Removed: llvm/trunk/tools/llvm-mca/Dispatch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Dispatch.cpp?rev=332651&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-mca/Dispatch.cpp (original)
+++ llvm/trunk/tools/llvm-mca/Dispatch.cpp (removed)
@@ -1,151 +0,0 @@
-//===--------------------- Dispatch.cpp -------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements methods declared by the DispatchUnit class.
-///
-//===----------------------------------------------------------------------===//
-
-#include "Dispatch.h"
-#include "Backend.h"
-#include "HWEventListener.h"
-#include "Scheduler.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "llvm-mca"
-
-namespace mca {
-
-void DispatchUnit::notifyInstructionDispatched(const InstRef &IR,
-                                               ArrayRef<unsigned> UsedRegs) {
-  LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: " << IR << '\n');
-  Owner->notifyInstructionEvent(HWInstructionDispatchedEvent(IR, UsedRegs));
-}
-
-void DispatchUnit::notifyInstructionRetired(const InstRef &IR) {
-  LLVM_DEBUG(dbgs() << "[E] Instruction Retired: " << IR << '\n');
-  SmallVector<unsigned, 4> FreedRegs(RAT->getNumRegisterFiles());
-  const InstrDesc &Desc = IR.getInstruction()->getDesc();
-
-  for (const std::unique_ptr<WriteState> &WS : IR.getInstruction()->getDefs())
-    RAT->removeRegisterWrite(*WS.get(), FreedRegs, !Desc.isZeroLatency());
-  Owner->notifyInstructionEvent(HWInstructionRetiredEvent(IR, FreedRegs));
-}
-
-bool DispatchUnit::checkRAT(const InstRef &IR) {
-  SmallVector<unsigned, 4> RegDefs;
-  for (const std::unique_ptr<WriteState> &RegDef :
-       IR.getInstruction()->getDefs())
-    RegDefs.emplace_back(RegDef->getRegisterID());
-
-  unsigned RegisterMask = RAT->isAvailable(RegDefs);
-  // A mask with all zeroes means: register files are available.
-  if (RegisterMask) {
-    Owner->notifyStallEvent(HWStallEvent(HWStallEvent::RegisterFileStall, IR));
-    return false;
-  }
-
-  return true;
-}
-
-bool DispatchUnit::checkRCU(const InstRef &IR) {
-  const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps;
-  if (RCU->isAvailable(NumMicroOps))
-    return true;
-  Owner->notifyStallEvent(
-      HWStallEvent(HWStallEvent::RetireControlUnitStall, IR));
-  return false;
-}
-
-bool DispatchUnit::checkScheduler(const InstRef &IR) {
-  return SC->canBeDispatched(IR);
-}
-
-void DispatchUnit::updateRAWDependencies(ReadState &RS,
-                                         const MCSubtargetInfo &STI) {
-  SmallVector<WriteState *, 4> DependentWrites;
-
-  collectWrites(DependentWrites, RS.getRegisterID());
-  RS.setDependentWrites(DependentWrites.size());
-  LLVM_DEBUG(dbgs() << "Found " << DependentWrites.size()
-                    << " dependent writes\n");
-  // We know that this read depends on all the writes in DependentWrites.
-  // For each write, check if we have ReadAdvance information, and use it
-  // to figure out in how many cycles this read becomes available.
-  const ReadDescriptor &RD = RS.getDescriptor();
-  if (!RD.HasReadAdvanceEntries) {
-    for (WriteState *WS : DependentWrites)
-      WS->addUser(&RS, /* ReadAdvance */ 0);
-    return;
-  }
-
-  const MCSchedModel &SM = STI.getSchedModel();
-  const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
-  for (WriteState *WS : DependentWrites) {
-    unsigned WriteResID = WS->getWriteResourceID();
-    int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
-    WS->addUser(&RS, ReadAdvance);
-  }
-  // Prepare the set for another round.
-  DependentWrites.clear();
-}
-
-void DispatchUnit::dispatch(InstRef IR, const MCSubtargetInfo &STI) {
-  assert(!CarryOver && "Cannot dispatch another instruction!");
-  Instruction &IS = *IR.getInstruction();
-  const InstrDesc &Desc = IS.getDesc();
-  const unsigned NumMicroOps = Desc.NumMicroOps;
-  if (NumMicroOps > DispatchWidth) {
-    assert(AvailableEntries == DispatchWidth);
-    AvailableEntries = 0;
-    CarryOver = NumMicroOps - DispatchWidth;
-  } else {
-    assert(AvailableEntries >= NumMicroOps);
-    AvailableEntries -= NumMicroOps;
-  }
-
-  // A dependency-breaking instruction doesn't have to wait on the register
-  // input operands, and it is often optimized at register renaming stage.
-  // Update RAW dependencies if this instruction is not a dependency-breaking
-  // instruction. A dependency-breaking instruction is a zero-latency
-  // instruction that doesn't consume hardware resources.
-  // An example of dependency-breaking instruction on X86 is a zero-idiom XOR.
-  if (!Desc.isZeroLatency())
-    for (std::unique_ptr<ReadState> &RS : IS.getUses())
-      updateRAWDependencies(*RS, STI);
-
-  // By default, a dependency-breaking zero-latency instruction is expected to
-  // be optimized at register renaming stage. That means, no physical register
-  // is allocated to the instruction.
-  SmallVector<unsigned, 4> RegisterFiles(RAT->getNumRegisterFiles());
-  for (std::unique_ptr<WriteState> &WS : IS.getDefs())
-    RAT->addRegisterWrite(*WS, RegisterFiles, !Desc.isZeroLatency());
-
-  // Reserve slots in the RCU, and notify the instruction that it has been
-  // dispatched to the schedulers for execution.
-  IS.dispatch(RCU->reserveSlot(IR, NumMicroOps));
-
-  // Notify listeners of the "instruction dispatched" event.
-  notifyInstructionDispatched(IR, RegisterFiles);
-
-  // Now move the instruction into the scheduler's queue.
-  // The scheduler is responsible for checking if this is a zero-latency
-  // instruction that doesn't consume pipeline/scheduler resources.
-  SC->scheduleInstruction(IR);
-}
-
-#ifndef NDEBUG
-void DispatchUnit::dump() const {
-  RAT->dump();
-  RCU->dump();
-}
-#endif
-} // namespace mca

Removed: llvm/trunk/tools/llvm-mca/Dispatch.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Dispatch.h?rev=332651&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-mca/Dispatch.h (original)
+++ llvm/trunk/tools/llvm-mca/Dispatch.h (removed)
@@ -1,122 +0,0 @@
-//===----------------------- Dispatch.h -------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements classes that are used to model reorder buffers and
-/// the hardware dispatch logic.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H
-#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H
-
-#include "Instruction.h"
-#include "RegisterFile.h"
-#include "RetireControlUnit.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-namespace mca {
-
-class WriteState;
-class Scheduler;
-class Backend;
-
-// Implements the hardware dispatch logic.
-//
-// This class is responsible for the dispatch stage, in which instructions are
-// dispatched in groups to the Scheduler.  An instruction can be dispatched if
-// functional units are available.
-// To be more specific, an instruction can be dispatched to the Scheduler if:
-//  1) There are enough entries in the reorder buffer (implemented by class
-//     RetireControlUnit) to accomodate all opcodes.
-//  2) There are enough temporaries to rename output register operands.
-//  3) There are enough entries available in the used buffered resource(s).
-//
-// The number of micro opcodes that can be dispatched in one cycle is limited by
-// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
-// processor resources are not available (i.e. at least one of the
-// abovementioned checks fails). Dispatch stall events are counted during the
-// entire execution of the code, and displayed by the performance report when
-// flag '-verbose' is specified.
-//
-// If the number of micro opcodes of an instruction is bigger than
-// DispatchWidth, then it can only be dispatched at the beginning of one cycle.
-// The DispatchUnit will still have to wait for a number of cycles (depending on
-// the DispatchWidth and the number of micro opcodes) before it can serve other
-// instructions.
-class DispatchUnit {
-  unsigned DispatchWidth;
-  unsigned AvailableEntries;
-  unsigned CarryOver;
-  Scheduler *SC;
-
-  std::unique_ptr<RegisterFile> RAT;
-  std::unique_ptr<RetireControlUnit> RCU;
-  Backend *Owner;
-
-  bool checkRAT(const InstRef &IR);
-  bool checkRCU(const InstRef &IR);
-  bool checkScheduler(const InstRef &IR);
-
-  void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI);
-  void notifyInstructionDispatched(const InstRef &IR,
-                                   llvm::ArrayRef<unsigned> UsedPhysRegs);
-
-public:
-  DispatchUnit(Backend *B, const llvm::MCSchedModel &SM,
-               const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize,
-               unsigned MaxDispatchWidth, Scheduler *Sched)
-      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
-        CarryOver(0U), SC(Sched),
-        RAT(llvm::make_unique<RegisterFile>(SM, MRI, RegisterFileSize)),
-        RCU(llvm::make_unique<RetireControlUnit>(SM, this)), Owner(B) {}
-
-  unsigned getDispatchWidth() const { return DispatchWidth; }
-
-  bool isAvailable(unsigned NumEntries) const {
-    return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
-  }
-
-  bool isRCUEmpty() const { return RCU->isEmpty(); }
-
-  bool canDispatch(const InstRef &IR) {
-    assert(isAvailable(IR.getInstruction()->getDesc().NumMicroOps));
-    return checkRCU(IR) && checkRAT(IR) && checkScheduler(IR);
-  }
-
-  void dispatch(InstRef IR, const llvm::MCSubtargetInfo &STI);
-
-  void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
-                     unsigned RegID) const {
-    return RAT->collectWrites(Vec, RegID);
-  }
-
-  void cycleEvent() {
-    RCU->cycleEvent();
-    AvailableEntries =
-        CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
-    CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
-  }
-
-  void notifyInstructionRetired(const InstRef &IR);
-
-  void notifyDispatchStall(const InstRef &IR, unsigned EventType);
-
-  void onInstructionExecuted(unsigned TokenID) {
-    RCU->onInstructionExecuted(TokenID);
-  }
-
-#ifndef NDEBUG
-  void dump() const;
-#endif
-};
-} // namespace mca
-
-#endif

Copied: llvm/trunk/tools/llvm-mca/DispatchStage.cpp (from r332641, llvm/trunk/tools/llvm-mca/Dispatch.cpp)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/DispatchStage.cpp?p2=llvm/trunk/tools/llvm-mca/DispatchStage.cpp&p1=llvm/trunk/tools/llvm-mca/Dispatch.cpp&r1=332641&r2=332652&rev=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Dispatch.cpp (original)
+++ llvm/trunk/tools/llvm-mca/DispatchStage.cpp Thu May 17 12:22:29 2018
@@ -8,11 +8,11 @@
 //===----------------------------------------------------------------------===//
 /// \file
 ///
-/// This file implements methods declared by the DispatchUnit class.
+/// This file implements methods declared by the DispatchStage class.
 ///
 //===----------------------------------------------------------------------===//
 
-#include "Dispatch.h"
+#include "DispatchStage.h"
 #include "Backend.h"
 #include "HWEventListener.h"
 #include "Scheduler.h"
@@ -24,13 +24,13 @@ using namespace llvm;
 
 namespace mca {
 
-void DispatchUnit::notifyInstructionDispatched(const InstRef &IR,
-                                               ArrayRef<unsigned> UsedRegs) {
+void DispatchStage::notifyInstructionDispatched(const InstRef &IR,
+                                                ArrayRef<unsigned> UsedRegs) {
   LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: " << IR << '\n');
   Owner->notifyInstructionEvent(HWInstructionDispatchedEvent(IR, UsedRegs));
 }
 
-void DispatchUnit::notifyInstructionRetired(const InstRef &IR) {
+void DispatchStage::notifyInstructionRetired(const InstRef &IR) {
   LLVM_DEBUG(dbgs() << "[E] Instruction Retired: " << IR << '\n');
   SmallVector<unsigned, 4> FreedRegs(RAT->getNumRegisterFiles());
   const InstrDesc &Desc = IR.getInstruction()->getDesc();
@@ -40,7 +40,7 @@ void DispatchUnit::notifyInstructionReti
   Owner->notifyInstructionEvent(HWInstructionRetiredEvent(IR, FreedRegs));
 }
 
-bool DispatchUnit::checkRAT(const InstRef &IR) {
+bool DispatchStage::checkRAT(const InstRef &IR) {
   SmallVector<unsigned, 4> RegDefs;
   for (const std::unique_ptr<WriteState> &RegDef :
        IR.getInstruction()->getDefs())
@@ -56,7 +56,7 @@ bool DispatchUnit::checkRAT(const InstRe
   return true;
 }
 
-bool DispatchUnit::checkRCU(const InstRef &IR) {
+bool DispatchStage::checkRCU(const InstRef &IR) {
   const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps;
   if (RCU->isAvailable(NumMicroOps))
     return true;
@@ -65,12 +65,12 @@ bool DispatchUnit::checkRCU(const InstRe
   return false;
 }
 
-bool DispatchUnit::checkScheduler(const InstRef &IR) {
+bool DispatchStage::checkScheduler(const InstRef &IR) {
   return SC->canBeDispatched(IR);
 }
 
-void DispatchUnit::updateRAWDependencies(ReadState &RS,
-                                         const MCSubtargetInfo &STI) {
+void DispatchStage::updateRAWDependencies(ReadState &RS,
+                                          const MCSubtargetInfo &STI) {
   SmallVector<WriteState *, 4> DependentWrites;
 
   collectWrites(DependentWrites, RS.getRegisterID());
@@ -98,7 +98,7 @@ void DispatchUnit::updateRAWDependencies
   DependentWrites.clear();
 }
 
-void DispatchUnit::dispatch(InstRef IR, const MCSubtargetInfo &STI) {
+void DispatchStage::dispatch(InstRef IR) {
   assert(!CarryOver && "Cannot dispatch another instruction!");
   Instruction &IS = *IR.getInstruction();
   const InstrDesc &Desc = IS.getDesc();
@@ -142,8 +142,16 @@ void DispatchUnit::dispatch(InstRef IR,
   SC->scheduleInstruction(IR);
 }
 
+bool DispatchStage::execute(InstRef &IR) {
+  const InstrDesc &Desc = IR.getInstruction()->getDesc();
+  if (!isAvailable(Desc.NumMicroOps) || !canDispatch(IR))
+    return false;
+  dispatch(IR);
+  return true;
+}
+
 #ifndef NDEBUG
-void DispatchUnit::dump() const {
+void DispatchStage::dump() const {
   RAT->dump();
   RCU->dump();
 }

Copied: llvm/trunk/tools/llvm-mca/DispatchStage.h (from r332641, llvm/trunk/tools/llvm-mca/Dispatch.h)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/DispatchStage.h?p2=llvm/trunk/tools/llvm-mca/DispatchStage.h&p1=llvm/trunk/tools/llvm-mca/Dispatch.h&r1=332641&r2=332652&rev=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Dispatch.h (original)
+++ llvm/trunk/tools/llvm-mca/DispatchStage.h Thu May 17 12:22:29 2018
@@ -19,6 +19,7 @@
 #include "Instruction.h"
 #include "RegisterFile.h"
 #include "RetireControlUnit.h"
+#include "Stage.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 
@@ -35,69 +36,68 @@ class Backend;
 // functional units are available.
 // To be more specific, an instruction can be dispatched to the Scheduler if:
 //  1) There are enough entries in the reorder buffer (implemented by class
-//     RetireControlUnit) to accomodate all opcodes.
+//     RetireControlUnit) to accommodate all opcodes.
 //  2) There are enough temporaries to rename output register operands.
 //  3) There are enough entries available in the used buffered resource(s).
 //
 // The number of micro opcodes that can be dispatched in one cycle is limited by
 // the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
 // processor resources are not available (i.e. at least one of the
-// abovementioned checks fails). Dispatch stall events are counted during the
+// aforementioned checks fails). Dispatch stall events are counted during the
 // entire execution of the code, and displayed by the performance report when
 // flag '-verbose' is specified.
 //
 // If the number of micro opcodes of an instruction is bigger than
 // DispatchWidth, then it can only be dispatched at the beginning of one cycle.
-// The DispatchUnit will still have to wait for a number of cycles (depending on
-// the DispatchWidth and the number of micro opcodes) before it can serve other
-// instructions.
-class DispatchUnit {
+// The DispatchStage will still have to wait for a number of cycles (depending
+// on the DispatchWidth and the number of micro opcodes) before it can serve
+// other instructions.
+class DispatchStage : public Stage {
   unsigned DispatchWidth;
   unsigned AvailableEntries;
   unsigned CarryOver;
   Scheduler *SC;
-
   std::unique_ptr<RegisterFile> RAT;
   std::unique_ptr<RetireControlUnit> RCU;
   Backend *Owner;
+  const llvm::MCSubtargetInfo &STI;
 
   bool checkRAT(const InstRef &IR);
   bool checkRCU(const InstRef &IR);
   bool checkScheduler(const InstRef &IR);
-
+  void dispatch(InstRef IR);
+  bool isRCUEmpty() const { return RCU->isEmpty(); }
   void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI);
+
   void notifyInstructionDispatched(const InstRef &IR,
                                    llvm::ArrayRef<unsigned> UsedPhysRegs);
 
-public:
-  DispatchUnit(Backend *B, const llvm::MCSchedModel &SM,
-               const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize,
-               unsigned MaxDispatchWidth, Scheduler *Sched)
-      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
-        CarryOver(0U), SC(Sched),
-        RAT(llvm::make_unique<RegisterFile>(SM, MRI, RegisterFileSize)),
-        RCU(llvm::make_unique<RetireControlUnit>(SM, this)), Owner(B) {}
-
-  unsigned getDispatchWidth() const { return DispatchWidth; }
-
   bool isAvailable(unsigned NumEntries) const {
     return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
   }
 
-  bool isRCUEmpty() const { return RCU->isEmpty(); }
-
   bool canDispatch(const InstRef &IR) {
     assert(isAvailable(IR.getInstruction()->getDesc().NumMicroOps));
     return checkRCU(IR) && checkRAT(IR) && checkScheduler(IR);
   }
 
-  void dispatch(InstRef IR, const llvm::MCSubtargetInfo &STI);
-
   void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
                      unsigned RegID) const {
     return RAT->collectWrites(Vec, RegID);
   }
 
+public:
+  DispatchStage(Backend *B, const llvm::MCSubtargetInfo &Subtarget,
+                const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize,
+                unsigned MaxDispatchWidth, Scheduler *Sched)
+      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
+        CarryOver(0U), SC(Sched),
+        RAT(llvm::make_unique<RegisterFile>(Subtarget.getSchedModel(), MRI,
+                                            RegisterFileSize)),
+        RCU(llvm::make_unique<RetireControlUnit>(Subtarget.getSchedModel(),
+                                                 this)),
+        Owner(B), STI(Subtarget) {}
+
   void cycleEvent() {
     RCU->cycleEvent();
     AvailableEntries =
@@ -105,8 +105,9 @@ public:
     CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
   }
 
+  virtual bool isReady() const override final { return isRCUEmpty(); }
+  virtual bool execute(InstRef &IR) override final;
   void notifyInstructionRetired(const InstRef &IR);
-
   void notifyDispatchStall(const InstRef &IR, unsigned EventType);
 
   void onInstructionExecuted(unsigned TokenID) {

Modified: llvm/trunk/tools/llvm-mca/HWEventListener.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/HWEventListener.h?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/HWEventListener.h (original)
+++ llvm/trunk/tools/llvm-mca/HWEventListener.h Thu May 17 12:22:29 2018
@@ -32,7 +32,7 @@ public:
   // Subtargets are free to define additional event types, that are goin to be
   // handled by generic components as opaque values, but can still be
   // emitted by subtarget-specific pipeline components (e.g. Scheduler,
-  // DispatchUnit, ...) and interpreted by subtarget-specific EventListener
+  // DispatchStage, ...) and interpreted by subtarget-specific EventListener
   // implementations.
   enum GenericEventType {
     Invalid = 0,
@@ -94,7 +94,7 @@ class HWStallEvent {
 public:
   enum GenericEventType {
     Invalid = 0,
-    // Generic stall events generated by the DispatchUnit.
+    // Generic stall events generated by the DispatchStage.
     RegisterFileStall,
     RetireControlUnitStall,
     // Generic stall events generated by the Scheduler.

Modified: llvm/trunk/tools/llvm-mca/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/README.txt?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/README.txt (original)
+++ llvm/trunk/tools/llvm-mca/README.txt Thu May 17 12:22:29 2018
@@ -2,8 +2,8 @@ llvm-mca - LLVM Machine Code Analyzer
 -------------------------------------
 
 llvm-mca is a performance analysis tool that uses information which is already
-available in LLVM (e.g. scheduling models) to statically measure the performance
-of machine code in a specific cpu.
+available in LLVM (e.g., scheduling models) to statically measure the
+performance of machine code in a specific cpu.
 
 Performance is measured in terms of throughput as well as processor resource
 consumption.  The tool currently works for processors with an out-of-order
@@ -25,9 +25,8 @@ bottlenecks.
 Scheduling models are mostly used to compute instruction latencies, to obtain
 read-advance information, and understand how processor resources are used by
 instructions.  By design, the quality of the performance analysis conducted by
-the tool is inevitably affected by the quality of the target scheduling models. 
-
-However, scheduling models intentionally do not describe all processors details,
+the tool is inevitably affected by the quality of the target scheduling models.
+However, scheduling models intentionally do not describe all processor details,
 since the goal is just to enable the scheduling of machine instructions during
 compilation. That means, there are processor details which are not important for
 the purpose of scheduling instructions (and therefore not described by the
@@ -90,8 +89,8 @@ Resources:
 
 
 Resource pressure per iteration:
-[0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    
- -      -      -      -     2.00   1.00    -      -      -      -     
+[0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+ -      -      -      -     2.00   1.00    -      -      -      -
 
 Resource pressure by instruction:
 [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    	Instructions:
@@ -120,8 +119,8 @@ for a total of 900 instructions dynamica
 
 The report is structured in three main sections.  A first section collects a few
 performance numbers; the goal of this section is to give a very quick overview
-of the performance throughput. In this example, the two important perforamce
-indicators are a) the predicted total number of cycles, and b) the IPC. 
+of the performance throughput. In this example, the two important performance
+indicators are a) the predicted total number of cycles, and b) the IPC.
 IPC is probably the most important throughput indicator. A big delta between the
 Dispatch Width and the computed IPC is an indicator of potential performance
 issues.
@@ -139,8 +138,8 @@ pipeline JFPU1, while horizontal FP adds
 
 The third (and last) section of the report shows the latency and reciprocal
 throughput of every instruction in the sequence. That section also reports extra
-information related to the number of micro opcodes, and opcode properties (i.e.
-'MayLoad', 'MayStore' and 'UnmodeledSideEffects').
+information related to the number of micro opcodes, and opcode properties (i.e.,
+'MayLoad', 'MayStore', and 'UnmodeledSideEffects').
 
 The resource pressure view helps with identifying bottlenecks caused by high
 usage of specific hardware resources.  Situations with resource pressure mainly
@@ -158,7 +157,7 @@ timeline view for the dot-product exampl
 ///////////////
 Timeline view:
      	          012345
-Index	0123456789      
+Index	0123456789
 
 [0,0]	DeeER.    .    .	vmulps	%xmm0, %xmm1, %xmm2
 [0,1]	D==eeeER  .    .	vhaddps	%xmm2, %xmm2, %xmm3
@@ -197,11 +196,11 @@ sub-optimal usage of hardware resources.
 
 An instruction in the timeline view is identified by a pair of indices, where
 the 'first' index identifies an iteration, and the 'second' index is the actual
-instruction index (i.e. where it appears in the code sequence).
+instruction index (i.e., where it appears in the code sequence).
 
-Excluding the first and last column, the remaining columns are in cycles.  Cycles
-are numbered sequentially starting from 0.  The following characters are used to
-describe the state of an instruction:
+Excluding the first and last column, the remaining columns are in cycles.
+Cycles are numbered sequentially starting from 0.  The following characters are
+used to describe the state of an instruction:
 
  D : Instruction dispatched.
  e : Instruction executing.
@@ -216,7 +215,7 @@ Based on the timeline view from the exam
   - Instruction [1, 0] reached the write back stage at cycle 4.
   - Instruction [1, 0] was retired at cycle 10.
 
-Instruction [1, 0] (i.e. the vmulps from iteration #1) doesn't have to wait in
+Instruction [1, 0] (i.e., the vmulps from iteration #1) doesn't have to wait in
 the scheduler's queue for the operands to become available. By the time the
 vmulps is dispatched, operands are already available, and pipeline JFPU1 is
 ready to serve another instruction.  So the instruction can be immediately
@@ -225,7 +224,7 @@ instruction only spent 1cy in the schedu
 
 There is a gap of 5 cycles between the write-back stage and the retire event.
 That is because instructions must retire in program order, so [1,0] has to wait
-for [0, 2] to be retired first (i.e it has to wait unti cycle 10).
+for [0, 2] to be retired first (i.e., it has to wait until cycle 10).
 
 In the dot-product example, all instructions are in a RAW (Read After Write)
 dependency chain.  Register %xmm2 written by the vmulps is immediately used by
@@ -250,7 +249,7 @@ scheduler's queue.  So the difference be
 indicator of how big of an impact data dependencies had on the execution of
 instructions.  When performance is mostly limited by the lack of hardware
 resources, the delta between the two counters is small.  However, the number of
-cycles spent in the queue tends to be bigger (i.e. more than 1-3cy) especially
+cycles spent in the queue tends to be bigger (i.e., more than 1-3cy) especially
 when compared with other low latency instructions.
 
 Extra statistics to further diagnose performance issues.
@@ -317,13 +316,13 @@ instructions 51.5% of the time.  The dis
 instruction 44.6% of the cycles, which corresponds to 272 cycles.
 
 If we look at section "Dynamic Dispatch Stall Cycles", we can see how counter
-SCHEDQ reports 272 cycles.  Counter SCHEDQ is incremented every time the dispatch
-logic is unable to dispatch a full group of two instructions because the
-scheduler's queue is full.
+SCHEDQ reports 272 cycles.  Counter SCHEDQ is incremented every time the
+dispatch logic is unable to dispatch a full group of two instructions because
+the scheduler's queue is full.
 
 Section "Scheduler's queue usage" shows how the maximum number of buffer entries
-(i.e. scheduler's queue entries) used at runtime for resource JFPU01 reached its
-maximum. Note that AMD Jaguar implements three schedulers:
+(i.e., scheduler's queue entries) used at runtime for resource JFPU01 reached
+its maximum. Note that AMD Jaguar implements three schedulers:
   * JALU01 - A scheduler for ALU instructions
   * JLSAGU - A scheduler for address generation
  * JFPU01 - A scheduler for floating point operations.
@@ -346,7 +345,7 @@ LLVM-MCA instruction flow
 -------------------------
 
 This section describes the instruction flow through the out-of-order backend, as
-well as the functional units involved in the process. 
+well as the functional units involved in the process.
 
 An instruction goes through a default sequence of stages:
     - Dispatch (Instruction is dispatched to the schedulers).
@@ -368,11 +367,11 @@ Instruction Dispatch
 
 During the Dispatch stage, instructions are picked in program order from a queue
 of already decoded instructions, and dispatched in groups to the hardware
-schedulers.  The dispatch logic is implemented by class DispatchUnit in file
-Dispatch.h.
+schedulers.  The dispatch logic is implemented by class DispatchStage in file
+DispatchStage.h.
 
 The size of a dispatch group depends on the availability of hardware resources,
-and it cannot exceed the value of field 'DispatchWidth' in class DispatchUnit.
+and it cannot exceed the value of field 'DispatchWidth' in class DispatchStage.
 Note that field DispatchWidth defaults to the value of field 'IssueWidth' from
 the scheduling model.
 
@@ -385,34 +384,35 @@ An instruction can be dispatched if:
  - There are enough temporary registers to do register renaming
  - Schedulers are not full.
 
-Since r329067, scheduling models can now optionally specify which register files
-are available on the processor. Class DispatchUnit(see Dispatch.h) would use
-that information to initialize register file descriptors.
+Since r329067, scheduling models can now optionally specify which register
+files are available on the processor. Class DispatchStage (see DispatchStage.h)
+would use that information to initialize register file descriptors.
 
 By default, if the model doesn't describe register files, the tool
 (optimistically) assumes a single register file with an unbounded number of
-temporary registers.  Users can limit the number of temporary registers that are
-globally available for register renaming using flag `-register-file-size=<N>`,
-where N is the number of temporaries.  A value of zero for N means 'unbounded'.
-Knowing how many temporaries are available for register renaming, the tool can
-predict dispatch stalls caused by the lack of temporaries.
+temporary registers.  Users can limit the number of temporary registers that
+are globally available for register renaming using flag
+`-register-file-size=<N>`, where N is the number of temporaries.  A value of
+zero for N means 'unbounded'.  Knowing how many temporaries are available for
+register renaming, the tool can predict dispatch stalls caused by the lack of
+temporaries.
 
 The number of reorder buffer entries consumed by an instruction depends on the
 number of micro-opcodes it specifies in the target scheduling model (see field
-'NumMicroOpcodes' of tablegen class ProcWriteResources and its derived classes;
+'NumMicroOpcodes' of TableGen class ProcWriteResources and its derived classes;
 TargetSchedule.td).
 
-The reorder buffer is implemented by class RetireControlUnit (see Dispatch.h).
-Its goal is to track the progress of instructions that are "in-flight", and
-retire instructions in program order.  The number of entries in the reorder
-buffer defaults to the value of field 'MicroOpBufferSize' from the target
-scheduling model.
+The reorder buffer is implemented by class RetireControlUnit (see
+DispatchStage.h).  Its goal is to track the progress of instructions that are
+"in-flight", and retire instructions in program order.  The number of entries
+in the reorder buffer defaults to the value of field 'MicroOpBufferSize' from
+the target scheduling model.
 
 Instructions that are dispatched to the schedulers consume scheduler buffer
 entries.  The tool queries the scheduling model to figure out the set of
 buffered resources consumed by an instruction.  Buffered resources are treated
 like "scheduler" resources, and the field 'BufferSize' (from the processor
-resource tablegen definition) defines the size of the scheduler's queue.
+resource TableGen definition) defines the size of the scheduler's queue.
 
 Zero latency instructions (for example NOP instructions) don't consume scheduler
 resources.  However, those instructions still reserve a number of slots in the
@@ -485,7 +485,7 @@ Load/Store Unit and Memory Consistency M
 The tool attempts to emulate out-of-order execution of memory operations.  Class
 LSUnit (see file LSUnit.h) emulates a load/store unit implementing queues for
 speculative execution of loads and stores.
- 
+
 Each load (or store) consumes an entry in the load (or store) queue.  The number
 of slots in the load/store queues is unknown by the tool, since there is no
 mention of it in the scheduling model.  In practice, users can specify flag
@@ -502,13 +502,14 @@ rules are:
 4) A younger load is allowed to pass an older store provided that the load does
    not alias with the store.
 
-By default, this class conservatively (i.e. pessimistically) assumes that loads
-always may-alias store operations.  Essentially, this LSUnit doesn't perform any
-sort of alias analysis to rule out cases where loads and stores don't overlap
-with each other.  The downside of this approach however is that younger loads are
-never allowed to pass older stores.  To make it possible for a younger load to
-pass an older store, users can use the command line flag -noalias.  Under
-'noalias', a younger load is always allowed to pass an older store.
+By default, this class conservatively (i.e., pessimistically) assumes that loads
+always may-alias store operations.  Essentially, this LSUnit doesn't perform
+any sort of alias analysis to rule out cases where loads and stores don't
+overlap with each other.  The downside of this approach however is that younger
+loads are never allowed to pass older stores.  To make it possible for a
+younger load to pass an older store, users can use the command line flag
+-noalias.  Under 'noalias', a younger load is always allowed to pass an older
+store.
 
 Note that, in the case of write-combining memory, rule 2. could be relaxed a bit
 to allow reordering of non-aliasing store operations.  That being said, at the
@@ -573,7 +574,7 @@ the processor model used by the tool.
 Most recent Intel and AMD processors implement dedicated LoopBuffer/OpCache in
 the hardware frontend to speedup the throughput in the presence of tight loops.
 The presence of these buffers complicates the decoding logic, and requires
-knowledge on the branch predictor too.  Class 'SchedMachineModel' in tablegen
+knowledge on the branch predictor too.  Class 'SchedMachineModel' in TableGen
 provides a field named 'LoopMicroOpBufferSize' which is used to describe loop
 buffers.  However, the purpose of that field is to enable loop unrolling of
 tight loops; essentially, it affects the cost model used by pass loop-unroll.
@@ -609,9 +610,9 @@ the compiler to predict the latency of i
 accordingly. For such targets, there is no dynamic scheduling done by the
 hardware.
 
-Existing classes (DispatchUnit, Scheduler, etc.) could be extended/adapted to
+Existing classes (DispatchStage, Scheduler, etc.) could be extended/adapted to
 support processors with a single dispatch/issue stage. The execution flow would
-require some changes in the way how existing components (i.e.  DispatchUnit,
+require some changes in the way existing components (i.e., DispatchStage,
 Scheduler, etc.) interact. This can be a future development.
 
 The following sections describe other known limitations.  The goal is not to
@@ -690,8 +691,8 @@ To get accurate performance analysis, th
 perform a partial register update, and which instructions fully update the
 destination's super-register.
 
-One way to expose this information is (again) via tablegen.  For example, we
-could add a flag in the tablegen instruction class to tag instructions that
+One way to expose this information is (again) via TableGen.  For example, we
+could add a flag in the TableGen instruction class to tag instructions that
 perform partial register updates. Something like this: 'bit
 hasPartialRegisterUpdate = 1'. However, this would force a `let
 hasPartialRegisterUpdate = 0` on several instruction definitions.
@@ -707,7 +708,7 @@ idea. But the plan is to have this fixed
 The tool doesn't know about macro-op fusion. On modern x86 processors, a
 'cmp/test' followed by a 'jmp' is fused into a single macro operation.  The
 advantage is that the fused pair only consumes a single slot in the dispatch
-group. 
+group.
 
 As a future development, the tool should be extended to address macro-fusion.
 Ideally, we could have LLVM generate a table enumerating all the opcode pairs
@@ -777,9 +778,9 @@ the associated MCInstrDesc object.
 However class MCInstrDesc describes properties and operands of MachineInstr
 objects. Essentially, MCInstrDesc is not meant to be used to describe MCInst
 objects.  To be more specific, MCInstrDesc objects are automatically generated
-via tablegen from the instruction set description in the target .td files.  For
+via TableGen from the instruction set description in the target .td files.  For
 example, field `MCInstrDesc::NumDefs' is always equal to the cardinality of the
-`(outs)` set from the tablegen instruction definition.
+`(outs)` set from the TableGen instruction definition.
 
 By construction, register definitions always appear at the beginning of the
 MachineOperands list in MachineInstr. Basically, the (outs) are the first
@@ -792,8 +793,8 @@ objects through a lowering step. By defa
 over the machine operands of a MachineInstr, and converts/expands them into
 equivalent MCOperand objects.
 
-The default lowering strategy has the advantage of preserving all of the
-above mentioned assumptions on the machine operand sequence. That means, register
+The default lowering strategy has the advantage of preserving all of the above
+mentioned assumptions on the machine operand sequence. That means, register
 definitions would still be at the beginning of the MCOperand sequence, and
 register uses would come after.
 
@@ -803,7 +804,7 @@ assumptions on the machine operand seque
 Luckily, this is not the most common form of lowering done by the targets, and
 the vast majority of the MachineInstr are lowered based on the default strategy
 which preserves the original machine operand sequence.  This is especially true
-for x86, where the custom lowering logic always preserves the original (i.e.
+for x86, where the custom lowering logic always preserves the original (i.e.,
 from the MachineInstr) operand sequence.
 
 This tool currently works under the strong (and potentially incorrect)
@@ -821,7 +822,7 @@ index for every register MCOperand (or -
 original MachineInstr). The mapping could look like this <0,1,3,2>.  Here,
 MCOperand #2 was obtained from the lowering of MachineOperand #3. etc.
 
-This information could be automatically generated via tablegen for all the
+This information could be automatically generated via TableGen for all the
 instructions whose custom lowering step breaks assumptions made by the tool on
 the register operand sequence (In general, these instructions should be the
 minority of a target's instruction set). Unfortunately, we don't have that

Modified: llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp (original)
+++ llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp Thu May 17 12:22:29 2018
@@ -12,8 +12,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "Dispatch.h"
 #include "RetireControlUnit.h"
+#include "DispatchStage.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -23,9 +23,9 @@ using namespace llvm;
 namespace mca {
 
 RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM,
-                                     DispatchUnit *DU)
+                                     DispatchStage *DS)
     : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
-      AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0), Owner(DU) {
+      AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0), Owner(DS) {
   // Check if the scheduling model provides extra information about the machine
   // processor. If so, then use that information to set the reorder buffer size
   // and the maximum number of instructions retired per cycle.

Modified: llvm/trunk/tools/llvm-mca/RetireControlUnit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/RetireControlUnit.h?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/RetireControlUnit.h (original)
+++ llvm/trunk/tools/llvm-mca/RetireControlUnit.h Thu May 17 12:22:29 2018
@@ -23,14 +23,14 @@
 
 namespace mca {
 
-class DispatchUnit;
+class DispatchStage;
 
 /// This class tracks which instructions are in-flight (i.e., dispatched but not
 /// retired) in the OoO backend.
 //
 /// This class checks on every cycle if/which instructions can be retired.
 /// Instructions are retired in program order.
-/// In the event of instruction retired, the DispatchUnit object that owns
+/// In the event of instruction retired, the DispatchStage object that owns
 /// this RetireControlUnit (RCU) gets notified.
 /// On instruction retired, register updates are all architecturally
 /// committed, and any temporary registers originally allocated for the
@@ -62,10 +62,10 @@ private:
   unsigned AvailableSlots;
   unsigned MaxRetirePerCycle; // 0 means no limit.
   std::vector<RUToken> Queue;
-  DispatchUnit *Owner;
+  DispatchStage *Owner;
 
 public:
-  RetireControlUnit(const llvm::MCSchedModel &SM, DispatchUnit *DU);
+  RetireControlUnit(const llvm::MCSchedModel &SM, DispatchStage *DU);
 
   bool isFull() const { return !AvailableSlots; }
   bool isEmpty() const { return AvailableSlots == Queue.size(); }

Modified: llvm/trunk/tools/llvm-mca/Scheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Scheduler.cpp?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Scheduler.cpp (original)
+++ llvm/trunk/tools/llvm-mca/Scheduler.cpp Thu May 17 12:22:29 2018
@@ -468,7 +468,7 @@ void Scheduler::notifyInstructionExecute
   LLVM_DEBUG(dbgs() << "[E] Instruction Executed: " << IR << '\n');
   Owner->notifyInstructionEvent(
       HWInstructionEvent(HWInstructionEvent::Executed, IR));
-  DU->onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
+  DS->onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
 }
 
 void Scheduler::notifyInstructionReady(const InstRef &IR) {

Modified: llvm/trunk/tools/llvm-mca/Scheduler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Scheduler.h?rev=332652&r1=332651&r2=332652&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Scheduler.h (original)
+++ llvm/trunk/tools/llvm-mca/Scheduler.h Thu May 17 12:22:29 2018
@@ -24,7 +24,7 @@
 namespace mca {
 
 class Backend;
-class DispatchUnit;
+class DispatchStage;
 
 /// Used to notify the internal state of a processor resource.
 ///
@@ -411,7 +411,7 @@ class Scheduler {
   Backend *const Owner;
 
   // The dispatch unit gets notified when instructions are executed.
-  DispatchUnit *DU;
+  DispatchStage *DS;
 
   using QueueEntryTy = std::pair<unsigned, Instruction *>;
   std::map<unsigned, Instruction *> WaitQueue;
@@ -454,13 +454,13 @@ public:
                                       AssumeNoAlias)),
         Owner(B) {}
 
-  void setDispatchUnit(DispatchUnit *DispUnit) { DU = DispUnit; }
+  void setDispatchStage(DispatchStage *DispStage) { DS = DispStage; }
 
   /// Check if the instruction in 'IR' can be dispatched.
   ///
-  /// The DispatchUnit is responsible for querying the Scheduler before
+  /// The DispatchStage is responsible for querying the Scheduler before
   /// dispatching new instructions. Queries are performed through method
-  /// `Scheduler::CanBeDispatched`. If scheduling resources are available,
+  /// `Scheduler::canBeDispatched`. If scheduling resources are available,
   /// and the instruction can be dispatched, then this method returns true.
   /// Otherwise, a generic HWStallEvent is notified to the listeners.
   bool canBeDispatched(const InstRef &IR) const;




More information about the llvm-commits mailing list