[llvm] r361950 - [MCA] Refactor class LSUnit. NFCI

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Wed May 29 04:38:27 PDT 2019


Author: adibiagio
Date: Wed May 29 04:38:27 2019
New Revision: 361950

URL: http://llvm.org/viewvc/llvm-project?rev=361950&view=rev
Log:
[MCA] Refactor class LSUnit. NFCI

This should be the last bit of refactoring in preparation for a patch that would
finally fix PR37494.

This patch introduces the concept of a memory dependency group (class
MemoryGroup) and of a "Load/Store Unit token" (LSUToken) used to track the
status of a memory operation.

A MemoryGroup is a node of a memory dependency graph. It is used internally
to classify memory operations based on the memory operations they depend on.
Let I and J be two memory operations; we say that I and J are equivalent (for
the purpose of mapping instructions to memory dependency groups) if the set
of memory operations they depend on is identical.

MemoryGroups are identified by so-called LSUTokens (unique group identifiers
assigned by the LSUnit to every group). When an instruction I is dispatched
to the LSUnit, the LSUnit maps I to a group and then returns an LSUToken.
LSUTokens are used by class Scheduler to track memory dependencies.
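
For illustration only, here is a minimal sketch of the intended flow (the
names come from this patch; the surrounding scaffolding is hypothetical):

    // Dispatch stage: map the memory operation to a MemoryGroup and store
    // the group identifier (the LSUToken) on the instruction itself.
    mca::Instruction &IS = *IR.getInstruction();
    if (IS.isMemOp())
      IS.setLSUTokenID(LSU.dispatch(IR));
    // Readiness later becomes a group-state query keyed by that token:
    bool NoMemHazard = !IS.isMemOp() || LSU.isReady(IR);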

This patch simplifies the LSUnit interface and moves most of the
implementation details into its base class (LSUnitBase). There is no
user-visible change to the output.

Modified:
    llvm/trunk/include/llvm/MCA/HardwareUnits/LSUnit.h
    llvm/trunk/include/llvm/MCA/HardwareUnits/Scheduler.h
    llvm/trunk/include/llvm/MCA/Instruction.h
    llvm/trunk/lib/MCA/HardwareUnits/LSUnit.cpp
    llvm/trunk/lib/MCA/HardwareUnits/Scheduler.cpp

Modified: llvm/trunk/include/llvm/MCA/HardwareUnits/LSUnit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MCA/HardwareUnits/LSUnit.h?rev=361950&r1=361949&r2=361950&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MCA/HardwareUnits/LSUnit.h (original)
+++ llvm/trunk/include/llvm/MCA/HardwareUnits/LSUnit.h Wed May 29 04:38:27 2019
@@ -15,7 +15,8 @@
 #ifndef LLVM_MCA_LSUNIT_H
 #define LLVM_MCA_LSUNIT_H
 
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCSchedule.h"
 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
 #include "llvm/MCA/Instruction.h"
@@ -25,6 +26,143 @@ namespace mca {
 
 class Scheduler;
 
+/// A node of a memory dependency graph. A MemoryGroup describes a set of
+/// instructions with the same memory dependencies.
+///
+/// By construction, instructions of a MemoryGroup don't depend on each other.
+/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
+/// A memory group identifier is then stored as a "token" in field
+/// Instruction::LSUTokenID of each dispatched instruction. That token is used
+/// internally by the LSUnit to track memory dependencies.
+class MemoryGroup {
+  unsigned NumPredecessors;
+  unsigned NumExecutingPredecessors;
+  unsigned NumExecutedPredecessors;
+
+  unsigned NumInstructions;
+  unsigned NumExecuting;
+  unsigned NumExecuted;
+  SmallVector<MemoryGroup *, 4> Succ;
+
+  CriticalDependency CriticalPredecessor;
+  InstRef CriticalMemoryInstruction;
+
+  MemoryGroup(const MemoryGroup &) = delete;
+  MemoryGroup &operator=(const MemoryGroup &) = delete;
+
+public:
+  MemoryGroup()
+      : NumPredecessors(0), NumExecutingPredecessors(0),
+        NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
+        NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {}
+  MemoryGroup(MemoryGroup &&) = default;
+
+  ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; }
+  unsigned getNumSuccessors() const { return Succ.size(); }
+  unsigned getNumPredecessors() const { return NumPredecessors; }
+  unsigned getNumExecutingPredecessors() const {
+    return NumExecutingPredecessors;
+  }
+  unsigned getNumExecutedPredecessors() const {
+    return NumExecutedPredecessors;
+  }
+  unsigned getNumInstructions() const { return NumInstructions; }
+  unsigned getNumExecuting() const { return NumExecuting; }
+  unsigned getNumExecuted() const { return NumExecuted; }
+
+  const InstRef &getCriticalMemoryInstruction() const { 
+    return CriticalMemoryInstruction;
+  }
+  const CriticalDependency &getCriticalPredecessor() const {
+    return CriticalPredecessor;
+  }
+
+  void addSuccessor(MemoryGroup *Group) {
+    Group->NumPredecessors++;
+    assert(!isExecuted() && "Should have been removed!");
+    if (isExecuting())
+      Group->onGroupIssued(CriticalMemoryInstruction);
+    Succ.emplace_back(Group);
+  }
+
+  bool isWaiting() const {
+    return NumPredecessors >
+           (NumExecutingPredecessors + NumExecutedPredecessors);
+  }
+  bool isPending() const {
+    return NumExecutingPredecessors &&
+           ((NumExecutedPredecessors + NumExecutingPredecessors) ==
+            NumPredecessors);
+  }
+  bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
+  bool isExecuting() const {
+    return NumExecuting == NumInstructions - NumExecuted;
+  }
+  bool isExecuted() const { return NumInstructions == NumExecuted; }
+
+  void onGroupIssued(const InstRef &IR) {
+    assert(!isReady() && "Unexpected group-start event!");
+    NumExecutingPredecessors++;
+
+    unsigned Cycles = IR.getInstruction()->getCyclesLeft();
+    if (CriticalPredecessor.Cycles < Cycles) {
+      CriticalPredecessor.IID = IR.getSourceIndex();
+      CriticalPredecessor.Cycles = Cycles;
+    }
+  }
+
+  void onGroupExecuted() {
+    assert(!isReady() && "Inconsistent state found!");
+    NumExecutingPredecessors--;
+    NumExecutedPredecessors++;
+  }
+
+  void onInstructionIssued(const InstRef &IR) {
+    assert(!isExecuting() && "Invalid internal state!");
+    ++NumExecuting;
+
+    // Update the critical memory instruction for this group.
+    const Instruction &IS = *IR.getInstruction();
+    if ((bool)CriticalMemoryInstruction) {
+      const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
+      if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
+        CriticalMemoryInstruction = IR;
+    } else {
+      CriticalMemoryInstruction = IR;
+    }
+
+    if (!isExecuting())
+      return;
+
+    // Notify successors that this group started execution.
+    for (MemoryGroup *MG : Succ)
+      MG->onGroupIssued(CriticalMemoryInstruction);
+  }
+
+  void onInstructionExecuted() {
+    assert(isReady() && !isExecuted() && "Invalid internal state!");
+    --NumExecuting;
+    ++NumExecuted;
+
+    if (!isExecuted())
+      return;
+
+    // Notify successors that this group has finished execution.
+    for (MemoryGroup *MG : Succ)
+      MG->onGroupExecuted();
+  }
+
+  void addInstruction() {
+    assert(!getNumSuccessors() && "Cannot add instructions to this group!");
+    ++NumInstructions;
+  }
+
+  void cycleEvent() {
+    if (CriticalPredecessor.Cycles)
+      CriticalPredecessor.Cycles--;
+  }
+};
+
 /// Abstract base interface for LS (load/store) units in llvm-mca.
 class LSUnitBase : public HardwareUnit {
   /// Load queue size.
@@ -43,6 +181,9 @@ class LSUnitBase : public HardwareUnit {
   /// llvm/Target/TargetSchedule.td).
   unsigned SQSize;
 
+  unsigned UsedLQEntries;
+  unsigned UsedSQEntries;
+
   /// True if loads don't alias with stores.
   ///
   /// By default, the LS unit assumes that loads and stores don't alias with
@@ -50,6 +191,10 @@ class LSUnitBase : public HardwareUnit {
   /// alias with stores.
   const bool NoAlias;
 
+  /// Used to map group identifiers to MemoryGroups.
+  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
+  unsigned NextGroupID;
+
 public:
   LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
              unsigned StoreQueueSize, bool AssumeNoAlias);
@@ -62,6 +207,11 @@ public:
   /// Returns the total number of entries in the store queue.
   unsigned getStoreQueueSize() const { return SQSize; }
 
+  unsigned getUsedLQEntries() const { return UsedLQEntries; }
+  unsigned getUsedSQEntries() const { return UsedSQEntries; }
+  unsigned assignLQSlot() { return UsedLQEntries++; }
+  unsigned assignSQSlot() { return UsedSQEntries++; }
+
   bool assumeNoAlias() const { return NoAlias; }
 
   enum Status {
@@ -81,25 +231,71 @@ public:
   ///
   /// This method assumes that a previous call to `isAvailable(IR)` succeeded
   /// with a LSUnitBase::Status value of LSU_AVAILABLE.
-  virtual void dispatch(const InstRef &IR) = 0;
+  /// Returns the GroupID associated with this instruction. That value will be
+  /// used to set the LSUTokenID field in class Instruction.
+  virtual unsigned dispatch(const InstRef &IR) = 0;
+
+  bool isSQEmpty() const { return !UsedSQEntries; }
+  bool isLQEmpty() const { return !UsedLQEntries; }
+  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
+  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }
+
+  bool isValidGroupID(unsigned Index) const {
+    return Index && (Groups.find(Index) != Groups.end());
+  }
 
   /// Check if a previously dispatched instruction IR is now ready for execution.
-  ///
-  /// Instruction IR is assumed to be a memory operation. If IR is still waiting
-  /// on another memory instruction M, then M is returned to the caller. If IR
-  /// depends on more than one memory operations, then this method returns one
-  /// of them.
-  ///
-  /// Derived classes can implement memory consistency rules for simulated
-  /// processor within this member function.
-  virtual const InstRef &isReady(const InstRef &IR) const = 0;
+  bool isReady(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    assert(isValidGroupID(GroupID) &&
+           "Invalid group associated with this instruction!");
+    const MemoryGroup &Group = *Groups.find(GroupID)->second;
+    return Group.isReady();
+  }
+
+  /// Check if a previously dispatched instruction IR only depends on
+  /// instructions that are currently executing.
+  bool isPending(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    assert(isValidGroupID(GroupID) &&
+           "Invalid group associated with this instruction!");
+    const MemoryGroup &Group = *Groups.find(GroupID)->second;
+    return Group.isPending();
+  }
+
+  const MemoryGroup &getGroup(unsigned Index) const {
+    assert(isValidGroupID(Index) && "Group doesn't exist!");
+    return *Groups.find(Index)->second;
+  }
+
+  MemoryGroup &getGroup(unsigned Index) {
+    assert(isValidGroupID(Index) && "Group doesn't exist!");
+    return *Groups.find(Index)->second;
+  }
+
+  unsigned createMemoryGroup() {
+    Groups.insert(std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
+    return NextGroupID++;
+  }
+
+  // Instruction executed event handlers.
+  virtual void onInstructionExecuted(const InstRef &IR);
+
+  virtual void onInstructionIssued(const InstRef &IR) {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    Groups[GroupID]->onInstructionIssued(IR);
+  }
+
+  virtual void cycleEvent();
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
 };
 
-/// A Load/Store Unit implementing a load and store queues.
+/// Default Load/Store Unit (LS Unit) for simulated processors.
 ///
-/// This class implements a load queue and a store queue to emulate the
-/// out-of-order execution of memory operations.
-/// Each load (or store) consumes an entry in the load (or store) queue.
+/// Each load (or store) consumes one entry in the load (or store) queue.
 ///
 /// Rules are:
 /// 1) A younger load is allowed to pass an older load only if there are no
@@ -159,14 +355,6 @@ public:
 /// the load/store queue(s). That also means all the older loads/stores have
 /// already been executed.
 class LSUnit : public LSUnitBase {
-  // When a `MayLoad` instruction is dispatched to the schedulers for execution,
-  // the LSUnit reserves an entry in the `LoadQueue` for it.
-  //
-  // LoadQueue keeps track of all the loads that are in-flight. A load
-  // instruction is eventually removed from the LoadQueue when it reaches
-  // completion stage. That means, a load leaves the queue whe it is 'executed',
-  // and its value can be forwarded on the data path to outside units.
-  //
   // This class doesn't know about the latency of a load instruction. So, it
   // conservatively/pessimistically assumes that the latency of a load opcode
   // matches the instruction latency.
@@ -197,30 +385,17 @@ class LSUnit : public LSUnitBase {
   // alternative approaches that let instructions specify the number of
   // load/store queue entries which they consume at dispatch stage (See
   // PR39830).
-  SmallSet<InstRef, 16> LoadQueue;
-  SmallSet<InstRef, 16> StoreQueue;
-
-  void assignLQSlot(const InstRef &IR);
-  void assignSQSlot(const InstRef &IR);
-
+  //
   // An instruction that is both 'mayStore' and 'HasUnmodeledSideEffects' is
   // conservatively treated as a store barrier. It forces older stores to be
   // executed before newer stores are issued.
-  SmallSet<InstRef, 8> StoreBarriers;
-
+  //
   // An instruction that is both 'MayLoad' and 'HasUnmodeledSideEffects' is
   // conservatively treated as a load barrier. It forces older loads to execute
   // before newer loads are issued.
-  SmallSet<InstRef, 8> LoadBarriers;
-
-  bool isSQEmpty() const { return StoreQueue.empty(); }
-  bool isLQEmpty() const { return LoadQueue.empty(); }
-  bool isSQFull() const {
-    return getStoreQueueSize() != 0 && StoreQueue.size() == getStoreQueueSize();
-  }
-  bool isLQFull() const {
-    return getLoadQueueSize() != 0 && LoadQueue.size() == getLoadQueueSize();
-  }
+  unsigned CurrentLoadGroupID;
+  unsigned CurrentLoadBarrierGroupID;
+  unsigned CurrentStoreGroupID;
 
 public:
   LSUnit(const MCSchedModel &SM)
@@ -228,11 +403,8 @@ public:
   LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
       : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
   LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
-      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias) {}
-
-#ifndef NDEBUG
-  void dump() const;
-#endif
+      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
+        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {}
 
   /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
   /// accommodate instruction IR.
@@ -242,9 +414,6 @@ public:
   ///
   /// This method assumes that a previous call to `isAvailable(IR)` succeeded
   /// returning LSU_AVAILABLE.
-  void dispatch(const InstRef &IR) override;
-
-  /// Check if a peviously dispatched instruction IR is now ready for execution.
   ///
   /// By default, rules are:
@@ -254,19 +423,12 @@ public:
   /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
   /// 5. A load has to wait until an older load barrier is fully executed.
   /// 6. A store has to wait until an older store barrier is fully executed.
-  const InstRef &isReady(const InstRef &IR) const override;
+  unsigned dispatch(const InstRef &IR) override;
 
-  /// Instruction executed event handler.
-  ///
-  /// Load and store instructions are tracked by their corresponding queues from
-  /// dispatch until "instruction executed" event.
-  /// When a load instruction Ld reaches the 'Executed' stage, its value
-  /// is propagated to all the dependent users, and the LS unit stops tracking
-  /// Ld.
-  /// FIXME: For simplicity, we optimistically assume a similar behavior for
-  /// store instructions. In practice, store operations don't tend to leave the
-  /// store queue until they reach the 'Retired' stage (See PR39830).
-  void onInstructionExecuted(const InstRef &IR);
+  // FIXME: For simplicity, we optimistically assume a similar behavior for
+  // store instructions. In practice, store operations don't tend to leave the
+  // store queue until they reach the 'Retired' stage (See PR39830).
+  void onInstructionExecuted(const InstRef &IR) override;
 };
 
 } // namespace mca

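As a side note, the MemoryGroup state machine above can be exercised in
isolation. A minimal sketch (illustration only: inside the simulator only the
LSUnit creates groups, and IR stands in for some valid InstRef):

    mca::MemoryGroup Older, Younger;
    Older.addInstruction();        // one memory operation in the older group
    Older.addSuccessor(&Younger);  // Younger must wait for Older
    Younger.addInstruction();
    assert(Younger.isWaiting());   // predecessor has not issued yet
    Older.onInstructionIssued(IR); // Older starts executing, so...
    assert(Younger.isPending());   // ...Younger only waits on in-flight ops
    Older.onInstructionExecuted(); // Older is now fully executed
    assert(Younger.isReady());     // no unresolved memory dependencies left
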
Modified: llvm/trunk/include/llvm/MCA/HardwareUnits/Scheduler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MCA/HardwareUnits/Scheduler.h?rev=361950&r1=361949&r2=361950&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MCA/HardwareUnits/Scheduler.h (original)
+++ llvm/trunk/include/llvm/MCA/HardwareUnits/Scheduler.h Wed May 29 04:38:27 2019
@@ -191,7 +191,11 @@ public:
   /// Returns true if instruction IR is ready to be issued to the underlying
   /// pipelines. Note that this operation cannot fail; it assumes that a
   /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
-  bool dispatch(const InstRef &IR);
+  ///
+  /// If IR is a memory operation, then the Scheduler queries the LS unit to
+  /// obtain a LS token. An LS token is used internally to track memory
+  /// dependencies.
+  bool dispatch(InstRef &IR);
 
   /// Issues an instruction and populates a vector of used pipeline resources,
   /// and a vector of instructions that transitioned to the ready state as a

Modified: llvm/trunk/include/llvm/MCA/Instruction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MCA/Instruction.h?rev=361950&r1=361949&r2=361950&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MCA/Instruction.h (original)
+++ llvm/trunk/include/llvm/MCA/Instruction.h Wed May 29 04:38:27 2019
@@ -458,6 +458,11 @@ class Instruction : public InstructionBa
   // Retire Unit token ID for this instruction.
   unsigned RCUTokenID;
 
+  // LS token ID for this instruction.
+  // This field is set to the invalid null token if this is not a memory
+  // operation.
+  unsigned LSUTokenID;
+
   // Critical register dependency.
   CriticalDependency CriticalRegDep;
 
@@ -469,19 +474,18 @@ class Instruction : public InstructionBa
   // cycle because of unavailable pipeline resources.
   uint64_t CriticalResourceMask;
 
-  // Used internally by the logic that computes the critical memory dependency.
-  const Instruction *CurrentMemDep;
-
   // True if this instruction has been optimized at register renaming stage.
   bool IsEliminated;
 
 public:
   Instruction(const InstrDesc &D)
       : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
-        RCUTokenID(0), CriticalRegDep(), CriticalMemDep(),
-        CriticalResourceMask(0), CurrentMemDep(nullptr), IsEliminated(false) {}
+        RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(),
+        CriticalResourceMask(0), IsEliminated(false) {}
 
   unsigned getRCUTokenID() const { return RCUTokenID; }
+  unsigned getLSUTokenID() const { return LSUTokenID; }
+  void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; }
   int getCyclesLeft() const { return CyclesLeft; }
 
   // Transition to the dispatch stage, and assign a RCUToken to this
@@ -523,13 +527,9 @@ public:
   const CriticalDependency &getCriticalRegDep() const { return CriticalRegDep; }
   const CriticalDependency &getCriticalMemDep() const { return CriticalMemDep; }
   const CriticalDependency &computeCriticalRegDep();
-
-  void setCriticalMemDep(unsigned IID, unsigned Cycles) {
-    CriticalMemDep.IID = IID;
-    CriticalMemDep.Cycles = Cycles;
+  void setCriticalMemDep(const CriticalDependency &MemDep) {
+    CriticalMemDep = MemDep;
   }
-  const Instruction *getCurrentMemDep() const { return CurrentMemDep; }
-  void setCurrentMemDep(const Instruction *CMD) { CurrentMemDep = CMD; }
 
   uint64_t getCriticalResourceMask() const { return CriticalResourceMask; }
   void setCriticalResourceMask(uint64_t ResourceMask) {

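For context, both new members are exercised by the Scheduler change further
below: the token is written at dispatch stage, and the critical memory
dependency is now copied wholesale from the owning group at issue time:

    // From Scheduler::issueInstructionImpl (see the Scheduler.cpp diff):
    if (IS->isMemOp()) {
      LSU.onInstructionIssued(IR);
      const MemoryGroup &Group = LSU.getGroup(IS->getLSUTokenID());
      IS->setCriticalMemDep(Group.getCriticalPredecessor());
    }
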
Modified: llvm/trunk/lib/MCA/HardwareUnits/LSUnit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MCA/HardwareUnits/LSUnit.cpp?rev=361950&r1=361949&r2=361950&view=diff
==============================================================================
--- llvm/trunk/lib/MCA/HardwareUnits/LSUnit.cpp (original)
+++ llvm/trunk/lib/MCA/HardwareUnits/LSUnit.cpp Wed May 29 04:38:27 2019
@@ -23,7 +23,8 @@ namespace mca {
 
 LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
                        bool AssumeNoAlias)
-    : LQSize(LQ), SQSize(SQ), NoAlias(AssumeNoAlias) {
+    : LQSize(LQ), SQSize(SQ), UsedLQEntries(0), UsedSQEntries(0),
+      NoAlias(AssumeNoAlias), NextGroupID(1) {
   if (SM.hasExtraProcessorInfo()) {
     const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
     if (!LQSize && EPI.LoadQueueID) {
@@ -40,47 +41,113 @@ LSUnitBase::LSUnitBase(const MCSchedMode
 
 LSUnitBase::~LSUnitBase() {}
 
+void LSUnitBase::cycleEvent() {
+  for (const std::pair<unsigned, std::unique_ptr<MemoryGroup>> &G : Groups)
+    G.second->cycleEvent();
+}
+
 #ifndef NDEBUG
-void LSUnit::dump() const {
+void LSUnitBase::dump() const {
   dbgs() << "[LSUnit] LQ_Size = " << getLoadQueueSize() << '\n';
   dbgs() << "[LSUnit] SQ_Size = " << getStoreQueueSize() << '\n';
-  dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
-  dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
+  dbgs() << "[LSUnit] NextLQSlotIdx = " << getUsedLQEntries() << '\n';
+  dbgs() << "[LSUnit] NextSQSlotIdx = " << getUsedSQEntries() << '\n';
+  dbgs() << "\n";
+  for (const auto &GroupIt : Groups) {
+    const MemoryGroup &Group = *GroupIt.second;
+    dbgs() << "[LSUnit] Group (" << GroupIt.first << "): "
+           << "[ #Preds = " << Group.getNumPredecessors()
+           << ", #GIssued = " << Group.getNumExecutingPredecessors()
+           << ", #GExecuted = " << Group.getNumExecutedPredecessors()
+           << ", #Inst = " << Group.getNumInstructions()
+           << ", #IIssued = " << Group.getNumExecuting()
+           << ", #IExecuted = " << Group.getNumExecuted() << " ]\n";
+  }
 }
 #endif
 
-void LSUnit::assignLQSlot(const InstRef &IR) {
-  assert(!isLQFull() && "Load Queue is full!");
+unsigned LSUnit::dispatch(const InstRef &IR) {
+  const InstrDesc &Desc = IR.getInstruction()->getDesc();
+  unsigned IsMemBarrier = Desc.HasSideEffects;
+  assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
 
-  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << IR.getSourceIndex()
-                    << ",slot=" << LoadQueue.size() << ">\n");
-  LoadQueue.insert(IR);
-}
+  if (Desc.MayLoad)
+    assignLQSlot();
+  if (Desc.MayStore)
+    assignSQSlot();
 
-void LSUnit::assignSQSlot(const InstRef &IR) {
-  assert(!isSQFull() && "Store Queue is full!");
+  if (Desc.MayStore) {
+    // Always create a new group for store operations.
 
-  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << IR.getSourceIndex()
-                    << ",slot=" << StoreQueue.size() << ">\n");
-  StoreQueue.insert(IR);
-}
+    // A store may not pass a previous store or store barrier.
+    unsigned NewGID = createMemoryGroup();
+    MemoryGroup &NewGroup = getGroup(NewGID);
+    NewGroup.addInstruction();
 
-void LSUnit::dispatch(const InstRef &IR) {
-  const InstrDesc &Desc = IR.getInstruction()->getDesc();
-  unsigned IsMemBarrier = Desc.HasSideEffects;
-  assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
+    // A store may not pass a previous load or load barrier.
+    unsigned ImmediateLoadDominator =
+        std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID);
+    if (ImmediateLoadDominator) {
+      MemoryGroup &IDom = getGroup(ImmediateLoadDominator);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator
+                        << ") --> (" << NewGID << ")\n");
+      IDom.addSuccessor(&NewGroup);
+    }
+    if (CurrentStoreGroupID) {
+      MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+                        << ") --> (" << NewGID << ")\n");
+      StoreGroup.addSuccessor(&NewGroup);
+    }
 
-  if (Desc.MayLoad) {
-    if (IsMemBarrier)
-      LoadBarriers.insert(IR);
-    assignLQSlot(IR);
+    CurrentStoreGroupID = NewGID;
+    if (Desc.MayLoad) {
+      CurrentLoadGroupID = NewGID;
+      if (IsMemBarrier)
+        CurrentLoadBarrierGroupID = NewGID;
+    }
+
+    return NewGID;
   }
 
-  if (Desc.MayStore) {
+  assert(Desc.MayLoad && "Expected a load!");
+
+  // Always create a new memory group if this is the first load of the sequence.
+
+  // A load may not pass a previous store unless flag 'NoAlias' is set.
+  // A load may pass a previous load.
+  // A younger load cannot pass an older load barrier.
+  // A load barrier cannot pass an older load.
+  bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier ||
+                               CurrentLoadGroupID <= CurrentStoreGroupID ||
+                               CurrentLoadGroupID <= CurrentLoadBarrierGroupID;
+  if (ShouldCreateANewGroup) {
+    unsigned NewGID = createMemoryGroup();
+    MemoryGroup &NewGroup = getGroup(NewGID);
+    NewGroup.addInstruction();
+
+    if (!assumeNoAlias() && CurrentStoreGroupID) {
+      MemoryGroup &StGroup = getGroup(CurrentStoreGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+                        << ") --> (" << NewGID << ")\n");
+      StGroup.addSuccessor(&NewGroup);
+    }
+    if (CurrentLoadBarrierGroupID) {
+      MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID
+                        << ") --> (" << NewGID << ")\n");
+      LdGroup.addSuccessor(&NewGroup);
+    }
+
+    CurrentLoadGroupID = NewGID;
     if (IsMemBarrier)
-      StoreBarriers.insert(IR);
-    assignSQSlot(IR);
+      CurrentLoadBarrierGroupID = NewGID;
+    return NewGID;
   }
+
+  MemoryGroup &Group = getGroup(CurrentLoadGroupID);
+  Group.addInstruction();
+  return CurrentLoadGroupID;
 }
 
 LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
@@ -92,106 +159,46 @@ LSUnit::Status LSUnit::isAvailable(const
   return LSUnit::LSU_AVAILABLE;
 }
 
-const InstRef &LSUnit::isReady(const InstRef &IR) const {
+void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
   const InstrDesc &Desc = IR.getInstruction()->getDesc();
-  const unsigned Index = IR.getSourceIndex();
   bool IsALoad = Desc.MayLoad;
   bool IsAStore = Desc.MayStore;
-  assert((IsALoad || IsAStore) && "Not a memory operation!");
-
-  if (IsALoad && !LoadBarriers.empty()) {
-    const InstRef &LoadBarrier = *LoadBarriers.begin();
-    // A younger load cannot pass a older load barrier.
-    if (Index > LoadBarrier.getSourceIndex())
-      return LoadBarrier;
-    // A load barrier cannot pass a older load.
-    if (Index == LoadBarrier.getSourceIndex()) {
-      const InstRef &Load = *LoadQueue.begin();
-      if (Index != Load.getSourceIndex())
-        return Load;
-    }
-  }
+  assert((IsALoad || IsAStore) && "Expected a memory operation!");
 
-  if (IsAStore && !StoreBarriers.empty()) {
-    const InstRef &StoreBarrier = *StoreBarriers.begin();
-    // A younger store cannot pass a older store barrier.
-    if (Index > StoreBarrier.getSourceIndex())
-      return StoreBarrier;
-    // A store barrier cannot pass a older store.
-    if (Index == StoreBarrier.getSourceIndex()) {
-      const InstRef &Store = *StoreQueue.begin();
-      if (Index != Store.getSourceIndex())
-        return Store;
-    }
+  unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+  auto It = Groups.find(GroupID);
+  It->second->onInstructionExecuted();
+  if (It->second->isExecuted()) {
+    Groups.erase(It);
   }
 
-  // A load may not pass a previous store unless flag 'NoAlias' is set.
-  // A load may pass a previous load.
-  if (assumeNoAlias() && IsALoad)
-    return IR;
-
-  if (StoreQueue.size()) {
-    // A load may not pass a previous store.
-    // A store may not pass a previous store.
-    const InstRef &Store = *StoreQueue.begin();
-    if (Index > Store.getSourceIndex())
-      return Store;
-  }
-
-  // Okay, we are older than the oldest store in the queue.
-  if (isLQEmpty())
-    return IR;
-
-  // Check if there are no older loads.
-  const InstRef &Load = *LoadQueue.begin();
-  if (Index <= Load.getSourceIndex())
-    return IR;
-
-  // A load may pass a previous load.
-  if (IsALoad)
-    return IR;
-
-  // A store may not pass a previous load.
-  return Load;
-}
-
-void LSUnit::onInstructionExecuted(const InstRef &IR) {
-  const InstrDesc &Desc = IR.getInstruction()->getDesc();
-  const unsigned Index = IR.getSourceIndex();
-  bool IsALoad = Desc.MayLoad;
-  bool IsAStore = Desc.MayStore;
-
   if (IsALoad) {
-    if (LoadQueue.erase(IR)) {
-      LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
-                        << " has been removed from the load queue.\n");
-    }
-    if (!LoadBarriers.empty()) {
-      const InstRef &LoadBarrier = *LoadBarriers.begin();
-      if (Index == LoadBarrier.getSourceIndex()) {
-        LLVM_DEBUG(
-            dbgs() << "[LSUnit]: Instruction idx=" << Index
-                   << " has been removed from the set of load barriers.\n");
-        LoadBarriers.erase(IR);
-      }
-    }
+    UsedLQEntries--;
+    LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+                      << " has been removed from the load queue.\n");
   }
 
   if (IsAStore) {
-    if (StoreQueue.erase(IR)) {
-      LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
-                        << " has been removed from the store queue.\n");
-    }
+    UsedSQEntries--;
+    LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+                      << " has been removed from the store queue.\n");
+  }
+}
 
-    if (!StoreBarriers.empty()) {
-      const InstRef &StoreBarrier = *StoreBarriers.begin();
-      if (Index == StoreBarrier.getSourceIndex()) {
-        LLVM_DEBUG(
-            dbgs() << "[LSUnit]: Instruction idx=" << Index
-                   << " has been removed from the set of store barriers.\n");
-        StoreBarriers.erase(IR);
-      }
-    }
+void LSUnit::onInstructionExecuted(const InstRef &IR) {
+  const Instruction &IS = *IR.getInstruction();
+  if (!IS.isMemOp())
+    return;
+
+  LSUnitBase::onInstructionExecuted(IR);
+  unsigned GroupID = IS.getLSUTokenID();
+  if (!isValidGroupID(GroupID)) {
+    if (GroupID == CurrentLoadGroupID)
+      CurrentLoadGroupID = 0;
+    if (GroupID == CurrentStoreGroupID)
+      CurrentStoreGroupID = 0;
+    if (GroupID == CurrentLoadBarrierGroupID)
+      CurrentLoadBarrierGroupID = 0;
   }
 }
 

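To make the new grouping rules in LSUnit::dispatch concrete, here is a
hand-worked trace for a short sequence (assuming no barriers and the default
NoAlias=false):

    // I0: load  -> new group 1           (no prior loads or stores)
    // I1: load  -> joins group 1         (a load may pass a previous load)
    // I2: store -> new group 2, 1 --> 2  (a store may not pass a load)
    // I3: load  -> new group 3, 2 --> 3  (a load may not pass a store)
    // I4: load  -> joins group 3         (no newer store in between)
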
Modified: llvm/trunk/lib/MCA/HardwareUnits/Scheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MCA/HardwareUnits/Scheduler.cpp?rev=361950&r1=361949&r2=361950&view=diff
==============================================================================
--- llvm/trunk/lib/MCA/HardwareUnits/Scheduler.cpp (original)
+++ llvm/trunk/lib/MCA/HardwareUnits/Scheduler.cpp Wed May 29 04:38:27 2019
@@ -84,6 +84,12 @@ void Scheduler::issueInstructionImpl(
 
   IS->computeCriticalRegDep();
 
+  if (IS->isMemOp()) {
+    LSU.onInstructionIssued(IR);
+    const MemoryGroup &Group = LSU.getGroup(IS->getLSUTokenID());
+    IS->setCriticalMemDep(Group.getCriticalPredecessor());
+  }
+
   if (IS->isExecuting())
     IssuedSet.emplace_back(IR);
   else if (IS->isExecuted())
@@ -115,59 +121,6 @@ void Scheduler::issueInstruction(
     promoteToReadySet(ReadyInstructions);
 }
 
-static bool initializeCriticalMemDepInfo(InstRef &IR, const LSUnit &LSU) {
-  Instruction &IS = *IR.getInstruction();
-  assert(IS.isMemOp() && "Not a memory operation!");
-
-  // Check if this instruction depends on another memory operation.
-  InstRef DependentMemOp = LSU.isReady(IR);
-  const Instruction *MemOp = DependentMemOp.getInstruction();
-  IS.setCurrentMemDep(MemOp);
-
-  // Initialize the CriticalMemDep structure.
-  unsigned Cycles = 0;
-  if (MemOp->isExecuting())
-    Cycles = static_cast<unsigned>(MemOp->getCyclesLeft());
-  IS.setCriticalMemDep(DependentMemOp.getSourceIndex(), Cycles);
-  return IR.getSourceIndex() == DependentMemOp.getSourceIndex();
-}
-
-static bool updateMemoryDependencyInfo(InstRef &IR, const LSUnit &LSU) {
-  Instruction &IS = *IR.getInstruction();
-  assert(IS.isMemOp() && "Not a memory operation!");
-
-  const Instruction *MemOp = IS.getCurrentMemDep();
-  if (!MemOp && initializeCriticalMemDepInfo(IR, LSU))
-    return true;
-
-  MemOp = IS.getCurrentMemDep();
-  if (MemOp == IR.getInstruction())
-    return true;
-
-  const CriticalDependency &CMD = IS.getCriticalMemDep();
-  if (MemOp->isExecuting() && !CMD.Cycles) {
-    // Update the critical memory dependency info.
-    IS.setCriticalMemDep(CMD.IID, MemOp->getCyclesLeft());
-    return false;
-  }
-
-  if (!MemOp->isExecuted() && !MemOp->isRetired())
-    return false;
-
-  // Check if there are still unsolved memory dependencies.
-  InstRef DependentMemOp = LSU.isReady(IR);
-  MemOp = DependentMemOp.getInstruction();
-  IS.setCurrentMemDep(MemOp);
-  if (DependentMemOp == IR)
-    return true;
-
-  unsigned Cycles = 0;
-  if (MemOp->isExecuting())
-    Cycles = static_cast<unsigned>(MemOp->getCyclesLeft());
-  IS.setCriticalMemDep(DependentMemOp.getSourceIndex(), Cycles);
-  return false;
-}
-
 bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
   // Scan the set of waiting instructions and promote them to the
   // ready set if operands are all ready.
@@ -179,7 +132,7 @@ bool Scheduler::promoteToReadySet(SmallV
 
     // Check if there are unsolved memory dependencies.
     Instruction &IS = *IR.getInstruction();
-    if (IS.isMemOp() && !updateMemoryDependencyInfo(IR, LSU)) {
+    if (IS.isMemOp() && !LSU.isReady(IR)) {
       ++I;
       continue;
     }
@@ -298,14 +251,16 @@ void Scheduler::analyzeDataDependencies(
     if (Resources->checkAvailability(IS.getDesc()))
       continue;
 
-    const CriticalDependency &CMD = IS.getCriticalMemDep();
-    if (IS.isMemOp() && IS.getCurrentMemDep() != &IS && !CMD.Cycles)
-      continue;
+    if (IS.isMemOp()) {
+      const MemoryGroup &Group = LSU.getGroup(IS.getLSUTokenID());
+      if (Group.isWaiting())
+        continue;
+      if (Group.isPending())
+        MemDeps.emplace_back(IR);
+    }
 
     if (IS.isPending())
       RegDeps.emplace_back(IR);
-    if (CMD.Cycles)
-      MemDeps.emplace_back(IR);
   }
 }
 
@@ -313,6 +268,8 @@ void Scheduler::cycleEvent(SmallVectorIm
                            SmallVectorImpl<InstRef> &Executed,
                            SmallVectorImpl<InstRef> &Pending,
                            SmallVectorImpl<InstRef> &Ready) {
+  LSU.cycleEvent();
+
   // Release consumed resources.
   Resources->cycleEvent(Freed);
 
@@ -343,14 +300,14 @@ bool Scheduler::mustIssueImmediately(con
   return Desc.MustIssueImmediately;
 }
 
-bool Scheduler::dispatch(const InstRef &IR) {
-  const Instruction &IS = *IR.getInstruction();
+bool Scheduler::dispatch(InstRef &IR) {
+  Instruction &IS = *IR.getInstruction();
   const InstrDesc &Desc = IS.getDesc();
   Resources->reserveBuffers(Desc.Buffers);
 
   // If necessary, reserve queue entries in the load-store unit (LSU).
   if (IS.isMemOp())
-    LSU.dispatch(IR);
+    IS.setLSUTokenID(LSU.dispatch(IR));
 
   if (IS.isPending()) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
@@ -360,9 +317,9 @@ bool Scheduler::dispatch(const InstRef &
     return false;
   }
 
-  // Memory operations that are not in a ready state are initially assigned to
-  // the WaitSet.
-  if (!IS.isReady() || (IS.isMemOp() && LSU.isReady(IR) != IR)) {
+  // Memory operations that still have unsolved memory dependencies are
+  // initially dispatched to the WaitSet.
+  if (!IS.isReady() || (IS.isMemOp() && !LSU.isReady(IR))) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
     WaitSet.push_back(IR);
     return false;

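In short, the memory-dependency bookkeeping in analyzeDataDependencies now
reduces to three group states (a summary of the logic above, not new code):

    // Group.isWaiting(): some predecessor group has not issued yet; the
    //                    instruction is skipped entirely.
    // Group.isPending(): every unresolved predecessor is already executing;
    //                    IR is reported as memory-dependency limited.
    // Group.isReady():   no memory hazard is left; only register
    //                    dependencies (if any) can further delay IR.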


