[llvm] f5bdc88 - [MCA] Improved handling of negative read-advance cycles.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 23 07:49:09 PDT 2021


Author: Andrea Di Biagio
Date: 2021-03-23T14:47:23Z
New Revision: f5bdc88e4d2b096414d0ac1781840a6c73e9f3a3

URL: https://github.com/llvm/llvm-project/commit/f5bdc88e4d2b096414d0ac1781840a6c73e9f3a3
DIFF: https://github.com/llvm/llvm-project/commit/f5bdc88e4d2b096414d0ac1781840a6c73e9f3a3.diff

LOG: [MCA] Improved handling of negative read-advance cycles.

Before this patch, register writes were always invalidated by the
RegisterFile at the instruction commit stage. As a result, the
RegisterFile often lost track of the `execute cycle` of writes that
had already been committed. While this was not a problem for
non-delayed reads, it sometimes led to inaccurate read latency
computations in the presence of negative read-advance cycles.

This patch fixes the issue by changing how the RegisterFile component
internally keeps track of the `execute cycle` information of each
write. Whenever an instruction is executed, the RetireStage notifies
the RegisterFile, so that it can internally record the `execute cycle`
of each executed write.
The `execute cycle` information is stored within the WriteRef itself,
and it is not invalidated when the write is committed.

Added: 
    llvm/test/tools/llvm-mca/X86/BtVer2/negative-read-advance.s

Modified: 
    llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
    llvm/include/llvm/MCA/Instruction.h
    llvm/include/llvm/MCA/Stages/RetireStage.h
    llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
    llvm/lib/MCA/Instruction.cpp
    llvm/lib/MCA/Stages/DispatchStage.cpp
    llvm/lib/MCA/Stages/InOrderIssueStage.cpp
    llvm/lib/MCA/Stages/RetireStage.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h b/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
index 7884d01c707a..735ed7470280 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -28,7 +28,53 @@ namespace mca {
 
 class ReadState;
 class WriteState;
-class WriteRef;
+class Instruction;
+
+/// A reference to a register write.
+///
+/// This class is mainly used by the register file to describe register
+/// mappings. It correlates a register write to the source index of the
+/// defining instruction.
+class WriteRef {
+  unsigned IID;
+  unsigned WriteBackCycle;
+  unsigned WriteResID;
+  MCPhysReg RegisterID;
+  WriteState *Write;
+
+  static const unsigned INVALID_IID;
+
+public:
+  WriteRef() : IID(INVALID_IID), WriteBackCycle(), WriteResID(), Write() {}
+  WriteRef(unsigned SourceIndex, WriteState *WS);
+
+  unsigned getSourceIndex() const { return IID; }
+  unsigned getWriteBackCycle() const;
+
+  const WriteState *getWriteState() const { return Write; }
+  WriteState *getWriteState() { return Write; }
+  unsigned getWriteResourceID() const;
+  MCPhysReg getRegisterID() const;
+
+  void commit();
+  void notifyExecuted(unsigned Cycle);
+
+  bool hasKnownWriteBackCycle() const;
+  bool isWriteZero() const;
+  bool isValid() const { return getSourceIndex() != INVALID_IID; }
+
+  /// Returns true if this register write has been executed, and the new
+  /// register value is therefore available to users.
+  bool isAvailable() const { return hasKnownWriteBackCycle(); }
+
+  bool operator==(const WriteRef &Other) const {
+    return Write && Other.Write && Write == Other.Write;
+  }
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
 
 /// Manages hardware register files, and tracks register definitions for
 /// register renaming purposes.
@@ -145,6 +191,8 @@ class RegisterFile : public HardwareUnit {
   // the target. Bits are set for registers that are known to be zero.
   APInt ZeroRegisters;
 
+  unsigned CurrentCycle;
+
   // This method creates a new register file descriptor.
   // The new register file owns all of the registers declared by register
   // classes in the 'RegisterClasses' set.
@@ -183,8 +231,9 @@ class RegisterFile : public HardwareUnit {
                unsigned NumRegs = 0);
 
   // Collects writes that are in a RAW dependency with RS.
-  void collectWrites(const ReadState &RS,
-                     SmallVectorImpl<WriteRef> &Writes) const;
+  void collectWrites(const MCSubtargetInfo &STI, const ReadState &RS,
+                     SmallVectorImpl<WriteRef> &Writes,
+                     SmallVectorImpl<WriteRef> &CommittedWrites) const;
 
   // This method updates the register mappings inserting a new register
   // definition. This method is also responsible for updating the number of
@@ -223,9 +272,15 @@ class RegisterFile : public HardwareUnit {
   // Returns the number of PRFs implemented by this processor.
   unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
 
+  unsigned getElapsedCyclesFromWriteBack(const WriteRef &WR) const;
+
+  void onInstructionExecuted(Instruction *IS);
+
   // Notify each PRF that a new cycle just started.
   void cycleStart();
 
+  void cycleEnd() { ++CurrentCycle; }
+
 #ifndef NDEBUG
   void dump() const;
 #endif

diff  --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index 2d3b0ab22e2c..cc886a190254 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -595,45 +595,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const InstRef &IR) {
 }
 #endif
 
-/// A reference to a register write.
-///
-/// This class is mainly used by the register file to describe register
-/// mappings. It correlates a register write to the source index of the
-/// defining instruction.
-class WriteRef {
-  std::pair<unsigned, WriteState *> Data;
-  static const unsigned INVALID_IID;
-
-public:
-  WriteRef() : Data(INVALID_IID, nullptr) {}
-  WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {}
-
-  unsigned getSourceIndex() const { return Data.first; }
-  const WriteState *getWriteState() const { return Data.second; }
-  WriteState *getWriteState() { return Data.second; }
-  void invalidate() { Data.second = nullptr; }
-  bool isWriteZero() const {
-    assert(isValid() && "Invalid null WriteState found!");
-    return getWriteState()->isWriteZero();
-  }
-
-  /// Returns true if this register write has been executed, and the new
-  /// register value is therefore available to users.
-  bool isAvailable() const {
-    if (getSourceIndex() == INVALID_IID)
-      return false;
-    const WriteState *WS = getWriteState();
-    return !WS || WS->isExecuted();
-  }
-
-  bool isValid() const { return Data.second && Data.first != INVALID_IID; }
-  bool operator==(const WriteRef &Other) const { return Data == Other.Data; }
-
-#ifndef NDEBUG
-  void dump() const;
-#endif
-};
-
 } // namespace mca
 } // namespace llvm
 

diff  --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h
index 81d7cd86ca47..27fb9c31d7cd 100644
--- a/llvm/include/llvm/MCA/Stages/RetireStage.h
+++ b/llvm/include/llvm/MCA/Stages/RetireStage.h
@@ -43,6 +43,7 @@ class RetireStage final : public Stage {
     return !RCU.isEmpty() || !RetireInst.empty();
   }
   Error cycleStart() override;
+  Error cycleEnd() override;
   Error execute(InstRef &IR) override;
   void notifyInstructionRetired(const InstRef &IR) const;
 };

diff  --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
index 11a24a6889f1..eface5b37118 100644
--- a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
+++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -22,11 +22,47 @@
 namespace llvm {
 namespace mca {
 
+const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max();
+
+WriteRef::WriteRef(unsigned SourceIndex, WriteState *WS)
+    : IID(SourceIndex), WriteBackCycle(), WriteResID(), Write(WS) {}
+
+void WriteRef::commit() {
+  assert(Write && Write->isExecuted() && "Cannot commit before write back!");
+  Write = nullptr;
+}
+
+void WriteRef::notifyExecuted(unsigned Cycle) {
+  assert(Write && Write->isExecuted() && "Not executed!");
+  WriteBackCycle = Cycle;
+}
+
+bool WriteRef::hasKnownWriteBackCycle() const {
+  return isValid() && (!Write || Write->isExecuted());
+}
+
+bool WriteRef::isWriteZero() const {
+  assert(isValid() && "Invalid null WriteState found!");
+  return getWriteState()->isWriteZero();
+}
+
+unsigned WriteRef::getWriteResourceID() const {
+  if (Write)
+    return Write->getWriteResourceID();
+  return WriteResID;
+}
+
+MCPhysReg WriteRef::getRegisterID() const {
+  if (Write)
+    return Write->getRegisterID();
+  return RegisterID;
+}
+
 RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
                            unsigned NumRegs)
     : MRI(mri),
       RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}),
-      ZeroRegisters(mri.getNumRegs(), false) {
+      ZeroRegisters(mri.getNumRegs(), false), CurrentCycle() {
   initialize(SM, NumRegs);
 }
 
@@ -63,6 +99,43 @@ void RegisterFile::cycleStart() {
     RMT.NumMoveEliminated = 0;
 }
 
+void RegisterFile::onInstructionExecuted(Instruction *IS) {
+  assert(IS && IS->isExecuted() && "Unexpected internal state found!");
+  for (WriteState &WS : IS->getDefs()) {
+    if (WS.isEliminated())
+      return;
+
+    MCPhysReg RegID = WS.getRegisterID();
+    assert(RegID != 0 && "A write of an invalid register?");
+    assert(WS.getCyclesLeft() != UNKNOWN_CYCLES &&
+           "The number of cycles should be known at this point!");
+    assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
+
+    MCPhysReg RenameAs = RegisterMappings[RegID].second.RenameAs;
+    if (RenameAs && RenameAs != RegID)
+      RegID = RenameAs;
+
+    WriteRef &WR = RegisterMappings[RegID].first;
+    if (WR.getWriteState() == &WS)
+      WR.notifyExecuted(CurrentCycle);
+
+    for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+      WriteRef &OtherWR = RegisterMappings[*I].first;
+      if (OtherWR.getWriteState() == &WS)
+        OtherWR.notifyExecuted(CurrentCycle);
+    }
+
+    if (!WS.clearsSuperRegisters())
+      continue;
+
+    for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+      WriteRef &OtherWR = RegisterMappings[*I].first;
+      if (OtherWR.getWriteState() == &WS)
+        OtherWR.notifyExecuted(CurrentCycle);
+    }
+  }
+}
+
 void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF,
                                    ArrayRef<MCRegisterCostEntry> Entries) {
   // A default register file is always allocated at index #0. That register file
@@ -261,12 +334,12 @@ void RegisterFile::removeRegisterWrite(
 
   WriteRef &WR = RegisterMappings[RegID].first;
   if (WR.getWriteState() == &WS)
-    WR.invalidate();
+    WR.commit();
 
   for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
     WriteRef &OtherWR = RegisterMappings[*I].first;
     if (OtherWR.getWriteState() == &WS)
-      OtherWR.invalidate();
+      OtherWR.commit();
   }
 
   if (!WS.clearsSuperRegisters())
@@ -275,7 +348,7 @@ void RegisterFile::removeRegisterWrite(
   for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
     WriteRef &OtherWR = RegisterMappings[*I].first;
     if (OtherWR.getWriteState() == &WS)
-      OtherWR.invalidate();
+      OtherWR.commit();
   }
 }
 
@@ -344,8 +417,25 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
   return true;
 }
 
-void RegisterFile::collectWrites(const ReadState &RS,
-                                 SmallVectorImpl<WriteRef> &Writes) const {
+unsigned WriteRef::getWriteBackCycle() const {
+  assert(hasKnownWriteBackCycle() && "Instruction not executed!");
+  assert((!Write || Write->getCyclesLeft() <= 0) &&
+         "Inconsistent state found!");
+  return WriteBackCycle;
+}
+
+unsigned RegisterFile::getElapsedCyclesFromWriteBack(const WriteRef &WR) const {
+  assert(WR.hasKnownWriteBackCycle() && "Write hasn't been committed yet!");
+  return CurrentCycle - WR.getWriteBackCycle();
+}
+
+void RegisterFile::collectWrites(
+    const MCSubtargetInfo &STI, const ReadState &RS,
+    SmallVectorImpl<WriteRef> &Writes,
+    SmallVectorImpl<WriteRef> &CommittedWrites) const {
+  const ReadDescriptor &RD = RS.getDescriptor();
+  const MCSchedModel &SM = STI.getSchedModel();
+  const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
   MCPhysReg RegID = RS.getRegisterID();
   assert(RegID && RegID < RegisterMappings.size());
   LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
@@ -357,14 +447,32 @@ void RegisterFile::collectWrites(const ReadState &RS,
     RegID = RRI.AliasRegID;
 
   const WriteRef &WR = RegisterMappings[RegID].first;
-  if (WR.isValid())
+  if (WR.getWriteState()) {
     Writes.push_back(WR);
+  } else if (WR.hasKnownWriteBackCycle()) {
+    unsigned WriteResID = WR.getWriteResourceID();
+    int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
+    if (ReadAdvance < 0) {
+      unsigned Elapsed = getElapsedCyclesFromWriteBack(WR);
+      if (Elapsed < static_cast<unsigned>(-ReadAdvance))
+        CommittedWrites.push_back(WR);
+    }
+  }
 
   // Handle potential partial register updates.
   for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
     const WriteRef &WR = RegisterMappings[*I].first;
-    if (WR.isValid())
+    if (WR.getWriteState()) {
       Writes.push_back(WR);
+    } else if (WR.hasKnownWriteBackCycle()) {
+      unsigned WriteResID = WR.getWriteResourceID();
+      int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
+      if (ReadAdvance < 0) {
+        unsigned Elapsed = getElapsedCyclesFromWriteBack(WR);
+        if (Elapsed < static_cast<unsigned>(-ReadAdvance))
+          CommittedWrites.push_back(WR);
+      }
+    }
   }
 
   // Remove duplicate entries and resize the input vector.
@@ -398,21 +506,34 @@ void RegisterFile::addRegisterRead(ReadState &RS,
     RS.setReadZero();
 
   SmallVector<WriteRef, 4> DependentWrites;
-  collectWrites(RS, DependentWrites);
-  RS.setDependentWrites(DependentWrites.size());
+  SmallVector<WriteRef, 4> CompletedWrites;
+  collectWrites(STI, RS, DependentWrites, CompletedWrites);
+  RS.setDependentWrites(DependentWrites.size() + CompletedWrites.size());
 
   // We know that this read depends on all the writes in DependentWrites.
   // For each write, check if we have ReadAdvance information, and use it
-  // to figure out in how many cycles this read becomes available.
+  // to figure out in how many cycles this read will be available.
   const ReadDescriptor &RD = RS.getDescriptor();
   const MCSchedModel &SM = STI.getSchedModel();
   const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
   for (WriteRef &WR : DependentWrites) {
+    unsigned WriteResID = WR.getWriteResourceID();
     WriteState &WS = *WR.getWriteState();
-    unsigned WriteResID = WS.getWriteResourceID();
     int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
     WS.addUser(WR.getSourceIndex(), &RS, ReadAdvance);
   }
+
+  for (WriteRef &WR : CompletedWrites) {
+    unsigned WriteResID = WR.getWriteResourceID();
+    assert(WR.hasKnownWriteBackCycle() && "Invalid write!");
+    assert(STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID) < 0);
+    unsigned ReadAdvance = static_cast<unsigned>(
+        -STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID));
+    unsigned Elapsed = getElapsedCyclesFromWriteBack(WR);
+    assert(Elapsed < ReadAdvance && "Should not have been added to the set!");
+    RS.writeStartEvent(WR.getSourceIndex(), WR.getRegisterID(),
+                       ReadAdvance - Elapsed);
+  }
 }
 
 unsigned RegisterFile::isAvailable(ArrayRef<MCPhysReg> Regs) const {
@@ -463,6 +584,14 @@ unsigned RegisterFile::isAvailable(ArrayRef<MCPhysReg> Regs) const {
 }
 
 #ifndef NDEBUG
+void WriteRef::dump() const {
+  dbgs() << "IID=" << getSourceIndex() << ' ';
+  if (isValid())
+    getWriteState()->dump();
+  else
+    dbgs() << "(null)";
+}
+
 void RegisterFile::dump() const {
   for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) {
     const RegisterMapping &RM = RegisterMappings[I];

diff  --git a/llvm/lib/MCA/Instruction.cpp b/llvm/lib/MCA/Instruction.cpp
index e5f2c4fd1eec..e658b869a67e 100644
--- a/llvm/lib/MCA/Instruction.cpp
+++ b/llvm/lib/MCA/Instruction.cpp
@@ -27,7 +27,8 @@ void WriteState::writeStartEvent(unsigned IID, MCPhysReg RegID,
   DependentWrite = nullptr;
 }
 
-void ReadState::writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles) {
+void ReadState::writeStartEvent(unsigned IID, MCPhysReg RegID,
+                                unsigned Cycles) {
   assert(DependentWrites);
   assert(CyclesLeft == UNKNOWN_CYCLES);
 
@@ -125,14 +126,6 @@ void WriteState::dump() const {
   dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID "
          << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }";
 }
-
-void WriteRef::dump() const {
-  dbgs() << "IID=" << getSourceIndex() << ' ';
-  if (isValid())
-    getWriteState()->dump();
-  else
-    dbgs() << "(null)";
-}
 #endif
 
 const CriticalDependency &Instruction::computeCriticalRegDep() {
@@ -248,7 +241,5 @@ void Instruction::cycleEvent() {
     Stage = IS_EXECUTED;
 }
 
-const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max();
-
 } // namespace mca
 } // namespace llvm

diff  --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp
index 3a3d82259160..c44018dc30f4 100644
--- a/llvm/lib/MCA/Stages/DispatchStage.cpp
+++ b/llvm/lib/MCA/Stages/DispatchStage.cpp
@@ -136,8 +136,8 @@ Error DispatchStage::dispatch(InstRef IR) {
 }
 
 Error DispatchStage::cycleStart() {
-  PRF.cycleStart();
-
+  // The retire stage is responsible for calling method `cycleStart`
+  // on the PRF.
   if (!CarryOver) {
     AvailableEntries = DispatchWidth;
     return ErrorSuccess();

diff  --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
index dd2270d3a8f3..cf536979578b 100644
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -91,12 +91,13 @@ static unsigned checkRegisterHazard(const RegisterFile &PRF,
                                     const InstRef &IR) {
   unsigned StallCycles = 0;
   SmallVector<WriteRef, 4> Writes;
+  SmallVector<WriteRef, 4> CommittedWrites;
 
   for (const ReadState &RS : IR.getInstruction()->getUses()) {
     const ReadDescriptor &RD = RS.getDescriptor();
     const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
 
-    PRF.collectWrites(RS, Writes);
+    PRF.collectWrites(STI, RS, Writes, CommittedWrites);
     for (const WriteRef &WR : Writes) {
       const WriteState *WS = WR.getWriteState();
       unsigned WriteResID = WS->getWriteResourceID();
@@ -118,6 +119,19 @@ static unsigned checkRegisterHazard(const RegisterFile &PRF,
       }
     }
     Writes.clear();
+
+    for (const WriteRef &WR : CommittedWrites) {
+      unsigned WriteResID = WR.getWriteResourceID();
+      assert(!WR.getWriteState() && "Should be already committed!");
+      assert(WR.hasKnownWriteBackCycle() && "Invalid write!");
+      assert(STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID) < 0);
+      unsigned ReadAdvance = static_cast<unsigned>(
+          -STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID));
+      unsigned Elapsed = PRF.getElapsedCyclesFromWriteBack(WR);
+      assert(Elapsed < ReadAdvance && "Should not have been added to the set!");
+      unsigned CyclesLeft = (ReadAdvance - Elapsed);
+      StallCycles = std::max(StallCycles, CyclesLeft);
+    }
   }
 
   return StallCycles;
@@ -293,6 +307,8 @@ llvm::Error InOrderIssueStage::updateIssuedInst() {
 llvm::Error InOrderIssueStage::cycleStart() {
   NumIssued = 0;
 
+  PRF.cycleStart();
+
   // Release consumed resources.
   SmallVector<ResourceRef, 4> Freed;
   RM->cycleEvent(Freed);
@@ -320,6 +336,8 @@ llvm::Error InOrderIssueStage::cycleStart() {
 }
 
 llvm::Error InOrderIssueStage::cycleEnd() {
+  PRF.cycleEnd();
+
   if (StallCyclesLeft > 0)
     --StallCyclesLeft;
 

diff  --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp
index 6b24b48065d5..43f71c2e3642 100644
--- a/llvm/lib/MCA/Stages/RetireStage.cpp
+++ b/llvm/lib/MCA/Stages/RetireStage.cpp
@@ -23,6 +23,8 @@ namespace llvm {
 namespace mca {
 
 llvm::Error RetireStage::cycleStart() {
+  PRF.cycleStart();
+
   const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle();
   unsigned NumRetired = 0;
   while (!RCU.isEmpty()) {
@@ -46,9 +48,15 @@ llvm::Error RetireStage::cycleStart() {
   return llvm::ErrorSuccess();
 }
 
+llvm::Error RetireStage::cycleEnd() {
+  PRF.cycleEnd();
+  return llvm::ErrorSuccess();
+}
+
 llvm::Error RetireStage::execute(InstRef &IR) {
   Instruction &IS = *IR.getInstruction();
 
+  PRF.onInstructionExecuted(&IS);
   unsigned TokenID = IS.getRCUTokenID();
   if (TokenID != RetireControlUnit::UnhandledTokenID) {
     RCU.onInstructionExecuted(TokenID);

diff  --git a/llvm/test/tools/llvm-mca/X86/BtVer2/negative-read-advance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/negative-read-advance.s
new file mode 100644
index 000000000000..de409e1c28d0
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/negative-read-advance.s
@@ -0,0 +1,90 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline < %s | FileCheck %s
+
+add %ebx, %ebx
+vpinsrd $1, %ebx, %xmm0, %xmm1
+vpinsrd $1, %ebx, %xmm2, %xmm3
+vpinsrd $2, %ebx, %xmm4, %xmm5
+vpinsrd $2, %ebx, %xmm6, %xmm7
+vpinsrd $3, %ebx, %xmm8, %xmm10
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      6
+# CHECK-NEXT: Total Cycles:      13
+# CHECK-NEXT: Total uOps:        11
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.85
+# CHECK-NEXT: IPC:               0.46
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.50                        addl	%ebx, %ebx
+# CHECK-NEXT:  2      7     0.50                        vpinsrd	$1, %ebx, %xmm0, %xmm1
+# CHECK-NEXT:  2      7     0.50                        vpinsrd	$1, %ebx, %xmm2, %xmm3
+# CHECK-NEXT:  2      7     0.50                        vpinsrd	$2, %ebx, %xmm4, %xmm5
+# CHECK-NEXT:  2      7     0.50                        vpinsrd	$2, %ebx, %xmm6, %xmm7
+# CHECK-NEXT:  2      7     0.50                        vpinsrd	$3, %ebx, %xmm8, %xmm10
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - JALU0
+# CHECK-NEXT: [1]   - JALU1
+# CHECK-NEXT: [2]   - JDiv
+# CHECK-NEXT: [3]   - JFPA
+# CHECK-NEXT: [4]   - JFPM
+# CHECK-NEXT: [5]   - JFPU0
+# CHECK-NEXT: [6]   - JFPU1
+# CHECK-NEXT: [7]   - JLAGU
+# CHECK-NEXT: [8]   - JMul
+# CHECK-NEXT: [9]   - JSAGU
+# CHECK-NEXT: [10]  - JSTC
+# CHECK-NEXT: [11]  - JVALU0
+# CHECK-NEXT: [12]  - JVALU1
+# CHECK-NEXT: [13]  - JVIMUL
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
+# CHECK-NEXT:  -     1.00    -      -      -     2.00   3.00    -      -      -      -     2.00   3.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -     addl	%ebx, %ebx
+# CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -      -      -     1.00    -     vpinsrd	$1, %ebx, %xmm0, %xmm1
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -     1.00    -      -     vpinsrd	$1, %ebx, %xmm2, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -      -      -     1.00    -     vpinsrd	$2, %ebx, %xmm4, %xmm5
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -     1.00    -      -     vpinsrd	$2, %ebx, %xmm6, %xmm7
+# CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -      -      -     1.00    -     vpinsrd	$3, %ebx, %xmm8, %xmm10
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     012
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeER .    . .   addl	%ebx, %ebx
+# CHECK-NEXT: [0,1]     .D======eER .   vpinsrd	$1, %ebx, %xmm0, %xmm1
+# CHECK-NEXT: [0,2]     . D=====eER .   vpinsrd	$1, %ebx, %xmm2, %xmm3
+# CHECK-NEXT: [0,3]     .  D=====eER.   vpinsrd	$2, %ebx, %xmm4, %xmm5
+# CHECK-NEXT: [0,4]     .   D====eER.   vpinsrd	$2, %ebx, %xmm6, %xmm7
+# CHECK-NEXT: [0,5]     .    D====eER   vpinsrd	$3, %ebx, %xmm8, %xmm10
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       addl	%ebx, %ebx
+# CHECK-NEXT: 1.     1     7.0    0.0    0.0       vpinsrd	$1, %ebx, %xmm0, %xmm1
+# CHECK-NEXT: 2.     1     6.0    0.0    0.0       vpinsrd	$1, %ebx, %xmm2, %xmm3
+# CHECK-NEXT: 3.     1     6.0    1.0    0.0       vpinsrd	$2, %ebx, %xmm4, %xmm5
+# CHECK-NEXT: 4.     1     5.0    1.0    0.0       vpinsrd	$2, %ebx, %xmm6, %xmm7
+# CHECK-NEXT: 5.     1     5.0    2.0    0.0       vpinsrd	$3, %ebx, %xmm8, %xmm10
+# CHECK-NEXT:        1     5.0    0.8    0.0       <total>


        


More information about the llvm-commits mailing list