[llvm] r345865 - [llvm-mca] Add extra counters for move elimination in view RegisterFileStatistics.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 1 11:04:39 PDT 2018


Author: adibiagio
Date: Thu Nov  1 11:04:39 2018
New Revision: 345865

URL: http://llvm.org/viewvc/llvm-project?rev=345865&view=rev
Log:
[llvm-mca] Add extra counters for move elimination in view RegisterFileStatistics.

This patch teaches view RegisterFileStatistics how to report events for
optimizable register moves.

For each processor register file, view RegisterFileStatistics reports the
following extra information:
 - Number of optimizable register moves
 - Number of register moves eliminated
 - Number of zero moves (i.e. register moves that propagate a zero)
 - Max number of moves eliminated per cycle

Differential Revision: https://reviews.llvm.org/D53976

Modified:
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s
    llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.cpp
    llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.h
    llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h
    llvm/trunk/tools/llvm-mca/include/Instruction.h
    llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h
    llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp
    llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s Thu Nov  1 11:04:39 2018
@@ -39,6 +39,10 @@ vaddps %xmm1, %xmm1, %xmm2
 # CHECK-NEXT:    Number of physical registers:     72
 # CHECK-NEXT:    Total number of mappings created: 3
 # CHECK-NEXT:    Max number of mappings used:      3
+# CHECK-NEXT:    Number of optimizable moves:      3
+# CHECK-NEXT:    Number of moves eliminated:       3  (100.0%)
+# CHECK-NEXT:    Number of zero moves:             3  (100.0%)
+# CHECK-NEXT:    Max moves eliminated per cycle:   1
 
 # CHECK:      *  Register File #2 -- JIntegerPRF:
 # CHECK-NEXT:    Number of physical registers:     64

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s Thu Nov  1 11:04:39 2018
@@ -49,6 +49,10 @@ movdqu %xmm5, %xmm0
 # CHECK-NEXT:    Number of physical registers:     72
 # CHECK-NEXT:    Total number of mappings created: 0
 # CHECK-NEXT:    Max number of mappings used:      0
+# CHECK-NEXT:    Number of optimizable moves:      21
+# CHECK-NEXT:    Number of moves eliminated:       21  (100.0%)
+# CHECK-NEXT:    Number of zero moves:             21  (100.0%)
+# CHECK-NEXT:    Max moves eliminated per cycle:   2
 
 # CHECK:      *  Register File #2 -- JIntegerPRF:
 # CHECK-NEXT:    Number of physical registers:     64

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s Thu Nov  1 11:04:39 2018
@@ -44,6 +44,10 @@ vmovdqu %xmm5, %xmm0
 # CHECK-NEXT:    Number of physical registers:     72
 # CHECK-NEXT:    Total number of mappings created: 0
 # CHECK-NEXT:    Max number of mappings used:      0
+# CHECK-NEXT:    Number of optimizable moves:      18
+# CHECK-NEXT:    Number of moves eliminated:       18  (100.0%)
+# CHECK-NEXT:    Number of zero moves:             18  (100.0%)
+# CHECK-NEXT:    Max moves eliminated per cycle:   2
 
 # CHECK:      *  Register File #2 -- JIntegerPRF:
 # CHECK-NEXT:    Number of physical registers:     64

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s Thu Nov  1 11:04:39 2018
@@ -45,6 +45,10 @@ mov %edx, %eax
 # CHECK-NEXT:    Number of physical registers:     64
 # CHECK-NEXT:    Total number of mappings created: 0
 # CHECK-NEXT:    Max number of mappings used:      0
+# CHECK-NEXT:    Number of optimizable moves:      12
+# CHECK-NEXT:    Number of moves eliminated:       12  (100.0%)
+# CHECK-NEXT:    Number of zero moves:             12  (100.0%)
+# CHECK-NEXT:    Max moves eliminated per cycle:   2
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - JALU0

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s Thu Nov  1 11:04:39 2018
@@ -45,6 +45,10 @@ mov %rdx, %rax
 # CHECK-NEXT:    Number of physical registers:     64
 # CHECK-NEXT:    Total number of mappings created: 0
 # CHECK-NEXT:    Max number of mappings used:      0
+# CHECK-NEXT:    Number of optimizable moves:      12
+# CHECK-NEXT:    Number of moves eliminated:       12  (100.0%)
+# CHECK-NEXT:    Number of zero moves:             12  (100.0%)
+# CHECK-NEXT:    Max moves eliminated per cycle:   2
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - JALU0

Modified: llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.cpp?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.cpp (original)
+++ llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.cpp Thu Nov  1 11:04:39 2018
@@ -21,10 +21,12 @@ namespace mca {
 RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
     : STI(sti) {
   const MCSchedModel &SM = STI.getSchedModel();
-  RegisterFileUsage Empty = {0, 0, 0};
+  RegisterFileUsage RFUEmpty = {0, 0, 0};
+  MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0};
   if (!SM.hasExtraProcessorInfo()) {
     // Assume a single register file.
-    RegisterFiles.emplace_back(Empty);
+    PRFUsage.emplace_back(RFUEmpty);
+    MoveElimInfo.emplace_back(MEIEmpty);
     return;
   }
 
@@ -35,8 +37,42 @@ RegisterFileStatistics::RegisterFileStat
   // be skipped. If there are no user defined register files, then reserve a
   // single entry for the default register file at index #0.
   unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
-  RegisterFiles.resize(NumRegFiles);
-  std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty);
+
+  PRFUsage.resize(NumRegFiles);
+  std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty);
+
+  MoveElimInfo.resize(NumRegFiles);
+  std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty);
+}
+
+void RegisterFileStatistics::updateRegisterFileUsage(
+    ArrayRef<unsigned> UsedPhysRegs) {
+  for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) {
+    RegisterFileUsage &RFU = PRFUsage[I];
+    unsigned NumUsedPhysRegs = UsedPhysRegs[I];
+    RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
+    RFU.TotalMappings += NumUsedPhysRegs;
+    RFU.MaxUsedMappings =
+        std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
+  }
+}
+
+void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) {
+  if (!Inst.isOptimizableMove())
+    return;
+
+  assert(Inst.getDefs().size() == 1 && "Expected a single definition!");
+  assert(Inst.getUses().size() == 1 && "Expected a single register use!");
+  const WriteState &WS = Inst.getDefs()[0];
+  const ReadState &RS = Inst.getUses()[0];
+
+  MoveEliminationInfo &Info =
+      MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()];
+  Info.TotalMoveEliminationCandidates++;
+  if (WS.isEliminated())
+    Info.CurrentMovesEliminated++;
+  if (WS.isWriteZero() && RS.isReadZero())
+    Info.TotalMovesThatPropagateZero++;
 }
 
 void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
@@ -45,37 +81,40 @@ void RegisterFileStatistics::onEvent(con
     break;
   case HWInstructionEvent::Retired: {
     const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
-    for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I)
-      RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
+    for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I)
+      PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
     break;
   }
   case HWInstructionEvent::Dispatched: {
     const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
-    for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) {
-      RegisterFileUsage &RFU = RegisterFiles[I];
-      unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I];
-      RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
-      RFU.TotalMappings += NumUsedPhysRegs;
-      RFU.MaxUsedMappings =
-          std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
-    }
+    updateRegisterFileUsage(DE.UsedPhysRegs);
+    updateMoveElimInfo(*DE.IR.getInstruction());
   }
   }
 }
 
+void RegisterFileStatistics::onCycleEnd() {
+  for (MoveEliminationInfo &MEI : MoveElimInfo) {
+    unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle;
+    CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated);
+    MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated;
+    MEI.CurrentMovesEliminated = 0;
+  }
+}
+
 void RegisterFileStatistics::printView(raw_ostream &OS) const {
   std::string Buffer;
   raw_string_ostream TempStream(Buffer);
 
   TempStream << "\n\nRegister File statistics:";
-  const RegisterFileUsage &GlobalUsage = RegisterFiles[0];
+  const RegisterFileUsage &GlobalUsage = PRFUsage[0];
   TempStream << "\nTotal number of mappings created:    "
              << GlobalUsage.TotalMappings;
   TempStream << "\nMax number of mappings used:         "
              << GlobalUsage.MaxUsedMappings << '\n';
 
-  for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) {
-    const RegisterFileUsage &RFU = RegisterFiles[I];
+  for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) {
+    const RegisterFileUsage &RFU = PRFUsage[I];
     // Obtain the register file descriptor from the scheduling model.
     assert(STI.getSchedModel().hasExtraProcessorInfo() &&
            "Unable to find register file info!");
@@ -98,6 +137,27 @@ void RegisterFileStatistics::printView(r
                << RFU.TotalMappings;
     TempStream << "\n   Max number of mappings used:      "
                << RFU.MaxUsedMappings << '\n';
+    const MoveEliminationInfo &MEI = MoveElimInfo[I];
+
+    if (MEI.TotalMoveEliminationCandidates) {
+      TempStream << "   Number of optimizable moves:      "
+                 << MEI.TotalMoveEliminationCandidates;
+      double EliminatedMovProportion = (double)MEI.TotalMovesEliminated /
+                                       MEI.TotalMoveEliminationCandidates *
+                                       100.0;
+      double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero /
+                                 MEI.TotalMoveEliminationCandidates * 100.0;
+      TempStream << "\n   Number of moves eliminated:       "
+                 << MEI.TotalMovesEliminated << "  "
+                 << format("(%.1f%%)",
+                           floor((EliminatedMovProportion * 10) + 0.5) / 10);
+      TempStream << "\n   Number of zero moves:             "
+                 << MEI.TotalMovesThatPropagateZero << "  "
+                 << format("(%.1f%%)",
+                           floor((ZeroMovProportion * 10) + 0.5) / 10);
+      TempStream << "\n   Max moves eliminated per cycle:   "
+                 << MEI.MaxMovesEliminatedPerCycle << '\n';
+    }
   }
 
   TempStream.flush();

Modified: llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.h?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.h (original)
+++ llvm/trunk/tools/llvm-mca/Views/RegisterFileStatistics.h Thu Nov  1 11:04:39 2018
@@ -21,6 +21,10 @@
 ///    Number of physical registers:     72
 ///    Total number of mappings created: 0
 ///    Max number of mappings used:      0
+///    Number of optimizable moves:      200
+///    Number of moves eliminated:       200 (100.0%)
+///    Number of zero moves:             200 (100.0%)
+///    Max moves eliminated per cycle:   2
 ///
 /// *  Register File #2 -- IntegerPRF:
 ///    Number of physical registers:     64
@@ -49,12 +53,25 @@ class RegisterFileStatistics : public Vi
     unsigned CurrentlyUsedMappings;
   };
 
+  struct MoveEliminationInfo {
+    unsigned TotalMoveEliminationCandidates;
+    unsigned TotalMovesEliminated;
+    unsigned TotalMovesThatPropagateZero;
+    unsigned MaxMovesEliminatedPerCycle;
+    unsigned CurrentMovesEliminated;
+  };
+
   // There is one entry for each register file implemented by the processor.
-  llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles;
+  llvm::SmallVector<RegisterFileUsage, 4> PRFUsage;
+  llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo;
+
+  void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs);
+  void updateMoveElimInfo(const Instruction &Inst);
 
 public:
   RegisterFileStatistics(const llvm::MCSubtargetInfo &sti);
 
+  void onCycleEnd() override;
   void onEvent(const HWInstructionEvent &Event) override;
   void printView(llvm::raw_ostream &OS) const override;
 };

Modified: llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h (original)
+++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h Thu Nov  1 11:04:39 2018
@@ -173,6 +173,11 @@ class RegisterFile : public HardwareUnit
   void freePhysRegs(const RegisterRenamingInfo &Entry,
                     MutableArrayRef<unsigned> FreedPhysRegs);
 
+  // Collects writes that are in a RAW dependency with RS.
+  // This method is called from `addRegisterRead()`.
+  void collectWrites(const ReadState &RS,
+                     SmallVectorImpl<WriteRef> &Writes) const;
+
   // Create an instance of RegisterMappingTracker for every register file
   // specified by the processor model.
   // If no register file is specified, then this method creates a default
@@ -189,6 +194,10 @@ public:
   // No physical regiser is allocated if this write is from a zero-idiom.
   void addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs);
 
+  // Collect writes that are in a data dependency with RS, and update RS
+  // internal state.
+  void addRegisterRead(ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const;
+
   // Removes write \param WS from the register mappings.
   // Physical registers may be released to reflect this update.
   // No registers are released if this write is from a zero-idiom.
@@ -200,7 +209,7 @@ public:
   // If RS is a read from a zero register, and WS is eliminated, then
   // `WS.WritesZero` is also set, so that method addRegisterWrite() would not
   // reserve a physical register for it.
-  bool tryEliminateMove(WriteState &WS, const ReadState &RS);
+  bool tryEliminateMove(WriteState &WS, ReadState &RS);
 
   // Checks if there are enough physical registers in the register files.
   // Returns a "response mask" where each bit represents the response from a
@@ -212,7 +221,8 @@ public:
   // Current implementation can simulate up to 32 register files (including the
   // special register file at index #0).
   unsigned isAvailable(ArrayRef<unsigned> Regs) const;
-  void collectWrites(SmallVectorImpl<WriteRef> &Writes, unsigned RegID) const;
+
+  // Returns the number of PRFs implemented by this processor.
   unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
 
   // Notify each PRF that a new cycle just started.

Modified: llvm/trunk/tools/llvm-mca/include/Instruction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/include/Instruction.h?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/include/Instruction.h (original)
+++ llvm/trunk/tools/llvm-mca/include/Instruction.h Thu Nov  1 11:04:39 2018
@@ -101,6 +101,9 @@ class WriteState {
   // field RegisterID from WD.
   unsigned RegisterID;
 
+  // Physical register file that serves register RegisterID.
+  unsigned PRFID;
+
   // True if this write implicitly clears the upper portion of RegisterID's
   // super-registers.
   bool ClearsSuperRegs;
@@ -135,7 +138,7 @@ public:
   WriteState(const WriteDescriptor &Desc, unsigned RegID,
              bool clearsSuperRegs = false, bool writesZero = false)
       : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
-        ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
+        PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
         IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
 
   WriteState(const WriteState &Other) = default;
@@ -144,6 +147,7 @@ public:
   int getCyclesLeft() const { return CyclesLeft; }
   unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
   unsigned getRegisterID() const { return RegisterID; }
+  unsigned getRegisterFileID() const { return PRFID; }
   unsigned getLatency() const { return WD->Latency; }
 
   void addUser(ReadState *Use, int ReadAdvance);
@@ -168,6 +172,8 @@ public:
     IsEliminated = true;
   }
 
+  void setPRF(unsigned PRF) { PRFID = PRF; }
+
   // On every cycle, update CyclesLeft and notify dependent users.
   void cycleEvent();
   void onInstructionIssued();
@@ -185,6 +191,8 @@ class ReadState {
   const ReadDescriptor *RD;
   // Physical register identified associated to this read.
   unsigned RegisterID;
+  // Physical register file that serves register RegisterID.
+  unsigned PRFID;
   // Number of writes that contribute to the definition of RegisterID.
   // In the absence of partial register updates, the number of DependentWrites
   // cannot be more than one.
@@ -201,18 +209,21 @@ class ReadState {
   // This field is set to true only if there are no dependent writes, and
   // there are no `CyclesLeft' to wait.
   bool IsReady;
+  // True if this is a read from a known zero register.
+  bool IsZero;
   // True if this register read is from a dependency-breaking instruction.
   bool IndependentFromDef;
 
 public:
   ReadState(const ReadDescriptor &Desc, unsigned RegID)
-      : RD(&Desc), RegisterID(RegID), DependentWrites(0),
+      : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
         CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
-        IndependentFromDef(false) {}
+        IsZero(false), IndependentFromDef(false) {}
 
   const ReadDescriptor &getDescriptor() const { return *RD; }
   unsigned getSchedClass() const { return RD->SchedClassID; }
   unsigned getRegisterID() const { return RegisterID; }
+  unsigned getRegisterFileID() const { return PRFID; }
 
   bool isReady() const { return IsReady; }
   bool isImplicitRead() const { return RD->isImplicitRead(); }
@@ -226,6 +237,10 @@ public:
     DependentWrites = Writes;
     IsReady = !Writes;
   }
+
+  bool isReadZero() const { return IsZero; }
+  void setReadZero() { IsZero = true; }
+  void setPRF(unsigned ID) { PRFID = ID; }
 };
 
 /// A sequence of cycles.

Modified: llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h (original)
+++ llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h Thu Nov  1 11:04:39 2018
@@ -68,10 +68,6 @@ class DispatchStage final : public Stage
                                    ArrayRef<unsigned> UsedPhysRegs,
                                    unsigned uOps) const;
 
-  void collectWrites(SmallVectorImpl<WriteRef> &Vec, unsigned RegID) const {
-    return PRF.collectWrites(Vec, RegID);
-  }
-
 public:
   DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
                 unsigned MaxDispatchWidth, RetireControlUnit &R,

Modified: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp (original)
+++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp Thu Nov  1 11:04:39 2018
@@ -173,6 +173,7 @@ void RegisterFile::addRegisterWrite(Writ
   bool IsEliminated = WS.isEliminated();
   bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated;
   const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+  WS.setPRF(RRI.IndexPlusCost.first);
 
   if (RRI.RenameAs && RRI.RenameAs != RegID) {
     RegID = RRI.RenameAs;
@@ -217,9 +218,9 @@ void RegisterFile::addRegisterWrite(Writ
       RegisterMappings[*I].second.AliasRegID = 0U;
     }
 
-    // No physical registers are allocated for instructions that are optimized in
-    // hardware. For example, zero-latency data-dependency breaking instructions
-    // don't consume physical registers.
+    // No physical registers are allocated for instructions that are optimized
+    // in hardware. For example, zero-latency data-dependency breaking
+    // instructions don't consume physical registers.
     if (ShouldAllocatePhysRegs)
       allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
   }
@@ -288,7 +289,7 @@ void RegisterFile::removeRegisterWrite(
   }
 }
 
-bool RegisterFile::tryEliminateMove(WriteState &WS, const ReadState &RS) {
+bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
   const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
   const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
 
@@ -349,15 +350,18 @@ bool RegisterFile::tryEliminateMove(Writ
   }
 
   RMT.NumMoveEliminated++;
-  if (IsZeroMove)
+  if (IsZeroMove) {
     WS.setWriteZero();
+    RS.setReadZero();
+  }
   WS.setEliminated();
 
   return true;
 }
 
-void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes,
-                                 unsigned RegID) const {
+void RegisterFile::collectWrites(const ReadState &RS,
+                                 SmallVectorImpl<WriteRef> &Writes) const {
+  unsigned RegID = RS.getRegisterID();
   assert(RegID && RegID < RegisterMappings.size());
   LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
                     << MRI.getName(RegID) << '\n');
@@ -379,11 +383,13 @@ void RegisterFile::collectWrites(SmallVe
   }
 
   // Remove duplicate entries and resize the input vector.
-  sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
-    return Lhs.getWriteState() < Rhs.getWriteState();
-  });
-  auto It = std::unique(Writes.begin(), Writes.end());
-  Writes.resize(std::distance(Writes.begin(), It));
+  if (Writes.size() > 1) {
+    sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
+      return Lhs.getWriteState() < Rhs.getWriteState();
+    });
+    auto It = std::unique(Writes.begin(), Writes.end());
+    Writes.resize(std::distance(Writes.begin(), It));
+  }
 
   LLVM_DEBUG({
     for (const WriteRef &WR : Writes) {
@@ -395,6 +401,20 @@ void RegisterFile::collectWrites(SmallVe
   });
 }
 
+void RegisterFile::addRegisterRead(ReadState &RS,
+                                   SmallVectorImpl<WriteRef> &Defs) const {
+  unsigned RegID = RS.getRegisterID();
+  const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+  RS.setPRF(RRI.IndexPlusCost.first);
+  if (RS.isIndependentFromDef())
+    return;
+
+  if (ZeroRegisters[RS.getRegisterID()])
+    RS.setReadZero();
+  collectWrites(RS, Defs);
+  RS.setDependentWrites(Defs.size());
+}
+
 unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
   SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());
 

Modified: llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp?rev=345865&r1=345864&r2=345865&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp (original)
+++ llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp Thu Nov  1 11:04:39 2018
@@ -67,8 +67,9 @@ void DispatchStage::updateRAWDependencie
                                           const MCSubtargetInfo &STI) {
   SmallVector<WriteRef, 4> DependentWrites;
 
-  collectWrites(DependentWrites, RS.getRegisterID());
-  RS.setDependentWrites(DependentWrites.size());
+  // Collect all the dependent writes, and update RS internal state.
+  PRF.addRegisterRead(RS, DependentWrites);
+
   // We know that this read depends on all the writes in DependentWrites.
   // For each write, check if we have ReadAdvance information, and use it
   // to figure out in how many cycles this read becomes available.
@@ -116,10 +117,8 @@ Error DispatchStage::dispatch(InstRef IR
   // We also don't update data dependencies for instructions that have been
   // eliminated at register renaming stage.
   if (!IsEliminated) {
-    for (ReadState &RS : IS.getUses()) {
-      if (!RS.isIndependentFromDef())
-        updateRAWDependencies(RS, STI);
-    }
+    for (ReadState &RS : IS.getUses())
+      updateRAWDependencies(RS, STI);
   }
 
   // By default, a dependency-breaking zero-idiom is expected to be optimized
@@ -127,8 +126,7 @@ Error DispatchStage::dispatch(InstRef IR
   // to the instruction.
   SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
   for (WriteState &WS : IS.getDefs())
-    PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS),
-                         RegisterFiles);
+    PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles);
 
   // Reserve slots in the RCU, and notify the instruction that it has been
   // dispatched to the schedulers for execution.




More information about the llvm-commits mailing list