[llvm] r332249 - [llvm-mca] Improved support for dependency-breaking instructions.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Mon May 14 08:08:22 PDT 2018


Author: adibiagio
Date: Mon May 14 08:08:22 2018
New Revision: 332249

URL: http://llvm.org/viewvc/llvm-project?rev=332249&view=rev
Log:
[llvm-mca] Improved support for dependency-breaking instructions.

The tool assumes that a zero-latency instruction that doesn't consume hardware
resources is an optimizable dependency-breaking instruction. That means it
doesn't have to wait on register input operands, and it doesn't consume any
physical register. The PRF (physical register file) knows how to optimize it
at the register renaming stage.

Modified:
    llvm/trunk/tools/llvm-mca/Dispatch.cpp
    llvm/trunk/tools/llvm-mca/Dispatch.h
    llvm/trunk/tools/llvm-mca/Instruction.h
    llvm/trunk/tools/llvm-mca/Scheduler.cpp

Modified: llvm/trunk/tools/llvm-mca/Dispatch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Dispatch.cpp?rev=332249&r1=332248&r2=332249&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Dispatch.cpp (original)
+++ llvm/trunk/tools/llvm-mca/Dispatch.cpp Mon May 14 08:08:22 2018
@@ -91,8 +91,8 @@ void RegisterFile::addRegisterFile(Array
   }
 }
 
-void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
-                                     MutableArrayRef<unsigned> UsedPhysRegs) {
+void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry,
+                                    MutableArrayRef<unsigned> UsedPhysRegs) {
   unsigned RegisterFileIndex = Entry.first;
   unsigned Cost = Entry.second;
   if (RegisterFileIndex) {
@@ -106,8 +106,8 @@ void RegisterFile::createNewMappings(Ind
   UsedPhysRegs[0] += Cost;
 }
 
-void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
-                                  MutableArrayRef<unsigned> FreedPhysRegs) {
+void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry,
+                                MutableArrayRef<unsigned> FreedPhysRegs) {
   unsigned RegisterFileIndex = Entry.first;
   unsigned Cost = Entry.second;
   if (RegisterFileIndex) {
@@ -121,8 +121,9 @@ void RegisterFile::removeMappings(IndexP
   FreedPhysRegs[0] += Cost;
 }
 
-void RegisterFile::addRegisterMapping(WriteState &WS,
-                                      MutableArrayRef<unsigned> UsedPhysRegs) {
+void RegisterFile::addRegisterWrite(WriteState &WS,
+                                    MutableArrayRef<unsigned> UsedPhysRegs,
+                                    bool ShouldAllocatePhysRegs) {
   unsigned RegID = WS.getRegisterID();
   assert(RegID && "Adding an invalid register definition?");
 
@@ -131,7 +132,11 @@ void RegisterFile::addRegisterMapping(Wr
   for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
     RegisterMappings[*I].first = &WS;
 
-  createNewMappings(Mapping.second, UsedPhysRegs);
+  // No physical registers are allocated for instructions that are optimized in
+  // hardware. For example, zero-latency data-dependency breaking instructions
+  // don't consume physical registers.
+  if (ShouldAllocatePhysRegs)
+    allocatePhysRegs(Mapping.second, UsedPhysRegs);
 
   // If this is a partial update, then we are done.
   if (!WS.fullyUpdatesSuperRegs())
@@ -141,8 +146,9 @@ void RegisterFile::addRegisterMapping(Wr
     RegisterMappings[*I].first = &WS;
 }
 
-void RegisterFile::invalidateRegisterMapping(
-    const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
+void RegisterFile::removeRegisterWrite(
+    const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs,
+    bool ShouldFreePhysRegs) {
   unsigned RegID = WS.getRegisterID();
   bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs();
 
@@ -154,7 +160,8 @@ void RegisterFile::invalidateRegisterMap
   if (!Mapping.first)
     return;
 
-  removeMappings(Mapping.second, FreedPhysRegs);
+  if (ShouldFreePhysRegs)
+    freePhysRegs(Mapping.second, FreedPhysRegs);
 
   if (Mapping.first == &WS)
     Mapping.first = nullptr;
@@ -261,8 +268,10 @@ void DispatchUnit::notifyInstructionDisp
 void DispatchUnit::notifyInstructionRetired(const InstRef &IR) {
   LLVM_DEBUG(dbgs() << "[E] Instruction Retired: " << IR << '\n');
   SmallVector<unsigned, 4> FreedRegs(RAT->getNumRegisterFiles());
+  const InstrDesc &Desc = IR.getInstruction()->getDesc();
+
   for (const std::unique_ptr<WriteState> &WS : IR.getInstruction()->getDefs())
-    RAT->invalidateRegisterMapping(*WS.get(), FreedRegs);
+    RAT->removeRegisterWrite(*WS.get(), FreedRegs, !Desc.isZeroLatency());
   Owner->notifyInstructionEvent(HWInstructionRetiredEvent(IR, FreedRegs));
   Owner->eraseInstruction(IR);
 }
@@ -339,18 +348,22 @@ void DispatchUnit::dispatch(InstRef IR,
     AvailableEntries -= NumMicroOps;
   }
 
-  // Update RAW dependencies if this instruction is not a zero-latency
-  // instruction. The assumption is that a zero-latency instruction doesn't
-  // require to be issued to the scheduler for execution. More importantly, it
-  // doesn't have to wait on the register input operands.
-  if (Desc.MaxLatency || !Desc.Resources.empty())
+  // A dependency-breaking instruction doesn't have to wait on the register
+  // input operands, and it is often optimized at register renaming stage.
+  // Update RAW dependencies if this instruction is not a dependency-breaking
+  // instruction. A dependency-breaking instruction is a zero-latency
+  // instruction that doesn't consume hardware resources.
+  // An example of dependency-breaking instruction on X86 is a zero-idiom XOR.
+  if (!Desc.isZeroLatency())
     for (std::unique_ptr<ReadState> &RS : IS.getUses())
       updateRAWDependencies(*RS, STI);
 
-  // Allocate new mappings.
+  // By default, a dependency-breaking zero-latency instruction is expected to
+  // be optimized at register renaming stage. That means, no physical register
+  // is allocated to the instruction.
   SmallVector<unsigned, 4> RegisterFiles(RAT->getNumRegisterFiles());
   for (std::unique_ptr<WriteState> &WS : IS.getDefs())
-    RAT->addRegisterMapping(*WS, RegisterFiles);
+    RAT->addRegisterWrite(*WS, RegisterFiles, !Desc.isZeroLatency());
 
   // Reserve slots in the RCU, and notify the instruction that it has been
   // dispatched to the schedulers for execution.

Modified: llvm/trunk/tools/llvm-mca/Dispatch.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Dispatch.h?rev=332249&r1=332248&r2=332249&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Dispatch.h (original)
+++ llvm/trunk/tools/llvm-mca/Dispatch.h Mon May 14 08:08:22 2018
@@ -104,14 +104,14 @@ class RegisterFile {
 
   // Allocates register mappings in register file specified by the
   // IndexPlusCostPairTy object. This method is called from addRegisterMapping.
-  void createNewMappings(IndexPlusCostPairTy IPC,
-                         llvm::MutableArrayRef<unsigned> UsedPhysRegs);
+  void allocatePhysRegs(IndexPlusCostPairTy IPC,
+                        llvm::MutableArrayRef<unsigned> UsedPhysRegs);
 
   // Removes a previously allocated mapping from the register file referenced
   // by the IndexPlusCostPairTy object. This method is called from
   // invalidateRegisterMapping.
-  void removeMappings(IndexPlusCostPairTy IPC,
-                      llvm::MutableArrayRef<unsigned> FreedPhysRegs);
+  void freePhysRegs(IndexPlusCostPairTy IPC,
+                    llvm::MutableArrayRef<unsigned> FreedPhysRegs);
 
   // Create an instance of RegisterMappingTracker for every register file
   // specified by the processor model.
@@ -126,17 +126,21 @@ public:
     initialize(SM, NumRegs);
   }
 
-  // Creates a new register mapping for RegID.
-  // This reserves a microarchitectural register in every register file that
-  // contains RegID.
-  void addRegisterMapping(WriteState &WS,
-                          llvm::MutableArrayRef<unsigned> UsedPhysRegs);
-
-  // Invalidates register mappings associated to the input WriteState object.
-  // This releases previously allocated mappings for the physical register
-  // associated to the WriteState.
-  void invalidateRegisterMapping(const WriteState &WS,
-                                 llvm::MutableArrayRef<unsigned> FreedPhysRegs);
+  // This method updates the data dependency graph by inserting a new register
+  // definition. This method is also responsible for updating the number of used
+  // physical registers in the register file(s). The number of physical
+  // registers is updated only if flag ShouldAllocatePhysRegs is set.
+  void addRegisterWrite(WriteState &WS,
+                        llvm::MutableArrayRef<unsigned> UsedPhysRegs,
+                        bool ShouldAllocatePhysRegs = true);
+
+  // Updates the data dependency graph by removing a write. It also updates the
+  // internal state of the register file(s) by freeing physical registers.
+  // The number of physical registers is updated only if flag ShouldFreePhysRegs
+  // is set.
+  void removeRegisterWrite(const WriteState &WS,
+                           llvm::MutableArrayRef<unsigned> FreedPhysRegs,
+                           bool ShouldFreePhysRegs = true);
 
   // Checks if there are enough microarchitectural registers in the register
   // files.  Returns a "response mask" where each bit is the response from a

Modified: llvm/trunk/tools/llvm-mca/Instruction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Instruction.h?rev=332249&r1=332248&r2=332249&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Instruction.h (original)
+++ llvm/trunk/tools/llvm-mca/Instruction.h Mon May 14 08:08:22 2018
@@ -269,6 +269,9 @@ struct InstrDesc {
   bool MayLoad;
   bool MayStore;
   bool HasSideEffects;
+
+  // A zero latency instruction doesn't consume any scheduler resources.
+  bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
 };
 
 /// An instruction dispatched to the out-of-order backend.

Modified: llvm/trunk/tools/llvm-mca/Scheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Scheduler.cpp?rev=332249&r1=332248&r2=332249&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Scheduler.cpp (original)
+++ llvm/trunk/tools/llvm-mca/Scheduler.cpp Mon May 14 08:08:22 2018
@@ -260,13 +260,12 @@ void Scheduler::scheduleInstruction(Inst
   // targets, zero-idiom instructions (for example: a xor that clears the value
   // of a register) are treated speacially, and are often eliminated at register
   // renaming stage.
-  bool IsZeroLatency = !Desc.MaxLatency && Desc.Resources.empty();
 
   // Instructions that use an in-order dispatch/issue processor resource must be
   // issued immediately to the pipeline(s). Any other in-order buffered
   // resources (i.e. BufferSize=1) is consumed.
 
-  if (!IsZeroLatency && !Resources->mustIssueImmediately(Desc)) {
+  if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding " << IR
                       << " to the Ready Queue\n");
     ReadyQueue[IR.getSourceIndex()] = IR.getInstruction();




More information about the llvm-commits mailing list