[llvm] r327987 - [Hexagon] Improve scheduling heuristic for large basic blocks

Tue Mar 20 07:54:01 PDT 2018

Author: kparzysz
Date: Tue Mar 20 07:54:01 2018
New Revision: 327987

URL: http://llvm.org/viewvc/llvm-project?rev=327987&view=rev
Log:
[Hexagon] Improve scheduling heuristic for large basic blocks

This patch changes the isLatencyBound heuristic to look at the
path length based upon the number of packets needed to schedule
a basic block. For small basic blocks, the heuristic uses a small
threshold for isLatencyBound. For large basic blocks, the
heuristic uses a large threshold.

The goal is to increase the priority of an instruction in a small
basic block that has a large height or depth relative to the code
size. For large basic blocks, the height and depth are ignored,
because prioritizing by them lengthens register live ranges and
causes more spills. That is, for large basic blocks, it is more
important to schedule instructions as soon as they become available
and to keep the defs and uses close together.

Patch by Brendon Cahoon.

Modified:
    llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h
    llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.h

Modified: llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp?rev=327987&r1=327986&r2=327987&view=diff
==============================================================================

--- llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.cpp Tue Mar 20 07:54:01 2018
@@ -32,6 +32,7 @@ void HexagonHazardRecognizer::Reset() {
   UsesDotCur = nullptr;
   DotCurPNum = -1;
   UsesLoad = false;
+  PrefVectorStoreNew = nullptr;
   RegDefs.clear();
 }
 
@@ -80,6 +81,7 @@ void HexagonHazardRecognizer::AdvanceCyc
     DotCurPNum = -1;
   }
   UsesLoad = false;
+  PrefVectorStoreNew = nullptr;
   PacketNum++;
   RegDefs.clear();
 }
@@ -89,8 +91,14 @@ void HexagonHazardRecognizer::AdvanceCyc
 /// bank conflict. Case 2 - if a packet contains a dot cur instruction, then we
 /// prefer the instruction that can use the dot cur result. However, if the use
 /// is not scheduled in the same packet, then prefer other instructions in the
-/// subsequent packet.
+/// subsequent packet. Case 3 - we prefer a vector store that can be converted
+/// to a .new store. The packetizer will not generate the .new store if the
+/// store doesn't have resources to fit in the packet (but the .new store may
+/// have resources). We attempt to schedule the store as soon as possible to
+/// help packetize the two instructions together.
 bool HexagonHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+  if (PrefVectorStoreNew != nullptr && PrefVectorStoreNew != SU)
+    return true;
   if (UsesLoad && SU->isInstr() && SU->getInstr()->mayLoad())
     return true;
   return UsesDotCur && ((SU == UsesDotCur) ^ (DotCurPNum == (int)PacketNum));
@@ -144,4 +152,13 @@ void HexagonHazardRecognizer::EmitInstru
   }
 
   UsesLoad = MI->mayLoad();
+
+  if (TII->isHVXVec(*MI) && !MI->mayLoad() && !MI->mayStore())
+    for (auto &S : SU->Succs)
+      if (S.isAssignedRegDep() && S.getLatency() == 0 &&
+          TII->mayBeNewStore(*S.getSUnit()->getInstr()) &&
+          Resources->canReserveResources(*S.getSUnit()->getInstr())) {
+        PrefVectorStoreNew = S.getSUnit();
+        break;
+      }
 }

Modified: llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h?rev=327987&r1=327986&r2=327987&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonHazardRecognizer.h Tue Mar 20 07:54:01 2018
@@ -23,15 +23,21 @@ namespace llvm {
 class HexagonHazardRecognizer : public ScheduleHazardRecognizer {
   DFAPacketizer *Resources;
   const HexagonInstrInfo *TII;
-  unsigned PacketNum;
+  unsigned PacketNum = 0;
   // If the packet contains a potential dot cur instruction. This is
   // used for the scheduling priority function.
-  SUnit *UsesDotCur;
+  SUnit *UsesDotCur = nullptr;
   // The packet number when a dor cur is emitted. If its use is not generated
   // in the same packet, then try to wait another cycle before emitting.
-  int DotCurPNum;
+  int DotCurPNum = -1;
   // Does the packet contain a load. Used to restrict another load, if possible.
   bool UsesLoad = false;
+  // Check if we should prefer a vector store that will become a .new version.
+  // The .new store uses different resources than a normal store, and the
+  // packetizer will not generate the .new if the regular store does not have
+  // resources available (even if the .new version does). To help, the schedule
+  // attempts to schedule the .new as soon as possible in the packet.
+  SUnit *PrefVectorStoreNew = nullptr;
   // The set of registers defined by instructions in the current packet.
   SmallSet<unsigned, 8> RegDefs;
 
@@ -39,8 +45,7 @@ public:
   HexagonHazardRecognizer(const InstrItineraryData *II,
                           const HexagonInstrInfo *HII,
                           const HexagonSubtarget &ST)
-    : Resources(ST.createDFAPacketizer(II)), TII(HII), PacketNum(0),
-    UsesDotCur(nullptr), DotCurPNum(-1) { }
+    : Resources(ST.createDFAPacketizer(II)), TII(HII) { }
 
   ~HexagonHazardRecognizer() override {
     if (Resources)

Modified: llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.cpp?rev=327987&r1=327986&r2=327987&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.cpp Tue Mar 20 07:54:01 2018
@@ -473,7 +473,11 @@ SUnit *ConvergingVLIWScheduler::VLIWSche
   if (CheckPending)
     releasePending();
 
-  for (unsigned i = 0; Available.empty(); ++i) {
+  for (unsigned i = 0;
+       Available.empty() ||
+         (Available.size() == 1 &&
+          !ResourceModel->isResourceAvailable(*Available.begin(), isTop()));
+       ++i) {
     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
            "permanent hazard"); (void)i;
     ResourceModel->reserveResources(nullptr, isTop());
@@ -625,6 +629,10 @@ int ConvergingVLIWScheduler::pressureCha
   return 0;
 }
 
+static unsigned getWeakLeft(const SUnit *SU, bool IsTop) {
+  return (IsTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
 // Constants used to denote relative importance of
 // heuristic components for cost computation.
 static const unsigned PriorityOne = 200;
@@ -782,7 +790,7 @@ int ConvergingVLIWScheduler::SchedulingC
 
   // Give preference to a zero latency instruction if the dependent
   // instruction is in the current packet.
-  if (Q.getID() == TopQID) {
+  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
     for (const SDep &PI : SU->Preds) {
       if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
           PI.getLatency() == 0 &&
@@ -791,7 +799,7 @@ int ConvergingVLIWScheduler::SchedulingC
         DEBUG(if (verbose) dbgs() << "Z|");
       }
     }
-  } else {
+  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
     for (const SDep &SI : SU->Succs) {
       if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
           SI.getLatency() == 0 &&

Modified: llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.h?rev=327987&r1=327986&r2=327987&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonMachineScheduler.h Tue Mar 20 07:54:01 2018
@@ -98,6 +98,7 @@ public:
   void schedule() override;
 
   RegisterClassInfo *getRegClassInfo() { return RegClassInfo; }
+  int getBBSize() { return BB->size(); }
 };
 
 //===----------------------------------------------------------------------===//
@@ -143,6 +144,7 @@ class ConvergingVLIWScheduler : public M
 
     unsigned CurrCycle = 0;
     unsigned IssueCount = 0;
+    unsigned CriticalPathLength = 0;
 
     /// MinReadyCycle - Cycle of the soonest available instruction.
     unsigned MinReadyCycle = std::numeric_limits<unsigned>::max();
@@ -166,6 +168,25 @@ class ConvergingVLIWScheduler : public M
       SchedModel = smodel;
       CurrCycle = 0;
       IssueCount = 0;
+      // Initialize the critical path length limit, which is used by the scheduling
+      // cost model to determine the value for scheduling an instruction. We use
+      // a slightly different heuristic for small and large functions. For small
+      // functions, it's important to use the height/depth of the instruction.
+      // For large functions, prioritizing by height or depth increases spills.
+      CriticalPathLength = DAG->getBBSize() / SchedModel->getIssueWidth();
+      if (DAG->getBBSize() < 50)
+        // We divide by two as a cheap and simple heuristic to reduce the
+        // critical path length, which increases the priority of using the graph
+        // height/depth in the scheduler's cost computation.
+        CriticalPathLength >>= 1;
+      else {
+        // For large basic blocks, we prefer a larger critical path length to
+        // decrease the priority of using the graph height/depth.
+        unsigned MaxPath = 0;
+        for (auto &SU : DAG->SUnits)
+          MaxPath = std::max(MaxPath, isTop() ? SU.getHeight() : SU.getDepth());
+        CriticalPathLength = std::max(CriticalPathLength, MaxPath) + 1;
+      }
     }
 
     bool isTop() const {
@@ -185,6 +206,13 @@ class ConvergingVLIWScheduler : public M
     void removeReady(SUnit *SU);
 
     SUnit *pickOnlyChoice();
+ 
+    bool isLatencyBound(SUnit *SU) {
+      if (CurrCycle >= CriticalPathLength)
+        return true;
+      unsigned PathLength = isTop() ? SU->getHeight() : SU->getDepth();
+      return CriticalPathLength - CurrCycle <= PathLength;
+    }
   };
 
   VLIWMachineScheduler *DAG = nullptr;