[llvm-commits] [llvm] r158021 - in /llvm/trunk: include/llvm/CodeGen/ScheduleDAG.h include/llvm/CodeGen/ScheduleDAGInstrs.h include/llvm/MC/MCInstrItineraries.h include/llvm/Target/TargetInstrInfo.h lib/CodeGen/MachineScheduler.cpp lib/CodeGen/ScheduleDAGInstrs.cpp lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h lib/CodeGen/TwoAddressInstructionPass.cpp lib/Target/ARM/ARMBaseInstrInfo.cpp lib/Target/ARM/ARMBaseInstrInfo.h lib/Target/TargetInstrInfo.cpp

Wed Jun 6 06:32:37 PDT 2012

On Tue, 05 Jun 2012 21:11:27 -0000
Andrew Trick <atrick at apple.com> wrote:

> Author: atrick
> Date: Tue Jun  5 16:11:27 2012
> New Revision: 158021
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=158021&view=rev
> Log:
> misched: API for minimum vs. expected latency.

Andy,

If I have some loads that I expect to miss in the cache, can I use this
API to get better scheduling for them?

Thanks again,
Hal

> 
> Minimum latency determines per-cycle scheduling groups.
> Expected latency determines critical path and cost.
> 
> Modified:
>     llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
>     llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
>     llvm/trunk/include/llvm/MC/MCInstrItineraries.h
>     llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>     llvm/trunk/lib/CodeGen/MachineScheduler.cpp
>     llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>     llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
>     llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
>     llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
>     llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h
>     llvm/trunk/lib/Target/TargetInstrInfo.cpp
> 
> Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h (original) +++
> llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Tue Jun  5 16:11:27
> 2012 @@ -272,6 +272,9 @@ unsigned Depth;                     // Node
> depth. unsigned Height;                    // Node height.
>    public:
> +    unsigned TopReadyCycle; // Cycle relative to start when node is
> ready.
> +    unsigned BotReadyCycle; // Cycle relative to end when node is
> ready. +
>      const TargetRegisterClass *CopyDstRC; // Is a special copy node
> if not null. const TargetRegisterClass *CopySrcRC;
>  
> @@ -287,7 +290,7 @@
>          isScheduleHigh(false), isScheduleLow(false), isCloned(false),
>          SchedulingPref(Sched::None),
>          isDepthCurrent(false), isHeightCurrent(false), Depth(0),
> Height(0),
> -        CopyDstRC(NULL), CopySrcRC(NULL) {}
> +        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL),
> CopySrcRC(NULL) {} 
>      /// SUnit - Construct an SUnit for post-regalloc scheduling to
> represent /// a MachineInstr.
> @@ -301,7 +304,7 @@
>          isScheduleHigh(false), isScheduleLow(false), isCloned(false),
>          SchedulingPref(Sched::None),
>          isDepthCurrent(false), isHeightCurrent(false), Depth(0),
> Height(0),
> -        CopyDstRC(NULL), CopySrcRC(NULL) {}
> +        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL),
> CopySrcRC(NULL) {} 
>      /// SUnit - Construct a placeholder SUnit.
>      SUnit()
> @@ -314,7 +317,7 @@
>          isScheduleHigh(false), isScheduleLow(false), isCloned(false),
>          SchedulingPref(Sched::None),
>          isDepthCurrent(false), isHeightCurrent(false), Depth(0),
> Height(0),
> -        CopyDstRC(NULL), CopySrcRC(NULL) {}
> +        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL),
> CopySrcRC(NULL) {} 
>      /// setNode - Assign the representative SDNode for this SUnit.
>      /// This may be used during pre-regalloc scheduling.
> @@ -552,12 +555,6 @@
>      ///
>      virtual void computeLatency(SUnit *SU) = 0;
>  
> -    /// ComputeOperandLatency - Override dependence edge latency
> using
> -    /// operand use/def information
> -    ///
> -    virtual void computeOperandLatency(SUnit *, SUnit *,
> -                                       SDep&) const { }
> -
>      /// ForceUnitLatencies - Return true if all scheduling edges
> should be given /// a latency value of one.  The default is to return
> false; schedulers may /// override this as needed.
> 
> Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Tue Jun  5
> 16:11:27 2012 @@ -291,11 +291,15 @@ ///
>      virtual void computeLatency(SUnit *SU);
>  
> -    /// computeOperandLatency - Override dependence edge latency
> using
> +    /// computeOperandLatency - Return dependence edge latency using
>      /// operand use/def information
>      ///
> -    virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
> -                                       SDep& dep) const;
> +    /// FindMin may be set to get the minimum vs. expected latency.
> Minimum
> +    /// latency is used for scheduling groups, while expected
> latency is for
> +    /// instruction cost and critical path.
> +    virtual unsigned computeOperandLatency(SUnit *Def, SUnit *Use,
> +                                           const SDep& dep,
> +                                           bool FindMin = false)
> const; 
>      /// schedule - Order nodes according to selected style, filling
>      /// in the Sequence member.
> 
> Modified: llvm/trunk/include/llvm/MC/MCInstrItineraries.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCInstrItineraries.h?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/MC/MCInstrItineraries.h (original) +++
> llvm/trunk/include/llvm/MC/MCInstrItineraries.h Tue Jun  5 16:11:27
> 2012 @@ -214,6 +214,12 @@ /// class.  The latency is the maximum
> completion time for any stage /// in the itinerary.
>    ///
> +  /// InstrStages override the itinerary's MinLatency property. In
> fact, if the
> +  /// stage latencies, which may be zero, are less than MinLatency,
> +  /// getStageLatency returns a value less than MinLatency.
> +  ///
> +  /// If no stages exist, MinLatency is used. If MinLatency is
> invalid (<0),
> +  /// then it defaults to one cycle.
>    unsigned getStageLatency(unsigned ItinClassIndx) const {
>      // If the target doesn't provide itinerary information, use a
> simple // non-zero default value for all instructions.  Some target's
> provide a @@ -222,7 +228,7 @@
>      // stage). This is different from beginStage == endStage != 0,
> which could // be used for zero-latency pseudo ops.
>      if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0)
> -      return 1;
> +      return (Props.MinLatency < 0) ? 1 : Props.MinLatency;
>  
>      // Calculate the maximum completion time for any stage.
>      unsigned Latency = 0, StartCycle = 0;
> 
> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original) +++
> llvm/trunk/include/llvm/Target/TargetInstrInfo.h Tue Jun  5 16:11:27
> 2012 @@ -668,18 +668,36 @@ return Opcode <= TargetOpcode::COPY;
>    }
>  
> +  virtual int getOperandLatency(const InstrItineraryData *ItinData,
> +                                SDNode *DefNode, unsigned DefIdx,
> +                                SDNode *UseNode, unsigned UseIdx)
> const = 0; +
>    /// getOperandLatency - Compute and return the use operand latency
> of a given /// pair of def and use.
>    /// In most cases, the static scheduling itinerary was enough to
> determine the /// operand latency. But it may not be possible for
> instructions with variable /// number of defs / uses.
> +  ///
> +  /// This is a raw interface to the itinerary that may be directly
> overridden by
> +  /// a target. Use computeOperandLatency to get the best estimate
> of latency. virtual int getOperandLatency(const InstrItineraryData
> *ItinData,
> -                              const MachineInstr *DefMI, unsigned
> DefIdx,
> -                              const MachineInstr *UseMI, unsigned
> UseIdx) const; -
> -  virtual int getOperandLatency(const InstrItineraryData *ItinData,
> -                                SDNode *DefNode, unsigned DefIdx,
> -                                SDNode *UseNode, unsigned UseIdx)
> const = 0;
> +                                const MachineInstr *DefMI, unsigned
> DefIdx,
> +                                const MachineInstr *UseMI,
> +                                unsigned UseIdx) const;
> +
> +  /// computeOperandLatency - Compute and return the latency of the
> given data
> +  /// dependent def and use. DefMI must be a valid def. UseMI may be
> NULL for
> +  /// an unknown use. If the subtarget allows, this may or may not
> need to call
> +  /// getOperandLatency().
> +  ///
> +  /// FindMin may be set to get the minimum vs. expected latency.
> Minimum
> +  /// latency is used for scheduling groups, while expected latency
> is for
> +  /// instruction cost and critical path.
> +  unsigned computeOperandLatency(const InstrItineraryData *ItinData,
> +                                 const TargetRegisterInfo *TRI,
> +                                 const MachineInstr *DefMI,
> +                                 const MachineInstr *UseMI,
> +                                 unsigned Reg, bool FindMin) const;
>  
>    /// getOutputLatency - Compute and return the output dependency
> latency of a /// a given pair of defs which both target the same
> register. This is usually @@ -693,13 +711,17 @@
>    /// getInstrLatency - Compute the instruction latency of a given
> instruction. /// If the instruction has higher cost when predicated,
> it's returned via /// PredCost.
> -  virtual int getInstrLatency(const InstrItineraryData *ItinData,
> -                              const MachineInstr *MI,
> -                              unsigned *PredCost = 0) const;
> +  virtual unsigned getInstrLatency(const InstrItineraryData
> *ItinData,
> +                                   const MachineInstr *MI,
> +                                   unsigned *PredCost = 0) const;
>  
>    virtual int getInstrLatency(const InstrItineraryData *ItinData,
>                                SDNode *Node) const = 0;
>  
> +  /// Return the default expected latency for a def based on its
> opcode.
> +  unsigned defaultDefLatency(const InstrItineraryData *ItinData,
> +                             const MachineInstr *DefMI) const;
> +
>    /// isHighLatencyDef - Return true if this opcode has high latency
> to its /// result.
>    virtual bool isHighLatencyDef(int opc) const { return false; }
> 
> Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original) +++
> llvm/trunk/lib/CodeGen/MachineScheduler.cpp Tue Jun  5 16:11:27 2012
> @@ -21,8 +21,9 @@ #include "llvm/CodeGen/Passes.h"
>  #include "llvm/CodeGen/ScheduleDAGInstrs.h"
>  #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
> -#include "llvm/Analysis/AliasAnalysis.h"
>  #include "llvm/Target/TargetInstrInfo.h"
> +#include "llvm/MC/MCInstrItineraries.h"
> +#include "llvm/Analysis/AliasAnalysis.h"
>  #include "llvm/Support/CommandLine.h"
>  #include "llvm/Support/Debug.h"
>  #include "llvm/Support/ErrorHandling.h"
> @@ -394,6 +395,12 @@
>      return RegionCriticalPSets;
>    }
>  
> +  /// getIssueWidth - Return the max instructions per scheduling
> group.
> +  ///
> +  unsigned getIssueWidth() const {
> +    return InstrItins ? InstrItins->Props.IssueWidth : 1;
> +  }
> +
>  protected:
>    void initRegPressure();
>    void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
> @@ -787,13 +794,16 @@
>      /// MinReadyCycle - Cycle of the soonest available instruction.
>      unsigned MinReadyCycle;
>  
> +    // Remember the greatest min operand latency.
> +    unsigned MaxMinLatency;
> +
>      /// Pending queues extend the ready queues with the same ID and
> the /// PendingFlag set.
>      SchedBoundary(unsigned ID, const Twine &Name):
>        Available(ID, Name+".A"),
>        Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
>        CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
> -      MinReadyCycle(UINT_MAX) {}
> +      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
>  
>      ~SchedBoundary() { delete HazardRec; }
>  
> @@ -805,6 +815,8 @@
>  
>      void bumpCycle();
>  
> +    void bumpNode(SUnit *SU, unsigned IssueWidth);
> +
>      void releasePending();
>  
>      void removeReady(SUnit *SU);
> @@ -868,25 +880,53 @@
>  }
>  
>  void ConvergingScheduler::releaseTopNode(SUnit *SU) {
> -  Top.releaseNode(SU, SU->getDepth());
> +  if (SU->isScheduled)
> +    return;
> +
> +  for (SUnit::succ_iterator I = SU->Preds.begin(), E =
> SU->Preds.end();
> +       I != E; ++I) {
> +    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
> +    unsigned Latency =
> +      DAG->computeOperandLatency(I->getSUnit(), SU,
> *I, /*FindMin=*/true); +#ifndef NDEBUG
> +    Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
> +#endif
> +    if (SU->TopReadyCycle < PredReadyCycle + Latency)
> +      SU->TopReadyCycle = PredReadyCycle + Latency;
> +  }
> +  Top.releaseNode(SU, SU->TopReadyCycle);
>  }
>  
>  void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
> -  Bot.releaseNode(SU, SU->getHeight());
> +  if (SU->isScheduled)
> +    return;
> +
> +  assert(SU->getInstr() && "Scheduled SUnit must have instr");
> +
> +  for (SUnit::succ_iterator I = SU->Succs.begin(), E =
> SU->Succs.end();
> +       I != E; ++I) {
> +    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
> +    unsigned Latency =
> +      DAG->computeOperandLatency(SU, I->getSUnit(),
> *I, /*FindMin=*/true); +#ifndef NDEBUG
> +    Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
> +#endif
> +    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
> +      SU->BotReadyCycle = SuccReadyCycle + Latency;
> +  }
> +  Bot.releaseNode(SU, SU->BotReadyCycle);
>  }
>  
>  void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
>                                                       unsigned
> ReadyCycle) {
> -  if (SU->isScheduled)
> -    return;
> -
>    if (ReadyCycle < MinReadyCycle)
>      MinReadyCycle = ReadyCycle;
>  
>    // Check for interlocks first. For the purpose of other
> heuristics, an // instruction that cannot issue appears as if it's
> not in the ReadyQueue.
> -  if (HazardRec->isEnabled()
> -      && HazardRec->getHazardType(SU) !=
> ScheduleHazardRecognizer::NoHazard)
> +  if (ReadyCycle > CurrCycle
> +      || (HazardRec->isEnabled() && (HazardRec->getHazardType(SU)
> +                                     !=
> ScheduleHazardRecognizer::NoHazard))) Pending.push(SU);
>    else
>      Available.push(SU);
> @@ -900,10 +940,11 @@
>    unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
>  
>    if (!HazardRec->isEnabled()) {
> -    // Bypass lots of virtual calls in case of long latency.
> +    // Bypass HazardRec virtual calls.
>      CurrCycle = NextCycle;
>    }
>    else {
> +    // Bypass getHazardType calls in case of long latency.
>      for (; CurrCycle != NextCycle; ++CurrCycle) {
>        if (isTop())
>          HazardRec->AdvanceCycle();
> @@ -917,6 +958,26 @@
>          << CurrCycle << '\n');
>  }
>  
> +/// Move the boundary of scheduled code by one SUnit.
> +void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU,
> +                                                  unsigned
> IssueWidth) {
> +  // Update the reservation table.
> +  if (HazardRec->isEnabled()) {
> +    if (!isTop() && SU->isCall) {
> +      // Calls are scheduled with their preceding instructions. For
> bottom-up
> +      // scheduling, clear the pipeline state before emitting.
> +      HazardRec->Reset();
> +    }
> +    HazardRec->EmitInstruction(SU);
> +  }
> +  // Check the instruction group size limit.
> +  ++IssueCount;
> +  if (IssueCount == IssueWidth) {
> +    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
> +    bumpCycle();
> +  }
> +}
> +
>  /// Release pending ready nodes in to the available queue. This
> makes them /// visible to heuristics.
>  void ConvergingScheduler::SchedBoundary::releasePending() {
> @@ -928,7 +989,7 @@
>    // so, add them to the available queue.
>    for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
>      SUnit *SU = *(Pending.begin()+i);
> -    unsigned ReadyCycle = isTop() ? SU->getHeight() : SU->getDepth();
> +    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle :
> SU->BotReadyCycle; 
>      if (ReadyCycle < MinReadyCycle)
>        MinReadyCycle = ReadyCycle;
> @@ -965,7 +1026,8 @@
>      releasePending();
>  
>    for (unsigned i = 0; Available.empty(); ++i) {
> -    assert(i <= HazardRec->getMaxLookAhead() && "permanent hazard");
> (void)i;
> +    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
> +           "permanent hazard"); (void)i;
>      bumpCycle();
>      releasePending();
>    }
> @@ -1205,27 +1267,15 @@
>  
>  /// Update the scheduler's state after scheduling a node. This is
> the same node /// that was just returned by pickNode(). However,
> ScheduleDAGMI needs to update -/// it's state based on the current
> cycle before MachineSchedStrategy. +/// it's state based on the
> current cycle before MachineSchedStrategy does. void
> ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
> -  // Update the reservation table.
> -  if (IsTopNode && Top.HazardRec->isEnabled()) {
> -    Top.HazardRec->EmitInstruction(SU);
> -    if (Top.HazardRec->atIssueLimit()) {
> -      DEBUG(dbgs() << "*** Max instrs at cycle " << Top.CurrCycle <<
> '\n');
> -      Top.bumpCycle();
> -    }
> +  if (IsTopNode) {
> +    SU->TopReadyCycle = Top.CurrCycle;
> +    Top.bumpNode(SU, DAG->getIssueWidth());
>    }
> -  else if (Bot.HazardRec->isEnabled()) {
> -    if (SU->isCall) {
> -      // Calls are scheduled with their preceding instructions. For
> bottom-up
> -      // scheduling, clear the pipeline state before emitting.
> -      Bot.HazardRec->Reset();
> -    }
> -    Bot.HazardRec->EmitInstruction(SU);
> -    if (Bot.HazardRec->atIssueLimit()) {
> -      DEBUG(dbgs() << "*** Max instrs at cycle " << Bot.CurrCycle <<
> '\n');
> -      Bot.bumpCycle();
> -    }
> +  else {
> +    SU->BotReadyCycle = Bot.CurrCycle;
> +    Bot.bumpNode(SU, DAG->getIssueWidth());
>    }
>  }
>  
> 
> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original) +++
> llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Tue Jun  5 16:11:27 2012
> @@ -271,10 +271,12 @@ // Adjust the dependence latency using operand
> def/use // information (if any), and then allow the target to
>        // perform its own adjustments.
> -      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
> +      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
>        if (!UnitLatencies) {
> -        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
> -        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
> +        unsigned Latency = computeOperandLatency(SU, UseSU, dep);
> +        dep.setLatency(Latency);
> +
> +        ST.adjustSchedDependency(SU, UseSU, dep);
>        }
>        UseSU->addPred(dep);
>      }
> @@ -461,11 +463,13 @@
>        // Create a data dependence.
>        //
>        // TODO: Handle "special" address latencies cleanly.
> -      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
> +      SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
>        if (!UnitLatencies) {
>          // Adjust the dependence latency using operand def/use
> information, then // allow the target to perform its own adjustments.
> -        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
> +        unsigned Latency = computeOperandLatency(DefSU, SU,
> const_cast<SDep &>(dep));
> +        dep.setLatency(Latency);
> +
>          const TargetSubtargetInfo &ST =
> TM.getSubtarget<TargetSubtargetInfo>();
> ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); }
> @@ -970,8 +974,9 @@
>  }
>  
>  void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
> -  // Compute the latency for the node.
> -  if (!InstrItins || InstrItins->isEmpty()) {
> +  // Compute the latency for the node. We only provide a default for
> missing
> +  // itineraries. Empty itineraries still have latency properties.
> +  if (!InstrItins) {
>      SU->Latency = 1;
>  
>      // Simplistic target-independent heuristic: assume that loads
> take @@ -983,63 +988,15 @@
>    }
>  }
>  
> -void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
> -                                              SDep& dep) const {
> -  if (!InstrItins || InstrItins->isEmpty())
> -    return;
> -
> +unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit
> *Use,
> +                                                  const SDep& dep,
> +                                                  bool FindMin)
> const { // For a data dependency with a known register...
>    if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
> -    return;
> +    return 1;
>  
> -  const unsigned Reg = dep.getReg();
> -
> -  // ... find the definition of the register in the defining
> -  // instruction
> -  MachineInstr *DefMI = Def->getInstr();
> -  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
> -  if (DefIdx != -1) {
> -    const MachineOperand &MO = DefMI->getOperand(DefIdx);
> -    if (MO.isReg() && MO.isImplicit() &&
> -        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
> -      // This is an implicit def, getOperandLatency() won't return
> the correct
> -      // latency. e.g.
> -      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ...,
> %Q3<imp-def>
> -      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
> -      // What we want is to compute latency between def of %D6/%D7
> and use of
> -      // %Q3 instead.
> -      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false,
> true, TRI);
> -      if (DefMI->getOperand(Op2).isReg())
> -        DefIdx = Op2;
> -    }
> -    MachineInstr *UseMI = Use->getInstr();
> -    // For all uses of the register, calculate the maxmimum latency
> -    int Latency = -1;
> -    if (UseMI) {
> -      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i)
> {
> -        const MachineOperand &MO = UseMI->getOperand(i);
> -        if (!MO.isReg() || !MO.isUse())
> -          continue;
> -        unsigned MOReg = MO.getReg();
> -        if (MOReg != Reg)
> -          continue;
> -
> -        int UseCycle = TII->getOperandLatency(InstrItins, DefMI,
> DefIdx,
> -                                              UseMI, i);
> -        Latency = std::max(Latency, UseCycle);
> -      }
> -    } else {
> -      // UseMI is null, then it must be a scheduling barrier.
> -      if (!InstrItins || InstrItins->isEmpty())
> -        return;
> -      unsigned DefClass = DefMI->getDesc().getSchedClass();
> -      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
> -    }
> -
> -    // If we found a latency, then replace the existing dependence
> latency.
> -    if (Latency >= 0)
> -      dep.setLatency(Latency);
> -  }
> +  return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
> +                                    Use->getInstr(), dep.getReg(),
> FindMin); }
>  
>  void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
> 
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
> (original) +++
> llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h Tue Jun  5
> 16:11:27 2012 @@ -98,12 +98,6 @@ /// virtual void
> computeLatency(SUnit *SU); 
> -    /// computeOperandLatency - Override dependence edge latency
> using
> -    /// operand use/def information
> -    ///
> -    virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
> -                                       SDep& dep) const { }
> -
>      virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
>                                         unsigned OpIdx, SDep& dep)
> const; 
> 
> Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original)
> +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Tue Jun  5
> 16:11:27 2012 @@ -1046,7 +1046,7 @@ return true;  // Below MI
>      unsigned DefDist = DDI->second;
>      assert(Dist > DefDist && "Visited def already?");
> -    if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist -
> DefDist))
> +    if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
>        return true;
>    }
>    return false;
> 
> Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original) +++
> llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Tue Jun  5 16:11:27
> 2012 @@ -2567,12 +2567,13 @@ 
>  int
>  ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData
> *ItinData,
> -                             const MachineInstr *DefMI, unsigned
> DefIdx,
> -                             const MachineInstr *UseMI, unsigned
> UseIdx) const {
> +                                    const MachineInstr *DefMI,
> unsigned DefIdx,
> +                                    const MachineInstr *UseMI,
> +                                    unsigned UseIdx) const {
>    if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
> -      DefMI->isRegSequence() || DefMI->isImplicitDef())
> +      DefMI->isRegSequence() || DefMI->isImplicitDef()) {
>      return 1;
> -
> +  }
>    if (!ItinData || ItinData->isEmpty())
>      return DefMI->mayLoad() ? 3 : 1;
>  
> @@ -2983,14 +2984,16 @@
>                             DepMI->getNumOperands());
>  }
>  
> -int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData
> *ItinData,
> -                                      const MachineInstr *MI,
> -                                      unsigned *PredCost) const {
> +unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData
> *ItinData,
> +                                           const MachineInstr *MI,
> +                                           unsigned *PredCost) const
> { if (MI->isCopyLike() || MI->isInsertSubreg() ||
>        MI->isRegSequence() || MI->isImplicitDef())
>      return 1;
>  
> -  if (!ItinData || ItinData->isEmpty())
> +  // Be sure to call getStageLatency for an empty itinerary in case
> it has a
> +  // valid MinLatency property.
> +  if (!ItinData)
>      return 1;
>  
>    if (MI->isBundle()) {
> 
> Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h (original) +++
> llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h Tue Jun  5 16:11:27 2012
> @@ -249,8 +249,9 @@ const MCInstrDesc &UseMCID,
>                          unsigned UseIdx, unsigned UseAlign) const;
>  
> -  int getInstrLatency(const InstrItineraryData *ItinData,
> -                      const MachineInstr *MI, unsigned *PredCost =
> 0) const;
> +  unsigned getInstrLatency(const InstrItineraryData *ItinData,
> +                           const MachineInstr *MI,
> +                           unsigned *PredCost = 0) const;
>  
>    int getInstrLatency(const InstrItineraryData *ItinData,
>                        SDNode *Node) const;
> 
> Modified: llvm/trunk/lib/Target/TargetInstrInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetInstrInfo.cpp?rev=158021&r1=158020&r2=158021&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/TargetInstrInfo.cpp (original) +++
> llvm/trunk/lib/Target/TargetInstrInfo.cpp Tue Jun  5 16:11:27 2012 @@
> -61,22 +61,125 @@ return 1;
>  }
>  
> +/// Return the default expected latency for a def based on its
> opcode. +unsigned TargetInstrInfo::defaultDefLatency(const
> InstrItineraryData *ItinData,
> +                                            const MachineInstr
> *DefMI) const {
> +  if (DefMI->mayLoad())
> +    return ItinData->Props.LoadLatency;
> +  if (isHighLatencyDef(DefMI->getOpcode()))
> +    return ItinData->Props.HighLatency;
> +  return 1;
> +}
> +
> +/// Both DefMI and UseMI must be valid.  By default, call directly
> to the +/// itinerary. This may be overridden by the target.
>  int
>  TargetInstrInfo::getOperandLatency(const InstrItineraryData
> *ItinData,
> -                             const MachineInstr *DefMI, unsigned
> DefIdx,
> -                             const MachineInstr *UseMI, unsigned
> UseIdx) const {
> -  if (!ItinData || ItinData->isEmpty())
> -    return -1;
> -
> +                                   const MachineInstr *DefMI,
> unsigned DefIdx,
> +                                   const MachineInstr *UseMI,
> +                                   unsigned UseIdx) const {
>    unsigned DefClass = DefMI->getDesc().getSchedClass();
>    unsigned UseClass = UseMI->getDesc().getSchedClass();
>    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass,
> UseIdx); }
>  
> -int TargetInstrInfo::getInstrLatency(const InstrItineraryData
> *ItinData,
> -                                     const MachineInstr *MI,
> -                                     unsigned *PredCost) const {
> -  if (!ItinData || ItinData->isEmpty())
> +/// computeOperandLatency - Compute and return the latency of the
> given data +/// dependent def and use. DefMI must be a valid def.
> UseMI may be NULL for an +/// unknown use. Depending on the
> subtarget's itinerary properties, this may or +/// may not need to
> call getOperandLatency(). +///
> +/// FindMin may be set to get the minimum vs. expected latency.
> Minimum +/// latency is used for scheduling groups, while expected
> latency is for +/// instruction cost and critical path.
> +///
> +/// For most subtargets, we don't need DefIdx or UseIdx to compute
> min latency. +/// DefMI must be a valid definition, but UseMI may be
> NULL for an unknown use. +unsigned TargetInstrInfo::
> +computeOperandLatency(const InstrItineraryData *ItinData,
> +                      const TargetRegisterInfo *TRI,
> +                      const MachineInstr *DefMI, const MachineInstr
> *UseMI,
> +                      unsigned Reg, bool FindMin) const {
> +
> +  // Default to one cycle for missing itinerary. Empty itineraries
> still have
> +  // properties. We have one hard-coded exception for loads, to
> preserve
> +  // existing behavior.
> +  if (!ItinData)
> +    return DefMI->mayLoad() ? 2 : 1;
> +
> +  // Return a latency based on the itinerary properties and defining
> instruction
> +  // if possible. Some common subtargets don't require per-operand
> latency,
> +  // especially for minimum latencies.
> +  if (FindMin) {
> +    // If MinLatency is valid, call getInstrLatency. This uses Stage
> latency if
> +    // it exists before defaulting to MinLatency.
> +    if (ItinData->Props.MinLatency >= 0)
> +      return getInstrLatency(ItinData, DefMI);
> +
> +    // If MinLatency is invalid, OperandLatency is interpreted as
> MinLatency.
> +    // For empty itineraries, short-circuit the check and default to
> one cycle.
> +    if (ItinData->isEmpty())
> +      return 1;
> +  }
> +  else if(ItinData->isEmpty())
> +    return defaultDefLatency(ItinData, DefMI);
> +
> +  // ...operand lookup required
> +
> +  // Find the definition of the register in the defining instruction.
> +  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
> +  if (DefIdx != -1) {
> +    const MachineOperand &MO = DefMI->getOperand(DefIdx);
> +    if (MO.isReg() && MO.isImplicit() &&
> +        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
> +      // This is an implicit def, getOperandLatency() won't return
> the correct
> +      // latency. e.g.
> +      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ...,
> %Q3<imp-def>
> +      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
> +      // What we want is to compute latency between def of %D6/%D7
> and use of
> +      // %Q3 instead.
> +      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false,
> true, TRI);
> +      if (DefMI->getOperand(Op2).isReg())
> +        DefIdx = Op2;
> +    }
> +    // For all uses of the register, calculate the maximum latency
> +    int OperLatency = -1;
> +
> +    // If UseMI is null, then it must be a scheduling barrier.
> +    if (!UseMI) {
> +      unsigned DefClass = DefMI->getDesc().getSchedClass();
> +      OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
> +    }
> +    else {
> +      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i)
> {
> +        const MachineOperand &MO = UseMI->getOperand(i);
> +        if (!MO.isReg() || !MO.isUse())
> +          continue;
> +        unsigned MOReg = MO.getReg();
> +        if (MOReg != Reg)
> +          continue;
> +
> +        int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx,
> UseMI, i);
> +        OperLatency = std::max(OperLatency, UseCycle);
> +      }
> +    }
> +    // If we found an operand latency, we're done.
> +    if (OperLatency >= 0)
> +      return OperLatency;
> +  }
> +  // No operand latency was found.
> +  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
> +  // Expected latency is the max of the stage latency and itinerary
> props.
> +  if (!FindMin)
> +    InstrLatency = std::max(InstrLatency,
> defaultDefLatency(ItinData, DefMI));
> +  return InstrLatency;
> +}
> +
> +unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData
> *ItinData,
> +                                          const MachineInstr *MI,
> +                                          unsigned *PredCost) const {
> +  // Default to one cycle for no itinerary. However, an "empty"
> itinerary may
> +  // still have a MinLatency property, which getStageLatency checks.
> +  if (!ItinData)
>      return 1;
>  
>    return ItinData->getStageLatency(MI->getDesc().getSchedClass());
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

-- 
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory