[llvm-commits] [llvm] r158021 - in /llvm/trunk: include/llvm/CodeGen/ScheduleDAG.h include/llvm/CodeGen/ScheduleDAGInstrs.h include/llvm/MC/MCInstrItineraries.h include/llvm/Target/TargetInstrInfo.h lib/CodeGen/MachineScheduler.cpp lib/CodeGen/ScheduleDAGInstrs.cpp lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h lib/CodeGen/TwoAddressInstructionPass.cpp lib/Target/ARM/ARMBaseInstrInfo.cpp lib/Target/ARM/ARMBaseInstrInfo.h lib/Target/TargetInstrInfo.cpp

Hal Finkel hfinkel at anl.gov
Thu Jun 7 21:48:53 PDT 2012


On Wed, 06 Jun 2012 12:13:11 -0700
Andrew Trick <atrick at apple.com> wrote:

> 
> On Jun 6, 2012, at 6:32 AM, Hal Finkel <hfinkel at anl.gov> wrote:
> 
> > On Tue, 05 Jun 2012 21:11:27 -0000
> > Andrew Trick <atrick at apple.com> wrote:
> > 
> >> Author: atrick
> >> Date: Tue Jun  5 16:11:27 2012
> >> New Revision: 158021
> >> 
> >> URL: http://llvm.org/viewvc/llvm-project?rev=158021&view=rev
> >> Log:
> >> misched: API for minimum vs. expected latency.
> > 
> > Andy,
> > 
> > If I have some loads that I expect to cache miss, can I use this
> > API to get better scheduling for them?
> 
> That's a good use for "expected" latency, even for an inorder cpu.
> The current API doesn't directly support this, but it's moving in
> that direction and supporting it in the API will be easy.

Good, let's do that! ;)

> The hard
> part is knowing which loads miss. To support this well, the scheduler
> should also track recently scheduled addresses.

True. In the long run, I would like to:

 1. Provide a way for the user to annotate.
 2. Use profiling information when available.
 3. As you suggested, have the scheduler guess.

To support this, we'll probably need some kind of target API that can
describe the cache hierarchy and the associated memory latencies;
something along the lines of the sketch below.
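
Just to make the idea concrete, here is a rough sketch (purely
hypothetical; none of these names exist in the tree today) of the kind
of target API I have in mind: a per-target description of the cache
hierarchy that the scheduler could query for an "expected" load latency:

  // Hypothetical sketch only, not an existing LLVM interface.
  struct CacheLevelInfo {
    unsigned SizeInBytes;   // capacity of this cache level
    unsigned LineSize;      // line size in bytes
    unsigned LoadLatency;   // cycles for a load that hits in this level
  };

  struct MemoryModelInfo {
    static const unsigned MaxLevels = 4;
    CacheLevelInfo Levels[MaxLevels];
    unsigned NumLevels;
    unsigned MemoryLatency; // cycles for a load that misses in all levels

    // Expected latency for a load predicted to miss the first MissLevels
    // levels of the hierarchy (0 means an expected L1 hit).
    unsigned getExpectedLoadLatency(unsigned MissLevels) const {
      if (MissLevels >= NumLevels)
        return MemoryLatency;
      return Levels[MissLevels].LoadLatency;
    }
  };

An annotation (from the user or from profile data), or a scheduler-side
guess, would then just pick the MissLevels argument, and the expected-
latency path could fold the result in without affecting the minimum
latencies used for scheduling groups.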

Thanks again,
Hal

> 
> -Andy
> 
> >> 
> >> Minimum latency determines per-cycle scheduling groups.
> >> Expected latency determines critical path and cost.
> >> 
> >> Modified:
> >>    llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
> >>    llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
> >>    llvm/trunk/include/llvm/MC/MCInstrItineraries.h
> >>    llvm/trunk/include/llvm/Target/TargetInstrInfo.h
> >>    llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> >>    llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
> >>    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
> >>    llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
> >>    llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
> >>    llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h
> >>    llvm/trunk/lib/Target/TargetInstrInfo.cpp
> >> 
> >> Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h (original) +++
> >> llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Tue Jun  5 16:11:27
> >> 2012 @@ -272,6 +272,9 @@ unsigned Depth;                     //
> >> Node depth. unsigned Height;                    // Node height.
> >>   public:
> >> +    unsigned TopReadyCycle; // Cycle relative to start when node
> >> is ready.
> >> +    unsigned BotReadyCycle; // Cycle relative to end when node is
> >> ready. +
> >>     const TargetRegisterClass *CopyDstRC; // Is a special copy node
> >> if not null. const TargetRegisterClass *CopySrcRC;
> >> 
> >> @@ -287,7 +290,7 @@
> >>         isScheduleHigh(false), isScheduleLow(false),
> >> isCloned(false), SchedulingPref(Sched::None),
> >>         isDepthCurrent(false), isHeightCurrent(false), Depth(0),
> >> Height(0),
> >> -        CopyDstRC(NULL), CopySrcRC(NULL) {}
> >> +        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL),
> >> CopySrcRC(NULL) {} 
> >>     /// SUnit - Construct an SUnit for post-regalloc scheduling to
> >> represent /// a MachineInstr.
> >> @@ -301,7 +304,7 @@
> >>         isScheduleHigh(false), isScheduleLow(false),
> >> isCloned(false), SchedulingPref(Sched::None),
> >>         isDepthCurrent(false), isHeightCurrent(false), Depth(0),
> >> Height(0),
> >> -        CopyDstRC(NULL), CopySrcRC(NULL) {}
> >> +        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL),
> >> CopySrcRC(NULL) {} 
> >>     /// SUnit - Construct a placeholder SUnit.
> >>     SUnit()
> >> @@ -314,7 +317,7 @@
> >>         isScheduleHigh(false), isScheduleLow(false),
> >> isCloned(false), SchedulingPref(Sched::None),
> >>         isDepthCurrent(false), isHeightCurrent(false), Depth(0),
> >> Height(0),
> >> -        CopyDstRC(NULL), CopySrcRC(NULL) {}
> >> +        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL),
> >> CopySrcRC(NULL) {} 
> >>     /// setNode - Assign the representative SDNode for this SUnit.
> >>     /// This may be used during pre-regalloc scheduling.
> >> @@ -552,12 +555,6 @@
> >>     ///
> >>     virtual void computeLatency(SUnit *SU) = 0;
> >> 
> >> -    /// ComputeOperandLatency - Override dependence edge latency
> >> using
> >> -    /// operand use/def information
> >> -    ///
> >> -    virtual void computeOperandLatency(SUnit *, SUnit *,
> >> -                                       SDep&) const { }
> >> -
> >>     /// ForceUnitLatencies - Return true if all scheduling edges
> >> should be given /// a latency value of one.  The default is to
> >> return false; schedulers may /// override this as needed.
> >> 
> >> Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)
> >> +++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Tue Jun  5
> >> 16:11:27 2012 @@ -291,11 +291,15 @@ ///
> >>     virtual void computeLatency(SUnit *SU);
> >> 
> >> -    /// computeOperandLatency - Override dependence edge latency
> >> using
> >> +    /// computeOperandLatency - Return dependence edge latency
> >> using /// operand use/def information
> >>     ///
> >> -    virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
> >> -                                       SDep& dep) const;
> >> +    /// FindMin may be set to get the minimum vs. expected
> >> latency. Minimum
> >> +    /// latency is used for scheduling groups, while expected
> >> latency is for
> >> +    /// instruction cost and critical path.
> >> +    virtual unsigned computeOperandLatency(SUnit *Def, SUnit *Use,
> >> +                                           const SDep& dep,
> >> +                                           bool FindMin = false)
> >> const; 
> >>     /// schedule - Order nodes according to selected style, filling
> >>     /// in the Sequence member.
> >> 
> >> Modified: llvm/trunk/include/llvm/MC/MCInstrItineraries.h
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCInstrItineraries.h?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/include/llvm/MC/MCInstrItineraries.h (original) +++
> >> llvm/trunk/include/llvm/MC/MCInstrItineraries.h Tue Jun  5 16:11:27
> >> 2012 @@ -214,6 +214,12 @@ /// class.  The latency is the maximum
> >> completion time for any stage /// in the itinerary.
> >>   ///
> >> +  /// InstrStages override the itinerary's MinLatency property. In
> >> fact, if the
> >> +  /// stage latencies, which may be zero, are less than
> >> MinLatency,
> >> +  /// getStageLatency returns a value less than MinLatency.
> >> +  ///
> >> +  /// If no stages exist, MinLatency is used. If MinLatency is
> >> invalid (<0),
> >> +  /// then it defaults to one cycle.
> >>   unsigned getStageLatency(unsigned ItinClassIndx) const {
> >>     // If the target doesn't provide itinerary information, use a
> >> simple // non-zero default value for all instructions.  Some
> >> target's provide a @@ -222,7 +228,7 @@
> >>     // stage). This is different from beginStage == endStage != 0,
> >> which could // be used for zero-latency pseudo ops.
> >>     if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0)
> >> -      return 1;
> >> +      return (Props.MinLatency < 0) ? 1 : Props.MinLatency;
> >> 
> >>     // Calculate the maximum completion time for any stage.
> >>     unsigned Latency = 0, StartCycle = 0;
> >> 
> >> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original) +++
> >> llvm/trunk/include/llvm/Target/TargetInstrInfo.h Tue Jun  5
> >> 16:11:27 2012 @@ -668,18 +668,36 @@ return Opcode <=
> >> TargetOpcode::COPY; }
> >> 
> >> +  virtual int getOperandLatency(const InstrItineraryData
> >> *ItinData,
> >> +                                SDNode *DefNode, unsigned DefIdx,
> >> +                                SDNode *UseNode, unsigned UseIdx)
> >> const = 0; +
> >>   /// getOperandLatency - Compute and return the use operand
> >> latency of a given /// pair of def and use.
> >>   /// In most cases, the static scheduling itinerary was enough to
> >> determine the /// operand latency. But it may not be possible for
> >> instructions with variable /// number of defs / uses.
> >> +  ///
> >> +  /// This is a raw interface to the itinerary that may be
> >> directly overriden by
> >> +  /// a target. Use computeOperandLatency to get the best estimate
> >> of latency. virtual int getOperandLatency(const InstrItineraryData
> >> *ItinData,
> >> -                              const MachineInstr *DefMI, unsigned
> >> DefIdx,
> >> -                              const MachineInstr *UseMI, unsigned
> >> UseIdx) const; -
> >> -  virtual int getOperandLatency(const InstrItineraryData
> >> *ItinData,
> >> -                                SDNode *DefNode, unsigned DefIdx,
> >> -                                SDNode *UseNode, unsigned UseIdx)
> >> const = 0;
> >> +                                const MachineInstr *DefMI,
> >> unsigned DefIdx,
> >> +                                const MachineInstr *UseMI,
> >> +                                unsigned UseIdx) const;
> >> +
> >> +  /// computeOperandLatency - Compute and return the latency of
> >> the given data
> >> +  /// dependent def and use. DefMI must be a valid def. UseMI may
> >> be NULL for
> >> +  /// an unknown use. If the subtarget allows, this may or may not
> >> need to call
> >> +  /// getOperandLatency().
> >> +  ///
> >> +  /// FindMin may be set to get the minimum vs. expected latency.
> >> Minimum
> >> +  /// latency is used for scheduling groups, while expected
> >> latency is for
> >> +  /// instruction cost and critical path.
> >> +  unsigned computeOperandLatency(const InstrItineraryData
> >> *ItinData,
> >> +                                 const TargetRegisterInfo *TRI,
> >> +                                 const MachineInstr *DefMI,
> >> +                                 const MachineInstr *UseMI,
> >> +                                 unsigned Reg, bool FindMin)
> >> const;
> >> 
> >>   /// getOutputLatency - Compute and return the output dependency
> >> latency of a /// a given pair of defs which both target the same
> >> register. This is usually @@ -693,13 +711,17 @@
> >>   /// getInstrLatency - Compute the instruction latency of a given
> >> instruction. /// If the instruction has higher cost when
> >> predicated, it's returned via /// PredCost.
> >> -  virtual int getInstrLatency(const InstrItineraryData *ItinData,
> >> -                              const MachineInstr *MI,
> >> -                              unsigned *PredCost = 0) const;
> >> +  virtual unsigned getInstrLatency(const InstrItineraryData
> >> *ItinData,
> >> +                                   const MachineInstr *MI,
> >> +                                   unsigned *PredCost = 0) const;
> >> 
> >>   virtual int getInstrLatency(const InstrItineraryData *ItinData,
> >>                               SDNode *Node) const = 0;
> >> 
> >> +  /// Return the default expected latency for a def based on it's
> >> opcode.
> >> +  unsigned defaultDefLatency(const InstrItineraryData *ItinData,
> >> +                             const MachineInstr *DefMI) const;
> >> +
> >>   /// isHighLatencyDef - Return true if this opcode has high
> >> latency to its /// result.
> >>   virtual bool isHighLatencyDef(int opc) const { return false; }
> >> 
> >> Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original) +++
> >> llvm/trunk/lib/CodeGen/MachineScheduler.cpp Tue Jun  5 16:11:27
> >> 2012 @@ -21,8 +21,9 @@ #include "llvm/CodeGen/Passes.h"
> >> #include "llvm/CodeGen/ScheduleDAGInstrs.h"
> >> #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
> >> -#include "llvm/Analysis/AliasAnalysis.h"
> >> #include "llvm/Target/TargetInstrInfo.h"
> >> +#include "llvm/MC/MCInstrItineraries.h"
> >> +#include "llvm/Analysis/AliasAnalysis.h"
> >> #include "llvm/Support/CommandLine.h"
> >> #include "llvm/Support/Debug.h"
> >> #include "llvm/Support/ErrorHandling.h"
> >> @@ -394,6 +395,12 @@
> >>     return RegionCriticalPSets;
> >>   }
> >> 
> >> +  /// getIssueWidth - Return the max instructions per scheduling
> >> group.
> >> +  ///
> >> +  unsigned getIssueWidth() const {
> >> +    return InstrItins ? InstrItins->Props.IssueWidth : 1;
> >> +  }
> >> +
> >> protected:
> >>   void initRegPressure();
> >>   void updateScheduledPressure(std::vector<unsigned>
> >> NewMaxPressure); @@ -787,13 +794,16 @@
> >>     /// MinReadyCycle - Cycle of the soonest available instruction.
> >>     unsigned MinReadyCycle;
> >> 
> >> +    // Remember the greatest min operand latency.
> >> +    unsigned MaxMinLatency;
> >> +
> >>     /// Pending queues extend the ready queues with the same ID and
> >> the /// PendingFlag set.
> >>     SchedBoundary(unsigned ID, const Twine &Name):
> >>       Available(ID, Name+".A"),
> >>       Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
> >>       CheckPending(false), HazardRec(0), CurrCycle(0),
> >> IssueCount(0),
> >> -      MinReadyCycle(UINT_MAX) {}
> >> +      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
> >> 
> >>     ~SchedBoundary() { delete HazardRec; }
> >> 
> >> @@ -805,6 +815,8 @@
> >> 
> >>     void bumpCycle();
> >> 
> >> +    void bumpNode(SUnit *SU, unsigned IssueWidth);
> >> +
> >>     void releasePending();
> >> 
> >>     void removeReady(SUnit *SU);
> >> @@ -868,25 +880,53 @@
> >> }
> >> 
> >> void ConvergingScheduler::releaseTopNode(SUnit *SU) {
> >> -  Top.releaseNode(SU, SU->getDepth());
> >> +  if (SU->isScheduled)
> >> +    return;
> >> +
> >> +  for (SUnit::succ_iterator I = SU->Preds.begin(), E =
> >> SU->Preds.end();
> >> +       I != E; ++I) {
> >> +    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
> >> +    unsigned Latency =
> >> +      DAG->computeOperandLatency(I->getSUnit(), SU,
> >> *I, /*FindMin=*/true); +#ifndef NDEBUG
> >> +    Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
> >> +#endif
> >> +    if (SU->TopReadyCycle < PredReadyCycle + Latency)
> >> +      SU->TopReadyCycle = PredReadyCycle + Latency;
> >> +  }
> >> +  Top.releaseNode(SU, SU->TopReadyCycle);
> >> }
> >> 
> >> void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
> >> -  Bot.releaseNode(SU, SU->getHeight());
> >> +  if (SU->isScheduled)
> >> +    return;
> >> +
> >> +  assert(SU->getInstr() && "Scheduled SUnit must have instr");
> >> +
> >> +  for (SUnit::succ_iterator I = SU->Succs.begin(), E =
> >> SU->Succs.end();
> >> +       I != E; ++I) {
> >> +    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
> >> +    unsigned Latency =
> >> +      DAG->computeOperandLatency(SU, I->getSUnit(),
> >> *I, /*FindMin=*/true); +#ifndef NDEBUG
> >> +    Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
> >> +#endif
> >> +    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
> >> +      SU->BotReadyCycle = SuccReadyCycle + Latency;
> >> +  }
> >> +  Bot.releaseNode(SU, SU->BotReadyCycle);
> >> }
> >> 
> >> void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
> >>                                                      unsigned
> >> ReadyCycle) {
> >> -  if (SU->isScheduled)
> >> -    return;
> >> -
> >>   if (ReadyCycle < MinReadyCycle)
> >>     MinReadyCycle = ReadyCycle;
> >> 
> >>   // Check for interlocks first. For the purpose of other
> >> heuristics, an // instruction that cannot issue appears as if it's
> >> not in the ReadyQueue.
> >> -  if (HazardRec->isEnabled()
> >> -      && HazardRec->getHazardType(SU) !=
> >> ScheduleHazardRecognizer::NoHazard)
> >> +  if (ReadyCycle > CurrCycle
> >> +      || (HazardRec->isEnabled() && (HazardRec->getHazardType(SU)
> >> +                                     !=
> >> ScheduleHazardRecognizer::NoHazard))) Pending.push(SU);
> >>   else
> >>     Available.push(SU);
> >> @@ -900,10 +940,11 @@
> >>   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
> >> 
> >>   if (!HazardRec->isEnabled()) {
> >> -    // Bypass lots of virtual calls in case of long latency.
> >> +    // Bypass HazardRec virtual calls.
> >>     CurrCycle = NextCycle;
> >>   }
> >>   else {
> >> +    // Bypass getHazardType calls in case of long latency.
> >>     for (; CurrCycle != NextCycle; ++CurrCycle) {
> >>       if (isTop())
> >>         HazardRec->AdvanceCycle();
> >> @@ -917,6 +958,26 @@
> >>         << CurrCycle << '\n');
> >> }
> >> 
> >> +/// Move the boundary of scheduled code by one SUnit.
> >> +void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU,
> >> +                                                  unsigned
> >> IssueWidth) {
> >> +  // Update the reservation table.
> >> +  if (HazardRec->isEnabled()) {
> >> +    if (!isTop() && SU->isCall) {
> >> +      // Calls are scheduled with their preceding instructions.
> >> For bottom-up
> >> +      // scheduling, clear the pipeline state before emitting.
> >> +      HazardRec->Reset();
> >> +    }
> >> +    HazardRec->EmitInstruction(SU);
> >> +  }
> >> +  // Check the instruction group size limit.
> >> +  ++IssueCount;
> >> +  if (IssueCount == IssueWidth) {
> >> +    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle <<
> >> '\n');
> >> +    bumpCycle();
> >> +  }
> >> +}
> >> +
> >> /// Release pending ready nodes in to the available queue. This
> >> makes them /// visible to heuristics.
> >> void ConvergingScheduler::SchedBoundary::releasePending() {
> >> @@ -928,7 +989,7 @@
> >>   // so, add them to the available queue.
> >>   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
> >>     SUnit *SU = *(Pending.begin()+i);
> >> -    unsigned ReadyCycle = isTop() ? SU->getHeight() :
> >> SU->getDepth();
> >> +    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle :
> >> SU->BotReadyCycle; 
> >>     if (ReadyCycle < MinReadyCycle)
> >>       MinReadyCycle = ReadyCycle;
> >> @@ -965,7 +1026,8 @@
> >>     releasePending();
> >> 
> >>   for (unsigned i = 0; Available.empty(); ++i) {
> >> -    assert(i <= HazardRec->getMaxLookAhead() && "permanent
> >> hazard"); (void)i;
> >> +    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
> >> +           "permanent hazard"); (void)i;
> >>     bumpCycle();
> >>     releasePending();
> >>   }
> >> @@ -1205,27 +1267,15 @@
> >> 
> >> /// Update the scheduler's state after scheduling a node. This is
> >> the same node /// that was just returned by pickNode(). However,
> >> ScheduleDAGMI needs to update -/// it's state based on the current
> >> cycle before MachineSchedStrategy. +/// it's state based on the
> >> current cycle before MachineSchedStrategy does. void
> >> ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
> >> -  // Update the reservation table.
> >> -  if (IsTopNode && Top.HazardRec->isEnabled()) {
> >> -    Top.HazardRec->EmitInstruction(SU);
> >> -    if (Top.HazardRec->atIssueLimit()) {
> >> -      DEBUG(dbgs() << "*** Max instrs at cycle " << Top.CurrCycle
> >> << '\n');
> >> -      Top.bumpCycle();
> >> -    }
> >> +  if (IsTopNode) {
> >> +    SU->TopReadyCycle = Top.CurrCycle;
> >> +    Top.bumpNode(SU, DAG->getIssueWidth());
> >>   }
> >> -  else if (Bot.HazardRec->isEnabled()) {
> >> -    if (SU->isCall) {
> >> -      // Calls are scheduled with their preceding instructions.
> >> For bottom-up
> >> -      // scheduling, clear the pipeline state before emitting.
> >> -      Bot.HazardRec->Reset();
> >> -    }
> >> -    Bot.HazardRec->EmitInstruction(SU);
> >> -    if (Bot.HazardRec->atIssueLimit()) {
> >> -      DEBUG(dbgs() << "*** Max instrs at cycle " << Bot.CurrCycle
> >> << '\n');
> >> -      Bot.bumpCycle();
> >> -    }
> >> +  else {
> >> +    SU->BotReadyCycle = Bot.CurrCycle;
> >> +    Bot.bumpNode(SU, DAG->getIssueWidth());
> >>   }
> >> }
> >> 
> >> 
> >> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original) +++
> >> llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Tue Jun  5 16:11:27
> >> 2012 @@ -271,10 +271,12 @@ // Adjust the dependence latency using
> >> operand def/use // information (if any), and then allow the target
> >> to // perform its own adjustments.
> >> -      const SDep& dep = SDep(SU, SDep::Data, LDataLatency,
> >> *Alias);
> >> +      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
> >>       if (!UnitLatencies) {
> >> -        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
> >> -        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep
> >> &>(dep));
> >> +        unsigned Latency = computeOperandLatency(SU, UseSU, dep);
> >> +        dep.setLatency(Latency);
> >> +
> >> +        ST.adjustSchedDependency(SU, UseSU, dep);
> >>       }
> >>       UseSU->addPred(dep);
> >>     }
> >> @@ -461,11 +463,13 @@
> >>       // Create a data dependence.
> >>       //
> >>       // TODO: Handle "special" address latencies cleanly.
> >> -      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency,
> >> Reg);
> >> +      SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
> >>       if (!UnitLatencies) {
> >>         // Adjust the dependence latency using operand def/use
> >> information, then // allow the target to perform its own
> >> adjustments.
> >> -        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
> >> +        unsigned Latency = computeOperandLatency(DefSU, SU,
> >> const_cast<SDep &>(dep));
> >> +        dep.setLatency(Latency);
> >> +
> >>         const TargetSubtargetInfo &ST =
> >> TM.getSubtarget<TargetSubtargetInfo>();
> >> ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); }
> >> @@ -970,8 +974,9 @@
> >> }
> >> 
> >> void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
> >> -  // Compute the latency for the node.
> >> -  if (!InstrItins || InstrItins->isEmpty()) {
> >> +  // Compute the latency for the node. We only provide a default
> >> for missing
> >> +  // itineraries. Empty itineraries still have latency properties.
> >> +  if (!InstrItins) {
> >>     SU->Latency = 1;
> >> 
> >>     // Simplistic target-independent heuristic: assume that loads
> >> take @@ -983,63 +988,15 @@
> >>   }
> >> }
> >> 
> >> -void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit
> >> *Use,
> >> -                                              SDep& dep) const {
> >> -  if (!InstrItins || InstrItins->isEmpty())
> >> -    return;
> >> -
> >> +unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def,
> >> SUnit *Use,
> >> +                                                  const SDep& dep,
> >> +                                                  bool FindMin)
> >> const { // For a data dependency with a known register...
> >>   if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
> >> -    return;
> >> +    return 1;
> >> 
> >> -  const unsigned Reg = dep.getReg();
> >> -
> >> -  // ... find the definition of the register in the defining
> >> -  // instruction
> >> -  MachineInstr *DefMI = Def->getInstr();
> >> -  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
> >> -  if (DefIdx != -1) {
> >> -    const MachineOperand &MO = DefMI->getOperand(DefIdx);
> >> -    if (MO.isReg() && MO.isImplicit() &&
> >> -        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
> >> -      // This is an implicit def, getOperandLatency() won't return
> >> the correct
> >> -      // latency. e.g.
> >> -      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ...,
> >> %Q3<imp-def>
> >> -      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
> >> -      // What we want is to compute latency between def of %D6/%D7
> >> and use of
> >> -      // %Q3 instead.
> >> -      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false,
> >> true, TRI);
> >> -      if (DefMI->getOperand(Op2).isReg())
> >> -        DefIdx = Op2;
> >> -    }
> >> -    MachineInstr *UseMI = Use->getInstr();
> >> -    // For all uses of the register, calculate the maxmimum
> >> latency
> >> -    int Latency = -1;
> >> -    if (UseMI) {
> >> -      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e;
> >> ++i) {
> >> -        const MachineOperand &MO = UseMI->getOperand(i);
> >> -        if (!MO.isReg() || !MO.isUse())
> >> -          continue;
> >> -        unsigned MOReg = MO.getReg();
> >> -        if (MOReg != Reg)
> >> -          continue;
> >> -
> >> -        int UseCycle = TII->getOperandLatency(InstrItins, DefMI,
> >> DefIdx,
> >> -                                              UseMI, i);
> >> -        Latency = std::max(Latency, UseCycle);
> >> -      }
> >> -    } else {
> >> -      // UseMI is null, then it must be a scheduling barrier.
> >> -      if (!InstrItins || InstrItins->isEmpty())
> >> -        return;
> >> -      unsigned DefClass = DefMI->getDesc().getSchedClass();
> >> -      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
> >> -    }
> >> -
> >> -    // If we found a latency, then replace the existing dependence
> >> latency.
> >> -    if (Latency >= 0)
> >> -      dep.setLatency(Latency);
> >> -  }
> >> +  return TII->computeOperandLatency(InstrItins, TRI,
> >> Def->getInstr(),
> >> +                                    Use->getInstr(), dep.getReg(),
> >> FindMin); }
> >> 
> >> void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
> >> 
> >> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
> >> (original) +++
> >> llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h Tue Jun  5
> >> 16:11:27 2012 @@ -98,12 +98,6 @@ /// virtual void
> >> computeLatency(SUnit *SU); 
> >> -    /// computeOperandLatency - Override dependence edge latency
> >> using
> >> -    /// operand use/def information
> >> -    ///
> >> -    virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
> >> -                                       SDep& dep) const { }
> >> -
> >>     virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
> >>                                        unsigned OpIdx, SDep& dep)
> >> const; 
> >> 
> >> Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original)
> >> +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Tue Jun  5
> >> 16:11:27 2012 @@ -1046,7 +1046,7 @@ return true;  // Below MI
> >>     unsigned DefDist = DDI->second;
> >>     assert(Dist > DefDist && "Visited def already?");
> >> -    if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist -
> >> DefDist))
> >> +    if (TII->getInstrLatency(InstrItins, DefMI) > (Dist -
> >> DefDist)) return true;
> >>   }
> >>   return false;
> >> 
> >> Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original) +++
> >> llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Tue Jun  5 16:11:27
> >> 2012 @@ -2567,12 +2567,13 @@ 
> >> int
> >> ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData
> >> *ItinData,
> >> -                             const MachineInstr *DefMI, unsigned
> >> DefIdx,
> >> -                             const MachineInstr *UseMI, unsigned
> >> UseIdx) const {
> >> +                                    const MachineInstr *DefMI,
> >> unsigned DefIdx,
> >> +                                    const MachineInstr *UseMI,
> >> +                                    unsigned UseIdx) const {
> >>   if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
> >> -      DefMI->isRegSequence() || DefMI->isImplicitDef())
> >> +      DefMI->isRegSequence() || DefMI->isImplicitDef()) {
> >>     return 1;
> >> -
> >> +  }
> >>   if (!ItinData || ItinData->isEmpty())
> >>     return DefMI->mayLoad() ? 3 : 1;
> >> 
> >> @@ -2983,14 +2984,16 @@
> >>                            DepMI->getNumOperands());
> >> }
> >> 
> >> -int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData
> >> *ItinData,
> >> -                                      const MachineInstr *MI,
> >> -                                      unsigned *PredCost) const {
> >> +unsigned ARMBaseInstrInfo::getInstrLatency(const
> >> InstrItineraryData *ItinData,
> >> +                                           const MachineInstr *MI,
> >> +                                           unsigned *PredCost)
> >> const { if (MI->isCopyLike() || MI->isInsertSubreg() ||
> >>       MI->isRegSequence() || MI->isImplicitDef())
> >>     return 1;
> >> 
> >> -  if (!ItinData || ItinData->isEmpty())
> >> +  // Be sure to call getStageLatency for an empty itinerary in
> >> case it has a
> >> +  // valid MinLatency property.
> >> +  if (!ItinData)
> >>     return 1;
> >> 
> >>   if (MI->isBundle()) {
> >> 
> >> Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h (original) +++
> >> llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h Tue Jun  5 16:11:27
> >> 2012 @@ -249,8 +249,9 @@ const MCInstrDesc &UseMCID,
> >>                         unsigned UseIdx, unsigned UseAlign) const;
> >> 
> >> -  int getInstrLatency(const InstrItineraryData *ItinData,
> >> -                      const MachineInstr *MI, unsigned *PredCost =
> >> 0) const;
> >> +  unsigned getInstrLatency(const InstrItineraryData *ItinData,
> >> +                           const MachineInstr *MI,
> >> +                           unsigned *PredCost = 0) const;
> >> 
> >>   int getInstrLatency(const InstrItineraryData *ItinData,
> >>                       SDNode *Node) const;
> >> 
> >> Modified: llvm/trunk/lib/Target/TargetInstrInfo.cpp
> >> URL:
> >> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetInstrInfo.cpp?rev=158021&r1=158020&r2=158021&view=diff
> >> ==============================================================================
> >> --- llvm/trunk/lib/Target/TargetInstrInfo.cpp (original) +++
> >> llvm/trunk/lib/Target/TargetInstrInfo.cpp Tue Jun  5 16:11:27 2012
> >> @@ -61,22 +61,125 @@ return 1;
> >> }
> >> 
> >> +/// Return the default expected latency for a def based on it's
> >> opcode. +unsigned TargetInstrInfo::defaultDefLatency(const
> >> InstrItineraryData *ItinData,
> >> +                                            const MachineInstr
> >> *DefMI) const {
> >> +  if (DefMI->mayLoad())
> >> +    return ItinData->Props.LoadLatency;
> >> +  if (isHighLatencyDef(DefMI->getOpcode()))
> >> +    return ItinData->Props.HighLatency;
> >> +  return 1;
> >> +}
> >> +
> >> +/// Both DefMI and UseMI must be valid.  By default, call directly
> >> to the +/// itinerary. This may be overriden by the target.
> >> int
> >> TargetInstrInfo::getOperandLatency(const InstrItineraryData
> >> *ItinData,
> >> -                             const MachineInstr *DefMI, unsigned
> >> DefIdx,
> >> -                             const MachineInstr *UseMI, unsigned
> >> UseIdx) const {
> >> -  if (!ItinData || ItinData->isEmpty())
> >> -    return -1;
> >> -
> >> +                                   const MachineInstr *DefMI,
> >> unsigned DefIdx,
> >> +                                   const MachineInstr *UseMI,
> >> +                                   unsigned UseIdx) const {
> >>   unsigned DefClass = DefMI->getDesc().getSchedClass();
> >>   unsigned UseClass = UseMI->getDesc().getSchedClass();
> >>   return ItinData->getOperandLatency(DefClass, DefIdx, UseClass,
> >> UseIdx); }
> >> 
> >> -int TargetInstrInfo::getInstrLatency(const InstrItineraryData
> >> *ItinData,
> >> -                                     const MachineInstr *MI,
> >> -                                     unsigned *PredCost) const {
> >> -  if (!ItinData || ItinData->isEmpty())
> >> +/// computeOperandLatency - Compute and return the latency of the
> >> given data +/// dependent def and use. DefMI must be a valid def.
> >> UseMI may be NULL for an +/// unknown use. Depending on the
> >> subtarget's itinerary properties, this may or +/// may not need to
> >> call getOperandLatency(). +///
> >> +/// FindMin may be set to get the minimum vs. expected latency.
> >> Minimum +/// latency is used for scheduling groups, while expected
> >> latency is for +/// instruction cost and critical path.
> >> +///
> >> +/// For most subtargets, we don't need DefIdx or UseIdx to compute
> >> min latency. +/// DefMI must be a valid definition, but UseMI may
> >> be NULL for an unknown use. +unsigned TargetInstrInfo::
> >> +computeOperandLatency(const InstrItineraryData *ItinData,
> >> +                      const TargetRegisterInfo *TRI,
> >> +                      const MachineInstr *DefMI, const
> >> MachineInstr *UseMI,
> >> +                      unsigned Reg, bool FindMin) const {
> >> +
> >> +  // Default to one cycle for missing itinerary. Empty itineraries
> >> still have
> >> +  // a properties. We have one hard-coded exception for loads, to
> >> preserve
> >> +  // existing behavior.
> >> +  if (!ItinData)
> >> +    return DefMI->mayLoad() ? 2 : 1;
> >> +
> >> +  // Return a latency based on the itinerary properties and
> >> defining instruction
> >> +  // if possible. Some common subtargets don't require per-operand
> >> latency,
> >> +  // especially for minimum latencies.
> >> +  if (FindMin) {
> >> +    // If MinLatency is valid, call getInstrLatency. This uses
> >> Stage latency if
> >> +    // it exists before defaulting to MinLatency.
> >> +    if (ItinData->Props.MinLatency >= 0)
> >> +      return getInstrLatency(ItinData, DefMI);
> >> +
> >> +    // If MinLatency is invalid, OperandLatency is interpreted as
> >> MinLatency.
> >> +    // For empty itineraries, short-cirtuit the check and default
> >> to one cycle.
> >> +    if (ItinData->isEmpty())
> >> +      return 1;
> >> +  }
> >> +  else if(ItinData->isEmpty())
> >> +    return defaultDefLatency(ItinData, DefMI);
> >> +
> >> +  // ...operand lookup required
> >> +
> >> +  // Find the definition of the register in the defining
> >> instruction.
> >> +  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
> >> +  if (DefIdx != -1) {
> >> +    const MachineOperand &MO = DefMI->getOperand(DefIdx);
> >> +    if (MO.isReg() && MO.isImplicit() &&
> >> +        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
> >> +      // This is an implicit def, getOperandLatency() won't return
> >> the correct
> >> +      // latency. e.g.
> >> +      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ...,
> >> %Q3<imp-def>
> >> +      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
> >> +      // What we want is to compute latency between def of %D6/%D7
> >> and use of
> >> +      // %Q3 instead.
> >> +      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false,
> >> true, TRI);
> >> +      if (DefMI->getOperand(Op2).isReg())
> >> +        DefIdx = Op2;
> >> +    }
> >> +    // For all uses of the register, calculate the maxmimum
> >> latency
> >> +    int OperLatency = -1;
> >> +
> >> +    // UseMI is null, then it must be a scheduling barrier.
> >> +    if (!UseMI) {
> >> +      unsigned DefClass = DefMI->getDesc().getSchedClass();
> >> +      OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
> >> +    }
> >> +    else {
> >> +      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e;
> >> ++i) {
> >> +        const MachineOperand &MO = UseMI->getOperand(i);
> >> +        if (!MO.isReg() || !MO.isUse())
> >> +          continue;
> >> +        unsigned MOReg = MO.getReg();
> >> +        if (MOReg != Reg)
> >> +          continue;
> >> +
> >> +        int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx,
> >> UseMI, i);
> >> +        OperLatency = std::max(OperLatency, UseCycle);
> >> +      }
> >> +    }
> >> +    // If we found an operand latency, we're done.
> >> +    if (OperLatency >= 0)
> >> +      return OperLatency;
> >> +  }
> >> +  // No operand latency was found.
> >> +  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
> >> +  // Expected latency is the max of the stage latency and
> >> itinerary props.
> >> +  if (!FindMin)
> >> +    InstrLatency = std::max(InstrLatency,
> >> defaultDefLatency(ItinData, DefMI));
> >> +  return InstrLatency;
> >> +}
> >> +
> >> +unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData
> >> *ItinData,
> >> +                                          const MachineInstr *MI,
> >> +                                          unsigned *PredCost)
> >> const {
> >> +  // Default to one cycle for no itinerary. However, an "empty"
> >> itinerary may
> >> +  // still have a MinLatency property, which getStageLatency
> >> checks.
> >> +  if (!ItinData)
> >>     return 1;
> >> 
> >>   return ItinData->getStageLatency(MI->getDesc().getSchedClass());
> >> 
> >> 
> >> _______________________________________________
> >> llvm-commits mailing list
> >> llvm-commits at cs.uiuc.edu
> >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> > 
> > 
> > 
> > -- 
> > Hal Finkel
> > Postdoctoral Appointee
> > Leadership Computing Facility
> > Argonne National Laboratory
> 



-- 
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory


