[llvm] r178553 - Count processor resources individually in MachineTraceMetrics.

Andrew Trick atrick at apple.com
Mon Apr 22 12:10:41 PDT 2013


On Apr 2, 2013, at 10:49 AM, Jakob Stoklund Olesen <stoklund at 2pi.dk> wrote:

> Author: stoklund
> Date: Tue Apr  2 12:49:51 2013
> New Revision: 178553
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=178553&view=rev
> Log:
> Count processor resources individually in MachineTraceMetrics.
> 
> The new instruction scheduling models provide information about the
> number of cycles consumed on each processor resource. This makes it
> possible to estimate ILP more accurately than simply counting
> instructions / issue width.
> 
> The functions getResourceDepth() and getResourceLength() now identify
> the limiting processor resource, and return a cycle count based on that.
> 
> This gives more precise resource information, particularly in traces
> that use one resource a lot more than others.
> 
> Modified:
>    llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h
>    llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp
> 
> Modified: llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h?rev=178553&r1=178552&r2=178553&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h Tue Apr  2 12:49:51 2013
> @@ -107,6 +107,13 @@ public:
>   /// Get the fixed resource information about MBB. Compute it on demand.
>   const FixedBlockInfo *getResources(const MachineBasicBlock*);
> 
> +  /// Get the scaled number of cycles used per processor resource in MBB.
> +  /// This is an array with SchedModel.getNumProcResourceKinds() entries.
> +  /// The getResources() function above must have been called first.
> +  ///
> +  /// These numbers have already been scaled by SchedModel.getResourceFactor().
> +  ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
> +
>   /// A virtual register or regunit required by a basic block or its trace
>   /// successors.
>   struct LiveInReg {
> @@ -284,6 +291,8 @@ public:
>   class Ensemble {
>     SmallVector<TraceBlockInfo, 4> BlockInfo;
>     DenseMap<const MachineInstr*, InstrCycles> Cycles;
> +    SmallVector<unsigned, 0> ProcResourceDepths;
> +    SmallVector<unsigned, 0> ProcResourceHeights;
>     friend class Trace;
> 
>     void computeTrace(const MachineBasicBlock*);
> @@ -303,6 +312,8 @@ public:
>     const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
>     const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
>     const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
> +    ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
> +    ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
> 
>   public:
>     virtual ~Ensemble();
> @@ -343,8 +354,22 @@ private:
>   // One entry per basic block, indexed by block number.
>   SmallVector<FixedBlockInfo, 4> BlockInfo;
> 
> +  // Cycles consumed on each processor resource per block.
> +  // The number of processor resource kinds is constant for a given subtarget,
> +  // but it is not known at compile time. The number of cycles consumed by
> +  // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
> +  // where Kinds = SchedModel.getNumProcResourceKinds().
> +  SmallVector<unsigned, 0> ProcResourceCycles;
> +
>   // One ensemble per strategy.
>   Ensemble* Ensembles[TS_NumStrategies];
> +
> +  // Convert scaled resource usage to a cycle count that can be compared with
> +  // latencies.
> +  unsigned getCycles(unsigned Scaled) {
> +    unsigned Factor = SchedModel.getLatencyFactor();
> +    return (Scaled + Factor - 1) / Factor;
> +  }
> };
> 
> inline raw_ostream &operator<<(raw_ostream &OS,
> 
> Modified: llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp?rev=178553&r1=178552&r2=178553&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp Tue Apr  2 12:49:51 2013
> @@ -18,6 +18,7 @@
> #include "llvm/CodeGen/Passes.h"
> #include "llvm/MC/MCSubtargetInfo.h"
> #include "llvm/Support/Debug.h"
> +#include "llvm/Support/Format.h"
> #include "llvm/Support/raw_ostream.h"
> #include "llvm/Target/TargetInstrInfo.h"
> #include "llvm/Target/TargetRegisterInfo.h"
> @@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFu
>     MF->getTarget().getSubtarget<TargetSubtargetInfo>();
>   SchedModel.init(*ST.getSchedModel(), &ST, TII);
>   BlockInfo.resize(MF->getNumBlockIDs());
> +  ProcResourceCycles.resize(MF->getNumBlockIDs() *
> +                            SchedModel.getNumProcResourceKinds());
>   return false;
> }
> 
> @@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const
>     return FBI;
> 
>   // Compute resource usage in the block.
> -  // FIXME: Compute per-functional unit counts.
>   FBI->HasCalls = false;
>   unsigned InstrCount = 0;
> +
> +  // Add up per-processor resource cycles as well.
> +  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
> +  SmallVector<unsigned, 32> PRCycles(PRKinds);
> +
>   for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
>        I != E; ++I) {
>     const MachineInstr *MI = I;
> @@ -96,11 +103,39 @@ MachineTraceMetrics::getResources(const
>     ++InstrCount;
>     if (MI->isCall())
>       FBI->HasCalls = true;
> +
> +    // Count processor resources used.
> +    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
> +    if (!SC->isValid())
> +      continue;
> +
> +    for (TargetSchedModel::ProcResIter
> +         PI = SchedModel.getWriteProcResBegin(SC),
> +         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
> +      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
> +      PRCycles[PI->ProcResourceIdx] += PI->Cycles;
> +    }
>   }
>   FBI->InstrCount = InstrCount;
> +
> +  // Scale the resource cycles so they are comparable.
> +  unsigned PROffset = MBB->getNumber() * PRKinds;
> +  for (unsigned K = 0; K != PRKinds; ++K)
> +    ProcResourceCycles[PROffset + K] =
> +      PRCycles[K] * SchedModel.getResourceFactor(K);
> +
>   return FBI;
> }
> 
> +ArrayRef<unsigned>
> +MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
> +  assert(BlockInfo[MBBNum].hasResources() &&
> +         "getResources() must be called before getProcResourceCycles()");
> +  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
> +  return ArrayRef<unsigned>(&ProcResourceCycles[MBBNum * PRKinds], PRKinds);
> +}
> +
> +
> //===----------------------------------------------------------------------===//
> //                         Ensemble utility functions
> //===----------------------------------------------------------------------===//
> @@ -108,6 +143,9 @@ MachineTraceMetrics::getResources(const
> MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
>   : MTM(*ct) {
>   BlockInfo.resize(MTM.BlockInfo.size());
> +  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> +  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
> +  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
> }
> 
> // Virtual destructor serves as an anchor.
> @@ -123,21 +161,32 @@ MachineTraceMetrics::Ensemble::getLoopFo
> void MachineTraceMetrics::Ensemble::
> computeDepthResources(const MachineBasicBlock *MBB) {
>   TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
> +  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> +  unsigned PROffset = MBB->getNumber() * PRKinds;
> 
>   // Compute resources from trace above. The top block is simple.
>   if (!TBI->Pred) {
>     TBI->InstrDepth = 0;
>     TBI->Head = MBB->getNumber();
> +    std::fill(ProcResourceDepths.begin() + PROffset,
> +              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
>     return;
>   }
> 
>   // Compute from the block above. A post-order traversal ensures the
>   // predecessor is always computed first.
> -  TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
> +  unsigned PredNum = TBI->Pred->getNumber();
> +  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
>   assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
>   const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
>   TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
>   TBI->Head = PredTBI->Head;
> +
> +  // Compute per-resource depths.
> +  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
> +  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
> +  for (unsigned K = 0; K != PRKinds; ++K)
> +    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
> }
> 
> // Update resource-related information in the TraceBlockInfo for MBB.
> @@ -145,22 +194,33 @@ computeDepthResources(const MachineBasic
> void MachineTraceMetrics::Ensemble::
> computeHeightResources(const MachineBasicBlock *MBB) {
>   TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
> +  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> +  unsigned PROffset = MBB->getNumber() * PRKinds;
> 
>   // Compute resources for the current block.
>   TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
> +  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
> 
>   // The trace tail is done.
>   if (!TBI->Succ) {
>     TBI->Tail = MBB->getNumber();
> +    std::copy(PRCycles.begin(), PRCycles.end(),
> +              ProcResourceHeights.begin() + PROffset);
>     return;
>   }
> 
>   // Compute from the block below. A post-order traversal ensures the
>   // predecessor is always computed first.
> -  TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
> +  unsigned SuccNum = TBI->Succ->getNumber();
> +  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
>   assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
>   TBI->InstrHeight += SuccTBI->InstrHeight;
>   TBI->Tail = SuccTBI->Tail;
> +
> +  // Compute per-resource heights.
> +  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
> +  for (unsigned K = 0; K != PRKinds; ++K)
> +    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
> }
> 
> // Check if depth resources for MBB are valid and return the TBI.
> @@ -181,6 +241,31 @@ getHeightResources(const MachineBasicBlo
>   return TBI->hasValidHeight() ? TBI : 0;
> }
> 
> +/// Get an array of processor resource depths for MBB. Indexed by processor
> +/// resource kind, this array contains the scaled processor resources consumed
> +/// by all blocks preceding MBB in its trace. It does not include instructions
> +/// in MBB.
> +///
> +/// Compare TraceBlockInfo::InstrDepth.
> +ArrayRef<unsigned>
> +MachineTraceMetrics::Ensemble::
> +getProcResourceDepths(unsigned MBBNum) const {
> +  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> +  return ArrayRef<unsigned>(&ProcResourceDepths[MBBNum * PRKinds], PRKinds);
> +}
> +
> +/// Get an array of processor resource heights for MBB. Indexed by processor
> +/// resource kind, this array contains the scaled processor resources consumed
> +/// by this block and all blocks following it in its trace.
> +///
> +/// Compare TraceBlockInfo::InstrHeight.
> +ArrayRef<unsigned>
> +MachineTraceMetrics::Ensemble::
> +getProcResourceHeights(unsigned MBBNum) const {
> +  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> +  return ArrayRef<unsigned>(&ProcResourceHeights[MBBNum * PRKinds], PRKinds);
> +}
> +
> //===----------------------------------------------------------------------===//
> //                         Trace Selection Strategies
> //===----------------------------------------------------------------------===//
> @@ -713,11 +798,24 @@ computeInstrDepths(const MachineBasicBlo
>   SmallVector<DataDep, 8> Deps;
>   while (!Stack.empty()) {
>     MBB = Stack.pop_back_val();
> -    DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
> +    DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
>     TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
>     TBI.HasValidInstrDepths = true;
>     TBI.CriticalPath = 0;
> 
> +    // Print out resource depths here as well.
> +    DEBUG({
> +      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
> +      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
> +      for (unsigned K = 0; K != PRDepths.size(); ++K)
> +        if (PRDepths[K]) {
> +          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
> +          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
> +                 << MTM.SchedModel.getProcResource(K)->Name << " ("
> +                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
> +        }
> +    });
> +
>     // Also compute the critical path length through MBB when possible.
>     if (TBI.HasValidInstrHeights)
>       TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
> @@ -928,6 +1026,18 @@ computeInstrHeights(const MachineBasicBl
>     TBI.HasValidInstrHeights = true;
>     TBI.CriticalPath = 0;
> 
> +    DEBUG({
> +      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
> +      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
> +      for (unsigned K = 0; K != PRHeights.size(); ++K)
> +        if (PRHeights[K]) {
> +          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
> +          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
> +                 << MTM.SchedModel.getProcResource(K)->Name << " ("
> +                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
> +        }
> +    });
> +
>     // Get dependencies from PHIs in the trace successor.
>     const MachineBasicBlock *Succ = TBI.Succ;
>     // If MBB is the last block in the trace, and it has a back-edge to the
> @@ -1058,27 +1168,52 @@ MachineTraceMetrics::Trace::getPHIDepth(
> }
> 
> unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
> -  // For now, we compute the resource depth from instruction count / issue
> -  // width. Eventually, we should compute resource depth per functional unit
> -  // and return the max.
> +  // Find the limiting processor resource.
> +  // Numbers have been pre-scaled to be comparable.
> +  unsigned PRMax = 0;
> +  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
> +  if (Bottom) {
> +    ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
> +    for (unsigned K = 0; K != PRDepths.size(); ++K)
> +      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
> +  } else {
> +    for (unsigned K = 0; K != PRDepths.size(); ++K)
> +      PRMax = std::max(PRMax, PRDepths[K]);
> +  }
> +  // Convert to cycle count.
> +  PRMax = TE.MTM.getCycles(PRMax);
> +
>   unsigned Instrs = TBI.InstrDepth;
>   if (Bottom)
>     Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
>   if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
>     Instrs /= IW;
>   // Assume issue width 1 without a schedule model.
> -  return Instrs;
> +  return std::max(Instrs, PRMax);
> }
> 
> unsigned MachineTraceMetrics::Trace::
> getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
> +  // Add up resources above and below the center block.
> +  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
> +  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
> +  unsigned PRMax = 0;
> +  for (unsigned K = 0; K != PRDepths.size(); ++K) {
> +    unsigned PRCycles = PRDepths[K] + PRHeights[K];
> +    for (unsigned I = 0; I != Extrablocks.size(); ++I)
> +      PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
> +    PRMax = std::max(PRMax, PRCycles);
> +  }
> +  // Convert to cycle count.
> +  PRMax = TE.MTM.getCycles(PRMax);
> +
>   unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
>   for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
>     Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
>   if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
>     Instrs /= IW;
>   // Assume issue width 1 without a schedule model.
> -  return Instrs;
> +  return std::max(Instrs, PRMax);
> }

It's not clear to me when you account for the resources in the current block. Is this missing a call to getProcResourceCycles(getBlockNum())?

-Andy

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130422/753f24fb/attachment.html>


More information about the llvm-commits mailing list