[llvm] r178553 - Count processor resources individually in MachineTraceMetrics.
Andrew Trick
atrick at apple.com
Mon Apr 22 12:10:41 PDT 2013
On Apr 2, 2013, at 10:49 AM, Jakob Stoklund Olesen <stoklund at 2pi.dk> wrote:
> Author: stoklund
> Date: Tue Apr 2 12:49:51 2013
> New Revision: 178553
>
> URL: http://llvm.org/viewvc/llvm-project?rev=178553&view=rev
> Log:
> Count processor resources individually in MachineTraceMetrics.
>
> The new instruction scheduling models provide information about the
> number of cycles consumed on each processor resource. This makes it
> possible to estimate ILP more accurately than simply counting
> instructions / issue width.
>
> The functions getResourceDepth() and getResourceLength() now identify
> the limiting processor resource, and return a cycle count based on that.
>
> This gives more precise resource information, particularly in traces
> that use one resource a lot more than others.
>
> Modified:
> llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h
> llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp
>
> Modified: llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h?rev=178553&r1=178552&r2=178553&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h Tue Apr 2 12:49:51 2013
> @@ -107,6 +107,13 @@ public:
> /// Get the fixed resource information about MBB. Compute it on demand.
> const FixedBlockInfo *getResources(const MachineBasicBlock*);
>
> + /// Get the scaled number of cycles used per processor resource in MBB.
> + /// This is an array with SchedModel.getNumProcResourceKinds() entries.
> + /// The getResources() function above must have been called first.
> + ///
> + /// These numbers have already been scaled by SchedModel.getResourceFactor().
> + ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
> +
> /// A virtual register or regunit required by a basic block or its trace
> /// successors.
> struct LiveInReg {
> @@ -284,6 +291,8 @@ public:
> class Ensemble {
> SmallVector<TraceBlockInfo, 4> BlockInfo;
> DenseMap<const MachineInstr*, InstrCycles> Cycles;
> + SmallVector<unsigned, 0> ProcResourceDepths;
> + SmallVector<unsigned, 0> ProcResourceHeights;
> friend class Trace;
>
> void computeTrace(const MachineBasicBlock*);
> @@ -303,6 +312,8 @@ public:
> const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
> const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
> const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
> + ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
> + ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
>
> public:
> virtual ~Ensemble();
> @@ -343,8 +354,22 @@ private:
> // One entry per basic block, indexed by block number.
> SmallVector<FixedBlockInfo, 4> BlockInfo;
>
> + // Cycles consumed on each processor resource per block.
> + // The number of processor resource kinds is constant for a given subtarget,
> + // but it is not known at compile time. The number of cycles consumed by
> + // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
> + // where Kinds = SchedModel.getNumProcResourceKinds().
> + SmallVector<unsigned, 0> ProcResourceCycles;
> +
> // One ensemble per strategy.
> Ensemble* Ensembles[TS_NumStrategies];
> +
> + // Convert scaled resource usage to a cycle count that can be compared with
> + // latencies.
> + unsigned getCycles(unsigned Scaled) {
> + unsigned Factor = SchedModel.getLatencyFactor();
> + return (Scaled + Factor - 1) / Factor;
> + }
> };
>
> inline raw_ostream &operator<<(raw_ostream &OS,
>
> Modified: llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp?rev=178553&r1=178552&r2=178553&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp Tue Apr 2 12:49:51 2013
> @@ -18,6 +18,7 @@
> #include "llvm/CodeGen/Passes.h"
> #include "llvm/MC/MCSubtargetInfo.h"
> #include "llvm/Support/Debug.h"
> +#include "llvm/Support/Format.h"
> #include "llvm/Support/raw_ostream.h"
> #include "llvm/Target/TargetInstrInfo.h"
> #include "llvm/Target/TargetRegisterInfo.h"
> @@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFu
> MF->getTarget().getSubtarget<TargetSubtargetInfo>();
> SchedModel.init(*ST.getSchedModel(), &ST, TII);
> BlockInfo.resize(MF->getNumBlockIDs());
> + ProcResourceCycles.resize(MF->getNumBlockIDs() *
> + SchedModel.getNumProcResourceKinds());
> return false;
> }
>
> @@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const
> return FBI;
>
> // Compute resource usage in the block.
> - // FIXME: Compute per-functional unit counts.
> FBI->HasCalls = false;
> unsigned InstrCount = 0;
> +
> + // Add up per-processor resource cycles as well.
> + unsigned PRKinds = SchedModel.getNumProcResourceKinds();
> + SmallVector<unsigned, 32> PRCycles(PRKinds);
> +
> for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
> I != E; ++I) {
> const MachineInstr *MI = I;
> @@ -96,11 +103,39 @@ MachineTraceMetrics::getResources(const
> ++InstrCount;
> if (MI->isCall())
> FBI->HasCalls = true;
> +
> + // Count processor resources used.
> + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
> + if (!SC->isValid())
> + continue;
> +
> + for (TargetSchedModel::ProcResIter
> + PI = SchedModel.getWriteProcResBegin(SC),
> + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
> + assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
> + PRCycles[PI->ProcResourceIdx] += PI->Cycles;
> + }
> }
> FBI->InstrCount = InstrCount;
> +
> + // Scale the resource cycles so they are comparable.
> + unsigned PROffset = MBB->getNumber() * PRKinds;
> + for (unsigned K = 0; K != PRKinds; ++K)
> + ProcResourceCycles[PROffset + K] =
> + PRCycles[K] * SchedModel.getResourceFactor(K);
> +
> return FBI;
> }
>
> +ArrayRef<unsigned>
> +MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
> + assert(BlockInfo[MBBNum].hasResources() &&
> + "getResources() must be called before getProcResourceCycles()");
> + unsigned PRKinds = SchedModel.getNumProcResourceKinds();
> + return ArrayRef<unsigned>(&ProcResourceCycles[MBBNum * PRKinds], PRKinds);
> +}
> +
> +
> //===----------------------------------------------------------------------===//
> // Ensemble utility functions
> //===----------------------------------------------------------------------===//
> @@ -108,6 +143,9 @@ MachineTraceMetrics::getResources(const
> MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
> : MTM(*ct) {
> BlockInfo.resize(MTM.BlockInfo.size());
> + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> + ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
> + ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
> }
>
> // Virtual destructor serves as an anchor.
> @@ -123,21 +161,32 @@ MachineTraceMetrics::Ensemble::getLoopFo
> void MachineTraceMetrics::Ensemble::
> computeDepthResources(const MachineBasicBlock *MBB) {
> TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
> + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> + unsigned PROffset = MBB->getNumber() * PRKinds;
>
> // Compute resources from trace above. The top block is simple.
> if (!TBI->Pred) {
> TBI->InstrDepth = 0;
> TBI->Head = MBB->getNumber();
> + std::fill(ProcResourceDepths.begin() + PROffset,
> + ProcResourceDepths.begin() + PROffset + PRKinds, 0);
> return;
> }
>
> // Compute from the block above. A post-order traversal ensures the
> // predecessor is always computed first.
> - TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
> + unsigned PredNum = TBI->Pred->getNumber();
> + TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
> assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
> const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
> TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
> TBI->Head = PredTBI->Head;
> +
> + // Compute per-resource depths.
> + ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
> + ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
> + for (unsigned K = 0; K != PRKinds; ++K)
> + ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
> }
>
> // Update resource-related information in the TraceBlockInfo for MBB.
> @@ -145,22 +194,33 @@ computeDepthResources(const MachineBasic
> void MachineTraceMetrics::Ensemble::
> computeHeightResources(const MachineBasicBlock *MBB) {
> TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
> + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> + unsigned PROffset = MBB->getNumber() * PRKinds;
>
> // Compute resources for the current block.
> TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
> + ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
>
> // The trace tail is done.
> if (!TBI->Succ) {
> TBI->Tail = MBB->getNumber();
> + std::copy(PRCycles.begin(), PRCycles.end(),
> + ProcResourceHeights.begin() + PROffset);
> return;
> }
>
> // Compute from the block below. A post-order traversal ensures the
> // predecessor is always computed first.
> - TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
> + unsigned SuccNum = TBI->Succ->getNumber();
> + TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
> assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
> TBI->InstrHeight += SuccTBI->InstrHeight;
> TBI->Tail = SuccTBI->Tail;
> +
> + // Compute per-resource heights.
> + ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
> + for (unsigned K = 0; K != PRKinds; ++K)
> + ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
> }
>
> // Check if depth resources for MBB are valid and return the TBI.
> @@ -181,6 +241,31 @@ getHeightResources(const MachineBasicBlo
> return TBI->hasValidHeight() ? TBI : 0;
> }
>
> +/// Get an array of processor resource depths for MBB. Indexed by processor
> +/// resource kind, this array contains the scaled processor resources consumed
> +/// by all blocks preceding MBB in its trace. It does not include instructions
> +/// in MBB.
> +///
> +/// Compare TraceBlockInfo::InstrDepth.
> +ArrayRef<unsigned>
> +MachineTraceMetrics::Ensemble::
> +getProcResourceDepths(unsigned MBBNum) const {
> + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> + return ArrayRef<unsigned>(&ProcResourceDepths[MBBNum * PRKinds], PRKinds);
> +}
> +
> +/// Get an array of processor resource heights for MBB. Indexed by processor
> +/// resource kind, this array contains the scaled processor resources consumed
> +/// by this block and all blocks following it in its trace.
> +///
> +/// Compare TraceBlockInfo::InstrHeight.
> +ArrayRef<unsigned>
> +MachineTraceMetrics::Ensemble::
> +getProcResourceHeights(unsigned MBBNum) const {
> + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
> + return ArrayRef<unsigned>(&ProcResourceHeights[MBBNum * PRKinds], PRKinds);
> +}
> +
> //===----------------------------------------------------------------------===//
> // Trace Selection Strategies
> //===----------------------------------------------------------------------===//
> @@ -713,11 +798,24 @@ computeInstrDepths(const MachineBasicBlo
> SmallVector<DataDep, 8> Deps;
> while (!Stack.empty()) {
> MBB = Stack.pop_back_val();
> - DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
> + DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
> TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
> TBI.HasValidInstrDepths = true;
> TBI.CriticalPath = 0;
>
> + // Print out resource depths here as well.
> + DEBUG({
> + dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
> + ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
> + for (unsigned K = 0; K != PRDepths.size(); ++K)
> + if (PRDepths[K]) {
> + unsigned Factor = MTM.SchedModel.getResourceFactor(K);
> + dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
> + << MTM.SchedModel.getProcResource(K)->Name << " ("
> + << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
> + }
> + });
> +
> // Also compute the critical path length through MBB when possible.
> if (TBI.HasValidInstrHeights)
> TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
> @@ -928,6 +1026,18 @@ computeInstrHeights(const MachineBasicBl
> TBI.HasValidInstrHeights = true;
> TBI.CriticalPath = 0;
>
> + DEBUG({
> + dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
> + ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
> + for (unsigned K = 0; K != PRHeights.size(); ++K)
> + if (PRHeights[K]) {
> + unsigned Factor = MTM.SchedModel.getResourceFactor(K);
> + dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
> + << MTM.SchedModel.getProcResource(K)->Name << " ("
> + << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
> + }
> + });
> +
> // Get dependencies from PHIs in the trace successor.
> const MachineBasicBlock *Succ = TBI.Succ;
> // If MBB is the last block in the trace, and it has a back-edge to the
> @@ -1058,27 +1168,52 @@ MachineTraceMetrics::Trace::getPHIDepth(
> }
>
> unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
> - // For now, we compute the resource depth from instruction count / issue
> - // width. Eventually, we should compute resource depth per functional unit
> - // and return the max.
> + // Find the limiting processor resource.
> + // Numbers have been pre-scaled to be comparable.
> + unsigned PRMax = 0;
> + ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
> + if (Bottom) {
> + ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
> + for (unsigned K = 0; K != PRDepths.size(); ++K)
> + PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
> + } else {
> + for (unsigned K = 0; K != PRDepths.size(); ++K)
> + PRMax = std::max(PRMax, PRDepths[K]);
> + }
> + // Convert to cycle count.
> + PRMax = TE.MTM.getCycles(PRMax);
> +
> unsigned Instrs = TBI.InstrDepth;
> if (Bottom)
> Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
> if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
> Instrs /= IW;
> // Assume issue width 1 without a schedule model.
> - return Instrs;
> + return std::max(Instrs, PRMax);
> }
>
> unsigned MachineTraceMetrics::Trace::
> getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
> + // Add up resources above and below the center block.
> + ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
> + ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
> + unsigned PRMax = 0;
> + for (unsigned K = 0; K != PRDepths.size(); ++K) {
> + unsigned PRCycles = PRDepths[K] + PRHeights[K];
> + for (unsigned I = 0; I != Extrablocks.size(); ++I)
> + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
> + PRMax = std::max(PRMax, PRCycles);
> + }
> + // Convert to cycle count.
> + PRMax = TE.MTM.getCycles(PRMax);
> +
> unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
> for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
> Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
> if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
> Instrs /= IW;
> // Assume issue width 1 without a schedule model.
> - return Instrs;
> + return std::max(Instrs, PRMax);
> }
It's not clear to me when you account for the resources in the current block. Is this missing a call to getProcResourceCycles(getBlockNum())?
-Andy
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130422/753f24fb/attachment.html>
More information about the llvm-commits mailing list