<html><head><meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">On Apr 2, 2013, at 10:49 AM, Jakob Stoklund Olesen &lt;<a href="mailto:stoklund@2pi.dk">stoklund@2pi.dk</a>&gt; wrote:<br><div><br class="Apple-interchange-newline"><blockquote type="cite"><div style="letter-spacing: normal; orphans: auto; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: auto; word-spacing: 0px; -webkit-text-stroke-width: 0px;">Author: stoklund<br>Date: Tue Apr 2 12:49:51 2013<br>New Revision: 178553<br><br>URL:<span class="Apple-converted-space"> </span><a href="http://llvm.org/viewvc/llvm-project?rev=178553&view=rev">http://llvm.org/viewvc/llvm-project?rev=178553&view=rev</a><br>Log:<br>Count processor resources individually in MachineTraceMetrics.<br><br>The new instruction scheduling models provide information about the<br>number of cycles consumed on each processor resource. 
This makes it<br>possible to estimate ILP more accurately than simply counting<br>instructions / issue width.<br><br>The functions getResourceDepth() and getResourceLength() now identify<br>the limiting processor resource, and return a cycle count based on that.<br><br>This gives more precise resource information, particularly in traces<br>that use one resource a lot more than others.<br><br>Modified:<br> llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h<br> llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp<br><br>Modified: llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h<br>URL:<span class="Apple-converted-space"> </span><a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h?rev=178553&r1=178552&r2=178553&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h?rev=178553&r1=178552&r2=178553&view=diff</a><br>==============================================================================<br>--- llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h (original)<br>+++ llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h Tue Apr 2 12:49:51 2013<br>@@ -107,6 +107,13 @@ public:<br> /// Get the fixed resource information about MBB. 
Compute it on demand.<br> const FixedBlockInfo *getResources(const MachineBasicBlock*);<br><br>+ /// Get the scaled number of cycles used per processor resource in MBB.<br>+ /// This is an array with SchedModel.getNumProcResourceKinds() entries.<br>+ /// The getResources() function above must have been called first.<br>+ ///<br>+ /// These numbers have already been scaled by SchedModel.getResourceFactor().<br>+ ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;<br>+<br> /// A virtual register or regunit required by a basic block or its trace<br> /// successors.<br> struct LiveInReg {<br>@@ -284,6 +291,8 @@ public:<br> class Ensemble {<br> SmallVector<TraceBlockInfo, 4> BlockInfo;<br> DenseMap<const MachineInstr*, InstrCycles> Cycles;<br>+ SmallVector<unsigned, 0> ProcResourceDepths;<br>+ SmallVector<unsigned, 0> ProcResourceHeights;<br> friend class Trace;<br><br> void computeTrace(const MachineBasicBlock*);<br>@@ -303,6 +312,8 @@ public:<br> const MachineLoop *getLoopFor(const MachineBasicBlock*) const;<br> const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;<br> const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;<br>+ ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;<br>+ ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;<br><br> public:<br> virtual ~Ensemble();<br>@@ -343,8 +354,22 @@ private:<br> // One entry per basic block, indexed by block number.<br> SmallVector<FixedBlockInfo, 4> BlockInfo;<br><br>+ // Cycles consumed on each processor resource per block.<br>+ // The number of processor resource kinds is constant for a given subtarget,<br>+ // but it is not known at compile time. 
The number of cycles consumed by<br>+ // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]<br>+ // where Kinds = SchedModel.getNumProcResourceKinds().<br>+ SmallVector<unsigned, 0> ProcResourceCycles;<br>+<br> // One ensemble per strategy.<br> Ensemble* Ensembles[TS_NumStrategies];<br>+<br>+ // Convert scaled resource usage to a cycle count that can be compared with<br>+ // latencies.<br>+ unsigned getCycles(unsigned Scaled) {<br>+ unsigned Factor = SchedModel.getLatencyFactor();<br>+ return (Scaled + Factor - 1) / Factor;<br>+ }<br>};<br><br>inline raw_ostream &operator<<(raw_ostream &OS,<br><br>Modified: llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp<br>URL:<span class="Apple-converted-space"> </span><a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp?rev=178553&r1=178552&r2=178553&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp?rev=178553&r1=178552&r2=178553&view=diff</a><br>==============================================================================<br>--- llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp (original)<br>+++ llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp Tue Apr 2 12:49:51 2013<br>@@ -18,6 +18,7 @@<br>#include "llvm/CodeGen/Passes.h"<br>#include "llvm/MC/MCSubtargetInfo.h"<br>#include "llvm/Support/Debug.h"<br>+#include "llvm/Support/Format.h"<br>#include "llvm/Support/raw_ostream.h"<br>#include "llvm/Target/TargetInstrInfo.h"<br>#include "llvm/Target/TargetRegisterInfo.h"<br>@@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFu<br> MF->getTarget().getSubtarget<TargetSubtargetInfo>();<br> SchedModel.init(*ST.getSchedModel(), &ST, TII);<br> BlockInfo.resize(MF->getNumBlockIDs());<br>+ ProcResourceCycles.resize(MF->getNumBlockIDs() *<br>+ SchedModel.getNumProcResourceKinds());<br> return false;<br>}<br><br>@@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const<br> return FBI;<br><br> // Compute resource usage in the block.<br>- // 
FIXME: Compute per-functional unit counts.<br> FBI->HasCalls = false;<br> unsigned InstrCount = 0;<br>+<br>+ // Add up per-processor resource cycles as well.<br>+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();<br>+ SmallVector<unsigned, 32> PRCycles(PRKinds);<br>+<br> for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();<br> I != E; ++I) {<br> const MachineInstr *MI = I;<br>@@ -96,11 +103,39 @@ MachineTraceMetrics::getResources(const<br> ++InstrCount;<br> if (MI->isCall())<br> FBI->HasCalls = true;<br>+<br>+ // Count processor resources used.<br>+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);<br>+ if (!SC->isValid())<br>+ continue;<br>+<br>+ for (TargetSchedModel::ProcResIter<br>+ PI = SchedModel.getWriteProcResBegin(SC),<br>+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {<br>+ assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");<br>+ PRCycles[PI->ProcResourceIdx] += PI->Cycles;<br>+ }<br> }<br> FBI->InstrCount = InstrCount;<br>+<br>+ // Scale the resource cycles so they are comparable.<br>+ unsigned PROffset = MBB->getNumber() * PRKinds;<br>+ for (unsigned K = 0; K != PRKinds; ++K)<br>+ ProcResourceCycles[PROffset + K] =<br>+ PRCycles[K] * SchedModel.getResourceFactor(K);<br>+<br> return FBI;<br>}<br><br>+ArrayRef<unsigned><br>+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {<br>+ assert(BlockInfo[MBBNum].hasResources() &&<br>+ "getResources() must be called before getProcResourceCycles()");<br>+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();<br>+ return ArrayRef<unsigned>(&ProcResourceCycles[MBBNum * PRKinds], PRKinds);<br>+}<br>+<br>+<br>//===----------------------------------------------------------------------===//<br>// Ensemble utility functions<br>//===----------------------------------------------------------------------===//<br>@@ -108,6 +143,9 @@ MachineTraceMetrics::getResources(const<br>MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics 
*ct)<br> : MTM(*ct) {<br> BlockInfo.resize(MTM.BlockInfo.size());<br>+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();<br>+ ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);<br>+ ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);<br>}<br><br>// Virtual destructor serves as an anchor.<br>@@ -123,21 +161,32 @@ MachineTraceMetrics::Ensemble::getLoopFo<br>void MachineTraceMetrics::Ensemble::<br>computeDepthResources(const MachineBasicBlock *MBB) {<br> TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];<br>+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();<br>+ unsigned PROffset = MBB->getNumber() * PRKinds;<br><br> // Compute resources from trace above. The top block is simple.<br> if (!TBI->Pred) {<br> TBI->InstrDepth = 0;<br> TBI->Head = MBB->getNumber();<br>+ std::fill(ProcResourceDepths.begin() + PROffset,<br>+ ProcResourceDepths.begin() + PROffset + PRKinds, 0);<br> return;<br> }<br><br> // Compute from the block above. A post-order traversal ensures the<br> // predecessor is always computed first.<br>- TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];<br>+ unsigned PredNum = TBI->Pred->getNumber();<br>+ TraceBlockInfo *PredTBI = &BlockInfo[PredNum];<br> assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");<br> const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);<br> TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;<br> TBI->Head = PredTBI->Head;<br>+<br>+ // Compute per-resource depths.<br>+ ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);<br>+ ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);<br>+ for (unsigned K = 0; K != PRKinds; ++K)<br>+ ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];<br>}<br><br>// Update resource-related information in the TraceBlockInfo for MBB.<br>@@ -145,22 +194,33 @@ computeDepthResources(const MachineBasic<br>void MachineTraceMetrics::Ensemble::<br>computeHeightResources(const 
MachineBasicBlock *MBB) {<br> TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];<br>+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();<br>+ unsigned PROffset = MBB->getNumber() * PRKinds;<br><br> // Compute resources for the current block.<br> TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;<br>+ ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());<br><br> // The trace tail is done.<br> if (!TBI->Succ) {<br> TBI->Tail = MBB->getNumber();<br>+ std::copy(PRCycles.begin(), PRCycles.end(),<br>+ ProcResourceHeights.begin() + PROffset);<br> return;<br> }<br><br> // Compute from the block below. A post-order traversal ensures the<br> // predecessor is always computed first.<br>- TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];<br>+ unsigned SuccNum = TBI->Succ->getNumber();<br>+ TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];<br> assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");<br> TBI->InstrHeight += SuccTBI->InstrHeight;<br> TBI->Tail = SuccTBI->Tail;<br>+<br>+ // Compute per-resource heights.<br>+ ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);<br>+ for (unsigned K = 0; K != PRKinds; ++K)<br>+ ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];<br>}<br><br>// Check if depth resources for MBB are valid and return the TBI.<br>@@ -181,6 +241,31 @@ getHeightResources(const MachineBasicBlo<br> return TBI->hasValidHeight() ? TBI : 0;<br>}<br><br>+/// Get an array of processor resource depths for MBB. Indexed by processor<br>+/// resource kind, this array contains the scaled processor resources consumed<br>+/// by all blocks preceding MBB in its trace. 
It does not include instructions<br>+/// in MBB.<br>+///<br>+/// Compare TraceBlockInfo::InstrDepth.<br>+ArrayRef<unsigned><br>+MachineTraceMetrics::Ensemble::<br>+getProcResourceDepths(unsigned MBBNum) const {<br>+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();<br>+ return ArrayRef<unsigned>(&ProcResourceDepths[MBBNum * PRKinds], PRKinds);<br>+}<br>+<br>+/// Get an array of processor resource heights for MBB. Indexed by processor<br>+/// resource kind, this array contains the scaled processor resources consumed<br>+/// by this block and all blocks following it in its trace.<br>+///<br>+/// Compare TraceBlockInfo::InstrHeight.<br>+ArrayRef<unsigned><br>+MachineTraceMetrics::Ensemble::<br>+getProcResourceHeights(unsigned MBBNum) const {<br>+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();<br>+ return ArrayRef<unsigned>(&ProcResourceHeights[MBBNum * PRKinds], PRKinds);<br>+}<br>+<br>//===----------------------------------------------------------------------===//<br>// Trace Selection Strategies<br>//===----------------------------------------------------------------------===//<br>@@ -713,11 +798,24 @@ computeInstrDepths(const MachineBasicBlo<br> SmallVector<DataDep, 8> Deps;<br> while (!Stack.empty()) {<br> MBB = Stack.pop_back_val();<br>- DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");<br>+ DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");<br> TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];<br> TBI.HasValidInstrDepths = true;<br> TBI.CriticalPath = 0;<br><br>+ // Print out resource depths here as well.<br>+ DEBUG({<br>+ dbgs() << format("%7u Instructions\n", TBI.InstrDepth);<br>+ ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());<br>+ for (unsigned K = 0; K != PRDepths.size(); ++K)<br>+ if (PRDepths[K]) {<br>+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);<br>+ dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))<br>+ << MTM.SchedModel.getProcResource(K)->Name << " ("<br>+ 
<< PRDepths[K]/Factor << " ops x" << Factor << ")\n";<br>+ }<br>+ });<br>+<br> // Also compute the critical path length through MBB when possible.<br> if (TBI.HasValidInstrHeights)<br> TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);<br>@@ -928,6 +1026,18 @@ computeInstrHeights(const MachineBasicBl<br> TBI.HasValidInstrHeights = true;<br> TBI.CriticalPath = 0;<br><br>+ DEBUG({<br>+ dbgs() << format("%7u Instructions\n", TBI.InstrHeight);<br>+ ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());<br>+ for (unsigned K = 0; K != PRHeights.size(); ++K)<br>+ if (PRHeights[K]) {<br>+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);<br>+ dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))<br>+ << MTM.SchedModel.getProcResource(K)->Name << " ("<br>+ << PRHeights[K]/Factor << " ops x" << Factor << ")\n";<br>+ }<br>+ });<br>+<br> // Get dependencies from PHIs in the trace successor.<br> const MachineBasicBlock *Succ = TBI.Succ;<br> // If MBB is the last block in the trace, and it has a back-edge to the<br>@@ -1058,27 +1168,52 @@ MachineTraceMetrics::Trace::getPHIDepth(<br>}<br><br>unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {<br>- // For now, we compute the resource depth from instruction count / issue<br>- // width. 
Eventually, we should compute resource depth per functional unit<br>- // and return the max.<br>+ // Find the limiting processor resource.<br>+ // Numbers have been pre-scaled to be comparable.<br>+ unsigned PRMax = 0;<br>+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());<br>+ if (Bottom) {<br>+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());<br>+ for (unsigned K = 0; K != PRDepths.size(); ++K)<br>+ PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);<br>+ } else {<br>+ for (unsigned K = 0; K != PRDepths.size(); ++K)<br>+ PRMax = std::max(PRMax, PRDepths[K]);<br>+ }<br>+ // Convert to cycle count.<br>+ PRMax = TE.MTM.getCycles(PRMax);<br>+<br> unsigned Instrs = TBI.InstrDepth;<br> if (Bottom)<br> Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;<br> if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())<br> Instrs /= IW;<br> // Assume issue width 1 without a schedule model.<br>- return Instrs;<br>+ return std::max(Instrs, PRMax);<br>}<br><br>unsigned MachineTraceMetrics::Trace::<br>getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {<br>+ // Add up resources above and below the center block.<br>+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());<br>+ ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());<br>+ unsigned PRMax = 0;<br>+ for (unsigned K = 0; K != PRDepths.size(); ++K) {<br>+ unsigned PRCycles = PRDepths[K] + PRHeights[K];<br>+ for (unsigned I = 0; I != Extrablocks.size(); ++I)<br>+ PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];<br>+ PRMax = std::max(PRMax, PRCycles);<br>+ }<br>+ // Convert to cycle count.<br>+ PRMax = TE.MTM.getCycles(PRMax);<br>+<br> unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;<br> for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)<br> Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;<br> if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())<br> Instrs /= IW;<br> // Assume issue 
width 1 without a schedule model.<br>- return Instrs;<br>+ return std::max(Instrs, PRMax);<br>}<br></div></blockquote></div><br><div>It's not clear to me when you account for the resources in the current block. Is this missing a call to getProcResourceCycles(getBlockNum())?</div><div><br></div><div>-Andy</div><div><br></div></body></html>