[llvm] r333755 - [llvm-mca] Move the logic that computes the block throughput into Support.h. NFC
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 1 07:35:21 PDT 2018
Author: adibiagio
Date: Fri Jun 1 07:35:21 2018
New Revision: 333755
URL: http://llvm.org/viewvc/llvm-project?rev=333755&view=rev
Log:
[llvm-mca] Move the logic that computes the block throughput into Support.h. NFC
This will allow us to share the logic that computes the block throughput with
other views.
Modified:
llvm/trunk/tools/llvm-mca/SummaryView.cpp
llvm/trunk/tools/llvm-mca/SummaryView.h
llvm/trunk/tools/llvm-mca/Support.cpp
llvm/trunk/tools/llvm-mca/Support.h
Modified: llvm/trunk/tools/llvm-mca/SummaryView.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/SummaryView.cpp?rev=333755&r1=333754&r2=333755&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/SummaryView.cpp (original)
+++ llvm/trunk/tools/llvm-mca/SummaryView.cpp Fri Jun 1 07:35:21 2018
@@ -24,6 +24,14 @@ namespace mca {
using namespace llvm;
+SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+ unsigned Width)
+ : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
+ NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
+ ProcResourceMasks(Model.getNumProcResourceKinds(), 0) {
+ computeProcResourceMasks(SM, ProcResourceMasks);
+}
+
void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
// We are only interested in the "instruction dispatched" events generated by
// the dispatch stage for instructions that are part of iteration #0.
@@ -41,56 +49,23 @@ void SummaryView::onInstructionEvent(con
const InstrDesc &Desc = Inst.getDesc();
NumMicroOps += Desc.NumMicroOps;
for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
- if (!RU.second.size())
- continue;
-
- assert(RU.second.NumUnits && "Expected more than one unit used!");
- if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
- ProcResourceUsage[RU.first] = RU.second.size();
- continue;
+ if (RU.second.size()) {
+ const auto It = find(ProcResourceMasks, RU.first);
+ assert(It != ProcResourceMasks.end() &&
+ "Invalid processor resource mask!");
+ ProcResourceUsage[std::distance(ProcResourceMasks.begin(), It)] +=
+ RU.second.size();
}
-
- ProcResourceUsage[RU.first] += RU.second.size();
}
}
-double SummaryView::getBlockRThroughput() const {
- assert(NumMicroOps && "Expected at least one micro opcode!");
-
- SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
- computeProcResourceMasks(SM, Masks);
-
- // The block throughput is bounded from above by the hardware dispatch
- // throughput. That is because the DispatchWidth is an upper bound on the
- // number of opcodes that can be part of a single dispatch group.
- double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
-
- // The block throughput is also limited by the amount of hardware parallelism.
- // The number of available resource units affects the resource pressure
- // distributed, as well as how many blocks can be executed every cycle.
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
- uint64_t Mask = Masks[I];
- const auto It = ProcResourceUsage.find_as(Mask);
- if (It != ProcResourceUsage.end()) {
- const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
- unsigned NumUnits = MCDesc.NumUnits;
- double Throughput = static_cast<double>(It->second) / NumUnits;
- Max = std::max(Max, Throughput);
- }
- }
-
- // The block reciprocal throughput is computed as the MAX of:
- // - (#uOps / DispatchWidth)
- // - (#units / resource cycles) for every consumed processor resource.
- return Max;
-}
-
void SummaryView::printView(raw_ostream &OS) const {
unsigned Iterations = Source.getNumIterations();
unsigned Instructions = Source.size();
unsigned TotalInstructions = Instructions * Iterations;
double IPC = (double)TotalInstructions / TotalCycles;
- double BlockRThroughput = getBlockRThroughput();
+ double BlockRThroughput = computeBlockRThroughput(
+ SM, DispatchWidth, NumMicroOps, ProcResourceUsage);
std::string Buffer;
raw_string_ostream TempStream(Buffer);
Modified: llvm/trunk/tools/llvm-mca/SummaryView.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/SummaryView.h?rev=333755&r1=333754&r2=333755&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/SummaryView.h (original)
+++ llvm/trunk/tools/llvm-mca/SummaryView.h Fri Jun 1 07:35:21 2018
@@ -45,10 +45,15 @@ class SummaryView : public View {
unsigned TotalCycles;
// The total number of micro opcodes contributed by a block of instructions.
unsigned NumMicroOps;
- // For each processor resource, this map stores the cumulative number of
- // resource cycles consumed by a block of instructions. The resource mask ID
- // is used as the key value to access elements of this map.
- llvm::DenseMap<uint64_t, unsigned> ProcResourceUsage;
+ // For each processor resource, this vector stores the cumulative number of
+ // resource cycles consumed by the analyzed code block.
+ llvm::SmallVector<unsigned, 8> ProcResourceUsage;
+
+ // Each processor resource is associated with a so-called processor resource
+ // mask. This vector allows us to correlate processor resource IDs with
+ // processor resource masks. There is exactly one element for each processor
+ // resource declared by the scheduling model.
+ llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
// Compute the reciprocal throughput for the analyzed code block.
// The reciprocal block throughput is computed as the MAX between:
@@ -58,9 +63,7 @@ class SummaryView : public View {
public:
SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
- unsigned Width)
- : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
- NumMicroOps(0) {}
+ unsigned Width);
void onCycleEnd() override { ++TotalCycles; }
Modified: llvm/trunk/tools/llvm-mca/Support.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Support.cpp?rev=333755&r1=333754&r2=333755&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Support.cpp (original)
+++ llvm/trunk/tools/llvm-mca/Support.cpp Fri Jun 1 07:35:21 2018
@@ -48,4 +48,32 @@ void computeProcResourceMasks(const MCSc
ProcResourceID++;
}
}
+
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+ unsigned NumMicroOps,
+ ArrayRef<unsigned> ProcResourceUsage) {
+ // The block throughput is bounded from above by the hardware dispatch
+ // throughput. That is because the DispatchWidth is an upper bound on the
+ // number of opcodes that can be part of a single dispatch group.
+ double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+ // The block throughput is also limited by the amount of hardware parallelism.
+ // The number of available resource units affects the resource pressure
+ // distribution, as well as how many blocks can be executed every cycle.
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ unsigned ResourceCycles = ProcResourceUsage[I];
+ if (!ResourceCycles)
+ continue;
+
+ const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
+ double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits;
+ Max = std::max(Max, Throughput);
+ }
+
+ // The block reciprocal throughput is computed as the MAX of:
+ // - (NumMicroOps / DispatchWidth)
+ // - (ResourceCycles / NumUnits) for every consumed processor resource.
+ return Max;
+}
+
} // namespace mca
Modified: llvm/trunk/tools/llvm-mca/Support.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Support.h?rev=333755&r1=333754&r2=333755&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Support.h (original)
+++ llvm/trunk/tools/llvm-mca/Support.h Fri Jun 1 07:35:21 2018
@@ -15,6 +15,7 @@
#ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H
#define LLVM_TOOLS_LLVM_MCA_SUPPORT_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
@@ -44,6 +45,14 @@ namespace mca {
/// problems with simple bit manipulation operations.
void computeProcResourceMasks(const llvm::MCSchedModel &SM,
llvm::SmallVectorImpl<uint64_t> &Masks);
+
+/// Compute the reciprocal block throughput from a set of processor resource
+/// cycles. The reciprocal block throughput is computed as the MAX between:
+/// - NumMicroOps / DispatchWidth
+/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource).
+double computeBlockRThroughput(const llvm::MCSchedModel &SM,
+ unsigned DispatchWidth, unsigned NumMicroOps,
+ llvm::ArrayRef<unsigned> ProcResourceUsage);
} // namespace mca
#endif
More information about the llvm-commits
mailing list