[llvm] r343680 - [llvm-exegesis] Resolve variant classes in analysis.
Clement Courbet via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 3 04:50:25 PDT 2018
Author: courbet
Date: Wed Oct 3 04:50:25 2018
New Revision: 343680
URL: http://llvm.org/viewvc/llvm-project?rev=343680&view=rev
Log:
[llvm-exegesis] Resolve variant classes in analysis.
Summary: See PR38884.
Reviewers: gchatelet
Subscribers: tschuett, RKSimon, llvm-commits
Differential Revision: https://reviews.llvm.org/D52825
Added:
llvm/trunk/test/tools/llvm-exegesis/X86/analysis-uops-variant.test
Modified:
llvm/trunk/tools/llvm-exegesis/lib/Analysis.cpp
llvm/trunk/tools/llvm-exegesis/lib/Analysis.h
Added: llvm/trunk/test/tools/llvm-exegesis/X86/analysis-uops-variant.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-exegesis/X86/analysis-uops-variant.test?rev=343680&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-exegesis/X86/analysis-uops-variant.test (added)
+++ llvm/trunk/test/tools/llvm-exegesis/X86/analysis-uops-variant.test Wed Oct 3 04:50:25 2018
@@ -0,0 +1,26 @@
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file="" -analysis-numpoints=1 | FileCheck %s
+
+# CHECK: cluster_id,opcode_name,config,sched_class,SBPort0,SBPort1,SBPort23,SBPort4,SBPort5,NumMicroOps
+# CHECK-NEXT: SBWriteZeroLatency
+
+---
+mode: uops
+key:
+ instructions:
+ - 'XOR32rr EAX EAX EAX'
+ config: ''
+ register_initial_values:
+cpu_name: sandybridge
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: SBPort0, value: 0.0012, per_snippet_value: 0.0012 }
+ - { key: SBPort1, value: 0.0021, per_snippet_value: 0.0021 }
+ - { key: SBPort23, value: 0.0013, per_snippet_value: 0.0013 }
+ - { key: SBPort4, value: 0.0018, per_snippet_value: 0.0018 }
+ - { key: SBPort5, value: 0.0012, per_snippet_value: 0.0012 }
+ - { key: NumMicroOps, value: 1.0108, per_snippet_value: 1.0108 }
+error: ''
+info: ''
+assembled_snippet: 31C031C031C031C031C031C031C031C031C031C031C031C031C031C031C031C0C3
+...
Modified: llvm/trunk/tools/llvm-exegesis/lib/Analysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/Analysis.cpp?rev=343680&r1=343679&r2=343680&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/Analysis.cpp (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/Analysis.cpp Wed Oct 3 04:50:25 2018
@@ -19,6 +19,16 @@ namespace exegesis {
static const char kCsvSep = ',';
+static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
+ unsigned SchedClassId,
+ const llvm::MCInst &MCI) {
+ const auto &SM = STI.getSchedModel();
+ while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
+ SchedClassId =
+ STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
+ return SchedClassId;
+}
+
namespace {
enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
@@ -126,11 +136,12 @@ void Analysis::printInstructionRowCsv(co
writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
OS << kCsvSep;
assert(!Point.Key.Instructions.empty());
- // FIXME: Resolve variant classes.
- const unsigned SchedClassId =
- InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
const auto &SchedModel = SubtargetInfo_->getSchedModel();
+ const llvm::MCInst &MCI = Point.Key.Instructions[0];
+ const unsigned SchedClassId = resolveSchedClassId(
+ *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
const llvm::MCSchedClassDesc *const SCDesc =
SchedModel.getSchedClassDesc(SchedClassId);
writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
@@ -193,21 +204,43 @@ Analysis::run<Analysis::PrintClusters>(l
return llvm::Error::success();
}
-std::unordered_map<unsigned, std::vector<size_t>>
+Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
+ ResolvedSchedClass &&RSC)
+ : RSC(std::move(RSC)) {}
+
+std::vector<Analysis::ResolvedSchedClassAndPoints>
Analysis::makePointsPerSchedClass() const {
- std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
+ std::vector<ResolvedSchedClassAndPoints> Entries;
+ // Maps SchedClassIds to index in result.
+ std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
const auto &Points = Clustering_.getPoints();
for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
const InstructionBenchmark &Point = Points[PointId];
if (!Point.Error.empty())
continue;
assert(!Point.Key.Instructions.empty());
- const auto Opcode = Point.Key.Instructions[0].getOpcode();
- // FIXME: Resolve variant classes.
- PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
- PointId);
+ // FIXME: we should be using the tuple of classes for instructions in the
+ // snippet as key.
+ const llvm::MCInst &MCI = Point.Key.Instructions[0];
+ unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
+ const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
+ .getSchedClassDesc(SchedClassId)
+ ->isVariant();
+ SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
+ const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
+ if (IndexIt == SchedClassIdToIndex.end()) {
+ // Create a new entry.
+ SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
+ ResolvedSchedClassAndPoints Entry(
+ ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
+ Entry.PointIds.push_back(PointId);
+ Entries.push_back(std::move(Entry));
+ } else {
+ // Append to the existing entry.
+ Entries[IndexIt->second].PointIds.push_back(PointId);
+ }
}
- return PointsPerSchedClass;
+ return Entries;
}
// Uops repeat the same opcode over again. Just show this opcode and show the
@@ -239,8 +272,8 @@ writeLatencySnippetHtml(llvm::raw_ostrea
}
void Analysis::printSchedClassClustersHtml(
- const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
- llvm::raw_ostream &OS) const {
+ const std::vector<SchedClassCluster> &Clusters,
+ const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
const auto &Points = Clustering_.getPoints();
OS << "<table class=\"sched-class-clusters\">";
OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
@@ -254,7 +287,7 @@ void Analysis::printSchedClassClustersHt
OS << "</tr>";
for (const SchedClassCluster &Cluster : Clusters) {
OS << "<tr class=\""
- << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
+ << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
? "good-cluster"
: "bad-cluster")
<< "\"><td>";
@@ -369,12 +402,17 @@ getNonRedundantWriteProcRes(const llvm::
return Result;
}
-Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
- const llvm::MCSubtargetInfo &STI)
- : SCDesc(&SD),
- NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
+Analysis::ResolvedSchedClass::ResolvedSchedClass(
+ const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
+ bool WasVariant)
+ : SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
+ WasVariant(WasVariant),
+ NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
IdealizedProcResPressure(computeIdealizedProcResPressure(
- STI.getSchedModel(), NonRedundantWriteProcRes)) {}
+ STI.getSchedModel(), NonRedundantWriteProcRes)) {
+ assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
+ "ResolvedSchedClass should never be variant");
+}
void Analysis::SchedClassCluster::addPoint(
size_t PointId, const InstructionBenchmarkClustering &Clustering) {
@@ -407,7 +445,7 @@ static unsigned findProcResIdx(const llv
}
bool Analysis::SchedClassCluster::measurementsMatch(
- const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
+ const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
const InstructionBenchmarkClustering &Clustering) const {
const size_t NumMeasurements = Representative.size();
std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
@@ -424,9 +462,9 @@ bool Analysis::SchedClassCluster::measur
}
// Find the latency.
SchedClassPoint[0].PerInstructionValue = 0.0;
- for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
+ for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
const llvm::MCWriteLatencyEntry *const WLE =
- STI.getWriteLatencyEntry(SC.SCDesc, I);
+ STI.getWriteLatencyEntry(RSC.SCDesc, I);
SchedClassPoint[0].PerInstructionValue =
std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
}
@@ -438,17 +476,17 @@ bool Analysis::SchedClassCluster::measur
if (ProcResIdx > 0) {
// Find the pressure on ProcResIdx `Key`.
const auto ProcResPressureIt =
- std::find_if(SC.IdealizedProcResPressure.begin(),
- SC.IdealizedProcResPressure.end(),
+ std::find_if(RSC.IdealizedProcResPressure.begin(),
+ RSC.IdealizedProcResPressure.end(),
[ProcResIdx](const std::pair<uint16_t, float> &WPR) {
return WPR.first == ProcResIdx;
});
SchedClassPoint[I].PerInstructionValue =
- ProcResPressureIt == SC.IdealizedProcResPressure.end()
+ ProcResPressureIt == RSC.IdealizedProcResPressure.end()
? 0.0
: ProcResPressureIt->second;
} else if (Key == "NumMicroOps") {
- SchedClassPoint[I].PerInstructionValue = SC.SCDesc->NumMicroOps;
+ SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
} else {
llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
"name, got "
@@ -465,26 +503,25 @@ bool Analysis::SchedClassCluster::measur
return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
}
-void Analysis::printSchedClassDescHtml(const SchedClass &SC,
+void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
llvm::raw_ostream &OS) const {
OS << "<table class=\"sched-class-desc\">";
OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
"th><th>WriteProcRes</th><th title=\"This is the idealized unit "
"resource (port) pressure assuming ideal distribution\">Idealized "
"Resource Pressure</th></tr>";
- if (SC.SCDesc->isValid()) {
+ if (RSC.SCDesc->isValid()) {
const auto &SM = SubtargetInfo_->getSchedModel();
OS << "<tr><td>✔</td>";
- OS << "<td>" << (SC.SCDesc->isVariant() ? "✔" : "✕")
- << "</td>";
- OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>";
+ OS << "<td>" << (RSC.WasVariant ? "✔" : "✕") << "</td>";
+ OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
// Latencies.
OS << "<td><ul>";
- for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
+ for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
const auto *const Entry =
- SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I);
+ SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
OS << "<li>" << Entry->Cycles;
- if (SC.SCDesc->NumWriteLatencyEntries > 1) {
+ if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
// Disambiguate if more than 1 latency.
OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
}
@@ -493,7 +530,7 @@ void Analysis::printSchedClassDescHtml(c
OS << "</ul></td>";
// WriteProcRes.
OS << "<td><ul>";
- for (const auto &WPR : SC.NonRedundantWriteProcRes) {
+ for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
OS << "<li><span class=\"mono\">";
writeEscaped<kEscapeHtml>(OS,
SM.getProcResource(WPR.ProcResourceIdx)->Name);
@@ -502,7 +539,7 @@ void Analysis::printSchedClassDescHtml(c
OS << "</ul></td>";
// Idealized port pressure.
OS << "<td><ul>";
- for (const auto &Pressure : SC.IdealizedProcResPressure) {
+ for (const auto &Pressure : RSC.IdealizedProcResPressure) {
OS << "<li><span class=\"mono\">";
writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
.getProcResource(Pressure.first)
@@ -598,19 +635,12 @@ llvm::Error Analysis::run<Analysis::Prin
writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
OS << "</span></h3>";
- for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
- const auto SchedClassId = SchedClassAndPoints.first;
- const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
- const auto &SchedModel = SubtargetInfo_->getSchedModel();
- const llvm::MCSchedClassDesc *const SCDesc =
- SchedModel.getSchedClassDesc(SchedClassId);
- if (!SCDesc)
+ for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
+ if (!RSCAndPoints.RSC.SCDesc)
continue;
- const SchedClass SC(*SCDesc, *SubtargetInfo_);
-
// Bucket sched class points into sched class clusters.
std::vector<SchedClassCluster> SchedClassClusters;
- for (const size_t PointId : SchedClassPoints) {
+ for (const size_t PointId : RSCAndPoints.PointIds) {
const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
if (!ClusterId.isValid())
continue; // Ignore noise and errors. FIXME: take noise into account ?
@@ -629,24 +659,24 @@ llvm::Error Analysis::run<Analysis::Prin
// Print any scheduling class that has at least one cluster that does not
// match the checked-in data.
if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
- [this, &SC](const SchedClassCluster &C) {
- return C.measurementsMatch(*SubtargetInfo_, SC,
- Clustering_);
+ [this, &RSCAndPoints](const SchedClassCluster &C) {
+ return C.measurementsMatch(*SubtargetInfo_,
+ RSCAndPoints.RSC, Clustering_);
}))
continue; // Nothing weird.
OS << "<div class=\"inconsistency\"><p>Sched Class <span "
"class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
+ writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
#else
OS << SchedClassId;
#endif
OS << "</span> contains instructions whose performance characteristics do"
" not match that of LLVM:</p>";
- printSchedClassClustersHtml(SchedClassClusters, SC, OS);
+ printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
OS << "<p>llvm SchedModel data:</p>";
- printSchedClassDescHtml(SC, OS);
+ printSchedClassDescHtml(RSCAndPoints.RSC, OS);
OS << "</div>";
}
Modified: llvm/trunk/tools/llvm-exegesis/lib/Analysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-exegesis/lib/Analysis.h?rev=343680&r1=343679&r2=343680&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-exegesis/lib/Analysis.h (original)
+++ llvm/trunk/tools/llvm-exegesis/lib/Analysis.h Wed Oct 3 04:50:25 2018
@@ -49,11 +49,12 @@ private:
using ClusterId = InstructionBenchmarkClustering::ClusterId;
// An llvm::MCSchedClassDesc augmented with some additional data.
- struct SchedClass {
- SchedClass(const llvm::MCSchedClassDesc &SD,
- const llvm::MCSubtargetInfo &STI);
+ struct ResolvedSchedClass {
+ ResolvedSchedClass(const llvm::MCSubtargetInfo &STI,
+ unsigned ResolvedSchedClassId, bool WasVariant);
const llvm::MCSchedClassDesc *const SCDesc;
+ const bool WasVariant; // Whether the original class was variant.
const llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
NonRedundantWriteProcRes;
const std::vector<std::pair<uint16_t, float>> IdealizedProcResPressure;
@@ -75,7 +76,8 @@ private:
// Returns true if the cluster representative measurements match that of SC.
bool
- measurementsMatch(const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
+ measurementsMatch(const llvm::MCSubtargetInfo &STI,
+ const ResolvedSchedClass &SC,
const InstructionBenchmarkClustering &Clustering) const;
void addPoint(size_t PointId,
@@ -92,15 +94,22 @@ private:
void
printSchedClassClustersHtml(const std::vector<SchedClassCluster> &Clusters,
- const SchedClass &SC,
+ const ResolvedSchedClass &SC,
llvm::raw_ostream &OS) const;
- void printSchedClassDescHtml(const SchedClass &SC,
+ void printSchedClassDescHtml(const ResolvedSchedClass &SC,
llvm::raw_ostream &OS) const;
- // Builds a map of Sched Class -> indices of points that belong to the sched
- // class.
- std::unordered_map<unsigned, std::vector<size_t>>
- makePointsPerSchedClass() const;
+ // A pair of (Sched Class, indices of points that belong to the sched
+ // class).
+ struct ResolvedSchedClassAndPoints {
+ explicit ResolvedSchedClassAndPoints(ResolvedSchedClass &&RSC);
+
+ ResolvedSchedClass RSC;
+ std::vector<size_t> PointIds;
+ };
+
+ // Builds a list of ResolvedSchedClassAndPoints.
+ std::vector<ResolvedSchedClassAndPoints> makePointsPerSchedClass() const;
template <typename EscapeTag, EscapeTag Tag>
void writeSnippet(llvm::raw_ostream &OS, llvm::ArrayRef<uint8_t> Bytes,
More information about the llvm-commits
mailing list