[llvm] e030f80 - [Exegesis] Native clusterization: sub-partition by sched class id

Tue Sep 7 07:54:50 PDT 2021

Author: Roman Lebedev
Date: 2021-09-07T17:54:37+03:00
New Revision: e030f808ec69ce84f04c434ab6782d2628b71ef6

URL: https://github.com/llvm/llvm-project/commit/e030f808ec69ce84f04c434ab6782d2628b71ef6
DIFF: https://github.com/llvm/llvm-project/commit/e030f808ec69ce84f04c434ab6782d2628b71ef6.diff

LOG: [Exegesis] Native clusterization: sub-partition by sched class id

Currently native clusterization simply groups all benchmarks
by the opcode of key instruction, but that is suboptimal in certain cases,
e.g. where we can already tell that the particular instructions
already resolve into different sched classes.

Added: 
    

Modified: 
    llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test
    llvm/tools/llvm-exegesis/lib/Analysis.cpp
    llvm/tools/llvm-exegesis/lib/Analysis.h
    llvm/tools/llvm-exegesis/lib/Clustering.cpp
    llvm/tools/llvm-exegesis/lib/Clustering.h
    llvm/tools/llvm-exegesis/llvm-exegesis.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-
diff erent-sched-class.test b/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-
diff erent-sched-class.test
index 86190825c9c39..9c8eec0bddab8 100644

--- a/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-
diff erent-sched-class.test
+++ b/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-
diff erent-sched-class.test
@@ -1,9 +1,15 @@
 # RUN: llvm-exegesis -mcpu=znver3 -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s
 
+# Naive clusterization mainly groups by instruction opcode,
+# but it should also partition the benchmarks of the same opcode
+# by the sched class. For example, a regular `xor`, and same-operand `xor`
+# may have 
diff erent characteristics, and it will be confusing/misleading
+# to group them.
+
 # CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}}
 # CHECK-CLUSTERS-NEXT: {{^}}0,
 # CHECK-CLUSTERS-SAME: ,1.00{{$}}
-# CHECK-CLUSTERS-NEXT: {{^}}0,
+# CHECK-CLUSTERS:      {{^}}1,
 # CHECK-CLUSTERS-SAME: ,0.20{{$}}
 
 ---

diff  --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp
index be360b9f6d718..b12f872a28d97 100644
--- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp
@@ -151,12 +151,15 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
   OS << "\n";
 }
 
-Analysis::Analysis(const Target &Target, std::unique_ptr<MCInstrInfo> InstrInfo,
+Analysis::Analysis(const Target &Target,
+                   std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
+                   std::unique_ptr<MCInstrInfo> InstrInfo,
                    const InstructionBenchmarkClustering &Clustering,
                    double AnalysisInconsistencyEpsilon,
                    bool AnalysisDisplayUnstableOpcodes,
                    const std::string &ForceCpuName)
-    : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)),
+    : Clustering_(Clustering), SubtargetInfo_(std::move(SubtargetInfo)),
+      InstrInfo_(std::move(InstrInfo)),
       AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
                                            AnalysisInconsistencyEpsilon),
       AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {

diff  --git a/llvm/tools/llvm-exegesis/lib/Analysis.h b/llvm/tools/llvm-exegesis/lib/Analysis.h
index 9d8b04c99906a..c52948ab0cdef 100644
--- a/llvm/tools/llvm-exegesis/lib/Analysis.h
+++ b/llvm/tools/llvm-exegesis/lib/Analysis.h
@@ -36,7 +36,8 @@ namespace exegesis {
 // A helper class to analyze benchmark results for a target.
 class Analysis {
 public:
-  Analysis(const Target &Target, std::unique_ptr<MCInstrInfo> InstrInfo,
+  Analysis(const Target &Target, std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
+           std::unique_ptr<MCInstrInfo> InstrInfo,
            const InstructionBenchmarkClustering &Clustering,
            double AnalysisInconsistencyEpsilon,
            bool AnalysisDisplayUnstableOpcodes,

diff  --git a/llvm/tools/llvm-exegesis/lib/Clustering.cpp b/llvm/tools/llvm-exegesis/lib/Clustering.cpp
index c9070ef554dd6..08646aac52ef2 100644
--- a/llvm/tools/llvm-exegesis/lib/Clustering.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Clustering.cpp
@@ -8,13 +8,15 @@
 
 #include "Clustering.h"
 #include "Error.h"
+#include "SchedClassResolution.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include <algorithm>
+#include <deque>
 #include <string>
 #include <vector>
-#include <deque>
 
 namespace llvm {
 namespace exegesis {
@@ -183,46 +185,58 @@ void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts) {
   }
 }
 
-void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes) {
-  // Given an instruction Opcode, which are the benchmarks of this instruction?
-  std::vector<SmallVector<size_t, 1>> OpcodeToPoints;
-  OpcodeToPoints.resize(NumOpcodes);
-  size_t NumOpcodesSeen = 0;
+void InstructionBenchmarkClustering::clusterizeNaive(
+    const MCSubtargetInfo &SubtargetInfo, const MCInstrInfo &InstrInfo) {
+  // Given an instruction Opcode, which sched class id's are represented,
+  // and which are the benchmarks for each sched class?
+  std::vector<SmallMapVector<unsigned, SmallVector<size_t, 1>, 1>>
+      OpcodeToSchedClassesToPoints;
+  const unsigned NumOpcodes = InstrInfo.getNumOpcodes();
+  OpcodeToSchedClassesToPoints.resize(NumOpcodes);
+  size_t NumClusters = 0;
   for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
     const InstructionBenchmark &Point = Points_[P];
-    const unsigned Opcode = Point.keyInstruction().getOpcode();
+    const MCInst &MCI = Point.keyInstruction();
+    unsigned SchedClassId;
+    std::tie(SchedClassId, std::ignore) =
+        ResolvedSchedClass::resolveSchedClassId(SubtargetInfo, InstrInfo, MCI);
+    const unsigned Opcode = MCI.getOpcode();
     assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
-    SmallVectorImpl<size_t> &PointsOfOpcode = OpcodeToPoints[Opcode];
-    if (PointsOfOpcode.empty()) // If we previously have not seen any points of
-      ++NumOpcodesSeen; // this opcode, then naturally this is the new opcode.
-    PointsOfOpcode.emplace_back(P);
+    auto &Points = OpcodeToSchedClassesToPoints[Opcode][SchedClassId];
+    if (Points.empty()) // If we previously have not seen any points of
+      ++NumClusters;    // this opcode's sched class, then new cluster begins.
+    Points.emplace_back(P);
   }
-  assert(OpcodeToPoints.size() == NumOpcodes && "sanity check");
-  assert(NumOpcodesSeen <= NumOpcodes &&
+  assert(NumClusters <= NumOpcodes &&
          "can't see more opcodes than there are total opcodes");
-  assert(NumOpcodesSeen <= Points_.size() &&
+  assert(NumClusters <= Points_.size() &&
          "can't see more opcodes than there are total points");
 
-  Clusters_.reserve(NumOpcodesSeen); // One cluster per opcode.
-  for (ArrayRef<size_t> PointsOfOpcode :
-       make_filter_range(OpcodeToPoints, [](ArrayRef<size_t> PointsOfOpcode) {
-         return !PointsOfOpcode.empty(); // Ignore opcodes with no points.
-       })) {
-    // Create a new cluster.
-    Clusters_.emplace_back(ClusterId::makeValid(
-        Clusters_.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode)));
-    Cluster &CurrentCluster = Clusters_.back();
-    // Mark points as belonging to the new cluster.
-    for_each(PointsOfOpcode, [this, &CurrentCluster](size_t P) {
-      ClusterIdForPoint_[P] = CurrentCluster.Id;
-    });
-    // And add all the points of this opcode to the new cluster.
-    CurrentCluster.PointIndices.reserve(PointsOfOpcode.size());
-    CurrentCluster.PointIndices.assign(PointsOfOpcode.begin(),
-                                       PointsOfOpcode.end());
-    assert(CurrentCluster.PointIndices.size() == PointsOfOpcode.size());
+  Clusters_.reserve(NumClusters); // We already know how many clusters there is.
+  for (const auto &SchedClassesOfOpcode : OpcodeToSchedClassesToPoints) {
+    if (SchedClassesOfOpcode.empty())
+      continue;
+    for (ArrayRef<size_t> PointsOfSchedClass :
+         make_second_range(SchedClassesOfOpcode)) {
+      if (PointsOfSchedClass.empty())
+        continue;
+      // Create a new cluster.
+      Clusters_.emplace_back(ClusterId::makeValid(
+          Clusters_.size(),
+          /*IsUnstable=*/!areAllNeighbours(PointsOfSchedClass)));
+      Cluster &CurrentCluster = Clusters_.back();
+      // Mark points as belonging to the new cluster.
+      for_each(PointsOfSchedClass, [this, &CurrentCluster](size_t P) {
+        ClusterIdForPoint_[P] = CurrentCluster.Id;
+      });
+      // And add all the points of this opcode's sched class to the new cluster.
+      CurrentCluster.PointIndices.reserve(PointsOfSchedClass.size());
+      CurrentCluster.PointIndices.assign(PointsOfSchedClass.begin(),
+                                         PointsOfSchedClass.end());
+      assert(CurrentCluster.PointIndices.size() == PointsOfSchedClass.size());
+    }
   }
-  assert(Clusters_.size() == NumOpcodesSeen);
+  assert(Clusters_.size() == NumClusters);
 }
 
 // Given an instruction Opcode, we can make benchmarks (measurements) of the
@@ -317,7 +331,7 @@ void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) {
 Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
     const std::vector<InstructionBenchmark> &Points, const ModeE Mode,
     const size_t DbscanMinPts, const double AnalysisClusteringEpsilon,
-    Optional<unsigned> NumOpcodes) {
+    const MCSubtargetInfo *SubtargetInfo, const MCInstrInfo *InstrInfo) {
   InstructionBenchmarkClustering Clustering(
       Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon);
   if (auto Error = Clustering.validateAndSetup()) {
@@ -330,13 +344,13 @@ Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
   if (Mode == ModeE::Dbscan) {
     Clustering.clusterizeDbScan(DbscanMinPts);
 
-    if (NumOpcodes.hasValue())
-      Clustering.stabilize(NumOpcodes.getValue());
+    if (InstrInfo)
+      Clustering.stabilize(InstrInfo->getNumOpcodes());
   } else /*if(Mode == ModeE::Naive)*/ {
-    if (!NumOpcodes.hasValue())
-      return make_error<Failure>(
-          "'naive' clustering mode requires opcode count to be specified");
-    Clustering.clusterizeNaive(NumOpcodes.getValue());
+    if (!SubtargetInfo || !InstrInfo)
+      return make_error<Failure>("'naive' clustering mode requires "
+                                 "SubtargetInfo and InstrInfo to be present");
+    Clustering.clusterizeNaive(*SubtargetInfo, *InstrInfo);
   }
 
   return Clustering;

diff  --git a/llvm/tools/llvm-exegesis/lib/Clustering.h b/llvm/tools/llvm-exegesis/lib/Clustering.h
index 449ce405d78c6..a4da3af77499a 100644
--- a/llvm/tools/llvm-exegesis/lib/Clustering.h
+++ b/llvm/tools/llvm-exegesis/lib/Clustering.h
@@ -32,7 +32,8 @@ class InstructionBenchmarkClustering {
   static Expected<InstructionBenchmarkClustering>
   create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
          size_t DbscanMinPts, double AnalysisClusteringEpsilon,
-         Optional<unsigned> NumOpcodes = None);
+         const MCSubtargetInfo *SubtargetInfo = nullptr,
+         const MCInstrInfo *InstrInfo = nullptr);
 
   class ClusterId {
   public:
@@ -126,7 +127,8 @@ class InstructionBenchmarkClustering {
   Error validateAndSetup();
 
   void clusterizeDbScan(size_t MinPts);
-  void clusterizeNaive(unsigned NumOpcodes);
+  void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo,
+                       const MCInstrInfo &InstrInfo);
 
   // Stabilization is only needed if dbscan was used to clusterize.
   void stabilize(unsigned NumOpcodes);

diff  --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 220f40481f4da..178270c5bd88e 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -435,16 +435,19 @@ static void analysisMain() {
     return;
   }
 
+  std::unique_ptr<MCSubtargetInfo> SubtargetInfo(
+      TheTarget->createMCSubtargetInfo(Points[0].LLVMTriple, CpuName, ""));
+
   std::unique_ptr<MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
   assert(InstrInfo && "Unable to create instruction info!");
 
   const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
       Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
-      AnalysisClusteringEpsilon, InstrInfo->getNumOpcodes()));
+      AnalysisClusteringEpsilon, SubtargetInfo.get(), InstrInfo.get()));
 
-  const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering,
-                          AnalysisInconsistencyEpsilon,
-                          AnalysisDisplayUnstableOpcodes, CpuName);
+  const Analysis Analyzer(
+      *TheTarget, std::move(SubtargetInfo), std::move(InstrInfo), Clustering,
+      AnalysisInconsistencyEpsilon, AnalysisDisplayUnstableOpcodes, CpuName);
 
   maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
                                             AnalysisClustersOutputFile);