[llvm] e086a39 - [llvm-exegesis] Let Counter returns up to 16 entries

Fri Jun 26 07:57:39 PDT 2020

Author: Vy Nguyen
Date: 2020-06-26T10:57:20-04:00
New Revision: e086a39c118fa6c1d8d23257ff0f112d000d87fe

URL: https://github.com/llvm/llvm-project/commit/e086a39c118fa6c1d8d23257ff0f112d000d87fe
DIFF: https://github.com/llvm/llvm-project/commit/e086a39c118fa6c1d8d23257ff0f112d000d87fe.diff

LOG:  [llvm-exegesis] Let Counter returns up to 16 entries

    LBR contains (up to) 16 entries for the last x branches, and the X86LBRCounter (from D77422) should be able to return all of them.
    Currently, it returns only the latest entry, which could lead to misleading measurements.
    This patch also changes the LatencyBenchmarkRunner to accommodate multi-value readings.

         https://reviews.llvm.org/D81050

Added: 
    

Modified: 
    llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
    llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
    llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
    llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
    llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
    llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
    llvm/tools/llvm-exegesis/lib/PerfHelper.h
    llvm/tools/llvm-exegesis/lib/Target.cpp
    llvm/tools/llvm-exegesis/lib/Target.h
    llvm/tools/llvm-exegesis/llvm-exegesis.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index d4bad347f604..c883a3409ae3 100644

--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -74,7 +74,8 @@ struct InstructionBenchmark {
   std::string Error;
   std::string Info;
   std::vector<uint8_t> AssembledSnippet;
-
+  // How to aggregate measurements.
+  enum ResultAggregationModeE { Min, Max, Mean, MinVariance };
   // Read functions.
   static Expected<InstructionBenchmark> readYaml(const LLVMState &State,
                                                  StringRef Filename);

diff  --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index a5565bdfa723..bdef8f8a8918 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -46,9 +46,29 @@ class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
 
 private:
   Expected<int64_t> runAndMeasure(const char *Counters) const override {
+    auto ResultOrError = runAndSample(Counters);
+    if (ResultOrError)
+      return ResultOrError.get()[0];
+    return ResultOrError.takeError();
+  }
+
+  static void
+  accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues,
+                          llvm::SmallVector<int64_t, 4> *Result) {
+
+    const size_t NumValues = std::max(NewValues.size(), Result->size());
+    if (NumValues > Result->size())
+      Result->resize(NumValues, 0);
+    for (size_t I = 0, End = NewValues.size(); I < End; ++I)
+      (*Result)[I] += NewValues[I];
+  }
+
+  Expected<llvm::SmallVector<int64_t, 4>>
+  runAndSample(const char *Counters) const override {
     // We sum counts when there are several counters for a single ProcRes
     // (e.g. P23 on SandyBridge).
-    int64_t CounterValue = 0;
+    llvm::SmallVector<int64_t, 4> CounterValues;
+    int Reserved = 0;
     SmallVector<StringRef, 2> CounterNames;
     StringRef(Counters).split(CounterNames, '+');
     char *const ScratchPtr = Scratch->ptr();
@@ -61,6 +81,17 @@ class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
         return CounterOrError.takeError();
 
       pfm::Counter *Counter = CounterOrError.get().get();
+      if (Reserved == 0) {
+        Reserved = Counter->numValues();
+        CounterValues.reserve(Reserved);
+      } else if (Reserved != Counter->numValues())
+        // It'd be wrong to accumulate vectors of different sizes.
+        return make_error<Failure>(
+            llvm::Twine("Inconsistent number of values for counter ")
+                .concat(CounterName)
+                .concat(std::to_string(Counter->numValues()))
+                .concat(" vs expected of ")
+                .concat(std::to_string(Reserved)));
       Scratch->clear();
       {
         CrashRecoveryContext CRC;
@@ -75,9 +106,13 @@ class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
         if (Crashed)
           return make_error<SnippetCrash>("snippet crashed while running");
       }
-      CounterValue += Counter->read();
+      auto ValueOrError = Counter->readOrError();
+      if (!ValueOrError)
+        return ValueOrError.takeError();
+
+      accumulateCounterValues(ValueOrError.get(), &CounterValues);
     }
-    return CounterValue;
+    return CounterValues;
   }
 
   const LLVMState &State;

diff  --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
index b0fdb34450ee..cc51b5b8486b 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -21,6 +21,7 @@
 #include "LlvmState.h"
 #include "MCInstrDescView.h"
 #include "SnippetRepetitor.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/Error.h"
 #include <cstdlib>
@@ -65,7 +66,11 @@ class BenchmarkRunner {
   class FunctionExecutor {
   public:
     virtual ~FunctionExecutor();
+    // FIXME deprecate this.
     virtual Expected<int64_t> runAndMeasure(const char *Counters) const = 0;
+
+    virtual Expected<llvm::SmallVector<int64_t, 4>>
+    runAndSample(const char *Counters) const = 0;
   };
 
 protected:

diff  --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
index 83a3f0183e39..6cdefb8b0679 100644
--- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
@@ -8,48 +8,135 @@
 
 #include "LatencyBenchmarkRunner.h"
 
-#include "Target.h"
 #include "BenchmarkRunner.h"
+#include "Target.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cmath>
 
 namespace llvm {
 namespace exegesis {
 
-LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
-                                               InstructionBenchmark::ModeE Mode)
+LatencyBenchmarkRunner::LatencyBenchmarkRunner(
+    const LLVMState &State, InstructionBenchmark::ModeE Mode,
+    InstructionBenchmark::ResultAggregationModeE ResultAgg)
     : BenchmarkRunner(State, Mode) {
   assert((Mode == InstructionBenchmark::Latency ||
           Mode == InstructionBenchmark::InverseThroughput) &&
          "invalid mode");
+  ResultAggMode = ResultAgg;
 }
 
 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
 
+static double computeVariance(const llvm::SmallVector<int64_t, 4> &Values) {
+  if (Values.empty())
+    return 0.0;
+  double Sum = std::accumulate(Values.begin(), Values.end(), 0.0);
+
+  const double Mean = Sum / Values.size();
+  double Ret = 0;
+  for (const auto &V : Values) {
+    double Delta = V - Mean;
+    Ret += Delta * Delta;
+  }
+  return Ret / Values.size();
+}
+
+static int64_t findMin(const llvm::SmallVector<int64_t, 4> &Values) {
+  if (Values.empty())
+    return 0;
+  return *std::min_element(Values.begin(), Values.end());
+}
+
+static int64_t findMax(const llvm::SmallVector<int64_t, 4> &Values) {
+  if (Values.empty())
+    return 0;
+  return *std::max_element(Values.begin(), Values.end());
+}
+
+static int64_t findMean(const llvm::SmallVector<int64_t, 4> &Values) {
+  if (Values.empty())
+    return 0;
+  return std::accumulate(Values.begin(), Values.end(), 0.0) /
+         static_cast<double>(Values.size());
+}
+
 Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
     const FunctionExecutor &Executor) const {
   // Cycle measurements include some overhead from the kernel. Repeat the
-  // measure several times and take the minimum value.
+  // measure several times and return the aggregated value, as specified by
+  // ResultAggMode.
   constexpr const int NumMeasurements = 30;
-  int64_t MinValue = std::numeric_limits<int64_t>::max();
+  llvm::SmallVector<int64_t, 4> AccumulatedValues;
+  double MinVariance = std::numeric_limits<double>::infinity();
   const char *CounterName = State.getPfmCounters().CycleCounter;
+  // Values count for each run.
+  int ValuesCount = 0;
   for (size_t I = 0; I < NumMeasurements; ++I) {
-    auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
-    if (!ExpectedCounterValue)
-      return ExpectedCounterValue.takeError();
-    if (*ExpectedCounterValue < MinValue)
-      MinValue = *ExpectedCounterValue;
+    auto ExpectedCounterValues = Executor.runAndSample(CounterName);
+    if (!ExpectedCounterValues)
+      return ExpectedCounterValues.takeError();
+    ValuesCount = ExpectedCounterValues.get().size();
+    if (ValuesCount == 1)
+      AccumulatedValues.push_back(ExpectedCounterValues.get()[0]);
+    else {
+      // We'll keep the reading with lowest variance (ie., most stable)
+      double Variance = computeVariance(*ExpectedCounterValues);
+      if (MinVariance > Variance) {
+        AccumulatedValues = std::move(ExpectedCounterValues.get());
+        MinVariance = Variance;
+      }
+    }
   }
-  std::vector<BenchmarkMeasure> Result;
+
+  std::string ModeName;
   switch (Mode) {
   case InstructionBenchmark::Latency:
-    Result = {BenchmarkMeasure::Create("latency", MinValue)};
+    ModeName = "latency";
     break;
   case InstructionBenchmark::InverseThroughput:
-    Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
+    ModeName = "inverse_throughput";
     break;
   default:
     break;
   }
-  return std::move(Result);
+
+  switch (ResultAggMode) {
+  case InstructionBenchmark::MinVariance: {
+    if (ValuesCount == 1)
+      llvm::errs() << "Each sample only has one value. result-aggregation-mode "
+                      "of min-variance is probably non-sensical\n";
+    std::vector<BenchmarkMeasure> Result;
+    Result.reserve(AccumulatedValues.size());
+    for (const int64_t Value : AccumulatedValues)
+      Result.push_back(BenchmarkMeasure::Create(ModeName, Value));
+    return std::move(Result);
+  }
+  case InstructionBenchmark::Min: {
+    std::vector<BenchmarkMeasure> Result;
+    Result.push_back(
+        BenchmarkMeasure::Create(ModeName, findMin(AccumulatedValues)));
+    return std::move(Result);
+  }
+  case InstructionBenchmark::Max: {
+    std::vector<BenchmarkMeasure> Result;
+    Result.push_back(
+        BenchmarkMeasure::Create(ModeName, findMax(AccumulatedValues)));
+    return std::move(Result);
+  }
+  case InstructionBenchmark::Mean: {
+    std::vector<BenchmarkMeasure> Result;
+    Result.push_back(
+        BenchmarkMeasure::Create(ModeName, findMean(AccumulatedValues)));
+    return std::move(Result);
+  }
+  }
+  return llvm::make_error<Failure>(llvm::Twine("Unexpected benchmark mode(")
+                                       .concat(std::to_string(Mode))
+                                       .concat(" and unexpected ResultAggMode ")
+                                       .concat(std::to_string(ResultAggMode)));
 }
 
 } // namespace exegesis

diff  --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
index d4bb93416c20..b9b9efc25d14 100644
--- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
@@ -21,13 +21,16 @@ namespace exegesis {
 
 class LatencyBenchmarkRunner : public BenchmarkRunner {
 public:
-  LatencyBenchmarkRunner(const LLVMState &State,
-                         InstructionBenchmark::ModeE Mode);
+  LatencyBenchmarkRunner(
+      const LLVMState &State, InstructionBenchmark::ModeE Mode,
+      InstructionBenchmark::ResultAggregationModeE ResultAggMode);
   ~LatencyBenchmarkRunner() override;
 
 private:
   Expected<std::vector<BenchmarkMeasure>>
   runMeasurements(const FunctionExecutor &Executor) const override;
+
+  InstructionBenchmark::ResultAggregationModeE ResultAggMode;
 };
 } // namespace exegesis
 } // namespace llvm

diff  --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
index c372ac4f364e..cba4846709e8 100644
--- a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
+++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
@@ -119,23 +119,27 @@ void Counter::stop() { ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); }
 
 int64_t Counter::read() const {
   auto ValueOrError = readOrError();
-  if (ValueOrError)
-    return ValueOrError.get();
-
-  errs() << ValueOrError.takeError() << "\n";
+  if (ValueOrError) {
+    if (!ValueOrError.get().empty())
+      return ValueOrError.get()[0];
+    errs() << "Counter has no reading\n";
+  } else
+    errs() << ValueOrError.takeError() << "\n";
   return -1;
 }
 
-llvm::Expected<int64_t> Counter::readOrError() const {
+llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
   int64_t Count = 0;
   ssize_t ReadSize = ::read(FileDescriptor, &Count, sizeof(Count));
   if (ReadSize != sizeof(Count))
     return llvm::make_error<llvm::StringError>("Failed to read event counter",
                                                llvm::errc::io_error);
-
-  return Count;
+  llvm::SmallVector<int64_t, 4> Result;
+  Result.push_back(Count);
+  return Result;
 }
 
+int Counter::numValues() const { return 1; }
 #else
 
 Counter::Counter(PerfEvent &&Event) : Event(std::move(Event)) {}
@@ -148,11 +152,13 @@ void Counter::stop() {}
 
 int64_t Counter::read() const { return 42; }
 
-llvm::Expected<int64_t> Counter::readOrError() const {
+llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
   return llvm::make_error<llvm::StringError>("Not implemented",
                                              llvm::errc::io_error);
 }
 
+int Counter::numValues() const { return 1; }
+
 #endif
 
 } // namespace pfm

diff  --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.h b/llvm/tools/llvm-exegesis/lib/PerfHelper.h
index 7562af9c4524..d41b090e85f1 100644
--- a/llvm/tools/llvm-exegesis/lib/PerfHelper.h
+++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.h
@@ -15,9 +15,11 @@
 #define LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Error.h"
+
 #include <cstdint>
 #include <functional>
 #include <memory>
@@ -85,7 +87,9 @@ class Counter {
   int64_t read() const;
 
   /// Returns the current value of the counter or error if it cannot be read.
-  virtual llvm::Expected<int64_t> readOrError() const;
+  virtual llvm::Expected<llvm::SmallVector<int64_t, 4>> readOrError() const;
+
+  virtual int numValues() const;
 
 private:
   PerfEvent Event;

diff  --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp
index 6150c738dad8..ad26c1678c78 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Target.cpp
@@ -68,8 +68,9 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createSnippetGenerator(
 }
 
 Expected<std::unique_ptr<BenchmarkRunner>>
-ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
-                                      const LLVMState &State) const {
+ExegesisTarget::createBenchmarkRunner(
+    InstructionBenchmark::ModeE Mode, const LLVMState &State,
+    InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
   PfmCountersInfo PfmCounters = State.getPfmCounters();
   switch (Mode) {
   case InstructionBenchmark::Unknown:
@@ -85,12 +86,12 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
               .concat(ModeName)
               .concat("' mode, sched model does not define a cycle counter."));
     }
-    return createLatencyBenchmarkRunner(State, Mode);
+    return createLatencyBenchmarkRunner(State, Mode, ResultAggMode);
   case InstructionBenchmark::Uops:
     if (!PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
       return make_error<Failure>("can't run 'uops' mode, sched model does not "
                                  "define uops or issue counters.");
-    return createUopsBenchmarkRunner(State);
+    return createUopsBenchmarkRunner(State, ResultAggMode);
   }
   return nullptr;
 }
@@ -106,12 +107,14 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createParallelSnippetGenerator
 }
 
 std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
-    const LLVMState &State, InstructionBenchmark::ModeE Mode) const {
-  return std::make_unique<LatencyBenchmarkRunner>(State, Mode);
+    const LLVMState &State, InstructionBenchmark::ModeE Mode,
+    InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
+  return std::make_unique<LatencyBenchmarkRunner>(State, Mode, ResultAggMode);
 }
 
-std::unique_ptr<BenchmarkRunner>
-ExegesisTarget::createUopsBenchmarkRunner(const LLVMState &State) const {
+std::unique_ptr<BenchmarkRunner> ExegesisTarget::createUopsBenchmarkRunner(
+    const LLVMState &State,
+    InstructionBenchmark::ResultAggregationModeE /*unused*/) const {
   return std::make_unique<UopsBenchmarkRunner>(State);
 }
 

diff  --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index bcc283ee8fe7..70890795426d 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -148,9 +148,10 @@ class ExegesisTarget {
                          const LLVMState &State,
                          const SnippetGenerator::Options &Opts) const;
   // Creates a benchmark runner for the given mode.
-  Expected<std::unique_ptr<BenchmarkRunner>>
-  createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
-                        const LLVMState &State) const;
+  Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
+      InstructionBenchmark::ModeE Mode, const LLVMState &State,
+      InstructionBenchmark::ResultAggregationModeE ResultAggMode =
+          InstructionBenchmark::Min) const;
 
   // Returns the ExegesisTarget for the given triple or nullptr if the target
   // does not exist.
@@ -176,9 +177,11 @@ class ExegesisTarget {
   std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
       const LLVMState &State, const SnippetGenerator::Options &Opts) const;
   std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
-      const LLVMState &State, InstructionBenchmark::ModeE Mode) const;
+      const LLVMState &State, InstructionBenchmark::ModeE Mode,
+      InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
   std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
-      const LLVMState &State) const;
+      const LLVMState &State,
+      InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
 
   const ExegesisTarget *Next = nullptr;
   const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;

diff  --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index ce3a31c12d4a..507015b97472 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -83,6 +83,21 @@ static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
                clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
                           "Analysis")));
 
+static cl::opt<exegesis::InstructionBenchmark::ResultAggregationModeE>
+    ResultAggMode(
+        "result-aggregation-mode",
+        cl::desc("How to aggregate multi-values result"), cl::cat(Options),
+        cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min",
+                              "Keep min reading"),
+                   clEnumValN(exegesis::InstructionBenchmark::Max, "max",
+                              "Keep max reading"),
+                   clEnumValN(exegesis::InstructionBenchmark::Mean, "mean",
+                              "Compute mean of all readings"),
+                   clEnumValN(exegesis::InstructionBenchmark::MinVariance,
+                              "min-variance",
+                              "Keep readings set with min-variance")),
+        cl::init(exegesis::InstructionBenchmark::Min));
+
 static cl::opt<exegesis::InstructionBenchmark::RepetitionModeE> RepetitionMode(
     "repetition-mode", cl::desc("how to repeat the instruction snippet"),
     cl::cat(BenchmarkOptions),
@@ -281,8 +296,9 @@ void benchmarkMain() {
 
   const LLVMState State(CpuName);
 
-  const std::unique_ptr<BenchmarkRunner> Runner = ExitOnErr(
-      State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State));
+  const std::unique_ptr<BenchmarkRunner> Runner =
+      ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
+          BenchmarkMode, State, ResultAggMode));
   if (!Runner) {
     ExitWithError("cannot create benchmark runner");
   }