[llvm] e086a39 - [llvm-exegesis] Let Counter returns up to 16 entries
Vy Nguyen via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 26 07:57:39 PDT 2020
Author: Vy Nguyen
Date: 2020-06-26T10:57:20-04:00
New Revision: e086a39c118fa6c1d8d23257ff0f112d000d87fe
URL: https://github.com/llvm/llvm-project/commit/e086a39c118fa6c1d8d23257ff0f112d000d87fe
DIFF: https://github.com/llvm/llvm-project/commit/e086a39c118fa6c1d8d23257ff0f112d000d87fe.diff
LOG: [llvm-exegesis] Let Counter returns up to 16 entries
LBR contains (up to) 16 entries for last x branches and the X86LBRCounter (from D77422) should be able to return all those.
Currently, it just returns the latest entry, which could lead to misleading measurements.
This patch also changes the LatencyBenchmarkRunner to accommodate multi-value readings.
https://reviews.llvm.org/D81050
Added:
Modified:
llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
llvm/tools/llvm-exegesis/lib/PerfHelper.h
llvm/tools/llvm-exegesis/lib/Target.cpp
llvm/tools/llvm-exegesis/lib/Target.h
llvm/tools/llvm-exegesis/llvm-exegesis.cpp
Removed:
################################################################################
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index d4bad347f604..c883a3409ae3 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -74,7 +74,8 @@ struct InstructionBenchmark {
std::string Error;
std::string Info;
std::vector<uint8_t> AssembledSnippet;
-
+ // How to aggregate measurements.
+ enum ResultAggregationModeE { Min, Max, Mean, MinVariance };
// Read functions.
static Expected<InstructionBenchmark> readYaml(const LLVMState &State,
StringRef Filename);
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index a5565bdfa723..bdef8f8a8918 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -46,9 +46,29 @@ class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
private:
Expected<int64_t> runAndMeasure(const char *Counters) const override {
+ auto ResultOrError = runAndSample(Counters);
+ if (ResultOrError)
+ return ResultOrError.get()[0];
+ return ResultOrError.takeError();
+ }
+
+ static void
+ accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues,
+ llvm::SmallVector<int64_t, 4> *Result) {
+
+ const size_t NumValues = std::max(NewValues.size(), Result->size());
+ if (NumValues > Result->size())
+ Result->resize(NumValues, 0);
+ for (size_t I = 0, End = NewValues.size(); I < End; ++I)
+ (*Result)[I] += NewValues[I];
+ }
+
+ Expected<llvm::SmallVector<int64_t, 4>>
+ runAndSample(const char *Counters) const override {
// We sum counts when there are several counters for a single ProcRes
// (e.g. P23 on SandyBridge).
- int64_t CounterValue = 0;
+ llvm::SmallVector<int64_t, 4> CounterValues;
+ int Reserved = 0;
SmallVector<StringRef, 2> CounterNames;
StringRef(Counters).split(CounterNames, '+');
char *const ScratchPtr = Scratch->ptr();
@@ -61,6 +81,17 @@ class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
return CounterOrError.takeError();
pfm::Counter *Counter = CounterOrError.get().get();
+ if (Reserved == 0) {
+ Reserved = Counter->numValues();
+ CounterValues.reserve(Reserved);
+ } else if (Reserved != Counter->numValues())
+ // It'd be wrong to accumulate vectors of different sizes.
+ return make_error<Failure>(
+ llvm::Twine("Inconsistent number of values for counter ")
+ .concat(CounterName)
+ .concat(std::to_string(Counter->numValues()))
+ .concat(" vs expected of ")
+ .concat(std::to_string(Reserved)));
Scratch->clear();
{
CrashRecoveryContext CRC;
@@ -75,9 +106,13 @@ class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
if (Crashed)
return make_error<SnippetCrash>("snippet crashed while running");
}
- CounterValue += Counter->read();
+ auto ValueOrError = Counter->readOrError();
+ if (!ValueOrError)
+ return ValueOrError.takeError();
+
+ accumulateCounterValues(ValueOrError.get(), &CounterValues);
}
- return CounterValue;
+ return CounterValues;
}
const LLVMState &State;
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
index b0fdb34450ee..cc51b5b8486b 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -21,6 +21,7 @@
#include "LlvmState.h"
#include "MCInstrDescView.h"
#include "SnippetRepetitor.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Error.h"
#include <cstdlib>
@@ -65,7 +66,11 @@ class BenchmarkRunner {
class FunctionExecutor {
public:
virtual ~FunctionExecutor();
+ // FIXME deprecate this.
virtual Expected<int64_t> runAndMeasure(const char *Counters) const = 0;
+
+ virtual Expected<llvm::SmallVector<int64_t, 4>>
+ runAndSample(const char *Counters) const = 0;
};
protected:
diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
index 83a3f0183e39..6cdefb8b0679 100644
--- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
@@ -8,48 +8,135 @@
#include "LatencyBenchmarkRunner.h"
-#include "Target.h"
#include "BenchmarkRunner.h"
+#include "Target.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cmath>
namespace llvm {
namespace exegesis {
-LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
- InstructionBenchmark::ModeE Mode)
+LatencyBenchmarkRunner::LatencyBenchmarkRunner(
+ const LLVMState &State, InstructionBenchmark::ModeE Mode,
+ InstructionBenchmark::ResultAggregationModeE ResultAgg)
: BenchmarkRunner(State, Mode) {
assert((Mode == InstructionBenchmark::Latency ||
Mode == InstructionBenchmark::InverseThroughput) &&
"invalid mode");
+ ResultAggMode = ResultAgg;
}
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
+static double computeVariance(const llvm::SmallVector<int64_t, 4> &Values) {
+ if (Values.empty())
+ return 0.0;
+ double Sum = std::accumulate(Values.begin(), Values.end(), 0.0);
+
+ const double Mean = Sum / Values.size();
+ double Ret = 0;
+ for (const auto &V : Values) {
+ double Delta = V - Mean;
+ Ret += Delta * Delta;
+ }
+ return Ret / Values.size();
+}
+
+static int64_t findMin(const llvm::SmallVector<int64_t, 4> &Values) {
+ if (Values.empty())
+ return 0;
+ return *std::min_element(Values.begin(), Values.end());
+}
+
+static int64_t findMax(const llvm::SmallVector<int64_t, 4> &Values) {
+ if (Values.empty())
+ return 0;
+ return *std::max_element(Values.begin(), Values.end());
+}
+
+static int64_t findMean(const llvm::SmallVector<int64_t, 4> &Values) {
+ if (Values.empty())
+ return 0;
+ return std::accumulate(Values.begin(), Values.end(), 0.0) /
+ static_cast<double>(Values.size());
+}
+
Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
const FunctionExecutor &Executor) const {
// Cycle measurements include some overhead from the kernel. Repeat the
- // measure several times and take the minimum value.
+ // measure several times and return the aggregated value, as specified by
+ // ResultAggMode.
constexpr const int NumMeasurements = 30;
- int64_t MinValue = std::numeric_limits<int64_t>::max();
+ llvm::SmallVector<int64_t, 4> AccumulatedValues;
+ double MinVariance = std::numeric_limits<double>::infinity();
const char *CounterName = State.getPfmCounters().CycleCounter;
+ // Values count for each run.
+ int ValuesCount = 0;
for (size_t I = 0; I < NumMeasurements; ++I) {
- auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
- if (!ExpectedCounterValue)
- return ExpectedCounterValue.takeError();
- if (*ExpectedCounterValue < MinValue)
- MinValue = *ExpectedCounterValue;
+ auto ExpectedCounterValues = Executor.runAndSample(CounterName);
+ if (!ExpectedCounterValues)
+ return ExpectedCounterValues.takeError();
+ ValuesCount = ExpectedCounterValues.get().size();
+ if (ValuesCount == 1)
+ AccumulatedValues.push_back(ExpectedCounterValues.get()[0]);
+ else {
+ // We'll keep the reading with lowest variance (ie., most stable)
+ double Variance = computeVariance(*ExpectedCounterValues);
+ if (MinVariance > Variance) {
+ AccumulatedValues = std::move(ExpectedCounterValues.get());
+ MinVariance = Variance;
+ }
+ }
}
- std::vector<BenchmarkMeasure> Result;
+
+ std::string ModeName;
switch (Mode) {
case InstructionBenchmark::Latency:
- Result = {BenchmarkMeasure::Create("latency", MinValue)};
+ ModeName = "latency";
break;
case InstructionBenchmark::InverseThroughput:
- Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
+ ModeName = "inverse_throughput";
break;
default:
break;
}
- return std::move(Result);
+
+ switch (ResultAggMode) {
+ case InstructionBenchmark::MinVariance: {
+ if (ValuesCount == 1)
+ llvm::errs() << "Each sample only has one value. result-aggregation-mode "
+ "of min-variance is probably non-sensical\n";
+ std::vector<BenchmarkMeasure> Result;
+ Result.reserve(AccumulatedValues.size());
+ for (const int64_t Value : AccumulatedValues)
+ Result.push_back(BenchmarkMeasure::Create(ModeName, Value));
+ return std::move(Result);
+ }
+ case InstructionBenchmark::Min: {
+ std::vector<BenchmarkMeasure> Result;
+ Result.push_back(
+ BenchmarkMeasure::Create(ModeName, findMin(AccumulatedValues)));
+ return std::move(Result);
+ }
+ case InstructionBenchmark::Max: {
+ std::vector<BenchmarkMeasure> Result;
+ Result.push_back(
+ BenchmarkMeasure::Create(ModeName, findMax(AccumulatedValues)));
+ return std::move(Result);
+ }
+ case InstructionBenchmark::Mean: {
+ std::vector<BenchmarkMeasure> Result;
+ Result.push_back(
+ BenchmarkMeasure::Create(ModeName, findMean(AccumulatedValues)));
+ return std::move(Result);
+ }
+ }
+ return llvm::make_error<Failure>(llvm::Twine("Unexpected benchmark mode(")
+ .concat(std::to_string(Mode))
+ .concat(" and unexpected ResultAggMode ")
+ .concat(std::to_string(ResultAggMode)));
}
} // namespace exegesis
diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
index d4bb93416c20..b9b9efc25d14 100644
--- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
@@ -21,13 +21,16 @@ namespace exegesis {
class LatencyBenchmarkRunner : public BenchmarkRunner {
public:
- LatencyBenchmarkRunner(const LLVMState &State,
- InstructionBenchmark::ModeE Mode);
+ LatencyBenchmarkRunner(
+ const LLVMState &State, InstructionBenchmark::ModeE Mode,
+ InstructionBenchmark::ResultAggregationModeE ResultAggMode);
~LatencyBenchmarkRunner() override;
private:
Expected<std::vector<BenchmarkMeasure>>
runMeasurements(const FunctionExecutor &Executor) const override;
+
+ InstructionBenchmark::ResultAggregationModeE ResultAggMode;
};
} // namespace exegesis
} // namespace llvm
diff --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
index c372ac4f364e..cba4846709e8 100644
--- a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
+++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
@@ -119,23 +119,27 @@ void Counter::stop() { ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); }
int64_t Counter::read() const {
auto ValueOrError = readOrError();
- if (ValueOrError)
- return ValueOrError.get();
-
- errs() << ValueOrError.takeError() << "\n";
+ if (ValueOrError) {
+ if (!ValueOrError.get().empty())
+ return ValueOrError.get()[0];
+ errs() << "Counter has no reading\n";
+ } else
+ errs() << ValueOrError.takeError() << "\n";
return -1;
}
-llvm::Expected<int64_t> Counter::readOrError() const {
+llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
int64_t Count = 0;
ssize_t ReadSize = ::read(FileDescriptor, &Count, sizeof(Count));
if (ReadSize != sizeof(Count))
return llvm::make_error<llvm::StringError>("Failed to read event counter",
llvm::errc::io_error);
-
- return Count;
+ llvm::SmallVector<int64_t, 4> Result;
+ Result.push_back(Count);
+ return Result;
}
+int Counter::numValues() const { return 1; }
#else
Counter::Counter(PerfEvent &&Event) : Event(std::move(Event)) {}
@@ -148,11 +152,13 @@ void Counter::stop() {}
int64_t Counter::read() const { return 42; }
-llvm::Expected<int64_t> Counter::readOrError() const {
+llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
return llvm::make_error<llvm::StringError>("Not implemented",
llvm::errc::io_error);
}
+int Counter::numValues() const { return 1; }
+
#endif
} // namespace pfm
diff --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.h b/llvm/tools/llvm-exegesis/lib/PerfHelper.h
index 7562af9c4524..d41b090e85f1 100644
--- a/llvm/tools/llvm-exegesis/lib/PerfHelper.h
+++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.h
@@ -15,9 +15,11 @@
#define LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Error.h"
+
#include <cstdint>
#include <functional>
#include <memory>
@@ -85,7 +87,9 @@ class Counter {
int64_t read() const;
/// Returns the current value of the counter or error if it cannot be read.
- virtual llvm::Expected<int64_t> readOrError() const;
+ virtual llvm::Expected<llvm::SmallVector<int64_t, 4>> readOrError() const;
+
+ virtual int numValues() const;
private:
PerfEvent Event;
diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp
index 6150c738dad8..ad26c1678c78 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Target.cpp
@@ -68,8 +68,9 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createSnippetGenerator(
}
Expected<std::unique_ptr<BenchmarkRunner>>
-ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
- const LLVMState &State) const {
+ExegesisTarget::createBenchmarkRunner(
+ InstructionBenchmark::ModeE Mode, const LLVMState &State,
+ InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
PfmCountersInfo PfmCounters = State.getPfmCounters();
switch (Mode) {
case InstructionBenchmark::Unknown:
@@ -85,12 +86,12 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
.concat(ModeName)
.concat("' mode, sched model does not define a cycle counter."));
}
- return createLatencyBenchmarkRunner(State, Mode);
+ return createLatencyBenchmarkRunner(State, Mode, ResultAggMode);
case InstructionBenchmark::Uops:
if (!PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
return make_error<Failure>("can't run 'uops' mode, sched model does not "
"define uops or issue counters.");
- return createUopsBenchmarkRunner(State);
+ return createUopsBenchmarkRunner(State, ResultAggMode);
}
return nullptr;
}
@@ -106,12 +107,14 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createParallelSnippetGenerator
}
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
- const LLVMState &State, InstructionBenchmark::ModeE Mode) const {
- return std::make_unique<LatencyBenchmarkRunner>(State, Mode);
+ const LLVMState &State, InstructionBenchmark::ModeE Mode,
+ InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
+ return std::make_unique<LatencyBenchmarkRunner>(State, Mode, ResultAggMode);
}
-std::unique_ptr<BenchmarkRunner>
-ExegesisTarget::createUopsBenchmarkRunner(const LLVMState &State) const {
+std::unique_ptr<BenchmarkRunner> ExegesisTarget::createUopsBenchmarkRunner(
+ const LLVMState &State,
+ InstructionBenchmark::ResultAggregationModeE /*unused*/) const {
return std::make_unique<UopsBenchmarkRunner>(State);
}
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index bcc283ee8fe7..70890795426d 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -148,9 +148,10 @@ class ExegesisTarget {
const LLVMState &State,
const SnippetGenerator::Options &Opts) const;
// Creates a benchmark runner for the given mode.
- Expected<std::unique_ptr<BenchmarkRunner>>
- createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
- const LLVMState &State) const;
+ Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
+ InstructionBenchmark::ModeE Mode, const LLVMState &State,
+ InstructionBenchmark::ResultAggregationModeE ResultAggMode =
+ InstructionBenchmark::Min) const;
// Returns the ExegesisTarget for the given triple or nullptr if the target
// does not exist.
@@ -176,9 +177,11 @@ class ExegesisTarget {
std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
const LLVMState &State, const SnippetGenerator::Options &Opts) const;
std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
- const LLVMState &State, InstructionBenchmark::ModeE Mode) const;
+ const LLVMState &State, InstructionBenchmark::ModeE Mode,
+ InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
- const LLVMState &State) const;
+ const LLVMState &State,
+ InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
const ExegesisTarget *Next = nullptr;
const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index ce3a31c12d4a..507015b97472 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -83,6 +83,21 @@ static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
"Analysis")));
+static cl::opt<exegesis::InstructionBenchmark::ResultAggregationModeE>
+ ResultAggMode(
+ "result-aggregation-mode",
+ cl::desc("How to aggregate multi-values result"), cl::cat(Options),
+ cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min",
+ "Keep min reading"),
+ clEnumValN(exegesis::InstructionBenchmark::Max, "max",
+ "Keep max reading"),
+ clEnumValN(exegesis::InstructionBenchmark::Mean, "mean",
+ "Compute mean of all readings"),
+ clEnumValN(exegesis::InstructionBenchmark::MinVariance,
+ "min-variance",
+ "Keep readings set with min-variance")),
+ cl::init(exegesis::InstructionBenchmark::Min));
+
static cl::opt<exegesis::InstructionBenchmark::RepetitionModeE> RepetitionMode(
"repetition-mode", cl::desc("how to repeat the instruction snippet"),
cl::cat(BenchmarkOptions),
@@ -281,8 +296,9 @@ void benchmarkMain() {
const LLVMState State(CpuName);
- const std::unique_ptr<BenchmarkRunner> Runner = ExitOnErr(
- State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State));
+ const std::unique_ptr<BenchmarkRunner> Runner =
+ ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
+ BenchmarkMode, State, ResultAggMode));
if (!Runner) {
ExitWithError("cannot create benchmark runner");
}
More information about the llvm-commits mailing list