[llvm] 4fcd1a8 - [llvm-exegesis] Add option to check the hardware support for a given feature before benchmarking.
Vy Nguyen via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 30 09:26:27 PDT 2020
Author: Vy Nguyen
Date: 2020-09-30T12:25:59-04:00
New Revision: 4fcd1a8e6528ca42fe656f2745e15d2b7f5de495
URL: https://github.com/llvm/llvm-project/commit/4fcd1a8e6528ca42fe656f2745e15d2b7f5de495
DIFF: https://github.com/llvm/llvm-project/commit/4fcd1a8e6528ca42fe656f2745e15d2b7f5de495.diff
LOG: [llvm-exegesis] Add option to check the hardware support for a given feature before benchmarking.
This is mostly for the benefit of the LBR latency mode.
Right now, llvm-exegesis performs no such check: if LBR latency mode is run on unsupported hardware, it will produce all zeroes for latency.
Differential Revision: https://reviews.llvm.org/D85254
Added:
Modified:
llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg
llvm/tools/llvm-exegesis/lib/Target.h
llvm/tools/llvm-exegesis/lib/X86/Target.cpp
llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
llvm/tools/llvm-exegesis/lib/X86/X86Counter.h
llvm/tools/llvm-exegesis/llvm-exegesis.cpp
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg
index 431967c1ec9b..69b08f27c39a 100644
--- a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg
+++ b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg
@@ -19,9 +19,9 @@ else:
try:
with open(os.devnull, 'w') as quiet:
check_llvm_exegesis_uops_result = subprocess.call(
- [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
+ [llvm_exegesis_exe, '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
check_llvm_exegesis_latency_result = subprocess.call(
- [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
+ [llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
except OSError:
print('could not exec llvm-exegesis')
config.unsupported = True
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 70890795426d..8a5624b42803 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -142,6 +142,11 @@ class ExegesisTarget {
return {&Instr};
}
+ // Checks hardware and software support for current benchmark mode.
+ // Returns an error if the target host does not have support to run the
+ // benchmark.
+ // This default implementation always reports success; it is virtual so that
+ // a target with conditionally supported features can override it.
+ virtual Error checkFeatureSupport() const { return Error::success(); }
+
// Creates a snippet generator for the given mode.
std::unique_ptr<SnippetGenerator>
createSnippetGenerator(InstructionBenchmark::ModeE Mode,
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 9f045fa11aa2..270825a8777b 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -674,6 +674,23 @@ class ExegesisX86Target : public ExegesisTarget {
return Arch == Triple::x86_64 || Arch == Triple::x86;
}
+  // Verifies that the host can run the requested benchmark configuration.
+  // LBR is the only conditionally supported feature at the moment, so there
+  // is only something to verify when a non-zero LbrSamplingPeriod was given.
+  Error checkFeatureSupport() const override {
+    if (LbrSamplingPeriod != 0) {
+#if defined(__linux__) && defined(HAVE_LIBPFM) && \
+    defined(LIBPFM_HAS_FIELD_CYCLES)
+      // Even when the kernel supports LBR, the hardware may still lack it,
+      // so defer to the runtime probe.
+      return X86LbrCounter::checkLbrSupport();
+#else
+      return llvm::make_error<llvm::StringError>(
+          "LBR not supported on this kernel and/or platform",
+          llvm::errc::not_supported);
+#endif
+    }
+
+    // LBR was not requested, so we should be able to run the benchmarks.
+    return Error::success();
+  }
+
static const unsigned kUnavailableRegisters[4];
};
diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
index 57b493818aaa..25ec4f858675 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
@@ -21,6 +21,7 @@
#endif // HAVE_LIBPFM
#include <atomic>
+#include <chrono>
#include <cstddef>
#include <cstdint>
#include <limits>
@@ -35,6 +36,8 @@
namespace llvm {
namespace exegesis {
+// Number of entries in the LBR.
+static constexpr int kLbrEntries = 16;
static constexpr size_t kBufferPages = 8;
static const size_t kDataBufferSize = kBufferPages * getpagesize();
@@ -70,7 +73,6 @@ static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
const void *From, const void *To,
llvm::SmallVector<int64_t, 4> *CycleArray) {
- assert(From != nullptr && To != nullptr);
const char *DataPtr = DataBuf;
while (DataPtr < DataBuf + DataSize) {
struct perf_event_header Header;
@@ -149,21 +151,47 @@ void X86LbrCounter::start() {
ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
}
+// Probes whether the host delivers usable LBR data by doing one sample read
+// and checking whether the results contain any non-zero value.
+//
+// Returns Error::success() if at least one value read back is non-zero.
+// Otherwise (all values zero, nothing read, or the read itself failed) returns
+// a StringError carrying errc::not_supported.
+llvm::Error X86LbrCounter::checkLbrSupport() {
+  // NOTE(review): 123 is presumably an arbitrary sampling period; confirm
+  // against the X86LbrPerfEvent constructor.
+  X86LbrCounter counter(X86LbrPerfEvent(123));
+  counter.start();
+
+  // Reading through a volatile pointer prevents the compiler from unrolling
+  // the loop and getting rid of all the branches. We need at least
+  // kLbrEntries iterations, and we keep looping until the time limit passes.
+  int Sum = 0;
+  int V = 1;
+  volatile int *P = &V;
+  const auto TimeLimit =
+      std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
+
+  for (int I = 0;
+       I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
+       ++I) {
+    Sum += *P;
+  }
+  // Only the loop's branches matter; the accumulated value is never used.
+  (void)Sum;
+
+  counter.stop();
+
+  auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
+  if (!ResultOrError) {
+    // An Expected<T> in the error state must have its error handled before it
+    // is destroyed; leaving it untouched aborts in builds that enforce error
+    // checking. A failed read is reported as "not supported" below.
+    consumeError(ResultOrError.takeError());
+  } else {
+    // If there is at least one non-zero entry, then LBR is supported.
+    for (const int64_t Value : *ResultOrError)
+      if (Value != 0)
+        return Error::success();
+  }
+
+  return llvm::make_error<llvm::StringError>(
+      "LBR format with cycles is not supported on the host.",
+      llvm::errc::not_supported);
+}
+
llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
- // The max number of time-outs/retries before we give up.
- static constexpr int kMaxTimeouts = 160;
-
// Disable the event before reading
ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);
- // Parses the LBR buffer and fills CycleArray with the sequence of cycle
- // counts from the buffer.
- llvm::SmallVector<int64_t, 4> CycleArray;
- std::unique_ptr<char[]> DataBuf(new char[kDataBufferSize]);
- int NumTimeouts = 0;
- int PollResult = 0;
-
// Find the boundary of the function so that we could filter the LBRs
// to keep only the relevant records.
if (FunctionBytes.empty())
@@ -172,6 +200,21 @@ X86LbrCounter::readOrError(StringRef FunctionBytes) const {
const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
FunctionBytes.size());
+ return doReadCounter(From, To);
+}
+
+llvm::Expected<llvm::SmallVector<int64_t, 4>>
+X86LbrCounter::doReadCounter(const void *From, const void *To) const {
+ // The max number of time-outs/retries before we give up.
+ static constexpr int kMaxTimeouts = 160;
+
+ // Parses the LBR buffer and fills CycleArray with the sequence of cycle
+ // counts from the buffer.
+ llvm::SmallVector<int64_t, 4> CycleArray;
+ auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
+ int NumTimeouts = 0;
+ int PollResult = 0;
+
while (PollResult <= 0) {
PollResult = pollLbrPerfEvent(FileDescriptor);
if (PollResult > 0)
diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h
index 94062012917d..73e4dc5b990a 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h
+++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h
@@ -33,6 +33,8 @@ class X86LbrPerfEvent : public pfm::PerfEvent {
class X86LbrCounter : public pfm::Counter {
public:
+ static llvm::Error checkLbrSupport();
+
explicit X86LbrCounter(pfm::PerfEvent &&Event);
virtual ~X86LbrCounter();
@@ -43,6 +45,9 @@ class X86LbrCounter : public pfm::Counter {
readOrError(StringRef FunctionBytes) const override;
private:
+ llvm::Expected<llvm::SmallVector<int64_t, 4>>
+ doReadCounter(const void *From, const void *To) const;
+
void *MMappedBuffer = nullptr;
};
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index fb3f41e14734..bc2f348a7eae 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -160,12 +160,6 @@ static cl::opt<std::string>
cl::desc(""), cl::cat(AnalysisOptions),
cl::init(""));
-static cl::list<std::string>
- AllowedHostCpus("allowed-host-cpu",
- cl::desc("If specified, only run the benchmark if the host "
- "CPU matches the names"),
- cl::cat(Options), cl::ZeroOrMore);
-
static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
"analysis-display-unstable-clusters",
cl::desc("if there is more than one benchmark for an opcode, said "
@@ -302,12 +296,9 @@ void benchmarkMain() {
const LLVMState State(CpuName);
- llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU();
- for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end();
- ++Begin) {
- if (ActualCpu != *Begin)
- ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu));
- }
+ // Preliminary check to ensure features needed for requested
+ // benchmark mode are present on target CPU and/or OS.
+ ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
const std::unique_ptr<BenchmarkRunner> Runner =
ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
More information about the llvm-commits
mailing list