[llvm-branch-commits] [llvm] [clang] [clang-tools-extra] [llvm-exegesis] Add support for validation counters (PR #76653)

Wed Jan 10 14:44:33 PST 2024

https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/76653

>From 76f199f4fc7244c3d972736595c685d7316c5203 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sat, 30 Dec 2023 18:18:12 -0800
Subject: [PATCH 1/6] [llvm-exegesis] Add support for validation counters

This patch adds support for validation counters. Validation counters can
be used to measure events that occur during snippet execution, such as
cache misses, to ensure that certain assumed invariants about the
benchmark actually hold. Validation counters are set up within a perf
event group, so they are turned on and off at exactly the same time as
the "group leader" counter that measures the desired value.
---
 .../llvm-exegesis/lib/BenchmarkResult.cpp     | 52 +++++++++++++++++++
 .../tools/llvm-exegesis/lib/BenchmarkResult.h | 15 +++++-
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     | 52 +++++++++++++------
 .../tools/llvm-exegesis/lib/BenchmarkRunner.h |  8 ++-
 .../lib/LatencyBenchmarkRunner.cpp            | 46 ++++++++++++----
 .../lib/LatencyBenchmarkRunner.h              |  3 ++
 llvm/tools/llvm-exegesis/lib/PerfHelper.cpp   | 37 +++++++++++--
 llvm/tools/llvm-exegesis/lib/PerfHelper.h     | 10 +++-
 llvm/tools/llvm-exegesis/lib/Target.cpp       | 35 +++++++++----
 llvm/tools/llvm-exegesis/lib/Target.h         | 13 ++---
 .../llvm-exegesis/lib/UopsBenchmarkRunner.cpp | 37 ++++++++++---
 .../llvm-exegesis/lib/UopsBenchmarkRunner.h   |  9 +++-
 llvm/tools/llvm-exegesis/lib/X86/Target.cpp   | 11 +++-
 .../llvm-exegesis/lib/X86/X86Counter.cpp      |  2 +-
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    | 18 ++++++-
 .../tools/llvm-exegesis/ClusteringTest.cpp    | 36 ++++++-------
 .../Mips/BenchmarkResultTest.cpp              | 12 ++---
 .../llvm-exegesis/X86/BenchmarkResultTest.cpp | 12 ++---
 18 files changed, 311 insertions(+), 97 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
index 02c4da11e032d6..1079df24b457b8 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/bit.h"
 #include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
@@ -192,6 +193,56 @@ template <> struct SequenceElementTraits<exegesis::BenchmarkMeasure> {
   static const bool flow = false;
 };
 
+const char *validationEventToString(exegesis::ValidationEvent VE) {
+  switch (VE) {
+  case exegesis::ValidationEvent::L1DCacheLoadMiss:
+    return "l1d-load-miss";
+  case exegesis::ValidationEvent::InstructionRetired:
+    return "instructions-retired";
+  case exegesis::ValidationEvent::DataTLBLoadMiss:
+    return "data-tlb-load-misses";
+  case exegesis::ValidationEvent::DataTLBStoreMiss:
+    return "data-tlb-store-misses";
+  }
+}
+
+Expected<exegesis::ValidationEvent> stringToValidationEvent(StringRef Input) {
+  if (Input == "l1d-load-miss")
+    return exegesis::ValidationEvent::L1DCacheLoadMiss;
+  else if (Input == "instructions-retired")
+    return exegesis::ValidationEvent::InstructionRetired;
+  else if (Input == "data-tlb-load-misses")
+    return exegesis::ValidationEvent::DataTLBLoadMiss;
+  else if (Input == "data-tlb-store-misses")
+    return exegesis::ValidationEvent::DataTLBStoreMiss;
+  else
+    return make_error<StringError>("Invalid validation event string",
+                                   errc::invalid_argument);
+}
+
+template <>
+struct CustomMappingTraits<
+    std::unordered_map<exegesis::ValidationEvent, int64_t>> {
+  static void
+  inputOne(IO &Io, StringRef KeyStr,
+           std::unordered_map<exegesis::ValidationEvent, int64_t> &VI) {
+    Expected<exegesis::ValidationEvent> Key = stringToValidationEvent(KeyStr);
+    if (!Key) {
+      Io.setError("Key is not a valid validation event");
+      return;
+    }
+    Io.mapRequired(KeyStr.str().c_str(), VI[*Key]);
+  }
+
+  static void
+  output(IO &Io, std::unordered_map<exegesis::ValidationEvent, int64_t> &VI) {
+    for (auto &IndividualVI : VI) {
+      Io.mapRequired(validationEventToString(IndividualVI.first),
+                     IndividualVI.second);
+    }
+  }
+};
+
 // exegesis::Measure is rendererd as a flow instead of a list.
 // e.g. { "key": "the key", "value": 0123 }
 template <> struct MappingTraits<exegesis::BenchmarkMeasure> {
@@ -203,6 +254,7 @@ template <> struct MappingTraits<exegesis::BenchmarkMeasure> {
     }
     Io.mapRequired("value", Obj.PerInstructionValue);
     Io.mapOptional("per_snippet_value", Obj.PerSnippetValue);
+    Io.mapOptional("validation_counters", Obj.ValidationCounters);
   }
   static const bool flow = true;
 };
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index 0d08febae20cb3..f142da07e0a47d 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -32,6 +32,13 @@ class Error;
 
 namespace exegesis {
 
+enum ValidationEvent {
+  L1DCacheLoadMiss,
+  InstructionRetired,
+  DataTLBLoadMiss,
+  DataTLBStoreMiss
+};
+
 enum class BenchmarkPhaseSelectorE {
   PrepareSnippet,
   PrepareAndAssembleSnippet,
@@ -77,8 +84,10 @@ struct BenchmarkKey {
 
 struct BenchmarkMeasure {
   // A helper to create an unscaled BenchmarkMeasure.
-  static BenchmarkMeasure Create(std::string Key, double Value) {
-    return {Key, Value, Value};
+  static BenchmarkMeasure
+  Create(std::string Key, double Value,
+         std::unordered_map<ValidationEvent, int64_t> ValCounters) {
+    return {Key, Value, Value, ValCounters};
   }
   std::string Key;
   // This is the per-instruction value, i.e. measured quantity scaled per
@@ -87,6 +96,8 @@ struct BenchmarkMeasure {
   // This is the per-snippet value, i.e. measured quantity for one repetition of
   // the whole snippet.
   double PerSnippetValue;
+  // These are the validation counter values.
+  std::unordered_map<ValidationEvent, int64_t> ValidationCounters;
 };
 
 // The result of an instruction benchmark.
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index c57fce970b2139..72b3a6e97b417d 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -70,7 +70,9 @@ void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
 }
 
 Expected<llvm::SmallVector<int64_t, 4>>
-BenchmarkRunner::FunctionExecutor::runAndSample(const char *Counters) const {
+BenchmarkRunner::FunctionExecutor::runAndSample(
+    const char *Counters, ArrayRef<const char *> ValidationCounters,
+    SmallVectorImpl<int64_t> &ValidationCounterValues) const {
   // We sum counts when there are several counters for a single ProcRes
   // (e.g. P23 on SandyBridge).
   llvm::SmallVector<int64_t, 4> CounterValues;
@@ -78,8 +80,8 @@ BenchmarkRunner::FunctionExecutor::runAndSample(const char *Counters) const {
   StringRef(Counters).split(CounterNames, '+');
   for (auto &CounterName : CounterNames) {
     CounterName = CounterName.trim();
-    Expected<SmallVector<int64_t, 4>> ValueOrError =
-        runWithCounter(CounterName);
+    Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter(
+        CounterName, ValidationCounters, ValidationCounterValues);
     if (!ValueOrError)
       return ValueOrError.takeError();
     accumulateCounterValues(ValueOrError.get(), &CounterValues);
@@ -119,11 +121,13 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
       (*Result)[I] += NewValues[I];
   }
 
-  Expected<llvm::SmallVector<int64_t, 4>>
-  runWithCounter(StringRef CounterName) const override {
+  Expected<llvm::SmallVector<int64_t, 4>> runWithCounter(
+      StringRef CounterName, ArrayRef<const char *> ValidationCounters,
+      SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
     const ExegesisTarget &ET = State.getExegesisTarget();
     char *const ScratchPtr = Scratch->ptr();
-    auto CounterOrError = ET.createCounter(CounterName, State);
+    auto CounterOrError =
+        ET.createCounter(CounterName, State, ValidationCounters);
 
     if (!CounterOrError)
       return CounterOrError.takeError();
@@ -155,6 +159,14 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
       }
     }
 
+    auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
+    if (!ValidationValuesOrErr)
+      return ValidationValuesOrErr.takeError();
+
+    ArrayRef RealValidationValues = *ValidationValuesOrErr;
+    for (size_t I = 0; I < RealValidationValues.size(); ++I)
+      ValidationCounterValues[I] = RealValidationValues[I];
+
     return Counter->readOrError(Function.getFunctionBytes());
   }
 
@@ -265,7 +277,9 @@ class SubProcessFunctionExecutorImpl
   }
 
   Error createSubProcessAndRunBenchmark(
-      StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues) const {
+      StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
+      ArrayRef<const char *> ValidationCounters,
+      SmallVectorImpl<int64_t> &ValidationCounterValues) const {
     int PipeFiles[2];
     int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles);
     if (PipeSuccessOrErr != 0) {
@@ -305,8 +319,8 @@ class SubProcessFunctionExecutorImpl
     }
 
     const ExegesisTarget &ET = State.getExegesisTarget();
-    auto CounterOrError =
-        ET.createCounter(CounterName, State, ParentOrChildPID);
+    auto CounterOrError = ET.createCounter(
+        CounterName, State, ValidationCounters, ParentOrChildPID);
 
     if (!CounterOrError)
       return CounterOrError.takeError();
@@ -361,6 +375,14 @@ class SubProcessFunctionExecutorImpl
           return CounterValueOrErr.takeError();
         CounterValues.swap(*CounterValueOrErr);
 
+        auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
+        if (!ValidationValuesOrErr)
+          return ValidationValuesOrErr.takeError();
+
+        ArrayRef RealValidationValues = *ValidationValuesOrErr;
+        for (size_t I = 0; I < RealValidationValues.size(); ++I)
+          ValidationCounterValues[I] = RealValidationValues[I];
+
         return Error::success();
       }
       // The child exited, but not successfully
@@ -459,15 +481,15 @@ class SubProcessFunctionExecutorImpl
     exit(0);
   }
 
-  Expected<llvm::SmallVector<int64_t, 4>>
-  runWithCounter(StringRef CounterName) const override {
+  Expected<llvm::SmallVector<int64_t, 4>> runWithCounter(
+      StringRef CounterName, ArrayRef<const char *> ValidationCounters,
+      SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
     SmallVector<int64_t, 4> Value(1, 0);
-    Error PossibleBenchmarkError =
-        createSubProcessAndRunBenchmark(CounterName, Value);
+    Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
+        CounterName, Value, ValidationCounters, ValidationCounterValues);
 
-    if (PossibleBenchmarkError) {
+    if (PossibleBenchmarkError)
       return std::move(PossibleBenchmarkError);
-    }
 
     return Value;
   }
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
index d746a0f775646f..80ec2d2fcfe576 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -93,14 +93,18 @@ class BenchmarkRunner {
     virtual ~FunctionExecutor();
 
     Expected<llvm::SmallVector<int64_t, 4>>
-    runAndSample(const char *Counters) const;
+    runAndSample(const char *Counters,
+                 ArrayRef<const char *> ValidationCounters,
+                 SmallVectorImpl<int64_t> &ValidationCounterValues) const;
 
   protected:
     static void
     accumulateCounterValues(const llvm::SmallVectorImpl<int64_t> &NewValues,
                             llvm::SmallVectorImpl<int64_t> *Result);
     virtual Expected<llvm::SmallVector<int64_t, 4>>
-    runWithCounter(StringRef CounterName) const = 0;
+    runWithCounter(StringRef CounterName,
+                   ArrayRef<const char *> ValidationCounters,
+                   SmallVectorImpl<int64_t> &ValidationCounterValues) const = 0;
   };
 
 protected:
diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
index eda450579a5838..c2179189d5dc0a 100644
--- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
@@ -22,8 +22,9 @@ LatencyBenchmarkRunner::LatencyBenchmarkRunner(
     const LLVMState &State, Benchmark::ModeE Mode,
     BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
     Benchmark::ResultAggregationModeE ResultAgg, ExecutionModeE ExecutionMode,
-    unsigned BenchmarkRepeatCount)
-    : BenchmarkRunner(State, Mode, BenchmarkPhaseSelector, ExecutionMode) {
+    ArrayRef<ValidationEvent> ValCounters, unsigned BenchmarkRepeatCount)
+    : BenchmarkRunner(State, Mode, BenchmarkPhaseSelector, ExecutionMode),
+      ValidationCounters(ValCounters) {
   assert((Mode == Benchmark::Latency || Mode == Benchmark::InverseThroughput) &&
          "invalid mode");
   ResultAggMode = ResultAgg;
@@ -72,11 +73,26 @@ Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
   // ResultAggMode.
   llvm::SmallVector<int64_t, 4> AccumulatedValues;
   double MinVariance = std::numeric_limits<double>::infinity();
-  const char *CounterName = State.getPfmCounters().CycleCounter;
+  const PfmCountersInfo &PCI = State.getPfmCounters();
+  const char *CounterName = PCI.CycleCounter;
+
+  SmallVector<const char *> ValCountersToRun;
+  ValCountersToRun.reserve(ValidationCounters.size());
+  for (const ValidationEvent ValEvent : ValidationCounters) {
+    auto ValCounterIt = PCI.ValidationCounters.find(ValEvent);
+    if (ValCounterIt == PCI.ValidationCounters.end())
+      return make_error<Failure>("Cannot create validation counter");
+
+    ValCountersToRun.push_back(ValCounterIt->second);
+  }
+
+  SmallVector<int64_t> ValCounterValues(ValCountersToRun.size(), -1);
   // Values count for each run.
   int ValuesCount = 0;
   for (size_t I = 0; I < NumMeasurements; ++I) {
-    auto ExpectedCounterValues = Executor.runAndSample(CounterName);
+    SmallVector<int64_t> IterationValCounterValues(ValCountersToRun.size(), -1);
+    auto ExpectedCounterValues = Executor.runAndSample(
+        CounterName, ValCountersToRun, IterationValCounterValues);
     if (!ExpectedCounterValues)
       return ExpectedCounterValues.takeError();
     ValuesCount = ExpectedCounterValues.get().size();
@@ -90,8 +106,15 @@ Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
         MinVariance = Variance;
       }
     }
+
+    for (size_t I = 0; I < ValCounterValues.size(); ++I)
+      ValCounterValues[I] += IterationValCounterValues[I];
   }
 
+  std::unordered_map<ValidationEvent, int64_t> ValidationInfo;
+  for (size_t I = 0; I < ValidationCounters.size(); ++I)
+    ValidationInfo[ValidationCounters[I]] = ValCounterValues[I];
+
   std::string ModeName;
   switch (Mode) {
   case Benchmark::Latency:
@@ -112,25 +135,26 @@ Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
     std::vector<BenchmarkMeasure> Result;
     Result.reserve(AccumulatedValues.size());
     for (const int64_t Value : AccumulatedValues)
-      Result.push_back(BenchmarkMeasure::Create(ModeName, Value));
+      Result.push_back(
+          BenchmarkMeasure::Create(ModeName, Value, ValidationInfo));
     return std::move(Result);
   }
   case Benchmark::Min: {
     std::vector<BenchmarkMeasure> Result;
-    Result.push_back(
-        BenchmarkMeasure::Create(ModeName, findMin(AccumulatedValues)));
+    Result.push_back(BenchmarkMeasure::Create(
+        ModeName, findMin(AccumulatedValues), ValidationInfo));
     return std::move(Result);
   }
   case Benchmark::Max: {
     std::vector<BenchmarkMeasure> Result;
-    Result.push_back(
-        BenchmarkMeasure::Create(ModeName, findMax(AccumulatedValues)));
+    Result.push_back(BenchmarkMeasure::Create(
+        ModeName, findMax(AccumulatedValues), ValidationInfo));
     return std::move(Result);
   }
   case Benchmark::Mean: {
     std::vector<BenchmarkMeasure> Result;
-    Result.push_back(
-        BenchmarkMeasure::Create(ModeName, findMean(AccumulatedValues)));
+    Result.push_back(BenchmarkMeasure::Create(
+        ModeName, findMean(AccumulatedValues), ValidationInfo));
     return std::move(Result);
   }
   }
diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
index fc159d7d9b5e98..2192679d87127e 100644
--- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h
@@ -15,6 +15,7 @@
 #define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H
 
 #include "BenchmarkRunner.h"
+#include "Target.h"
 
 namespace llvm {
 namespace exegesis {
@@ -25,6 +26,7 @@ class LatencyBenchmarkRunner : public BenchmarkRunner {
                          BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
                          Benchmark::ResultAggregationModeE ResultAggMode,
                          ExecutionModeE ExecutionMode,
+                         ArrayRef<ValidationEvent> ValCounters,
                          unsigned BenchmarkRepeatCount);
   ~LatencyBenchmarkRunner() override;
 
@@ -34,6 +36,7 @@ class LatencyBenchmarkRunner : public BenchmarkRunner {
 
   Benchmark::ResultAggregationModeE ResultAggMode;
   unsigned NumMeasurements;
+  SmallVector<ValidationEvent> ValidationCounters;
 };
 } // namespace exegesis
 } // namespace llvm
diff --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
index 314de1ec32366f..742dc0f939de58 100644
--- a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
+++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
@@ -107,15 +107,18 @@ StringRef PerfEvent::getPfmEventString() const {
   return FullQualifiedEventString;
 }
 
-Counter::Counter(PerfEvent &&E, pid_t ProcessID) : Event(std::move(E)) {
+Counter::Counter(PerfEvent &&E, std::vector<PerfEvent> &&ValEvents,
+                 pid_t ProcessID)
+    : Event(std::move(E)), ValidationEvents(std::move(ValEvents)),
+      ValidationFDs(ValidationEvents.size(), -1) {
   assert(Event.valid());
   IsDummyEvent = Event.name() == PerfEvent::DummyEventString;
   if (!IsDummyEvent)
-    initRealEvent(E, ProcessID);
+    initRealEvent(ProcessID);
 }
 
 #ifdef HAVE_LIBPFM
-void Counter::initRealEvent(const PerfEvent &E, pid_t ProcessID) {
+void Counter::initRealEvent(pid_t ProcessID) {
   const int Cpu = -1;     // measure any processor.
   const int GroupFd = -1; // no grouping of counters.
   const uint32_t Flags = 0;
@@ -131,6 +134,17 @@ void Counter::initRealEvent(const PerfEvent &E, pid_t ProcessID) {
               "pass --use-dummy-perf-counters command line option.\n";
   }
   assert(FileDescriptor != -1 && "Unable to open event");
+
+  // Setup validation counters
+  assert(ValidationFDs.size() == ValidationEvents.size());
+  for (size_t I = 0; I < ValidationEvents.size(); ++I) {
+    perf_event_attr AttrCopy = *ValidationEvents[I].attribute();
+    ValidationFDs[I] =
+        perf_event_open(&AttrCopy, ProcessID, Cpu, FileDescriptor, Flags);
+    if (ValidationFDs[I] == -1)
+      errs() << "Unable to open validation event. ERRNO: " << strerror(errno)
+             << "\n";
+  }
 }
 
 Counter::~Counter() {
@@ -165,6 +179,23 @@ Counter::readOrError(StringRef /*unused*/) const {
   return Result;
 }
 
+llvm::Expected<llvm::SmallVector<int64_t>>
+Counter::readValidationCountersOrError() const {
+  llvm::SmallVector<int64_t, 4> Result;
+  for (const int ValidationFD : ValidationFDs) {
+    int64_t Count;
+    if (!IsDummyEvent) {
+      ssize_t ReadSize = ::read(ValidationFD, &Count, sizeof(Count));
+      if (ReadSize != sizeof(Count))
+        return llvm::make_error<llvm::StringError>(
+            "Failed to read validation ounter", llvm::errc::io_error);
+    } else
+      Count = -1;
+    Result.push_back(Count);
+  }
+  return Result;
+}
+
 int Counter::numValues() const { return 1; }
 #else
 
diff --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.h b/llvm/tools/llvm-exegesis/lib/PerfHelper.h
index 894aac1f197ed1..e538023905bbe6 100644
--- a/llvm/tools/llvm-exegesis/lib/PerfHelper.h
+++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.h
@@ -82,7 +82,8 @@ class PerfEvent {
 class Counter {
 public:
   // event: the PerfEvent to measure.
-  explicit Counter(PerfEvent &&event, pid_t ProcessID = 0);
+  explicit Counter(PerfEvent &&event, std::vector<PerfEvent> &&ValEvents,
+                   pid_t ProcessID = 0);
 
   Counter(const Counter &) = delete;
   Counter(Counter &&other) = default;
@@ -104,6 +105,9 @@ class Counter {
   virtual llvm::Expected<llvm::SmallVector<int64_t, 4>>
   readOrError(StringRef FunctionBytes = StringRef()) const;
 
+  virtual llvm::Expected<llvm::SmallVector<int64_t>>
+  readValidationCountersOrError() const;
+
   virtual int numValues() const;
 
   int getFileDescriptor() const { return FileDescriptor; }
@@ -112,9 +116,11 @@ class Counter {
   PerfEvent Event;
   int FileDescriptor = -1;
   bool IsDummyEvent;
+  std::vector<PerfEvent> ValidationEvents;
+  std::vector<int> ValidationFDs;
 
 private:
-  void initRealEvent(const PerfEvent &E, pid_t ProcessID);
+  void initRealEvent(pid_t ProcessID);
 };
 
 } // namespace pfm
diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp
index 20b4afb9b8f676..1279c1d422387b 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Target.cpp
@@ -37,6 +37,7 @@ const ExegesisTarget *ExegesisTarget::lookup(Triple TT) {
 
 Expected<std::unique_ptr<pfm::Counter>>
 ExegesisTarget::createCounter(StringRef CounterName, const LLVMState &,
+                              ArrayRef<const char *> ValidationCounters,
                               const pid_t ProcessID) const {
   pfm::PerfEvent Event(CounterName);
   if (!Event.valid())
@@ -45,7 +46,18 @@ ExegesisTarget::createCounter(StringRef CounterName, const LLVMState &,
             .concat(CounterName)
             .concat("'"));
 
-  return std::make_unique<pfm::Counter>(std::move(Event), ProcessID);
+  std::vector<pfm::PerfEvent> ValidationEvents;
+  for (const char *ValCounterName : ValidationCounters) {
+    ValidationEvents.emplace_back(ValCounterName);
+    if (!ValidationEvents.back().valid())
+      return llvm::make_error<Failure>(
+          llvm::Twine("Unable to create validation counter with name '")
+              .concat(ValCounterName)
+              .concat("'"));
+  }
+
+  return std::make_unique<pfm::Counter>(std::move(Event),
+                                        std::move(ValidationEvents), ProcessID);
 }
 
 void ExegesisTarget::registerTarget(ExegesisTarget *Target) {
@@ -79,7 +91,7 @@ ExegesisTarget::createBenchmarkRunner(
     Benchmark::ModeE Mode, const LLVMState &State,
     BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
     BenchmarkRunner::ExecutionModeE ExecutionMode,
-    unsigned BenchmarkRepeatCount,
+    unsigned BenchmarkRepeatCount, ArrayRef<ValidationEvent> ValidationCounters,
     Benchmark::ResultAggregationModeE ResultAggMode) const {
   PfmCountersInfo PfmCounters = State.getPfmCounters();
   switch (Mode) {
@@ -101,9 +113,9 @@ ExegesisTarget::createBenchmarkRunner(
                   "benchmarking or --use-dummy-perf-counters to not query "
                   "the kernel for real event counts."));
     }
-    return createLatencyBenchmarkRunner(State, Mode, BenchmarkPhaseSelector,
-                                        ResultAggMode, ExecutionMode,
-                                        BenchmarkRepeatCount);
+    return createLatencyBenchmarkRunner(
+        State, Mode, BenchmarkPhaseSelector, ResultAggMode, ExecutionMode,
+        ValidationCounters, BenchmarkRepeatCount);
   case Benchmark::Uops:
     if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
         !PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
@@ -113,7 +125,8 @@ ExegesisTarget::createBenchmarkRunner(
           "benchmarking or --use-dummy-perf-counters to not query the kernel "
           "for real event counts.");
     return createUopsBenchmarkRunner(State, BenchmarkPhaseSelector,
-                                     ResultAggMode, ExecutionMode);
+                                     ResultAggMode, ExecutionMode,
+                                     ValidationCounters);
   }
   return nullptr;
 }
@@ -133,18 +146,20 @@ std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
     BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
     Benchmark::ResultAggregationModeE ResultAggMode,
     BenchmarkRunner::ExecutionModeE ExecutionMode,
+    ArrayRef<ValidationEvent> ValidationCounters,
     unsigned BenchmarkRepeatCount) const {
   return std::make_unique<LatencyBenchmarkRunner>(
       State, Mode, BenchmarkPhaseSelector, ResultAggMode, ExecutionMode,
-      BenchmarkRepeatCount);
+      ValidationCounters, BenchmarkRepeatCount);
 }
 
 std::unique_ptr<BenchmarkRunner> ExegesisTarget::createUopsBenchmarkRunner(
     const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
     Benchmark::ResultAggregationModeE /*unused*/,
-    BenchmarkRunner::ExecutionModeE ExecutionMode) const {
-  return std::make_unique<UopsBenchmarkRunner>(State, BenchmarkPhaseSelector,
-                                               ExecutionMode);
+    BenchmarkRunner::ExecutionModeE ExecutionMode,
+    ArrayRef<ValidationEvent> ValidationCounters) const {
+  return std::make_unique<UopsBenchmarkRunner>(
+      State, BenchmarkPhaseSelector, ExecutionMode, ValidationCounters);
 }
 
 const PfmCountersInfo PfmCountersInfo::Default = {
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 3956bc983181f6..eab9e78d929984 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -41,13 +41,6 @@ extern cl::OptionCategory Options;
 extern cl::OptionCategory BenchmarkOptions;
 extern cl::OptionCategory AnalysisOptions;
 
-enum ValidationEvent {
-  L1DCacheLoadMiss,
-  InstructionRetired,
-  DataTLBLoadMiss,
-  DataTLBStoreMiss
-};
-
 struct PfmCountersInfo {
   // An optional name of a performance counter that can be used to measure
   // cycles.
@@ -90,6 +83,7 @@ class ExegesisTarget {
   // Targets can use this to create target-specific perf counters.
   virtual Expected<std::unique_ptr<pfm::Counter>>
   createCounter(StringRef CounterName, const LLVMState &State,
+                ArrayRef<const char *> ValidationCounters,
                 const pid_t ProcessID = 0) const;
 
   // Targets can use this to add target-specific passes in assembleToStream();
@@ -274,6 +268,7 @@ class ExegesisTarget {
       BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
       BenchmarkRunner::ExecutionModeE ExecutionMode,
       unsigned BenchmarkRepeatCount,
+      ArrayRef<ValidationEvent> ValidationCounters,
       Benchmark::ResultAggregationModeE ResultAggMode = Benchmark::Min) const;
 
   // Returns the ExegesisTarget for the given triple or nullptr if the target
@@ -318,11 +313,13 @@ class ExegesisTarget {
       BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
       Benchmark::ResultAggregationModeE ResultAggMode,
       BenchmarkRunner::ExecutionModeE ExecutionMode,
+      ArrayRef<ValidationEvent> ValidationCounters,
       unsigned BenchmarkRepeatCount) const;
   std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
       const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
       Benchmark::ResultAggregationModeE ResultAggMode,
-      BenchmarkRunner::ExecutionModeE ExecutionMode) const;
+      BenchmarkRunner::ExecutionModeE ExecutionMode,
+      ArrayRef<ValidationEvent> ValidationCounters) const;
 
   const ExegesisTarget *Next = nullptr;
   const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;
diff --git a/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp
index 6351fdd3345a83..ac1fb89cceda16 100644
--- a/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp
@@ -19,25 +19,50 @@ Expected<std::vector<BenchmarkMeasure>>
 UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const {
   std::vector<BenchmarkMeasure> Result;
   const PfmCountersInfo &PCI = State.getPfmCounters();
+
+  SmallVector<const char *> ValCountersToRun;
+  ValCountersToRun.reserve(ValidationCounters.size());
+  for (const ValidationEvent ValEvent : ValidationCounters) {
+    auto ValCounterIt = PCI.ValidationCounters.find(ValEvent);
+    if (ValCounterIt == PCI.ValidationCounters.end())
+      return make_error<Failure>("Cannot create validation counter");
+
+    ValCountersToRun.push_back(ValCounterIt->second);
+  }
+
   // Uops per port.
   for (const auto *IssueCounter = PCI.IssueCounters,
                   *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters;
        IssueCounter != IssueCounterEnd; ++IssueCounter) {
+    SmallVector<int64_t> ValCounterPortValues(ValCountersToRun.size(), -1);
     if (!IssueCounter->Counter)
       continue;
-    auto ExpectedCounterValue = Executor.runAndSample(IssueCounter->Counter);
+    auto ExpectedCounterValue = Executor.runAndSample(
+        IssueCounter->Counter, ValCountersToRun, ValCounterPortValues);
     if (!ExpectedCounterValue)
       return ExpectedCounterValue.takeError();
-    Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName,
-                                              (*ExpectedCounterValue)[0]));
+
+    std::unordered_map<ValidationEvent, int64_t> ValidationInfo;
+    for (size_t I = 0; I < ValidationCounters.size(); ++I)
+      ValidationInfo[ValidationCounters[I]] = ValCounterPortValues[I];
+
+    Result.push_back(BenchmarkMeasure::Create(
+        IssueCounter->ProcResName, (*ExpectedCounterValue)[0], ValidationInfo));
   }
   // NumMicroOps.
   if (const char *const UopsCounter = PCI.UopsCounter) {
-    auto ExpectedCounterValue = Executor.runAndSample(UopsCounter);
+    SmallVector<int64_t> ValCounterUopsValues(ValCountersToRun.size(), -1);
+    auto ExpectedCounterValue = Executor.runAndSample(
+        UopsCounter, ValCountersToRun, ValCounterUopsValues);
     if (!ExpectedCounterValue)
       return ExpectedCounterValue.takeError();
-    Result.push_back(
-        BenchmarkMeasure::Create("NumMicroOps", (*ExpectedCounterValue)[0]));
+
+    std::unordered_map<ValidationEvent, int64_t> ValidationInfo;
+    for (size_t I = 0; I < ValidationCounters.size(); ++I)
+      ValidationInfo[ValidationCounters[I]] = ValCounterUopsValues[I];
+
+    Result.push_back(BenchmarkMeasure::Create(
+        "NumMicroOps", (*ExpectedCounterValue)[0], ValidationInfo));
   }
   return std::move(Result);
 }
diff --git a/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h
index 337f0936701225..3e155db00cde71 100644
--- a/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h
@@ -15,6 +15,7 @@
 #define LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H
 
 #include "BenchmarkRunner.h"
+#include "Target.h"
 
 namespace llvm {
 namespace exegesis {
@@ -23,9 +24,11 @@ class UopsBenchmarkRunner : public BenchmarkRunner {
 public:
   UopsBenchmarkRunner(const LLVMState &State,
                       BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
-                      ExecutionModeE ExecutionMode)
+                      ExecutionModeE ExecutionMode,
+                      ArrayRef<ValidationEvent> ValCounters)
       : BenchmarkRunner(State, Benchmark::Uops, BenchmarkPhaseSelector,
-                        ExecutionMode) {}
+                        ExecutionMode),
+        ValidationCounters(ValCounters) {}
   ~UopsBenchmarkRunner() override;
 
   static constexpr const size_t kMinNumDifferentAddresses = 6;
@@ -33,6 +36,8 @@ class UopsBenchmarkRunner : public BenchmarkRunner {
 private:
   Expected<std::vector<BenchmarkMeasure>>
   runMeasurements(const FunctionExecutor &Executor) const override;
+
+  SmallVector<ValidationEvent> ValidationCounters;
 };
 
 } // namespace exegesis
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 0ab74b8e00da11..5cf54ba0899059 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -681,6 +681,7 @@ class ExegesisX86Target : public ExegesisTarget {
 
   Expected<std::unique_ptr<pfm::Counter>>
   createCounter(StringRef CounterName, const LLVMState &State,
+                ArrayRef<const char *> ValidationCounters,
                 const pid_t ProcessID) const override {
     // If LbrSamplingPeriod was provided, then ignore the
     // CounterName because we only have one for LBR.
@@ -689,6 +690,13 @@ class ExegesisX86Target : public ExegesisTarget {
       // __linux__ (for now)
 #if defined(HAVE_LIBPFM) && defined(LIBPFM_HAS_FIELD_CYCLES) &&                \
     defined(__linux__)
+      // TODO(boomanaiden154): Add in support for using validation counters when
+      // using LBR counters.
+      if (ValidationCounters.size() > 0)
+        return llvm::make_error<llvm::StringError>(
+            "Using LBR is not currently supported with validation counters",
+            llvm::errc::invalid_argument);
+
       return std::make_unique<X86LbrCounter>(
           X86LbrPerfEvent(LbrSamplingPeriod));
 #else
@@ -698,7 +706,8 @@ class ExegesisX86Target : public ExegesisTarget {
           llvm::errc::invalid_argument);
 #endif
     }
-    return ExegesisTarget::createCounter(CounterName, State, ProcessID);
+    return ExegesisTarget::createCounter(CounterName, State, ValidationCounters,
+                                         ProcessID);
   }
 
   enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };
diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
index 423c45e22bf8c0..d51773b2204624 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
@@ -141,7 +141,7 @@ X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
 }
 
 X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
-    : Counter(std::move(NewEvent)) {
+    : Counter(std::move(NewEvent), {}) {
   MMappedBuffer = mmap(nullptr, kMappedBufferSize, PROT_READ | PROT_WRITE,
                        MAP_SHARED, FileDescriptor, 0);
   if (MMappedBuffer == MAP_FAILED)
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 1b35fde815f11f..cbdfe12a5d6556 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -268,6 +268,22 @@ static cl::opt<unsigned> BenchmarkRepeatCount(
              "before aggregating the results"),
     cl::cat(BenchmarkOptions), cl::init(30));
 
+static cl::list<ValidationEvent> ValidationCounters(
+    "validation-counter",
+    cl::desc(
+        "The name of a validation counter to run concurrently with the main "
+        "counter to validate benchmarking assumptions"),
+    cl::CommaSeparated, cl::cat(BenchmarkOptions),
+    cl::values(clEnumValN(ValidationEvent::L1DCacheLoadMiss, "l1d-load-misses",
+                          "Count L1D cache load misses"),
+               clEnumValN(ValidationEvent::InstructionRetired,
+                          "instructions-retired", "Count retired instructions"),
+               clEnumValN(ValidationEvent::DataTLBLoadMiss,
+                          "data-tlb-load-misses", "Count data TLB load misses"),
+               clEnumValN(ValidationEvent::DataTLBStoreMiss,
+                          "data-tlb-store-misses",
+                          "Count data TLB store misses")));
+
 static ExitOnError ExitOnErr("llvm-exegesis error: ");
 
 // Helper function that logs the error(s) and exits.
@@ -501,7 +517,7 @@ void benchmarkMain() {
   const std::unique_ptr<BenchmarkRunner> Runner =
       ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
           BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
-          BenchmarkRepeatCount, ResultAggMode));
+          BenchmarkRepeatCount, ValidationCounters, ResultAggMode));
   if (!Runner) {
     ExitWithError("cannot create benchmark runner");
   }
diff --git a/llvm/unittests/tools/llvm-exegesis/ClusteringTest.cpp b/llvm/unittests/tools/llvm-exegesis/ClusteringTest.cpp
index 25fe813502e54a..26bb6c5d2e4c2f 100644
--- a/llvm/unittests/tools/llvm-exegesis/ClusteringTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/ClusteringTest.cpp
@@ -32,17 +32,17 @@ TEST(ClusteringTest, Clusters3D) {
 
   // Cluster around (x=0, y=1, z=2): points {0, 3}.
   Points[0].Measurements = {
-      {"x", 0.01, 0.0}, {"y", 1.02, 0.0}, {"z", 1.98, 0.0}};
+      {"x", 0.01, 0.0, {}}, {"y", 1.02, 0.0, {}}, {"z", 1.98, 0.0, {}}};
   Points[3].Measurements = {
-      {"x", -0.01, 0.0}, {"y", 1.02, 0.0}, {"z", 1.98, 0.0}};
+      {"x", -0.01, 0.0, {}}, {"y", 1.02, 0.0, {}}, {"z", 1.98, 0.0, {}}};
   // Cluster around (x=1, y=1, z=2): points {1, 4}.
   Points[1].Measurements = {
-      {"x", 1.01, 0.0}, {"y", 1.02, 0.0}, {"z", 1.98, 0.0}};
+      {"x", 1.01, 0.0, {}}, {"y", 1.02, 0.0, {}}, {"z", 1.98, 0.0, {}}};
   Points[4].Measurements = {
-      {"x", 0.99, 0.0}, {"y", 1.02, 0.0}, {"z", 1.98, 0.0}};
+      {"x", 0.99, 0.0, {}}, {"y", 1.02, 0.0, {}}, {"z", 1.98, 0.0, {}}};
   // Cluster around (x=0, y=0, z=0): points {5}, marked as noise.
   Points[5].Measurements = {
-      {"x", 0.0, 0.0}, {"y", 0.01, 0.0}, {"z", -0.02, 0.0}};
+      {"x", 0.0, 0.0, {}}, {"y", 0.01, 0.0, {}}, {"z", -0.02, 0.0, {}}};
   // Error cluster: points {2}
   Points[2].Error = "oops";
 
@@ -71,8 +71,8 @@ TEST(ClusteringTest, Clusters3D) {
 TEST(ClusteringTest, Clusters3D_InvalidSize) {
   std::vector<Benchmark> Points(6);
   Points[0].Measurements = {
-      {"x", 0.01, 0.0}, {"y", 1.02, 0.0}, {"z", 1.98, 0.0}};
-  Points[1].Measurements = {{"y", 1.02, 0.0}, {"z", 1.98, 0.0}};
+      {"x", 0.01, 0.0, {}}, {"y", 1.02, 0.0, {}}, {"z", 1.98, 0.0, {}}};
+  Points[1].Measurements = {{"y", 1.02, 0.0, {}}, {"z", 1.98, 0.0, {}}};
   auto Error =
       BenchmarkClustering::create(
           Points, BenchmarkClustering::ModeE::Dbscan, 2, 0.25)
@@ -83,8 +83,8 @@ TEST(ClusteringTest, Clusters3D_InvalidSize) {
 
 TEST(ClusteringTest, Clusters3D_InvalidOrder) {
   std::vector<Benchmark> Points(6);
-  Points[0].Measurements = {{"x", 0.01, 0.0}, {"y", 1.02, 0.0}};
-  Points[1].Measurements = {{"y", 1.02, 0.0}, {"x", 1.98, 0.0}};
+  Points[0].Measurements = {{"x", 0.01, 0.0, {}}, {"y", 1.02, 0.0, {}}};
+  Points[1].Measurements = {{"y", 1.02, 0.0, {}}, {"x", 1.98, 0.0, {}}};
   auto Error =
       BenchmarkClustering::create(
           Points, BenchmarkClustering::ModeE::Dbscan, 2, 0.25)
@@ -110,12 +110,9 @@ TEST(ClusteringTest, Ordering) {
 TEST(ClusteringTest, Ordering1) {
   std::vector<Benchmark> Points(3);
 
-  Points[0].Measurements = {
-      {"x", 0.0, 0.0}};
-  Points[1].Measurements = {
-      {"x", 1.0, 0.0}};
-  Points[2].Measurements = {
-      {"x", 2.0, 0.0}};
+  Points[0].Measurements = {{"x", 0.0, 0.0, {}}};
+  Points[1].Measurements = {{"x", 1.0, 0.0, {}}};
+  Points[2].Measurements = {{"x", 2.0, 0.0, {}}};
 
   auto Clustering = BenchmarkClustering::create(
       Points, BenchmarkClustering::ModeE::Dbscan, 2, 1.1);
@@ -127,12 +124,9 @@ TEST(ClusteringTest, Ordering1) {
 TEST(ClusteringTest, Ordering2) {
   std::vector<Benchmark> Points(3);
 
-  Points[0].Measurements = {
-      {"x", 0.0, 0.0}};
-  Points[1].Measurements = {
-      {"x", 2.0, 0.0}};
-  Points[2].Measurements = {
-      {"x", 1.0, 0.0}};
+  Points[0].Measurements = {{"x", 0.0, 0.0, {}}};
+  Points[1].Measurements = {{"x", 2.0, 0.0, {}}};
+  Points[2].Measurements = {{"x", 1.0, 0.0, {}}};
 
   auto Clustering = BenchmarkClustering::create(
       Points, BenchmarkClustering::ModeE::Dbscan, 2, 1.1);
diff --git a/llvm/unittests/tools/llvm-exegesis/Mips/BenchmarkResultTest.cpp b/llvm/unittests/tools/llvm-exegesis/Mips/BenchmarkResultTest.cpp
index 201e0a8e7acce2..3d02b8f648411d 100644
--- a/llvm/unittests/tools/llvm-exegesis/Mips/BenchmarkResultTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/Mips/BenchmarkResultTest.cpp
@@ -65,8 +65,8 @@ TEST_F(MipsBenchmarkResultTest, WriteToAndReadFromDisk) {
   ToDisk.CpuName = "cpu_name";
   ToDisk.LLVMTriple = "llvm_triple";
   ToDisk.NumRepetitions = 1;
-  ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, 1});
-  ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, 2});
+  ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, 1, {}});
+  ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, 2, {}});
   ToDisk.Error = "error";
   ToDisk.Info = "info";
 
@@ -124,10 +124,10 @@ TEST_F(MipsBenchmarkResultTest, WriteToAndReadFromDisk) {
 
 TEST_F(MipsBenchmarkResultTest, PerInstructionStats) {
   PerInstructionStats Stats;
-  Stats.push(BenchmarkMeasure{"a", 0.5, 0.0});
-  Stats.push(BenchmarkMeasure{"a", 1.5, 0.0});
-  Stats.push(BenchmarkMeasure{"a", -1.0, 0.0});
-  Stats.push(BenchmarkMeasure{"a", 0.0, 0.0});
+  Stats.push(BenchmarkMeasure{"a", 0.5, 0.0, {}});
+  Stats.push(BenchmarkMeasure{"a", 1.5, 0.0, {}});
+  Stats.push(BenchmarkMeasure{"a", -1.0, 0.0, {}});
+  Stats.push(BenchmarkMeasure{"a", 0.0, 0.0, {}});
   EXPECT_EQ(Stats.min(), -1.0);
   EXPECT_EQ(Stats.max(), 1.5);
   EXPECT_EQ(Stats.avg(), 0.25); // (0.5+1.5-1.0+0.0) / 4
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp
index 6c558b59be982d..a0174cca55d79c 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp
@@ -75,8 +75,8 @@ TEST(BenchmarkResultTest, WriteToAndReadFromDisk) {
   ToDisk.CpuName = "cpu_name";
   ToDisk.LLVMTriple = "llvm_triple";
   ToDisk.NumRepetitions = 1;
-  ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, 1});
-  ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, 2});
+  ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, 1, {}});
+  ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, 2, {}});
   ToDisk.Error = "error";
   ToDisk.Info = "info";
 
@@ -149,10 +149,10 @@ TEST(BenchmarkResultTest, WriteToAndReadFromDisk) {
 
 TEST(BenchmarkResultTest, PerInstructionStats) {
   PerInstructionStats Stats;
-  Stats.push(BenchmarkMeasure{"a", 0.5, 0.0});
-  Stats.push(BenchmarkMeasure{"a", 1.5, 0.0});
-  Stats.push(BenchmarkMeasure{"a", -1.0, 0.0});
-  Stats.push(BenchmarkMeasure{"a", 0.0, 0.0});
+  Stats.push(BenchmarkMeasure{"a", 0.5, 0.0, {}});
+  Stats.push(BenchmarkMeasure{"a", 1.5, 0.0, {}});
+  Stats.push(BenchmarkMeasure{"a", -1.0, 0.0, {}});
+  Stats.push(BenchmarkMeasure{"a", 0.0, 0.0, {}});
   EXPECT_EQ(Stats.min(), -1.0);
   EXPECT_EQ(Stats.max(), 1.5);
   EXPECT_EQ(Stats.avg(), 0.25); // (0.5+1.5-1.0+0.0) / 4

>From b56251a6440af74667cf038d1568efe75245ca17 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sat, 30 Dec 2023 22:25:38 -0800
Subject: [PATCH 2/6] Add llvm-lit test

---
 .../tools/llvm-exegesis/X86/validation-counters.asm  | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 llvm/test/tools/llvm-exegesis/X86/validation-counters.asm

diff --git a/llvm/test/tools/llvm-exegesis/X86/validation-counters.asm b/llvm/test/tools/llvm-exegesis/X86/validation-counters.asm
new file mode 100644
index 00000000000000..7d0c940519e6ab
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/validation-counters.asm
@@ -0,0 +1,12 @@
+# REQUIRES: exegesis-can-measure-latency, exegesis-can-measure-uops, x86_64-linux
+
+# Check that when specifying validation counters, the validation counter is
+# collected and the information is displayed in the output. Test across
+# multiple configurations that need to be wired up separately for validation
+# counter support.
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr --validation-counter=instructions-retired | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr --validation-counter=instructions-retired -execution-mode=subprocess | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=uops -opcode-name=ADD64rr --validation-counter=instructions-retired -execution-mode=subprocess | FileCheck %s
+
+# CHECK: instructions-retired: {{[0-9]+}}

>From 4b0fb1093b1c7ce30d2fe0e0b01f3e32c6647488 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Tue, 2 Jan 2024 23:40:46 -0800
Subject: [PATCH 3/6] Only add instructions retired in this patch

---
 llvm/include/llvm/Target/TargetPfmCounters.td |  5 +---
 llvm/lib/Target/X86/X86PfmCounters.td         | 24 +++++++++++++++++++
 .../llvm-exegesis/lib/BenchmarkResult.cpp     | 14 +----------
 .../tools/llvm-exegesis/lib/BenchmarkResult.h |  3 ---
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    | 12 +++-------
 5 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/llvm/include/llvm/Target/TargetPfmCounters.td b/llvm/include/llvm/Target/TargetPfmCounters.td
index 72f6b39f4878f4..d162327afea2cf 100644
--- a/llvm/include/llvm/Target/TargetPfmCounters.td
+++ b/llvm/include/llvm/Target/TargetPfmCounters.td
@@ -32,10 +32,7 @@ class ValidationEvent <int event_number> {
   int EventNumber = event_number;
 }
 
-def L1DCacheLoadMiss    : ValidationEvent<0>;
-def InstructionRetired  : ValidationEvent<1>;
-def DataTLBLoadMiss     : ValidationEvent<2>;
-def DataTLBStoreMiss    : ValidationEvent<3>;
+def InstructionRetired  : ValidationEvent<0>;
 
 // Validation counters can be tied to a specific event
 class PfmValidationCounter<ValidationEvent event_type, string counter>
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 99cac504f157d3..52c86b1f74c1cb 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -100,6 +100,9 @@ def SandyBridgePfmCounters : ProcPfmCounters {
     PfmIssueCounter<"SBPort4",  "uops_dispatched_port:port_4">,
     PfmIssueCounter<"SBPort5",  "uops_dispatched_port:port_5">
   ];
+  let ValidationCounters = [
+    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+  ];
 }
 def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>;
 def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>;
@@ -117,6 +120,9 @@ def HaswellPfmCounters : ProcPfmCounters {
     PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">,
     PfmIssueCounter<"HWPort7", "uops_executed_port:port_7">
   ];
+  let ValidationCounters = [
+    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+  ];
 }
 def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
 
@@ -133,6 +139,9 @@ def BroadwellPfmCounters : ProcPfmCounters {
     PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">,
     PfmIssueCounter<"BWPort7", "uops_executed_port:port_7">
   ];
+  let ValidationCounters = [
+    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+  ];
 }
 def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>;
 
@@ -149,6 +158,9 @@ def SkylakeClientPfmCounters : ProcPfmCounters {
     PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">,
     PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7">
   ];
+  let ValidationCounters = [
+    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+  ];
 }
 def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>;
 
@@ -165,6 +177,9 @@ def SkylakeServerPfmCounters : ProcPfmCounters {
     PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">,
     PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7">
   ];
+  let ValidationCounters = [
+    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+  ];
 }
 def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
 def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
@@ -182,6 +197,9 @@ def IceLakePfmCounters : ProcPfmCounters {
     PfmIssueCounter<"ICXPort6",  "uops_dispatched_port:port_6">,
     PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8">
   ];
+  let ValidationCounters = [
+    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+  ];
 }
 def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>;
 def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>;
@@ -265,6 +283,9 @@ def ZnVer1PfmCounters : ProcPfmCounters {
     PfmIssueCounter<"ZnAGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
     PfmIssueCounter<"ZnDivider", "div_op_count">
   ];
+  let ValidationCounters = [
+    PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
+  ];
 }
 def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
 
@@ -308,5 +329,8 @@ def ZnVer4PfmCounters : ProcPfmCounters {
     PfmIssueCounter<"Zn4Divider", "div_op_count">,
     PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">
   ];
+  let ValidationCounters = [
+    PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
+  ];
 }
 def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
index 1079df24b457b8..d3f69beb19c46f 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
@@ -195,26 +195,14 @@ template <> struct SequenceElementTraits<exegesis::BenchmarkMeasure> {
 
 const char *validationEventToString(exegesis::ValidationEvent VE) {
   switch (VE) {
-  case exegesis::ValidationEvent::L1DCacheLoadMiss:
-    return "l1d-load-miss";
   case exegesis::ValidationEvent::InstructionRetired:
     return "instructions-retired";
-  case exegesis::ValidationEvent::DataTLBLoadMiss:
-    return "data-tlb-load-misses";
-  case exegesis::ValidationEvent::DataTLBStoreMiss:
-    return "data-tlb-store-misses";
   }
 }
 
 Expected<exegesis::ValidationEvent> stringToValidationEvent(StringRef Input) {
-  if (Input == "l1d-load-miss")
-    return exegesis::ValidationEvent::L1DCacheLoadMiss;
-  else if (Input == "instructions-retired")
+  if (Input == "instructions-retired")
     return exegesis::ValidationEvent::InstructionRetired;
-  else if (Input == "data-tlb-load-misses")
-    return exegesis::ValidationEvent::DataTLBLoadMiss;
-  else if (Input == "data-tlb-store-misses")
-    return exegesis::ValidationEvent::DataTLBStoreMiss;
   else
     return make_error<StringError>("Invalid validation event string",
                                    errc::invalid_argument);
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index f142da07e0a47d..9fc5d851b29abb 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -33,10 +33,7 @@ class Error;
 namespace exegesis {
 
 enum ValidationEvent {
-  L1DCacheLoadMiss,
   InstructionRetired,
-  DataTLBLoadMiss,
-  DataTLBStoreMiss
 };
 
 enum class BenchmarkPhaseSelectorE {
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index cbdfe12a5d6556..29617532d27d86 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -274,15 +274,9 @@ static cl::list<ValidationEvent> ValidationCounters(
         "The name of a validation counter to run concurrently with the main "
         "counter to validate benchmarking assumptions"),
     cl::CommaSeparated, cl::cat(BenchmarkOptions),
-    cl::values(clEnumValN(ValidationEvent::L1DCacheLoadMiss, "l1d-load-misses",
-                          "Count L1D cache load misses"),
-               clEnumValN(ValidationEvent::InstructionRetired,
-                          "instructions-retired", "Count retired instructions"),
-               clEnumValN(ValidationEvent::DataTLBLoadMiss,
-                          "data-tlb-load-misses", "Count data TLB load misses"),
-               clEnumValN(ValidationEvent::DataTLBStoreMiss,
-                          "data-tlb-store-misses",
-                          "Count data TLB store misses")));
+    cl::values(clEnumValN(ValidationEvent::InstructionRetired,
+                          "instructions-retired",
+                          "Count retired instructions")));
 
 static ExitOnError ExitOnErr("llvm-exegesis error: ");
 

>From d17e9c4ea7f893fccade67a5809b2590eebdbeac Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Wed, 3 Jan 2024 11:29:58 -0800
Subject: [PATCH 4/6] Refactor default pfm counters based on reviewer feedback

---
 llvm/lib/Target/X86/X86PfmCounters.td | 48 ++++++++++-----------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 52c86b1f74c1cb..48d68954970915 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -18,6 +18,10 @@ def DefaultPfmCounters : ProcPfmCounters {}
 def : PfmCountersDefaultBinding<DefaultPfmCounters>;
 
 // Intel X86 Counters.
+defvar DefaultIntelPfmValidationCounters = [
+  PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+];
+
 def PentiumPfmCounters : ProcPfmCounters {
   let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
   let UopsCounter = PfmCounter<"uops_retired">;
@@ -100,9 +104,7 @@ def SandyBridgePfmCounters : ProcPfmCounters {
     PfmIssueCounter<"SBPort4",  "uops_dispatched_port:port_4">,
     PfmIssueCounter<"SBPort5",  "uops_dispatched_port:port_5">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
-  ];
+  let ValidationCounters = DefaultIntelPfmValidationCounters;
 }
 def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>;
 def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>;
@@ -120,9 +122,7 @@ def HaswellPfmCounters : ProcPfmCounters {
     PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">,
     PfmIssueCounter<"HWPort7", "uops_executed_port:port_7">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
-  ];
+  let ValidationCounters = DefaultIntelPfmValidationCounters;
 }
 def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
 
@@ -139,9 +139,7 @@ def BroadwellPfmCounters : ProcPfmCounters {
     PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">,
     PfmIssueCounter<"BWPort7", "uops_executed_port:port_7">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
-  ];
+  let ValidationCounters = DefaultIntelPfmValidationCounters;
 }
 def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>;
 
@@ -158,9 +156,7 @@ def SkylakeClientPfmCounters : ProcPfmCounters {
     PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">,
     PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
-  ];
+  let ValidationCounters = DefaultIntelPfmValidationCounters;
 }
 def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>;
 
@@ -177,9 +173,7 @@ def SkylakeServerPfmCounters : ProcPfmCounters {
     PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">,
     PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
-  ];
+  let ValidationCounters = DefaultIntelPfmValidationCounters;
 }
 def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
 def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
@@ -197,9 +191,7 @@ def IceLakePfmCounters : ProcPfmCounters {
     PfmIssueCounter<"ICXPort6",  "uops_dispatched_port:port_6">,
     PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
-  ];
+  let ValidationCounters = DefaultIntelPfmValidationCounters;
 }
 def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>;
 def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>;
@@ -207,6 +199,10 @@ def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>;
 def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>;
 
 // AMD X86 Counters.
+defvar DefaultAMDPfmValidationCounters = [
+  PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
+];
+
 // Set basic counters for AMD cpus that we know libpfm4 supports.
 def DefaultAMDPfmCounters : ProcPfmCounters {
   let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
@@ -283,9 +279,7 @@ def ZnVer1PfmCounters : ProcPfmCounters {
     PfmIssueCounter<"ZnAGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
     PfmIssueCounter<"ZnDivider", "div_op_count">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
-  ];
+  let ValidationCounters = DefaultAMDPfmValidationCounters;
 }
 def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
 
@@ -296,9 +290,7 @@ def ZnVer2PfmCounters : ProcPfmCounters {
     PfmIssueCounter<"Zn2AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
     PfmIssueCounter<"Zn2Divider", "div_op_count">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
-  ];
+  let ValidationCounters = DefaultAMDPfmValidationCounters;
 }
 def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>;
 
@@ -312,9 +304,7 @@ def ZnVer3PfmCounters : ProcPfmCounters {
     PfmIssueCounter<"Zn3Store", "ls_dispatch:store_dispatch">,
     PfmIssueCounter<"Zn3Divider", "div_op_count">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
-  ];
+  let ValidationCounters = DefaultAMDPfmValidationCounters;
 }
 def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>;
 
@@ -329,8 +319,6 @@ def ZnVer4PfmCounters : ProcPfmCounters {
     PfmIssueCounter<"Zn4Divider", "div_op_count">,
     PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">
   ];
-  let ValidationCounters = [
-    PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
-  ];
+  let ValidationCounters = DefaultAMDPfmValidationCounters;
 }
 def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;

>From 15189d46487b6594ece1f0adbb71adab059dea0b Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Wed, 3 Jan 2024 14:09:30 -0800
Subject: [PATCH 5/6] Set PERF_IOC_FLAG_GROUP on ioctl calls

Without this flag, the ioctl calls only work by chance and fail in
certain cases, such as when the user sets the RDX register in subprocess
mode.
---
 llvm/tools/llvm-exegesis/lib/PerfHelper.cpp |  4 ++--
 llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 11 +++++++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
index 742dc0f939de58..44faaeb1512215 100644
--- a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
+++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp
@@ -154,12 +154,12 @@ Counter::~Counter() {
 
 void Counter::start() {
   if (!IsDummyEvent)
-    ioctl(FileDescriptor, PERF_EVENT_IOC_RESET, 0);
+    ioctl(FileDescriptor, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
 }
 
 void Counter::stop() {
   if (!IsDummyEvent)
-    ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);
+    ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
 }
 
 llvm::Expected<llvm::SmallVector<int64_t, 4>>
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 5cf54ba0899059..b702a77715efd9 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -45,6 +45,9 @@
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <unistd.h>
+#ifdef HAVE_LIBPFM
+#include <perfmon/perf_event.h>
+#endif // HAVE_LIBPFM
 #endif
 
 #define GET_AVAILABLE_OPCODE_CHECKER
@@ -1252,7 +1255,7 @@ std::vector<MCInst>
 ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const {
   std::vector<MCInst> ConfigurePerfCounterCode;
   if (SaveRegisters)
-    saveSyscallRegisters(ConfigurePerfCounterCode, 2);
+    saveSyscallRegisters(ConfigurePerfCounterCode, 3);
   ConfigurePerfCounterCode.push_back(
       loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
   ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
@@ -1264,9 +1267,13 @@ ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const
                                          .addReg(0));
   ConfigurePerfCounterCode.push_back(
       loadImmediate(X86::RSI, 64, APInt(64, Request)));
+#ifdef HAVE_LIBPFM
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(X86::RDX, 64, APInt(64, PERF_IOC_FLAG_GROUP)));
+#endif // HAVE_LIBPFM
   generateSyscall(SYS_ioctl, ConfigurePerfCounterCode);
   if (SaveRegisters)
-    restoreSyscallRegisters(ConfigurePerfCounterCode, 2);
+    restoreSyscallRegisters(ConfigurePerfCounterCode, 3);
   return ConfigurePerfCounterCode;
 }
 

>From 0268550a652074a7080395264658ba8d705c2714 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Thu, 4 Jan 2024 14:32:48 -0800
Subject: [PATCH 6/6] Update initial latency benchmark val counter values

---
 llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
index c2179189d5dc0a..51cf05c584e8fe 100644
--- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
@@ -86,7 +86,7 @@ Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
     ValCountersToRun.push_back(ValCounterIt->second);
   }
 
-  SmallVector<int64_t> ValCounterValues(ValCountersToRun.size(), -1);
+  SmallVector<int64_t> ValCounterValues(ValCountersToRun.size(), 0);
   // Values count for each run.
   int ValuesCount = 0;
   for (size_t I = 0; I < NumMeasurements; ++I) {