[llvm] [Exegesis] Add support to serialize/deserialize object files into benchmarks (PR #121993)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 7 12:48:59 PST 2025
https://github.com/mshockwave created https://github.com/llvm/llvm-project/pull/121993
This patch adds support to serialize the assembled object files into the benchmark results, so that we can deserialize them later on and run the measurements. This is useful when the overhead of end-to-end execution (snippet generation + benchmark measurement) is too high and we want to separate it into two stages.
The object file is compressed and serialized into a base64 string. The compression ratio is excellent, since the file consists largely of (nearly) identical instructions.
Currently, this patch can only resume execution right before the measurement phase. It also does not support repetition modes that require more than one snippet (i.e. min and middle-half loop/duplicate).
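The serialization scheme (compress, then Base64-encode into the `object_file` YAML entry with its `compression`, `original_size`, and `compressed_bytes` keys) can be sketched in Python. This is only an illustration: zlib from the Python standard library stands in for LLVM's zstd/zlib support, and the repeated 4-byte pattern stands in for a real assembled snippet.

```python
import base64
import zlib

# Stand-in for an assembled snippet object file: one 4-byte instruction
# encoding repeated many times, like a duplicated/unrolled benchmark body.
raw = bytes.fromhex("33b0a000") * 10000

# Serialize: compress, then Base64-encode so the bytes can be embedded
# in the benchmark YAML (mirrors what Benchmark::setObjectFile does).
compressed = zlib.compress(raw)
object_file = {
    "compression": "zlib",
    "original_size": len(raw),
    "compressed_bytes": base64.b64encode(compressed).decode("ascii"),
}

# Highly repetitive input compresses very well.
print(len(raw), "raw bytes ->", len(object_file["compressed_bytes"]), "chars")

# Deserialize: Base64-decode and decompress, using the recorded original
# size as the buffer hint (mirrors the resumed measurement path).
restored = zlib.decompress(
    base64.b64decode(object_file["compressed_bytes"]),
    bufsize=object_file["original_size"],
)
assert restored == raw
```

The three YAML key names match the mapping the patch adds in BenchmarkResult.cpp; the sample bytes and the resulting ratio are illustrative only.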
-----
This PR stacks on top of #121991
>From d4edd1dccee66ba64fee32290278aeeb507121a8 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Tue, 7 Jan 2025 12:30:11 -0800
Subject: [PATCH 1/2] [Exegesis] Add `--dry-run-measurement`
This flag will make llvm-exegesis run everything except the actual
snippet execution.
---
llvm/docs/CommandGuide/llvm-exegesis.rst | 5 ++++
.../llvm-exegesis/dry-run-measurement.test | 11 +++++++++
.../llvm-exegesis/lib/BenchmarkRunner.cpp | 24 +++++++++++++++----
3 files changed, 35 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/tools/llvm-exegesis/dry-run-measurement.test
diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index 8266d891a5e6b1..cd0bce9e2dbcc3 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -449,6 +449,11 @@ OPTIONS
crash when hardware performance counters are unavailable and for
debugging :program:`llvm-exegesis` itself.
+.. option:: --dry-run-measurement
+ If set, llvm-exegesis runs everything except the actual snippet execution.
+ This is useful if we want to test some part of the code without actually
+ running snippets on the native platform.
+
.. option:: --execution-mode=[inprocess,subprocess]
This option specifies what execution mode to use. The `inprocess` execution
diff --git a/llvm/test/tools/llvm-exegesis/dry-run-measurement.test b/llvm/test/tools/llvm-exegesis/dry-run-measurement.test
new file mode 100644
index 00000000000000..82857e7998b5e6
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/dry-run-measurement.test
@@ -0,0 +1,11 @@
+# RUN: llvm-exegesis --mtriple=riscv64 --mcpu=sifive-p470 --mode=latency --opcode-name=ADD --use-dummy-perf-counters --dry-run-measurement | FileCheck %s
+# REQUIRES: riscv-registered-target
+
+# This test makes sure that llvm-exegesis doesn't execute "cross-compiled" snippets in the presence of
+# --dry-run-measurement. RISC-V was chosen simply because most of the time we run tests on X86 machines.
+
+# Should not contain misleading results.
+# CHECK: measurements: []
+
+# Should not contain error messages like "snippet crashed while running: Segmentation fault".
+# CHECK: error: ''
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index a7771b99e97b1a..9b978c558c1fe7 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -53,6 +53,12 @@
namespace llvm {
namespace exegesis {
+static cl::opt<bool>
+ DryRunMeasurement("dry-run-measurement",
+                     cl::desc("Run every step of the measurement phase "
+ "except executing the snippet."),
+ cl::init(false), cl::Hidden);
+
BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
ExecutionModeE ExecutionMode,
@@ -140,13 +146,21 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
Scratch->clear();
{
auto PS = ET.withSavedState();
+ // We can't directly capture DryRunMeasurement in the lambda below.
+ bool DryRun = DryRunMeasurement;
CrashRecoveryContext CRC;
CrashRecoveryContext::Enable();
- const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
- Counter->start();
- this->Function(ScratchPtr);
- Counter->stop();
- });
+ const bool Crashed =
+ !CRC.RunSafely([this, Counter, ScratchPtr, DryRun]() {
+ if (DryRun) {
+ Counter->start();
+ Counter->stop();
+ } else {
+ Counter->start();
+ this->Function(ScratchPtr);
+ Counter->stop();
+ }
+ });
CrashRecoveryContext::Disable();
PS.reset();
if (Crashed) {
>From 74da08e9c1ac8af35dd06954f61c177fb5ff057d Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 6 Jan 2025 14:25:54 -0800
Subject: [PATCH 2/2] [Exegesis] Add support to serialize/deserialize
benchmarks
TBA...
---
llvm/docs/CommandGuide/llvm-exegesis.rst | 11 +-
.../RISCV/serialize-obj-file.test | 33 +++
.../llvm-exegesis/lib/BenchmarkResult.cpp | 95 ++++++-
.../tools/llvm-exegesis/lib/BenchmarkResult.h | 20 ++
.../llvm-exegesis/lib/BenchmarkRunner.cpp | 43 +++
.../tools/llvm-exegesis/lib/BenchmarkRunner.h | 11 +-
llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 256 +++++++++++-------
7 files changed, 367 insertions(+), 102 deletions(-)
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index cd0bce9e2dbcc3..c3580cdecab7b9 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -299,9 +299,18 @@ OPTIONS
However, it is possible to stop at some stage before measuring. Choices are:
* ``prepare-snippet``: Only generate the minimal instruction sequence.
* ``prepare-and-assemble-snippet``: Same as ``prepare-snippet``, but also dumps an excerpt of the sequence (hex encoded).
- * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``. but also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
+ * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``, but
+ also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
+ If either zlib or zstd is available and we're using either duplicate or
+ loop repetition mode, this phase generates benchmarks with a serialized
+ snippet object file attached to it.
* ``measure``: Same as ``assemble-measured-code``, but also runs the measurement.
+.. option:: --run-measurement=<benchmarks file>
+
+ Given a benchmarks file generated after the ``assemble-measured-code`` phase,
+ resume the measurement phase from it.
+
.. option:: --x86-lbr-sample-period=<nBranches/sample>
Specify the LBR sampling period - how many branches before we take a sample.
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test b/llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
new file mode 100644
index 00000000000000..befd16699bef1a
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
@@ -0,0 +1,33 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --benchmark-phase=assemble-measured-code --mode=latency --benchmarks-file=%t.yaml
+# RUN: FileCheck --input-file=%t.yaml %s --check-prefixes=CHECK,SERIALIZE
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --run-measurement=%t.yaml --mode=latency --dry-run-measurement --use-dummy-perf-counters \
+# RUN: --dump-object-to-disk=%t.o | FileCheck %s --check-prefixes=CHECK,DESERIALIZE
+# RUN: llvm-objdump -d %t.o | FileCheck %s --check-prefix=OBJDUMP
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --dry-run-measurement --use-dummy-perf-counters | \
+# RUN: FileCheck %s --check-prefix=NO-SERIALIZE
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=min | \
+# RUN: FileCheck %s --check-prefix=NO-SERIALIZE
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-loop | \
+# RUN: FileCheck %s --check-prefix=NO-SERIALIZE
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-duplicate | \
+# RUN: FileCheck %s --check-prefix=NO-SERIALIZE
+# REQUIRES: zlib || zstd
+
+# A round-trip test for serializing and deserializing benchmarks.
+
+# CHECK: mode: latency
+# CHECK: instructions:
+# CHECK-NEXT: - 'SH3ADD X{{.*}} X{{.*}} X{{.*}}'
+# CHECK: cpu_name: sifive-p470
+# CHECK-NEXT: llvm_triple: riscv64
+# CHECK-NEXT: min_instructions: 10000
+# CHECK-NEXT: measurements: []
+# SERIALIZE: error: actual measurements skipped.
+# DESERIALIZE: error: ''
+# CHECK: info: Repeating a single explicitly serial instruction
+
+# OBJDUMP: sh3add
+
+# Negative tests: we shouldn't serialize object files in some scenarios.
+
+# NO-SERIALIZE-NOT: object_file:
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
index 84dc23b343c6c0..eff5a6d547cbda 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
@@ -15,10 +15,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/bit.h"
#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Base64.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
static constexpr const char kIntegerPrefix[] = "i_0x";
@@ -27,6 +30,12 @@ static constexpr const char kInvalidOperand[] = "INVALID";
namespace llvm {
+static cl::opt<compression::Format> ForceObjectFileCompressionFormat(
+ "exegesis-force-obj-compress-format", cl::Hidden,
+    cl::desc("Force the use of this compression format for object files."),
+ cl::values(clEnumValN(compression::Format::Zstd, "zstd", "Using Zstandard"),
+ clEnumValN(compression::Format::Zlib, "zlib", "Using LibZ")));
+
namespace {
// A mutable struct holding an LLVMState that can be passed through the
@@ -278,6 +287,13 @@ template <> struct ScalarTraits<exegesis::RegisterValue> {
static const bool flow = true;
};
+template <> struct ScalarEnumerationTraits<compression::Format> {
+ static void enumeration(IO &Io, compression::Format &Format) {
+ Io.enumCase(Format, "zstd", compression::Format::Zstd);
+ Io.enumCase(Format, "zlib", compression::Format::Zlib);
+ }
+};
+
template <> struct MappingContextTraits<exegesis::BenchmarkKey, YamlContext> {
static void mapping(IO &Io, exegesis::BenchmarkKey &Obj,
YamlContext &Context) {
@@ -288,6 +304,33 @@ template <> struct MappingContextTraits<exegesis::BenchmarkKey, YamlContext> {
}
};
+template <> struct MappingTraits<exegesis::Benchmark::ObjectFile> {
+ struct NormalizedBase64Binary {
+ std::string Base64Str;
+
+ NormalizedBase64Binary(IO &) {}
+ NormalizedBase64Binary(IO &, const std::vector<uint8_t> &Data)
+ : Base64Str(llvm::encodeBase64(Data)) {}
+
+ std::vector<uint8_t> denormalize(IO &) {
+ std::vector<char> Buffer;
+ if (Error E = llvm::decodeBase64(Base64Str, Buffer))
+ report_fatal_error(std::move(E));
+
+ StringRef Data(Buffer.data(), Buffer.size());
+ return std::vector<uint8_t>(Data.bytes_begin(), Data.bytes_end());
+ }
+ };
+
+ static void mapping(IO &Io, exegesis::Benchmark::ObjectFile &Obj) {
+ Io.mapRequired("compression", Obj.CompressionFormat);
+ Io.mapRequired("original_size", Obj.UncompressedSize);
+ MappingNormalization<NormalizedBase64Binary, std::vector<uint8_t>>
+ ObjFileString(Io, Obj.CompressedBytes);
+ Io.mapRequired("compressed_bytes", ObjFileString->Base64Str);
+ }
+};
+
template <> struct MappingContextTraits<exegesis::Benchmark, YamlContext> {
struct NormalizedBinary {
NormalizedBinary(IO &io) {}
@@ -325,9 +368,11 @@ template <> struct MappingContextTraits<exegesis::Benchmark, YamlContext> {
Io.mapRequired("error", Obj.Error);
Io.mapOptional("info", Obj.Info);
// AssembledSnippet
- MappingNormalization<NormalizedBinary, std::vector<uint8_t>> BinaryString(
+ MappingNormalization<NormalizedBinary, std::vector<uint8_t>> SnippetString(
Io, Obj.AssembledSnippet);
- Io.mapOptional("assembled_snippet", BinaryString->Binary);
+ Io.mapOptional("assembled_snippet", SnippetString->Binary);
+ // ObjectFile
+ Io.mapOptional("object_file", Obj.ObjFile);
}
};
@@ -364,6 +409,52 @@ Benchmark::readTriplesAndCpusFromYamls(MemoryBufferRef Buffer) {
return Result;
}
+Error Benchmark::setObjectFile(StringRef RawBytes) {
+ SmallVector<uint8_t> CompressedBytes;
+ llvm::compression::Format CompressionFormat;
+
+ auto isFormatAvailable = [](llvm::compression::Format F) -> bool {
+ switch (F) {
+ case compression::Format::Zstd:
+ return compression::zstd::isAvailable();
+ case compression::Format::Zlib:
+ return compression::zlib::isAvailable();
+ }
+ };
+ if (ForceObjectFileCompressionFormat.getNumOccurrences() > 0) {
+ CompressionFormat = ForceObjectFileCompressionFormat;
+ if (!isFormatAvailable(CompressionFormat))
+ return make_error<StringError>(
+ "The designated compression format is not available.",
+ inconvertibleErrorCode());
+ } else if (isFormatAvailable(compression::Format::Zstd)) {
+ // Try newer compression algorithm first.
+ CompressionFormat = compression::Format::Zstd;
+ } else if (isFormatAvailable(compression::Format::Zlib)) {
+ CompressionFormat = compression::Format::Zlib;
+ } else {
+ return make_error<StringError>(
+ "None of the compression methods is available.",
+ inconvertibleErrorCode());
+ }
+
+ switch (CompressionFormat) {
+ case compression::Format::Zstd:
+ compression::zstd::compress({RawBytes.bytes_begin(), RawBytes.bytes_end()},
+ CompressedBytes);
+ break;
+ case compression::Format::Zlib:
+ compression::zlib::compress({RawBytes.bytes_begin(), RawBytes.bytes_end()},
+ CompressedBytes);
+ break;
+ }
+
+ ObjFile = {CompressionFormat,
+ RawBytes.size(),
+ {CompressedBytes.begin(), CompressedBytes.end()}};
+ return Error::success();
+}
+
Expected<Benchmark> Benchmark::readYaml(const LLVMState &State,
MemoryBufferRef Buffer) {
yaml::Input Yin(Buffer);
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index 3c09a8380146e5..a5217566204a14 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/YAMLTraits.h"
#include <limits>
#include <set>
@@ -76,6 +77,11 @@ struct BenchmarkKey {
uintptr_t SnippetAddress = 0;
// The register that should be used to hold the loop counter.
unsigned LoopRegister;
+
+ bool operator==(const BenchmarkKey &RHS) const {
+ return Config == RHS.Config &&
+ Instructions[0].getOpcode() == RHS.Instructions[0].getOpcode();
+ }
};
struct BenchmarkMeasure {
@@ -122,6 +128,16 @@ struct Benchmark {
std::string Error;
std::string Info;
std::vector<uint8_t> AssembledSnippet;
+
+ struct ObjectFile {
+ llvm::compression::Format CompressionFormat;
+ size_t UncompressedSize = 0;
+ std::vector<uint8_t> CompressedBytes;
+
+ bool isValid() const { return UncompressedSize && CompressedBytes.size(); }
+ };
+ std::optional<ObjectFile> ObjFile;
+
// How to aggregate measurements.
enum ResultAggregationModeE { Min, Max, Mean, MinVariance };
@@ -132,6 +148,10 @@ struct Benchmark {
Benchmark &operator=(const Benchmark &) = delete;
Benchmark &operator=(Benchmark &&) = delete;
+  // Compress the raw object file bytes and store the result, along with the
+  // compression format used, in the ObjFile member.
+ class Error setObjectFile(StringRef RawBytes);
+
// Read functions.
static Expected<Benchmark> readYaml(const LLVMState &State,
MemoryBufferRef Buffer);
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 9b978c558c1fe7..3bca6ed13d8fc8 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -624,6 +624,7 @@ Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
Expected<BenchmarkRunner::RunnableConfiguration>
BenchmarkRunner::getRunnableConfiguration(
const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
+ Benchmark::RepetitionModeE RepetitionMode,
const SnippetRepetitor &Repetitor) const {
RunnableConfiguration RC;
@@ -668,12 +669,54 @@ BenchmarkRunner::getRunnableConfiguration(
LoopBodySize, GenerateMemoryInstructions);
if (Error E = Snippet.takeError())
return std::move(E);
+ // There is no need to serialize/deserialize the object file if we're
+ // simply running end-to-end measurements.
+ // Same goes for any repetition mode that requires more than a single
+ // snippet.
+ if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure &&
+ (RepetitionMode == Benchmark::Loop ||
+ RepetitionMode == Benchmark::Duplicate)) {
+ if (Error E = BenchmarkResult.setObjectFile(*Snippet))
+ return std::move(E);
+ }
RC.ObjectFile = getObjectFromBuffer(*Snippet);
}
return std::move(RC);
}
+Expected<BenchmarkRunner::RunnableConfiguration>
+BenchmarkRunner::getRunnableConfiguration(Benchmark &&B) const {
+ assert(B.ObjFile.has_value() && B.ObjFile->isValid() &&
+         "No serialized object file is attached?");
+ const Benchmark::ObjectFile &ObjFile = *B.ObjFile;
+ SmallVector<uint8_t> DecompressedObjFile;
+ switch (ObjFile.CompressionFormat) {
+ case compression::Format::Zstd:
+ if (!compression::zstd::isAvailable())
+ return make_error<StringError>("zstd is not available for decompression.",
+ inconvertibleErrorCode());
+ if (Error E = compression::zstd::decompress(ObjFile.CompressedBytes,
+ DecompressedObjFile,
+ ObjFile.UncompressedSize))
+ return std::move(E);
+ break;
+ case compression::Format::Zlib:
+ if (!compression::zlib::isAvailable())
+ return make_error<StringError>("zlib is not available for decompression.",
+ inconvertibleErrorCode());
+ if (Error E = compression::zlib::decompress(ObjFile.CompressedBytes,
+ DecompressedObjFile,
+ ObjFile.UncompressedSize))
+ return std::move(E);
+ break;
+ }
+
+ StringRef Buffer(reinterpret_cast<const char *>(DecompressedObjFile.begin()),
+ DecompressedObjFile.size());
+ return RunnableConfiguration{std::move(B), getObjectFromBuffer(Buffer)};
+}
+
Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
BenchmarkRunner::createFunctionExecutor(
object::OwningBinary<object::ObjectFile> ObjectFile,
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
index e688b814d1c83d..ef9446bdd5bbe8 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -54,18 +54,25 @@ class BenchmarkRunner {
RunnableConfiguration &operator=(RunnableConfiguration &&) = delete;
RunnableConfiguration &operator=(const RunnableConfiguration &) = delete;
+ Benchmark BenchmarkResult;
+ object::OwningBinary<object::ObjectFile> ObjectFile;
+
private:
RunnableConfiguration() = default;
- Benchmark BenchmarkResult;
- object::OwningBinary<object::ObjectFile> ObjectFile;
+ RunnableConfiguration(Benchmark &&B,
+ object::OwningBinary<object::ObjectFile> &&OF)
+ : BenchmarkResult(std::move(B)), ObjectFile(std::move(OF)) {}
};
Expected<RunnableConfiguration>
getRunnableConfiguration(const BenchmarkCode &Configuration,
unsigned MinInstructions, unsigned LoopUnrollFactor,
+ Benchmark::RepetitionModeE RepetitionMode,
const SnippetRepetitor &Repetitor) const;
+ Expected<RunnableConfiguration> getRunnableConfiguration(Benchmark &&B) const;
+
std::pair<Error, Benchmark>
runConfiguration(RunnableConfiguration &&RC,
const std::optional<StringRef> &DumpFile,
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index fa37e05956be8c..a21f3bdb5fba5f 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -114,8 +114,7 @@ static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
"benchmark-phase",
- cl::desc(
- "it is possible to stop the benchmarking process after some phase"),
+ cl::desc("Stop the benchmarking process after some phase"),
cl::cat(BenchmarkOptions),
cl::values(
clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
@@ -135,6 +134,13 @@ static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
"(default)")),
cl::init(BenchmarkPhaseSelectorE::Measure));
+static cl::opt<std::string> RunMeasurement(
+ "run-measurement",
+ cl::desc(
+ "Run measurement phase with a benchmarks file generated previously"),
+ cl::cat(BenchmarkOptions), cl::value_desc("<benchmarks file>"),
+ cl::init(""));
+
static cl::opt<bool>
UseDummyPerfCounters("use-dummy-perf-counters",
cl::desc("Do not read real performance counters, use "
@@ -397,11 +403,55 @@ generateSnippets(const LLVMState &State, unsigned Opcode,
return Benchmarks;
}
-static void runBenchmarkConfigurations(
- const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
+static void deserializeRunnableConfigurations(
+ std::vector<Benchmark> &Benchmarks, const BenchmarkRunner &Runner,
+ std::vector<BenchmarkRunner::RunnableConfiguration> &RunnableConfigs,
+ SmallVectorImpl<unsigned> &Repetitions) {
+ for (unsigned I = 0U, E = Benchmarks.size(); I < E; ++I) {
+ // Reset any previous error.
+ Benchmarks[I].Error.clear();
+
+ RunnableConfigs.emplace_back(
+ ExitOnErr(Runner.getRunnableConfiguration(std::move(Benchmarks[I]))));
+ if (I > 0 && RunnableConfigs[I].BenchmarkResult.Key ==
+ RunnableConfigs[I - 1].BenchmarkResult.Key) {
+ // Extend the current end index in Repetitions.
+ Repetitions.back() = RunnableConfigs.size();
+ } else {
+ // Append a new entry into Repetitions.
+ Repetitions.push_back(RunnableConfigs.size());
+ }
+ }
+}
+
+static void collectRunnableConfigurations(
+ ArrayRef<BenchmarkCode> Configurations,
ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
- const BenchmarkRunner &Runner) {
- assert(!Configurations.empty() && "Don't have any configurations to run.");
+ const BenchmarkRunner &Runner,
+ std::vector<BenchmarkRunner::RunnableConfiguration> &RunnableConfigs,
+ SmallVectorImpl<unsigned> &Repetitions) {
+
+ SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
+ if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
+ RepetitionMode == Benchmark::MiddleHalfLoop)
+ MinInstructionCounts.push_back(MinInstructions * 2);
+
+ for (const BenchmarkCode &Conf : Configurations) {
+ for (const auto &Repetitor : Repetitors) {
+ for (unsigned IterationRepetitions : MinInstructionCounts)
+ RunnableConfigs.emplace_back(ExitOnErr(Runner.getRunnableConfiguration(
+ Conf, IterationRepetitions, LoopBodySize, RepetitionMode,
+ *Repetitor)));
+ }
+ Repetitions.emplace_back(RunnableConfigs.size());
+ }
+}
+
+static void runBenchmarkConfigurations(
+ const LLVMState &State,
+ std::vector<BenchmarkRunner::RunnableConfiguration> &RunnableConfigs,
+ ArrayRef<unsigned> Repetitions, const BenchmarkRunner &Runner) {
+ assert(!RunnableConfigs.empty() && "Don't have any configurations to run.");
std::optional<raw_fd_ostream> FileOstr;
if (BenchmarkFile != "-") {
int ResultFD = 0;
@@ -415,43 +465,38 @@ static void runBenchmarkConfigurations(
std::optional<ProgressMeter<>> Meter;
if (BenchmarkMeasurementsPrintProgress)
- Meter.emplace(Configurations.size());
+ Meter.emplace(RunnableConfigs.size());
- SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
- if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
- RepetitionMode == Benchmark::MiddleHalfLoop)
- MinInstructionCounts.push_back(MinInstructions * 2);
+ std::optional<StringRef> DumpFile;
+ if (DumpObjectToDisk.getNumOccurrences())
+ DumpFile = DumpObjectToDisk;
- for (const BenchmarkCode &Conf : Configurations) {
+ const std::optional<int> BenchmarkCPU =
+ BenchmarkProcessCPU == -1 ? std::nullopt
+ : std::optional(BenchmarkProcessCPU.getValue());
+
+ unsigned StartIdx = 0;
+ for (unsigned EndIdx : Repetitions) {
ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
SmallVector<Benchmark, 2> AllResults;
- for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
- Repetitors) {
- for (unsigned IterationRepetitions : MinInstructionCounts) {
- auto RC = ExitOnErr(Runner.getRunnableConfiguration(
- Conf, IterationRepetitions, LoopBodySize, *Repetitor));
- std::optional<StringRef> DumpFile;
- if (DumpObjectToDisk.getNumOccurrences())
- DumpFile = DumpObjectToDisk;
- const std::optional<int> BenchmarkCPU =
- BenchmarkProcessCPU == -1
- ? std::nullopt
- : std::optional(BenchmarkProcessCPU.getValue());
- auto [Err, BenchmarkResult] =
- Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU);
- if (Err) {
- // Errors from executing the snippets are fine.
- // All other errors are a framework issue and should fail.
- if (!Err.isA<SnippetExecutionFailure>())
- ExitOnErr(std::move(Err));
-
- BenchmarkResult.Error = toString(std::move(Err));
+ for (unsigned Idx = StartIdx; Idx < EndIdx; ++Idx) {
+ auto RC = std::move(RunnableConfigs[Idx]);
+ auto [Err, BenchmarkResult] =
+ Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU);
+ if (Err) {
+ // Errors from executing the snippets are fine.
+ // All other errors are a framework issue and should fail.
+ if (!Err.isA<SnippetExecutionFailure>()) {
+ llvm::errs() << "llvm-exegesis error: " << toString(std::move(Err));
+ exit(1);
}
- AllResults.push_back(std::move(BenchmarkResult));
+ BenchmarkResult.Error = toString(std::move(Err));
}
- }
+ AllResults.push_back(std::move(BenchmarkResult));
+ }
+ StartIdx = EndIdx;
Benchmark &Result = AllResults.front();
// If any of our measurements failed, pretend they all have failed.
@@ -517,77 +562,94 @@ void benchmarkMain() {
ExitWithError("cannot create benchmark runner");
}
- const auto Opcodes = getOpcodesOrDie(State);
- std::vector<BenchmarkCode> Configurations;
-
- unsigned LoopRegister =
- State.getExegesisTarget().getDefaultLoopCounterRegister(
- State.getTargetMachine().getTargetTriple());
-
- if (Opcodes.empty()) {
- Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
- for (const auto &Configuration : Configurations) {
- if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
- (Configuration.Key.MemoryMappings.size() != 0 ||
- Configuration.Key.MemoryValues.size() != 0 ||
- Configuration.Key.SnippetAddress != 0))
- ExitWithError("Memory and snippet address annotations are only "
- "supported in subprocess "
- "execution mode");
- }
- LoopRegister = Configurations[0].Key.LoopRegister;
- }
+ std::vector<BenchmarkRunner::RunnableConfiguration> RunnableConfigs;
+ SmallVector<unsigned> Repetitions;
- SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
- if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
- Repetitors.emplace_back(
- SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
- else {
- for (Benchmark::RepetitionModeE RepMode :
- {Benchmark::RepetitionModeE::Duplicate,
- Benchmark::RepetitionModeE::Loop})
- Repetitors.emplace_back(
- SnippetRepetitor::Create(RepMode, State, LoopRegister));
- }
+ // Write to standard output if file is not set.
+ if (BenchmarkFile.empty())
+ BenchmarkFile = "-";
- BitVector AllReservedRegs;
- for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
- AllReservedRegs |= Repetitor->getReservedRegs();
-
- if (!Opcodes.empty()) {
- for (const unsigned Opcode : Opcodes) {
- // Ignore instructions without a sched class if
- // -ignore-invalid-sched-class is passed.
- if (IgnoreInvalidSchedClass &&
- State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
- errs() << State.getInstrInfo().getName(Opcode)
- << ": ignoring instruction without sched class\n";
- continue;
+ if (!RunMeasurement.empty()) {
+ // Right now we only support resuming before the measurement phase.
+ auto ErrOrBuffer =
+ MemoryBuffer::getFileOrSTDIN(RunMeasurement, /*IsText=*/true);
+ if (!ErrOrBuffer)
+ report_fatal_error(errorCodeToError(ErrOrBuffer.getError()));
+
+ std::vector<Benchmark> Benchmarks =
+ ExitOnErr(Benchmark::readYamls(State, **ErrOrBuffer));
+ deserializeRunnableConfigurations(Benchmarks, *Runner, RunnableConfigs,
+ Repetitions);
+ } else {
+ const auto Opcodes = getOpcodesOrDie(State);
+ std::vector<BenchmarkCode> Configurations;
+
+ unsigned LoopRegister =
+ State.getExegesisTarget().getDefaultLoopCounterRegister(
+ State.getTargetMachine().getTargetTriple());
+
+ if (Opcodes.empty()) {
+ Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
+ for (const auto &Configuration : Configurations) {
+ if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
+ (Configuration.Key.MemoryMappings.size() != 0 ||
+ Configuration.Key.MemoryValues.size() != 0 ||
+ Configuration.Key.SnippetAddress != 0))
+ ExitWithError("Memory and snippet address annotations are only "
+ "supported in subprocess "
+ "execution mode");
}
+ LoopRegister = Configurations[0].Key.LoopRegister;
+ }
+ SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
+ if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
+ Repetitors.emplace_back(
+ SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
+ else {
+ for (Benchmark::RepetitionModeE RepMode :
+ {Benchmark::RepetitionModeE::Duplicate,
+ Benchmark::RepetitionModeE::Loop})
+ Repetitors.emplace_back(
+ SnippetRepetitor::Create(RepMode, State, LoopRegister));
+ }
- auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
- if (!ConfigsForInstr) {
- logAllUnhandledErrors(
- ConfigsForInstr.takeError(), errs(),
- Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
- continue;
+ BitVector AllReservedRegs;
+ for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
+ AllReservedRegs |= Repetitor->getReservedRegs();
+
+ if (!Opcodes.empty()) {
+ for (const unsigned Opcode : Opcodes) {
+ // Ignore instructions without a sched class if
+ // -ignore-invalid-sched-class is passed.
+ if (IgnoreInvalidSchedClass &&
+ State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
+ errs() << State.getInstrInfo().getName(Opcode)
+ << ": ignoring instruction without sched class\n";
+ continue;
+ }
+
+ auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
+ if (!ConfigsForInstr) {
+ logAllUnhandledErrors(
+ ConfigsForInstr.takeError(), errs(),
+ Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
+ continue;
+ }
+ std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
+ std::back_inserter(Configurations));
}
- std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
- std::back_inserter(Configurations));
}
- }
+ if (MinInstructions == 0) {
+ ExitOnErr.setBanner("llvm-exegesis: ");
+ ExitWithError("--min-instructions must be greater than zero");
+ }
- if (MinInstructions == 0) {
- ExitOnErr.setBanner("llvm-exegesis: ");
- ExitWithError("--min-instructions must be greater than zero");
+ collectRunnableConfigurations(Configurations, Repetitors, *Runner,
+ RunnableConfigs, Repetitions);
}
- // Write to standard output if file is not set.
- if (BenchmarkFile.empty())
- BenchmarkFile = "-";
-
- if (!Configurations.empty())
- runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);
+ if (!RunnableConfigs.empty())
+ runBenchmarkConfigurations(State, RunnableConfigs, Repetitions, *Runner);
pfm::pfmTerminate();
}