[llvm] [Exegesis] Add supports to serialize/deserialize object files into benchmarks (PR #121993)

Mon Jan 13 14:58:06 PST 2025

https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/121993

>From f69c8abfbb7135e72a3971dfdec84982e1ff1f11 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 6 Jan 2025 14:25:54 -0800
Subject: [PATCH 1/4] [Exegesis] Add supports to serialize/deserialize
 benchmarks

TBA...
---
 llvm/docs/CommandGuide/llvm-exegesis.rst      |  11 +-
 .../RISCV/serialize-obj-file.test             |  33 +++
 .../llvm-exegesis/lib/BenchmarkResult.cpp     |  95 ++++++-
 .../tools/llvm-exegesis/lib/BenchmarkResult.h |  20 ++
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     |  43 +++
 .../tools/llvm-exegesis/lib/BenchmarkRunner.h |  11 +-
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    | 256 +++++++++++-------
 7 files changed, 367 insertions(+), 102 deletions(-)
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test

diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index d357c2ceea4189..f2f12253366118 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -299,10 +299,19 @@ OPTIONS
   However, it is possible to stop at some stage before measuring. Choices are:
   * ``prepare-snippet``: Only generate the minimal instruction sequence.
   * ``prepare-and-assemble-snippet``: Same as ``prepare-snippet``, but also dumps an excerpt of the sequence (hex encoded).
-  * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``. but also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
+  * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``, but
+    also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
+    If either zlib or zstd is available and we're using either duplicate or
+    loop repetition mode, this phase generates benchmarks with a serialized
+    snippet object file attached to each of them.
   * ``measure``: Same as ``assemble-measured-code``, but also runs the measurement.
   * ``dry-run-measurement``: Same as measure, but does not actually execute the snippet.
 
+.. option:: --run-measurement=<benchmarks file>
+
+  Given a benchmarks file generated after the ``assemble-measured-code`` phase,
+  resume the measurement phase from it.
+
 .. option:: --x86-lbr-sample-period=<nBranches/sample>
 
   Specify the LBR sampling period - how many branches before we take a sample.
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test b/llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
new file mode 100644
index 00000000000000..befd16699bef1a
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
@@ -0,0 +1,33 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --benchmark-phase=assemble-measured-code --mode=latency --benchmarks-file=%t.yaml
+# RUN: FileCheck --input-file=%t.yaml %s --check-prefixes=CHECK,SERIALIZE
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --run-measurement=%t.yaml --mode=latency --dry-run-measurement --use-dummy-perf-counters \
+# RUN:    --dump-object-to-disk=%t.o | FileCheck %s --check-prefixes=CHECK,DESERIALIZE
+# RUN: llvm-objdump -d %t.o | FileCheck %s --check-prefix=OBJDUMP
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --dry-run-measurement --use-dummy-perf-counters | \
+# RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=min | \
+# RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-loop | \
+# RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-duplicate | \
+# RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
+# REQUIRES: zlib || zstd
+
+# A round-trip test for serialize/deserialize benchmarks.
+
+# CHECK: mode: latency
+# CHECK:  instructions:
+# CHECK-NEXT: - 'SH3ADD X{{.*}} X{{.*}} X{{.*}}'
+# CHECK: cpu_name:        sifive-p470
+# CHECK-NEXT: llvm_triple:     riscv64
+# CHECK-NEXT: min_instructions: 10000
+# CHECK-NEXT: measurements:    []
+# SERIALIZE: error: actual measurements skipped.
+# DESERIALIZE: error:           ''
+# CHECK: info:            Repeating a single explicitly serial instruction
+
+# OBJDUMP: sh3add
+
+# Negative tests: we shouldn't serialize object files in some scenarios.
+
+# NO-SERIALIZE-NOT: object_file:
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
index 84dc23b343c6c0..eff5a6d547cbda 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
@@ -15,10 +15,13 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/bit.h"
 #include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Base64.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 
 static constexpr const char kIntegerPrefix[] = "i_0x";
@@ -27,6 +30,12 @@ static constexpr const char kInvalidOperand[] = "INVALID";
 
 namespace llvm {
 
+static cl::opt<compression::Format> ForceObjectFileCompressionFormat(
+    "exegesis-force-obj-compress-format", cl::Hidden,
+    cl::desc("Force to use this compression format for object files."),
+    cl::values(clEnumValN(compression::Format::Zstd, "zstd", "Using Zstandard"),
+               clEnumValN(compression::Format::Zlib, "zlib", "Using LibZ")));
+
 namespace {
 
 // A mutable struct holding an LLVMState that can be passed through the
@@ -278,6 +287,13 @@ template <> struct ScalarTraits<exegesis::RegisterValue> {
   static const bool flow = true;
 };
 
+template <> struct ScalarEnumerationTraits<compression::Format> {
+  static void enumeration(IO &Io, compression::Format &Format) {
+    Io.enumCase(Format, "zstd", compression::Format::Zstd);
+    Io.enumCase(Format, "zlib", compression::Format::Zlib);
+  }
+};
+
 template <> struct MappingContextTraits<exegesis::BenchmarkKey, YamlContext> {
   static void mapping(IO &Io, exegesis::BenchmarkKey &Obj,
                       YamlContext &Context) {
@@ -288,6 +304,33 @@ template <> struct MappingContextTraits<exegesis::BenchmarkKey, YamlContext> {
   }
 };
 
+template <> struct MappingTraits<exegesis::Benchmark::ObjectFile> {
+  struct NormalizedBase64Binary {
+    std::string Base64Str;
+
+    NormalizedBase64Binary(IO &) {}
+    NormalizedBase64Binary(IO &, const std::vector<uint8_t> &Data)
+        : Base64Str(llvm::encodeBase64(Data)) {}
+
+    std::vector<uint8_t> denormalize(IO &) {
+      std::vector<char> Buffer;
+      if (Error E = llvm::decodeBase64(Base64Str, Buffer))
+        report_fatal_error(std::move(E));
+
+      StringRef Data(Buffer.data(), Buffer.size());
+      return std::vector<uint8_t>(Data.bytes_begin(), Data.bytes_end());
+    }
+  };
+
+  static void mapping(IO &Io, exegesis::Benchmark::ObjectFile &Obj) {
+    Io.mapRequired("compression", Obj.CompressionFormat);
+    Io.mapRequired("original_size", Obj.UncompressedSize);
+    MappingNormalization<NormalizedBase64Binary, std::vector<uint8_t>>
+        ObjFileString(Io, Obj.CompressedBytes);
+    Io.mapRequired("compressed_bytes", ObjFileString->Base64Str);
+  }
+};
+
 template <> struct MappingContextTraits<exegesis::Benchmark, YamlContext> {
   struct NormalizedBinary {
     NormalizedBinary(IO &io) {}
@@ -325,9 +368,11 @@ template <> struct MappingContextTraits<exegesis::Benchmark, YamlContext> {
     Io.mapRequired("error", Obj.Error);
     Io.mapOptional("info", Obj.Info);
     // AssembledSnippet
-    MappingNormalization<NormalizedBinary, std::vector<uint8_t>> BinaryString(
+    MappingNormalization<NormalizedBinary, std::vector<uint8_t>> SnippetString(
         Io, Obj.AssembledSnippet);
-    Io.mapOptional("assembled_snippet", BinaryString->Binary);
+    Io.mapOptional("assembled_snippet", SnippetString->Binary);
+    // ObjectFile
+    Io.mapOptional("object_file", Obj.ObjFile);
   }
 };
 
@@ -364,6 +409,52 @@ Benchmark::readTriplesAndCpusFromYamls(MemoryBufferRef Buffer) {
   return Result;
 }
 
+Error Benchmark::setObjectFile(StringRef RawBytes) {
+  SmallVector<uint8_t> CompressedBytes;
+  llvm::compression::Format CompressionFormat;
+
+  auto isFormatAvailable = [](llvm::compression::Format F) -> bool {
+    switch (F) {
+    case compression::Format::Zstd:
+      return compression::zstd::isAvailable();
+    case compression::Format::Zlib:
+      return compression::zlib::isAvailable();
+    }
+  };
+  if (ForceObjectFileCompressionFormat.getNumOccurrences() > 0) {
+    CompressionFormat = ForceObjectFileCompressionFormat;
+    if (!isFormatAvailable(CompressionFormat))
+      return make_error<StringError>(
+          "The designated compression format is not available.",
+          inconvertibleErrorCode());
+  } else if (isFormatAvailable(compression::Format::Zstd)) {
+    // Try newer compression algorithm first.
+    CompressionFormat = compression::Format::Zstd;
+  } else if (isFormatAvailable(compression::Format::Zlib)) {
+    CompressionFormat = compression::Format::Zlib;
+  } else {
+    return make_error<StringError>(
+        "None of the compression methods is available.",
+        inconvertibleErrorCode());
+  }
+
+  switch (CompressionFormat) {
+  case compression::Format::Zstd:
+    compression::zstd::compress({RawBytes.bytes_begin(), RawBytes.bytes_end()},
+                                CompressedBytes);
+    break;
+  case compression::Format::Zlib:
+    compression::zlib::compress({RawBytes.bytes_begin(), RawBytes.bytes_end()},
+                                CompressedBytes);
+    break;
+  }
+
+  ObjFile = {CompressionFormat,
+             RawBytes.size(),
+             {CompressedBytes.begin(), CompressedBytes.end()}};
+  return Error::success();
+}
+
 Expected<Benchmark> Benchmark::readYaml(const LLVMState &State,
                                         MemoryBufferRef Buffer) {
   yaml::Input Yin(Buffer);
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index 5480d856168784..2094334d754fa0 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <limits>
 #include <set>
@@ -77,6 +78,11 @@ struct BenchmarkKey {
   uintptr_t SnippetAddress = 0;
   // The register that should be used to hold the loop counter.
   unsigned LoopRegister;
+
+  bool operator==(const BenchmarkKey &RHS) const {
+    return Config == RHS.Config &&
+           Instructions[0].getOpcode() == RHS.Instructions[0].getOpcode();
+  }
 };
 
 struct BenchmarkMeasure {
@@ -123,6 +129,16 @@ struct Benchmark {
   std::string Error;
   std::string Info;
   std::vector<uint8_t> AssembledSnippet;
+
+  struct ObjectFile {
+    llvm::compression::Format CompressionFormat;
+    size_t UncompressedSize = 0;
+    std::vector<uint8_t> CompressedBytes;
+
+    bool isValid() const { return UncompressedSize && CompressedBytes.size(); }
+  };
+  std::optional<ObjectFile> ObjFile;
+
   // How to aggregate measurements.
   enum ResultAggregationModeE { Min, Max, Mean, MinVariance };
 
@@ -133,6 +149,10 @@ struct Benchmark {
   Benchmark &operator=(const Benchmark &) = delete;
   Benchmark &operator=(Benchmark &&) = delete;
 
+  // Compress raw object file bytes and store the compression format, the
+  // original (uncompressed) size, and the compressed bytes in ObjFile.
+  class Error setObjectFile(StringRef RawBytes);
+
   // Read functions.
   static Expected<Benchmark> readYaml(const LLVMState &State,
                                                  MemoryBufferRef Buffer);
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index cc46f7feb6cf7f..9a8c3f28176e6a 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -619,6 +619,7 @@ Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
 Expected<BenchmarkRunner::RunnableConfiguration>
 BenchmarkRunner::getRunnableConfiguration(
     const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
+    Benchmark::RepetitionModeE RepetitionMode,
     const SnippetRepetitor &Repetitor) const {
   RunnableConfiguration RC;
 
@@ -663,12 +664,54 @@ BenchmarkRunner::getRunnableConfiguration(
                         LoopBodySize, GenerateMemoryInstructions);
     if (Error E = Snippet.takeError())
       return std::move(E);
+    // There is no need to serialize/deserialize the object file if we're
+    // simply running end-to-end measurements.
+    // Same goes for any repetition mode that requires more than a single
+    // snippet.
+    if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure &&
+        (RepetitionMode == Benchmark::Loop ||
+         RepetitionMode == Benchmark::Duplicate)) {
+      if (Error E = BenchmarkResult.setObjectFile(*Snippet))
+        return std::move(E);
+    }
     RC.ObjectFile = getObjectFromBuffer(*Snippet);
   }
 
   return std::move(RC);
 }
 
+Expected<BenchmarkRunner::RunnableConfiguration>
+BenchmarkRunner::getRunnableConfiguration(Benchmark &&B) const {
+  assert(B.ObjFile.has_value() && B.ObjFile->isValid() &&
+         "No serialized obejct file is attached?");
+  const Benchmark::ObjectFile &ObjFile = *B.ObjFile;
+  SmallVector<uint8_t> DecompressedObjFile;
+  switch (ObjFile.CompressionFormat) {
+  case compression::Format::Zstd:
+    if (!compression::zstd::isAvailable())
+      return make_error<StringError>("zstd is not available for decompression.",
+                                     inconvertibleErrorCode());
+    if (Error E = compression::zstd::decompress(ObjFile.CompressedBytes,
+                                                DecompressedObjFile,
+                                                ObjFile.UncompressedSize))
+      return std::move(E);
+    break;
+  case compression::Format::Zlib:
+    if (!compression::zlib::isAvailable())
+      return make_error<StringError>("zlib is not available for decompression.",
+                                     inconvertibleErrorCode());
+    if (Error E = compression::zlib::decompress(ObjFile.CompressedBytes,
+                                                DecompressedObjFile,
+                                                ObjFile.UncompressedSize))
+      return std::move(E);
+    break;
+  }
+
+  StringRef Buffer(reinterpret_cast<const char *>(DecompressedObjFile.begin()),
+                   DecompressedObjFile.size());
+  return RunnableConfiguration{std::move(B), getObjectFromBuffer(Buffer)};
+}
+
 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
 BenchmarkRunner::createFunctionExecutor(
     object::OwningBinary<object::ObjectFile> ObjectFile,
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
index e688b814d1c83d..ef9446bdd5bbe8 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -54,18 +54,25 @@ class BenchmarkRunner {
     RunnableConfiguration &operator=(RunnableConfiguration &&) = delete;
     RunnableConfiguration &operator=(const RunnableConfiguration &) = delete;
 
+    Benchmark BenchmarkResult;
+    object::OwningBinary<object::ObjectFile> ObjectFile;
+
   private:
     RunnableConfiguration() = default;
 
-    Benchmark BenchmarkResult;
-    object::OwningBinary<object::ObjectFile> ObjectFile;
+    RunnableConfiguration(Benchmark &&B,
+                          object::OwningBinary<object::ObjectFile> &&OF)
+        : BenchmarkResult(std::move(B)), ObjectFile(std::move(OF)) {}
   };
 
   Expected<RunnableConfiguration>
   getRunnableConfiguration(const BenchmarkCode &Configuration,
                            unsigned MinInstructions, unsigned LoopUnrollFactor,
+                           Benchmark::RepetitionModeE RepetitionMode,
                            const SnippetRepetitor &Repetitor) const;
 
+  Expected<RunnableConfiguration> getRunnableConfiguration(Benchmark &&B) const;
+
   std::pair<Error, Benchmark>
   runConfiguration(RunnableConfiguration &&RC,
                    const std::optional<StringRef> &DumpFile,
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 07bd44ee64f1f2..4b18eb96f02e71 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -114,8 +114,7 @@ static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
 
 static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
     "benchmark-phase",
-    cl::desc(
-        "it is possible to stop the benchmarking process after some phase"),
+    cl::desc("Stop the benchmarking process after some phase"),
     cl::cat(BenchmarkOptions),
     cl::values(
         clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
@@ -138,6 +137,13 @@ static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
             "Same as measure, but does not actually execute the snippet")),
     cl::init(BenchmarkPhaseSelectorE::Measure));
 
+static cl::opt<std::string> RunMeasurement(
+    "run-measurement",
+    cl::desc(
+        "Run measurement phase with a benchmarks file generated previously"),
+    cl::cat(BenchmarkOptions), cl::value_desc("<benchmarks file>"),
+    cl::init(""));
+
 static cl::opt<bool>
     UseDummyPerfCounters("use-dummy-perf-counters",
                          cl::desc("Do not read real performance counters, use "
@@ -400,11 +406,55 @@ generateSnippets(const LLVMState &State, unsigned Opcode,
   return Benchmarks;
 }
 
-static void runBenchmarkConfigurations(
-    const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
+static void deserializeRunnableConfigurations(
+    std::vector<Benchmark> &Benchmarks, const BenchmarkRunner &Runner,
+    std::vector<BenchmarkRunner::RunnableConfiguration> &RunnableConfigs,
+    SmallVectorImpl<unsigned> &Repetitions) {
+  for (unsigned I = 0U, E = Benchmarks.size(); I < E; ++I) {
+    // Reset any previous error.
+    Benchmarks[I].Error.clear();
+
+    RunnableConfigs.emplace_back(
+        ExitOnErr(Runner.getRunnableConfiguration(std::move(Benchmarks[I]))));
+    if (I > 0 && RunnableConfigs[I].BenchmarkResult.Key ==
+                     RunnableConfigs[I - 1].BenchmarkResult.Key) {
+      // Extend the current end index in Repetitions.
+      Repetitions.back() = RunnableConfigs.size();
+    } else {
+      // Append a new entry into Repetitions.
+      Repetitions.push_back(RunnableConfigs.size());
+    }
+  }
+}
+
+static void collectRunnableConfigurations(
+    ArrayRef<BenchmarkCode> Configurations,
     ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
-    const BenchmarkRunner &Runner) {
-  assert(!Configurations.empty() && "Don't have any configurations to run.");
+    const BenchmarkRunner &Runner,
+    std::vector<BenchmarkRunner::RunnableConfiguration> &RunnableConfigs,
+    SmallVectorImpl<unsigned> &Repetitions) {
+
+  SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
+  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
+      RepetitionMode == Benchmark::MiddleHalfLoop)
+    MinInstructionCounts.push_back(MinInstructions * 2);
+
+  for (const BenchmarkCode &Conf : Configurations) {
+    for (const auto &Repetitor : Repetitors) {
+      for (unsigned IterationRepetitions : MinInstructionCounts)
+        RunnableConfigs.emplace_back(ExitOnErr(Runner.getRunnableConfiguration(
+            Conf, IterationRepetitions, LoopBodySize, RepetitionMode,
+            *Repetitor)));
+    }
+    Repetitions.emplace_back(RunnableConfigs.size());
+  }
+}
+
+static void runBenchmarkConfigurations(
+    const LLVMState &State,
+    std::vector<BenchmarkRunner::RunnableConfiguration> &RunnableConfigs,
+    ArrayRef<unsigned> Repetitions, const BenchmarkRunner &Runner) {
+  assert(!RunnableConfigs.empty() && "Don't have any configurations to run.");
   std::optional<raw_fd_ostream> FileOstr;
   if (BenchmarkFile != "-") {
     int ResultFD = 0;
@@ -418,43 +468,38 @@ static void runBenchmarkConfigurations(
 
   std::optional<ProgressMeter<>> Meter;
   if (BenchmarkMeasurementsPrintProgress)
-    Meter.emplace(Configurations.size());
+    Meter.emplace(RunnableConfigs.size());
 
-  SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
-  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
-      RepetitionMode == Benchmark::MiddleHalfLoop)
-    MinInstructionCounts.push_back(MinInstructions * 2);
+  std::optional<StringRef> DumpFile;
+  if (DumpObjectToDisk.getNumOccurrences())
+    DumpFile = DumpObjectToDisk;
 
-  for (const BenchmarkCode &Conf : Configurations) {
+  const std::optional<int> BenchmarkCPU =
+      BenchmarkProcessCPU == -1 ? std::nullopt
+                                : std::optional(BenchmarkProcessCPU.getValue());
+
+  unsigned StartIdx = 0;
+  for (unsigned EndIdx : Repetitions) {
     ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
     SmallVector<Benchmark, 2> AllResults;
 
-    for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
-         Repetitors) {
-      for (unsigned IterationRepetitions : MinInstructionCounts) {
-        auto RC = ExitOnErr(Runner.getRunnableConfiguration(
-            Conf, IterationRepetitions, LoopBodySize, *Repetitor));
-        std::optional<StringRef> DumpFile;
-        if (DumpObjectToDisk.getNumOccurrences())
-          DumpFile = DumpObjectToDisk;
-        const std::optional<int> BenchmarkCPU =
-            BenchmarkProcessCPU == -1
-                ? std::nullopt
-                : std::optional(BenchmarkProcessCPU.getValue());
-        auto [Err, BenchmarkResult] =
-            Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU);
-        if (Err) {
-          // Errors from executing the snippets are fine.
-          // All other errors are a framework issue and should fail.
-          if (!Err.isA<SnippetExecutionFailure>())
-            ExitOnErr(std::move(Err));
-
-          BenchmarkResult.Error = toString(std::move(Err));
+    for (unsigned Idx = StartIdx; Idx < EndIdx; ++Idx) {
+      auto RC = std::move(RunnableConfigs[Idx]);
+      auto [Err, BenchmarkResult] =
+          Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU);
+      if (Err) {
+        // Errors from executing the snippets are fine.
+        // All other errors are a framework issue and should fail.
+        if (!Err.isA<SnippetExecutionFailure>()) {
+          llvm::errs() << "llvm-exegesis error: " << toString(std::move(Err));
+          exit(1);
         }
-        AllResults.push_back(std::move(BenchmarkResult));
+        BenchmarkResult.Error = toString(std::move(Err));
       }
-    }
 
+      AllResults.push_back(std::move(BenchmarkResult));
+    }
+    StartIdx = EndIdx;
     Benchmark &Result = AllResults.front();
 
     // If any of our measurements failed, pretend they all have failed.
@@ -520,77 +565,94 @@ void benchmarkMain() {
     ExitWithError("cannot create benchmark runner");
   }
 
-  const auto Opcodes = getOpcodesOrDie(State);
-  std::vector<BenchmarkCode> Configurations;
-
-  unsigned LoopRegister =
-      State.getExegesisTarget().getDefaultLoopCounterRegister(
-          State.getTargetMachine().getTargetTriple());
-
-  if (Opcodes.empty()) {
-    Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
-    for (const auto &Configuration : Configurations) {
-      if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
-          (Configuration.Key.MemoryMappings.size() != 0 ||
-           Configuration.Key.MemoryValues.size() != 0 ||
-           Configuration.Key.SnippetAddress != 0))
-        ExitWithError("Memory and snippet address annotations are only "
-                      "supported in subprocess "
-                      "execution mode");
-    }
-    LoopRegister = Configurations[0].Key.LoopRegister;
-  }
+  std::vector<BenchmarkRunner::RunnableConfiguration> RunnableConfigs;
+  SmallVector<unsigned> Repetitions;
 
-  SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
-  if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
-    Repetitors.emplace_back(
-        SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
-  else {
-    for (Benchmark::RepetitionModeE RepMode :
-         {Benchmark::RepetitionModeE::Duplicate,
-          Benchmark::RepetitionModeE::Loop})
-      Repetitors.emplace_back(
-          SnippetRepetitor::Create(RepMode, State, LoopRegister));
-  }
+  // Write to standard output if file is not set.
+  if (BenchmarkFile.empty())
+    BenchmarkFile = "-";
 
-  BitVector AllReservedRegs;
-  for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
-    AllReservedRegs |= Repetitor->getReservedRegs();
-
-  if (!Opcodes.empty()) {
-    for (const unsigned Opcode : Opcodes) {
-      // Ignore instructions without a sched class if
-      // -ignore-invalid-sched-class is passed.
-      if (IgnoreInvalidSchedClass &&
-          State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
-        errs() << State.getInstrInfo().getName(Opcode)
-               << ": ignoring instruction without sched class\n";
-        continue;
+  if (!RunMeasurement.empty()) {
+    // Right now we only support resuming before the measurement phase.
+    auto ErrOrBuffer =
+        MemoryBuffer::getFileOrSTDIN(RunMeasurement, /*IsText=*/true);
+    if (!ErrOrBuffer)
+      report_fatal_error(errorCodeToError(ErrOrBuffer.getError()));
+
+    std::vector<Benchmark> Benchmarks =
+        ExitOnErr(Benchmark::readYamls(State, **ErrOrBuffer));
+    deserializeRunnableConfigurations(Benchmarks, *Runner, RunnableConfigs,
+                                      Repetitions);
+  } else {
+    const auto Opcodes = getOpcodesOrDie(State);
+    std::vector<BenchmarkCode> Configurations;
+
+    unsigned LoopRegister =
+        State.getExegesisTarget().getDefaultLoopCounterRegister(
+            State.getTargetMachine().getTargetTriple());
+
+    if (Opcodes.empty()) {
+      Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
+      for (const auto &Configuration : Configurations) {
+        if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
+            (Configuration.Key.MemoryMappings.size() != 0 ||
+             Configuration.Key.MemoryValues.size() != 0 ||
+             Configuration.Key.SnippetAddress != 0))
+          ExitWithError("Memory and snippet address annotations are only "
+                        "supported in subprocess "
+                        "execution mode");
       }
+      LoopRegister = Configurations[0].Key.LoopRegister;
+    }
+    SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
+    if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
+      Repetitors.emplace_back(
+          SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
+    else {
+      for (Benchmark::RepetitionModeE RepMode :
+           {Benchmark::RepetitionModeE::Duplicate,
+            Benchmark::RepetitionModeE::Loop})
+        Repetitors.emplace_back(
+            SnippetRepetitor::Create(RepMode, State, LoopRegister));
+    }
 
-      auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
-      if (!ConfigsForInstr) {
-        logAllUnhandledErrors(
-            ConfigsForInstr.takeError(), errs(),
-            Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
-        continue;
+    BitVector AllReservedRegs;
+    for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
+      AllReservedRegs |= Repetitor->getReservedRegs();
+
+    if (!Opcodes.empty()) {
+      for (const unsigned Opcode : Opcodes) {
+        // Ignore instructions without a sched class if
+        // -ignore-invalid-sched-class is passed.
+        if (IgnoreInvalidSchedClass &&
+            State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
+          errs() << State.getInstrInfo().getName(Opcode)
+                 << ": ignoring instruction without sched class\n";
+          continue;
+        }
+
+        auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
+        if (!ConfigsForInstr) {
+          logAllUnhandledErrors(
+              ConfigsForInstr.takeError(), errs(),
+              Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
+          continue;
+        }
+        std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
+                  std::back_inserter(Configurations));
       }
-      std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
-                std::back_inserter(Configurations));
     }
-  }
+    if (MinInstructions == 0) {
+      ExitOnErr.setBanner("llvm-exegesis: ");
+      ExitWithError("--min-instructions must be greater than zero");
+    }
 
-  if (MinInstructions == 0) {
-    ExitOnErr.setBanner("llvm-exegesis: ");
-    ExitWithError("--min-instructions must be greater than zero");
+    collectRunnableConfigurations(Configurations, Repetitors, *Runner,
+                                  RunnableConfigs, Repetitions);
   }
 
-  // Write to standard output if file is not set.
-  if (BenchmarkFile.empty())
-    BenchmarkFile = "-";
-
-  if (!Configurations.empty())
-    runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);
+  if (!RunnableConfigs.empty())
+    runBenchmarkConfigurations(State, RunnableConfigs, Repetitions, *Runner);
 
   pfm::pfmTerminate();
 }

>From 14913ca3b813c69aee3583ab45787cd0dfcd245d Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 13 Jan 2025 11:47:05 -0800
Subject: [PATCH 2/4] fixup! Turn serialize-obj-file.test into a generic test

---
 .../tools/llvm-exegesis/{RISCV => }/serialize-obj-file.test  | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
 rename llvm/test/tools/llvm-exegesis/{RISCV => }/serialize-obj-file.test (87%)

diff --git a/llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test b/llvm/test/tools/llvm-exegesis/serialize-obj-file.test
similarity index 87%
rename from llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
rename to llvm/test/tools/llvm-exegesis/serialize-obj-file.test
index befd16699bef1a..09b76239d23e46 100644
--- a/llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
+++ b/llvm/test/tools/llvm-exegesis/serialize-obj-file.test
@@ -1,9 +1,9 @@
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --benchmark-phase=assemble-measured-code --mode=latency --benchmarks-file=%t.yaml
 # RUN: FileCheck --input-file=%t.yaml %s --check-prefixes=CHECK,SERIALIZE
-# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --run-measurement=%t.yaml --mode=latency --dry-run-measurement --use-dummy-perf-counters \
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --run-measurement=%t.yaml --mode=latency --benchmark-phase=dry-run-measurement --use-dummy-perf-counters \
 # RUN:    --dump-object-to-disk=%t.o | FileCheck %s --check-prefixes=CHECK,DESERIALIZE
 # RUN: llvm-objdump -d %t.o | FileCheck %s --check-prefix=OBJDUMP
-# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --dry-run-measurement --use-dummy-perf-counters | \
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=dry-run-measurement --use-dummy-perf-counters | \
 # RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=min | \
 # RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
@@ -11,6 +11,7 @@
 # RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-duplicate | \
 # RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
+# REQUIRES: riscv-registered-target && native-registered-exegesis-target
 # REQUIRES: zlib || zstd
 
 # A round-trip test for serialize/deserialize benchmarks.

>From 619a4c3fd66c99b5a2318bc51c54a6718041896d Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 13 Jan 2025 14:33:13 -0800
Subject: [PATCH 3/4] fixup! Address review comments

---
 llvm/include/llvm/Support/Compression.h       | 11 ++++++++
 .../llvm-exegesis/serialize-obj-file.test     | 25 +++++++++++-------
 .../llvm-exegesis/lib/BenchmarkResult.cpp     |  7 -----
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     | 26 ++++++++++++++-----
 4 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index 2a8da9e96d356f..4441c3481e984c 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -15,6 +15,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/YAMLTraits.h"
 
 namespace llvm {
 template <typename T> class SmallVectorImpl;
@@ -126,6 +127,16 @@ Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
 
 } // End of namespace compression
 
+namespace yaml {
+// Related YAML traits.
+template <> struct ScalarEnumerationTraits<compression::Format> {
+  static void enumeration(IO &Io, compression::Format &Format) {
+    Io.enumCase(Format, "zstd", compression::Format::Zstd);
+    Io.enumCase(Format, "zlib", compression::Format::Zlib);
+  }
+};
+} // namespace yaml
+
 } // End of namespace llvm
 
 #endif
diff --git a/llvm/test/tools/llvm-exegesis/serialize-obj-file.test b/llvm/test/tools/llvm-exegesis/serialize-obj-file.test
index 09b76239d23e46..5047090ae31b7c 100644
--- a/llvm/test/tools/llvm-exegesis/serialize-obj-file.test
+++ b/llvm/test/tools/llvm-exegesis/serialize-obj-file.test
@@ -1,16 +1,22 @@
-# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --benchmark-phase=assemble-measured-code --mode=latency --benchmarks-file=%t.yaml
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --exegesis-serialize-benchmarks --benchmark-phase=assemble-measured-code \
+# RUN:    --mode=latency --benchmarks-file=%t.yaml
 # RUN: FileCheck --input-file=%t.yaml %s --check-prefixes=CHECK,SERIALIZE
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --run-measurement=%t.yaml --mode=latency --benchmark-phase=dry-run-measurement --use-dummy-perf-counters \
 # RUN:    --dump-object-to-disk=%t.o | FileCheck %s --check-prefixes=CHECK,DESERIALIZE
 # RUN: llvm-objdump -d %t.o | FileCheck %s --check-prefix=OBJDUMP
-# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=dry-run-measurement --use-dummy-perf-counters | \
-# RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
-# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=min | \
-# RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
-# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-loop | \
-# RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
-# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-duplicate | \
+
+# We should not serialize benchmarks by default.
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --benchmark-phase=assemble-measured-code --mode=latency | \
 # RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
+
+# We currently don't support serialization for repetition modes that require more than one snippets.
+# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --exegesis-serialize-benchmarks --benchmark-phase=assemble-measured-code \
+# RUN:    --repetition-mode=min 2>&1 | FileCheck %s --check-prefix=NOT-SUPPORTED
+# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --exegesis-serialize-benchmarks --benchmark-phase=assemble-measured-code \
+# RUN:    --repetition-mode=middle-half-loop 2>&1 | FileCheck %s --check-prefix=NOT-SUPPORTED
+# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --exegesis-serialize-benchmarks --benchmark-phase=assemble-measured-code \
+# RUN:    --repetition-mode=middle-half-duplicate 2>&1 | FileCheck %s --check-prefix=NOT-SUPPORTED
+
 # REQUIRES: riscv-registered-target && native-registered-exegesis-target
 # REQUIRES: zlib || zstd
 
@@ -29,6 +35,7 @@
 
 # OBJDUMP: sh3add
 
-# Negative tests: we shouldn't serialize object files in some scenarios.
+# Negative tests.
 
+# NOT-SUPPORTED: -exegesis-serialize-benchmarks currently only supports -repetition-mode of loop and duplicate.
 # NO-SERIALIZE-NOT: object_file:
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
index eff5a6d547cbda..1323f728b708ee 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
@@ -287,13 +287,6 @@ template <> struct ScalarTraits<exegesis::RegisterValue> {
   static const bool flow = true;
 };
 
-template <> struct ScalarEnumerationTraits<compression::Format> {
-  static void enumeration(IO &Io, compression::Format &Format) {
-    Io.enumCase(Format, "zstd", compression::Format::Zstd);
-    Io.enumCase(Format, "zlib", compression::Format::Zlib);
-  }
-};
-
 template <> struct MappingContextTraits<exegesis::BenchmarkKey, YamlContext> {
   static void mapping(IO &Io, exegesis::BenchmarkKey &Obj,
                       YamlContext &Context) {
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 9a8c3f28176e6a..72bbc6a5e58e9e 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
@@ -51,6 +52,14 @@
 #endif // __linux__
 
 namespace llvm {
+
+static cl::opt<bool>
+    SerializeBenchmarks("exegesis-serialize-benchmarks",
+                        cl::desc("Generate fully-serialized benchmarks "
+                                 "that can later be deserialized and "
+                                 "resuming the measurement."),
+                        cl::init(false));
+
 namespace exegesis {
 
 BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
@@ -664,16 +673,19 @@ BenchmarkRunner::getRunnableConfiguration(
                         LoopBodySize, GenerateMemoryInstructions);
     if (Error E = Snippet.takeError())
       return std::move(E);
-    // There is no need to serialize/deserialize the object file if we're
-    // simply running end-to-end measurements.
-    // Same goes for any repetition mode that requires more than a single
-    // snippet.
-    if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure &&
-        (RepetitionMode == Benchmark::Loop ||
-         RepetitionMode == Benchmark::Duplicate)) {
+
+    // Generate fully-serialized benchmarks.
+    if (SerializeBenchmarks) {
+      if (RepetitionMode != Benchmark::Loop &&
+          RepetitionMode != Benchmark::Duplicate)
+        return make_error<Failure>(
+            "-exegesis-serialize-benchmarks currently "
+            "only supports -repetition-mode of loop and duplicate.");
+
       if (Error E = BenchmarkResult.setObjectFile(*Snippet))
         return std::move(E);
     }
+
     RC.ObjectFile = getObjectFromBuffer(*Snippet);
   }
 

>From c03937600d3f8a4664a8a6b86377533ac757b54f Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 13 Jan 2025 14:57:18 -0800
Subject: [PATCH 4/4] fixup! Use a shorter flag name

---
 llvm/docs/CommandGuide/llvm-exegesis.rst      | 21 ++++++++++++-------
 .../llvm-exegesis/serialize-obj-file.test     | 10 ++++-----
 .../llvm-exegesis/lib/BenchmarkResult.cpp     |  5 +++--
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     |  6 +++---
 4 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index f2f12253366118..e036757c69bdec 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -299,18 +299,25 @@ OPTIONS
   However, it is possible to stop at some stage before measuring. Choices are:
   * ``prepare-snippet``: Only generate the minimal instruction sequence.
   * ``prepare-and-assemble-snippet``: Same as ``prepare-snippet``, but also dumps an excerpt of the sequence (hex encoded).
-  * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``. but
-    also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
-    If either zlib or zstd is available and we're using either duplicate or
-    loop repetition mode, this phase generates benchmarks with a serialized
-    snippet object file attached to it.
+  * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``, but also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
   * ``measure``: Same as ``assemble-measured-code``, but also runs the measurement.
   * ``dry-run-measurement``: Same as measure, but does not actually execute the snippet.
 
+.. option:: --serialize-benchmarks
+
+  Generate a fully serialized benchmarks file, including the assembled object
+  files. This is useful to resume the measurement later with ``--run-measurement``.
+
+.. option:: --force-serialized-obj-compress-format=[zlib|zstd]
+
+  When serializing benchmarks with ``--serialize-benchmarks``, always use the
+  compression format designated by this flag.
+
 .. option:: --run-measurement=<benchmarks file>
 
-  Given a benchmarks file generated after the ``assembly-measured-code`` phase,
-  resume the measurement phase from it.
+  Given a fully serialized benchmarks file generated after the
+  ``assemble-measured-code`` phase with ``--serialize-benchmarks``, resume the
+  measurement phase from it.
 
 .. option:: --x86-lbr-sample-period=<nBranches/sample>
 
diff --git a/llvm/test/tools/llvm-exegesis/serialize-obj-file.test b/llvm/test/tools/llvm-exegesis/serialize-obj-file.test
index 5047090ae31b7c..5b5a68e36035a5 100644
--- a/llvm/test/tools/llvm-exegesis/serialize-obj-file.test
+++ b/llvm/test/tools/llvm-exegesis/serialize-obj-file.test
@@ -1,4 +1,4 @@
-# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --exegesis-serialize-benchmarks --benchmark-phase=assemble-measured-code \
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --serialize-benchmarks --benchmark-phase=assemble-measured-code \
 # RUN:    --mode=latency --benchmarks-file=%t.yaml
 # RUN: FileCheck --input-file=%t.yaml %s --check-prefixes=CHECK,SERIALIZE
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --run-measurement=%t.yaml --mode=latency --benchmark-phase=dry-run-measurement --use-dummy-perf-counters \
@@ -10,11 +10,11 @@
 # RUN:    FileCheck %s --check-prefix=NO-SERIALIZE
 
 # We currently don't support serialization for repetition modes that require more than one snippets.
-# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --exegesis-serialize-benchmarks --benchmark-phase=assemble-measured-code \
+# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --serialize-benchmarks --benchmark-phase=assemble-measured-code \
 # RUN:    --repetition-mode=min 2>&1 | FileCheck %s --check-prefix=NOT-SUPPORTED
-# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --exegesis-serialize-benchmarks --benchmark-phase=assemble-measured-code \
+# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --serialize-benchmarks --benchmark-phase=assemble-measured-code \
 # RUN:    --repetition-mode=middle-half-loop 2>&1 | FileCheck %s --check-prefix=NOT-SUPPORTED
-# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --exegesis-serialize-benchmarks --benchmark-phase=assemble-measured-code \
+# RUN: not llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --serialize-benchmarks --benchmark-phase=assemble-measured-code \
 # RUN:    --repetition-mode=middle-half-duplicate 2>&1 | FileCheck %s --check-prefix=NOT-SUPPORTED
 
 # REQUIRES: riscv-registered-target && native-registered-exegesis-target
@@ -37,5 +37,5 @@
 
 # Negative tests.
 
-# NOT-SUPPORTED: -exegesis-serialize-benchmarks currently only supports -repetition-mode of loop and duplicate.
+# NOT-SUPPORTED: -serialize-benchmarks currently only supports -repetition-mode of loop and duplicate.
 # NO-SERIALIZE-NOT: object_file:
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
index 1323f728b708ee..cdda71d6431948 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp
@@ -31,8 +31,9 @@ static constexpr const char kInvalidOperand[] = "INVALID";
 namespace llvm {
 
 static cl::opt<compression::Format> ForceObjectFileCompressionFormat(
-    "exegesis-force-obj-compress-format", cl::Hidden,
-    cl::desc("Force to use this compression format for object files."),
+    "force-serialized-obj-compress-format", cl::Hidden,
+    cl::desc(
+        "Force to use this compression format for serialized object files."),
     cl::values(clEnumValN(compression::Format::Zstd, "zstd", "Using Zstandard"),
                clEnumValN(compression::Format::Zlib, "zlib", "Using LibZ")));
 
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 72bbc6a5e58e9e..aaf8dac8945d8f 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -54,7 +54,7 @@
 namespace llvm {
 
 static cl::opt<bool>
-    SerializeBenchmarks("exegesis-serialize-benchmarks",
+    SerializeBenchmarks("serialize-benchmarks",
                         cl::desc("Generate fully-serialized benchmarks "
                                  "that can later be deserialized and "
                                  "resuming the measurement."),
@@ -679,8 +679,8 @@ BenchmarkRunner::getRunnableConfiguration(
       if (RepetitionMode != Benchmark::Loop &&
           RepetitionMode != Benchmark::Duplicate)
         return make_error<Failure>(
-            "-exegesis-serialize-benchmarks currently "
-            "only supports -repetition-mode of loop and duplicate.");
+            "-serialize-benchmarks currently only supports -repetition-mode "
+            "of loop and duplicate.");
 
       if (Error E = BenchmarkResult.setObjectFile(*Snippet))
         return std::move(E);