[llvm] [llvm-exegesis] Add support for warmup iterations (PR #76895)

Aiden Grossman via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 3 21:19:32 PST 2024


https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/76895

This patch adds support for warmup iterations in the subprocess execution mode. These are iterations of the snippet that run after setup has completed but before the performance counters are turned on, in order to warm up the cache/TLB.

>From 7e3c5252cc963a51508b9c3915b9623d48355ef0 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Wed, 3 Jan 2024 15:56:51 -0800
Subject: [PATCH] [llvm-exegesis] Add support for warmup iterations

This patch adds support for warmup iterations in the subprocess
execution mode. These are iterations of the snippet that run after
setup has completed but before the performance counters are turned
on, in order to warm up the cache/TLB.
---
 llvm/tools/llvm-exegesis/lib/Assembler.cpp    | 181 +++++++++++-------
 llvm/tools/llvm-exegesis/lib/Assembler.h      |  14 +-
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     |  24 ++-
 .../tools/llvm-exegesis/lib/BenchmarkRunner.h |   6 +-
 .../llvm-exegesis/lib/SnippetRepetitor.cpp    |  22 ++-
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    |  17 +-
 .../llvm-exegesis/Common/AssemblerUtils.h     |   2 +-
 .../X86/SnippetRepetitorTest.cpp              |   3 +-
 8 files changed, 178 insertions(+), 91 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 9f03a4e3a5a6ff..18a2b3b25a683f 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -47,38 +47,14 @@ static constexpr const char ModuleID[] = "ExegesisInfoTest";
 static constexpr const char FunctionID[] = "foo";
 static const Align kFunctionAlignment(4096);
 
-// Fills the given basic block with register setup code, and returns true if
-// all registers could be setup correctly.
-static bool generateSnippetSetupCode(const ExegesisTarget &ET,
-                                     const MCSubtargetInfo *const MSI,
-                                     BasicBlockFiller &BBF,
-                                     const BenchmarkKey &Key,
-                                     bool GenerateMemoryInstructions) {
+static bool generateRegisterSetupCode(
+    const ExegesisTarget &ET, const MCSubtargetInfo *const MSI,
+    BasicBlockFiller &BBF, ArrayRef<RegisterValue> InitialRegisterValues,
+    bool GenerateMemoryInstructions, Register StackPointerRegister) {
   bool IsSnippetSetupComplete = true;
-  if (GenerateMemoryInstructions) {
-    BBF.addInstructions(ET.generateMemoryInitialSetup());
-    for (const MemoryMapping &MM : Key.MemoryMappings) {
-#ifdef __linux__
-      // The frontend that generates that parses the memory mapping information
-      // from the user should validate that the requested address is a multiple
-      // of the page size. Assert that this is true here.
-      assert(MM.Address % getpagesize() == 0 &&
-             "Memory mappings need to be aligned to page boundaries.");
-#endif
-      BBF.addInstructions(ET.generateMmap(
-          MM.Address, Key.MemoryValues.at(MM.MemoryValueName).SizeBytes,
-          ET.getAuxiliaryMemoryStartAddress() +
-              sizeof(int) * (Key.MemoryValues.at(MM.MemoryValueName).Index +
-                             SubprocessMemory::AuxiliaryMemoryOffset)));
-    }
-    BBF.addInstructions(ET.setStackRegisterToAuxMem());
-  }
-  Register StackPointerRegister = BBF.MF.getSubtarget()
-                                      .getTargetLowering()
-                                      ->getStackPointerRegisterToSaveRestore();
-  for (const RegisterValue &RV : Key.RegisterInitialValues) {
+  for (const RegisterValue &RV : InitialRegisterValues) {
     if (GenerateMemoryInstructions) {
-      // If we're generating memory instructions, don't load in the value for
+      // If we are generating memory instructions, don't load in the value for
       // the register with the stack pointer as it will be used later to finish
       // the setup.
       if (RV.Register == StackPointerRegister)
@@ -90,23 +66,61 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
       IsSnippetSetupComplete = false;
     BBF.addInstructions(SetRegisterCode);
   }
-  if (GenerateMemoryInstructions) {
+  return IsSnippetSetupComplete;
+}
+
+static void generateMemoryMappings(const ExegesisTarget &ET,
+                                   BasicBlockFiller &BBF,
+                                   const BenchmarkKey &Key) {
+  BBF.addInstructions(ET.generateMemoryInitialSetup());
+  for (const MemoryMapping &MM : Key.MemoryMappings) {
+#ifdef __linux__
+    // The frontend that parses the memory mapping information
+    // from the user should validate that the requested address is a multiple
+    // of the page size. Assert that this is true here.
+    assert(MM.Address % getpagesize() == 0 &&
+           "Memory mappings need to be aligned to page boundaries.");
+#endif
+    BBF.addInstructions(ET.generateMmap(
+        MM.Address, Key.MemoryValues.at(MM.MemoryValueName).SizeBytes,
+        ET.getAuxiliaryMemoryStartAddress() +
+            sizeof(int) * (Key.MemoryValues.at(MM.MemoryValueName).Index +
+                           SubprocessMemory::AuxiliaryMemoryOffset)));
+  }
+  BBF.addInstructions(ET.setStackRegisterToAuxMem());
+}
+
+static bool
+setStackPointerRegister(const ExegesisTarget &ET,
+                        const MCSubtargetInfo *const MSI, BasicBlockFiller &BBF,
+                        ArrayRef<RegisterValue> InitialRegisterValues,
+                        Register StackPointerRegister) {
+  bool IsSnippetSetupComplete = true;
+  for (const RegisterValue &RV : InitialRegisterValues) {
+    // Load in the stack register now as we're done using it elsewhere
+    // and need to set the value in preparation for executing the
+    // snippet.
+    if (RV.Register != StackPointerRegister)
+      continue;
+    const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
+    if (SetRegisterCode.empty())
+      IsSnippetSetupComplete = false;
+    BBF.addInstructions(SetRegisterCode);
+    break;
+  }
+  return IsSnippetSetupComplete;
+}
+
+static bool generatePerfCounterReset(
+    const ExegesisTarget &ET, const MCSubtargetInfo *const MSI,
+    BasicBlockFiller &BBF, ArrayRef<RegisterValue> InitialRegisterValues,
+    Register StackPointerRegister) {
+  bool IsSnippetSetupComplete = true;
 #ifdef HAVE_LIBPFM
-    BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true));
+  BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true));
 #endif // HAVE_LIBPFM
-    for (const RegisterValue &RV : Key.RegisterInitialValues) {
-      // Load in the stack register now as we're done using it elsewhere
-      // and need to set the value in preparation for executing the
-      // snippet.
-      if (RV.Register != StackPointerRegister)
-        continue;
-      const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
-      if (SetRegisterCode.empty())
-        IsSnippetSetupComplete = false;
-      BBF.addInstructions(SetRegisterCode);
-      break;
-    }
-  }
+  IsSnippetSetupComplete = setStackPointerRegister(
+      ET, MSI, BBF, InitialRegisterValues, StackPointerRegister);
   return IsSnippetSetupComplete;
 }
 
@@ -147,7 +161,7 @@ MachineFunction &createVoidVoidPtrMachineFunction(StringRef FunctionName,
   return MMI->getOrCreateMachineFunction(*F);
 }
 
-BasicBlockFiller::BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB,
+BasicBlockFiller::BasicBlockFiller(MachineFunction *MF, MachineBasicBlock *MBB,
                                    const MCInstrInfo *MCII)
     : MF(MF), MBB(MBB), MCII(MCII) {}
 
@@ -193,17 +207,17 @@ void BasicBlockFiller::addReturn(const ExegesisTarget &ET,
 #endif // __linux__
   }
   // Insert the return code.
-  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   if (TII->getReturnOpcode() < TII->getNumOpcodes()) {
     BuildMI(MBB, DL, TII->get(TII->getReturnOpcode()));
   } else {
-    MachineIRBuilder MIB(MF);
+    MachineIRBuilder MIB(*MF);
     MIB.setMBB(*MBB);
 
     FunctionLoweringInfo FuncInfo;
     FuncInfo.CanLowerReturn = true;
-    MF.getSubtarget().getCallLowering()->lowerReturn(MIB, nullptr, {}, FuncInfo,
-                                                     0);
+    MF->getSubtarget().getCallLowering()->lowerReturn(MIB, nullptr, {},
+                                                      FuncInfo, 0);
   }
 }
 
@@ -215,7 +229,7 @@ FunctionFiller::FunctionFiller(MachineFunction &MF,
 BasicBlockFiller FunctionFiller::addBasicBlock() {
   MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
   MF.push_back(MBB);
-  return BasicBlockFiller(MF, MBB, MCII);
+  return BasicBlockFiller(&MF, MBB, MCII);
 }
 
 ArrayRef<unsigned> FunctionFiller::getRegistersSetUp() const {
@@ -241,11 +255,28 @@ BitVector getFunctionReservedRegs(const TargetMachine &TM) {
   return MF.getSubtarget().getRegisterInfo()->getReservedRegs(MF);
 }
 
+static void setMBBLiveIns(const ExegesisTarget &ET, MachineBasicBlock *MBB,
+                          bool GenerateMemoryInstructions,
+                          ArrayRef<unsigned> LiveIns) {
+  for (const unsigned Reg : LiveIns)
+    MBB->addLiveIn(Reg);
+
+  if (GenerateMemoryInstructions) {
+    for (const unsigned Reg : ET.getArgumentRegisters())
+      MBB->addLiveIn(Reg);
+    // Add a live in for registers that need saving so that the machine
+    // verifier doesn't fail if the register is never defined.
+    for (const unsigned Reg : ET.getRegistersNeedSaving())
+      MBB->addLiveIn(Reg);
+  }
+}
+
 Error assembleToStream(const ExegesisTarget &ET,
                        std::unique_ptr<LLVMTargetMachine> TM,
                        ArrayRef<unsigned> LiveIns, const FillFunction &Fill,
                        raw_pwrite_stream &AsmStream, const BenchmarkKey &Key,
-                       bool GenerateMemoryInstructions) {
+                       bool GenerateMemoryInstructions,
+                       std::optional<FillFunction> WarmupFill) {
   auto Context = std::make_unique<LLVMContext>();
   std::unique_ptr<Module> Module =
       createModule(Context, TM->createDataLayout());
@@ -280,20 +311,42 @@ Error assembleToStream(const ExegesisTarget &ET,
   FunctionFiller Sink(MF, std::move(RegistersSetUp));
   auto Entry = Sink.getEntry();
 
-  for (const unsigned Reg : LiveIns)
-    Entry.MBB->addLiveIn(Reg);
+  setMBBLiveIns(ET, Entry.MBB, GenerateMemoryInstructions, LiveIns);
 
-  if (GenerateMemoryInstructions) {
-    for (const unsigned Reg : ET.getArgumentRegisters())
-      Entry.MBB->addLiveIn(Reg);
-    // Add a live in for registers that need saving so that the machine verifier
-    // doesn't fail if the register is never defined.
-    for (const unsigned Reg : ET.getRegistersNeedSaving())
-      Entry.MBB->addLiveIn(Reg);
+  bool IsSnippetSetupComplete = true;
+  const MCSubtargetInfo *const MSI = TM->getMCSubtargetInfo();
+
+  Register StackPointerRegister = MF.getSubtarget()
+                                      .getTargetLowering()
+                                      ->getStackPointerRegisterToSaveRestore();
+
+  if (GenerateMemoryInstructions)
+    generateMemoryMappings(ET, Entry, Key);
+
+  BasicBlockFiller BenchmarkStartBlock = Entry;
+
+  if (WarmupFill) {
+    IsSnippetSetupComplete &= generateRegisterSetupCode(
+        ET, MSI, Entry, Key.RegisterInitialValues, GenerateMemoryInstructions,
+        StackPointerRegister);
+
+    IsSnippetSetupComplete &= setStackPointerRegister(
+        ET, MSI, Entry, Key.RegisterInitialValues, StackPointerRegister);
+
+    BenchmarkStartBlock = (*WarmupFill)(Sink, false, Entry);
+
+    setMBBLiveIns(ET, BenchmarkStartBlock.MBB, GenerateMemoryInstructions,
+                  LiveIns);
   }
 
-  const bool IsSnippetSetupComplete = generateSnippetSetupCode(
-      ET, TM->getMCSubtargetInfo(), Entry, Key, GenerateMemoryInstructions);
+  IsSnippetSetupComplete &= generateRegisterSetupCode(
+      ET, MSI, BenchmarkStartBlock, Key.RegisterInitialValues,
+      GenerateMemoryInstructions, StackPointerRegister);
+
+  if (GenerateMemoryInstructions)
+    IsSnippetSetupComplete &= generatePerfCounterReset(
+        ET, MSI, BenchmarkStartBlock, Key.RegisterInitialValues,
+        StackPointerRegister);
 
   // If the snippet setup is not complete, we disable liveliness tracking. This
   // means that we won't know what values are in the registers.
@@ -301,7 +354,7 @@ Error assembleToStream(const ExegesisTarget &ET,
   if (!IsSnippetSetupComplete)
     Properties.reset(MachineFunctionProperties::Property::TracksLiveness);
 
-  Fill(Sink);
+  Fill(Sink, true, BenchmarkStartBlock);
 
   // prologue/epilogue pass needs the reserved registers to be frozen, this
   // is usually done by the SelectionDAGISel pass.
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.h b/llvm/tools/llvm-exegesis/lib/Assembler.h
index d85d7fdcf04f54..8af3399b9f09a0 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.h
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.h
@@ -44,7 +44,7 @@ BitVector getFunctionReservedRegs(const TargetMachine &TM);
 // Helper to fill in a basic block.
 class BasicBlockFiller {
 public:
-  BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB,
+  BasicBlockFiller(MachineFunction *MF, MachineBasicBlock *MBB,
                    const MCInstrInfo *MCII);
 
   void addInstruction(const MCInst &Inst, const DebugLoc &DL = DebugLoc());
@@ -53,9 +53,9 @@ class BasicBlockFiller {
   void addReturn(const ExegesisTarget &ET, bool SubprocessCleanup,
                  const DebugLoc &DL = DebugLoc());
 
-  MachineFunction &MF;
-  MachineBasicBlock *const MBB;
-  const MCInstrInfo *const MCII;
+  MachineFunction *MF;
+  MachineBasicBlock *MBB;
+  const MCInstrInfo *MCII;
 };
 
 // Helper to fill in a function.
@@ -82,7 +82,8 @@ class FunctionFiller {
 };
 
 // A callback that fills a function.
-using FillFunction = std::function<void(FunctionFiller &)>;
+using FillFunction =
+    std::function<BasicBlockFiller(FunctionFiller &, bool, BasicBlockFiller &)>;
 
 // Creates a temporary `void foo(char*)` function containing the provided
 // Instructions. Runs a set of llvm Passes to provide correct prologue and
@@ -92,7 +93,8 @@ Error assembleToStream(const ExegesisTarget &ET,
                        std::unique_ptr<LLVMTargetMachine> TM,
                        ArrayRef<unsigned> LiveIns, const FillFunction &Fill,
                        raw_pwrite_stream &AsmStreamm, const BenchmarkKey &Key,
-                       bool GenerateMemoryInstructions);
+                       bool GenerateMemoryInstructions,
+                       std::optional<FillFunction> WarmupFill);
 
 // Creates an ObjectFile in the format understood by the host.
 // Note: the resulting object keeps a copy of Buffer so it can be discarded once
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 5f08c67bfc89a0..67e17b5b6acef1 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -478,15 +478,22 @@ class SubProcessFunctionExecutorImpl
 Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
     const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
     unsigned MinInstructions, unsigned LoopBodySize,
-    bool GenerateMemoryInstructions) const {
+    bool GenerateMemoryInstructions, unsigned MinWarmupInstructions) const {
   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
   SmallString<0> Buffer;
   raw_svector_ostream OS(Buffer);
+
+  std::optional<FillFunction> OptionalWarmupFill = {};
+  if (MinWarmupInstructions > 0)
+    OptionalWarmupFill =
+        Repetitor.Repeat(Instructions, MinWarmupInstructions, LoopBodySize,
+                         GenerateMemoryInstructions);
+
   if (Error E = assembleToStream(
           State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
           Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
                            GenerateMemoryInstructions),
-          OS, BC.Key, GenerateMemoryInstructions)) {
+          OS, BC.Key, GenerateMemoryInstructions, OptionalWarmupFill)) {
     return std::move(E);
   }
   return Buffer;
@@ -495,7 +502,7 @@ Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
 Expected<BenchmarkRunner::RunnableConfiguration>
 BenchmarkRunner::getRunnableConfiguration(
     const BenchmarkCode &BC, unsigned NumRepetitions, unsigned LoopBodySize,
-    const SnippetRepetitor &Repetitor) const {
+    const SnippetRepetitor &Repetitor, unsigned WarmupMinInstructions) const {
   RunnableConfiguration RC;
 
   Benchmark &BenchmarkResult = RC.BenchmarkResult;
@@ -519,9 +526,12 @@ BenchmarkRunner::getRunnableConfiguration(
   if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
     const int MinInstructionsForSnippet = 4 * Instructions.size();
     const int LoopBodySizeForSnippet = 2 * Instructions.size();
+    // Do not include warmup iterations in the assembled snippet to display
+    // as reasonable warmup instruction minimums can easily blow up the size
+    // of the string.
     auto Snippet =
         assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
-                        LoopBodySizeForSnippet, GenerateMemoryInstructions);
+                        LoopBodySizeForSnippet, GenerateMemoryInstructions, 0);
     if (Error E = Snippet.takeError())
       return std::move(E);
 
@@ -534,9 +544,9 @@ BenchmarkRunner::getRunnableConfiguration(
   // measurements.
   if (BenchmarkPhaseSelector >
       BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
-    auto Snippet =
-        assembleSnippet(BC, Repetitor, BenchmarkResult.NumRepetitions,
-                        LoopBodySize, GenerateMemoryInstructions);
+    auto Snippet = assembleSnippet(
+        BC, Repetitor, BenchmarkResult.NumRepetitions, LoopBodySize,
+        GenerateMemoryInstructions, WarmupMinInstructions);
     if (Error E = Snippet.takeError())
       return std::move(E);
     RC.ObjectFile = getObjectFromBuffer(*Snippet);
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
index d746a0f775646f..7a5fac345986cd 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -63,7 +63,8 @@ class BenchmarkRunner {
   Expected<RunnableConfiguration>
   getRunnableConfiguration(const BenchmarkCode &Configuration,
                            unsigned NumRepetitions, unsigned LoopUnrollFactor,
-                           const SnippetRepetitor &Repetitor) const;
+                           const SnippetRepetitor &Repetitor,
+                           unsigned WarmupMinInstructions) const;
 
   std::pair<Error, Benchmark>
   runConfiguration(RunnableConfiguration &&RC,
@@ -116,7 +117,8 @@ class BenchmarkRunner {
   Expected<SmallString<0>>
   assembleSnippet(const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
                   unsigned MinInstructions, unsigned LoopBodySize,
-                  bool GenerateMemoryInstructions) const;
+                  bool GenerateMemoryInstructions,
+                  unsigned MinWarmupInstructions) const;
 
   Expected<std::string> writeObjectFile(StringRef Buffer,
                                         StringRef FileName) const;
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
index cc5a045a8be5dd..3100716165f23f 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
@@ -26,8 +26,8 @@ class DuplicateSnippetRepetitor : public SnippetRepetitor {
                       unsigned LoopBodySize,
                       bool CleanupMemory) const override {
     return [this, Instructions, MinInstructions,
-            CleanupMemory](FunctionFiller &Filler) {
-      auto Entry = Filler.getEntry();
+            CleanupMemory](FunctionFiller &Filler, bool AddReturn,
+                           BasicBlockFiller &Entry) -> BasicBlockFiller {
       if (!Instructions.empty()) {
         // Add the whole snippet at least once.
         Entry.addInstructions(Instructions);
@@ -35,7 +35,9 @@ class DuplicateSnippetRepetitor : public SnippetRepetitor {
           Entry.addInstruction(Instructions[I % Instructions.size()]);
         }
       }
-      Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
+      if (AddReturn)
+        Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
+      return std::move(Entry);
     };
   }
 
@@ -57,9 +59,9 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
                       unsigned LoopBodySize,
                       bool CleanupMemory) const override {
     return [this, Instructions, MinInstructions, LoopBodySize,
-            CleanupMemory](FunctionFiller &Filler) {
+            CleanupMemory](FunctionFiller &Filler, bool AddReturn,
+                           BasicBlockFiller &Entry) -> BasicBlockFiller {
       const auto &ET = State.getExegesisTarget();
-      auto Entry = Filler.getEntry();
 
       // We can not use loop snippet repetitor for terminator instructions.
       for (const MCInst &Inst : Instructions) {
@@ -68,7 +70,7 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
         if (!MCID.isTerminator())
           continue;
         Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
-        return;
+        return Entry;
       }
 
       auto Loop = Filler.addBasicBlock();
@@ -93,7 +95,7 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
       Entry.MBB->addSuccessor(Loop.MBB, BranchProbability::getOne());
       Loop.MBB->addSuccessor(Loop.MBB, BranchProbability::getOne());
       // If the snippet setup completed, then we can track liveness.
-      if (Loop.MF.getProperties().hasProperty(
+      if (Loop.MF->getProperties().hasProperty(
               MachineFunctionProperties::Property::TracksLiveness)) {
         // The live ins are: the loop counter, the registers that were setup by
         // the entry block, and entry block live ins.
@@ -112,7 +114,11 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
 
       // Set up the exit basic block.
       Loop.MBB->addSuccessor(Exit.MBB, BranchProbability::getZero());
-      Exit.addReturn(State.getExegesisTarget(), CleanupMemory);
+
+      if (AddReturn)
+        Exit.addReturn(State.getExegesisTarget(), CleanupMemory);
+
+      return Exit;
     };
   }
 
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 1b35fde815f11f..d8540a68374167 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -268,6 +268,12 @@ static cl::opt<unsigned> BenchmarkRepeatCount(
              "before aggregating the results"),
     cl::cat(BenchmarkOptions), cl::init(30));
 
+static cl::opt<unsigned> WarmupMinInstructions(
+    "warmup-min-instructions",
+    cl::desc("The number of iterations of the snippet to run before starting "
+             "the performance counters and actually benchmarking the snippet"),
+    cl::cat(BenchmarkOptions), cl::init(0));
+
 static ExitOnError ExitOnErr("llvm-exegesis error: ");
 
 // Helper function that logs the error(s) and exits.
@@ -405,8 +411,9 @@ static void runBenchmarkConfigurations(
 
     for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
          Repetitors) {
-      auto RC = ExitOnErr(Runner.getRunnableConfiguration(
-          Conf, NumRepetitions, LoopBodySize, *Repetitor));
+      auto RC = ExitOnErr(
+          Runner.getRunnableConfiguration(Conf, NumRepetitions, LoopBodySize,
+                                          *Repetitor, WarmupMinInstructions));
       std::optional<StringRef> DumpFile;
       if (DumpObjectToDisk.getNumOccurrences())
         DumpFile = DumpObjectToDisk;
@@ -481,6 +488,12 @@ void benchmarkMain() {
 #endif
   }
 
+  if (WarmupMinInstructions > 0 &&
+      ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess) {
+    ExitWithError("Warmup iterations are currently only supported in the "
+                  "subprocess execution mode.");
+  }
+
   InitializeAllAsmPrinters();
   InitializeAllAsmParsers();
   InitializeAllExegesisTargets();
diff --git a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
index 9cf63931e6dd50..5c92dbcbb2fcc2 100644
--- a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
+++ b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
@@ -81,7 +81,7 @@ class MachineFunctionGeneratorBaseTest : public ::testing::Test {
     BenchmarkKey Key;
     Key.RegisterInitialValues = RegisterInitialValues;
     EXPECT_FALSE(assembleToStream(*ET, createTargetMachine(), /*LiveIns=*/{},
-                                  Fill, AsmStream, Key, false));
+                                  Fill, AsmStream, Key, false, {}));
     Expected<ExecutableFunction> ExecFunc = ExecutableFunction::create(
         createTargetMachine(), getObjectFromBuffer(AsmStream.str()));
 
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
index d2382ec0cddc49..53b46fa5cfcdf7 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp
@@ -44,7 +44,8 @@ class X86SnippetRepetitorTest : public X86TestBase {
     FunctionFiller Sink(*MF, {X86::EAX});
     const auto Fill =
         Repetitor->Repeat(Instructions, kMinInstructions, kLoopBodySize, false);
-    Fill(Sink);
+    BasicBlockFiller Entry = Sink.getEntry();
+    Fill(Sink, true, Entry);
   }
 
   static constexpr const unsigned kMinInstructions = 3;



More information about the llvm-commits mailing list