[llvm] [llvm-exegesis] Enable dummy perf counters in subprocess mode (PR #74224)

Aiden Grossman via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 2 20:18:14 PST 2023


https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/74224

This patch enables the use of dummy perf counters in the subprocess execution mode. This allows users who do not have libpfm installed, or who lack access to performance counters, to still use the subprocess execution mode to test the execution of snippets. This enables use cases such as deriving memory annotations inside a virtual machine, where performance counters are not present.

>From d5081e46a12e5714c97463135ab2b20c89e69585 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sat, 2 Dec 2023 20:01:50 -0800
Subject: [PATCH] [llvm-exegesis] Enable dummy perf counters in subprocess mode

This patch enables the use of dummy perf counters in the subprocess
execution mode. This allows users who do not have libpfm installed, or
who lack access to performance counters, to still use the subprocess
execution mode to test the execution of snippets. This enables use cases
such as deriving memory annotations inside a virtual machine, where
performance counters are not present.
---
 .../X86/dummy-perf-counters-subprocess.s      |  7 --
 .../latency/dummy-perf-counters-subprocess.s  |  9 +++
 llvm/tools/llvm-exegesis/lib/Assembler.cpp    | 19 +++--
 llvm/tools/llvm-exegesis/lib/Assembler.h      |  6 +-
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     | 72 +++++++++++++++----
 llvm/tools/llvm-exegesis/lib/LlvmState.cpp    |  8 ++-
 llvm/tools/llvm-exegesis/lib/LlvmState.h      |  6 +-
 .../llvm-exegesis/lib/SnippetRepetitor.cpp    |  9 ++-
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    |  5 --
 .../llvm-exegesis/Common/AssemblerUtils.h     |  2 +-
 10 files changed, 100 insertions(+), 43 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-exegesis/X86/dummy-perf-counters-subprocess.s
 create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/dummy-perf-counters-subprocess.s

diff --git a/llvm/test/tools/llvm-exegesis/X86/dummy-perf-counters-subprocess.s b/llvm/test/tools/llvm-exegesis/X86/dummy-perf-counters-subprocess.s
deleted file mode 100644
index d810d697ca273..0000000000000
--- a/llvm/test/tools/llvm-exegesis/X86/dummy-perf-counters-subprocess.s
+++ /dev/null
@@ -1,7 +0,0 @@
-# REQUIRES: exegesis-can-measure-latency, x86_64-linux
-
-# RUN: not llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess -use-dummy-perf-counters 2>&1 | FileCheck %s
-
-# CHECK: llvm-exegesis error: Dummy perf counters are not supported in the subprocess execution mode.
-
-mov $0, %rax
diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/dummy-perf-counters-subprocess.s b/llvm/test/tools/llvm-exegesis/X86/latency/dummy-perf-counters-subprocess.s
new file mode 100644
index 0000000000000..f5b40fbba28dd
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/dummy-perf-counters-subprocess.s
@@ -0,0 +1,9 @@
+# REQUIRES: exegesis-can-execute-x86_64, x86_64-linux
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency -snippets-file=%s -execution-mode=subprocess --use-dummy-perf-counters | FileCheck %s
+
+# LLVM-EXEGESIS-DEFREG RAX 0
+
+movq $5, %rax
+
+# CHECK: measurements:   []
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index e17d239faa47e..cf5281556d536 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -48,7 +48,8 @@ static const Align kFunctionAlignment(4096);
 static bool generateSnippetSetupCode(
     const ExegesisTarget &ET, const MCSubtargetInfo *const MSI,
     ArrayRef<RegisterValue> RegisterInitialValues, BasicBlockFiller &BBF,
-    const BenchmarkKey &Key, bool GenerateMemoryInstructions) {
+    const BenchmarkKey &Key, bool GenerateMemoryInstructions,
+    bool UseDummyPerfCounters) {
   bool IsSnippetSetupComplete = true;
   if (GenerateMemoryInstructions) {
     BBF.addInstructions(ET.generateMemoryInitialSetup());
@@ -80,7 +81,8 @@ static bool generateSnippetSetupCode(
   }
   if (GenerateMemoryInstructions) {
 #ifdef HAVE_LIBPFM
-    BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true));
+    if (!UseDummyPerfCounters)
+      BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true));
 #endif // HAVE_LIBPFM
     for (const RegisterValue &RV : RegisterInitialValues) {
       // Load in the stack register now as we're done using it elsewhere
@@ -170,11 +172,14 @@ void BasicBlockFiller::addInstructions(ArrayRef<MCInst> Insts,
 }
 
 void BasicBlockFiller::addReturn(const ExegesisTarget &ET,
-                                 bool SubprocessCleanup, const DebugLoc &DL) {
+                                 bool SubprocessCleanup,
+                                 bool UseDummyPerfCounters,
+                                 const DebugLoc &DL) {
   // Insert cleanup code
   if (SubprocessCleanup) {
 #ifdef HAVE_LIBPFM
-    addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_DISABLE, false));
+    if (!UseDummyPerfCounters)
+      addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_DISABLE, false));
 #endif // HAVE_LIBPFM
 #ifdef __linux__
     addInstructions(ET.generateExitSyscall(0));
@@ -234,8 +239,8 @@ Error assembleToStream(const ExegesisTarget &ET,
                        ArrayRef<unsigned> LiveIns,
                        ArrayRef<RegisterValue> RegisterInitialValues,
                        const FillFunction &Fill, raw_pwrite_stream &AsmStream,
-                       const BenchmarkKey &Key,
-                       bool GenerateMemoryInstructions) {
+                       const BenchmarkKey &Key, bool GenerateMemoryInstructions,
+                       bool UseDummyPerfCounters) {
   auto Context = std::make_unique<LLVMContext>();
   std::unique_ptr<Module> Module =
       createModule(Context, TM->createDataLayout());
@@ -284,7 +289,7 @@ Error assembleToStream(const ExegesisTarget &ET,
 
   const bool IsSnippetSetupComplete = generateSnippetSetupCode(
       ET, TM->getMCSubtargetInfo(), RegisterInitialValues, Entry, Key,
-      GenerateMemoryInstructions);
+      GenerateMemoryInstructions, UseDummyPerfCounters);
 
   // If the snippet setup is not complete, we disable liveliness tracking. This
   // means that we won't know what values are in the registers.
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.h b/llvm/tools/llvm-exegesis/lib/Assembler.h
index abc5aa7be8cfe..166a1c2020bfb 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.h
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.h
@@ -51,7 +51,7 @@ class BasicBlockFiller {
   void addInstructions(ArrayRef<MCInst> Insts, const DebugLoc &DL = DebugLoc());
 
   void addReturn(const ExegesisTarget &ET, bool SubprocessCleanup,
-                 const DebugLoc &DL = DebugLoc());
+                 bool UseDummyPerfCounters, const DebugLoc &DL = DebugLoc());
 
   MachineFunction &MF;
   MachineBasicBlock *const MBB;
@@ -93,8 +93,8 @@ Error assembleToStream(const ExegesisTarget &ET,
                        ArrayRef<unsigned> LiveIns,
                        ArrayRef<RegisterValue> RegisterInitialValues,
                        const FillFunction &Fill, raw_pwrite_stream &AsmStreamm,
-                       const BenchmarkKey &Key,
-                       bool GenerateMemoryInstructions);
+                       const BenchmarkKey &Key, bool GenerateMemoryInstructions,
+                       bool UseDummyPerfCounters);
 
 // Creates an ObjectFile in the format understood by the host.
 // Note: the resulting object keeps a copy of Buffer so it can be discarded once
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 85375dec2a44c..ecaf92643b012 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -264,6 +264,32 @@ class SubProcessFunctionExecutorImpl
     return FD;
   }
 
+  Error sendSyncMessageThroughSocket(int SocketFd) const {
+    char ToSend = 'a';
+
+    ssize_t BytesWritten = write(SocketFd, &ToSend, sizeof(char));
+
+    if (BytesWritten < 0)
+      return make_error<Failure>(
+          "Failed to write synchronization message to socket: " +
+          Twine(strerror(errno)));
+
+    return Error::success();
+  }
+
+  Error recieveSyncMessageFromSocket(int SocketFD) const {
+    char Buffer = 'b';
+
+    ssize_t BytesRecieved = read(SocketFD, &Buffer, sizeof(char));
+
+    if (BytesRecieved < 0)
+      return make_error<Failure>(
+          "Failed to read synchronization message from socket: " +
+          Twine(strerror(errno)));
+
+    return Error::success();
+  }
+
   Error createSubProcessAndRunBenchmark(
       StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues) const {
     int PipeFiles[2];
@@ -316,8 +342,9 @@ class SubProcessFunctionExecutorImpl
     close(PipeFiles[0]);
 
     // Make sure to attach to the process (and wait for the sigstop to be
-    // delivered and for the process to continue) before we write to the counter
-    // file descriptor. Attaching to the process before writing to the socket
+    // delivered and for the process to continue) before we write the counter
+    // file descriptor or dummy synchronization message.
+    // Attaching to the process before writing to the socket
     // ensures that the subprocess at most has blocked on the read call. If we
     // attach afterwards, the subprocess might exit before we get to the attach
     // call due to effects like scheduler contention, introducing transient
@@ -337,12 +364,20 @@ class SubProcessFunctionExecutorImpl
           "Failed to continue execution of the child process: " +
           Twine(strerror(errno)));
 
-    int CounterFileDescriptor = Counter->getFileDescriptor();
-    Error SendError =
-        sendFileDescriptorThroughSocket(PipeFiles[1], CounterFileDescriptor);
+    if (!State.usingDummyPerfCounters()) {
+      int CounterFileDescriptor = Counter->getFileDescriptor();
+      Error SendError =
+          sendFileDescriptorThroughSocket(PipeFiles[1], CounterFileDescriptor);
+
+      if (SendError)
+        return SendError;
 
-    if (SendError)
-      return SendError;
+    } else {
+      Error SendError = sendSyncMessageThroughSocket(PipeFiles[1]);
+
+      if (SendError)
+        return SendError;
+    }
 
     int ChildStatus;
     if (wait(&ChildStatus) == -1) {
@@ -383,13 +418,23 @@ class SubProcessFunctionExecutorImpl
     // The following occurs within the benchmarking subprocess
     pid_t ParentPID = getppid();
 
-    Expected<int> CounterFileDescriptorOrError =
-        getFileDescriptorFromSocket(Pipe);
+    int CounterFileDescriptor = 0;
+
+    if (!State.usingDummyPerfCounters()) {
+      Expected<int> CounterFileDescriptorOrError =
+          getFileDescriptorFromSocket(Pipe);
 
-    if (!CounterFileDescriptorOrError)
-      exit(ChildProcessExitCodeE::CounterFDReadFailed);
+      if (!CounterFileDescriptorOrError)
+        exit(ChildProcessExitCodeE::CounterFDReadFailed);
 
-    int CounterFileDescriptor = *CounterFileDescriptorOrError;
+      CounterFileDescriptor = *CounterFileDescriptorOrError;
+
+    } else {
+      Error PossibleRecvError = recieveSyncMessageFromSocket(Pipe);
+
+      if (PossibleRecvError)
+        exit(ChildProcessExitCodeE::CounterFDReadFailed);
+    }
 
 // Glibc versions greater than 2.35 automatically call rseq during
 // initialization. Unmapping the region that glibc sets up for this causes
@@ -458,7 +503,8 @@ Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
           BC.Key.RegisterInitialValues,
           Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
                            GenerateMemoryInstructions),
-          OS, BC.Key, GenerateMemoryInstructions)) {
+          OS, BC.Key, GenerateMemoryInstructions,
+          State.usingDummyPerfCounters())) {
     return std::move(E);
   }
   return Buffer;
diff --git a/llvm/tools/llvm-exegesis/lib/LlvmState.cpp b/llvm/tools/llvm-exegesis/lib/LlvmState.cpp
index 8aeea456fba1f..00a4b3fbf3685 100644
--- a/llvm/tools/llvm-exegesis/lib/LlvmState.cpp
+++ b/llvm/tools/llvm-exegesis/lib/LlvmState.cpp
@@ -76,14 +76,16 @@ Expected<LLVMState> LLVMState::Create(std::string TripleName,
   const PfmCountersInfo &PCI = UseDummyPerfCounters
                                    ? ET->getDummyPfmCounters()
                                    : ET->getPfmCounters(CpuName);
-  return LLVMState(std::move(TM), ET, &PCI);
+  return LLVMState(std::move(TM), ET, &PCI, UseDummyPerfCounters);
 }
 
 LLVMState::LLVMState(std::unique_ptr<const TargetMachine> TM,
-                     const ExegesisTarget *ET, const PfmCountersInfo *PCI)
+                     const ExegesisTarget *ET, const PfmCountersInfo *PCI,
+                     bool UseDummyPerfCounters_)
     : TheExegesisTarget(ET), TheTargetMachine(std::move(TM)), PfmCounters(PCI),
       OpcodeNameToOpcodeIdxMapping(createOpcodeNameToOpcodeIdxMapping()),
-      RegNameToRegNoMapping(createRegNameToRegNoMapping()) {
+      RegNameToRegNoMapping(createRegNameToRegNoMapping()),
+      UseDummyPerfCounters(UseDummyPerfCounters_) {
   BitVector ReservedRegs = getFunctionReservedRegs(getTargetMachine());
   for (const unsigned Reg : TheExegesisTarget->getUnavailableRegisters())
     ReservedRegs.set(Reg);
diff --git a/llvm/tools/llvm-exegesis/lib/LlvmState.h b/llvm/tools/llvm-exegesis/lib/LlvmState.h
index 16f0def518256..751b24c98304b 100644
--- a/llvm/tools/llvm-exegesis/lib/LlvmState.h
+++ b/llvm/tools/llvm-exegesis/lib/LlvmState.h
@@ -80,6 +80,8 @@ class LLVMState {
     return *RegNameToRegNoMapping;
   }
 
+  bool usingDummyPerfCounters() const { return UseDummyPerfCounters; }
+
 private:
   std::unique_ptr<const DenseMap<StringRef, unsigned>>
   createOpcodeNameToOpcodeIdxMapping() const;
@@ -88,7 +90,7 @@ class LLVMState {
   createRegNameToRegNoMapping() const;
 
   LLVMState(std::unique_ptr<const TargetMachine> TM, const ExegesisTarget *ET,
-            const PfmCountersInfo *PCI);
+            const PfmCountersInfo *PCI, bool UseDummyPerfCounters_);
 
   const ExegesisTarget *TheExegesisTarget;
   std::unique_ptr<const TargetMachine> TheTargetMachine;
@@ -98,6 +100,8 @@ class LLVMState {
   std::unique_ptr<const DenseMap<StringRef, unsigned>>
       OpcodeNameToOpcodeIdxMapping;
   std::unique_ptr<const DenseMap<StringRef, unsigned>> RegNameToRegNoMapping;
+
+  const bool UseDummyPerfCounters;
 };
 
 } // namespace exegesis
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
index 636dd11ff3265..dd7cdf2ad345e 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp
@@ -38,7 +38,8 @@ class DuplicateSnippetRepetitor : public SnippetRepetitor {
           Entry.addInstruction(Instructions[I % Instructions.size()]);
         }
       }
-      Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
+      Entry.addReturn(State.getExegesisTarget(), CleanupMemory,
+                      State.usingDummyPerfCounters());
     };
   }
 
@@ -70,7 +71,8 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
         const MCInstrDesc &MCID = Filler.MCII->get(Opcode);
         if (!MCID.isTerminator())
           continue;
-        Entry.addReturn(State.getExegesisTarget(), CleanupMemory);
+        Entry.addReturn(State.getExegesisTarget(), CleanupMemory,
+                        State.usingDummyPerfCounters());
         return;
       }
 
@@ -115,7 +117,8 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
 
       // Set up the exit basic block.
       Loop.MBB->addSuccessor(Exit.MBB, BranchProbability::getZero());
-      Exit.addReturn(State.getExegesisTarget(), CleanupMemory);
+      Exit.addReturn(State.getExegesisTarget(), CleanupMemory,
+                     State.usingDummyPerfCounters());
     };
   }
 
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 261335a817d06..a18226fafd2ab 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -477,11 +477,6 @@ void benchmarkMain() {
   if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
     ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
 
-  if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&
-      UseDummyPerfCounters)
-    ExitWithError("Dummy perf counters are not supported in the subprocess "
-                  "execution mode.");
-
   const std::unique_ptr<BenchmarkRunner> Runner =
       ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
           BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
diff --git a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
index 2804a6e69e824..36be02bf68a92 100644
--- a/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
+++ b/llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h
@@ -82,7 +82,7 @@ class MachineFunctionGeneratorBaseTest : public ::testing::Test {
     Key.RegisterInitialValues = RegisterInitialValues;
     EXPECT_FALSE(assembleToStream(*ET, createTargetMachine(), /*LiveIns=*/{},
                                   RegisterInitialValues, Fill, AsmStream, Key,
-                                  false));
+                                  false, false));
     Expected<ExecutableFunction> ExecFunc = ExecutableFunction::create(
         createTargetMachine(), getObjectFromBuffer(AsmStream.str()));
 



More information about the llvm-commits mailing list