[llvm] [llvm-exegesis] Add support for pinning benchmarking process to a CPU (PR #85168)

Wed Mar 13 18:25:58 PDT 2024

https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/85168

This patch adds support for pinning the benchmarking process to a specific CPU (in the subprocess execution mode on Linux). This is intended to be used in environments where a certain set of CPUs is isolated from the scheduler using something like cgroups and thus should present less potential for noise than normal. This also opens the door to multithreaded benchmarking, as we can now pin benchmarking processes to specific CPUs that we know won't interfere with each other.

From dd9a8f130fa8f5f1b9f0d9da69c4a183611fa999 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Tue, 12 Mar 2024 19:00:19 -0700
Subject: [PATCH] [llvm-exegesis] Add support for pinning benchmarking process
 to a CPU

This patch adds support for pinning the benchmarking process to a
specific CPU (in the subprocess execution mode on Linux). This is
intended to be used in environments where a certain set of CPUs is
isolated from the scheduler using something like cgroups and thus should
present less potential for noise than normal. This also opens the door
to multithreaded benchmarking, as we can now pin benchmarking processes
to specific CPUs that we know won't interfere with each other.
---
 .../X86/latency/cpu-pinning-execution-mode.s  |  5 ++
 .../llvm-exegesis/X86/latency/cpu-pinning.s   |  5 ++
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     | 66 ++++++++++++++-----
 .../tools/llvm-exegesis/lib/BenchmarkRunner.h |  6 +-
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    | 14 +++-
 5 files changed, 78 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s
 create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning.s

diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s
new file mode 100644
index 00000000000000..62a7b1d1e486e1
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s
@@ -0,0 +1,5 @@
+# REQUIRES: exegesis-can-measure-latency, x86_64-linux
+
+# RUN: not llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr -execution-mode=inprocess --benchmark-process-cpu=0 2>&1 | FileCheck %s
+
+# CHECK: llvm-exegesis error: --benchmark-process-cpu is only supported in the subprocess execution mode
diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning.s b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning.s
new file mode 100644
index 00000000000000..0ea3752fc3bb95
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning.s
@@ -0,0 +1,5 @@
+# REQUIRES: exegesis-can-measure-latency, x86_64-linux
+# Check that latency can be measured with the subprocess pinned to CPU 0.
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr -execution-mode=subprocess --benchmark-process-cpu=0 | FileCheck %s
+
+# CHECK: - { key: latency, value: {{[0-9.]*}}, per_snippet_value: {{[0-9.]*}}
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 4e97d188d17259..9c5a037ee2e67d 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -97,7 +97,8 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
 public:
   static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
-         BenchmarkRunner::ScratchSpace *Scratch) {
+         BenchmarkRunner::ScratchSpace *Scratch,
+         std::optional<int> BenchmarkProcessCPU) {
     Expected<ExecutableFunction> EF =
         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
 
@@ -105,14 +106,17 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
       return EF.takeError();
 
     return std::unique_ptr<InProcessFunctionExecutorImpl>(
-        new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
+        new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch,
+                                          BenchmarkProcessCPU));
   }
 
 private:
   InProcessFunctionExecutorImpl(const LLVMState &State,
                                 ExecutableFunction Function,
-                                BenchmarkRunner::ScratchSpace *Scratch)
-      : State(State), Function(std::move(Function)), Scratch(Scratch) {}
+                                BenchmarkRunner::ScratchSpace *Scratch,
+                                std::optional<int> BenchmarkCPU)
+      : State(State), Function(std::move(Function)), Scratch(Scratch),
+        BenchmarkProcessCPU(BenchmarkCPU) {}
 
   static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
                                       SmallVector<int64_t, 4> *Result) {
@@ -175,6 +179,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
   const LLVMState &State;
   const ExecutableFunction Function;
   BenchmarkRunner::ScratchSpace *const Scratch;
+  const std::optional<int> BenchmarkProcessCPU;
 };
 
 #ifdef __linux__
@@ -189,27 +194,31 @@ class SubProcessFunctionExecutorImpl
 public:
   static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
-         const BenchmarkKey &Key) {
+         const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) {
     Expected<ExecutableFunction> EF =
         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
     if (!EF)
       return EF.takeError();
 
     return std::unique_ptr<SubProcessFunctionExecutorImpl>(
-        new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key));
+        new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key,
+                                           BenchmarkProcessCPU));
   }
 
 private:
   SubProcessFunctionExecutorImpl(const LLVMState &State,
                                  ExecutableFunction Function,
-                                 const BenchmarkKey &Key)
-      : State(State), Function(std::move(Function)), Key(Key) {}
+                                 const BenchmarkKey &Key,
+                                 std::optional<int> BenchmarkCPU)
+      : State(State), Function(std::move(Function)), Key(Key),
+        BenchmarkProcessCPU(BenchmarkCPU) {}
 
   enum ChildProcessExitCodeE {
     CounterFDReadFailed = 1,
     RSeqDisableFailed,
     FunctionDataMappingFailed,
-    AuxiliaryMemorySetupFailed
+    AuxiliaryMemorySetupFailed,
+    SetCPUAffinityFailed
   };
 
   StringRef childProcessExitCodeToString(int ExitCode) const {
@@ -222,6 +231,8 @@ class SubProcessFunctionExecutorImpl
       return "Failed to map memory for assembled snippet";
     case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
       return "Failed to setup auxiliary memory";
+    case ChildProcessExitCodeE::SetCPUAffinityFailed:
+      return "Failed to set CPU affinity of the benchmarking process";
     default:
       return "Child process returned with unknown exit code";
     }
@@ -310,6 +321,29 @@ class SubProcessFunctionExecutorImpl
     }
 
     if (ParentOrChildPID == 0) {
+      if (BenchmarkProcessCPU) {
+        // Set the CPU affinity for the child process, so that we ensure that if
+        // the user specified a CPU the process should run on, the benchmarking
+        // process is running on that CPU.
+        cpu_set_t CPUMask;
+        CPU_ZERO(&CPUMask);
+        CPU_SET(*BenchmarkProcessCPU, &CPUMask);
+        // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they
+        // are available.
+        int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask);
+        if (SetAffinityReturn == -1) {
+          exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
+        }
+
+        // With assertions enabled, check that we are actually running on the
+        // requested CPU. CurrentCPU is unused otherwise ([[maybe_unused]]).
+        [[maybe_unused]] unsigned int CurrentCPU = 0;
+        assert(getcpu(&CurrentCPU, nullptr) == 0 &&
+               "Expected getcpu call to succeed.");
+        assert(static_cast<int>(CurrentCPU) == *BenchmarkProcessCPU &&
+               "Expected current CPU to equal the CPU requested by the user");
+      }
+
       // We are in the child process, close the write end of the pipe.
       close(PipeFiles[1]);
       // Unregister handlers, signal handling is now handled through ptrace in
@@ -500,6 +534,7 @@ class SubProcessFunctionExecutorImpl
   const LLVMState &State;
   const ExecutableFunction Function;
   const BenchmarkKey &Key;
+  const std::optional<int> BenchmarkProcessCPU;
 };
 #endif // __linux__
 } // namespace
@@ -577,11 +612,11 @@ BenchmarkRunner::getRunnableConfiguration(
 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
 BenchmarkRunner::createFunctionExecutor(
     object::OwningBinary<object::ObjectFile> ObjectFile,
-    const BenchmarkKey &Key) const {
+    const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
   switch (ExecutionMode) {
   case ExecutionModeE::InProcess: {
     auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
-        State, std::move(ObjectFile), Scratch.get());
+        State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU);
     if (!InProcessExecutorOrErr)
       return InProcessExecutorOrErr.takeError();
 
@@ -590,7 +625,7 @@ BenchmarkRunner::createFunctionExecutor(
   case ExecutionModeE::SubProcess: {
 #ifdef __linux__
     auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
-        State, std::move(ObjectFile), Key);
+        State, std::move(ObjectFile), Key, BenchmarkProcessCPU);
     if (!SubProcessExecutorOrErr)
       return SubProcessExecutorOrErr.takeError();
 
@@ -605,8 +640,8 @@ BenchmarkRunner::createFunctionExecutor(
 }
 
 std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
-    RunnableConfiguration &&RC,
-    const std::optional<StringRef> &DumpFile) const {
+    RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile,
+    std::optional<int> BenchmarkProcessCPU) const {
   Benchmark &BenchmarkResult = RC.BenchmarkResult;
   object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
 
@@ -627,7 +662,8 @@ std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
   }
 
   Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
-      createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key);
+      createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key,
+                             BenchmarkProcessCPU);
   if (!Executor)
     return {Executor.takeError(), std::move(BenchmarkResult)};
   auto NewMeasurements = runMeasurements(**Executor);
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
index 9b4bb1d41149fe..e688b814d1c83d 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
@@ -68,7 +68,8 @@ class BenchmarkRunner {
 
   std::pair<Error, Benchmark>
   runConfiguration(RunnableConfiguration &&RC,
-                   const std::optional<StringRef> &DumpFile) const;
+                   const std::optional<StringRef> &DumpFile,
+                   std::optional<int> BenchmarkProcessCPU) const;
 
   // Scratch space to run instructions that touch memory.
   struct ScratchSpace {
@@ -135,7 +136,8 @@ class BenchmarkRunner {
 
   Expected<std::unique_ptr<FunctionExecutor>>
   createFunctionExecutor(object::OwningBinary<object::ObjectFile> Obj,
-                         const BenchmarkKey &Key) const;
+                         const BenchmarkKey &Key,
+                         std::optional<int> BenchmarkProcessCPU) const;
 };
 
 } // namespace exegesis
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 1ae2565e894c69..3e0d75faaeb341 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -269,6 +269,11 @@ static cl::list<ValidationEvent> ValidationCounters(
         "counter to validate benchmarking assumptions"),
     cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());
 
+static cl::opt<int> BenchmarkProcessCPU(
+    "benchmark-process-cpu",
+    cl::desc("The CPU number that the benchmarking process should execute on"),
+    cl::cat(BenchmarkOptions), cl::init(-1));
+
 static ExitOnError ExitOnErr("llvm-exegesis error: ");
 
 // Helper function that logs the error(s) and exits.
@@ -418,8 +423,15 @@ static void runBenchmarkConfigurations(
         std::optional<StringRef> DumpFile;
         if (DumpObjectToDisk.getNumOccurrences())
           DumpFile = DumpObjectToDisk;
+        std::optional<int> BenchmarkCPU = std::nullopt;
+        if (BenchmarkProcessCPU != -1) {
+          if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess)
+            ExitWithError("--benchmark-process-cpu is only supported in the "
+                          "subprocess execution mode");
+          BenchmarkCPU = BenchmarkProcessCPU;
+        }
         auto [Err, BenchmarkResult] =
-            Runner.runConfiguration(std::move(RC), DumpFile);
+            Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU);
         if (Err) {
           // Errors from executing the snippets are fine.
           // All other errors are a framework issue and should fail.