[llvm] [llvm-exegesis] Add support for loading X86 segment registers (PR #76368)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 26 01:39:42 PST 2023
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/76368
>From af284d3ec36bb814d2adabd4ded3832b78c1973f Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Mon, 25 Dec 2023 11:59:33 -0800
Subject: [PATCH 1/3] [llvm-exegesis] Add support for loading X86 segment
registers
This patch adds support for setting the X86 segment registers FS and GS.
These registers are used in quite a few basic blocks in BHive and similar
datasets, so being able to set them is necessary to ensure consistent
runs, as the live-in values of FS and GS can change across runs.
---
.../latency/segment-registers-subprocess.asm | 29 ++++++++
llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 70 ++++++++++++++-----
2 files changed, 83 insertions(+), 16 deletions(-)
create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm
diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm b/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm
new file mode 100644
index 00000000000000..5d5219f9375f2f
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm
@@ -0,0 +1,29 @@
+# REQUIRES: exegesis-can-measure-latency, x86_64-linux
+
+# Check that the value of the segment registers is set properly when in
+# subprocess mode.
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s
+
+# LLVM-EXEGESIS-DEFREG FS 12345600
+# LLVM-EXEGESIS-DEFREG GS 2468ac00
+# LLVM-EXEGESIS-DEFREG R13 0
+# LLVM-EXEGESIS-DEFREG R14 127
+# LLVM-EXEGESIS-DEFREG R15 0
+# LLVM-EXEGESIS-MEM-DEF MEM1 4096 0000000012345600
+# LLVM-EXEGESIS-MEM-DEF MEM2 4096 000000002468ac00
+# LLVM-EXEGESIS-MEM-MAP MEM1 305418240
+# LLVM-EXEGESIS-MEM-MAP MEM2 610836480
+
+movq %fs:0, %r13
+cmpq $0x12345600, %r13
+cmovneq %r14, %r15
+movq %gs:0, %r13
+cmpq $0x2468ac00, %r13
+cmovneq %r14, %r15
+
+movq $60, %rax
+movq %r15, %rdi
+syscall
+
+# CHECK-NOT: error: 'Child benchmarking process exited with non-zero exit code: Child process returned with unknown exit code'
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 2c2d1adb0fcf08..0869e32c89eaa5 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -39,6 +39,7 @@
#endif
#ifdef __linux__
+#include <asm/prctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
@@ -907,9 +908,62 @@ void ExegesisX86Target::decrementLoopCounterAndJump(
.addImm(X86::COND_NE);
}
+void generateRegisterStackPush(unsigned int Register,
+ std::vector<MCInst> &GeneratedCode) {
+ GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
+}
+
+void generateRegisterStackPop(unsigned int Register,
+ std::vector<MCInst> &GeneratedCode) {
+ GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
+}
+
+void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+ GeneratedCode.push_back(
+ loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
+ GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
+}
+
+static std::vector<MCInst> loadImmediateSegmentRegister(unsigned Reg,
+ const APInt &Value) {
+ assert(Value.getBitWidth() <= 64 && "Value must fit in the register.");
+ std::vector<MCInst> loadSegmentRegisterCode;
+ // Preserve RCX and R11 (clobbered by the system call), and RAX, RDI, and RSI
+ // (used to make the system call). Preserve the registers here as we don't
+ // want to make any assumptions about the ordering of what registers are
+ // loaded in first, and we might have already loaded in registers that we are
+ // going to be clobbering here.
+ generateRegisterStackPush(X86::RAX, loadSegmentRegisterCode);
+ generateRegisterStackPush(X86::RDI, loadSegmentRegisterCode);
+ generateRegisterStackPush(X86::RSI, loadSegmentRegisterCode);
+ generateRegisterStackPush(X86::RCX, loadSegmentRegisterCode);
+ generateRegisterStackPush(X86::R11, loadSegmentRegisterCode);
+ // Generate the instructions to make the arch_prctl system call to set
+ // the registers.
+ assert(Reg == X86::FS ||
+ Reg == X86::GS &&
+ "Only the segment registers GS and FS are supported");
+ int SyscallCode = 0;
+ SyscallCode = Reg == X86::FS ? SyscallCode | ARCH_SET_FS : SyscallCode;
+ SyscallCode = Reg == X86::GS ? SyscallCode | ARCH_SET_GS : SyscallCode;
+ loadSegmentRegisterCode.push_back(
+ loadImmediate(X86::RDI, 64, APInt(64, SyscallCode)));
+ loadSegmentRegisterCode.push_back(loadImmediate(X86::RSI, 64, Value));
+ generateSyscall(SYS_arch_prctl, loadSegmentRegisterCode);
+ // Restore the registers in reverse order
+ generateRegisterStackPop(X86::R11, loadSegmentRegisterCode);
+ generateRegisterStackPop(X86::RCX, loadSegmentRegisterCode);
+ generateRegisterStackPop(X86::RSI, loadSegmentRegisterCode);
+ generateRegisterStackPop(X86::RDI, loadSegmentRegisterCode);
+ generateRegisterStackPop(X86::RAX, loadSegmentRegisterCode);
+ return loadSegmentRegisterCode;
+}
+
std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
unsigned Reg,
const APInt &Value) const {
+ if (X86::SEGMENT_REGRegClass.contains(Reg))
+ return loadImmediateSegmentRegister(Reg, Value);
if (X86::GR8RegClass.contains(Reg))
return {loadImmediate(Reg, 8, Value)};
if (X86::GR16RegClass.contains(Reg))
@@ -992,12 +1046,6 @@ static constexpr const intptr_t VAddressSpaceCeiling = 0xC0000000;
static constexpr const intptr_t VAddressSpaceCeiling = 0x0000800000000000;
#endif
-void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
- GeneratedCode.push_back(
- loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
- GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
-}
-
void generateRoundToNearestPage(unsigned int Register,
std::vector<MCInst> &GeneratedCode) {
int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
@@ -1157,16 +1205,6 @@ intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
return VAddressSpaceCeiling - 2 * getpagesize();
}
-void generateRegisterStackPush(unsigned int Register,
- std::vector<MCInst> &GeneratedCode) {
- GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
-}
-
-void generateRegisterStackPop(unsigned int Register,
- std::vector<MCInst> &GeneratedCode) {
- GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
-}
-
std::vector<MCInst>
ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const {
std::vector<MCInst> ConfigurePerfCounterCode;
>From b7199905b45db021bf7a5149c0e5ed4b92e03781 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Tue, 26 Dec 2023 00:50:51 -0800
Subject: [PATCH 2/3] Address reviewer feedback
---
llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 80 +++++++++++----------
1 file changed, 44 insertions(+), 36 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 0869e32c89eaa5..ddaf279e7cb398 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -924,38 +924,61 @@ void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
}
+constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
+ X86::RDI, X86::RSI, X86::RDX, X86::R10, X86::R8, X86::R9};
+
+static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
+ unsigned ArgumentCount) {
+ assert(ArgumentCount < 6 &&
+ "System calls only X86-64 Linux can only take six arguments");
+ // Preserve RCX and R11 (Clobbered by the system call).
+ generateRegisterStackPush(X86::RCX, GeneratedCode);
+ generateRegisterStackPush(X86::R11, GeneratedCode);
+ // Preserve RAX (used for the syscall number/return value).
+ generateRegisterStackPush(X86::RAX, GeneratedCode);
+ // Preserve the registers used to pass arguments to the system call.
+ for (unsigned I = 0; I < ArgumentCount; ++I)
+ generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
+}
+
+static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode,
+ unsigned ArgumentCount) {
+ assert(ArgumentCount < 6 &&
+ "System calls only X86-64 Linux can only take six arguments");
+ // Restore the argument registers, in the opposite order of the way they are
+ // saved.
+ for (unsigned I = ArgumentCount; I > 0; --I) {
+ generateRegisterStackPop(SyscallArgumentRegisters[I - 1], GeneratedCode);
+ }
+ generateRegisterStackPop(X86::RAX, GeneratedCode);
+ generateRegisterStackPop(X86::R11, GeneratedCode);
+ generateRegisterStackPop(X86::RCX, GeneratedCode);
+}
+
static std::vector<MCInst> loadImmediateSegmentRegister(unsigned Reg,
const APInt &Value) {
assert(Value.getBitWidth() <= 64 && "Value must fit in the register.");
std::vector<MCInst> loadSegmentRegisterCode;
- // Preserve RCX and R11 (clobbered by the system call), and RAX, RDI, and RSI
- // (used to make the system call). Preserve the registers here as we don't
+ // Preserve the syscall registers here as we don't
// want to make any assumptions about the ordering of what registers are
// loaded in first, and we might have already loaded in registers that we are
// going to be clobbering here.
- generateRegisterStackPush(X86::RAX, loadSegmentRegisterCode);
- generateRegisterStackPush(X86::RDI, loadSegmentRegisterCode);
- generateRegisterStackPush(X86::RSI, loadSegmentRegisterCode);
- generateRegisterStackPush(X86::RCX, loadSegmentRegisterCode);
- generateRegisterStackPush(X86::R11, loadSegmentRegisterCode);
+ saveSyscallRegisters(loadSegmentRegisterCode, 2);
// Generate the instructions to make the arch_prctl system call to set
// the registers.
- assert(Reg == X86::FS ||
- Reg == X86::GS &&
- "Only the segment registers GS and FS are supported");
int SyscallCode = 0;
- SyscallCode = Reg == X86::FS ? SyscallCode | ARCH_SET_FS : SyscallCode;
- SyscallCode = Reg == X86::GS ? SyscallCode | ARCH_SET_GS : SyscallCode;
+ if (Reg == X86::FS)
+ SyscallCode = ARCH_SET_FS;
+ else if (Reg == X86::GS)
+ SyscallCode = ARCH_SET_GS;
+ else
+ llvm_unreachable("Only the segment registers GS and FS are supported");
loadSegmentRegisterCode.push_back(
loadImmediate(X86::RDI, 64, APInt(64, SyscallCode)));
loadSegmentRegisterCode.push_back(loadImmediate(X86::RSI, 64, Value));
generateSyscall(SYS_arch_prctl, loadSegmentRegisterCode);
// Restore the registers in reverse order
- generateRegisterStackPop(X86::R11, loadSegmentRegisterCode);
- generateRegisterStackPop(X86::RCX, loadSegmentRegisterCode);
- generateRegisterStackPop(X86::RSI, loadSegmentRegisterCode);
- generateRegisterStackPop(X86::RDI, loadSegmentRegisterCode);
- generateRegisterStackPop(X86::RAX, loadSegmentRegisterCode);
+ restoreSyscallRegisters(loadSegmentRegisterCode, 2);
return loadSegmentRegisterCode;
}
@@ -1208,16 +1231,8 @@ intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
std::vector<MCInst>
ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const {
std::vector<MCInst> ConfigurePerfCounterCode;
- if(SaveRegisters) {
- // Preserve RAX, RDI, and RSI by pushing them to the stack.
- generateRegisterStackPush(X86::RAX, ConfigurePerfCounterCode);
- generateRegisterStackPush(X86::RDI, ConfigurePerfCounterCode);
- generateRegisterStackPush(X86::RSI, ConfigurePerfCounterCode);
- // RCX and R11 will get clobbered by the syscall instruction, so save them
- // as well.
- generateRegisterStackPush(X86::RCX, ConfigurePerfCounterCode);
- generateRegisterStackPush(X86::R11, ConfigurePerfCounterCode);
- }
+ if (SaveRegisters)
+ saveSyscallRegisters(ConfigurePerfCounterCode, 2);
ConfigurePerfCounterCode.push_back(
loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
@@ -1230,15 +1245,8 @@ ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const
ConfigurePerfCounterCode.push_back(
loadImmediate(X86::RSI, 64, APInt(64, Request)));
generateSyscall(SYS_ioctl, ConfigurePerfCounterCode);
- if(SaveRegisters) {
- // Restore R11 then RCX
- generateRegisterStackPop(X86::R11, ConfigurePerfCounterCode);
- generateRegisterStackPop(X86::RCX, ConfigurePerfCounterCode);
- // Restore RAX, RDI, and RSI, in reverse order.
- generateRegisterStackPop(X86::RSI, ConfigurePerfCounterCode);
- generateRegisterStackPop(X86::RDI, ConfigurePerfCounterCode);
- generateRegisterStackPop(X86::RAX, ConfigurePerfCounterCode);
- }
+ if (SaveRegisters)
+ restoreSyscallRegisters(ConfigurePerfCounterCode, 2);
return ConfigurePerfCounterCode;
}
>From fee2faf6c47c5acfe9edff5fb0acfcbb0b4333ae Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Tue, 26 Dec 2023 01:39:32 -0800
Subject: [PATCH 3/3] Address reviewer feedback
---
llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index ddaf279e7cb398..27eecc357fde3d 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -929,7 +929,7 @@ constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
unsigned ArgumentCount) {
- assert(ArgumentCount < 6 &&
+ assert(ArgumentCount <= 6 &&
"System calls only X86-64 Linux can only take six arguments");
// Preserve RCX and R11 (Clobbered by the system call).
generateRegisterStackPush(X86::RCX, GeneratedCode);
@@ -943,7 +943,7 @@ static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode,
unsigned ArgumentCount) {
- assert(ArgumentCount < 6 &&
+ assert(ArgumentCount <= 6 &&
"System calls only X86-64 Linux can only take six arguments");
// Restore the argument registers, in the opposite order of the way they are
// saved.
More information about the llvm-commits
mailing list