[llvm] 5cc7402 - Reland "[llvm-exegesis] Add support for loading X86 segment registers (#76368)"

Aiden Grossman via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 29 15:05:34 PST 2023


Author: Aiden Grossman
Date: 2023-12-29T15:05:04-08:00
New Revision: 5cc74029168dda258ec53ca7473df814db99960e

URL: https://github.com/llvm/llvm-project/commit/5cc74029168dda258ec53ca7473df814db99960e
DIFF: https://github.com/llvm/llvm-project/commit/5cc74029168dda258ec53ca7473df814db99960e.diff

LOG: Reland "[llvm-exegesis] Add support for loading X86 segment registers (#76368)"

This reverts commit 8b485070844d03cda467e75aa8c924184ba671cf.

This relands commit 7c383481a8e86918b3aaca4288c1eed62a4d6ff4.

The original commit caused build failures on non-x86-64 builders because the
newly included headers were not wrapped in preprocessor guards. The relanded
patch adds those guards.

Added: 
    llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm

Modified: 
    llvm/tools/llvm-exegesis/lib/X86/Target.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm b/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm
new file mode 100644
index 00000000000000..5d5219f9375f2f
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/segment-registers-subprocess.asm
@@ -0,0 +1,29 @@
+# REQUIRES: exegesis-can-measure-latency, x86_64-linux
+
+# Check that the FS and GS segment registers are set to the requested values
+# when running in subprocess mode.
+
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s
+
+# LLVM-EXEGESIS-DEFREG FS 12345600
+# LLVM-EXEGESIS-DEFREG GS 2468ac00
+# LLVM-EXEGESIS-DEFREG R13 0
+# LLVM-EXEGESIS-DEFREG R14 127
+# LLVM-EXEGESIS-DEFREG R15 0
+# LLVM-EXEGESIS-MEM-DEF MEM1 4096 0000000012345600
+# LLVM-EXEGESIS-MEM-DEF MEM2 4096 000000002468ac00
+# LLVM-EXEGESIS-MEM-MAP MEM1 305418240
+# LLVM-EXEGESIS-MEM-MAP MEM2 610836480
+
+movq %fs:0, %r13
+cmpq $0x12345600, %r13
+cmovneq %r14, %r15
+movq %gs:0, %r13
+cmpq $0x2468ac00, %r13
+cmovneq %r14, %r15
+
+movq $60, %rax
+movq %r15, %rdi
+syscall
+
+# CHECK-NOT: error:           'Child benchmarking process exited with non-zero exit code: Child process returned with unknown exit code'

diff  --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 2c2d1adb0fcf08..537417c7a0796e 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -39,6 +39,9 @@
 #endif
 
 #ifdef __linux__
+#ifdef __x86_64__
+#include <asm/prctl.h>
+#endif // __x86_64__
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <unistd.h>
@@ -907,9 +910,90 @@ void ExegesisX86Target::decrementLoopCounterAndJump(
       .addImm(X86::COND_NE);
 }
 
+void generateRegisterStackPush(unsigned int Register,
+                               std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
+}
+
+void generateRegisterStackPop(unsigned int Register,
+                              std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
+}
+
+void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(
+      loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
+  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
+}
+
+constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
+    X86::RDI, X86::RSI, X86::RDX, X86::R10, X86::R8, X86::R9};
+
+// Pushes RCX, R11, RAX and the first ArgumentCount syscall argument registers.
+static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
+                                 unsigned ArgumentCount) {
+  assert(ArgumentCount <= SyscallArgumentRegisters.size() &&
+         "System calls on X86-64 Linux can only take six arguments");
+  // RCX/R11 are clobbered by syscall; RAX holds the number/return value.
+  generateRegisterStackPush(X86::RCX, GeneratedCode);
+  generateRegisterStackPush(X86::R11, GeneratedCode);
+  generateRegisterStackPush(X86::RAX, GeneratedCode);
+  // Preserve the registers used to pass arguments to the system call.
+  for (unsigned I = 0; I < ArgumentCount; ++I)
+    generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
+}
+
+// Pops what saveSyscallRegisters pushed for ArgumentCount, in reverse order.
+static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode,
+                                    unsigned ArgumentCount) {
+  assert(ArgumentCount <= SyscallArgumentRegisters.size() &&
+         "System calls on X86-64 Linux can only take six arguments");
+  // Restore the argument registers, in the opposite order of the way they are
+  // saved.
+  for (unsigned I = ArgumentCount; I > 0; --I)
+    generateRegisterStackPop(SyscallArgumentRegisters[I - 1], GeneratedCode);
+  generateRegisterStackPop(X86::RAX, GeneratedCode);
+  generateRegisterStackPop(X86::R11, GeneratedCode);
+  generateRegisterStackPop(X86::RCX, GeneratedCode);
+}
+
+// Emits the arch_prctl syscall sequence that sets FS or GS (Reg) to Value,
+// saving and restoring the general-purpose registers the sequence clobbers.
+static std::vector<MCInst> loadImmediateSegmentRegister(unsigned Reg,
+                                                        const APInt &Value) {
+#if defined(__x86_64__) && defined(__linux__)
+  assert(Value.getBitWidth() <= 64 && "Value must fit in the register.");
+  std::vector<MCInst> loadSegmentRegisterCode;
+  // Preserve the syscall registers: no assumption is made about the order in
+  // which registers are initialized, so the ones clobbered below may already
+  // hold their requested values.
+  saveSyscallRegisters(loadSegmentRegisterCode, 2);
+  // arch_prctl takes the operation code in RDI and the value to set in RSI.
+  int SyscallCode = 0;
+  if (Reg == X86::FS)
+    SyscallCode = ARCH_SET_FS;
+  else if (Reg == X86::GS)
+    SyscallCode = ARCH_SET_GS;
+  else
+    llvm_unreachable("Only the segment registers GS and FS are supported");
+  loadSegmentRegisterCode.push_back(
+      loadImmediate(X86::RDI, 64, APInt(64, SyscallCode)));
+  loadSegmentRegisterCode.push_back(loadImmediate(X86::RSI, 64, Value));
+  generateSyscall(SYS_arch_prctl, loadSegmentRegisterCode);
+  // Restore the saved registers in reverse order.
+  restoreSyscallRegisters(loadSegmentRegisterCode, 2);
+  return loadSegmentRegisterCode;
+#else
+  llvm_unreachable("Loading immediate segment registers is only supported with "
+                   "x86-64 Linux llvm-exegesis");
+#endif
+}
+
 std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
                                                 unsigned Reg,
                                                 const APInt &Value) const {
+  if (X86::SEGMENT_REGRegClass.contains(Reg))
+    return loadImmediateSegmentRegister(Reg, Value);
   if (X86::GR8RegClass.contains(Reg))
     return {loadImmediate(Reg, 8, Value)};
   if (X86::GR16RegClass.contains(Reg))
@@ -992,12 +1076,6 @@ static constexpr const intptr_t VAddressSpaceCeiling = 0xC0000000;
 static constexpr const intptr_t VAddressSpaceCeiling = 0x0000800000000000;
 #endif
 
-void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
-  GeneratedCode.push_back(
-      loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
-  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
-}
-
 void generateRoundToNearestPage(unsigned int Register,
                                 std::vector<MCInst> &GeneratedCode) {
   int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
@@ -1157,29 +1235,11 @@ intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
   return VAddressSpaceCeiling - 2 * getpagesize();
 }
 
-void generateRegisterStackPush(unsigned int Register,
-                               std::vector<MCInst> &GeneratedCode) {
-  GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
-}
-
-void generateRegisterStackPop(unsigned int Register,
-                              std::vector<MCInst> &GeneratedCode) {
-  GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
-}
-
 std::vector<MCInst>
 ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const {
   std::vector<MCInst> ConfigurePerfCounterCode;
-  if(SaveRegisters) {
-    // Preserve RAX, RDI, and RSI by pushing them to the stack.
-    generateRegisterStackPush(X86::RAX, ConfigurePerfCounterCode);
-    generateRegisterStackPush(X86::RDI, ConfigurePerfCounterCode);
-    generateRegisterStackPush(X86::RSI, ConfigurePerfCounterCode);
-    // RCX and R11 will get clobbered by the syscall instruction, so save them
-    // as well.
-    generateRegisterStackPush(X86::RCX, ConfigurePerfCounterCode);
-    generateRegisterStackPush(X86::R11, ConfigurePerfCounterCode);
-  }
+  if (SaveRegisters)
+    saveSyscallRegisters(ConfigurePerfCounterCode, 2);
   ConfigurePerfCounterCode.push_back(
       loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
   ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
@@ -1192,15 +1252,8 @@ ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const
   ConfigurePerfCounterCode.push_back(
       loadImmediate(X86::RSI, 64, APInt(64, Request)));
   generateSyscall(SYS_ioctl, ConfigurePerfCounterCode);
-  if(SaveRegisters) {
-    // Restore R11 then RCX
-    generateRegisterStackPop(X86::R11, ConfigurePerfCounterCode);
-    generateRegisterStackPop(X86::RCX, ConfigurePerfCounterCode);
-    // Restore RAX, RDI, and RSI, in reverse order.
-    generateRegisterStackPop(X86::RSI, ConfigurePerfCounterCode);
-    generateRegisterStackPop(X86::RDI, ConfigurePerfCounterCode);
-    generateRegisterStackPop(X86::RAX, ConfigurePerfCounterCode);
-  }
+  if (SaveRegisters)
+    restoreSyscallRegisters(ConfigurePerfCounterCode, 2);
   return ConfigurePerfCounterCode;
 }
 


        


More information about the llvm-commits mailing list