[llvm] [llvm-exegesis] [AArch64] Add support for Load Instructions in subprocess execution mode (PR #144895)
Lakshay Kumar via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 02:21:05 PDT 2025
https://github.com/lakshayk-nv updated https://github.com/llvm/llvm-project/pull/144895
>From 5ec0a438512f4932a5313ffc8bceb050b48e9b3b Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 04:49:31 -0700
Subject: [PATCH 01/22] [llvm-exegesis] [AArch64] Use X16 instead of X8
- Switched to X16 as the temporary register in loadFPCRImmediate instead of X8, which is used by syscalls
- Updated the test case with the hardcoded register number.
---
llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s | 4 ++--
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index 3ef664f899551..bcd7792f17fd8 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -70,6 +70,6 @@ RUN: llvm-objdump -d %d > %t.s
RUN: FileCheck %s --check-prefix=FPCR-ASM < %t.s
FPCR-ASM: <foo>:
FPCR-ASM: movi d{{[0-9]+}}, #0000000000000000
-FPCR-ASM-NEXT: mov x8, #0x0
-FPCR-ASM-NEXT: msr FPCR, x8
+FPCR-ASM-NEXT: mov x16, #0x0
+FPCR-ASM-NEXT: msr FPCR, x16
FPCR-ASM-NEXT: bfcvt h{{[0-9]+}}, s{{[0-9]+}}
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 3a0021e3c132d..b1333dc8081b2 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -72,7 +72,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
// Generates instructions to load an immediate value into an FPCR register.
static std::vector<MCInst>
loadFPCRImmediate(MCRegister Reg, unsigned RegBitWidth, const APInt &Value) {
- MCRegister TempReg = AArch64::X8;
+ MCRegister TempReg = AArch64::X16;
MCInst LoadImm = MCInstBuilder(AArch64::MOVi64imm).addReg(TempReg).addImm(0);
MCInst MoveToFPCR =
MCInstBuilder(AArch64::MSR).addImm(AArch64SysReg::FPCR).addReg(TempReg);
>From a75c835be316a7a060d1b0a2dfd79f17eee6ea67 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 04:57:21 -0700
Subject: [PATCH 02/22] [llvm-exegesis] [AArch64] Add helpers to push/pop GPRs
and save/restore syscall registers and syscall generator
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 83 +++++++++++++++++++
1 file changed, 83 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index b1333dc8081b2..7576b5f08f9c3 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -105,6 +105,89 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
return Instructions;
}
+static void generateRegisterStackPush(unsigned int RegToPush,
+ std::vector<MCInst> &GeneratedCode,
+ int imm = -16) {
+ // STR [X|W]t, [SP, #simm]!: SP is decremented by default 16 bytes
+ // before the store to maintain 16-bytes alignment.
+ if (AArch64::GPR64RegClass.contains(RegToPush)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre)
+ .addReg(AArch64::SP)
+ .addReg(RegToPush)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else if (AArch64::GPR32RegClass.contains(RegToPush)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre)
+ .addReg(AArch64::SP)
+ .addReg(RegToPush)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else {
+ llvm_unreachable("Unsupported register class for stack push");
+ }
+}
+
+static void generateRegisterStackPop(unsigned int RegToPopTo,
+ std::vector<MCInst> &GeneratedCode,
+ int imm = 16) {
+ // LDR Xt, [SP], #simm: SP is incremented by default 16 bytes after the load.
+ if (AArch64::GPR64RegClass.contains(RegToPopTo)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost)
+ .addReg(AArch64::SP)
+ .addReg(RegToPopTo)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else if (AArch64::GPR32RegClass.contains(RegToPopTo)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost)
+ .addReg(AArch64::SP)
+ .addReg(RegToPopTo)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else {
+ llvm_unreachable("Unsupported register class for stack pop");
+ }
+}
+
+void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber)));
+ GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0));
+}
+
+/// Functions to save/restore system call registers
+#ifdef __linux__
+constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
+ AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+};
+
+static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+ unsigned ArgumentCount) {
+ // AArch64 Linux typically uses X0-X5 for the first 6 arguments.
+ // Some syscalls can take up to 8 arguments in X0-X7.
+ assert(ArgumentCount <= 6 &&
+ "This implementation saves up to 6 argument registers (X0-X5)");
+ // generateRegisterStackPush(ArgumentRegisters::TempRegister, GeneratedCode);
+ // Preserve X8 (used for the syscall number/return value).
+ generateRegisterStackPush(AArch64::X8, GeneratedCode);
+ // Preserve the registers used to pass arguments to the system call.
+ for (unsigned I = 0; I < ArgumentCount; ++I) {
+ generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
+ }
+}
+
+static void restoreSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+ unsigned ArgumentCount) {
+ assert(ArgumentCount <= 6 &&
+ "This implementation restores up to 6 argument registers (X0-X5)");
+ // Restore argument registers, in opposite order of the way they are saved.
+ for (int I = ArgumentCount - 1; I >= 0; --I) {
+ generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode);
+ }
+ generateRegisterStackPop(AArch64::X8, GeneratedCode);
+ // generateRegisterStackPop(ArgumentRegisters::TempRegister, GeneratedCode);
+}
+#endif // __linux__
#include "AArch64GenExegesis.inc"
namespace {
>From 21cd6535f5fc4c5b4e6fa35e839abfb91fa3184b Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 05:00:21 -0700
Subject: [PATCH 03/22] [llvm-exegesis] [AArch64] Implement memory management
required functions
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 207 +++++++++++++++++-
llvm/tools/llvm-exegesis/lib/Target.h | 4 +
2 files changed, 210 insertions(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 7576b5f08f9c3..4fdf1a56398e2 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -6,11 +6,34 @@
//
//===----------------------------------------------------------------------===//
#include "../Target.h"
+#include "../Error.h"
+#include "../MmapUtils.h"
+#include "../SerialSnippetGenerator.h"
+#include "../SnippetGenerator.h"
+#include "../SubprocessMemory.h"
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <vector>
+#define DEBUG_TYPE "exegesis-aarch64-target"
#if defined(__aarch64__) && defined(__linux__)
-#include <sys/prctl.h> // For PR_PAC_* constants
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h> // for getpagesize()
+#ifdef HAVE_LIBPFM
+#include <perfmon/perf_event.h>
+#endif // HAVE_LIBPFM
+#include <linux/prctl.h> // For PR_PAC_* constants
+#include <sys/prctl.h>
+#ifndef PR_PAC_SET_ENABLED_KEYS
+#define PR_PAC_SET_ENABLED_KEYS 60
+#endif
+#ifndef PR_PAC_GET_ENABLED_KEYS
+#define PR_PAC_GET_ENABLED_KEYS 61
+#endif
#ifndef PR_PAC_APIAKEY
#define PR_PAC_APIAKEY (1UL << 0)
#endif
@@ -197,7 +220,39 @@ class ExegesisAArch64Target : public ExegesisTarget {
ExegesisAArch64Target()
: ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {}
+ enum ArgumentRegisters {
+ CodeSize = AArch64::X12,
+ AuxiliaryMemoryFD = AArch64::X13,
+ TempRegister = AArch64::X16,
+ };
+
+ std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
+ int imm = 0) const override {
+ std::vector<MCInst> Insts;
+ if (AArch64::GPR32RegClass.contains(Reg) ||
+ AArch64::GPR64RegClass.contains(Reg)) {
+ generateRegisterStackPop(Reg, Insts, imm);
+ return Insts;
+ }
+ return {};
+ }
+
private:
+#ifdef __linux__
+ std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;
+ std::vector<MCInst>
+ generateMmap(uintptr_t Address, size_t Length,
+ uintptr_t FileDescriptorAddress) const override;
+ void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;
+ std::vector<MCInst> generateMemoryInitialSetup() const override;
+ std::vector<MCInst> setStackRegisterToAuxMem() const override;
+ uintptr_t getAuxiliaryMemoryStartAddress() const override;
+ std::vector<MCInst> configurePerfCounter(long Request,
+ bool SaveRegisters) const override;
+ std::vector<MCRegister> getArgumentRegisters() const override;
+ std::vector<MCRegister> getRegistersNeedSaving() const override;
+#endif // __linux__
+
std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
const APInt &Value) const override {
if (AArch64::GPR32RegClass.contains(Reg))
@@ -237,6 +292,156 @@ class ExegesisAArch64Target : public ExegesisTarget {
} // namespace
+#ifdef __linux__
+// true : let use of fixed address to Virtual Address Space Ceiling
+// false: let kernel choose the address of the auxiliary memory
+bool UseFixedAddress = true;
+
+static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;
+
+static void generateRoundToNearestPage(unsigned int TargetRegister,
+ std::vector<MCInst> &GeneratedCode) {
+ int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
+ // Round down to the nearest page by getting rid of the least significant bits
+ // representing location in the page.
+
+ // Single instruction using AND with inverted mask (effectively BIC)
+ uint64_t BitsToClearMask = (1ULL << PageSizeShift) - 1; // 0xFFF
+ uint64_t AndMask = ~BitsToClearMask; // ...FFFFFFFFFFFF000
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ANDXri)
+ .addReg(TargetRegister) // Xd
+ .addReg(TargetRegister) // Xn
+ .addImm(AndMask) // imm bitmask
+ );
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const {
+ std::vector<MCInst> ExitCallCode;
+ ExitCallCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, ExitCode)));
+ generateSysCall(SYS_exit, ExitCallCode); // SYS_exit is 93
+ return ExitCallCode;
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length,
+ uintptr_t FileDescriptorAddress) const {
+ // mmap(address, length, prot, flags, fd, offset=0)
+ int flags = MAP_SHARED;
+ if (Address != 0) {
+ flags |= MAP_FIXED_NOREPLACE;
+ }
+ std::vector<MCInst> MmapCode;
+ MmapCode.push_back(
+ loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr
+ MmapCode.push_back(
+ loadImmediate(AArch64::X1, 64, APInt(64, Length))); // length
+ MmapCode.push_back(loadImmediate(AArch64::X2, 64,
+ APInt(64, PROT_READ | PROT_WRITE))); // prot
+ MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
+ // FIXME: File descriptor address is not initialized.
+ // Copy file descriptor location from aux memory into X4
+ MmapCode.push_back(
+ loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd
+ // Dereference file descriptor into FD argument register
+ // MmapCode.push_back(MCInstBuilder(AArch64::LDRWui)
+ // .addReg(AArch64::W4) // Destination register
+ // .addReg(AArch64::X4) // Base register (address)
+ // .addImm(0)); // Offset (-byte words)
+ // FIXME: This is not correct.
+ MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
+ generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222
+ return MmapCode;
+}
+
+void ExegesisAArch64Target::generateMmapAuxMem(
+ std::vector<MCInst> &GeneratedCode) const {
+ int fd = -1;
+ int flags = MAP_SHARED;
+ uintptr_t address = getAuxiliaryMemoryStartAddress();
+ if (fd == -1)
+ flags |= MAP_ANONYMOUS;
+ if (address != 0)
+ flags |= MAP_FIXED_NOREPLACE;
+ int prot = PROT_READ | PROT_WRITE;
+
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X0, 64, APInt(64, address))); // map adr
+ GeneratedCode.push_back(loadImmediate(
+ AArch64::X1, 64,
+ APInt(64, SubprocessMemory::AuxiliaryMemorySize))); // length
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X2, 64, APInt(64, prot))); // prot
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
+ GeneratedCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
+ generateSysCall(SYS_mmap, GeneratedCode); // SYS_mmap is 222
+}
+
+std::vector<MCInst> ExegesisAArch64Target::generateMemoryInitialSetup() const {
+ std::vector<MCInst> MemoryInitialSetupCode;
+ generateMmapAuxMem(MemoryInitialSetupCode); // FIXME: Uninit file descriptor
+
+ // If using fixed address for auxiliary memory skip this step,
+ // When using dynamic memory allocation (non-fixed address), we must preserve
+ // the mmap return value (X0) which contains the allocated memory address.
+ // This value is saved to the stack to ensure registers requiring memory
+ // access can retrieve the correct address even if X0 is modified by
+ // intermediate code.
+ generateRegisterStackPush(AArch64::X0, MemoryInitialSetupCode);
+ // FIXME: Ensure stack pointer remains stable to prevent loss of saved address
+ return MemoryInitialSetupCode;
+}
+
+std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
+ std::vector<MCInst> instructions; // NOP
+ // TODO: Implement this, Found no need for this in AArch64.
+ return instructions;
+}
+
+uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const {
+ if (!UseFixedAddress)
+ // Allow kernel to select an appropriate memory address
+ return 0;
+ // Return the second to last page in the virtual address space
+ // to try and prevent interference with memory annotations in the snippet
+ // VAddressSpaceCeiling = 0x0000800000000000
+ // FIXME: Why 2 pages?
+ return VAddressSpaceCeiling - (2 * getpagesize());
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::configurePerfCounter(long Request,
+ bool SaveRegisters) const {
+ std::vector<MCInst> ConfigurePerfCounterCode; // NOP
+ // FIXME: SYSCALL exits with EBADF error - file descriptor is invalid
+ // No file is opened previously to add as file descriptor
+ return ConfigurePerfCounterCode;
+}
+
+std::vector<MCRegister> ExegesisAArch64Target::getArgumentRegisters() const {
+ return {AArch64::X0, AArch64::X1};
+}
+
+std::vector<MCRegister> ExegesisAArch64Target::getRegistersNeedSaving() const {
+ return {
+ AArch64::X0,
+ AArch64::X1,
+ AArch64::X2,
+ AArch64::X3,
+ AArch64::X4,
+ AArch64::X5,
+ AArch64::X8,
+ ArgumentRegisters::TempRegister,
+ ArgumentRegisters::CodeSize,
+ ArgumentRegisters::AuxiliaryMemoryFD,
+ };
+}
+
+#endif // __linux__
+
static ExegesisTarget *getTheExegesisAArch64Target() {
static ExegesisAArch64Target Target;
return &Target;
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 77fbaa6e95412..736c9d9ff6c23 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -308,6 +308,10 @@ class ExegesisTarget {
return std::make_unique<SavedState>();
}
+ virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg, int imm = 0) const {
+ return {};
+ }
+
private:
virtual bool matchesArch(Triple::ArchType Arch) const = 0;
>From 50c07dbaec5f2db052ee2f2c2bd0dfa8db6a6464 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 06:16:21 -0700
Subject: [PATCH 04/22] [llvm-exegesis] [AArch64] Implement different register
initialization for subprocess execution mode
---
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 34 ++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index fd7924db08441..a73eaf76a46d7 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -66,6 +66,8 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
assert(MM.Address % getpagesize() == 0 &&
"Memory mappings need to be aligned to page boundaries.");
#endif
+ // FIXME: file descriptor for aux memory seems not initialized.
+ // TODO: Invoke openat syscall to get correct fd for aux memory
const MemoryValue &MemVal = Key.MemoryValues.at(MM.MemoryValueName);
BBF.addInstructions(ET.generateMmap(
MM.Address, MemVal.SizeBytes,
@@ -78,15 +80,47 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
Register StackPointerRegister = BBF.MF.getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore();
+#define DEBUG_TYPE "register-initial-values"
+ // FIXME: Only loading first register with memory address is hacky.
+ bool isFirstRegister = true;
for (const RegisterValue &RV : Key.RegisterInitialValues) {
+ // Debug: register name and class name and value from BenchmarkKey
+ const MCRegisterInfo *RegInfo = BBF.MF.getTarget().getMCRegisterInfo();
+ const char *RegName = RegInfo->getName(RV.Register);
+ const char *regClassName = "Unknown";
+ for (unsigned i = 0, e = RegInfo->getNumRegClasses(); i < e; ++i) {
+ const MCRegisterClass &RC = RegInfo->getRegClass(i);
+ if (RC.contains(RV.Register)) {
+ regClassName = RegInfo->getRegClassName(&RC);
+ break;
+ }
+ }
+ LLVM_DEBUG(
+ dbgs() << "Setting register (Class: " << regClassName << ") " << RegName
+ << std::string(
+ std::max(0, 3 - static_cast<int>(strlen(RegName))), ' '));
+
if (GenerateMemoryInstructions) {
// If we're generating memory instructions, don't load in the value for
// the register with the stack pointer as it will be used later to finish
// the setup.
if (Register(RV.Register) == StackPointerRegister)
continue;
+#if defined(__aarch64__)
+ auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16);
+ if (!StackLoadInsts.empty() && isFirstRegister) {
+ for (const auto &Inst : StackLoadInsts)
+ BBF.addInstruction(Inst);
+ isFirstRegister = false;
+ LLVM_DEBUG(dbgs() << "from stack with post-increment offset of " << 16
+ << " bytes\n");
+ continue;
+ }
+#endif
}
// Load a constant in the register.
+ LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n");
+#undef DEBUG_TYPE
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
if (SetRegisterCode.empty())
IsSnippetSetupComplete = false;
>From 01a03a1c1ca78a41e00d408bea4fcbe197c3dfba Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 06:23:19 -0700
Subject: [PATCH 05/22] [llvm-exegesis] [AArch64] Resolve Merge Conflict coming
from reverted #136868
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 35 +++++++++++++++----
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 4fdf1a56398e2..ec1e667b1f629 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,12 +28,6 @@
#endif // HAVE_LIBPFM
#include <linux/prctl.h> // For PR_PAC_* constants
#include <sys/prctl.h>
-#ifndef PR_PAC_SET_ENABLED_KEYS
-#define PR_PAC_SET_ENABLED_KEYS 60
-#endif
-#ifndef PR_PAC_GET_ENABLED_KEYS
-#define PR_PAC_GET_ENABLED_KEYS 61
-#endif
#ifndef PR_PAC_APIAKEY
#define PR_PAC_APIAKEY (1UL << 0)
#endif
@@ -288,6 +282,35 @@ class ExegesisAArch64Target : public ExegesisTarget {
// Function return is a pseudo-instruction that needs to be expanded
PM.add(createAArch64ExpandPseudoPass());
}
+
+ const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
+ unsigned Opcode) const override {
+ if (const char *Reason =
+ ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode))
+ return Reason;
+
+ if (isPointerAuth(Opcode)) {
+#if defined(__aarch64__) && defined(__linux__)
+ // Disable all PAC keys. Note that while we expect the measurements to
+ // be the same with PAC keys disabled, they could potentially be lower
+ // since authentication checks are bypassed.
+ if (prctl(PR_PAC_SET_ENABLED_KEYS,
+ PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY |
+ PR_PAC_APDBKEY, // all keys
+ 0, // disable all
+ 0, 0) < 0) {
+ return "Failed to disable PAC keys";
+ }
+#else
+ return "Unsupported opcode: isPointerAuth";
+#endif
+ }
+
+ if (isLoadTagMultiple(Opcode))
+ return "Unsupported opcode: load tag multiple";
+
+ return nullptr;
+ }
};
} // namespace
>From 412c9caa86d8586d4419c4334646a7186bd81f7a Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 06:58:45 -0700
Subject: [PATCH 06/22] [llvm-exegesis] [AArch64] Format changes
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 32 +------------------
llvm/tools/llvm-exegesis/lib/Target.h | 3 +-
2 files changed, 3 insertions(+), 32 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index ec1e667b1f629..c8613f360b376 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -223,7 +223,7 @@ class ExegesisAArch64Target : public ExegesisTarget {
std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
int imm = 0) const override {
std::vector<MCInst> Insts;
- if (AArch64::GPR32RegClass.contains(Reg) ||
+ if (AArch64::GPR32RegClass.contains(Reg) ||
AArch64::GPR64RegClass.contains(Reg)) {
generateRegisterStackPop(Reg, Insts, imm);
return Insts;
@@ -283,36 +283,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
PM.add(createAArch64ExpandPseudoPass());
}
- const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
- unsigned Opcode) const override {
- if (const char *Reason =
- ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode))
- return Reason;
-
- if (isPointerAuth(Opcode)) {
-#if defined(__aarch64__) && defined(__linux__)
- // Disable all PAC keys. Note that while we expect the measurements to
- // be the same with PAC keys disabled, they could potentially be lower
- // since authentication checks are bypassed.
- if (prctl(PR_PAC_SET_ENABLED_KEYS,
- PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY |
- PR_PAC_APDBKEY, // all keys
- 0, // disable all
- 0, 0) < 0) {
- return "Failed to disable PAC keys";
- }
-#else
- return "Unsupported opcode: isPointerAuth";
-#endif
- }
-
- if (isLoadTagMultiple(Opcode))
- return "Unsupported opcode: load tag multiple";
-
- return nullptr;
- }
-};
-
} // namespace
#ifdef __linux__
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 736c9d9ff6c23..0304908cbb2b2 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -308,7 +308,8 @@ class ExegesisTarget {
return std::make_unique<SavedState>();
}
- virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg, int imm = 0) const {
+ virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
+ int imm = 0) const {
return {};
}
>From 9d7efdbc155eab21b84bd7c481481b5aff6c6002 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 08:45:38 -0700
Subject: [PATCH 07/22] [llvm-exegesis] [AArch64] Fix missing closing brace in
Target.cpp
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index c8613f360b376..c3bd18c3a8440 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -282,6 +282,7 @@ class ExegesisAArch64Target : public ExegesisTarget {
// Function return is a pseudo-instruction that needs to be expanded
PM.add(createAArch64ExpandPseudoPass());
}
+};
} // namespace
>From be42cd7a92f8e5068b43b7d9b87eafa2219740a8 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Tue, 1 Jul 2025 09:42:50 -0700
Subject: [PATCH 08/22] [llvm-exegesis] Renamed `ArgumentRegisters` enum to
`ReservedRegisters` for clarity.
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 8 ++++----
llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 10 +++++-----
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index c3bd18c3a8440..8cfce2d7af682 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -214,7 +214,7 @@ class ExegesisAArch64Target : public ExegesisTarget {
ExegesisAArch64Target()
: ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {}
- enum ArgumentRegisters {
+ enum ReservedRegisters {
CodeSize = AArch64::X12,
AuxiliaryMemoryFD = AArch64::X13,
TempRegister = AArch64::X16,
@@ -428,9 +428,9 @@ std::vector<MCRegister> ExegesisAArch64Target::getRegistersNeedSaving() const {
AArch64::X4,
AArch64::X5,
AArch64::X8,
- ArgumentRegisters::TempRegister,
- ArgumentRegisters::CodeSize,
- ArgumentRegisters::AuxiliaryMemoryFD,
+ ReservedRegisters::TempRegister,
+ ReservedRegisters::CodeSize,
+ ReservedRegisters::AuxiliaryMemoryFD,
};
}
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 5dae6c0a25fab..a13bcc595efc6 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -725,7 +725,7 @@ class ExegesisX86Target : public ExegesisTarget {
ProcessID);
}
- enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };
+ enum ReservedRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };
private:
void addTargetSpecificPasses(PassManagerBase &PM) const override;
@@ -1166,7 +1166,7 @@ void ExegesisX86Target::generateUpperMunmap(
// Load in the size of the snippet to RDI from from the argument register.
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
.addReg(X86::RDI)
- .addReg(ArgumentRegisters::CodeSize));
+ .addReg(ReservedRegisters::CodeSize));
// Add the length of the snippet (in %RDI) to the current instruction pointer
// (%R8) to get the address where we should start unmapping at.
GeneratedCode.push_back(MCInstBuilder(X86::ADD64rr)
@@ -1236,7 +1236,7 @@ void ExegesisX86Target::generateMmapAuxMem(
loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
.addReg(X86::R8)
- .addReg(ArgumentRegisters::AuxiliaryMemoryFD));
+ .addReg(ReservedRegisters::AuxiliaryMemoryFD));
GeneratedCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
generateSyscall(SYS_mmap, GeneratedCode);
}
@@ -1244,10 +1244,10 @@ void ExegesisX86Target::generateMmapAuxMem(
void ExegesisX86Target::moveArgumentRegisters(
std::vector<MCInst> &GeneratedCode) const {
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
- .addReg(ArgumentRegisters::CodeSize)
+ .addReg(ReservedRegisters::CodeSize)
.addReg(X86::RDI));
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
- .addReg(ArgumentRegisters::AuxiliaryMemoryFD)
+ .addReg(ReservedRegisters::AuxiliaryMemoryFD)
.addReg(X86::RSI));
}
>From ce64b4732550f681d0a25649f9f799f6cbe97f65 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Tue, 1 Jul 2025 09:49:48 -0700
Subject: [PATCH 09/22] [llvm-exegesis] [AArch64] Refactor stack push/pop
functions for clarity and update syscall argument handling and descriptive
comments
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 27 ++++++++++---------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 8cfce2d7af682..bb09f8adfa10e 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -166,6 +166,11 @@ static void generateRegisterStackPop(unsigned int RegToPopTo,
}
void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+ // AArch64 Linux follows the AAPCS (ARM Architecture Procedure Call Standard):
+ // - X8 register contains the system call number
+ // - X0-X5 registers contain the first 6 arguments (if any)
+ // - SVC #0 instruction triggers the system call
+ // - Return value is placed in X0 register
GeneratedCode.push_back(
loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber)));
GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0));
@@ -173,18 +178,17 @@ void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
/// Functions to save/restore system call registers
#ifdef __linux__
-constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
- AArch64::X0, AArch64::X1, AArch64::X2,
- AArch64::X3, AArch64::X4, AArch64::X5,
+constexpr std::array<unsigned, 8> SyscallArgumentRegisters{
+ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
+ AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7,
};
static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode,
unsigned ArgumentCount) {
- // AArch64 Linux typically uses X0-X5 for the first 6 arguments.
- // Some syscalls can take up to 8 arguments in X0-X7.
- assert(ArgumentCount <= 6 &&
- "This implementation saves up to 6 argument registers (X0-X5)");
- // generateRegisterStackPush(ArgumentRegisters::TempRegister, GeneratedCode);
+ // AArch64 follows the AAPCS (ARM Architecture Procedure Call Standard):
+ // X0-X7 registers contain the first 8 arguments.
+ assert(ArgumentCount <= 8 &&
+ "This implementation saves up to 8 argument registers (X0-X7)");
// Preserve X8 (used for the syscall number/return value).
generateRegisterStackPush(AArch64::X8, GeneratedCode);
// Preserve the registers used to pass arguments to the system call.
@@ -195,14 +199,13 @@ static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode,
static void restoreSysCallRegisters(std::vector<MCInst> &GeneratedCode,
unsigned ArgumentCount) {
- assert(ArgumentCount <= 6 &&
- "This implementation restores up to 6 argument registers (X0-X5)");
- // Restore argument registers, in opposite order of the way they are saved.
+ assert(ArgumentCount <= 8 &&
+ "This implementation restores up to 8 argument registers (X0-X7)");
+ // Restore registers in reverse order
for (int I = ArgumentCount - 1; I >= 0; --I) {
generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode);
}
generateRegisterStackPop(AArch64::X8, GeneratedCode);
- // generateRegisterStackPop(ArgumentRegisters::TempRegister, GeneratedCode);
}
#endif // __linux__
#include "AArch64GenExegesis.inc"
>From 5133a050a4e986f87deeb4d637243f69ca224b66 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Tue, 1 Jul 2025 09:51:46 -0700
Subject: [PATCH 10/22] [llvm-exegesis] [AArch64] Removed if-else block
brackets
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 20 ++++++-------------
1 file changed, 6 insertions(+), 14 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index bb09f8adfa10e..50018831c1923 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -127,42 +127,40 @@ static void generateRegisterStackPush(unsigned int RegToPush,
int imm = -16) {
// STR [X|W]t, [SP, #simm]!: SP is decremented by default 16 bytes
// before the store to maintain 16-bytes alignment.
- if (AArch64::GPR64RegClass.contains(RegToPush)) {
+ if (AArch64::GPR64RegClass.contains(RegToPush))
GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre)
.addReg(AArch64::SP)
.addReg(RegToPush)
.addReg(AArch64::SP)
.addImm(imm));
- } else if (AArch64::GPR32RegClass.contains(RegToPush)) {
+ else if (AArch64::GPR32RegClass.contains(RegToPush))
GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre)
.addReg(AArch64::SP)
.addReg(RegToPush)
.addReg(AArch64::SP)
.addImm(imm));
- } else {
+ else
llvm_unreachable("Unsupported register class for stack push");
- }
}
static void generateRegisterStackPop(unsigned int RegToPopTo,
std::vector<MCInst> &GeneratedCode,
int imm = 16) {
// LDR Xt, [SP], #simm: SP is incremented by default 16 bytes after the load.
- if (AArch64::GPR64RegClass.contains(RegToPopTo)) {
+ if (AArch64::GPR64RegClass.contains(RegToPopTo))
GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost)
.addReg(AArch64::SP)
.addReg(RegToPopTo)
.addReg(AArch64::SP)
.addImm(imm));
- } else if (AArch64::GPR32RegClass.contains(RegToPopTo)) {
+ else if (AArch64::GPR32RegClass.contains(RegToPopTo))
GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost)
.addReg(AArch64::SP)
.addReg(RegToPopTo)
.addReg(AArch64::SP)
.addImm(imm));
- } else {
+ else
llvm_unreachable("Unsupported register class for stack pop");
- }
}
void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
@@ -340,12 +338,6 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length,
// Copy file descriptor location from aux memory into X4
MmapCode.push_back(
loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd
- // Dereference file descriptor into FD argument register
- // MmapCode.push_back(MCInstBuilder(AArch64::LDRWui)
- // .addReg(AArch64::W4) // Destination register
- // .addReg(AArch64::X4) // Base register (address)
- // .addImm(0)); // Offset (-byte words)
- // FIXME: This is not correct.
MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222
return MmapCode;
>From c50890b9a984a07060b3077e3cabdc32debb935c Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Tue, 1 Jul 2025 09:52:33 -0700
Subject: [PATCH 11/22] [llvm-exegesis] [AArch64] Introduced warnings for
unimplemented functions using dbgs
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 50018831c1923..3b7bfce1ae138 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -386,7 +386,10 @@ std::vector<MCInst> ExegesisAArch64Target::generateMemoryInitialSetup() const {
std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
std::vector<MCInst> instructions; // NOP
- // TODO: Implement this, Found no need for this in AArch64.
+ // Motivation unclear, found no need for this in AArch64.
+ // TODO: Implement this, if required.
+ dbgs() << "Warning: setStackRegisterToAuxMem called but not required for "
+ "AArch64\n";
return instructions;
}
@@ -407,6 +410,8 @@ ExegesisAArch64Target::configurePerfCounter(long Request,
std::vector<MCInst> ConfigurePerfCounterCode; // NOP
// FIXME: SYSCALL exits with EBADF error - file descriptor is invalid
// No file is opened previosly to add as file descriptor
+ errs() << "Warning: configurePerfCounter not implemented, measurements will "
+ "be unreliable\n";
return ConfigurePerfCounterCode;
}
>From 1b0f4c7723d7aad37d95d049ee0f724179fc03fc Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Tue, 1 Jul 2025 10:12:10 -0700
Subject: [PATCH 12/22] [llvm-exegesis] [AArch64] refactor
configurePerfCounter, errs to dbgs
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 3b7bfce1ae138..f7273f70ccf2a 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -410,7 +410,7 @@ ExegesisAArch64Target::configurePerfCounter(long Request,
std::vector<MCInst> ConfigurePerfCounterCode; // NOP
// FIXME: SYSCALL exits with EBADF error - file descriptor is invalid
// No file is opened previosly to add as file descriptor
- errs() << "Warning: configurePerfCounter not implemented, measurements will "
+ dbgs() << "Warning: configurePerfCounter not implemented, measurements will "
"be unreliable\n";
return ConfigurePerfCounterCode;
}
>From a53d5b08e1aeaa6f52fa8d293677721b7cf932a5 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 14 Aug 2025 04:16:37 -0700
Subject: [PATCH 13/22] [llvm-exegesis] [AArch64] Remove unnecessary AArch64
guard.
---
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index a73eaf76a46d7..67d78e82f411b 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -106,7 +106,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
// the setup.
if (Register(RV.Register) == StackPointerRegister)
continue;
-#if defined(__aarch64__)
auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16);
if (!StackLoadInsts.empty() && isFirstRegister) {
for (const auto &Inst : StackLoadInsts)
@@ -116,7 +115,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
<< " bytes\n");
continue;
}
-#endif
}
// Load a constant in the register.
LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n");
>From 4ab9412aa373985a225453cd785236034c69c8a9 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 14 Aug 2025 04:17:48 -0700
Subject: [PATCH 14/22] [llvm-exegesis] [AArch64] Remove register initial value
debug info
---
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 19 -------------------
1 file changed, 19 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 67d78e82f411b..6a9103198e2de 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -80,26 +80,9 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
Register StackPointerRegister = BBF.MF.getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore();
-#define DEBUG_TYPE "register-initial-values"
// FIXME: Only loading first register with memory address is hacky.
bool isFirstRegister = true;
for (const RegisterValue &RV : Key.RegisterInitialValues) {
- // Debug: register name and class name and value from BenchmarkKey
- const MCRegisterInfo *RegInfo = BBF.MF.getTarget().getMCRegisterInfo();
- const char *RegName = RegInfo->getName(RV.Register);
- const char *regClassName = "Unknown";
- for (unsigned i = 0, e = RegInfo->getNumRegClasses(); i < e; ++i) {
- const MCRegisterClass &RC = RegInfo->getRegClass(i);
- if (RC.contains(RV.Register)) {
- regClassName = RegInfo->getRegClassName(&RC);
- break;
- }
- }
- LLVM_DEBUG(
- dbgs() << "Setting register (Class: " << regClassName << ") " << RegName
- << std::string(
- std::max(0, 3 - static_cast<int>(strlen(RegName))), ' '));
-
if (GenerateMemoryInstructions) {
// If we're generating memory instructions, don't load in the value for
// the register with the stack pointer as it will be used later to finish
@@ -117,8 +100,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
}
}
// Load a constant in the register.
- LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n");
-#undef DEBUG_TYPE
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
if (SetRegisterCode.empty())
IsSnippetSetupComplete = false;
>From 13d2a10a01bacf72b2c423e10a3b8839da9df9d3 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 17 Aug 2025 22:24:35 -0700
Subject: [PATCH 15/22] [llvm-exegesis] [AArch64] Remove additional register
initial value debug info
---
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 6a9103198e2de..a9d6125ca98a5 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -94,8 +94,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
for (const auto &Inst : StackLoadInsts)
BBF.addInstruction(Inst);
isFirstRegister = false;
- LLVM_DEBUG(dbgs() << "from stack with post-increment offset of " << 16
- << " bytes\n");
continue;
}
}
>From 1f66364b251ec41ab7a040b5d59f3ec94459309c Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 31 Aug 2025 22:52:23 -0700
Subject: [PATCH 16/22] [llvm-exegesis] Revert ReservedRegisters enum to
ArgumentRegisters in AArch64 and X86 targets
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 8 ++++----
llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 10 +++++-----
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index f7273f70ccf2a..1138d04f84740 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -215,7 +215,7 @@ class ExegesisAArch64Target : public ExegesisTarget {
ExegesisAArch64Target()
: ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {}
- enum ReservedRegisters {
+ enum ArgumentRegisters {
CodeSize = AArch64::X12,
AuxiliaryMemoryFD = AArch64::X13,
TempRegister = AArch64::X16,
@@ -428,9 +428,9 @@ std::vector<MCRegister> ExegesisAArch64Target::getRegistersNeedSaving() const {
AArch64::X4,
AArch64::X5,
AArch64::X8,
- ReservedRegisters::TempRegister,
- ReservedRegisters::CodeSize,
- ReservedRegisters::AuxiliaryMemoryFD,
+ ArgumentRegisters::TempRegister,
+ ArgumentRegisters::CodeSize,
+ ArgumentRegisters::AuxiliaryMemoryFD,
};
}
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index a13bcc595efc6..5dae6c0a25fab 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -725,7 +725,7 @@ class ExegesisX86Target : public ExegesisTarget {
ProcessID);
}
- enum ReservedRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };
+ enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };
private:
void addTargetSpecificPasses(PassManagerBase &PM) const override;
@@ -1166,7 +1166,7 @@ void ExegesisX86Target::generateUpperMunmap(
// Load in the size of the snippet to RDI from from the argument register.
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
.addReg(X86::RDI)
- .addReg(ReservedRegisters::CodeSize));
+ .addReg(ArgumentRegisters::CodeSize));
// Add the length of the snippet (in %RDI) to the current instruction pointer
// (%R8) to get the address where we should start unmapping at.
GeneratedCode.push_back(MCInstBuilder(X86::ADD64rr)
@@ -1236,7 +1236,7 @@ void ExegesisX86Target::generateMmapAuxMem(
loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
.addReg(X86::R8)
- .addReg(ReservedRegisters::AuxiliaryMemoryFD));
+ .addReg(ArgumentRegisters::AuxiliaryMemoryFD));
GeneratedCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
generateSyscall(SYS_mmap, GeneratedCode);
}
@@ -1244,10 +1244,10 @@ void ExegesisX86Target::generateMmapAuxMem(
void ExegesisX86Target::moveArgumentRegisters(
std::vector<MCInst> &GeneratedCode) const {
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
- .addReg(ReservedRegisters::CodeSize)
+ .addReg(ArgumentRegisters::CodeSize)
.addReg(X86::RDI));
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
- .addReg(ReservedRegisters::AuxiliaryMemoryFD)
+ .addReg(ArgumentRegisters::AuxiliaryMemoryFD)
.addReg(X86::RSI));
}
>From eeb64273b7bbbcc759bc24aa1729cca2522f3bc9 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 4 Sep 2025 01:00:35 -0700
Subject: [PATCH 17/22] [llvm-exegesis] [AArch64] Removed kernel-or-fixed
address option for auxiliary memory; always use the fixed address.
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 1138d04f84740..b25a43fcdbf9f 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -288,10 +288,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
} // namespace
#ifdef __linux__
-// true : let use of fixed address to Virtual Address Space Ceiling
-// false: let kernel choose the address of the auxiliary memory
-bool UseFixedAddress = true;
-
static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;
static void generateRoundToNearestPage(unsigned int TargetRegister,
@@ -394,12 +390,8 @@ std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
}
uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const {
- if (!UseFixedAddress)
- // Allow kernel to select an appropriate memory address
- return 0;
- // Return the second to last page in the virtual address space
- // to try and prevent interference with memory annotations in the snippet
- // VAddressSpaceCeiling = 0x0000800000000000
+ // Return the second to last page in the virtual address space to try and
+ // prevent interference with memory annotations in the snippet
// FIXME: Why 2 pages?
return VAddressSpaceCeiling - (2 * getpagesize());
}
>From 63c199f5f0469dd6adf11b513953cdb559de2898 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 4 Sep 2025 01:02:22 -0700
Subject: [PATCH 18/22] [llvm-exegesis] [AArch64] setStackRegisterToAuxMem
Implementation
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 46 +++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index b25a43fcdbf9f..65c2eb0da7efc 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -386,6 +386,52 @@ std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
// TODO: Implement this, if required.
dbgs() << "Warning: setStackRegisterToAuxMem called but not required for "
"AArch64\n";
+
+ const uint64_t targetSPVal =
+ getAuxiliaryMemoryStartAddress() + SubprocessMemory::AuxiliaryMemorySize;
+ // sub, stack args and local storage
+ // Use X16 as a temporary register since it's a scratch register
+ const MCRegister TempReg = AArch64::X16;
+
+ // Load the 64-bit immediate into TempReg using MOVZ/MOVK sequence
+ // MOVZ Xd, #imm16, LSL #(shift_val * 16)
+ // MOVK Xd, #imm16, LSL #(shift_val * 16) (* 3 times for 64-bit immediate)
+
+ // 1. MOVZ TmpReg, #(targetSPVal & 0xFFFF), LSL #0
+ instructions.push_back(MCInstBuilder(AArch64::MOVZXi)
+ .addReg(TempReg)
+ .addImm(targetSPVal & 0xFFFF) // imm16
+ .addImm(0)); // hw(shift/16) = 0
+ // 2. MOVK TmpReg, #((targetSPVal >> 16) & 0xFFFF), LSL #16
+ if (((targetSPVal >> 16) & 0xFFFF) != 0 || (targetSPVal > 0xFFFF)) {
+ instructions.push_back(MCInstBuilder(AArch64::MOVKXi)
+ .addReg(TempReg)
+ .addReg(TempReg)
+ .addImm((targetSPVal >> 16) & 0xFFFF) // imm16
+ .addImm(1)); // hw(shift/16) = 1
+ }
+ // 3. MOVK TmpReg, #((targetSPVal >> 32) & 0xFFFF), LSL #32
+ if (((targetSPVal >> 32) & 0xFFFF) != 0 || (targetSPVal > 0xFFFFFFFF)) {
+ instructions.push_back(MCInstBuilder(AArch64::MOVKXi)
+ .addReg(TempReg)
+ .addReg(TempReg)
+ .addImm((targetSPVal >> 32) & 0xFFFF) // imm16
+ .addImm(2)); // hw(shift/16) = 2
+ }
+ // 4. MOVK TmpReg, #((targetSPVal >> 48) & 0xFFFF), LSL #48
+ if (((targetSPVal >> 48) & 0xFFFF) != 0 || (targetSPVal > 0xFFFFFFFFFFFF)) {
+ instructions.push_back(MCInstBuilder(AArch64::MOVKXi)
+ .addReg(TempReg)
+ .addReg(TempReg)
+ .addImm((targetSPVal >> 48) & 0xFFFF) // imm16
+ .addImm(3)); // hw(shift/16) = 3
+ }
+ // Finally, move the value from TempReg to SP
+ instructions.push_back(MCInstBuilder(AArch64::ADDXri) // ADD SP, TempReg, #0
+ .addReg(AArch64::SP)
+ .addReg(TempReg)
+ .addImm(0) // imm = 0
+ .addImm(0)); // shift = 0
return instructions;
}
>From a05f2a9323e424fb00f9a4a2424edf908deb34fc Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 4 Sep 2025 01:03:21 -0700
Subject: [PATCH 19/22] [llvm-exegesis] [AArch64] Revert
setStackRegisterToAuxMem Implementation. Introduces Bus error.
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 46 -------------------
1 file changed, 46 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 65c2eb0da7efc..b25a43fcdbf9f 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -386,52 +386,6 @@ std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
// TODO: Implement this, if required.
dbgs() << "Warning: setStackRegisterToAuxMem called but not required for "
"AArch64\n";
-
- const uint64_t targetSPVal =
- getAuxiliaryMemoryStartAddress() + SubprocessMemory::AuxiliaryMemorySize;
- // sub, stack args and local storage
- // Use X16 as a temporary register since it's a scratch register
- const MCRegister TempReg = AArch64::X16;
-
- // Load the 64-bit immediate into TempReg using MOVZ/MOVK sequence
- // MOVZ Xd, #imm16, LSL #(shift_val * 16)
- // MOVK Xd, #imm16, LSL #(shift_val * 16) (* 3 times for 64-bit immediate)
-
- // 1. MOVZ TmpReg, #(targetSPVal & 0xFFFF), LSL #0
- instructions.push_back(MCInstBuilder(AArch64::MOVZXi)
- .addReg(TempReg)
- .addImm(targetSPVal & 0xFFFF) // imm16
- .addImm(0)); // hw(shift/16) = 0
- // 2. MOVK TmpReg, #((targetSPVal >> 16) & 0xFFFF), LSL #16
- if (((targetSPVal >> 16) & 0xFFFF) != 0 || (targetSPVal > 0xFFFF)) {
- instructions.push_back(MCInstBuilder(AArch64::MOVKXi)
- .addReg(TempReg)
- .addReg(TempReg)
- .addImm((targetSPVal >> 16) & 0xFFFF) // imm16
- .addImm(1)); // hw(shift/16) = 1
- }
- // 3. MOVK TmpReg, #((targetSPVal >> 32) & 0xFFFF), LSL #32
- if (((targetSPVal >> 32) & 0xFFFF) != 0 || (targetSPVal > 0xFFFFFFFF)) {
- instructions.push_back(MCInstBuilder(AArch64::MOVKXi)
- .addReg(TempReg)
- .addReg(TempReg)
- .addImm((targetSPVal >> 32) & 0xFFFF) // imm16
- .addImm(2)); // hw(shift/16) = 2
- }
- // 4. MOVK TmpReg, #((targetSPVal >> 48) & 0xFFFF), LSL #48
- if (((targetSPVal >> 48) & 0xFFFF) != 0 || (targetSPVal > 0xFFFFFFFFFFFF)) {
- instructions.push_back(MCInstBuilder(AArch64::MOVKXi)
- .addReg(TempReg)
- .addReg(TempReg)
- .addImm((targetSPVal >> 48) & 0xFFFF) // imm16
- .addImm(3)); // hw(shift/16) = 3
- }
- // Finally, move the value from TempReg to SP
- instructions.push_back(MCInstBuilder(AArch64::ADDXri) // ADD SP, TempReg, #0
- .addReg(AArch64::SP)
- .addReg(TempReg)
- .addImm(0) // imm = 0
- .addImm(0)); // shift = 0
return instructions;
}
>From c7d7676f7f91e06c5a5188aaaabc59b1204e166f Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 4 Sep 2025 01:04:48 -0700
Subject: [PATCH 20/22] [llvm-exegesis] [AArch64] Remove unused
generateRoundToNearestPage function, called by unmap func. (Motivation for
unmap unclear for AArch64)
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 16 ----------------
1 file changed, 16 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index b25a43fcdbf9f..056ec00a2b150 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -290,22 +290,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
#ifdef __linux__
static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;
-static void generateRoundToNearestPage(unsigned int TargetRegister,
- std::vector<MCInst> &GeneratedCode) {
- int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
- // Round down to the nearest page by getting rid of the least significant bits
- // representing location in the page.
-
- // Single instruction using AND with inverted mask (effectively BIC)
- uint64_t BitsToClearMask = (1ULL << PageSizeShift) - 1; // 0xFFF
- uint64_t AndMask = ~BitsToClearMask; // ...FFFFFFFFFFFF000
- GeneratedCode.push_back(MCInstBuilder(AArch64::ANDXri)
- .addReg(TargetRegister) // Xd
- .addReg(TargetRegister) // Xn
- .addImm(AndMask) // imm bitmask
- );
-}
-
std::vector<MCInst>
ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const {
std::vector<MCInst> ExitCallCode;
>From f52e612e6b3b10f5a1902d6bbbf61c676a6af845 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 4 Sep 2025 01:07:25 -0700
Subject: [PATCH 21/22] [llvm-exegesis] [AArch64] Fix naming of syscall
register save/restore functions and use them as prologue/epilogue in
configurePerfCounter.
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 056ec00a2b150..9da5596ad89e0 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -181,7 +181,7 @@ constexpr std::array<unsigned, 8> SyscallArgumentRegisters{
AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7,
};
-static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
unsigned ArgumentCount) {
// AArch64 follows the AAPCS (ARM Architecture Procedure Call Standard):
// X0-X7 registers contain the first 8 arguments.
@@ -195,7 +195,7 @@ static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode,
}
}
-static void restoreSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode,
unsigned ArgumentCount) {
assert(ArgumentCount <= 8 &&
"This implementation restores up to 8 argument registers (X0-X7)");
@@ -383,11 +383,18 @@ uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const {
std::vector<MCInst>
ExegesisAArch64Target::configurePerfCounter(long Request,
bool SaveRegisters) const {
- std::vector<MCInst> ConfigurePerfCounterCode; // NOP
+ std::vector<MCInst> ConfigurePerfCounterCode;
+ if (SaveRegisters)
+ saveSyscallRegisters(ConfigurePerfCounterCode, 3);
+
// FIXME: SYSCALL exits with EBADF error - file descriptor is invalid
// No file is opened previosly to add as file descriptor
dbgs() << "Warning: configurePerfCounter not implemented, measurements will "
"be unreliable\n";
+
+ if (SaveRegisters)
+ restoreSyscallRegisters(ConfigurePerfCounterCode, 3);
+
return ConfigurePerfCounterCode;
}
>From a1247555d166d0f1591b222a84d56aeee613ec4c Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 4 Sep 2025 02:01:12 -0700
Subject: [PATCH 22/22] [llvm-exegesis] [AArch64] Header cleanup
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 9da5596ad89e0..651d7e9867ba3 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -17,7 +17,6 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCRegisterInfo.h"
#include <vector>
-#define DEBUG_TYPE "exegesis-aarch64-target"
#if defined(__aarch64__) && defined(__linux__)
#include <sys/mman.h>
@@ -26,8 +25,7 @@
#ifdef HAVE_LIBPFM
#include <perfmon/perf_event.h>
#endif // HAVE_LIBPFM
-#include <linux/prctl.h> // For PR_PAC_* constants
-#include <sys/prctl.h>
+#include <sys/prctl.h> // For PR_PAC_* constants
#ifndef PR_PAC_APIAKEY
#define PR_PAC_APIAKEY (1UL << 0)
#endif
More information about the llvm-commits
mailing list