[llvm] [llvm-exegesis] [AArch64] Add support for Load Instructions in subprocess execution mode (PR #144895)
Lakshay Kumar via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 08:46:32 PDT 2025
https://github.com/lakshayk-nv updated https://github.com/llvm/llvm-project/pull/144895
>From 3cfcda496bcd240e0a4141261010582911a71ab5 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 04:49:31 -0700
Subject: [PATCH 1/7] [llvm-exegesis] [AArch64] Use X16 instead of X8
- Switched to X16 as the temporary register in loadFPCRImmediate instead of X8, which is used by syscalls
- Updated the test case with the hardcoded register number.
---
llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s | 4 ++--
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index a4350fc6dc2cb..d0dc5c744ab80 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -70,6 +70,6 @@ RUN: llvm-objdump -d %d > %t.s
RUN: FileCheck %s --check-prefix=FPCR-ASM < %t.s
FPCR-ASM: <foo>:
FPCR-ASM: movi d{{[0-9]+}}, #0000000000000000
-FPCR-ASM-NEXT: mov x8, #0x0
-FPCR-ASM-NEXT: msr FPCR, x8
+FPCR-ASM-NEXT: mov x16, #0x0
+FPCR-ASM-NEXT: msr FPCR, x16
FPCR-ASM-NEXT: bfcvt h{{[0-9]+}}, s{{[0-9]+}}
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index a1eb5a46f21fc..cebab371cd5e0 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -120,7 +120,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
// Generates instructions to load an immediate value into an FPCR register.
static std::vector<MCInst>
loadFPCRImmediate(MCRegister Reg, unsigned RegBitWidth, const APInt &Value) {
- MCRegister TempReg = AArch64::X8;
+ MCRegister TempReg = AArch64::X16;
MCInst LoadImm = MCInstBuilder(AArch64::MOVi64imm).addReg(TempReg).addImm(0);
MCInst MoveToFPCR =
MCInstBuilder(AArch64::MSR).addImm(AArch64SysReg::FPCR).addReg(TempReg);
>From b244126e5a904f215228704c482460639d561c79 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 04:57:21 -0700
Subject: [PATCH 2/7] [llvm-exegesis] [AArch64] Add helpers to push/pop GPRs
and save/restore syscall registers and syscall generator
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 83 +++++++++++++++++++
1 file changed, 83 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index cebab371cd5e0..510bc3563ce48 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -153,6 +153,89 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
return Instructions;
}
+static void generateRegisterStackPush(unsigned int RegToPush,
+ std::vector<MCInst> &GeneratedCode,
+ int imm = -16) {
+ // STR [X|W]t, [SP, #simm]!: SP is decremented by default 16 bytes
+ // before the store to maintain 16-bytes alignment.
+ if (AArch64::GPR64RegClass.contains(RegToPush)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre)
+ .addReg(AArch64::SP)
+ .addReg(RegToPush)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else if (AArch64::GPR32RegClass.contains(RegToPush)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre)
+ .addReg(AArch64::SP)
+ .addReg(RegToPush)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else {
+ llvm_unreachable("Unsupported register class for stack push");
+ }
+}
+
+static void generateRegisterStackPop(unsigned int RegToPopTo,
+ std::vector<MCInst> &GeneratedCode,
+ int imm = 16) {
+ // LDR Xt, [SP], #simm: SP is incremented by default 16 bytes after the load.
+ if (AArch64::GPR64RegClass.contains(RegToPopTo)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost)
+ .addReg(AArch64::SP)
+ .addReg(RegToPopTo)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else if (AArch64::GPR32RegClass.contains(RegToPopTo)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost)
+ .addReg(AArch64::SP)
+ .addReg(RegToPopTo)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else {
+ llvm_unreachable("Unsupported register class for stack pop");
+ }
+}
+
+void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber)));
+ GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0));
+}
+
+/// Functions to save/restore system call registers
+#ifdef __linux__
+constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
+ AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+};
+
+static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+ unsigned ArgumentCount) {
+ // AArch64 Linux typically uses X0-X5 for the first 6 arguments.
+ // Some syscalls can take up to 8 arguments in X0-X7.
+ assert(ArgumentCount <= 6 &&
+ "This implementation saves up to 6 argument registers (X0-X5)");
+ // generateRegisterStackPush(ArgumentRegisters::TempRegister, GeneratedCode);
+ // Preserve X8 (used for the syscall number/return value).
+ generateRegisterStackPush(AArch64::X8, GeneratedCode);
+ // Preserve the registers used to pass arguments to the system call.
+ for (unsigned I = 0; I < ArgumentCount; ++I) {
+ generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
+ }
+}
+
+static void restoreSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+ unsigned ArgumentCount) {
+ assert(ArgumentCount <= 6 &&
+ "This implementation restores up to 6 argument registers (X0-X5)");
+ // Restore argument registers, in opposite order of the way they are saved.
+ for (int I = ArgumentCount - 1; I >= 0; --I) {
+ generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode);
+ }
+ generateRegisterStackPop(AArch64::X8, GeneratedCode);
+ // generateRegisterStackPop(ArgumentRegisters::TempRegister, GeneratedCode);
+}
+#endif // __linux__
#include "AArch64GenExegesis.inc"
namespace {
>From 853e171e4645fe5ab56a9d56d34c94459c7cdc7d Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 05:00:21 -0700
Subject: [PATCH 3/7] [llvm-exegesis] [AArch64] Implement memory management
required functions
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 198 ++++++++++++++++++
llvm/tools/llvm-exegesis/lib/Target.h | 4 +
2 files changed, 202 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 510bc3563ce48..1a2dbf32ca490 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -6,10 +6,26 @@
//
//===----------------------------------------------------------------------===//
#include "../Target.h"
+#include "../Error.h"
+#include "../MmapUtils.h"
+#include "../SerialSnippetGenerator.h"
+#include "../SnippetGenerator.h"
+#include "../SubprocessMemory.h"
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <vector>
+#define DEBUG_TYPE "exegesis-aarch64-target"
#if defined(__aarch64__) && defined(__linux__)
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h> // for getpagesize()
+#ifdef HAVE_LIBPFM
+#include <perfmon/perf_event.h>
+#endif // HAVE_LIBPFM
#include <linux/prctl.h> // For PR_PAC_* constants
#include <sys/prctl.h>
#ifndef PR_PAC_SET_ENABLED_KEYS
@@ -245,7 +261,39 @@ class ExegesisAArch64Target : public ExegesisTarget {
ExegesisAArch64Target()
: ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {}
+ enum ArgumentRegisters {
+ CodeSize = AArch64::X12,
+ AuxiliaryMemoryFD = AArch64::X13,
+ TempRegister = AArch64::X16,
+ };
+
+ std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
+ int imm = 0) const override {
+ std::vector<MCInst> Insts;
+ if (AArch64::GPR32RegClass.contains(Reg) ||
+ AArch64::GPR64RegClass.contains(Reg)) {
+ generateRegisterStackPop(Reg, Insts, imm);
+ return Insts;
+ }
+ return {};
+ }
+
private:
+#ifdef __linux__
+ std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;
+ std::vector<MCInst>
+ generateMmap(uintptr_t Address, size_t Length,
+ uintptr_t FileDescriptorAddress) const override;
+ void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;
+ std::vector<MCInst> generateMemoryInitialSetup() const override;
+ std::vector<MCInst> setStackRegisterToAuxMem() const override;
+ uintptr_t getAuxiliaryMemoryStartAddress() const override;
+ std::vector<MCInst> configurePerfCounter(long Request,
+ bool SaveRegisters) const override;
+ std::vector<MCRegister> getArgumentRegisters() const override;
+ std::vector<MCRegister> getRegistersNeedSaving() const override;
+#endif // __linux__
+
std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
const APInt &Value) const override {
if (AArch64::GPR32RegClass.contains(Reg))
@@ -314,6 +362,156 @@ class ExegesisAArch64Target : public ExegesisTarget {
} // namespace
+#ifdef __linux__
+// true : use a fixed address just below the virtual address space ceiling
+// false: let kernel choose the address of the auxiliary memory
+bool UseFixedAddress = true;
+
+static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;
+
+static void generateRoundToNearestPage(unsigned int TargetRegister,
+ std::vector<MCInst> &GeneratedCode) {
+ int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
+ // Round down to the nearest page by getting rid of the least significant bits
+ // representing location in the page.
+
+ // Single instruction using AND with inverted mask (effectively BIC)
+ uint64_t BitsToClearMask = (1ULL << PageSizeShift) - 1; // 0xFFF
+ uint64_t AndMask = ~BitsToClearMask; // ...FFFFFFFFFFFF000
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ANDXri)
+ .addReg(TargetRegister) // Xd
+ .addReg(TargetRegister) // Xn
+ .addImm(AndMask) // imm bitmask
+ );
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const {
+ std::vector<MCInst> ExitCallCode;
+ ExitCallCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, ExitCode)));
+ generateSysCall(SYS_exit, ExitCallCode); // SYS_exit is 93
+ return ExitCallCode;
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length,
+ uintptr_t FileDescriptorAddress) const {
+ // mmap(address, length, prot, flags, fd, offset=0)
+ int flags = MAP_SHARED;
+ if (Address != 0) {
+ flags |= MAP_FIXED_NOREPLACE;
+ }
+ std::vector<MCInst> MmapCode;
+ MmapCode.push_back(
+ loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr
+ MmapCode.push_back(
+ loadImmediate(AArch64::X1, 64, APInt(64, Length))); // length
+ MmapCode.push_back(loadImmediate(AArch64::X2, 64,
+ APInt(64, PROT_READ | PROT_WRITE))); // prot
+ MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
+ // FIXME: File descriptor address is not initialized.
+ // Copy file descriptor location from aux memory into X4
+ MmapCode.push_back(
+ loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd
+ // Dereference file descriptor into FD argument register
+ // MmapCode.push_back(MCInstBuilder(AArch64::LDRWui)
+ // .addReg(AArch64::W4) // Destination register
+ // .addReg(AArch64::X4) // Base register (address)
+ // .addImm(0)); // Offset (-byte words)
+ // FIXME: This is not correct.
+ MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
+ generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222
+ return MmapCode;
+}
+
+void ExegesisAArch64Target::generateMmapAuxMem(
+ std::vector<MCInst> &GeneratedCode) const {
+ int fd = -1;
+ int flags = MAP_SHARED;
+ uintptr_t address = getAuxiliaryMemoryStartAddress();
+ if (fd == -1)
+ flags |= MAP_ANONYMOUS;
+ if (address != 0)
+ flags |= MAP_FIXED_NOREPLACE;
+ int prot = PROT_READ | PROT_WRITE;
+
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X0, 64, APInt(64, address))); // map adr
+ GeneratedCode.push_back(loadImmediate(
+ AArch64::X1, 64,
+ APInt(64, SubprocessMemory::AuxiliaryMemorySize))); // length
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X2, 64, APInt(64, prot))); // prot
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
+ GeneratedCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
+ generateSysCall(SYS_mmap, GeneratedCode); // SYS_mmap is 222
+}
+
+std::vector<MCInst> ExegesisAArch64Target::generateMemoryInitialSetup() const {
+ std::vector<MCInst> MemoryInitialSetupCode;
+ generateMmapAuxMem(MemoryInitialSetupCode); // FIXME: Uninit file descriptor
+
+ // If using fixed address for auxiliary memory skip this step,
+ // When using dynamic memory allocation (non-fixed address), we must preserve
+ // the mmap return value (X0) which contains the allocated memory address.
+ // This value is saved to the stack to ensure registers requiring memory
+ // access can retrieve the correct address even if X0 is modified by
+ // intermediate code.
+ generateRegisterStackPush(AArch64::X0, MemoryInitialSetupCode);
+ // FIXME: Ensure stack pointer remains stable to prevent loss of saved address
+ return MemoryInitialSetupCode;
+}
+
+std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
+ std::vector<MCInst> instructions; // NOP
+ // TODO: Implement this, Found no need for this in AArch64.
+ return instructions;
+}
+
+uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const {
+ if (!UseFixedAddress)
+ // Allow kernel to select an appropriate memory address
+ return 0;
+ // Return the second to last page in the virtual address space
+ // to try and prevent interference with memory annotations in the snippet
+ // VAddressSpaceCeiling = 0x0000800000000000
+ // FIXME: Why 2 pages?
+ return VAddressSpaceCeiling - (2 * getpagesize());
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::configurePerfCounter(long Request,
+ bool SaveRegisters) const {
+ std::vector<MCInst> ConfigurePerfCounterCode; // NOP
+ // FIXME: SYSCALL exits with EBADF error - file descriptor is invalid
+ // No file is opened previously to add as file descriptor
+ return ConfigurePerfCounterCode;
+}
+
+std::vector<MCRegister> ExegesisAArch64Target::getArgumentRegisters() const {
+ return {AArch64::X0, AArch64::X1};
+}
+
+std::vector<MCRegister> ExegesisAArch64Target::getRegistersNeedSaving() const {
+ return {
+ AArch64::X0,
+ AArch64::X1,
+ AArch64::X2,
+ AArch64::X3,
+ AArch64::X4,
+ AArch64::X5,
+ AArch64::X8,
+ ArgumentRegisters::TempRegister,
+ ArgumentRegisters::CodeSize,
+ ArgumentRegisters::AuxiliaryMemoryFD,
+ };
+}
+
+#endif // __linux__
+
static ExegesisTarget *getTheExegesisAArch64Target() {
static ExegesisAArch64Target Target;
return &Target;
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 77fbaa6e95412..736c9d9ff6c23 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -308,6 +308,10 @@ class ExegesisTarget {
return std::make_unique<SavedState>();
}
+ virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg, int imm = 0) const {
+ return {};
+ }
+
private:
virtual bool matchesArch(Triple::ArchType Arch) const = 0;
>From b32026b85e0fc7f1a9ccccd061e060e37370b480 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 06:16:21 -0700
Subject: [PATCH 4/7] [llvm-exegesis] [AArch64] Implement different register
initialization for subprocess execution mode
---
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 34 ++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index fd7924db08441..a73eaf76a46d7 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -66,6 +66,8 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
assert(MM.Address % getpagesize() == 0 &&
"Memory mappings need to be aligned to page boundaries.");
#endif
+ // FIXME: file descriptor for aux memory seems not initialized.
+ // TODO: Invoke openat syscall to get correct fd for aux memory
const MemoryValue &MemVal = Key.MemoryValues.at(MM.MemoryValueName);
BBF.addInstructions(ET.generateMmap(
MM.Address, MemVal.SizeBytes,
@@ -78,15 +80,47 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
Register StackPointerRegister = BBF.MF.getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore();
+#define DEBUG_TYPE "register-initial-values"
+ // FIXME: Only loading first register with memory address is hacky.
+ bool isFirstRegister = true;
for (const RegisterValue &RV : Key.RegisterInitialValues) {
+ // Debug: register name and class name and value from BenchmarkKey
+ const MCRegisterInfo *RegInfo = BBF.MF.getTarget().getMCRegisterInfo();
+ const char *RegName = RegInfo->getName(RV.Register);
+ const char *regClassName = "Unknown";
+ for (unsigned i = 0, e = RegInfo->getNumRegClasses(); i < e; ++i) {
+ const MCRegisterClass &RC = RegInfo->getRegClass(i);
+ if (RC.contains(RV.Register)) {
+ regClassName = RegInfo->getRegClassName(&RC);
+ break;
+ }
+ }
+ LLVM_DEBUG(
+ dbgs() << "Setting register (Class: " << regClassName << ") " << RegName
+ << std::string(
+ std::max(0, 3 - static_cast<int>(strlen(RegName))), ' '));
+
if (GenerateMemoryInstructions) {
// If we're generating memory instructions, don't load in the value for
// the register with the stack pointer as it will be used later to finish
// the setup.
if (Register(RV.Register) == StackPointerRegister)
continue;
+#if defined(__aarch64__)
+ auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16);
+ if (!StackLoadInsts.empty() && isFirstRegister) {
+ for (const auto &Inst : StackLoadInsts)
+ BBF.addInstruction(Inst);
+ isFirstRegister = false;
+ LLVM_DEBUG(dbgs() << "from stack with post-increment offset of " << 16
+ << " bytes\n");
+ continue;
+ }
+#endif
}
// Load a constant in the register.
+ LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n");
+#undef DEBUG_TYPE
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
if (SetRegisterCode.empty())
IsSnippetSetupComplete = false;
>From 018a9db687982aaa808a9dfcf9b0c3a1b728ab17 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 06:23:19 -0700
Subject: [PATCH 5/7] [llvm-exegesis] [AArch64] Resolve Merge Conflict coming
from reverted #136868
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 77 -------------------
1 file changed, 77 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 1a2dbf32ca490..df9fbf6946005 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,12 +28,6 @@
#endif // HAVE_LIBPFM
#include <linux/prctl.h> // For PR_PAC_* constants
#include <sys/prctl.h>
-#ifndef PR_PAC_SET_ENABLED_KEYS
-#define PR_PAC_SET_ENABLED_KEYS 60
-#endif
-#ifndef PR_PAC_GET_ENABLED_KEYS
-#define PR_PAC_GET_ENABLED_KEYS 61
-#endif
#ifndef PR_PAC_APIAKEY
#define PR_PAC_APIAKEY (1UL << 0)
#endif
@@ -54,47 +48,6 @@
namespace llvm {
namespace exegesis {
-bool isPointerAuth(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
-
- // FIXME: Pointer Authentication instructions.
- // We would like to measure these instructions, but they can behave
- // differently on different platforms, and maybe the snippets need to look
- // different for these instructions,
- // Platform-specific handling: On Linux, we disable authentication, may
- // interfere with measurements. On non-Linux platforms, disable opcodes for
- // now.
- case AArch64::AUTDA:
- case AArch64::AUTDB:
- case AArch64::AUTDZA:
- case AArch64::AUTDZB:
- case AArch64::AUTIA:
- case AArch64::AUTIA1716:
- case AArch64::AUTIASP:
- case AArch64::AUTIAZ:
- case AArch64::AUTIB:
- case AArch64::AUTIB1716:
- case AArch64::AUTIBSP:
- case AArch64::AUTIBZ:
- case AArch64::AUTIZA:
- case AArch64::AUTIZB:
- return true;
- }
-}
-
-bool isLoadTagMultiple(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
-
- // Load tag multiple instruction
- case AArch64::LDGM:
- return true;
- }
-}
-
static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
switch (RegBitWidth) {
case 32:
@@ -330,36 +283,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
PM.add(createAArch64ExpandPseudoPass());
}
- const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
- unsigned Opcode) const override {
- if (const char *Reason =
- ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode))
- return Reason;
-
- if (isPointerAuth(Opcode)) {
-#if defined(__aarch64__) && defined(__linux__)
- // Disable all PAC keys. Note that while we expect the measurements to
- // be the same with PAC keys disabled, they could potentially be lower
- // since authentication checks are bypassed.
- if (prctl(PR_PAC_SET_ENABLED_KEYS,
- PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY |
- PR_PAC_APDBKEY, // all keys
- 0, // disable all
- 0, 0) < 0) {
- return "Failed to disable PAC keys";
- }
-#else
- return "Unsupported opcode: isPointerAuth";
-#endif
- }
-
- if (isLoadTagMultiple(Opcode))
- return "Unsupported opcode: load tag multiple";
-
- return nullptr;
- }
-};
-
} // namespace
#ifdef __linux__
>From ef5de8b73e92183cfae6739c6896ae07318c4c63 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 06:58:45 -0700
Subject: [PATCH 6/7] [llvm-exegesis] [AArch64] Format changes
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 3 +--
llvm/tools/llvm-exegesis/lib/Target.h | 3 ++-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index a82470ef74dcf..c8613f360b376 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -223,7 +223,7 @@ class ExegesisAArch64Target : public ExegesisTarget {
std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
int imm = 0) const override {
std::vector<MCInst> Insts;
- if (AArch64::GPR32RegClass.contains(Reg) ||
+ if (AArch64::GPR32RegClass.contains(Reg) ||
AArch64::GPR64RegClass.contains(Reg)) {
generateRegisterStackPop(Reg, Insts, imm);
return Insts;
@@ -283,7 +283,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
PM.add(createAArch64ExpandPseudoPass());
}
-
} // namespace
#ifdef __linux__
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 736c9d9ff6c23..0304908cbb2b2 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -308,7 +308,8 @@ class ExegesisTarget {
return std::make_unique<SavedState>();
}
- virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg, int imm = 0) const {
+ virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
+ int imm = 0) const {
return {};
}
>From 4d0ff8bbd500e5b3b4c4ad31f9ee6f7d56a3db96 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Thu, 19 Jun 2025 08:45:38 -0700
Subject: [PATCH 7/7] [llvm-exegesis] [AArch64] Fix missing closing brace in
Target.cpp
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index c8613f360b376..c3bd18c3a8440 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -282,6 +282,7 @@ class ExegesisAArch64Target : public ExegesisTarget {
// Function return is a pseudo-instruction that needs to be expanded
PM.add(createAArch64ExpandPseudoPass());
}
+};
} // namespace
More information about the llvm-commits
mailing list