[llvm] [llvm-exegesis] [AArch64] Resolving "snippet crashed while running: Segmentation fault" for Load Instructions (PR #142552)
Lakshay Kumar via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 08:04:42 PDT 2025
https://github.com/lakshayk-nv updated https://github.com/llvm/llvm-project/pull/142552
>From 4aed344ec52f3b7eb66444aeeb47acfdd8fb2517 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Fri, 30 May 2025 07:21:39 -0700
Subject: [PATCH 1/9] [llvm-exegesis] [AArch64] Use X16 instead of X8
Use X16 as the temporary register in loadFPCRImmediate instead of X8, which holds the syscall number on Linux/AArch64.
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index a1eb5a46f21fc..cebab371cd5e0 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -120,7 +120,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
// Generates instructions to load an immediate value into an FPCR register.
static std::vector<MCInst>
loadFPCRImmediate(MCRegister Reg, unsigned RegBitWidth, const APInt &Value) {
- MCRegister TempReg = AArch64::X8;
+ MCRegister TempReg = AArch64::X16;
MCInst LoadImm = MCInstBuilder(AArch64::MOVi64imm).addReg(TempReg).addImm(0);
MCInst MoveToFPCR =
MCInstBuilder(AArch64::MSR).addImm(AArch64SysReg::FPCR).addReg(TempReg);
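For context, a rough sketch (mine, not part of the patch) of the hazard this avoids: on Linux/AArch64 the syscall number travels in X8, so if register setup ever lands between staging a syscall number and the svc, sharing X8 as a scratch would clobber it. Built from the helpers this series introduces, with SYS_munmap only as an example syscall number:

    // Sketch only: X16 keeps the FPCR temporary disjoint from the syscall
    // number register.
    std::vector<MCInst> Setup;
    Setup.push_back(loadImmediate(AArch64::X8, 64, APInt(64, SYS_munmap)));
    Setup.push_back(MCInstBuilder(AArch64::MOVi64imm)
                        .addReg(AArch64::X16) // was X8 before this patch
                        .addImm(0));
    Setup.push_back(MCInstBuilder(AArch64::MSR)
                        .addImm(AArch64SysReg::FPCR)
                        .addReg(AArch64::X16));
    Setup.push_back(MCInstBuilder(AArch64::SVC).addImm(0)); // X8 still intact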
>From 0476153210b9605738e5ea886ac6a6f62d203a2a Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:00:04 -0700
Subject: [PATCH 2/9] [llvm-exegesis] [AArch64] Add helpers to push/pop GPRs,
 save/restore syscall argument registers, and generate syscalls
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 83 +++++++++++++++++++
1 file changed, 83 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index cebab371cd5e0..a1a6913773b52 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -153,6 +153,89 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
return Instructions;
}
+static void generateRegisterStackPush(unsigned int RegToPush,
+ std::vector<MCInst> &GeneratedCode,
+ int imm = -16) {
+  // STR [X|W]t, [SP, #simm]!: SP is decremented (by 16 bytes by default)
+  // before the store to maintain 16-byte alignment.
+ if (AArch64::GPR64RegClass.contains(RegToPush)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre)
+ .addReg(AArch64::SP)
+ .addReg(RegToPush)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else if (AArch64::GPR32RegClass.contains(RegToPush)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre)
+ .addReg(AArch64::SP)
+ .addReg(RegToPush)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else {
+ llvm_unreachable("Unsupported register class for stack push");
+ }
+}
+
+static void generateRegisterStackPop(unsigned int RegToPopTo,
+ std::vector<MCInst> &GeneratedCode,
+ int imm = 16) {
+  // LDR Xt, [SP], #simm: SP is incremented by 16 bytes (by default) after the load.
+ if (AArch64::GPR64RegClass.contains(RegToPopTo)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost)
+ .addReg(AArch64::SP)
+ .addReg(RegToPopTo)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else if (AArch64::GPR32RegClass.contains(RegToPopTo)) {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost)
+ .addReg(AArch64::SP)
+ .addReg(RegToPopTo)
+ .addReg(AArch64::SP)
+ .addImm(imm));
+ } else {
+ llvm_unreachable("Unsupported register class for stack pop");
+ }
+}
+
+void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber)));
+ GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0));
+}
+
+/// Functions to save/restore system call registers
+#ifdef __linux__
+constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
+ AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+};
+
+static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+ unsigned ArgumentCount) {
+  // The AArch64 Linux syscall ABI passes up to six arguments in X0-X5;
+  // the syscall number itself goes in X8.
+ assert(ArgumentCount <= 6 &&
+ "This implementation saves up to 6 argument registers (X0-X5)");
+ // generateRegisterStackPush(AArch64::X16, GeneratedCode);
+  // Preserve X8, which is used to pass the syscall number.
+ generateRegisterStackPush(AArch64::X8, GeneratedCode);
+ // Preserve the registers used to pass arguments to the system call.
+ for (unsigned I = 0; I < ArgumentCount; ++I) {
+ generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
+ }
+}
+
+static void restoreSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+ unsigned ArgumentCount) {
+ assert(ArgumentCount <= 6 &&
+ "This implementation restores up to 6 argument registers (X0-X5)");
+  // Restore the argument registers in the reverse of the order they were saved.
+ for (int I = ArgumentCount - 1; I >= 0; --I) {
+ generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode);
+ }
+ generateRegisterStackPop(AArch64::X8, GeneratedCode);
+ // generateRegisterStackPop(AArch64::X16, GeneratedCode);
+}
+#endif // __linux__
#include "AArch64GenExegesis.inc"
namespace {
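As a usage sketch (not part of the patch), these helpers are meant to compose so that a syscall emitted into generated code leaves X8 and the argument registers as it found them. For a hypothetical three-argument syscall, with SyscallNumber and Arg0..Arg2 as placeholders:

    std::vector<MCInst> Code;
    saveSysCallRegisters(Code, /*ArgumentCount=*/3);    // push X8, X0, X1, X2
    Code.push_back(loadImmediate(AArch64::X0, 64, APInt(64, Arg0)));
    Code.push_back(loadImmediate(AArch64::X1, 64, APInt(64, Arg1)));
    Code.push_back(loadImmediate(AArch64::X2, 64, APInt(64, Arg2)));
    generateSysCall(SyscallNumber, Code);               // mov x8, #nr ; svc #0
    restoreSysCallRegisters(Code, /*ArgumentCount=*/3); // pop X2, X1, X0, then X8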
>From f54ac57230199af53a4a2dc34056a862f69d198d Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:03:07 -0700
Subject: [PATCH 3/9] [llvm-exegesis] [AArch64] WIP implementation of memory
management functions used by subprocess execution mode.
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 367 ++++++++++++++++++
1 file changed, 367 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index a1a6913773b52..6b6859729cb10 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -6,10 +6,25 @@
//
//===----------------------------------------------------------------------===//
#include "../Target.h"
+#include "../Error.h"
+#include "../MmapUtils.h"
+#include "../SerialSnippetGenerator.h"
+#include "../SnippetGenerator.h"
+#include "../SubprocessMemory.h"
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <vector>
#if defined(__aarch64__) && defined(__linux__)
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h> // for getpagesize()
+#ifdef HAVE_LIBPFM
+#include <perfmon/perf_event.h>
+#endif // HAVE_LIBPFM
#include <linux/prctl.h> // For PR_PAC_* constants
#include <sys/prctl.h>
#ifndef PR_PAC_SET_ENABLED_KEYS
@@ -245,7 +260,30 @@ class ExegesisAArch64Target : public ExegesisTarget {
ExegesisAArch64Target()
: ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {}
+ enum ArgumentRegisters {
+ CodeSize = AArch64::X12,
+ AuxiliaryMemoryFD = AArch64::X13
+ };
+
private:
+#ifdef __linux__
+ void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override;
+ void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const override;
+ std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;
+ std::vector<MCInst>
+ generateMmap(uintptr_t Address, size_t Length,
+ uintptr_t FileDescriptorAddress) const override;
+ void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;
+ void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const override;
+ std::vector<MCInst> generateMemoryInitialSetup() const override;
+ std::vector<MCInst> setStackRegisterToAuxMem() const override;
+ uintptr_t getAuxiliaryMemoryStartAddress() const override;
+ std::vector<MCInst> configurePerfCounter(long Request,
+ bool SaveRegisters) const override;
+ std::vector<MCRegister> getArgumentRegisters() const override;
+ std::vector<MCRegister> getRegistersNeedSaving() const override;
+#endif // __linux__
+
std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
const APInt &Value) const override {
if (AArch64::GPR32RegClass.contains(Reg))
@@ -314,6 +352,335 @@ class ExegesisAArch64Target : public ExegesisTarget {
} // namespace
+#ifdef __linux__
+// true : use a fixed address near the virtual address space ceiling
+// false: let the kernel choose the address of the auxiliary memory
+bool UseFixedAddress = true; // TODO: Remove this later
+
+static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;
+
+static void generateRoundToNearestPage(unsigned int TargetRegister,
+ std::vector<MCInst> &GeneratedCode) {
+ int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
+ // Round down to the nearest page by getting rid of the least significant bits
+ // representing location in the page.
+
+ // Single instruction using AND with inverted mask (effectively BIC)
+ uint64_t BitsToClearMask = (1ULL << PageSizeShift) - 1; // 0xFFF
+ uint64_t AndMask = ~BitsToClearMask; // ...FFFFFFFFFFFF000
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ANDXri)
+ .addReg(TargetRegister) // Xd
+ .addReg(TargetRegister) // Xn
+ .addImm(AndMask) // imm bitmask
+ );
+}
+static void generateGetInstructionPointer(unsigned int ResultRegister,
+ std::vector<MCInst> &GeneratedCode) {
+ // ADR X[ResultRegister], . : loads address of current instruction
+ // ADR : Form PC-relative address
+ // This instruction adds an immediate value to the PC value to form a
+ // PC-relative address, and writes the result to the destination register.
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ADR)
+ .addReg(ResultRegister) // Xd
+ .addImm(0)); // Offset
+}
+
+// TODO: This implementation mirrors the x86 version and requires validation.
+// The purpose of this memory unmapping needs to be verified for AArch64
+void ExegesisAArch64Target::generateLowerMunmap(
+ std::vector<MCInst> &GeneratedCode) const {
+ // Unmap starting at address zero
+ GeneratedCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, 0)));
+ // Get the current instruction pointer so we know where to unmap up to.
+ generateGetInstructionPointer(AArch64::X1, GeneratedCode);
+ generateRoundToNearestPage(AArch64::X1, GeneratedCode);
+ // Subtract a page from the end of the unmap so we don't unmap the currently
+ // executing section.
+ long page_size = getpagesize();
+ // Load page_size into a temporary register (e.g., X16)
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X16, 64, APInt(64, page_size)));
+ // Subtract X16 (containing page_size) from X1
+ GeneratedCode.push_back(MCInstBuilder(AArch64::SUBXrr)
+ .addReg(AArch64::X1) // Dest
+ .addReg(AArch64::X1) // Src
+ .addReg(AArch64::X16)); // page_size
+ generateSysCall(SYS_munmap, GeneratedCode);
+}
+
+// FIXME: This implementation mirrors the x86 version and requires validation.
+// The purpose of this memory unmapping needs to be verified for AArch64
+// The correctness of this implementation needs to be verified.
+void ExegesisAArch64Target::generateUpperMunmap(
+ std::vector<MCInst> &GeneratedCode) const {
+ generateGetInstructionPointer(AArch64::X4, GeneratedCode);
+ // Load the size of the snippet from the argument register into X0
+  // FIXME: The argument register does not seem to be initialized.
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ORRXrr)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::XZR)
+ .addReg(ArgumentRegisters::CodeSize));
+ // Add the length of the snippet (in X0) to the current instruction pointer
+ // (in X4) to get the address where we should start unmapping at.
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ADDXrr)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X4));
+ generateRoundToNearestPage(AArch64::X0, GeneratedCode);
+  // Add one page to the start address to ensure it is above the snippet,
+  // since the rounding above rounds down.
+ long page_size = getpagesize();
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X16, 64, APInt(64, page_size)));
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ADDXrr)
+ .addReg(AArch64::X0) // Dest
+ .addReg(AArch64::X0) // Src
+ .addReg(AArch64::X16)); // page_size
+ // Unmap to just one page under the ceiling of the address space.
+ GeneratedCode.push_back(loadImmediate(
+ AArch64::X1, 64, APInt(64, VAddressSpaceCeiling - getpagesize())));
+ GeneratedCode.push_back(MCInstBuilder(AArch64::SUBXrr)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::X0));
+ generateSysCall(SYS_munmap, GeneratedCode); // SYS_munmap is 215
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const {
+ std::vector<MCInst> ExitCallCode;
+ ExitCallCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, ExitCode)));
+ generateSysCall(SYS_exit, ExitCallCode); // SYS_exit is 93
+ return ExitCallCode;
+}
+
+// FIXME: This implementation mirrors the x86 version and requires validation.
+// The correctness of this implementation needs to be verified.
+// mmap(address, length, prot, flags, fd, offset=0)
+std::vector<MCInst>
+ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length,
+ uintptr_t FileDescriptorAddress) const {
+ int flags = MAP_SHARED;
+ if (Address != 0) {
+ flags |= MAP_FIXED_NOREPLACE;
+ }
+ std::vector<MCInst> MmapCode;
+ MmapCode.push_back(
+ loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr
+ MmapCode.push_back(
+ loadImmediate(AArch64::X1, 64, APInt(64, Length))); // length
+ MmapCode.push_back(loadImmediate(AArch64::X2, 64,
+ APInt(64, PROT_READ | PROT_WRITE))); // prot
+ MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
+ // FIXME: File descriptor address is not initialized.
+ // Copy file descriptor location from aux memory into X4
+ MmapCode.push_back(
+ loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd
+  // // Dereference the file descriptor into the FD argument register
+  // // (TODO: Why? & correct?)
+  // MmapCode.push_back(
+  //     MCInstBuilder(AArch64::LDRWui)
+  //         .addReg(AArch64::W4) // Destination register
+  //         .addReg(AArch64::X4) // Base register (address)
+  //         .addImm(0)); // Offset (in 4-byte words, 0 means no offset)
+ MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
+ generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222
+ return MmapCode;
+}
+
+// FIXME: This implementation mirrors the x86 version and requires validation.
+// The correctness of this implementation needs to be verified.
+void ExegesisAArch64Target::generateMmapAuxMem(
+ std::vector<MCInst> &GeneratedCode) const {
+ int fd = -1;
+ int flags = MAP_SHARED;
+ uintptr_t address = getAuxiliaryMemoryStartAddress();
+ if (fd == -1)
+ flags |= MAP_ANONYMOUS;
+ if (address != 0)
+ flags |= MAP_FIXED_NOREPLACE;
+ int prot = PROT_READ | PROT_WRITE;
+
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X0, 64, APInt(64, address))); // map adr
+ GeneratedCode.push_back(loadImmediate(
+ AArch64::X1, 64,
+ APInt(64, SubprocessMemory::AuxiliaryMemorySize))); // length
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X2, 64, APInt(64, prot))); // prot
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
+ GeneratedCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd
+ GeneratedCode.push_back(
+ loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
+ generateSysCall(SYS_mmap, GeneratedCode); // SYS_mmap is 222
+}
+
+void ExegesisAArch64Target::moveArgumentRegisters(
+ std::vector<MCInst> &GeneratedCode) const {
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ORRXrr)
+ .addReg(ArgumentRegisters::CodeSize)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X0));
+ GeneratedCode.push_back(MCInstBuilder(AArch64::ORRXrr)
+ .addReg(ArgumentRegisters::AuxiliaryMemoryFD)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X1));
+}
+
+std::vector<MCInst> ExegesisAArch64Target::generateMemoryInitialSetup() const {
+ std::vector<MCInst> MemoryInitialSetupCode;
+ // moveArgumentRegisters(MemoryInitialSetupCode);
+ // generateLowerMunmap(MemoryInitialSetupCode); // TODO: Motivation Unclear
+ // generateUpperMunmap(MemoryInitialSetupCode); // FIXME: Motivation Unclear
+ // TODO: Revert argument registers value, if munmap is used.
+
+ generateMmapAuxMem(MemoryInitialSetupCode); // FIXME: Uninit file descriptor
+
+  // If a fixed address is used for the auxiliary memory, skip this step.
+ // When using dynamic memory allocation (non-fixed address), we must preserve
+ // the mmap return value (X0) which contains the allocated memory address.
+ // This value is saved to the stack to ensure registers requiring memory
+ // access can retrieve the correct address even if X0 is modified by
+ // intermediate code.
+ generateRegisterStackPush(AArch64::X0, MemoryInitialSetupCode);
+ // FIXME: Ensure stack pointer remains stable to prevent loss of saved address
+ return MemoryInitialSetupCode;
+}
+
+// TODO: This implementation mirrors the x86 version and requires validation.
+// The purpose of moving stack pointer to aux memory needs to be verified for
+// AArch64
+std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
+ return std::vector<MCInst>(); // NOP
+
+  // Below is an implementation for AArch64, but the motivation is unclear.
+ // std::vector<MCInst> instructions; // NOP
+ // const uint64_t targetSPValue = getAuxiliaryMemoryStartAddress() +
+ // SubprocessMemory::AuxiliaryMemorySize;
+ // // sub, stack args and local storage
+ // // Use X16 as a temporary register since it's a scratch register
+ // const MCRegister TempReg = AArch64::X16;
+
+ // // Load the 64-bit immediate into TempReg using MOVZ/MOVK sequence
+ // // MOVZ Xd, #imm16, LSL #(shift_val * 16)
+ // // MOVK Xd, #imm16, LSL #(shift_val * 16) (* 3 times for 64-bit immediate)
+
+ // // 1. MOVZ TmpReg, #(targetSPValue & 0xFFFF), LSL #0
+ // instructions.push_back(
+ // MCInstBuilder(AArch64::MOVZXi)
+ // .addReg(TempReg)
+ // .addImm(static_cast<uint16_t>(targetSPValue & 0xFFFF)) // imm16
+ // .addImm(0)); // hw (shift/16) = 0
+  // // 2. MOVK TmpReg, #((targetSPValue >> 16) & 0xFFFF), LSL #16
+  // if (((targetSPValue >> 16) & 0xFFFF) != 0 || (targetSPValue > 0xFFFF)) {
+  //   instructions.push_back(
+  //       MCInstBuilder(AArch64::MOVKXi)
+  //           .addReg(TempReg)
+  //           .addReg(TempReg)
+  //           .addImm(static_cast<uint16_t>((targetSPValue >> 16) & 0xFFFF)) // imm16
+  //           .addImm(1)); // hw (shift/16) = 1
+  // }
+  // // 3. MOVK TmpReg, #((targetSPValue >> 32) & 0xFFFF), LSL #32
+  // if (((targetSPValue >> 32) & 0xFFFF) != 0 || (targetSPValue > 0xFFFFFFFF)) {
+  //   instructions.push_back(
+  //       MCInstBuilder(AArch64::MOVKXi)
+  //           .addReg(TempReg)
+  //           .addReg(TempReg)
+  //           .addImm(static_cast<uint16_t>((targetSPValue >> 32) & 0xFFFF)) // imm16
+  //           .addImm(2)); // hw (shift/16) = 2
+  // }
+  // // 4. MOVK TmpReg, #((targetSPValue >> 48) & 0xFFFF), LSL #48
+  // if (((targetSPValue >> 48) & 0xFFFF) != 0 ||
+  //     (targetSPValue > 0xFFFFFFFFFFFF)) {
+  //   instructions.push_back(
+  //       MCInstBuilder(AArch64::MOVKXi)
+  //           .addReg(TempReg)
+  //           .addReg(TempReg)
+  //           .addImm(static_cast<uint16_t>((targetSPValue >> 48) & 0xFFFF)) // imm16
+  //           .addImm(3)); // hw (shift/16) = 3
+  // }
+ // // Finally, move the value from TempReg to SP
+ // instructions.push_back(
+ // MCInstBuilder(AArch64::ADDXri) // ADD SP, TempReg, #0
+ // .addReg(AArch64::SP)
+ // .addReg(TempReg)
+ // .addImm(0) // imm = 0
+ // .addImm(0)); // shift = 0
+
+ // return instructions;
+}
+
+uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const {
+ if (!UseFixedAddress)
+ // Allow kernel to select an appropriate memory address
+ return 0;
+ // Return the second to last page in the virtual address space
+ // to try and prevent interference with memory annotations in the snippet
+ // VAddressSpaceCeiling = 0x0000800000000000
+ // FIXME: Why 2 pages?
+ return VAddressSpaceCeiling - (2 * getpagesize());
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::configurePerfCounter(long Request,
+ bool SaveRegisters) const {
+ return std::vector<MCInst>(); // NOP
+
+ // Current SYSCALL exits with EBADF error - file descriptor is invalid
+ // Unsure how to implement this for AArch64
+ std::vector<MCInst> ConfigurePerfCounterCode;
+ if (SaveRegisters)
+ saveSysCallRegisters(ConfigurePerfCounterCode, 3);
+
+ // Move the file descriptor (stored at the start of auxiliary memory) into X0.
+ // FIXME: This file descriptor at start of aux memory is not initialized.
+ uintptr_t fd_adr =
+ getAuxiliaryMemoryStartAddress() + SubprocessMemory::AuxiliaryMemorySize;
+ ConfigurePerfCounterCode.push_back(
+ loadImmediate(AArch64::X0, 64, APInt(64, fd_adr)));
+ ConfigurePerfCounterCode.push_back(
+ loadImmediate(AArch64::X1, 64, APInt(64, Request)));
+
+#ifdef HAVE_LIBPFM
+ ConfigurePerfCounterCode.push_back(
+ loadImmediate(AArch64::X2, 64, APInt(64, PERF_IOC_FLAG_GROUP)));
+#endif
+
+ generateSysCall(SYS_ioctl, ConfigurePerfCounterCode);
+
+ if (SaveRegisters)
+ restoreSysCallRegisters(ConfigurePerfCounterCode, 3);
+
+ return ConfigurePerfCounterCode;
+}
+
+std::vector<MCRegister> ExegesisAArch64Target::getArgumentRegisters() const {
+ return {AArch64::X0, AArch64::X1};
+}
+
+std::vector<MCRegister> ExegesisAArch64Target::getRegistersNeedSaving() const {
+ return {
+ AArch64::X0,
+ AArch64::X1,
+ AArch64::X2,
+ AArch64::X3,
+ AArch64::X4,
+ AArch64::X5,
+ AArch64::X8,
+ AArch64::X16,
+ ArgumentRegisters::CodeSize,
+ ArgumentRegisters::AuxiliaryMemoryFD,
+ };
+}
+
+#endif // __linux__
+
static ExegesisTarget *getTheExegesisAArch64Target() {
static ExegesisAArch64Target Target;
return &Target;
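For reference (sketch, not part of the patch), the userspace equivalent of the mapping that generateMmapAuxMem stages through X0-X5 and X8 before svc #0, assuming a libc that exposes MAP_FIXED_NOREPLACE; the open question flagged above is where a real file descriptor would come from when the mapping is not anonymous:

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdint>

    // Userspace mirror of the generated SYS_mmap (222) call: anonymous,
    // shared, read/write, optionally pinned to the fixed auxiliary-memory
    // address. In the generated code, Length is
    // SubprocessMemory::AuxiliaryMemorySize and fd is -1.
    void *mapAuxMemSketch(uintptr_t Address, size_t Length) {
      int Flags = MAP_SHARED | MAP_ANONYMOUS;
      if (Address != 0)
        Flags |= MAP_FIXED_NOREPLACE;
      return mmap(reinterpret_cast<void *>(Address), Length,
                  PROT_READ | PROT_WRITE, Flags, /*fd=*/-1, /*offset=*/0);
    }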
>From 524e42f499ed15891588940126105137b01afdb3 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:12:12 -0700
Subject: [PATCH 4/9] [llvm-exegesis] [AArch64] Add WIP implementation to
 initialize registers requiring a memory address
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 14 ++++++++++++++
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 11 +++++++++++
llvm/tools/llvm-exegesis/lib/Target.h | 4 ++++
3 files changed, 29 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 6b6859729cb10..2b73e002c4300 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -265,6 +265,20 @@ class ExegesisAArch64Target : public ExegesisTarget {
AuxiliaryMemoryFD = AArch64::X13
};
+ std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
+ int imm = 0) const override {
+ std::vector<MCInst> Insts;
+ if (AArch64::GPR32RegClass.contains(Reg)) {
+ generateRegisterStackPop(Reg, Insts, imm);
+ return Insts;
+ }
+ if (AArch64::GPR64RegClass.contains(Reg)) {
+ generateRegisterStackPop(Reg, Insts, imm);
+ return Insts;
+ }
+ return {};
+ }
+
private:
#ifdef __linux__
void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override;
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index fd7924db08441..13202968f894d 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -78,6 +78,7 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
Register StackPointerRegister = BBF.MF.getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore();
+ bool isFirstRegister = true;
for (const RegisterValue &RV : Key.RegisterInitialValues) {
if (GenerateMemoryInstructions) {
// If we're generating memory instructions, don't load in the value for
@@ -85,7 +86,17 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
// the setup.
if (Register(RV.Register) == StackPointerRegister)
continue;
+#if defined(__aarch64__)
+ auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16);
+ if (!StackLoadInsts.empty() && isFirstRegister) {
+ for (const auto &Inst : StackLoadInsts)
+ BBF.addInstruction(Inst);
+ isFirstRegister = false;
+ continue;
+ }
+#endif
}
+
// Load a constant in the register.
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
if (SetRegisterCode.empty())
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 77fbaa6e95412..736c9d9ff6c23 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -308,6 +308,10 @@ class ExegesisTarget {
return std::make_unique<SavedState>();
}
+  virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
+                                                        int imm = 0) const {
+ return {};
+ }
+
private:
virtual bool matchesArch(Triple::ArchType Arch) const = 0;
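Putting patches 3 and 4 together, the intended flow (as sketched here, not spelled out in the patch) is that generateMemoryInitialSetup pushes the mmap result from X0 onto the stack, and the first register that needs a memory address is later initialized by popping that value back instead of via setRegTo:

    // Sketch for a hypothetical scratch register X14; ET is an
    // ExegesisTarget reference as in Assembler.cpp, and the push helper is
    // the file-static one from Target.cpp.
    std::vector<MCInst> Setup;
    generateRegisterStackPush(AArch64::X0, Setup); // str x0, [sp, #-16]!  (mmap result)
    // ... remaining setup code runs, possibly clobbering X0 ...
    std::vector<MCInst> Init =
        ET._generateRegisterStackPop(AArch64::X14, /*imm=*/16); // ldr x14, [sp], #16
    Setup.insert(Setup.end(), Init.begin(), Init.end());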
>From 95a67bb9495f4ad341ab80e6680d2e2cf0cb76dc Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:13:27 -0700
Subject: [PATCH 5/9] [llvm-exegesis] Debug register initialization with
"exegesis-assembler"
---
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 13202968f894d..56b3e6894bbfa 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#define DEBUG_TYPE "exegesis-assembler"
#ifdef HAVE_LIBPFM
#include "perfmon/perf_event.h"
#endif // HAVE_LIBPFM
@@ -80,6 +81,22 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
->getStackPointerRegisterToSaveRestore();
bool isFirstRegister = true;
for (const RegisterValue &RV : Key.RegisterInitialValues) {
+ // Debug: register name and class name and value from BenchmarkKey
+ const MCRegisterInfo *RegInfo = BBF.MF.getTarget().getMCRegisterInfo();
+ const char *RegName = RegInfo->getName(RV.Register);
+ const char *regClassName = "Unknown";
+ for (unsigned i = 0, e = RegInfo->getNumRegClasses(); i < e; ++i) {
+ const MCRegisterClass &RC = RegInfo->getRegClass(i);
+ if (RC.contains(RV.Register)) {
+ regClassName = RegInfo->getRegClassName(&RC);
+ break;
+ }
+ }
+ LLVM_DEBUG(
+ dbgs() << "Setting register (Class: " << regClassName << ") " << RegName
+ << std::string(
+ std::max(0, 3 - static_cast<int>(strlen(RegName))), ' '));
+
if (GenerateMemoryInstructions) {
// If we're generating memory instructions, don't load in the value for
// the register with the stack pointer as it will be used later to finish
@@ -92,12 +109,15 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
for (const auto &Inst : StackLoadInsts)
BBF.addInstruction(Inst);
isFirstRegister = false;
+ LLVM_DEBUG(dbgs() << "from stack with post-increment offset of " << 16
+ << " bytes\n");
continue;
}
#endif
}
// Load a constant in the register.
+ LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n");
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
if (SetRegisterCode.empty())
IsSnippetSetupComplete = false;
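The trace above is only compiled into assertion-enabled builds; it can be switched on with -debug-only=exegesis-assembler, or programmatically along these lines (sketch):

    #include "llvm/Support/Debug.h"

    // Enable the DEBUG_TYPE introduced in this patch.
    void enableAssemblerTrace() {
      llvm::setCurrentDebugType("exegesis-assembler");
      llvm::DebugFlag = true;
    }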
>From 28b23cacd82c3f23586f30135fdc33ef9862ba0c Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:56:28 -0700
Subject: [PATCH 6/9] [llvm-exegesis] [AArch64] Experimental memory operand
handling
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 39 +++++++++++++++++++
.../llvm-exegesis/lib/MCInstrDescView.cpp | 10 ++++-
.../tools/llvm-exegesis/lib/MCInstrDescView.h | 1 +
.../lib/SerialSnippetGenerator.cpp | 6 +++
.../llvm-exegesis/lib/SnippetGenerator.cpp | 8 ++++
5 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 2b73e002c4300..c3df9df1b12f8 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include <vector>
+#define DEBUG_TYPE "exegesis-aarch64-target"
#if defined(__aarch64__) && defined(__linux__)
#include <sys/mman.h>
#include <sys/syscall.h>
@@ -362,10 +363,48 @@ class ExegesisAArch64Target : public ExegesisTarget {
return nullptr;
}
+ MCRegister getScratchMemoryRegister(const Triple &) const override;
+ void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
+ unsigned Offset) const override;
};
} // namespace
+// Implementation follows RISCV pattern for memory operand handling.
+// Note: This implementation requires validation for AArch64-specific requirements.
+void ExegesisAArch64Target::fillMemoryOperands(InstructionTemplate &IT,
+ MCRegister Reg,
+ unsigned Offset) const {
+ LLVM_DEBUG(dbgs() << "Executing fillMemoryOperands");
+ // AArch64 memory operands typically have the following structure:
+ // [base_register, offset]
+ auto &I = IT.getInstr();
+ auto MemOpIt =
+ find_if(I.Operands, [](const Operand &Op) { return Op.isMemory(); });
+ assert(MemOpIt != I.Operands.end() &&
+ "Instruction must have memory operands");
+
+ const Operand &MemOp = *MemOpIt;
+
+ assert(MemOp.isReg() && "Memory operand expected to be register");
+
+ IT.getValueFor(MemOp) = MCOperand::createReg(Reg);
+ IT.getValueFor(MemOp) = MCOperand::createImm(Offset);
+}
+enum ScratchMemoryRegister {
+ Z = AArch64::Z14,
+ X = AArch64::X14,
+ W = AArch64::W14,
+};
+
+MCRegister
+ExegesisAArch64Target::getScratchMemoryRegister(const Triple &TT) const {
+ // return MCRegister(); // Implemented in target.h
+ // return hardcoded scratch memory register, similar to RISCV (uses a0)
+ return ScratchMemoryRegister::X ;
+}
+
+
#ifdef __linux__
// true : use a fixed address near the virtual address space ceiling
// false: let the kernel choose the address of the auxiliary memory
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index e0e796cee8040..0c0107eee0abd 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -52,9 +52,14 @@ bool Operand::isVariable() const { return VariableIndex.has_value(); }
bool Operand::isEarlyClobber() const { return IsEarlyClobber; }
+// FIXME: Verify if mayLoadOrStore check is necessary for AArch64 memory operand detection
bool Operand::isMemory() const {
- return isExplicit() &&
- getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY;
+ return (isExplicit() && getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY)
+ // || mayLoadOrStore
+ ;
+ // AArch64 has no operands with MCOI::OPERAND_MEMORY thus also adding mayLoadOrStore
+ // to check for mayLoad and mayStore which potentially have memory operands
+ // Uncommenting this check will cause illegal instruction error for AArch64
}
bool Operand::isImmediate() const {
@@ -130,6 +135,7 @@ Instruction::create(const MCInstrInfo &InstrInfo,
if (TiedToIndex >= 0)
Operand.TiedToIndex = TiedToIndex;
Operand.Info = &OpInfo;
+ Operand.mayLoadOrStore = Description->mayLoad() || Description->mayStore();
Operands.push_back(Operand);
}
for (MCPhysReg MCPhysReg : Description->implicit_defs()) {
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index 0a62967897c79..dda8243051f77 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -85,6 +85,7 @@ struct Operand {
bool IsDef = false;
bool IsEarlyClobber = false;
const RegisterAliasingTracker *Tracker = nullptr; // Set for Register Op.
+  bool mayLoadOrStore = false; // Set if the instruction may load or store.
const MCOperandInfo *Info = nullptr; // Set for Explicit Op.
std::optional<uint8_t> TiedToIndex; // Set for Reg&Explicit Op.
MCRegister ImplicitReg; // Non-0 for Implicit Op.
diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
index bdfc93e22273b..98a5be53cc086 100644
--- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
@@ -132,6 +132,12 @@ static void appendCodeTemplates(const LLVMState &State,
// Register classes of def operand and memory operand must be the same
// to perform aliasing.
+
+ // TODO: Get a valid scratch memory register,
+
+  // Do we need to set the scratch memory register based on the register class?
+  // Or is this code path even required, i.e. is initializing a register that
+  // needs a memory address from the stack the correct approach?
if (!RegClass.contains(ScratchMemoryRegister))
return;
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index 04064ae1d8441..3eed207e56c8f 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -45,10 +45,18 @@ Error SnippetGenerator::generateConfigurations(
ForbiddenRegs |= ExtraForbiddenRegs;
// If the instruction has memory registers, prevent the generator from
// using the scratch register and its aliasing registers.
+
+ // hasMemoryOperands(): if any register is an explicit memory register,
+ // then the instruction has memory operands
if (Variant.getInstr().hasMemoryOperands()) {
const auto &ET = State.getExegesisTarget();
MCRegister ScratchSpacePointerInReg =
ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
+
+ // TODO: Get a valid scratch memory register,
+ // if MCRegister() is used, code flow exits here with below error,
+ // else if hardcoded X14 is used as scratch memory register,
+ // then illegal instruction is generated: undefined physical register
if (!ScratchSpacePointerInReg.isValid())
return make_error<Failure>(
"Infeasible : target does not support memory instructions");
>From d753795a26ac9165b54970d2c9c458d5f317d55d Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:57:39 -0700
Subject: [PATCH 7/9] [llvm-exegesis] [AArch64] Fix aux memory file descriptor
init and ioctl syscall
---
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 56b3e6894bbfa..3af52a160c837 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -67,6 +67,8 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
assert(MM.Address % getpagesize() == 0 &&
"Memory mappings need to be aligned to page boundaries.");
#endif
+  // FIXME: The file descriptor for the aux memory does not seem to be initialized.
+  // TODO: Invoke the openat syscall to get the correct fd for the aux memory.
const MemoryValue &MemVal = Key.MemoryValues.at(MM.MemoryValueName);
BBF.addInstructions(ET.generateMmap(
MM.Address, MemVal.SizeBytes,
>From c3c61ea49af8ea32ad7caad773672b4361aaeadd Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Mon, 2 Jun 2025 23:12:03 -0700
Subject: [PATCH 8/9] [llvm-exegesis] Formatting changes
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 10 ++++----
llvm/tools/llvm-exegesis/lib/Assembler.cpp | 3 ++-
.../llvm-exegesis/lib/MCInstrDescView.cpp | 25 +++++++++++--------
.../llvm-exegesis/lib/SnippetGenerator.cpp | 6 ++---
4 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index c3df9df1b12f8..48a22d011a491 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -371,10 +371,11 @@ class ExegesisAArch64Target : public ExegesisTarget {
} // namespace
// Implementation follows RISCV pattern for memory operand handling.
-// Note: This implementation requires validation for AArch64-specific requirements.
+// Note: This implementation requires validation for AArch64-specific
+// requirements.
void ExegesisAArch64Target::fillMemoryOperands(InstructionTemplate &IT,
- MCRegister Reg,
- unsigned Offset) const {
+ MCRegister Reg,
+ unsigned Offset) const {
LLVM_DEBUG(dbgs() << "Executing fillMemoryOperands");
// AArch64 memory operands typically have the following structure:
// [base_register, offset]
@@ -401,10 +402,9 @@ MCRegister
ExegesisAArch64Target::getScratchMemoryRegister(const Triple &TT) const {
// return MCRegister(); // Implemented in target.h
// return hardcoded scratch memory register, similar to RISCV (uses a0)
- return ScratchMemoryRegister::X ;
+ return ScratchMemoryRegister::X;
}
-
#ifdef __linux__
// true : use a fixed address near the virtual address space ceiling
// false: let the kernel choose the address of the auxiliary memory
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 3af52a160c837..b29e384efe95e 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -258,7 +258,8 @@ ArrayRef<MCRegister> FunctionFiller::getRegistersSetUp() const {
}
static std::unique_ptr<Module>
-createModule(const std::unique_ptr<LLVMContext> &Context, const DataLayout &DL) {
+createModule(const std::unique_ptr<LLVMContext> &Context,
+ const DataLayout &DL) {
auto Mod = std::make_unique<Module>(ModuleID, *Context);
Mod->setDataLayout(DL);
return Mod;
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index 0c0107eee0abd..cf61d33f57fac 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -52,14 +52,17 @@ bool Operand::isVariable() const { return VariableIndex.has_value(); }
bool Operand::isEarlyClobber() const { return IsEarlyClobber; }
-// FIXME: Verify if mayLoadOrStore check is necessary for AArch64 memory operand detection
+// FIXME: Verify if mayLoadOrStore check is necessary for AArch64 memory operand
+// detection
bool Operand::isMemory() const {
- return (isExplicit() && getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY)
- // || mayLoadOrStore
- ;
- // AArch64 has no operands with MCOI::OPERAND_MEMORY thus also adding mayLoadOrStore
- // to check for mayLoad and mayStore which potentially have memory operands
- // Uncommenting this check will cause illegal instruction error for AArch64
+ return (isExplicit() &&
+ getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY)
+ // || mayLoadOrStore
+ ;
+ // AArch64 has no operands with MCOI::OPERAND_MEMORY thus also adding
+ // mayLoadOrStore to check for mayLoad and mayStore which potentially have
+ // memory operands Uncommenting this check will cause illegal instruction
+ // error for AArch64
}
bool Operand::isImmediate() const {
@@ -331,13 +334,13 @@ const Instruction &InstructionsCache::getInstr(unsigned Opcode) const {
return *Found;
}
-bool RegisterOperandAssignment::
-operator==(const RegisterOperandAssignment &Other) const {
+bool RegisterOperandAssignment::operator==(
+ const RegisterOperandAssignment &Other) const {
return std::tie(Op, Reg) == std::tie(Other.Op, Other.Reg);
}
-bool AliasingRegisterOperands::
-operator==(const AliasingRegisterOperands &Other) const {
+bool AliasingRegisterOperands::operator==(
+ const AliasingRegisterOperands &Other) const {
return std::tie(Defs, Uses) == std::tie(Other.Defs, Other.Uses);
}
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index 3eed207e56c8f..9cf5a4429fafb 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -45,14 +45,14 @@ Error SnippetGenerator::generateConfigurations(
ForbiddenRegs |= ExtraForbiddenRegs;
// If the instruction has memory registers, prevent the generator from
// using the scratch register and its aliasing registers.
-
- // hasMemoryOperands(): if any register is an explicit memory register,
+
+ // hasMemoryOperands(): if any register is an explicit memory register,
// then the instruction has memory operands
if (Variant.getInstr().hasMemoryOperands()) {
const auto &ET = State.getExegesisTarget();
MCRegister ScratchSpacePointerInReg =
ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
-
+
// TODO: Get a valid scratch memory register,
// if MCRegister() is used, code flow exits here with below error,
// else if hardcoded X14 is used as scratch memory register,
>From d928755f63ea338a165b5f4c18e0f2616c7b730b Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Tue, 3 Jun 2025 08:02:14 -0700
Subject: [PATCH 9/9] [llvm-exegesis] [AArch64] Resolve Merge Conflict coming
from reverted #136868
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 76 -------------------
1 file changed, 76 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 48a22d011a491..9411ece435f07 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,12 +28,6 @@
#endif // HAVE_LIBPFM
#include <linux/prctl.h> // For PR_PAC_* constants
#include <sys/prctl.h>
-#ifndef PR_PAC_SET_ENABLED_KEYS
-#define PR_PAC_SET_ENABLED_KEYS 60
-#endif
-#ifndef PR_PAC_GET_ENABLED_KEYS
-#define PR_PAC_GET_ENABLED_KEYS 61
-#endif
#ifndef PR_PAC_APIAKEY
#define PR_PAC_APIAKEY (1UL << 0)
#endif
@@ -54,47 +48,6 @@
namespace llvm {
namespace exegesis {
-bool isPointerAuth(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
-
- // FIXME: Pointer Authentication instructions.
- // We would like to measure these instructions, but they can behave
- // differently on different platforms, and maybe the snippets need to look
- // different for these instructions,
- // Platform-specific handling: On Linux, we disable authentication, may
- // interfere with measurements. On non-Linux platforms, disable opcodes for
- // now.
- case AArch64::AUTDA:
- case AArch64::AUTDB:
- case AArch64::AUTDZA:
- case AArch64::AUTDZB:
- case AArch64::AUTIA:
- case AArch64::AUTIA1716:
- case AArch64::AUTIASP:
- case AArch64::AUTIAZ:
- case AArch64::AUTIB:
- case AArch64::AUTIB1716:
- case AArch64::AUTIBSP:
- case AArch64::AUTIBZ:
- case AArch64::AUTIZA:
- case AArch64::AUTIZB:
- return true;
- }
-}
-
-bool isLoadTagMultiple(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
-
- // Load tag multiple instruction
- case AArch64::LDGM:
- return true;
- }
-}
-
static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
switch (RegBitWidth) {
case 32:
@@ -334,35 +287,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
// Function return is a pseudo-instruction that needs to be expanded
PM.add(createAArch64ExpandPseudoPass());
}
-
- const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
- unsigned Opcode) const override {
- if (const char *Reason =
- ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode))
- return Reason;
-
- if (isPointerAuth(Opcode)) {
-#if defined(__aarch64__) && defined(__linux__)
- // Disable all PAC keys. Note that while we expect the measurements to
- // be the same with PAC keys disabled, they could potentially be lower
- // since authentication checks are bypassed.
- if (prctl(PR_PAC_SET_ENABLED_KEYS,
- PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY |
- PR_PAC_APDBKEY, // all keys
- 0, // disable all
- 0, 0) < 0) {
- return "Failed to disable PAC keys";
- }
-#else
- return "Unsupported opcode: isPointerAuth";
-#endif
- }
-
- if (isLoadTagMultiple(Opcode))
- return "Unsupported opcode: load tag multiple";
-
- return nullptr;
- }
MCRegister getScratchMemoryRegister(const Triple &) const override;
void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
unsigned Offset) const override;