[llvm] [llvm-exegesis] [AArch64] Resolving "snippet crashed while running: Segmentation fault" for Load Instructions (PR #142552)

Lakshay Kumar via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 3 08:04:42 PDT 2025


https://github.com/lakshayk-nv updated https://github.com/llvm/llvm-project/pull/142552

>From 4aed344ec52f3b7eb66444aeeb47acfdd8fb2517 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Fri, 30 May 2025 07:21:39 -0700
Subject: [PATCH 1/9] [llvm-exegesis] [AArch64] Use X16 instead of X8

Switched to X16 as the temporary register in loadFPCRImmediate instead of X8, which is used by syscalls.
---
 llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index a1eb5a46f21fc..cebab371cd5e0 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -120,7 +120,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
 // Generates instructions to load an immediate value into an FPCR register.
 static std::vector<MCInst>
 loadFPCRImmediate(MCRegister Reg, unsigned RegBitWidth, const APInt &Value) {
-  MCRegister TempReg = AArch64::X8;
+  MCRegister TempReg = AArch64::X16;
   MCInst LoadImm = MCInstBuilder(AArch64::MOVi64imm).addReg(TempReg).addImm(0);
   MCInst MoveToFPCR =
       MCInstBuilder(AArch64::MSR).addImm(AArch64SysReg::FPCR).addReg(TempReg);

>From 0476153210b9605738e5ea886ac6a6f62d203a2a Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:00:04 -0700
Subject: [PATCH 2/9] [llvm-exegesis] [AArch64] Add helpers to push/pop GPRs
 and save/restore syscall registers and syscall generator

---
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index cebab371cd5e0..a1a6913773b52 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -153,6 +153,89 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
   return Instructions;
 }
 
+static void generateRegisterStackPush(unsigned int RegToPush,
+                                      std::vector<MCInst> &GeneratedCode,
+                                      int imm = -16) {
+  // STR [X|W]t, [SP, #simm]!: SP is decremented by default 16 bytes
+  //                           before the store to maintain 16-bytes alignment.
+  if (AArch64::GPR64RegClass.contains(RegToPush)) {
+    GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre)
+                                .addReg(AArch64::SP)
+                                .addReg(RegToPush)
+                                .addReg(AArch64::SP)
+                                .addImm(imm));
+  } else if (AArch64::GPR32RegClass.contains(RegToPush)) {
+    GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre)
+                                .addReg(AArch64::SP)
+                                .addReg(RegToPush)
+                                .addReg(AArch64::SP)
+                                .addImm(imm));
+  } else {
+    llvm_unreachable("Unsupported register class for stack push");
+  }
+}
+
+static void generateRegisterStackPop(unsigned int RegToPopTo,
+                                     std::vector<MCInst> &GeneratedCode,
+                                     int imm = 16) {
+  // LDR Xt, [SP], #simm: SP is incremented by default 16 bytes after the load.
+  if (AArch64::GPR64RegClass.contains(RegToPopTo)) {
+    GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost)
+                                .addReg(AArch64::SP)
+                                .addReg(RegToPopTo)
+                                .addReg(AArch64::SP)
+                                .addImm(imm));
+  } else if (AArch64::GPR32RegClass.contains(RegToPopTo)) {
+    GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost)
+                                .addReg(AArch64::SP)
+                                .addReg(RegToPopTo)
+                                .addReg(AArch64::SP)
+                                .addImm(imm));
+  } else {
+    llvm_unreachable("Unsupported register class for stack pop");
+  }
+}
+
+void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(
+      loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber)));
+  GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0));
+}
+
+/// Functions to save/restore system call registers
+#ifdef __linux__
+constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
+    AArch64::X0, AArch64::X1, AArch64::X2,
+    AArch64::X3, AArch64::X4, AArch64::X5,
+};
+
+static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+                                 unsigned ArgumentCount) {
+  // AArch64 Linux typically uses X0-X5 for the first 6 arguments.
+  // Some syscalls can take up to 8 arguments in X0-X7.
+  assert(ArgumentCount <= 6 &&
+         "This implementation saves up to 6 argument registers (X0-X5)");
+  // generateRegisterStackPush(AArch64::X16, GeneratedCode);
+  // Preserve X8 (used for the syscall number/return value).
+  generateRegisterStackPush(AArch64::X8, GeneratedCode);
+  // Preserve the registers used to pass arguments to the system call.
+  for (unsigned I = 0; I < ArgumentCount; ++I) {
+    generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
+  }
+}
+
+static void restoreSysCallRegisters(std::vector<MCInst> &GeneratedCode,
+                                    unsigned ArgumentCount) {
+  assert(ArgumentCount <= 6 &&
+         "This implementation restores up to 6 argument registers (X0-X5)");
+  // Restore argument registers, in opposite order of the way they are saved.
+  for (int I = ArgumentCount - 1; I >= 0; --I) {
+    generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode);
+  }
+  generateRegisterStackPop(AArch64::X8, GeneratedCode);
+  // generateRegisterStackPop(AArch64::X16, GeneratedCode);
+}
+#endif // __linux__
 #include "AArch64GenExegesis.inc"
 
 namespace {

>From f54ac57230199af53a4a2dc34056a862f69d198d Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:03:07 -0700
Subject: [PATCH 3/9] [llvm-exegesis] [AArch64] WIP implementation of memory
 management functions used by subprocess execution mode.

---
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 367 ++++++++++++++++++
 1 file changed, 367 insertions(+)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index a1a6913773b52..6b6859729cb10 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -6,10 +6,25 @@
 //
 //===----------------------------------------------------------------------===//
 #include "../Target.h"
+#include "../Error.h"
+#include "../MmapUtils.h"
+#include "../SerialSnippetGenerator.h"
+#include "../SnippetGenerator.h"
+#include "../SubprocessMemory.h"
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <vector>
 
 #if defined(__aarch64__) && defined(__linux__)
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h> // for getpagesize()
+#ifdef HAVE_LIBPFM
+#include <perfmon/perf_event.h>
+#endif                   // HAVE_LIBPFM
 #include <linux/prctl.h> // For PR_PAC_* constants
 #include <sys/prctl.h>
 #ifndef PR_PAC_SET_ENABLED_KEYS
@@ -245,7 +260,30 @@ class ExegesisAArch64Target : public ExegesisTarget {
   ExegesisAArch64Target()
       : ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {}
 
+  enum ArgumentRegisters {
+    CodeSize = AArch64::X12,
+    AuxiliaryMemoryFD = AArch64::X13
+  };
+
 private:
+#ifdef __linux__
+  void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override;
+  void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const override;
+  std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;
+  std::vector<MCInst>
+  generateMmap(uintptr_t Address, size_t Length,
+               uintptr_t FileDescriptorAddress) const override;
+  void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;
+  void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const override;
+  std::vector<MCInst> generateMemoryInitialSetup() const override;
+  std::vector<MCInst> setStackRegisterToAuxMem() const override;
+  uintptr_t getAuxiliaryMemoryStartAddress() const override;
+  std::vector<MCInst> configurePerfCounter(long Request,
+                                           bool SaveRegisters) const override;
+  std::vector<MCRegister> getArgumentRegisters() const override;
+  std::vector<MCRegister> getRegistersNeedSaving() const override;
+#endif // __linux__
+
   std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
                                const APInt &Value) const override {
     if (AArch64::GPR32RegClass.contains(Reg))
@@ -314,6 +352,335 @@ class ExegesisAArch64Target : public ExegesisTarget {
 
 } // namespace
 
+#ifdef __linux__
+// true : let use of fixed address to Virtual Address Space Ceiling
+// false: let kernel choose the address of the auxiliary memory
+bool UseFixedAddress = true; // TODO: Remove this later
+
+static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;
+
+static void generateRoundToNearestPage(unsigned int TargetRegister,
+                                       std::vector<MCInst> &GeneratedCode) {
+  int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
+  // Round down to the nearest page by getting rid of the least significant bits
+  // representing location in the page.
+
+  // Single instruction using AND with inverted mask (effectively BIC)
+  uint64_t BitsToClearMask = (1ULL << PageSizeShift) - 1; // 0xFFF
+  uint64_t AndMask = ~BitsToClearMask;                    // ...FFFFFFFFFFFF000
+  GeneratedCode.push_back(MCInstBuilder(AArch64::ANDXri)
+                              .addReg(TargetRegister) // Xd
+                              .addReg(TargetRegister) // Xn
+                              .addImm(AndMask)        // imm bitmask
+  );
+}
+static void generateGetInstructionPointer(unsigned int ResultRegister,
+                                          std::vector<MCInst> &GeneratedCode) {
+  // ADR X[ResultRegister], . : loads address of current instruction
+  // ADR : Form PC-relative address
+  // This instruction adds an immediate value to the PC value to form a
+  // PC-relative address, and writes the result to the destination register.
+  GeneratedCode.push_back(MCInstBuilder(AArch64::ADR)
+                              .addReg(ResultRegister) // Xd
+                              .addImm(0));            // Offset
+}
+
+// TODO: This implementation mirrors the x86 version and requires validation.
+// The purpose of this memory unmapping needs to be verified for AArch64
+void ExegesisAArch64Target::generateLowerMunmap(
+    std::vector<MCInst> &GeneratedCode) const {
+  // Unmap starting at address zero
+  GeneratedCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, 0)));
+  // Get the current instruction pointer so we know where to unmap up to.
+  generateGetInstructionPointer(AArch64::X1, GeneratedCode);
+  generateRoundToNearestPage(AArch64::X1, GeneratedCode);
+  // Subtract a page from the end of the unmap so we don't unmap the currently
+  // executing section.
+  long page_size = getpagesize();
+  // Load page_size into a temporary register (e.g., X16)
+  GeneratedCode.push_back(
+      loadImmediate(AArch64::X16, 64, APInt(64, page_size)));
+  // Subtract X16 (containing page_size) from X1
+  GeneratedCode.push_back(MCInstBuilder(AArch64::SUBXrr)
+                              .addReg(AArch64::X1)    // Dest
+                              .addReg(AArch64::X1)    // Src
+                              .addReg(AArch64::X16)); // page_size
+  generateSysCall(SYS_munmap, GeneratedCode);
+}
+
+// FIXME: This implementation mirrors the x86 version and requires validation.
+// The purpose of this memory unmapping needs to be verified for AArch64
+// The correctness of this implementation needs to be verified.
+void ExegesisAArch64Target::generateUpperMunmap(
+    std::vector<MCInst> &GeneratedCode) const {
+  generateGetInstructionPointer(AArch64::X4, GeneratedCode);
+  // Load the size of the snippet from the argument register into X0
+  // FIXME: Argument register seems not be initialized.
+  GeneratedCode.push_back(MCInstBuilder(AArch64::ORRXrr)
+                              .addReg(AArch64::X0)
+                              .addReg(AArch64::XZR)
+                              .addReg(ArgumentRegisters::CodeSize));
+  // Add the length of the snippet (in X0) to the current instruction pointer
+  // (in X4) to get the address where we should start unmapping at.
+  GeneratedCode.push_back(MCInstBuilder(AArch64::ADDXrr)
+                              .addReg(AArch64::X0)
+                              .addReg(AArch64::X0)
+                              .addReg(AArch64::X4));
+  generateRoundToNearestPage(AArch64::X0, GeneratedCode);
+  // Add one page to the start address to ensure the address is above snippet.
+  // Since the above function rounds down.
+  long page_size = getpagesize();
+  GeneratedCode.push_back(
+      loadImmediate(AArch64::X16, 64, APInt(64, page_size)));
+  GeneratedCode.push_back(MCInstBuilder(AArch64::ADDXrr)
+                              .addReg(AArch64::X0)    // Dest
+                              .addReg(AArch64::X0)    // Src
+                              .addReg(AArch64::X16)); // page_size
+  // Unmap to just one page under the ceiling of the address space.
+  GeneratedCode.push_back(loadImmediate(
+      AArch64::X1, 64, APInt(64, VAddressSpaceCeiling - getpagesize())));
+  GeneratedCode.push_back(MCInstBuilder(AArch64::SUBXrr)
+                              .addReg(AArch64::X1)
+                              .addReg(AArch64::X1)
+                              .addReg(AArch64::X0));
+  generateSysCall(SYS_munmap, GeneratedCode); // SYS_munmap is 215
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const {
+  std::vector<MCInst> ExitCallCode;
+  ExitCallCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, ExitCode)));
+  generateSysCall(SYS_exit, ExitCallCode); // SYS_exit is 93
+  return ExitCallCode;
+}
+
+// FIXME: This implementation mirrors the x86 version and requires validation.
+// The correctness of this implementation needs to be verified.
+// mmap(address, length, prot, flags, fd, offset=0)
+std::vector<MCInst>
+ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length,
+                                    uintptr_t FileDescriptorAddress) const {
+  int flags = MAP_SHARED;
+  if (Address != 0) {
+    flags |= MAP_FIXED_NOREPLACE;
+  }
+  std::vector<MCInst> MmapCode;
+  MmapCode.push_back(
+      loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr
+  MmapCode.push_back(
+      loadImmediate(AArch64::X1, 64, APInt(64, Length))); // length
+  MmapCode.push_back(loadImmediate(AArch64::X2, 64,
+                                   APInt(64, PROT_READ | PROT_WRITE))); // prot
+  MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
+  // FIXME: File descriptor address is not initialized.
+  // Copy file descriptor location from aux memory into X4
+  MmapCode.push_back(
+      loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd
+  // // Dereference file descriptor into FD argument register (TODO: Why? &
+  // correct?) MmapCode.push_back(
+  //   MCInstBuilder(AArch64::LDRWui)
+  //       .addReg(AArch64::W4)   // Destination register
+  //       .addReg(AArch64::X4)   // Base register (address)
+  //       .addImm(0)             // Offset (in 4-byte words, so 0 means no
+  //       offset)
+  // );
+  MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
+  generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222
+  return MmapCode;
+}
+
+// FIXME: This implementation mirrors the x86 version and requires validation.
+// The correctness of this implementation needs to be verified.
+void ExegesisAArch64Target::generateMmapAuxMem(
+    std::vector<MCInst> &GeneratedCode) const {
+  int fd = -1;
+  int flags = MAP_SHARED;
+  uintptr_t address = getAuxiliaryMemoryStartAddress();
+  if (fd == -1)
+    flags |= MAP_ANONYMOUS;
+  if (address != 0)
+    flags |= MAP_FIXED_NOREPLACE;
+  int prot = PROT_READ | PROT_WRITE;
+
+  GeneratedCode.push_back(
+      loadImmediate(AArch64::X0, 64, APInt(64, address))); // map adr
+  GeneratedCode.push_back(loadImmediate(
+      AArch64::X1, 64,
+      APInt(64, SubprocessMemory::AuxiliaryMemorySize))); // length
+  GeneratedCode.push_back(
+      loadImmediate(AArch64::X2, 64, APInt(64, prot))); // prot
+  GeneratedCode.push_back(
+      loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
+  GeneratedCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd
+  GeneratedCode.push_back(
+      loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
+  generateSysCall(SYS_mmap, GeneratedCode);          // SYS_mmap is 222
+}
+
+void ExegesisAArch64Target::moveArgumentRegisters(
+    std::vector<MCInst> &GeneratedCode) const {
+  GeneratedCode.push_back(MCInstBuilder(AArch64::ORRXrr)
+                              .addReg(ArgumentRegisters::CodeSize)
+                              .addReg(AArch64::XZR)
+                              .addReg(AArch64::X0));
+  GeneratedCode.push_back(MCInstBuilder(AArch64::ORRXrr)
+                              .addReg(ArgumentRegisters::AuxiliaryMemoryFD)
+                              .addReg(AArch64::XZR)
+                              .addReg(AArch64::X1));
+}
+
+std::vector<MCInst> ExegesisAArch64Target::generateMemoryInitialSetup() const {
+  std::vector<MCInst> MemoryInitialSetupCode;
+  // moveArgumentRegisters(MemoryInitialSetupCode);
+  // generateLowerMunmap(MemoryInitialSetupCode);   // TODO: Motivation Unclear
+  // generateUpperMunmap(MemoryInitialSetupCode);   // FIXME: Motivation Unclear
+  // TODO: Revert argument registers value, if munmap is used.
+
+  generateMmapAuxMem(MemoryInitialSetupCode); // FIXME: Uninit file descriptor
+
+  // If using fixed address for auxiliary memory skip this step,
+  // When using dynamic memory allocation (non-fixed address), we must preserve
+  // the mmap return value (X0) which contains the allocated memory address.
+  // This value is saved to the stack to ensure registers requiring memory
+  // access can retrieve the correct address even if X0 is modified by
+  // intermediate code.
+  generateRegisterStackPush(AArch64::X0, MemoryInitialSetupCode);
+  // FIXME: Ensure stack pointer remains stable to prevent loss of saved address
+  return MemoryInitialSetupCode;
+}
+
+// TODO: This implementation mirrors the x86 version and requires validation.
+// The purpose of moving stack pointer to aux memory needs to be verified for
+// AArch64
+std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
+  return std::vector<MCInst>(); // NOP
+
+  // Below is implementation for AArch64 but motivation unclear
+  // std::vector<MCInst> instructions; // NOP
+  // const uint64_t targetSPValue = getAuxiliaryMemoryStartAddress() +
+  //                               SubprocessMemory::AuxiliaryMemorySize;
+  // // sub, stack args and local storage
+  // // Use X16 as a temporary register since it's a scratch register
+  // const MCRegister TempReg = AArch64::X16;
+
+  // // Load the 64-bit immediate into TempReg using MOVZ/MOVK sequence
+  // // MOVZ Xd, #imm16, LSL #(shift_val * 16)
+  // // MOVK Xd, #imm16, LSL #(shift_val * 16) (* 3 times for 64-bit immediate)
+
+  // // 1. MOVZ TmpReg, #(targetSPValue & 0xFFFF), LSL #0
+  // instructions.push_back(
+  //     MCInstBuilder(AArch64::MOVZXi)
+  //         .addReg(TempReg)
+  //         .addImm(static_cast<uint16_t>(targetSPValue & 0xFFFF)) // imm16
+  //         .addImm(0));                               // hw (shift/16) = 0
+  // // 2. MOVK TmpReg, #((targetSPValue >> 16) & 0xFFFF), LSL #16
+  // if (((targetSPValue >> 16) & 0xFFFF) != 0 || (targetSPValue > 0xFFFF)) {
+  //   instructions.push_back(
+  //       MCInstBuilder(AArch64::MOVKXi)
+  //           .addReg(TempReg)
+  //           .addReg(TempReg)
+  //           .addImm(static_cast<uint16_t>((targetSPValue >> 16) & 0xFFFF)) //
+  //           imm16 .addImm(1));                                       // hw
+  //           (shift/16) = 1
+  // }
+  // // 3. MOVK TmpReg, #((targetSPValue >> 32) & 0xFFFF), LSL #32
+  // if (((targetSPValue >> 32) & 0xFFFF) != 0 || (targetSPValue > 0xFFFFFFFF))
+  // {
+  //   instructions.push_back(
+  //       MCInstBuilder(AArch64::MOVKXi)
+  //           .addReg(TempReg)
+  //           .addReg(TempReg)
+  //           .addImm(static_cast<uint16_t>((targetSPValue >> 32) & 0xFFFF)) //
+  //           imm16 .addImm(2));                                       // hw
+  //           (shift/16) = 2
+  // }
+  // // 4. MOVK TmpReg, #((targetSPValue >> 48) & 0xFFFF), LSL #48
+  // if (((targetSPValue >> 48) & 0xFFFF) != 0 || (targetSPValue >
+  // 0xFFFFFFFFFFFF)) {
+  //   instructions.push_back(
+  //       MCInstBuilder(AArch64::MOVKXi)
+  //           .addReg(TempReg)
+  //           .addReg(TempReg)
+  //           .addImm(static_cast<uint16_t>((targetSPValue >> 48) & 0xFFFF)) //
+  //           imm16 .addImm(3));                                       // hw
+  //           (shift/16) = 3
+  // }
+  // // Finally, move the value from TempReg to SP
+  // instructions.push_back(
+  //     MCInstBuilder(AArch64::ADDXri)  // ADD SP, TempReg, #0
+  //         .addReg(AArch64::SP)
+  //         .addReg(TempReg)
+  //         .addImm(0)                  // imm   = 0
+  //         .addImm(0));                // shift = 0
+
+  // return instructions;
+}
+
+uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const {
+  if (!UseFixedAddress)
+    // Allow kernel to select an appropriate memory address
+    return 0;
+  // Return the second to last page in the virtual address space
+  // to try and prevent interference with memory annotations in the snippet
+  // VAddressSpaceCeiling = 0x0000800000000000
+  // FIXME: Why 2 pages?
+  return VAddressSpaceCeiling - (2 * getpagesize());
+}
+
+std::vector<MCInst>
+ExegesisAArch64Target::configurePerfCounter(long Request,
+                                            bool SaveRegisters) const {
+  return std::vector<MCInst>(); // NOP
+
+  // Current SYSCALL exits with EBADF error - file descriptor is invalid
+  // Unsure how to implement this for AArch64
+  std::vector<MCInst> ConfigurePerfCounterCode;
+  if (SaveRegisters)
+    saveSysCallRegisters(ConfigurePerfCounterCode, 3);
+
+  // Move the file descriptor (stored at the start of auxiliary memory) into X0.
+  // FIXME: This file descriptor at start of aux memory is not initialized.
+  uintptr_t fd_adr =
+      getAuxiliaryMemoryStartAddress() + SubprocessMemory::AuxiliaryMemorySize;
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(AArch64::X0, 64, APInt(64, fd_adr)));
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(AArch64::X1, 64, APInt(64, Request)));
+
+#ifdef HAVE_LIBPFM
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(AArch64::X2, 64, APInt(64, PERF_IOC_FLAG_GROUP)));
+#endif
+
+  generateSysCall(SYS_ioctl, ConfigurePerfCounterCode);
+
+  if (SaveRegisters)
+    restoreSysCallRegisters(ConfigurePerfCounterCode, 3);
+
+  return ConfigurePerfCounterCode;
+}
+
+std::vector<MCRegister> ExegesisAArch64Target::getArgumentRegisters() const {
+  return {AArch64::X0, AArch64::X1};
+}
+
+std::vector<MCRegister> ExegesisAArch64Target::getRegistersNeedSaving() const {
+  return {
+      AArch64::X0,
+      AArch64::X1,
+      AArch64::X2,
+      AArch64::X3,
+      AArch64::X4,
+      AArch64::X5,
+      AArch64::X8,
+      AArch64::X16,
+      ArgumentRegisters::CodeSize,
+      ArgumentRegisters::AuxiliaryMemoryFD,
+  };
+}
+
+#endif // __linux__
+
 static ExegesisTarget *getTheExegesisAArch64Target() {
   static ExegesisAArch64Target Target;
   return &Target;

>From 524e42f499ed15891588940126105137b01afdb3 Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:12:12 -0700
Subject: [PATCH 4/9] [llvm-exegesis] [AArch64] Added WIP implementation to
 initialize registers requiring memory address.

---
 llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 14 ++++++++++++++
 llvm/tools/llvm-exegesis/lib/Assembler.cpp      | 11 +++++++++++
 llvm/tools/llvm-exegesis/lib/Target.h           |  4 ++++
 3 files changed, 29 insertions(+)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 6b6859729cb10..2b73e002c4300 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -265,6 +265,20 @@ class ExegesisAArch64Target : public ExegesisTarget {
     AuxiliaryMemoryFD = AArch64::X13
   };
 
+  std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
+                                                int imm = 0) const override {
+    std::vector<MCInst> Insts;
+    if (AArch64::GPR32RegClass.contains(Reg)) {
+      generateRegisterStackPop(Reg, Insts, imm);
+      return Insts;
+    }
+    if (AArch64::GPR64RegClass.contains(Reg)) {
+      generateRegisterStackPop(Reg, Insts, imm);
+      return Insts;
+    }
+    return {};
+  }
+
 private:
 #ifdef __linux__
   void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override;
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index fd7924db08441..13202968f894d 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -78,6 +78,7 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
   Register StackPointerRegister = BBF.MF.getSubtarget()
                                       .getTargetLowering()
                                       ->getStackPointerRegisterToSaveRestore();
+  bool isFirstRegister = true;
   for (const RegisterValue &RV : Key.RegisterInitialValues) {
     if (GenerateMemoryInstructions) {
       // If we're generating memory instructions, don't load in the value for
@@ -85,7 +86,17 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
       // the setup.
       if (Register(RV.Register) == StackPointerRegister)
         continue;
+#if defined(__aarch64__)
+      auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16);
+      if (!StackLoadInsts.empty() && isFirstRegister) {
+        for (const auto &Inst : StackLoadInsts)
+          BBF.addInstruction(Inst);
+        isFirstRegister = false;
+        continue;
+      }
+#endif
     }
+
     // Load a constant in the register.
     const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
     if (SetRegisterCode.empty())
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 77fbaa6e95412..736c9d9ff6c23 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -308,6 +308,10 @@ class ExegesisTarget {
     return std::make_unique<SavedState>();
   }
 
+  virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg, int imm = 0) const {
+    return {};
+  }
+
 private:
   virtual bool matchesArch(Triple::ArchType Arch) const = 0;
 

>From 95a67bb9495f4ad341ab80e6680d2e2cf0cb76dc Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:13:27 -0700
Subject: [PATCH 5/9] [llvm-exegesis] Debug register initialization with
 "exegesis-assembler"

---
 llvm/tools/llvm-exegesis/lib/Assembler.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 13202968f894d..56b3e6894bbfa 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 
+#define DEBUG_TYPE "exegesis-assembler"
 #ifdef HAVE_LIBPFM
 #include "perfmon/perf_event.h"
 #endif // HAVE_LIBPFM
@@ -80,6 +81,22 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
                                       ->getStackPointerRegisterToSaveRestore();
   bool isFirstRegister = true;
   for (const RegisterValue &RV : Key.RegisterInitialValues) {
+    // Debug: register name and class name and value from BenchmarkKey
+    const MCRegisterInfo *RegInfo = BBF.MF.getTarget().getMCRegisterInfo();
+    const char *RegName = RegInfo->getName(RV.Register);
+    const char *regClassName = "Unknown";
+    for (unsigned i = 0, e = RegInfo->getNumRegClasses(); i < e; ++i) {
+      const MCRegisterClass &RC = RegInfo->getRegClass(i);
+      if (RC.contains(RV.Register)) {
+        regClassName = RegInfo->getRegClassName(&RC);
+        break;
+      }
+    }
+    LLVM_DEBUG(
+        dbgs() << "Setting register (Class: " << regClassName << ") " << RegName
+               << std::string(
+                      std::max(0, 3 - static_cast<int>(strlen(RegName))), ' '));
+
     if (GenerateMemoryInstructions) {
       // If we're generating memory instructions, don't load in the value for
       // the register with the stack pointer as it will be used later to finish
@@ -92,12 +109,15 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
         for (const auto &Inst : StackLoadInsts)
           BBF.addInstruction(Inst);
         isFirstRegister = false;
+        LLVM_DEBUG(dbgs() << "from stack with post-increment offset of " << 16
+                          << " bytes\n");
         continue;
       }
 #endif
     }
 
     // Load a constant in the register.
+    LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n");
     const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
     if (SetRegisterCode.empty())
       IsSnippetSetupComplete = false;

>From 28b23cacd82c3f23586f30135fdc33ef9862ba0c Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:56:28 -0700
Subject: [PATCH 6/9] [llvm-exegesis] [AArch64] Experimental memory operand
 handling

---
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 39 +++++++++++++++++++
 .../llvm-exegesis/lib/MCInstrDescView.cpp     | 10 ++++-
 .../tools/llvm-exegesis/lib/MCInstrDescView.h |  1 +
 .../lib/SerialSnippetGenerator.cpp            |  6 +++
 .../llvm-exegesis/lib/SnippetGenerator.cpp    |  8 ++++
 5 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 2b73e002c4300..c3df9df1b12f8 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -18,6 +18,7 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include <vector>
 
+#define DEBUG_TYPE "exegesis-aarch64-target"
 #if defined(__aarch64__) && defined(__linux__)
 #include <sys/mman.h>
 #include <sys/syscall.h>
@@ -362,10 +363,48 @@ class ExegesisAArch64Target : public ExegesisTarget {
 
     return nullptr;
   }
+  MCRegister getScratchMemoryRegister(const Triple &) const override;
+  void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
+                          unsigned Offset) const override;
 };
 
 } // namespace
 
+// Implementation follows RISCV pattern for memory operand handling.
+// Note: This implementation requires validation for AArch64-specific requirements.
+void ExegesisAArch64Target::fillMemoryOperands(InstructionTemplate &IT,
+                        MCRegister Reg,
+                        unsigned Offset) const {  
+  LLVM_DEBUG(dbgs() << "Executing fillMemoryOperands");
+  // AArch64 memory operands typically have the following structure:
+  // [base_register, offset]
+  auto &I = IT.getInstr();
+  auto MemOpIt =
+      find_if(I.Operands, [](const Operand &Op) { return Op.isMemory(); });
+  assert(MemOpIt != I.Operands.end() &&
+         "Instruction must have memory operands");
+
+  const Operand &MemOp = *MemOpIt;
+
+  assert(MemOp.isReg() && "Memory operand expected to be register");
+
+  IT.getValueFor(MemOp) = MCOperand::createReg(Reg);
+  IT.getValueFor(MemOp) = MCOperand::createImm(Offset);
+}
+enum ScratchMemoryRegister {
+  Z = AArch64::Z14,
+  X = AArch64::X14,
+  W = AArch64::W14,
+};
+
+MCRegister
+ExegesisAArch64Target::getScratchMemoryRegister(const Triple &TT) const {
+  // return MCRegister();   // Implemented in target.h
+  // return hardcoded scratch memory register, similar to RISCV (uses a0)
+  return ScratchMemoryRegister::X ; 
+}
+
+
 #ifdef __linux__
 // true : let use of fixed address to Virtual Address Space Ceiling
 // false: let kernel choose the address of the auxiliary memory
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index e0e796cee8040..0c0107eee0abd 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -52,9 +52,14 @@ bool Operand::isVariable() const { return VariableIndex.has_value(); }
 
 bool Operand::isEarlyClobber() const { return IsEarlyClobber; }
 
+// FIXME: Verify if mayLoadOrStore check is necessary for AArch64 memory operand detection
 bool Operand::isMemory() const {
-  return isExplicit() &&
-         getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY;
+  return (isExplicit() && getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY) 
+        //  || mayLoadOrStore 
+         ;
+  // AArch64 has no operands with MCOI::OPERAND_MEMORY thus also adding mayLoadOrStore
+  // to check for mayLoad and mayStore which potentially have memory operands
+  // Uncommenting this check will cause illegal instruction error for AArch64
 }
 
 bool Operand::isImmediate() const {
@@ -130,6 +135,7 @@ Instruction::create(const MCInstrInfo &InstrInfo,
     if (TiedToIndex >= 0)
       Operand.TiedToIndex = TiedToIndex;
     Operand.Info = &OpInfo;
+    Operand.mayLoadOrStore = Description->mayLoad() || Description->mayStore();
     Operands.push_back(Operand);
   }
   for (MCPhysReg MCPhysReg : Description->implicit_defs()) {
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index 0a62967897c79..dda8243051f77 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -85,6 +85,7 @@ struct Operand {
   bool IsDef = false;
   bool IsEarlyClobber = false;
   const RegisterAliasingTracker *Tracker = nullptr; // Set for Register Op.
+  bool mayLoadOrStore = false;                      // checks mayLoad and store
   const MCOperandInfo *Info = nullptr;              // Set for Explicit Op.
   std::optional<uint8_t> TiedToIndex;               // Set for Reg&Explicit Op.
   MCRegister ImplicitReg;                           // Non-0 for Implicit Op.
diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
index bdfc93e22273b..98a5be53cc086 100644
--- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
@@ -132,6 +132,12 @@ static void appendCodeTemplates(const LLVMState &State,
 
       // Register classes of def operand and memory operand must be the same
       // to perform aliasing.
+
+      // TODO: Get a valid scratch memory register,
+
+      // Do we need to set the scratch memory register based on the reg class?
+      // Or is this code flow even required, i.e. would setting a register that
+      // takes its memory address from the stack be the correct approach?
       if (!RegClass.contains(ScratchMemoryRegister))
         return;
 
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index 04064ae1d8441..3eed207e56c8f 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -45,10 +45,18 @@ Error SnippetGenerator::generateConfigurations(
   ForbiddenRegs |= ExtraForbiddenRegs;
   // If the instruction has memory registers, prevent the generator from
   // using the scratch register and its aliasing registers.
+  
+  // hasMemoryOperands(): if any register is an explicit memory register, 
+  // then the instruction has memory operands
   if (Variant.getInstr().hasMemoryOperands()) {
     const auto &ET = State.getExegesisTarget();
     MCRegister ScratchSpacePointerInReg =
         ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
+    
+    // TODO: Get a valid scratch memory register,
+    // if MCRegister() is used, code flow exits here with below error,
+    // else if hardcoded X14 is used as scratch memory register,
+    // then illegal instruction is generated: undefined physical register
     if (!ScratchSpacePointerInReg.isValid())
       return make_error<Failure>(
           "Infeasible : target does not support memory instructions");

>From d753795a26ac9165b54970d2c9c458d5f317d55d Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Sun, 1 Jun 2025 07:57:39 -0700
Subject: [PATCH 7/9] [llvm-exegesis] [AArch64] Fix aux memory file descriptor
 init and ioctl syscall

---
 llvm/tools/llvm-exegesis/lib/Assembler.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 56b3e6894bbfa..3af52a160c837 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -67,6 +67,8 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
       assert(MM.Address % getpagesize() == 0 &&
              "Memory mappings need to be aligned to page boundaries.");
 #endif
+      // FIXME: File descriptor for aux memory appears uninitialized.
+      // TODO: Invoke openat syscall to get the correct fd for aux memory.
       const MemoryValue &MemVal = Key.MemoryValues.at(MM.MemoryValueName);
       BBF.addInstructions(ET.generateMmap(
           MM.Address, MemVal.SizeBytes,

>From c3c61ea49af8ea32ad7caad773672b4361aaeadd Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Mon, 2 Jun 2025 23:12:03 -0700
Subject: [PATCH 8/9] [llvm-exegesis] Formatting changes

---
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 10 ++++----
 llvm/tools/llvm-exegesis/lib/Assembler.cpp    |  3 ++-
 .../llvm-exegesis/lib/MCInstrDescView.cpp     | 25 +++++++++++--------
 .../llvm-exegesis/lib/SnippetGenerator.cpp    |  6 ++---
 4 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index c3df9df1b12f8..48a22d011a491 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -371,10 +371,11 @@ class ExegesisAArch64Target : public ExegesisTarget {
 } // namespace
 
 // Implementation follows RISCV pattern for memory operand handling.
-// Note: This implementation requires validation for AArch64-specific requirements.
+// Note: This implementation requires validation for AArch64-specific
+// requirements.
 void ExegesisAArch64Target::fillMemoryOperands(InstructionTemplate &IT,
-                        MCRegister Reg,
-                        unsigned Offset) const {  
+                                               MCRegister Reg,
+                                               unsigned Offset) const {
   LLVM_DEBUG(dbgs() << "Executing fillMemoryOperands");
   // AArch64 memory operands typically have the following structure:
   // [base_register, offset]
@@ -401,10 +402,9 @@ MCRegister
 ExegesisAArch64Target::getScratchMemoryRegister(const Triple &TT) const {
   // return MCRegister();   // Implemented in target.h
   // return hardcoded scratch memory register, similar to RISCV (uses a0)
-  return ScratchMemoryRegister::X ; 
+  return ScratchMemoryRegister::X;
 }
 
-
 #ifdef __linux__
 // true : let use of fixed address to Virtual Address Space Ceiling
 // false: let kernel choose the address of the auxiliary memory
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index 3af52a160c837..b29e384efe95e 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -258,7 +258,8 @@ ArrayRef<MCRegister> FunctionFiller::getRegistersSetUp() const {
 }
 
 static std::unique_ptr<Module>
-createModule(const std::unique_ptr<LLVMContext> &Context, const DataLayout &DL) {
+createModule(const std::unique_ptr<LLVMContext> &Context,
+             const DataLayout &DL) {
   auto Mod = std::make_unique<Module>(ModuleID, *Context);
   Mod->setDataLayout(DL);
   return Mod;
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index 0c0107eee0abd..cf61d33f57fac 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -52,14 +52,17 @@ bool Operand::isVariable() const { return VariableIndex.has_value(); }
 
 bool Operand::isEarlyClobber() const { return IsEarlyClobber; }
 
-// FIXME: Verify if mayLoadOrStore check is necessary for AArch64 memory operand detection
+// FIXME: Verify if mayLoadOrStore check is necessary for AArch64 memory operand
+// detection
 bool Operand::isMemory() const {
-  return (isExplicit() && getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY) 
-        //  || mayLoadOrStore 
-         ;
-  // AArch64 has no operands with MCOI::OPERAND_MEMORY thus also adding mayLoadOrStore
-  // to check for mayLoad and mayStore which potentially have memory operands
-  // Uncommenting this check will cause illegal instruction error for AArch64
+  return (isExplicit() &&
+          getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY)
+      //  || mayLoadOrStore
+      ;
+  // AArch64 has no operands with MCOI::OPERAND_MEMORY, so mayLoadOrStore is
+  // also added to check mayLoad and mayStore, which potentially indicate
+  // memory operands. Uncommenting this check will cause an illegal instruction
+  // error for AArch64.
 }
 
 bool Operand::isImmediate() const {
@@ -331,13 +334,13 @@ const Instruction &InstructionsCache::getInstr(unsigned Opcode) const {
   return *Found;
 }
 
-bool RegisterOperandAssignment::
-operator==(const RegisterOperandAssignment &Other) const {
+bool RegisterOperandAssignment::operator==(
+    const RegisterOperandAssignment &Other) const {
   return std::tie(Op, Reg) == std::tie(Other.Op, Other.Reg);
 }
 
-bool AliasingRegisterOperands::
-operator==(const AliasingRegisterOperands &Other) const {
+bool AliasingRegisterOperands::operator==(
+    const AliasingRegisterOperands &Other) const {
   return std::tie(Defs, Uses) == std::tie(Other.Defs, Other.Uses);
 }
 
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index 3eed207e56c8f..9cf5a4429fafb 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -45,14 +45,14 @@ Error SnippetGenerator::generateConfigurations(
   ForbiddenRegs |= ExtraForbiddenRegs;
   // If the instruction has memory registers, prevent the generator from
   // using the scratch register and its aliasing registers.
-  
-  // hasMemoryOperands(): if any register is an explicit memory register, 
+
+  // hasMemoryOperands(): if any operand is an explicit memory operand,
+  // then the instruction has memory operands
   if (Variant.getInstr().hasMemoryOperands()) {
     const auto &ET = State.getExegesisTarget();
     MCRegister ScratchSpacePointerInReg =
         ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
-    
+
     // TODO: Get a valid scratch memory register,
     // if MCRegister() is used, code flow exits here with below error,
     // else if hardcoded X14 is used as scratch memory register,

>From d928755f63ea338a165b5f4c18e0f2616c7b730b Mon Sep 17 00:00:00 2001
From: lakshayk-nv <lakshayk at nvidia.com>
Date: Tue, 3 Jun 2025 08:02:14 -0700
Subject: [PATCH 9/9] [llvm-exegesis] [AArch64] Resolve Merge Conflict coming
 from reverted #136868

---
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 76 -------------------
 1 file changed, 76 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 48a22d011a491..9411ece435f07 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,12 +28,6 @@
 #endif                   // HAVE_LIBPFM
 #include <linux/prctl.h> // For PR_PAC_* constants
 #include <sys/prctl.h>
-#ifndef PR_PAC_SET_ENABLED_KEYS
-#define PR_PAC_SET_ENABLED_KEYS 60
-#endif
-#ifndef PR_PAC_GET_ENABLED_KEYS
-#define PR_PAC_GET_ENABLED_KEYS 61
-#endif
 #ifndef PR_PAC_APIAKEY
 #define PR_PAC_APIAKEY (1UL << 0)
 #endif
@@ -54,47 +48,6 @@
 namespace llvm {
 namespace exegesis {
 
-bool isPointerAuth(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-
-  // FIXME: Pointer Authentication instructions.
-  // We would like to measure these instructions, but they can behave
-  // differently on different platforms, and maybe the snippets need to look
-  // different for these instructions,
-  // Platform-specific handling:  On Linux, we disable authentication, may
-  // interfere with measurements. On non-Linux platforms, disable opcodes for
-  // now.
-  case AArch64::AUTDA:
-  case AArch64::AUTDB:
-  case AArch64::AUTDZA:
-  case AArch64::AUTDZB:
-  case AArch64::AUTIA:
-  case AArch64::AUTIA1716:
-  case AArch64::AUTIASP:
-  case AArch64::AUTIAZ:
-  case AArch64::AUTIB:
-  case AArch64::AUTIB1716:
-  case AArch64::AUTIBSP:
-  case AArch64::AUTIBZ:
-  case AArch64::AUTIZA:
-  case AArch64::AUTIZB:
-    return true;
-  }
-}
-
-bool isLoadTagMultiple(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-
-  // Load tag multiple instruction
-  case AArch64::LDGM:
-    return true;
-  }
-}
-
 static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
   switch (RegBitWidth) {
   case 32:
@@ -334,35 +287,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
     // Function return is a pseudo-instruction that needs to be expanded
     PM.add(createAArch64ExpandPseudoPass());
   }
-
-  const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
-                                           unsigned Opcode) const override {
-    if (const char *Reason =
-            ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode))
-      return Reason;
-
-    if (isPointerAuth(Opcode)) {
-#if defined(__aarch64__) && defined(__linux__)
-      // Disable all PAC keys. Note that while we expect the measurements to
-      // be the same with PAC keys disabled, they could potentially be lower
-      // since authentication checks are bypassed.
-      if (prctl(PR_PAC_SET_ENABLED_KEYS,
-                PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY |
-                    PR_PAC_APDBKEY, // all keys
-                0,                  // disable all
-                0, 0) < 0) {
-        return "Failed to disable PAC keys";
-      }
-#else
-      return "Unsupported opcode: isPointerAuth";
-#endif
-    }
-
-    if (isLoadTagMultiple(Opcode))
-      return "Unsupported opcode: load tag multiple";
-
-    return nullptr;
-  }
   MCRegister getScratchMemoryRegister(const Triple &) const override;
   void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
                           unsigned Offset) const override;



More information about the llvm-commits mailing list