[llvm] Reapply "[llvm-exegesis] Implement the loop repetition mode for AArch64" (#155423) (PR #155589)

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 27 03:15:10 PDT 2025


https://github.com/sjoerdmeijer created https://github.com/llvm/llvm-project/pull/155589

This includes two minor fixes:
- "CodeGen" has been added to LLVM_LINK_COMPONENTS for the AArch64 exegesis library to prevent a link error,
- the test case has been made less strict, and therefore less fragile, by not checking the addresses.

>From 831e097bdfd5749d00bff671b3f797bf56cf3440 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Wed, 27 Aug 2025 02:40:48 -0700
Subject: [PATCH] Reapply "[llvm-exegesis] Implement the loop repetition mode
 for AArch64" (#155423)

This includes two minor fixes:
- "CodeGen" has been added to LLVM_LINK_COMPONENTS for the AArch64
  exegesis library to prevent a link error,
- the test case has been made less strict, and therefore less fragile,
  by not checking the addresses.
---
 .../llvm-exegesis/AArch64/loop-register.s     | 17 +++++++++++
 .../llvm-exegesis/lib/AArch64/CMakeLists.txt  |  1 +
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 30 +++++++++++++++++++
 3 files changed, 48 insertions(+)
 create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/loop-register.s

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/loop-register.s b/llvm/test/tools/llvm-exegesis/AArch64/loop-register.s
new file mode 100644
index 0000000000000..62040b26c8faf
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/loop-register.s
@@ -0,0 +1,17 @@
+REQUIRES: aarch64-registered-target, asserts
+
+RUN: llvm-exegesis -mcpu=neoverse-v2 --use-dummy-perf-counters --mode=latency --debug-only=print-gen-assembly --opcode-name=ADDVv4i16v -repetition-mode=loop 2>&1 | FileCheck %s
+
+CHECK:       str     x19, [sp, #-16]!
+CHECK-NEXT:  movi    d[[REG:[0-9]+]], #0000000000000000
+CHECK-NEXT:  mov     x19, #10000
+CHECK-NEXT:  nop
+CHECK-NEXT:  nop
+CHECK-NEXT:  nop
+CHECK-NEXT:  nop
+CHECK-NEXT:  nop
+CHECK-NEXT:  addv    h[[REG]], v[[REG]].4h
+CHECK-NEXT:  subs    x19, x19, #1
+CHECK-NEXT:  b.ne    #-8
+CHECK-NEXT:  ldr     x19, [sp], #16
+CHECK-NEXT:  ret
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/AArch64/CMakeLists.txt
index 42f8fd30ac2bb..091215c18632d 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/CMakeLists.txt
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/CMakeLists.txt
@@ -5,6 +5,7 @@ include_directories(
 
 set(LLVM_LINK_COMPONENTS
   AArch64
+  CodeGen
   CodeGenTypes
   Core
   Exegesis
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 3a0021e3c132d..c4ad9ae201d4e 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -8,6 +8,7 @@
 #include "../Target.h"
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 
 #if defined(__aarch64__) && defined(__linux__)
 #include <sys/prctl.h> // For PR_PAC_* constants
@@ -109,6 +110,10 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
 
 namespace {
 
+// Use X19 as the loop counter register since it's a callee-saved register
+// that's available for temporary use.
+constexpr const MCPhysReg kDefaultLoopCounterReg = AArch64::X19;
+
 class ExegesisAArch64Target : public ExegesisTarget {
 public:
   ExegesisAArch64Target()
@@ -141,6 +146,31 @@ class ExegesisAArch64Target : public ExegesisTarget {
     errs() << "setRegTo is not implemented, results will be unreliable\n";
     return {};
   }
+  MCRegister getDefaultLoopCounterRegister(const Triple &) const override {
+    return kDefaultLoopCounterReg;
+  }
+
+  void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
+                                   MachineBasicBlock &TargetMBB,
+                                   const MCInstrInfo &MII,
+                                   MCRegister LoopRegister) const override {
+    // subs LoopRegister, LoopRegister, #1
+    BuildMI(&MBB, DebugLoc(), MII.get(AArch64::SUBSXri))
+        .addDef(LoopRegister)
+        .addUse(LoopRegister)
+        .addImm(1)  // Subtract 1
+        .addImm(0); // No shift amount
+    // b.ne TargetMBB
+    BuildMI(&MBB, DebugLoc(), MII.get(AArch64::Bcc))
+        .addImm(AArch64CC::NE)
+        .addMBB(&TargetMBB);
+  }
+
+  // Registers that should not be selected for use in snippets.
+  const MCPhysReg UnavailableRegisters[1] = {kDefaultLoopCounterReg};
+  ArrayRef<MCPhysReg> getUnavailableRegisters() const override {
+    return UnavailableRegisters;
+  }
 
   bool matchesArch(Triple::ArchType Arch) const override {
     return Arch == Triple::aarch64 || Arch == Triple::aarch64_be;



More information about the llvm-commits mailing list