[llvm] [llvm-exegesis] Implement the loop repetition mode for AArch64 (PR #154751)
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 21 05:48:54 PDT 2025
https://github.com/sjoerdmeijer created https://github.com/llvm/llvm-project/pull/154751
Subject says it all: implement the loop iterator decrement and jump functions, and reserve X19 for the loop counter.
>From 85cfa6ef561e6c8c1fb67e1c09be76ab174862b2 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 21 Aug 2025 02:54:31 -0700
Subject: [PATCH] [llvm-exegesis] Implement the loop repetition mode for
AArch64
Subject says it all: implement the loop iterator decrement and jump
functions, and reserve X19 for the loop counter.
---
.../llvm-exegesis/AArch64/loop-register.s | 17 ++++++++++
.../llvm-exegesis/lib/AArch64/Target.cpp | 31 +++++++++++++++++++
2 files changed, 48 insertions(+)
create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/loop-register.s
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/loop-register.s b/llvm/test/tools/llvm-exegesis/AArch64/loop-register.s
new file mode 100644
index 0000000000000..2e67937ad0ef6
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/loop-register.s
@@ -0,0 +1,17 @@
+REQUIRES: aarch64-registered-target, asserts
+
+RUN: llvm-exegesis -mcpu=neoverse-v2 --use-dummy-perf-counters --mode=latency --debug-only=print-gen-assembly --opcode-name=ADDVv4i16v -repetition-mode=loop 2>&1 | FileCheck %s
+
+CHECK: 0: {{.*}} str x19, [sp, #-16]!
+CHECK-NEXT: 4: {{.*}} movi d[[REG:[0-9]+]], #0000000000000000
+CHECK-NEXT: 8: {{.*}} mov x19, #10000
+CHECK-NEXT: c: {{.*}} nop
+CHECK-NEXT: 10: {{.*}} nop
+CHECK-NEXT: 14: {{.*}} nop
+CHECK-NEXT: 18: {{.*}} nop
+CHECK-NEXT: 1c: {{.*}} nop
+CHECK-NEXT: 20: {{.*}} addv h[[REG]], v[[REG]].4h
+CHECK-NEXT: 24: {{.*}} subs x19, x19, #1
+CHECK-NEXT: 28: {{.*}} cbnz x19, #-8
+CHECK-NEXT: 2c: {{.*}} ldr x19, [sp], #16
+CHECK-NEXT: 30: {{.*}} ret
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 3a0021e3c132d..d59dd1688dfa4 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,6 +28,8 @@
#define GET_AVAILABLE_OPCODE_CHECKER
#include "AArch64GenInstrInfo.inc"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
namespace llvm {
namespace exegesis {
@@ -109,6 +111,10 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
namespace {
+// Register that holds the loop counter in the loop repetition mode. X19 is
+// used since it's a callee-saved register that's available for temporary use.
+constexpr const MCPhysReg kDefaultLoopCounterReg = AArch64::X19;
+
class ExegesisAArch64Target : public ExegesisTarget {
public:
ExegesisAArch64Target()
@@ -141,6 +147,31 @@ class ExegesisAArch64Target : public ExegesisTarget {
errs() << "setRegTo is not implemented, results will be unreliable\n";
return {};
}
+ MCRegister getDefaultLoopCounterRegister(const Triple &) const override {
+ return kDefaultLoopCounterReg; // Same register whatever the triple is.
+ }
+
+ void decrementLoopCounterAndJump(
+ MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
+ const MCInstrInfo &MII, MCRegister LoopRegister) const override {
+ // Emit `subs LoopRegister, LoopRegister, #1` at the end of MBB.
+ BuildMI(&MBB, DebugLoc(), MII.get(AArch64::SUBSXri))
+ .addDef(LoopRegister)
+ .addUse(LoopRegister)
+ .addImm(1) // Immediate operand: decrement by one
+ .addImm(0); // Shift amount operand: no shift
+ // Emit `cbnz LoopRegister, TargetMBB`: jump to TargetMBB while non-zero.
+ BuildMI(&MBB, DebugLoc(), MII.get(AArch64::CBNZX))
+ .addUse(LoopRegister)
+ .addMBB(&TargetMBB);
+ }
+
+
+ // Registers snippets must not use; currently only the loop counter register.
+ const MCPhysReg UnavailableRegisters[1] = {kDefaultLoopCounterReg};
+ ArrayRef<MCPhysReg> getUnavailableRegisters() const override {
+ return UnavailableRegisters;
+ }
bool matchesArch(Triple::ArchType Arch) const override {
return Arch == Triple::aarch64 || Arch == Triple::aarch64_be;
More information about the llvm-commits
mailing list