[llvm] [AArch64] Align 0-cycle reg-mov model of GPR64, GPR32 reg classes (PR #146051)
Tomer Shafir via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 27 08:17:55 PDT 2025
https://github.com/tomershafir updated https://github.com/llvm/llvm-project/pull/146051
>From 13275b3e25369ecfc0ee4dcb6e64ff1266e45994 Mon Sep 17 00:00:00 2001
From: tomershafir <tomer.shafir8 at gmail.com>
Date: Fri, 27 Jun 2025 13:25:37 +0300
Subject: [PATCH] [AArch64] Align 0-cycle reg-mov model of GPR64, GPR32 reg
classes
Aligns the 0-cycle register MOV model of the GPR64 and GPR32 register classes with that of FPR64 and FPR32, as resolved in: https://github.com/llvm/llvm-project/pull/144152.
- Splits `FeatureZCRegMove` into `FeatureZCRegMoveGPR64` and `FeatureZCRegMoveGPR32`, and fixes Apple processors and `AArch64InstrInfo` accordingly
- Aligns the test `arm64-zero-cycle-regmov-gpr.ll` to the FPR one
The target feature name change is effectively a breaking change. The vast majority of users shouldn't be using `-mattr=zcm` directly, so I think it should be OK to make an immediate switch without a release note.
---
llvm/lib/Target/AArch64/AArch64Features.td | 7 ++-
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 6 ++-
llvm/lib/Target/AArch64/AArch64Processors.td | 20 +++----
.../AArch64/arm64-zero-cycle-regmov-gpr.ll | 54 +++++++++++++++++++
.../AArch64/arm64-zero-cycle-regmov-gpr32.ll | 45 ----------------
5 files changed, 73 insertions(+), 59 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr.ll
delete mode 100644 llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 24fbe207c4969..05562516e5198 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -612,8 +612,11 @@ def FeatureExperimentalZeroingPseudos
def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
"NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;
-def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
- "Has zero-cycle register moves">;
+def FeatureZCRegMoveGPR64 : SubtargetFeature<"zcm-gpr64", "HasZeroCycleRegMoveGPR64", "true",
+ "Has zero-cycle register moves for GPR64 registers">;
+
+def FeatureZCRegMoveGPR32 : SubtargetFeature<"zcm-gpr32", "HasZeroCycleRegMoveGPR32", "true",
+ "Has zero-cycle register moves for GPR32 registers">;
def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFPR64", "true",
"Has zero-cycle register moves for FPR64 registers">;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c3837cfe73d28..652c34ed87f6e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5037,7 +5037,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
// If either operand is WSP, expand to ADD #0.
- if (Subtarget.hasZeroCycleRegMove()) {
+ if (Subtarget.hasZeroCycleRegMoveGPR64() &&
+ !Subtarget.hasZeroCycleRegMoveGPR32()) {
// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
MCRegister DestRegX = TRI->getMatchingSuperReg(
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
@@ -5063,7 +5064,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
- if (Subtarget.hasZeroCycleRegMove()) {
+ if (Subtarget.hasZeroCycleRegMoveGPR64() &&
+ !Subtarget.hasZeroCycleRegMoveGPR32()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
MCRegister DestRegX = TRI->getMatchingSuperReg(
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 4a5682475d107..dcccde4a4d666 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -311,7 +311,7 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES, FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing,
FeatureZCZeroingFPWorkaround]>;
@@ -325,7 +325,7 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
FeatureFuseAES,
FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;
@@ -338,7 +338,7 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
FeatureFuseAES,
FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;
@@ -351,7 +351,7 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
FeatureFuseAES,
FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;
@@ -364,7 +364,7 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
FeatureFuseAES,
FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;
@@ -382,7 +382,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;
@@ -400,7 +400,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;
@@ -418,7 +418,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;
@@ -436,7 +436,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureStorePairSuppress,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;
@@ -453,7 +453,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
- FeatureZCRegMove,
+ FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing
]>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr.ll
new file mode 100644
index 0000000000000..e14e69b5e6a2a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOTCPU-LINUX --match-full-lines
+; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOTCPU-APPLE --match-full-lines
+; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=CPU --match-full-lines
+; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-gpr64 | FileCheck %s -check-prefixes=NOTATTR --match-full-lines
+; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-gpr64 | FileCheck %s -check-prefixes=ATTR --match-full-lines
+
+define void @zero_cycle_regmov_GPR32(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+; CHECK-LABEL: t:
+; NOTCPU-LINUX: mov w0, w2
+; NOTCPU-LINUX: mov w1, w3
+; NOTCPU-LINUX: mov [[REG2:w[0-9]+]], w3
+; NOTCPU-LINUX: mov [[REG1:w[0-9]+]], w2
+; NOTCPU-LINUX-NEXT: bl {{_?foo_i32}}
+; NOTCPU-LINUX: mov w0, [[REG1]]
+; NOTCPU-LINUX: mov w1, [[REG2]]
+
+; NOTCPU-APPLE: mov w0, w2
+; NOTCPU-APPLE: mov w1, w3
+; NOTCPU-APPLE: mov [[REG2:w[0-9]+]], w3
+; NOTCPU-APPLE: mov [[REG1:w[0-9]+]], w2
+; NOTCPU-APPLE-NEXT: bl {{_?foo_i32}}
+; NOTCPU-APPLE: mov w0, [[REG1]]
+; NOTCPU-APPLE: mov w1, [[REG2]]
+
+; CPU: mov [[REG2:x[0-9]+]], x3
+; CPU: mov [[REG1:x[0-9]+]], x2
+; CPU: mov x0, x2
+; CPU: mov x1, x3
+; CPU-NEXT: bl {{_?foo_i32}}
+; CPU: mov x0, [[REG1]]
+; CPU: mov x1, [[REG2]]
+
+; NOTATTR: mov [[REG2:w[0-9]+]], w3
+; NOTATTR: mov [[REG1:w[0-9]+]], w2
+; NOTATTR: mov w0, w2
+; NOTATTR: mov w1, w3
+; NOTATTR-NEXT: bl {{_?foo_i32}}
+; NOTATTR: mov w0, [[REG1]]
+; NOTATTR: mov w1, [[REG2]]
+
+; ATTR: mov x0, x2
+; ATTR: mov x1, x3
+; ATTR: mov [[REG2:x[0-9]+]], x3
+; ATTR: mov [[REG1:x[0-9]+]], x2
+; ATTR-NEXT: bl {{_?foo_i32}}
+; ATTR: mov x0, [[REG1]]
+; ATTR: mov x1, [[REG2]]
+ %call = call i32 @foo_i32(i32 %c, i32 %d)
+ %call1 = call i32 @foo_i32(i32 %c, i32 %d)
+ unreachable
+}
+
+declare i32 @foo_i32(i32, i32)
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll
deleted file mode 100644
index 5ef6d3e84805a..0000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s -check-prefixes=NOTCPU --match-full-lines
-; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=CPU --match-full-lines
-; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm | FileCheck %s -check-prefixes=NOTATTR --match-full-lines
-; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm | FileCheck %s -check-prefixes=ATTR --match-full-lines
-
-define void @t(i32 %a, i32 %b, i32 %c, i32 %d) {
-entry:
-; CHECK-LABEL: t:
-; NOTCPU: mov w0, w2
-; NOTCPU: mov w1, w3
-; NOTCPU: mov [[REG2:w[0-9]+]], w3
-; NOTCPU: mov [[REG1:w[0-9]+]], w2
-; NOTCPU-NEXT: bl {{_?foo}}
-; NOTCPU: mov w0, [[REG1]]
-; NOTCPU: mov w1, [[REG2]]
-
-; CPU: mov [[REG2:x[0-9]+]], x3
-; CPU: mov [[REG1:x[0-9]+]], x2
-; CPU: mov x0, x2
-; CPU: mov x1, x3
-; CPU-NEXT: bl {{_?foo}}
-; CPU: mov x0, [[REG1]]
-; CPU: mov x1, [[REG2]]
-
-; NOTATTR: mov [[REG2:w[0-9]+]], w3
-; NOTATTR: mov [[REG1:w[0-9]+]], w2
-; NOTATTR: mov w0, w2
-; NOTATTR: mov w1, w3
-; NOTATTR-NEXT: bl {{_?foo}}
-; NOTATTR: mov w0, [[REG1]]
-; NOTATTR: mov w1, [[REG2]]
-
-; ATTR: mov x0, x2
-; ATTR: mov x1, x3
-; ATTR: mov [[REG2:x[0-9]+]], x3
-; ATTR: mov [[REG1:x[0-9]+]], x2
-; ATTR-NEXT: bl {{_?foo}}
-; ATTR: mov x0, [[REG1]]
-; ATTR: mov x1, [[REG2]]
- %call = call i32 @foo(i32 %c, i32 %d)
- %call1 = call i32 @foo(i32 %c, i32 %d)
- unreachable
-}
-
-declare i32 @foo(i32, i32)
More information about the llvm-commits
mailing list