[llvm] [AArch64] Treat COPY between cross-register banks as expensive (PR #167661)
Guy David via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 12:01:42 PST 2025
https://github.com/guy-david updated https://github.com/llvm/llvm-project/pull/167661
>From 24b8b479945ced40111017e219d12e21061246f7 Mon Sep 17 00:00:00 2001
From: Guy David <guyda at apple.com>
Date: Sat, 8 Nov 2025 23:59:30 +0200
Subject: [PATCH] [AArch64] Treat COPY between cross-register banks as
expensive
The motivation is to allow passes such as MachineLICM to hoist trivial
FMOV instructions out of loops; previously they were not hoisted, even
when the RHS was a constant.
On most architectures, these expensive move instructions have a latency
of 2-6 cycles, and are certainly not as cheap as a 0-1 cycle move.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 25 +++
.../CodeGen/AArch64/licm-regclass-copy.mir | 197 ++++++++++++++++++
2 files changed, 222 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/licm-regclass-copy.mir
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 221812f1ebc7b..00fe8ee8b9b4d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1144,6 +1144,28 @@ static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
return Is.size() <= 2;
}
+// Decide whether a COPY is as cheap as a move; cross-bank copies are not.
+static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
+ assert(MI.isCopy() && "Expected COPY instruction");
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+
+ // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64,
+ // typically requiring an FMOV instruction with a 2-6 cycle latency.
+ auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
+ if (Reg.isVirtual())
+ return MRI.getRegClass(Reg);
+ if (Reg.isPhysical())
+ return RI.getMinimalPhysRegClass(Reg);
+ return nullptr; // Neither virtual nor physical: class unknown.
+ };
+ const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
+ const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
+ if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
+ return false;
+ // Otherwise defer to the generic MachineInstr::isAsCheapAsAMove() answer.
+ return MI.isAsCheapAsAMove();
+}
+
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
@@ -1157,6 +1179,9 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
default:
return MI.isAsCheapAsAMove();
+ case TargetOpcode::COPY:
+ return isCheapCopy(MI, RI);
+
case AArch64::ADDWrs:
case AArch64::ADDXrs:
case AArch64::SUBWrs:
diff --git a/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir b/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir
new file mode 100644
index 0000000000000..6a10df68ddc71
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir
@@ -0,0 +1,197 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=early-machinelicm -o - %s | FileCheck %s
+
+# This test verifies that cross-bank copies (e.g., GPR to FPR, FPR to GPR)
+# are hoisted out of loops by MachineLICM, as they are expensive on AArch64.
+
+--- |
+ declare void @use_float(float)
+ declare void @use_int(i32)
+
+ define void @gpr_to_fpr_virtual_copy_hoisted() {
+ ret void
+ }
+
+ define void @gpr_to_fpr_physical_copy_hoisted() {
+ ret void
+ }
+
+ define void @fpr_to_gpr_virtual_copy_hoisted() {
+ ret void
+ }
+...
+---
+name: gpr_to_fpr_virtual_copy_hoisted
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: gpr_to_fpr_virtual_copy_hoisted
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY $wzr
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[COPY1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.0, %5, %bb.2
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $s0 = COPY [[COPY4]]
+ ; CHECK-NEXT: BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0:
+ liveins: $w0, $w1
+ %1:gpr32 = COPY $w0
+ %0:gpr32 = COPY $w1
+ %3:gpr32all = COPY $wzr
+ %2:gpr32all = COPY %3:gpr32all
+
+ bb.1:
+ %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
+ %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
+ Bcc 1, %bb.3, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %7:fpr32 = COPY %0:gpr32
+ $s0 = COPY %7:fpr32
+ BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+ %8:gpr32sp = ADDWri %4:gpr32common, 1, 0
+ %5:gpr32all = COPY %8:gpr32sp
+ B %bb.1
+
+ bb.3:
+ RET_ReallyLR
+
+...
+---
+name: gpr_to_fpr_physical_copy_hoisted
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: gpr_to_fpr_physical_copy_hoisted
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY $wzr
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $wzr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY2]], %bb.0, %4, %bb.2
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $s0 = COPY [[COPY3]]
+ ; CHECK-NEXT: BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0:
+ liveins: $w0
+ %1:gpr32 = COPY $w0
+ %3:gpr32all = COPY $wzr
+ %2:gpr32all = COPY %3:gpr32all
+
+ bb.1:
+ %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
+ %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
+ Bcc 1, %bb.3, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %7:fpr32 = COPY $wzr
+ $s0 = COPY %7:fpr32
+ BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+ %8:gpr32sp = ADDWri %4:gpr32common, 1, 0
+ %5:gpr32all = COPY %8:gpr32sp
+ B %bb.1
+
+ bb.3:
+ RET_ReallyLR
+
+...
+---
+name: fpr_to_gpr_virtual_copy_hoisted
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: fpr_to_gpr_virtual_copy_hoisted
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY $wzr
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.0, %5, %bb.2
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $w0 = COPY [[COPY4]]
+ ; CHECK-NEXT: BL @use_int, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0:
+ liveins: $w0, $s0
+ %1:gpr32 = COPY $w0
+ %0:fpr32 = COPY $s0
+ %3:gpr32all = COPY $wzr
+ %2:gpr32all = COPY %3:gpr32all
+
+ bb.1:
+ %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
+ %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
+ Bcc 1, %bb.3, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %7:gpr32 = COPY %0:fpr32
+ $w0 = COPY %7:gpr32
+ BL @use_int, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp
+ %8:gpr32sp = ADDWri %4:gpr32common, 1, 0
+ %5:gpr32all = COPY %8:gpr32sp
+ B %bb.1
+
+ bb.3:
+ RET_ReallyLR
+
+...
More information about the llvm-commits
mailing list