[llvm] [AArch64] Consider COPY between disjoint register classes as expensive (PR #167661)
Guy David via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 16 13:05:33 PST 2025
================
@@ -0,0 +1,141 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=early-machinelicm -o - %s | FileCheck %s
+
+# This test verifies that cross-register-class copies (e.g., GPR to FPR)
+# are hoisted out of loops by MachineLICM, as they are expensive on AArch64.
+
+--- |
+ declare void @use_float(float)
+
+ define void @cross_regclass_copy_hoisted() {
+ ret void
+ }
+
+ define void @cross_regclass_physical_copy_hoisted() {
+ ret void
+ }
+...
+---
+# Virtual-register case: bb.1 and bb.2 form a loop, and the loop body (bb.2)
+# contains a gpr32 -> fpr32 COPY of %0, which is defined in bb.0. The expected
+# output below has that COPY in bb.0, i.e. moved out of the loop.
+name: cross_regclass_copy_hoisted
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cross_regclass_copy_hoisted
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY $wzr
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[COPY1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.0, %5, %bb.2
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: $s0 = COPY [[COPY4]]
+ ; CHECK-NEXT: BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0:
+ liveins: $w0, $w1
+ %1:gpr32 = COPY $w0
+ %0:gpr32 = COPY $w1
+ %3:gpr32all = COPY $wzr
+ %2:gpr32all = COPY %3:gpr32all
+
+ bb.1:
+ %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
+ %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
+ Bcc 1, %bb.3, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ ; Loop-invariant cross-class copy (gpr32 -> fpr32); expected to end up in bb.0.
+ %7:fpr32 = COPY %0:gpr32
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $s0 = COPY %7:fpr32
+ BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ %8:gpr32sp = ADDWri %4:gpr32common, 1, 0
+ %5:gpr32all = COPY %8:gpr32sp
+ B %bb.1
+
+ bb.3:
+ RET_ReallyLR
+
+...
+---
+name: cross_regclass_physical_copy_hoisted
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cross_regclass_physical_copy_hoisted
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY $wzr
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $wzr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY2]], %bb.0, %4, %bb.2
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: $s0 = COPY [[COPY3]]
+ ; CHECK-NEXT: BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0:
+ liveins: $w0
+ %1:gpr32 = COPY $w0
+ %3:gpr32all = COPY $wzr
+ %2:gpr32all = COPY %3:gpr32all
+
+ bb.1:
+ %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
+ %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
+ Bcc 1, %bb.3, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %7:fpr32 = COPY $wzr
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $s0 = COPY %7:fpr32
----------------
guy-david wrote:
In this case, the instruction expected to be hoisted is `%7:fpr32 = COPY $wzr`. I have also added an FPR-to-GPR test.
https://github.com/llvm/llvm-project/pull/167661
More information about the llvm-commits mailing list