[llvm] [AArch64] Consider COPY between disjoint register classes as expensive (PR #167661)

Guy David via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 16 13:05:33 PST 2025


================
@@ -0,0 +1,141 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=early-machinelicm -o - %s | FileCheck %s
+
+# This test verifies that cross-register-class copies (e.g., GPR to FPR)
+# are hoisted out of loops by MachineLICM, as they are expensive on AArch64.
+
+--- |
+  declare void @use_float(float)
+
+  define void @cross_regclass_copy_hoisted() {
+    ret void
+  }
+
+  define void @cross_regclass_physical_copy_hoisted() {
+    ret void
+  }
+...
+---
+name: cross_regclass_copy_hoisted
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cross_regclass_copy_hoisted
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr32all = COPY $wzr
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gpr32all = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:fpr32 = COPY [[COPY1]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.0, %5, %bb.2
+  ; CHECK-NEXT:   [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 1, %bb.3, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   $s0 = COPY [[COPY4]]
+  ; CHECK-NEXT:   BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+  ; CHECK-NEXT:   B %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   RET_ReallyLR
+  bb.0:
+    liveins: $w0, $w1
+    %1:gpr32 = COPY $w0
+    %0:gpr32 = COPY $w1
+    %3:gpr32all = COPY $wzr
+    %2:gpr32all = COPY %3:gpr32all
+
+  bb.1:
+    %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
+    %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
+    Bcc 1, %bb.3, implicit $nzcv
+    B %bb.2
+
+  bb.2:
+    %7:fpr32 = COPY %0:gpr32
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $s0 = COPY %7:fpr32
+    BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    %8:gpr32sp = ADDWri %4:gpr32common, 1, 0
+    %5:gpr32all = COPY %8:gpr32sp
+    B %bb.1
+
+  bb.3:
+    RET_ReallyLR
+
+...
+---
+name: cross_regclass_physical_copy_hoisted
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cross_regclass_physical_copy_hoisted
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32all = COPY $wzr
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr32 = COPY $wzr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY2]], %bb.0, %4, %bb.2
+  ; CHECK-NEXT:   [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 1, %bb.3, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   $s0 = COPY [[COPY3]]
+  ; CHECK-NEXT:   BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+  ; CHECK-NEXT:   B %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   RET_ReallyLR
+  bb.0:
+    liveins: $w0
+    %1:gpr32 = COPY $w0
+    %3:gpr32all = COPY $wzr
+    %2:gpr32all = COPY %3:gpr32all
+
+  bb.1:
+    %4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
+    %6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
+    Bcc 1, %bb.3, implicit $nzcv
+    B %bb.2
+
+  bb.2:
+    %7:fpr32 = COPY $wzr
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $s0 = COPY %7:fpr32
----------------
guy-david wrote:

In this case, the instruction expected to be hoisted is `%7:fpr32 = COPY $wzr`. I have also added an FPR-to-GPR test.

https://github.com/llvm/llvm-project/pull/167661


More information about the llvm-commits mailing list