[llvm] [AArch64] Mark FMOVvXfY_ns as rematerializable, cheap (PR #169186)
Guy David via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 23 08:13:37 PST 2025
https://github.com/guy-david updated https://github.com/llvm/llvm-project/pull/169186
From eea688fded97b3682a9374acc171657099b8ac95 Mon Sep 17 00:00:00 2001
From: Guy David <guyda96 at gmail.com>
Date: Sat, 22 Nov 2025 19:45:46 +0200
Subject: [PATCH] [AArch64] Mark FMOVvXfY_ns as rematerializable, cheap
Without these flags, the register allocator may spill and reload constants
that can be rematerialized with a single instruction.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +
.../CodeGen/AArch64/remat-fmov-vector-imm.mir | 159 ++++++++++++++++++
2 files changed, 161 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.mir
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 50a3a4ab8d8b6..8dca6a054aa52 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8354,6 +8354,7 @@ def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
}
// AdvSIMD FMOV
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
"fmov", ".2d",
[(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
@@ -8371,6 +8372,7 @@ def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
"fmov", ".8h",
[(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]
+}
// AdvSIMD MOVI
diff --git a/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.mir b/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.mir
new file mode 100644
index 0000000000000..ddc9aa17e8788
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.mir
@@ -0,0 +1,159 @@
+# NOTE: The CHECK lines below are hand-written; do not regenerate them with utils/update_mir_test_checks.py.
+# RUN: llc -mtriple=aarch64 -mattr=+neon -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64 -mattr=+neon,+fullfp16 -run-pass=greedy -o - %s | FileCheck %s --check-prefix=CHECK-FP16
+
+# Test that FMOVvXfY_ns vector-immediate instructions are rematerialized after
+# an inline asm clobbers every FP register, instead of being spilled and reloaded.
+
+--- |
+ declare void @bar_v2f64(<2 x double>)
+ declare void @bar_v4f32(<4 x float>)
+ declare void @bar_v2f32(<2 x float>)
+ declare void @bar_v4f16(<4 x half>)
+ declare void @bar_v8f16(<8 x half>)
+
+ define void @test_remat_v2f64() {
+ ret void
+ }
+
+ define void @test_remat_v4f32() {
+ ret void
+ }
+
+ define void @test_remat_v2f32() {
+ ret void
+ }
+
+ define void @test_remat_v4f16() {
+ ret void
+ }
+
+ define void @test_remat_v8f16() {
+ ret void
+ }
+...
+
+---
+name: test_remat_v2f64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_remat_v2f64
+ ; CHECK: bb.0:
+ ; CHECK-DAG: FMOVv2f64_ns 127
+ ; CHECK: BL @bar_v2f64
+ ; CHECK: INLINEASM
+ ; CHECK-DAG: FMOVv2f64_ns 127
+ ; CHECK: BL @bar_v2f64
+ bb.0:
+ %0:fpr128 = FMOVv2f64_ns 127
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v2f64, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber q0-q31 so %0 cannot stay live in any FP register across the asm.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v2f64, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+---
+name: test_remat_v4f32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_remat_v4f32
+ ; CHECK: bb.0:
+ ; CHECK-DAG: FMOVv4f32_ns 112
+ ; CHECK: BL @bar_v4f32
+ ; CHECK: INLINEASM
+ ; CHECK-DAG: FMOVv4f32_ns 112
+ ; CHECK: BL @bar_v4f32
+ bb.0:
+ %0:fpr128 = FMOVv4f32_ns 112
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v4f32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber q0-q31 so %0 cannot stay live in any FP register across the asm.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v4f32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+---
+name: test_remat_v2f32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_remat_v2f32
+ ; CHECK: bb.0:
+ ; CHECK-DAG: FMOVv2f32_ns 115
+ ; CHECK: BL @bar_v2f32
+ ; CHECK: INLINEASM
+ ; CHECK-DAG: FMOVv2f32_ns 115
+ ; CHECK: BL @bar_v2f32
+ bb.0:
+ %0:fpr64 = FMOVv2f32_ns 115
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $d0 = COPY %0
+ BL @bar_v2f32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $d0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber q0-q31 (and thus d0-d31) so %0 cannot stay live in any FP register across the asm.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $d0 = COPY %0
+ BL @bar_v2f32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $d0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+---
+name: test_remat_v4f16
+tracksRegLiveness: true
+body: |
+ ; CHECK-FP16-LABEL: name: test_remat_v4f16
+ ; CHECK-FP16: bb.0:
+ ; CHECK-FP16-DAG: FMOVv4f16_ns 124
+ ; CHECK-FP16: BL @bar_v4f16
+ ; CHECK-FP16: INLINEASM
+ ; CHECK-FP16-DAG: FMOVv4f16_ns 124
+ ; CHECK-FP16: BL @bar_v4f16
+ bb.0:
+ %0:fpr64 = FMOVv4f16_ns 124
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $d0 = COPY %0
+ BL @bar_v4f16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $d0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber q0-q31 (and thus d0-d31) so %0 cannot stay live in any FP register across the asm.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $d0 = COPY %0
+ BL @bar_v4f16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $d0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+---
+name: test_remat_v8f16
+tracksRegLiveness: true
+body: |
+ ; CHECK-FP16-LABEL: name: test_remat_v8f16
+ ; CHECK-FP16: bb.0:
+ ; CHECK-FP16-DAG: FMOVv8f16_ns 128
+ ; CHECK-FP16: BL @bar_v8f16
+ ; CHECK-FP16: INLINEASM
+ ; CHECK-FP16-DAG: FMOVv8f16_ns 128
+ ; CHECK-FP16: BL @bar_v8f16
+ bb.0:
+ %0:fpr128 = FMOVv8f16_ns 128
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v8f16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber q0-q31 so %0 cannot stay live in any FP register across the asm.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v8f16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
More information about the llvm-commits
mailing list