[llvm] [AArch64] Mark FMOVvXfY_ns as rematerializable, cheap (PR #169186)
Guy David via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 23 08:03:26 PST 2025
https://github.com/guy-david updated https://github.com/llvm/llvm-project/pull/169186
>From 592ce3249c94528c77c47e01e48510449700fd72 Mon Sep 17 00:00:00 2001
From: Guy David <guyda96 at gmail.com>
Date: Sat, 22 Nov 2025 19:45:46 +0200
Subject: [PATCH] [AArch64] Mark FMOVvXfY_ns as rematerializable, cheap
Otherwise, the register allocator may spill and reload constants that
can be rematerialized with a single instruction.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +
.../CodeGen/AArch64/remat-fmov-vector-imm.ll | 456 ++++++++++++++++++
.../CodeGen/AArch64/remat-fmov-vector-imm.mir | 159 ++++++
3 files changed, 617 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.ll
create mode 100644 llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.mir
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 50a3a4ab8d8b6..8dca6a054aa52 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8354,6 +8354,7 @@ def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
}
// AdvSIMD FMOV
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
"fmov", ".2d",
[(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
@@ -8371,6 +8372,7 @@ def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
"fmov", ".8h",
[(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]
+}
// AdvSIMD MOVI
diff --git a/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.ll b/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.ll
new file mode 100644
index 0000000000000..035e9a701d9d1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.ll
@@ -0,0 +1,456 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16
+
+; Check that vector float constants that can be created with FMOV immediate
+; get rematerialized instead of being spilled/filled.
+
+declare void @bar_v2f64(<2 x double>)
+declare void @bar_v4f32(<4 x float>)
+declare void @bar_v2f32(<2 x float>)
+declare void @bar_v4f16(<4 x half>)
+declare void @bar_v8f16(<8 x half>)
+
+; Test rematerialization of v2f64 FMOV immediate
+define void @test_v2f64_remat() {
+; CHECK-LABEL: test_v2f64_remat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: fmov v0.2d, #2.00000000
+; CHECK-NEXT: bl bar_v2f64
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: fmov v0.2d, #2.00000000
+; CHECK-NEXT: bl bar_v2f64
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; CHECK-FP16-LABEL: test_v2f64_remat:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 80
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: .cfi_offset b8, -24
+; CHECK-FP16-NEXT: .cfi_offset b9, -32
+; CHECK-FP16-NEXT: .cfi_offset b10, -40
+; CHECK-FP16-NEXT: .cfi_offset b11, -48
+; CHECK-FP16-NEXT: .cfi_offset b12, -56
+; CHECK-FP16-NEXT: .cfi_offset b13, -64
+; CHECK-FP16-NEXT: .cfi_offset b14, -72
+; CHECK-FP16-NEXT: .cfi_offset b15, -80
+; CHECK-FP16-NEXT: fmov v0.2d, #2.00000000
+; CHECK-FP16-NEXT: bl bar_v2f64
+; CHECK-FP16-NEXT: //APP
+; CHECK-FP16-NEXT: //NO_APP
+; CHECK-FP16-NEXT: fmov v0.2d, #2.00000000
+; CHECK-FP16-NEXT: bl bar_v2f64
+; CHECK-FP16-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-FP16-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+entry:
+ ; 2.0 can be encoded as FMOV immediate
+ %const = insertelement <2 x double> undef, double 2.0, i32 0
+ %vec = shufflevector <2 x double> %const, <2 x double> undef, <2 x i32> zeroinitializer
+ call void @bar_v2f64(<2 x double> %vec)
+ ; Clobber all vector registers to force spilling
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ call void @bar_v2f64(<2 x double> %vec)
+ ret void
+}
+
+; Test rematerialization of v4f32 FMOV immediate
+define void @test_v4f32_remat() {
+; CHECK-LABEL: test_v4f32_remat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: fmov v0.4s, #1.00000000
+; CHECK-NEXT: bl bar_v4f32
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: fmov v0.4s, #1.00000000
+; CHECK-NEXT: bl bar_v4f32
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; CHECK-FP16-LABEL: test_v4f32_remat:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 80
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: .cfi_offset b8, -24
+; CHECK-FP16-NEXT: .cfi_offset b9, -32
+; CHECK-FP16-NEXT: .cfi_offset b10, -40
+; CHECK-FP16-NEXT: .cfi_offset b11, -48
+; CHECK-FP16-NEXT: .cfi_offset b12, -56
+; CHECK-FP16-NEXT: .cfi_offset b13, -64
+; CHECK-FP16-NEXT: .cfi_offset b14, -72
+; CHECK-FP16-NEXT: .cfi_offset b15, -80
+; CHECK-FP16-NEXT: fmov v0.4s, #1.00000000
+; CHECK-FP16-NEXT: bl bar_v4f32
+; CHECK-FP16-NEXT: //APP
+; CHECK-FP16-NEXT: //NO_APP
+; CHECK-FP16-NEXT: fmov v0.4s, #1.00000000
+; CHECK-FP16-NEXT: bl bar_v4f32
+; CHECK-FP16-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-FP16-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+entry:
+ ; 1.0 can be encoded as FMOV immediate
+ %const = insertelement <4 x float> undef, float 1.0, i32 0
+ %vec = shufflevector <4 x float> %const, <4 x float> undef, <4 x i32> zeroinitializer
+ call void @bar_v4f32(<4 x float> %vec)
+ ; Clobber all vector registers to force spilling
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ call void @bar_v4f32(<4 x float> %vec)
+ ret void
+}
+
+; Test rematerialization of v2f32 FMOV immediate
+define void @test_v2f32_remat() {
+; CHECK-LABEL: test_v2f32_remat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: fmov v0.2s, #3.00000000
+; CHECK-NEXT: bl bar_v2f32
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: fmov v0.2s, #3.00000000
+; CHECK-NEXT: bl bar_v2f32
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; CHECK-FP16-LABEL: test_v2f32_remat:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 80
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: .cfi_offset b8, -24
+; CHECK-FP16-NEXT: .cfi_offset b9, -32
+; CHECK-FP16-NEXT: .cfi_offset b10, -40
+; CHECK-FP16-NEXT: .cfi_offset b11, -48
+; CHECK-FP16-NEXT: .cfi_offset b12, -56
+; CHECK-FP16-NEXT: .cfi_offset b13, -64
+; CHECK-FP16-NEXT: .cfi_offset b14, -72
+; CHECK-FP16-NEXT: .cfi_offset b15, -80
+; CHECK-FP16-NEXT: fmov v0.2s, #3.00000000
+; CHECK-FP16-NEXT: bl bar_v2f32
+; CHECK-FP16-NEXT: //APP
+; CHECK-FP16-NEXT: //NO_APP
+; CHECK-FP16-NEXT: fmov v0.2s, #3.00000000
+; CHECK-FP16-NEXT: bl bar_v2f32
+; CHECK-FP16-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-FP16-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+entry:
+ ; 3.0 can be encoded as FMOV immediate
+ %const = insertelement <2 x float> undef, float 3.0, i32 0
+ %vec = shufflevector <2 x float> %const, <2 x float> undef, <2 x i32> zeroinitializer
+ call void @bar_v2f32(<2 x float> %vec)
+ ; Clobber all vector registers to force spilling
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ call void @bar_v2f32(<2 x float> %vec)
+ ret void
+}
+
+; Test rematerialization of v4f16 FMOV immediate (requires fullfp16)
+define void @test_v4f16_remat() {
+; CHECK-LABEL: test_v4f16_remat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: movi v0.4h, #60, lsl #8
+; CHECK-NEXT: bl bar_v4f16
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: movi v0.4h, #60, lsl #8
+; CHECK-NEXT: bl bar_v4f16
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; CHECK-FP16-LABEL: test_v4f16_remat:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 80
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: .cfi_offset b8, -24
+; CHECK-FP16-NEXT: .cfi_offset b9, -32
+; CHECK-FP16-NEXT: .cfi_offset b10, -40
+; CHECK-FP16-NEXT: .cfi_offset b11, -48
+; CHECK-FP16-NEXT: .cfi_offset b12, -56
+; CHECK-FP16-NEXT: .cfi_offset b13, -64
+; CHECK-FP16-NEXT: .cfi_offset b14, -72
+; CHECK-FP16-NEXT: .cfi_offset b15, -80
+; CHECK-FP16-NEXT: movi v0.4h, #60, lsl #8
+; CHECK-FP16-NEXT: bl bar_v4f16
+; CHECK-FP16-NEXT: //APP
+; CHECK-FP16-NEXT: //NO_APP
+; CHECK-FP16-NEXT: movi v0.4h, #60, lsl #8
+; CHECK-FP16-NEXT: bl bar_v4f16
+; CHECK-FP16-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-FP16-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+entry:
+ ; 1.0 can be encoded as FMOV immediate
+ %const = insertelement <4 x half> undef, half 1.0, i32 0
+ %vec = shufflevector <4 x half> %const, <4 x half> undef, <4 x i32> zeroinitializer
+ call void @bar_v4f16(<4 x half> %vec)
+ ; Clobber all vector registers to force spilling
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ call void @bar_v4f16(<4 x half> %vec)
+ ret void
+}
+
+; Test rematerialization of v8f16 FMOV immediate (requires fullfp16)
+define void @test_v8f16_remat() {
+; CHECK-LABEL: test_v8f16_remat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: movi v0.8h, #64, lsl #8
+; CHECK-NEXT: bl bar_v8f16
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: movi v0.8h, #64, lsl #8
+; CHECK-NEXT: bl bar_v8f16
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; CHECK-FP16-LABEL: test_v8f16_remat:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 80
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: .cfi_offset b8, -24
+; CHECK-FP16-NEXT: .cfi_offset b9, -32
+; CHECK-FP16-NEXT: .cfi_offset b10, -40
+; CHECK-FP16-NEXT: .cfi_offset b11, -48
+; CHECK-FP16-NEXT: .cfi_offset b12, -56
+; CHECK-FP16-NEXT: .cfi_offset b13, -64
+; CHECK-FP16-NEXT: .cfi_offset b14, -72
+; CHECK-FP16-NEXT: .cfi_offset b15, -80
+; CHECK-FP16-NEXT: movi v0.8h, #64, lsl #8
+; CHECK-FP16-NEXT: bl bar_v8f16
+; CHECK-FP16-NEXT: //APP
+; CHECK-FP16-NEXT: //NO_APP
+; CHECK-FP16-NEXT: movi v0.8h, #64, lsl #8
+; CHECK-FP16-NEXT: bl bar_v8f16
+; CHECK-FP16-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-FP16-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ret
+entry:
+ ; 2.0 can be encoded as FMOV immediate
+ %const = insertelement <8 x half> undef, half 2.0, i32 0
+ %vec = shufflevector <8 x half> %const, <8 x half> undef, <8 x i32> zeroinitializer
+ call void @bar_v8f16(<8 x half> %vec)
+ ; Clobber all vector registers to force spilling
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ call void @bar_v8f16(<8 x half> %vec)
+ ret void
+}
+
+; Test that non-immediate constants still get spilled/filled
+define void @test_v4f32_no_remat() {
+; CHECK-LABEL: test_v4f32_no_remat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: mov x8, #62390 // =0xf3b6
+; CHECK-NEXT: movk x8, #16285, lsl #16
+; CHECK-NEXT: orr x8, x8, x8, lsl #32
+; CHECK-NEXT: dup v0.2d, x8
+; CHECK-NEXT: str q0, [sp] // 16-byte Spill
+; CHECK-NEXT: bl bar_v4f32
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload
+; CHECK-NEXT: bl bar_v4f32
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+;
+; CHECK-FP16-LABEL: test_v4f32_no_remat:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: sub sp, sp, #96
+; CHECK-FP16-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-FP16-NEXT: str x30, [sp, #80] // 8-byte Spill
+; CHECK-FP16-NEXT: .cfi_def_cfa_offset 96
+; CHECK-FP16-NEXT: .cfi_offset w30, -16
+; CHECK-FP16-NEXT: .cfi_offset b8, -24
+; CHECK-FP16-NEXT: .cfi_offset b9, -32
+; CHECK-FP16-NEXT: .cfi_offset b10, -40
+; CHECK-FP16-NEXT: .cfi_offset b11, -48
+; CHECK-FP16-NEXT: .cfi_offset b12, -56
+; CHECK-FP16-NEXT: .cfi_offset b13, -64
+; CHECK-FP16-NEXT: .cfi_offset b14, -72
+; CHECK-FP16-NEXT: .cfi_offset b15, -80
+; CHECK-FP16-NEXT: mov x8, #62390 // =0xf3b6
+; CHECK-FP16-NEXT: movk x8, #16285, lsl #16
+; CHECK-FP16-NEXT: orr x8, x8, x8, lsl #32
+; CHECK-FP16-NEXT: dup v0.2d, x8
+; CHECK-FP16-NEXT: str q0, [sp] // 16-byte Spill
+; CHECK-FP16-NEXT: bl bar_v4f32
+; CHECK-FP16-NEXT: //APP
+; CHECK-FP16-NEXT: //NO_APP
+; CHECK-FP16-NEXT: ldr q0, [sp] // 16-byte Reload
+; CHECK-FP16-NEXT: bl bar_v4f32
+; CHECK-FP16-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldr x30, [sp, #80] // 8-byte Reload
+; CHECK-FP16-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-FP16-NEXT: add sp, sp, #96
+; CHECK-FP16-NEXT: ret
+entry:
+ ; 1.234 cannot be encoded as FMOV immediate, so it should be spilled
+ %const = insertelement <4 x float> undef, float 0x3FF3BE76C0000000, i32 0
+ %vec = shufflevector <4 x float> %const, <4 x float> undef, <4 x i32> zeroinitializer
+ call void @bar_v4f32(<4 x float> %vec)
+ ; Clobber all vector registers to force spilling
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ call void @bar_v4f32(<4 x float> %vec)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.mir b/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.mir
new file mode 100644
index 0000000000000..783f74f7d33cf
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/remat-fmov-vector-imm.mir
@@ -0,0 +1,159 @@
+# NOTE: CHECK lines in this test were written manually, not autogenerated.
+# RUN: llc -mtriple=aarch64 -mattr=+neon -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64 -mattr=+neon,+fullfp16 -run-pass=greedy -o - %s | FileCheck %s --check-prefix=CHECK-FP16
+
+# Test that FMOVvXfY_ns instructions, now marked rematerializable, are
+# rematerialized across clobbering calls instead of being spilled and reloaded.
+
+--- |
+ declare void @bar_v2f64(<2 x double>)
+ declare void @bar_v4f32(<4 x float>)
+ declare void @bar_v2f32(<2 x float>)
+ declare void @bar_v4f16(<4 x half>)
+ declare void @bar_v8f16(<8 x half>)
+
+ define void @test_remat_v2f64() {
+ ret void
+ }
+
+ define void @test_remat_v4f32() {
+ ret void
+ }
+
+ define void @test_remat_v2f32() {
+ ret void
+ }
+
+ define void @test_remat_v4f16() {
+ ret void
+ }
+
+ define void @test_remat_v8f16() {
+ ret void
+ }
+...
+
+---
+name: test_remat_v2f64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_remat_v2f64
+ ; CHECK: bb.0:
+ ; CHECK-DAG: FMOVv2f64_ns 127
+ ; CHECK: BL @bar_v2f64
+ ; CHECK: INLINEASM
+ ; CHECK-DAG: FMOVv2f64_ns 127
+ ; CHECK: BL @bar_v2f64
+ bb.0:
+ %0:fpr128 = FMOVv2f64_ns 127
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v2f64, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber all FP registers.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v2f64, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+---
+name: test_remat_v4f32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_remat_v4f32
+ ; CHECK: bb.0:
+ ; CHECK-DAG: FMOVv4f32_ns 112
+ ; CHECK: BL @bar_v4f32
+ ; CHECK: INLINEASM
+ ; CHECK-DAG: FMOVv4f32_ns 112
+ ; CHECK: BL @bar_v4f32
+ bb.0:
+ %0:fpr128 = FMOVv4f32_ns 112
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v4f32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber all FP registers.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v4f32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+---
+name: test_remat_v2f32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_remat_v2f32
+ ; CHECK: bb.0:
+ ; CHECK-DAG: FMOVv2f32_ns 115
+ ; CHECK: BL @bar_v2f32
+ ; CHECK: INLINEASM
+ ; CHECK-DAG: FMOVv2f32_ns 115
+ ; CHECK: BL @bar_v2f32
+ bb.0:
+ %0:fpr64 = FMOVv2f32_ns 115
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $d0 = COPY %0
+ BL @bar_v2f32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $d0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber all FP registers.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $d0 = COPY %0
+ BL @bar_v2f32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $d0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+---
+name: test_remat_v4f16
+tracksRegLiveness: true
+body: |
+ ; CHECK-FP16-LABEL: name: test_remat_v4f16
+ ; CHECK-FP16: bb.0:
+ ; CHECK-FP16-DAG: FMOVv4f16_ns 124
+ ; CHECK-FP16: BL @bar_v4f16
+ ; CHECK-FP16: INLINEASM
+ ; CHECK-FP16-DAG: FMOVv4f16_ns 124
+ ; CHECK-FP16: BL @bar_v4f16
+ bb.0:
+ %0:fpr64 = FMOVv4f16_ns 124
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $d0 = COPY %0
+ BL @bar_v4f16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $d0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber all FP registers.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $d0 = COPY %0
+ BL @bar_v4f16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $d0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+---
+name: test_remat_v8f16
+tracksRegLiveness: true
+body: |
+ ; CHECK-FP16-LABEL: name: test_remat_v8f16
+ ; CHECK-FP16: bb.0:
+ ; CHECK-FP16-DAG: FMOVv8f16_ns 128
+ ; CHECK-FP16: BL @bar_v8f16
+ ; CHECK-FP16: INLINEASM
+ ; CHECK-FP16-DAG: FMOVv8f16_ns 128
+ ; CHECK-FP16: BL @bar_v8f16
+ bb.0:
+ %0:fpr128 = FMOVv8f16_ns 128
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v8f16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; Clobber all FP registers.
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q0, 12 /* clobber */, implicit-def dead early-clobber $q1, 12 /* clobber */, implicit-def dead early-clobber $q2, 12 /* clobber */, implicit-def dead early-clobber $q3, 12 /* clobber */, implicit-def dead early-clobber $q4, 12 /* clobber */, implicit-def dead early-clobber $q5, 12 /* clobber */, implicit-def dead early-clobber $q6, 12 /* clobber */, implicit-def dead early-clobber $q7, 12 /* clobber */, implicit-def dead early-clobber $q8, 12 /* clobber */, implicit-def dead early-clobber $q9, 12 /* clobber */, implicit-def dead early-clobber $q10, 12 /* clobber */, implicit-def dead early-clobber $q11, 12 /* clobber */, implicit-def dead early-clobber $q12, 12 /* clobber */, implicit-def dead early-clobber $q13, 12 /* clobber */, implicit-def dead early-clobber $q14, 12 /* clobber */, implicit-def dead early-clobber $q15, 12 /* clobber */, implicit-def dead early-clobber $q16, 12 /* clobber */, implicit-def dead early-clobber $q17, 12 /* clobber */, implicit-def dead early-clobber $q18, 12 /* clobber */, implicit-def dead early-clobber $q19, 12 /* clobber */, implicit-def dead early-clobber $q20, 12 /* clobber */, implicit-def dead early-clobber $q21, 12 /* clobber */, implicit-def dead early-clobber $q22, 12 /* clobber */, implicit-def dead early-clobber $q23, 12 /* clobber */, implicit-def dead early-clobber $q24, 12 /* clobber */, implicit-def dead early-clobber $q25, 12 /* clobber */, implicit-def dead early-clobber $q26, 12 /* clobber */, implicit-def dead early-clobber $q27, 12 /* clobber */, implicit-def dead early-clobber $q28, 12 /* clobber */, implicit-def dead early-clobber $q29, 12 /* clobber */, implicit-def dead early-clobber $q30, 12 /* clobber */, implicit-def dead early-clobber $q31
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $q0 = COPY %0
+ BL @bar_v8f16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
\ No newline at end of file
More information about the llvm-commits
mailing list