[llvm] [WIP] - [LLVM][SVE] Honour NEON calling convention when targeting SVE VLS. (PR #70847)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 23 10:23:04 PST 2023
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/70847
>From 2f4066650ede384df622236daf4428bdbe021b3d Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 23 Nov 2023 14:44:58 +0000
Subject: [PATCH 1/2] [LLVM][SVE] Add tests to prove SVE does not affect the
calling convention.
NOTE: When SVE is enabled some of these tests trigger asserts.
---
.../sve-fixed-length-function-calls.ll | 162 ++++++++++++++++++
.../AArch64/sve-fixed-length-functions.ll | 144 ++++++++++++++++
2 files changed, 306 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
new file mode 100644
index 000000000000000..fb7bdddcd84b0c0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @foo_nxv32i8(<32 x i8>)
+define void @test_nxv32i8(<32 x i8> %unused, <32 x i8> %a) #0 {
+; CHECK-LABEL: test_nxv32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_nxv32i8
+ tail call void @foo_nxv32i8(<32 x i8> %a)
+ ret void
+}
+
+declare void @foo_nxv16i16(<16 x i16>)
+define void @test_nxv16i16(<16 x i16> %unused, <16 x i16> %a) #0 {
+; CHECK-LABEL: test_nxv16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_nxv16i16
+ tail call void @foo_nxv16i16(<16 x i16> %a)
+ ret void
+}
+
+declare void @foo_nxv8i32(<8 x i32>)
+define void @test_nxv8i32(<8 x i32> %unused, <8 x i32> %a) #0 {
+; CHECK-LABEL: test_nxv8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_nxv8i32
+ tail call void @foo_nxv8i32(<8 x i32> %a)
+ ret void
+}
+
+declare void @foo_nxv4i64(<4 x i64>)
+define void @test_nxv4i64(<4 x i64> %unused, <4 x i64> %a) #0 {
+; CHECK-LABEL: test_nxv4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_nxv4i64
+ tail call void @foo_nxv4i64(<4 x i64> %a)
+ ret void
+}
+
+declare void @foo_nxv16f16(<16 x half>)
+define void @test_nxv16f16(<16 x half> %unused, <16 x half> %a) #0 {
+; CHECK-LABEL: test_nxv16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_nxv16f16
+ tail call void @foo_nxv16f16(<16 x half> %a)
+ ret void
+}
+
+declare void @foo_nxv8f32(<8 x float>)
+define void @test_nxv8f32(<8 x float> %unused, <8 x float> %a) #0 {
+; CHECK-LABEL: test_nxv8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_nxv8f32
+ tail call void @foo_nxv8f32(<8 x float> %a)
+ ret void
+}
+
+declare void @foo_nxv4f64(<4 x double>)
+define void @test_nxv4f64(<4 x double> %unused, <4 x double> %a) #0 {
+; CHECK-LABEL: test_nxv4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_nxv4f64
+ tail call void @foo_nxv4f64(<4 x double> %a)
+ ret void
+}
+
+declare void @foo_nxv16bf16(<16 x bfloat>)
+define void @test_nxv16bf16(<16 x bfloat> %unused, <16 x bfloat> %a) #0 {
+; CHECK-LABEL: test_nxv16bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_nxv16bf16
+ tail call void @foo_nxv16bf16(<16 x bfloat> %a)
+ ret void
+}
+
+declare void @foo_nxv3i64(<3 x i64>)
+define void @test_nxv3i64(<3 x i64> %unused, <3 x i64> %a) #0 {
+; CHECK-LABEL: test_nxv3i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d2, d5
+; CHECK-NEXT: fmov d1, d4
+; CHECK-NEXT: fmov d0, d3
+; CHECK-NEXT: b foo_nxv3i64
+ tail call void @foo_nxv3i64(<3 x i64> %a)
+ ret void
+}
+
+declare void @foo_nxv5i64(<5 x i64>)
+define void @test_nxv5i64(<5 x i64> %unused, <5 x i64> %a) #0 {
+; CHECK-LABEL: test_nxv5i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d1, d6
+; CHECK-NEXT: fmov d0, d5
+; CHECK-NEXT: fmov d2, d7
+; CHECK-NEXT: ldp d3, d4, [sp]
+; CHECK-NEXT: b foo_nxv5i64
+ tail call void @foo_nxv5i64(<5 x i64> %a)
+ ret void
+}
+
+declare void @foo_nxv9i16(<9 x i16>)
+define void @test_nxv9i16(<9 x i16> %unused, <9 x i16> %a) #0 {
+; CHECK-LABEL: test_nxv9i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w0, [sp, #8]
+; CHECK-NEXT: ldr w1, [sp, #16]
+; CHECK-NEXT: ldr w2, [sp, #24]
+; CHECK-NEXT: ldr w3, [sp, #32]
+; CHECK-NEXT: ldr w4, [sp, #40]
+; CHECK-NEXT: ldr w5, [sp, #48]
+; CHECK-NEXT: ldr w6, [sp, #56]
+; CHECK-NEXT: ldr w7, [sp, #64]
+; CHECK-NEXT: ldr w8, [sp, #72]
+; CHECK-NEXT: str w8, [sp]
+; CHECK-NEXT: b foo_nxv9i16
+ tail call void @foo_nxv9i16(<9 x i16> %a)
+ ret void
+}
+
+declare void @foo_nxv16i1(<16 x i1>)
+define void @test_nxv16i1(<16 x i1> %unused, <16 x i1> %a) #0 {
+; CHECK-LABEL: test_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: b foo_nxv16i1
+ tail call void @foo_nxv16i1(<16 x i1> %a)
+ ret void
+}
+
+; UTC_ARGS: --disable
+; The output from this test is large and generally not useful; what matters is
+; that no vector registers are used.
+declare void @foo_nxv32i1(<32 x i1>)
+define void @test_nxv32i1(<32 x i1> %unused, <32 x i1> %a) #0 {
+; CHECK-LABEL: test_nxv32i1:
+; CHECK: // %bb.0:
+; CHECK-NOT: {{[qvz][0-9]+}}
+; CHECK: b foo_nxv32i1
+ tail call void @foo_nxv32i1(<32 x i1> %a)
+ ret void
+}
+; UTC_ARGS: --enable
+
+attributes #0 = { "target-features"="+sve,+bf16" nounwind }
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
new file mode 100644
index 000000000000000..307f4a9ecb4b610
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <32 x i8> @test_nxv32i8(<32 x i8> %unused, <32 x i8> %a) #0 {
+; CHECK-LABEL: test_nxv32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <32 x i8> %a
+}
+
+define <16 x i16> @test_nxv16i16(<16 x i16> %unused, <16 x i16> %a) #0 {
+; CHECK-LABEL: test_nxv16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <16 x i16> %a
+}
+
+define <8 x i32> @test_nxv8i32(<8 x i32> %unused, <8 x i32> %a) #0 {
+; CHECK-LABEL: test_nxv8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <8 x i32> %a
+}
+
+define <4 x i64> @test_nxv4i64(<4 x i64> %unused, <4 x i64> %a) #0 {
+; CHECK-LABEL: test_nxv4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <4 x i64> %a
+}
+
+define <16 x half> @test_nxv16f16(<16 x half> %unused, <16 x half> %a) #0 {
+; CHECK-LABEL: test_nxv16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <16 x half> %a
+}
+
+define <8 x float> @test_nxv8f32(<8 x float> %unused, <8 x float> %a) #0 {
+; CHECK-LABEL: test_nxv8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <8 x float> %a
+}
+
+define <4 x double> @test_nxv4f64(<4 x double> %unused, <4 x double> %a) #0 {
+; CHECK-LABEL: test_nxv4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <4 x double> %a
+}
+
+define <16 x bfloat> @test_nxv16bf16(<16 x bfloat> %unused, <16 x bfloat> %a) #0 {
+; CHECK-LABEL: test_nxv16bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <16 x bfloat> %a
+}
+
+define <3 x i64> @test_nxv3i64(<3 x i64> %unused, <3 x i64> %a) #0 {
+; CHECK-LABEL: test_nxv3i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d2, d5
+; CHECK-NEXT: fmov d1, d4
+; CHECK-NEXT: fmov d0, d3
+; CHECK-NEXT: ret
+ ret <3 x i64> %a
+}
+
+define <5 x i64> @test_nxv5i64(<5 x i64> %unused, <5 x i64> %a) #0 {
+; CHECK-LABEL: test_nxv5i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d1, d6
+; CHECK-NEXT: fmov d0, d5
+; CHECK-NEXT: fmov d2, d7
+; CHECK-NEXT: ldp d3, d4, [sp]
+; CHECK-NEXT: ret
+ ret <5 x i64> %a
+}
+
+define <9 x i16> @test_nxv9i16(<9 x i16> %unused, <9 x i16> %a) #0 {
+; CHECK-LABEL: test_nxv9i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [sp, #8]
+; CHECK-NEXT: add x9, sp, #16
+; CHECK-NEXT: ld1 { v0.h }[1], [x9]
+; CHECK-NEXT: add x9, sp, #24
+; CHECK-NEXT: ld1 { v0.h }[2], [x9]
+; CHECK-NEXT: add x9, sp, #32
+; CHECK-NEXT: ld1 { v0.h }[3], [x9]
+; CHECK-NEXT: add x9, sp, #40
+; CHECK-NEXT: ld1 { v0.h }[4], [x9]
+; CHECK-NEXT: add x9, sp, #48
+; CHECK-NEXT: ld1 { v0.h }[5], [x9]
+; CHECK-NEXT: add x9, sp, #56
+; CHECK-NEXT: ld1 { v0.h }[6], [x9]
+; CHECK-NEXT: add x9, sp, #64
+; CHECK-NEXT: ld1 { v0.h }[7], [x9]
+; CHECK-NEXT: ldrh w9, [sp, #72]
+; CHECK-NEXT: strh w9, [x8, #16]
+; CHECK-NEXT: str q0, [x8]
+; CHECK-NEXT: ret
+ ret <9 x i16> %a
+}
+
+define <16 x i1> @test_nxv16i1(<16 x i1> %unused, <16 x i1> %a) #0 {
+; CHECK-LABEL: test_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
+ ret <16 x i1> %a
+}
+
+; UTC_ARGS: --disable
+; The output from this test is large and generally not useful; what matters is
+; that no vector registers are used.
+define <32 x i1> @test_nxv32i1(<32 x i1> %unused, <32 x i1> %a) #0 {
+; CHECK-LABEL: test_nxv32i1:
+; CHECK: // %bb.0:
+; CHECK-NOT: {{[qvz][0-9]+}}
+; CHECK: ret
+ ret <32 x i1> %a
+}
+; UTC_ARGS: --enable
+
+attributes #0 = { "target-features"="+sve,+bf16" }
>From 9bef6a7a98ceecfac21d0f0d9eb3bd2e05acd5c7 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Tue, 31 Oct 2023 17:44:31 +0000
Subject: [PATCH 2/2] [LLVM][SVE] Honour calling convention when using SVE for
fixed length vectors.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 92 +++++++++++++++++++
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 12 +++
.../AArch64/sve-fixed-length-concat.ll | 9 +-
.../sve-fixed-length-extract-subvector.ll | 5 +-
.../AArch64/sve-fixed-length-fp-to-int.ll | 8 +-
.../sve-fixed-length-function-calls.ll | 2 +
.../AArch64/sve-fixed-length-functions.ll | 2 +
.../AArch64/sve-fixed-length-int-to-fp.ll | 8 +-
.../sve-streaming-mode-fixed-length-concat.ll | 18 ++--
...ing-mode-fixed-length-extract-subvector.ll | 4 +-
...e-streaming-mode-fixed-length-fp-to-int.ll | 18 ++--
...e-streaming-mode-fixed-length-int-to-fp.ll | 4 +-
12 files changed, 151 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 28e038abcecfc8e..961e6eeeb53b6d8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26718,3 +26718,95 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
unsigned AArch64TargetLowering::getMinimumJumpTableEntries() const {
return Subtarget->getMinimumJumpTableEntries();
}
+
+MVT AArch64TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ if (!VT.isFixedLengthVector() || !Subtarget->useSVEForFixedLengthVectors())
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
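+  // Derive the register type from the full calling-convention breakdown.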
+ EVT VT1;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ getVectorTypeBreakdownForCallingConv(Context, CC, VT, VT1, NumIntermediates,
+ RegisterVT);
+ return RegisterVT;
+}
+
+unsigned AArch64TargetLowering::getNumRegistersForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
+ if (!VT.isFixedLengthVector() || !Subtarget->useSVEForFixedLengthVectors())
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+
+ EVT VT1;
+ MVT VT2;
+ unsigned NumIntermediates;
+ return getVectorTypeBreakdownForCallingConv(Context, CC, VT, VT1,
+ NumIntermediates, VT2);
+}
+
+unsigned AArch64TargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ int NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+ if (!RegisterVT.isFixedLengthVector() ||
+ RegisterVT.getFixedSizeInBits() <= 128)
+ return NumRegs;
+
+ assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
+ assert(IntermediateVT == RegisterVT && "Unexpected VT mismatch!");
+ assert(RegisterVT.getFixedSizeInBits() % 128 == 0 && "Unexpected size!");
+
+  // A size mismatch here implies either type promotion or widening and would
+  // have resulted in scalarisation if larger vectors had not been available.
+ if (RegisterVT.getSizeInBits() * NumRegs != VT.getSizeInBits()) {
+ EVT EltTy = VT.getVectorElementType();
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, ElementCount::getFixed(1));
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+
+ IntermediateVT = NewVT;
+ NumIntermediates = VT.getVectorNumElements();
+ RegisterVT = getRegisterType(Context, NewVT);
+ return NumIntermediates;
+ }
+
+  // SVE VLS support does not introduce a new ABI, so we should use
+  // NEON-sized types for vector arguments and returns.
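+  // For example, with -aarch64-sve-vector-bits-min=256 an <8 x i32>
+  // argument is passed in two v4i32 parts (q0 and q1) rather than a
+  // single 256-bit Z register.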
+
+ unsigned NumSubRegs = RegisterVT.getFixedSizeInBits() / 128;
+ NumIntermediates *= NumSubRegs;
+ NumRegs *= NumSubRegs;
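+  // Map the element type onto the equivalent 128-bit NEON vector type.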
+
+ switch (RegisterVT.getVectorElementType().SimpleTy) {
+ default:
+ llvm_unreachable("unexpected element type for vector");
+ case MVT::i8:
+ IntermediateVT = RegisterVT = MVT::v16i8;
+ break;
+ case MVT::i16:
+ IntermediateVT = RegisterVT = MVT::v8i16;
+ break;
+ case MVT::i32:
+ IntermediateVT = RegisterVT = MVT::v4i32;
+ break;
+ case MVT::i64:
+ IntermediateVT = RegisterVT = MVT::v2i64;
+ break;
+ case MVT::f16:
+    IntermediateVT = RegisterVT = MVT::v8f16;
+ break;
+ case MVT::f32:
+ IntermediateVT = RegisterVT = MVT::v4f32;
+ break;
+ case MVT::f64:
+ IntermediateVT = RegisterVT = MVT::v2f64;
+ break;
+ case MVT::bf16:
+ IntermediateVT = RegisterVT = MVT::v8bf16;
+ break;
+ }
+
+ return NumRegs;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 169b0dbab65cdca..c67c7c5affdc48e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -954,6 +954,18 @@ class AArch64TargetLowering : public TargetLowering {
// used for 64bit and 128bit vectors as well.
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
+ // Follow NEON ABI rules even when using SVE for fixed length vectors.
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const override;
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
index e54d22b140bf60c..d2938fe72f97048 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
@@ -304,7 +304,9 @@ define void @concat_v128i16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov v0.s[1], w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%res = shufflevector <1 x i32> %op1, <1 x i32> %op2, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %res
@@ -647,7 +649,10 @@ define void @concat_v128f16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%res = shufflevector <1 x float> %op1, <1 x float> %op2, <2 x i32> <i32 0, i32 1>
ret <2 x float> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
index 066c06d5aa76c59..8e5b9ae7394c677 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
@@ -194,7 +194,7 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: dup v0.2s, v0.s[1]
+; CHECK-NEXT: mov w0, v0.s[1]
; CHECK-NEXT: ret
%ret = call <1 x i32> @llvm.vector.extract.v1i32.v2i32(<2 x i32> %op, i64 1)
ret <1 x i32> %ret
@@ -449,7 +449,8 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) vscale_range(2,0) #
; CHECK-LABEL: extract_subvector_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: dup v0.2s, v0.s[1]
+; CHECK-NEXT: mov v0.s[0], v0.s[1]
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%ret = call <1 x float> @llvm.vector.extract.v1f32.v2f32(<2 x float> %op, i64 1)
ret <1 x float> %ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
index da0cf927d74d24c..515f9f1f74b151d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
@@ -510,6 +510,7 @@ define void @fcvtzu_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f32_v1i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -615,7 +616,7 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i16>
ret <1 x i16> %res
@@ -717,6 +718,7 @@ define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i32>
ret <1 x i32> %res
@@ -1403,6 +1405,7 @@ define void @fcvtzs_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v1f32_v1i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -1508,7 +1511,7 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i16>
ret <1 x i16> %res
@@ -1610,6 +1613,7 @@ define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i32>
ret <1 x i32> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
index fb7bdddcd84b0c0..fe98101dcd5f7d3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
index 307f4a9ecb4b610..ab3ac47d070870f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
index 50040eaa61e6c57..cc38b0dc93b2e32 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
@@ -208,7 +208,7 @@ define void @ucvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i16_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
@@ -513,6 +513,7 @@ define void @ucvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x double> @ucvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i32_v1f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -724,6 +725,7 @@ define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%res = uitofp <1 x i64> %op1 to <1 x float>
ret <1 x float> %res
@@ -1121,7 +1123,7 @@ define void @scvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x double> @scvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i16_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z0.d, z0.s
@@ -1432,6 +1434,7 @@ define void @scvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x double> @scvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i32_v1f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-NEXT: scvtf v0.2d, v0.2d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -1649,6 +1652,7 @@ define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: scvtf v0.2d, v0.2d
; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%res = sitofp <1 x i64> %op1 to <1 x float>
ret <1 x float> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index 86494c4be501230..f6dc2f4461e3be7 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -180,10 +180,11 @@ define void @concat_v32i16(ptr %a, ptr %b, ptr %c) {
define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) {
; CHECK-LABEL: concat_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: stp w0, w1, [sp, #8]
+; CHECK-NEXT: ldr d0, [sp, #8]
+; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%res = shufflevector <1 x i32> %op1, <1 x i32> %op2, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %res
@@ -358,10 +359,11 @@ define void @concat_v32f16(ptr %a, ptr %b, ptr %c) {
define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) {
; CHECK-LABEL: concat_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: stp s0, s1, [sp, #8]
+; CHECK-NEXT: ldr d0, [sp, #8]
+; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%res = shufflevector <1 x float> %op1, <1 x float> %op2, <2 x i32> <i32 0, i32 1>
ret <2 x float> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 25ecd7a8d7e32ef..456721725deafb3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -125,7 +125,7 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%ret = call <1 x i32> @llvm.vector.extract.v1i32.v2i32(<2 x i32> %op, i64 1)
ret <1 x i32> %ret
@@ -224,7 +224,7 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
%ret = call <1 x float> @llvm.vector.extract.v1f32.v2f32(<2 x float> %op, i64 1)
ret <1 x float> %ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index d6adf9cf0ad6723..7600a8be8e0dc32 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -392,8 +392,9 @@ define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) {
define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) {
; CHECK-LABEL: fcvtzu_v1f32_v1i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -465,9 +466,7 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) {
; CHECK-LABEL: fcvtzu_v1f64_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fcvtzs w8, d0
-; CHECK-NEXT: mov z0.h, w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fcvtzs w0, d0
; CHECK-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i16>
ret <1 x i16> %res
@@ -647,7 +646,7 @@ define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i32>
ret <1 x i32> %res
@@ -1142,8 +1141,9 @@ define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) {
define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) {
; CHECK-LABEL: fcvtzs_v1f32_v1i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -1217,9 +1217,7 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) {
; CHECK-LABEL: fcvtzs_v1f64_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fcvtzs w8, d0
-; CHECK-NEXT: mov z0.h, w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fcvtzs w0, d0
; CHECK-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i16>
ret <1 x i16> %res
@@ -1399,7 +1397,7 @@ define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i32>
ret <1 x i32> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index c110e89326cc0cd..c1c143588c0cdd7 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -127,9 +127,7 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) {
define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
; CHECK-LABEL: ucvtf_v1i16_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: ucvtf d0, w8
; CHECK-NEXT: ret
%res = uitofp <1 x i16> %op1 to <1 x double>