[llvm] 4db451a - [LLVM][SVE] Honour calling convention when using SVE for fixed length vectors. (#70847)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 04:10:04 PST 2023
Author: Paul Walker
Date: 2023-11-30T12:09:58Z
New Revision: 4db451a87d56ab469af2eecbae47338e540b1276
URL: https://github.com/llvm/llvm-project/commit/4db451a87d56ab469af2eecbae47338e540b1276
DIFF: https://github.com/llvm/llvm-project/commit/4db451a87d56ab469af2eecbae47338e540b1276.diff
LOG: [LLVM][SVE] Honour calling convention when using SVE for fixed length vectors. (#70847)
NOTE: I'm not sure how many of the corner cases are part of the
documented ABI but that shouldn't matter because my goal is for
`-msve-vector-bits` to have no effect on the way arguments and returns
are processed.
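For readers skimming the patch, here is a minimal standalone sketch (not part of
the commit; the helper name and structure are purely illustrative) of the
integer-element breakdown the new getVectorTypeBreakdownForCallingConv override
performs: a legal fixed-length vector wider than 128 bits is re-expressed as
TotalBits/128 copies of the matching 128-bit NEON type, so argument and return
passing does not depend on the configured SVE vector length.

// Illustrative sketch only; not LLVM code. Assumes EltBits divides 128 and
// TotalBits is a multiple of 128.
#include <cstdio>
#include <string>

struct Breakdown {
  std::string RegisterType; // 128-bit NEON type carrying each part
  unsigned NumRegisters;    // how many such registers are needed
};

Breakdown breakDownFixedVector(unsigned EltBits, unsigned TotalBits) {
  unsigned LanesPer128 = 128 / EltBits; // lanes per 128-bit NEON register
  return {"v" + std::to_string(LanesPer128) + "i" + std::to_string(EltBits),
          TotalBits / 128};
}

int main() {
  // <8 x i32> is 256 bits: 2 x v4i32, independent of -msve-vector-bits.
  Breakdown B = breakDownFixedVector(/*EltBits=*/32, /*TotalBits=*/256);
  std::printf("<8 x i32> -> %u x %s\n", B.NumRegisters, B.RegisterType.c_str());
  return 0;
}

This matches what the new tests below expect: for example test_v8i32 receives
<8 x i32> in q2/q3 and simply moves it to q0/q1, whether or not an SVE vector
length is specified on the llc command line.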
Added:
llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 166b0d84a2ce28a..149a6d413d81442 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26719,3 +26719,99 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
unsigned AArch64TargetLowering::getMinimumJumpTableEntries() const {
return Subtarget->getMinimumJumpTableEntries();
}
+
+MVT AArch64TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ bool NonUnitFixedLengthVector =
+ VT.isFixedLengthVector() && !VT.getVectorElementCount().isScalar();
+ if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+ EVT VT1;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ getVectorTypeBreakdownForCallingConv(Context, CC, VT, VT1, NumIntermediates,
+ RegisterVT);
+ return RegisterVT;
+}
+
+unsigned AArch64TargetLowering::getNumRegistersForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
+ bool NonUnitFixedLengthVector =
+ VT.isFixedLengthVector() && !VT.getVectorElementCount().isScalar();
+ if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+
+ EVT VT1;
+ MVT VT2;
+ unsigned NumIntermediates;
+ return getVectorTypeBreakdownForCallingConv(Context, CC, VT, VT1,
+ NumIntermediates, VT2);
+}
+
+unsigned AArch64TargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ int NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+ if (!RegisterVT.isFixedLengthVector() ||
+ RegisterVT.getFixedSizeInBits() <= 128)
+ return NumRegs;
+
+ assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
+ assert(IntermediateVT == RegisterVT && "Unexpected VT mismatch!");
+ assert(RegisterVT.getFixedSizeInBits() % 128 == 0 && "Unexpected size!");
+
+ // A size mismatch here implies either type promotion or widening and would
+ // have resulted in scalarisation if larger vectors had not been available.
+ if (RegisterVT.getSizeInBits() * NumRegs != VT.getSizeInBits()) {
+ EVT EltTy = VT.getVectorElementType();
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, ElementCount::getFixed(1));
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+
+ IntermediateVT = NewVT;
+ NumIntermediates = VT.getVectorNumElements();
+ RegisterVT = getRegisterType(Context, NewVT);
+ return NumIntermediates;
+ }
+
+ // SVE VLS support does not introduce a new ABI so we should use NEON sized
+ // types for vector arguments and returns.
+
+ unsigned NumSubRegs = RegisterVT.getFixedSizeInBits() / 128;
+ NumIntermediates *= NumSubRegs;
+ NumRegs *= NumSubRegs;
+
+ switch (RegisterVT.getVectorElementType().SimpleTy) {
+ default:
+ llvm_unreachable("unexpected element type for vector");
+ case MVT::i8:
+ IntermediateVT = RegisterVT = MVT::v16i8;
+ break;
+ case MVT::i16:
+ IntermediateVT = RegisterVT = MVT::v8i16;
+ break;
+ case MVT::i32:
+ IntermediateVT = RegisterVT = MVT::v4i32;
+ break;
+ case MVT::i64:
+ IntermediateVT = RegisterVT = MVT::v2i64;
+ break;
+ case MVT::f16:
+ IntermediateVT = RegisterVT = MVT::v8f16;
+ break;
+ case MVT::f32:
+ IntermediateVT = RegisterVT = MVT::v4f32;
+ break;
+ case MVT::f64:
+ IntermediateVT = RegisterVT = MVT::v2f64;
+ break;
+ case MVT::bf16:
+ IntermediateVT = RegisterVT = MVT::v8bf16;
+ break;
+ }
+
+ return NumRegs;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 169b0dbab65cdca..c67c7c5affdc48e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -954,6 +954,18 @@ class AArch64TargetLowering : public TargetLowering {
// used for 64bit and 128bit vectors as well.
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
+ // Follow NEON ABI rules even when using SVE for fixed length vectors.
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const override;
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
new file mode 100644
index 000000000000000..cea3915f3ea8dca
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-function-calls.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @foo_v32i8(<32 x i8>)
+define void @test_v32i8(<32 x i8> %unused, <32 x i8> %a) #0 {
+; CHECK-LABEL: test_v32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v32i8
+ tail call void @foo_v32i8(<32 x i8> %a)
+ ret void
+}
+
+declare void @foo_v16i16(<16 x i16>)
+define void @test_v16i16(<16 x i16> %unused, <16 x i16> %a) #0 {
+; CHECK-LABEL: test_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v16i16
+ tail call void @foo_v16i16(<16 x i16> %a)
+ ret void
+}
+
+declare void @foo_v8i32(<8 x i32>)
+define void @test_v8i32(<8 x i32> %unused, <8 x i32> %a) #0 {
+; CHECK-LABEL: test_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v8i32
+ tail call void @foo_v8i32(<8 x i32> %a)
+ ret void
+}
+
+declare void @foo_v4i64(<4 x i64>)
+define void @test_v4i64(<4 x i64> %unused, <4 x i64> %a) #0 {
+; CHECK-LABEL: test_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v4i64
+ tail call void @foo_v4i64(<4 x i64> %a)
+ ret void
+}
+
+declare void @foo_v16f16(<16 x half>)
+define void @test_v16f16(<16 x half> %unused, <16 x half> %a) #0 {
+; CHECK-LABEL: test_v16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v16f16
+ tail call void @foo_v16f16(<16 x half> %a)
+ ret void
+}
+
+declare void @foo_v8f32(<8 x float>)
+define void @test_v8f32(<8 x float> %unused, <8 x float> %a) #0 {
+; CHECK-LABEL: test_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v8f32
+ tail call void @foo_v8f32(<8 x float> %a)
+ ret void
+}
+
+declare void @foo_v4f64(<4 x double>)
+define void @test_v4f64(<4 x double> %unused, <4 x double> %a) #0 {
+; CHECK-LABEL: test_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v4f64
+ tail call void @foo_v4f64(<4 x double> %a)
+ ret void
+}
+
+declare void @foo_v16bf16(<16 x bfloat>)
+define void @test_v16bf16(<16 x bfloat> %unused, <16 x bfloat> %a) #0 {
+; CHECK-LABEL: test_v16bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v16bf16
+ tail call void @foo_v16bf16(<16 x bfloat> %a)
+ ret void
+}
+
+declare void @foo_v3i64(<3 x i64>)
+define void @test_v3i64(<3 x i64> %unused, <3 x i64> %a) #0 {
+; CHECK-LABEL: test_v3i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d2, d5
+; CHECK-NEXT: fmov d1, d4
+; CHECK-NEXT: fmov d0, d3
+; CHECK-NEXT: b foo_v3i64
+ tail call void @foo_v3i64(<3 x i64> %a)
+ ret void
+}
+
+declare void @foo_v5i64(<5 x i64>)
+define void @test_v5i64(<5 x i64> %unused, <5 x i64> %a) #0 {
+; CHECK-LABEL: test_v5i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d1, d6
+; CHECK-NEXT: fmov d0, d5
+; CHECK-NEXT: fmov d2, d7
+; CHECK-NEXT: ldp d3, d4, [sp]
+; CHECK-NEXT: b foo_v5i64
+ tail call void @foo_v5i64(<5 x i64> %a)
+ ret void
+}
+
+declare void @foo_v1i16(<1 x i16>)
+define void @test_v1i16(<1 x i16> %unused, <1 x i16> %a) #0 {
+; CHECK-LABEL: test_v1i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: b foo_v1i16
+ tail call void @foo_v1i16(<1 x i16> %a)
+ ret void
+}
+
+declare void @foo_v9i16(<9 x i16>)
+define void @test_v9i16(<9 x i16> %unused, <9 x i16> %a) #0 {
+; CHECK-LABEL: test_v9i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w0, [sp, #8]
+; CHECK-NEXT: ldr w1, [sp, #16]
+; CHECK-NEXT: ldr w2, [sp, #24]
+; CHECK-NEXT: ldr w3, [sp, #32]
+; CHECK-NEXT: ldr w4, [sp, #40]
+; CHECK-NEXT: ldr w5, [sp, #48]
+; CHECK-NEXT: ldr w6, [sp, #56]
+; CHECK-NEXT: ldr w7, [sp, #64]
+; CHECK-NEXT: ldr w8, [sp, #72]
+; CHECK-NEXT: str w8, [sp]
+; CHECK-NEXT: b foo_v9i16
+ tail call void @foo_v9i16(<9 x i16> %a)
+ ret void
+}
+
+declare void @foo_v16i1(<16 x i1>)
+define void @test_v16i1(<16 x i1> %unused, <16 x i1> %a) #0 {
+; CHECK-LABEL: test_v16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: b foo_v16i1
+ tail call void @foo_v16i1(<16 x i1> %a)
+ ret void
+}
+
+; UTC_ARGS: --disable
+; The output from this test is large and generally not useful; what matters is
+; that no vector registers are used.
+declare void @foo_v32i1(<32 x i1>)
+define void @test_v32i1(<32 x i1> %unused, <32 x i1> %a) #0 {
+; CHECK-LABEL: test_v32i1:
+; CHECK: // %bb.0:
+; CHECK-NOT: [q,v,z][0-9]+
+; CHECK: b foo_v32i1
+ tail call void @foo_v32i1(<32 x i1> %a)
+ ret void
+}
+; UTC_ARGS: --enable
+
+declare void @foo_v1i128(<1 x i128>)
+define void @test_v1i128(<1 x i128> %unused, <1 x i128> %a) #0 {
+; CHECK-LABEL: test_v1i128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x1, x3
+; CHECK-NEXT: mov x0, x2
+; CHECK-NEXT: b foo_v1i128
+ tail call void @foo_v1i128(<1 x i128> %a)
+ ret void
+}
+
+declare void @foo_v2i128(<2 x i128>)
+define void @test_v2i128(<2 x i128> %unused, <2 x i128> %a) #0 {
+; CHECK-LABEL: test_v2i128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x3, x7
+; CHECK-NEXT: mov x2, x6
+; CHECK-NEXT: mov x0, x4
+; CHECK-NEXT: mov x1, x5
+; CHECK-NEXT: b foo_v2i128
+ tail call void @foo_v2i128(<2 x i128> %a)
+ ret void
+}
+
+declare void @foo_v1i256(<1 x i256>)
+define void @test_v1i256(<1 x i256> %unused, <1 x i256> %a) #0 {
+; CHECK-LABEL: test_v1i256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x3, x7
+; CHECK-NEXT: mov x2, x6
+; CHECK-NEXT: mov x0, x4
+; CHECK-NEXT: mov x1, x5
+; CHECK-NEXT: b foo_v1i256
+ tail call void @foo_v1i256(<1 x i256> %a)
+ ret void
+}
+
+declare void @foo_v2i256(<2 x i256>)
+define void @test_v2i256(<2 x i256> %unused, <2 x i256> %a) #0 {
+; CHECK-LABEL: test_v2i256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x0, x1, [sp]
+; CHECK-NEXT: ldp x2, x3, [sp, #16]
+; CHECK-NEXT: ldp x4, x5, [sp, #32]
+; CHECK-NEXT: ldp x6, x7, [sp, #48]
+; CHECK-NEXT: b foo_v2i256
+ tail call void @foo_v2i256(<2 x i256> %a)
+ ret void
+}
+
+declare void @foo_v1f128(<1 x fp128>)
+define void @test_v1f128(<1 x fp128> %unused, <1 x fp128> %a) #0 {
+; CHECK-LABEL: test_v1f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: b foo_v1f128
+ tail call void @foo_v1f128(<1 x fp128> %a)
+ ret void
+}
+
+declare void @foo_v2f128(<2 x fp128>)
+define void @test_v2f128(<2 x fp128> %unused, <2 x fp128> %a) #0 {
+; CHECK-LABEL: test_v2f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: b foo_v2f128
+ tail call void @foo_v2f128(<2 x fp128> %a)
+ ret void
+}
+
+attributes #0 = { "target-features"="+sve,+bf16" nounwind }
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
new file mode 100644
index 000000000000000..96f550826bb4ccc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-functions.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <32 x i8> @test_v32i8(<32 x i8> %unused, <32 x i8> %a) #0 {
+; CHECK-LABEL: test_v32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <32 x i8> %a
+}
+
+define <16 x i16> @test_v16i16(<16 x i16> %unused, <16 x i16> %a) #0 {
+; CHECK-LABEL: test_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <16 x i16> %a
+}
+
+define <8 x i32> @test_v8i32(<8 x i32> %unused, <8 x i32> %a) #0 {
+; CHECK-LABEL: test_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <8 x i32> %a
+}
+
+define <4 x i64> @test_v4i64(<4 x i64> %unused, <4 x i64> %a) #0 {
+; CHECK-LABEL: test_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <4 x i64> %a
+}
+
+define <16 x half> @test_v16f16(<16 x half> %unused, <16 x half> %a) #0 {
+; CHECK-LABEL: test_v16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <16 x half> %a
+}
+
+define <8 x float> @test_v8f32(<8 x float> %unused, <8 x float> %a) #0 {
+; CHECK-LABEL: test_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <8 x float> %a
+}
+
+define <4 x double> @test_v4f64(<4 x double> %unused, <4 x double> %a) #0 {
+; CHECK-LABEL: test_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <4 x double> %a
+}
+
+define <16 x bfloat> @test_v16bf16(<16 x bfloat> %unused, <16 x bfloat> %a) #0 {
+; CHECK-LABEL: test_v16bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <16 x bfloat> %a
+}
+
+define <3 x i64> @test_v3i64(<3 x i64> %unused, <3 x i64> %a) #0 {
+; CHECK-LABEL: test_v3i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d2, d5
+; CHECK-NEXT: fmov d1, d4
+; CHECK-NEXT: fmov d0, d3
+; CHECK-NEXT: ret
+ ret <3 x i64> %a
+}
+
+define <5 x i64> @test_v5i64(<5 x i64> %unused, <5 x i64> %a) #0 {
+; CHECK-LABEL: test_v5i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d1, d6
+; CHECK-NEXT: fmov d0, d5
+; CHECK-NEXT: fmov d2, d7
+; CHECK-NEXT: ldp d3, d4, [sp]
+; CHECK-NEXT: ret
+ ret <5 x i64> %a
+}
+
+define <1 x i16> @test_v1i16(<1 x i16> %unused, <1 x i16> %a) #0 {
+; CHECK-LABEL: test_v1i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
+ ret <1 x i16> %a
+}
+
+define <9 x i16> @test_v9i16(<9 x i16> %unused, <9 x i16> %a) #0 {
+; CHECK-LABEL: test_v9i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [sp, #8]
+; CHECK-NEXT: add x9, sp, #16
+; CHECK-NEXT: ld1 { v0.h }[1], [x9]
+; CHECK-NEXT: add x9, sp, #24
+; CHECK-NEXT: ld1 { v0.h }[2], [x9]
+; CHECK-NEXT: add x9, sp, #32
+; CHECK-NEXT: ld1 { v0.h }[3], [x9]
+; CHECK-NEXT: add x9, sp, #40
+; CHECK-NEXT: ld1 { v0.h }[4], [x9]
+; CHECK-NEXT: add x9, sp, #48
+; CHECK-NEXT: ld1 { v0.h }[5], [x9]
+; CHECK-NEXT: add x9, sp, #56
+; CHECK-NEXT: ld1 { v0.h }[6], [x9]
+; CHECK-NEXT: add x9, sp, #64
+; CHECK-NEXT: ld1 { v0.h }[7], [x9]
+; CHECK-NEXT: ldrh w9, [sp, #72]
+; CHECK-NEXT: strh w9, [x8, #16]
+; CHECK-NEXT: str q0, [x8]
+; CHECK-NEXT: ret
+ ret <9 x i16> %a
+}
+
+define <16 x i1> @test_v16i1(<16 x i1> %unused, <16 x i1> %a) #0 {
+; CHECK-LABEL: test_v16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
+ ret <16 x i1> %a
+}
+
+; UTC_ARGS: --disable
+; The output from this test is large and generally not useful; what matters is
+; that no vector registers are used.
+define <32 x i1> @test_v32i1(<32 x i1> %unused, <32 x i1> %a) #0 {
+; CHECK-LABEL: test_v32i1:
+; CHECK: // %bb.0:
+; CHECK-NOT: [q,v,z][0-9]+
+; CHECK: ret
+ ret <32 x i1> %a
+}
+; UTC_ARGS: --enable
+
+define <1 x i128> @test_v1i128(<1 x i128> %unused, <1 x i128> %a) #0 {
+; CHECK-LABEL: test_v1i128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x1, x3
+; CHECK-NEXT: mov x0, x2
+; CHECK-NEXT: ret
+ ret <1 x i128> %a
+}
+
+define <2 x i128> @test_v2i128(<2 x i128> %unused, <2 x i128> %a) #0 {
+; CHECK-LABEL: test_v2i128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x3, x7
+; CHECK-NEXT: mov x2, x6
+; CHECK-NEXT: mov x0, x4
+; CHECK-NEXT: mov x1, x5
+; CHECK-NEXT: ret
+ ret <2 x i128> %a
+}
+
+define <1 x i256> @test_v1i256(<1 x i256> %unused, <1 x i256> %a) #0 {
+; CHECK-LABEL: test_v1i256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x3, x7
+; CHECK-NEXT: mov x2, x6
+; CHECK-NEXT: mov x0, x4
+; CHECK-NEXT: mov x1, x5
+; CHECK-NEXT: ret
+ ret <1 x i256> %a
+}
+
+define <2 x i256> @test_v2i256(<2 x i256> %unused, <2 x i256> %a) #0 {
+; CHECK-LABEL: test_v2i256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x0, x1, [sp]
+; CHECK-NEXT: ldp x2, x3, [sp, #16]
+; CHECK-NEXT: ldp x4, x5, [sp, #32]
+; CHECK-NEXT: ldp x6, x7, [sp, #48]
+; CHECK-NEXT: ret
+ ret <2 x i256> %a
+}
+
+define <1 x fp128> @test_v1f128(<1 x fp128> %unused, <1 x fp128> %a) #0 {
+; CHECK-LABEL: test_v1f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
+ ret <1 x fp128> %a
+}
+
+define <2 x fp128> @test_v2f128(<2 x fp128> %unused, <2 x fp128> %a) #0 {
+; CHECK-LABEL: test_v2f128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
+ ret <2 x fp128> %a
+}
+
+attributes #0 = { "target-features"="+sve,+bf16" }