[llvm] ISel/AArch64: custom lower vector ISD::LRINT, ISD::LLRINT (PR #89035)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 17 01:29:15 PDT 2024
https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/89035
Since 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering), ISD::LRINT and ISD::LLRINT have vector variants, which are custom-lowered on RISCV and scalarized on all other targets. Since 2302e4c (Reland "VectorUtils: mark xrint as trivially vectorizable"), lrint and llrint are trivially vectorizable, so all the in-tree vectorizers will produce vector variants when possible. Add a custom lowering for AArch64 that lowers these vector variants natively using a combination of frintx, fcvte, and fcvtzs.
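As an illustration, the smallest of the new tests shows the frintx + fcvtzs pair this lowering selects for a single f16 element (taken verbatim from the new fixed-vector-llrint.ll below):

define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
  %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x)
  ret <1 x i64> %a
}
; llc -mtriple=aarch64 -mattr=+sve selects:
;   frintx h0, h0
;   fcvtzs x8, h0
;   fmov   d0, x8
;   ret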
>From 741b358818a3148cfce7d7eeaf0802688a864b45 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ram.ramachandra at arm.com>
Date: Tue, 16 Apr 2024 15:15:11 +0100
Subject: [PATCH] ISel/AArch64: custom lower vector ISD::LRINT, ISD::LLRINT
Since 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom
RISCV lowering), ISD::LRINT and ISD::LLRINT have vector variants, which
are custom-lowered on RISCV and scalarized on all other targets. Since
2302e4c (Reland "VectorUtils: mark xrint as trivially vectorizable"),
lrint and llrint are trivially vectorizable, so all the in-tree
vectorizers will produce vector variants when possible. Add a custom
lowering for AArch64 that lowers these vector variants natively using a
combination of frintx, fcvte, and fcvtzs.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 77 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 +
.../CodeGen/AArch64/fixed-vector-llrint.ll | 733 +++++++++++++
.../CodeGen/AArch64/fixed-vector-lrint.ll | 747 ++++++++++++++
llvm/test/CodeGen/AArch64/vector-llrint.ll | 961 ++++++++---------
llvm/test/CodeGen/AArch64/vector-lrint.ll | 974 ++++++++----------
6 files changed, 2384 insertions(+), 1109 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/fixed-vector-llrint.ll
create mode 100644 llvm/test/CodeGen/AArch64/fixed-vector-lrint.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 80181a77c9d238..29d8ac65a7566c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -790,7 +790,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, V8Narrow, Legal);
setOperationAction(ISD::FROUNDEVEN, V8Narrow, Legal);
setOperationAction(ISD::FRINT, V8Narrow, Legal);
- setOperationAction(ISD::FSQRT, V8Narrow, Expand);
+ setOperationAction(ISD::FSQRT, V8Narrow, Expand);
setOperationAction(ISD::FSUB, V8Narrow, Legal);
setOperationAction(ISD::FTRUNC, V8Narrow, Legal);
setOperationAction(ISD::SETCC, V8Narrow, Expand);
@@ -1147,8 +1147,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (auto Op :
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP,
- ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL,
- ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT,
+ ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::LRINT,
+ ISD::LLRINT, ISD::MUL, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT,
ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_ROUND})
setOperationAction(Op, MVT::v1i64, Expand);
@@ -1355,6 +1355,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::LRINT, VT, Custom);
+ setOperationAction(ISD::LLRINT, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
@@ -1420,6 +1422,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::LRINT, VT, Custom);
+ setOperationAction(ISD::LLRINT, VT, Custom);
}
// Legalize unpacked bitcasts to REINTERPRET_CAST.
@@ -1522,6 +1526,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
+ setOperationAction(ISD::LRINT, VT, Custom);
+ setOperationAction(ISD::LLRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
@@ -1785,9 +1791,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
- for (unsigned Opcode :
- {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
- ISD::FP_TO_UINT_SAT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
+ for (unsigned Opcode : {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
+ ISD::FP_TO_UINT_SAT, ISD::LRINT, ISD::LLRINT,
+ ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
setOperationAction(Opcode, VT, Custom);
if (!VT.isFloatingPoint())
@@ -1947,6 +1953,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
+ setOperationAction(ISD::LRINT, VT, Custom);
+ setOperationAction(ISD::LLRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
@@ -4371,6 +4379,54 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
+SDValue AArch64TargetLowering::LowerVectorXRINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ SDLoc DL(Op);
+
+ assert(VT.isVector() && "Expected vector type");
+
+ EVT ContainerVT = VT;
+ EVT SrcVT = Src.getValueType();
+ EVT CastVT =
+ ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
+
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ CastVT = ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
+ Src = convertToScalableVector(DAG, CastVT, Src);
+ }
+
+ // First, round the floating-point value into a floating-point register with
+ // the current rounding mode.
+ SDValue FOp = DAG.getNode(ISD::FRINT, DL, CastVT, Src);
+
+ // If the source elements are narrower than the result elements (e.g. an f32
+ // source for an i64 result), the conversion would not produce elements of the
+ // required width. So, FP_EXTEND the source to the required element size first.
+ size_t SrcSz = SrcVT.getScalarSizeInBits();
+ size_t ContainerSz = ContainerVT.getScalarSizeInBits();
+ if (ContainerSz > SrcSz) {
+ EVT WidenedVT = MVT::getVectorVT(MVT::getFloatingPointVT(ContainerSz),
+ ContainerVT.getVectorElementCount());
+ FOp = DAG.getNode(ISD::FP_EXTEND, DL, WidenedVT, FOp.getOperand(0));
+ }
+
+ // Finally, convert the rounded floating-point value to an integer, rounding
+ // toward zero.
+ SDValue Pred = getPredicateForVector(DAG, DL, ContainerVT);
+ SDValue Undef = DAG.getUNDEF(ContainerVT);
+ SDValue Truncated =
+ DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, DL, ContainerVT,
+ {Pred, FOp.getOperand(0), Undef}, FOp->getFlags());
+
+ if (!VT.isFixedLengthVector())
+ return Truncated;
+
+ return convertFromScalableVector(DAG, VT, Truncated);
+}
+
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
@@ -6628,10 +6684,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerVECTOR_DEINTERLEAVE(Op, DAG);
case ISD::VECTOR_INTERLEAVE:
return LowerVECTOR_INTERLEAVE(Op, DAG);
- case ISD::LROUND:
- case ISD::LLROUND:
case ISD::LRINT:
- case ISD::LLRINT: {
+ case ISD::LLRINT:
+ if (Op.getValueType().isVector())
+ return LowerVectorXRINT(Op, DAG);
+ [[fallthrough]];
+ case ISD::LROUND:
+ case ISD::LLROUND: {
assert((Op.getOperand(0).getValueType() == MVT::f16 ||
Op.getOperand(0).getValueType() == MVT::bf16) &&
"Expected custom lowering of rounding operations only for f16");
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 18439dc7f01020..65277a09320705 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1155,6 +1155,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/fixed-vector-llrint.ll
new file mode 100644
index 00000000000000..772d767380a848
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-llrint.ll
@@ -0,0 +1,733 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+; CHECK-LABEL: llrint_v1i64_v1f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: fcvtzs x8, h0
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
+
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+; CHECK-LABEL: llrint_v1i64_v2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: fcvtzs x8, h0
+; CHECK-NEXT: fcvtzs x9, h1
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
+
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+; CHECK-LABEL: llrint_v4i64_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[2]
+; CHECK-NEXT: mov h2, v0.h[1]
+; CHECK-NEXT: mov h3, v0.h[3]
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: frintx h2, h2
+; CHECK-NEXT: frintx h3, h3
+; CHECK-NEXT: fcvtzs x8, h0
+; CHECK-NEXT: fcvtzs x9, h1
+; CHECK-NEXT: fcvtzs x10, h2
+; CHECK-NEXT: fcvtzs x11, h3
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], x10
+; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
+
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+; CHECK-LABEL: llrint_v8i64_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: mov h4, v0.h[2]
+; CHECK-NEXT: mov h3, v0.h[1]
+; CHECK-NEXT: mov h7, v0.h[3]
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: mov h2, v1.h[2]
+; CHECK-NEXT: mov h5, v1.h[1]
+; CHECK-NEXT: mov h6, v1.h[3]
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: frintx h4, h4
+; CHECK-NEXT: frintx h3, h3
+; CHECK-NEXT: frintx h7, h7
+; CHECK-NEXT: fcvtzs x9, h0
+; CHECK-NEXT: frintx h2, h2
+; CHECK-NEXT: frintx h5, h5
+; CHECK-NEXT: frintx h6, h6
+; CHECK-NEXT: fcvtzs x8, h1
+; CHECK-NEXT: fcvtzs x12, h4
+; CHECK-NEXT: fcvtzs x11, h3
+; CHECK-NEXT: fcvtzs x15, h7
+; CHECK-NEXT: fmov d0, x9
+; CHECK-NEXT: fcvtzs x10, h2
+; CHECK-NEXT: fcvtzs x13, h5
+; CHECK-NEXT: fcvtzs x14, h6
+; CHECK-NEXT: fmov d2, x8
+; CHECK-NEXT: fmov d1, x12
+; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: fmov d3, x10
+; CHECK-NEXT: mov v2.d[1], x13
+; CHECK-NEXT: mov v1.d[1], x15
+; CHECK-NEXT: mov v3.d[1], x14
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
+
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+; CHECK-LABEL: llrint_v16i64_v16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: mov h4, v0.h[1]
+; CHECK-NEXT: frintx h5, h0
+; CHECK-NEXT: mov h18, v0.h[2]
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: frintx h6, h2
+; CHECK-NEXT: mov h7, v2.h[1]
+; CHECK-NEXT: mov h16, v2.h[2]
+; CHECK-NEXT: mov h17, v3.h[2]
+; CHECK-NEXT: frintx h19, h3
+; CHECK-NEXT: frintx h4, h4
+; CHECK-NEXT: fcvtzs x8, h5
+; CHECK-NEXT: mov h5, v1.h[1]
+; CHECK-NEXT: mov h2, v2.h[3]
+; CHECK-NEXT: frintx h18, h18
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: fcvtzs x9, h6
+; CHECK-NEXT: frintx h6, h7
+; CHECK-NEXT: frintx h7, h16
+; CHECK-NEXT: mov h16, v1.h[2]
+; CHECK-NEXT: frintx h17, h17
+; CHECK-NEXT: fcvtzs x10, h19
+; CHECK-NEXT: mov h19, v3.h[1]
+; CHECK-NEXT: fcvtzs x11, h4
+; CHECK-NEXT: mov h4, v1.h[3]
+; CHECK-NEXT: mov h3, v3.h[3]
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: frintx h5, h5
+; CHECK-NEXT: fcvtzs x13, h7
+; CHECK-NEXT: fcvtzs x12, h6
+; CHECK-NEXT: fcvtzs x15, h18
+; CHECK-NEXT: frintx h7, h16
+; CHECK-NEXT: fcvtzs x14, h17
+; CHECK-NEXT: frintx h16, h2
+; CHECK-NEXT: frintx h17, h19
+; CHECK-NEXT: frintx h4, h4
+; CHECK-NEXT: fmov d2, x9
+; CHECK-NEXT: frintx h19, h3
+; CHECK-NEXT: fcvtzs x9, h1
+; CHECK-NEXT: fmov d6, x10
+; CHECK-NEXT: fmov d3, x13
+; CHECK-NEXT: fcvtzs x13, h0
+; CHECK-NEXT: fcvtzs x16, h5
+; CHECK-NEXT: fcvtzs x10, h7
+; CHECK-NEXT: fmov d7, x14
+; CHECK-NEXT: fcvtzs x14, h16
+; CHECK-NEXT: fcvtzs x17, h17
+; CHECK-NEXT: fcvtzs x0, h4
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzs x18, h19
+; CHECK-NEXT: fmov d1, x15
+; CHECK-NEXT: fmov d4, x9
+; CHECK-NEXT: mov v2.d[1], x12
+; CHECK-NEXT: fmov d5, x10
+; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: mov v3.d[1], x14
+; CHECK-NEXT: mov v1.d[1], x13
+; CHECK-NEXT: mov v4.d[1], x16
+; CHECK-NEXT: mov v6.d[1], x17
+; CHECK-NEXT: mov v7.d[1], x18
+; CHECK-NEXT: mov v5.d[1], x0
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
+
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+; CHECK-LABEL: llrint_v32i64_v32f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: frintx h21, h1
+; CHECK-NEXT: frintx h22, h2
+; CHECK-NEXT: mov h26, v2.h[2]
+; CHECK-NEXT: frintx h19, h0
+; CHECK-NEXT: mov h27, v3.h[2]
+; CHECK-NEXT: mov h20, v2.h[1]
+; CHECK-NEXT: mov h18, v1.h[1]
+; CHECK-NEXT: mov h16, v4.h[2]
+; CHECK-NEXT: mov h17, v5.h[2]
+; CHECK-NEXT: frintx h23, h5
+; CHECK-NEXT: frintx h24, h6
+; CHECK-NEXT: mov h25, v6.h[2]
+; CHECK-NEXT: fcvtzs x9, h21
+; CHECK-NEXT: fcvtzs x11, h22
+; CHECK-NEXT: frintx h22, h7
+; CHECK-NEXT: mov h21, v3.h[3]
+; CHECK-NEXT: fcvtzs x10, h19
+; CHECK-NEXT: frintx h27, h27
+; CHECK-NEXT: frintx h20, h20
+; CHECK-NEXT: frintx h16, h16
+; CHECK-NEXT: frintx h17, h17
+; CHECK-NEXT: fcvtzs x12, h23
+; CHECK-NEXT: fcvtzs x13, h24
+; CHECK-NEXT: frintx h23, h25
+; CHECK-NEXT: frintx h25, h26
+; CHECK-NEXT: mov h26, v3.h[1]
+; CHECK-NEXT: mov h24, v2.h[3]
+; CHECK-NEXT: fmov d19, x9
+; CHECK-NEXT: fcvtzs x9, h22
+; CHECK-NEXT: frintx h22, h3
+; CHECK-NEXT: frintx h21, h21
+; CHECK-NEXT: fcvtzs x14, h16
+; CHECK-NEXT: fcvtzs x15, h17
+; CHECK-NEXT: fmov d2, x12
+; CHECK-NEXT: fmov d16, x13
+; CHECK-NEXT: fcvtzs x12, h23
+; CHECK-NEXT: fcvtzs x13, h25
+; CHECK-NEXT: mov h23, v1.h[2]
+; CHECK-NEXT: frintx h25, h26
+; CHECK-NEXT: frintx h24, h24
+; CHECK-NEXT: mov h1, v1.h[3]
+; CHECK-NEXT: fmov d26, x11
+; CHECK-NEXT: fcvtzs x11, h21
+; CHECK-NEXT: fmov d3, x14
+; CHECK-NEXT: fmov d17, x15
+; CHECK-NEXT: fcvtzs x14, h22
+; CHECK-NEXT: fcvtzs x15, h27
+; CHECK-NEXT: mov h22, v0.h[2]
+; CHECK-NEXT: frintx h18, h18
+; CHECK-NEXT: frintx h21, h23
+; CHECK-NEXT: fmov d23, x13
+; CHECK-NEXT: fcvtzs x13, h25
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: fmov d25, x14
+; CHECK-NEXT: fcvtzs x14, h24
+; CHECK-NEXT: fmov d24, x15
+; CHECK-NEXT: frintx h22, h22
+; CHECK-NEXT: fcvtzs x15, h18
+; CHECK-NEXT: mov h18, v7.h[1]
+; CHECK-NEXT: mov v25.d[1], x13
+; CHECK-NEXT: fcvtzs x13, h21
+; CHECK-NEXT: mov h21, v7.h[2]
+; CHECK-NEXT: mov v24.d[1], x11
+; CHECK-NEXT: fcvtzs x11, h20
+; CHECK-NEXT: mov h20, v0.h[1]
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: mov v23.d[1], x14
+; CHECK-NEXT: fcvtzs x14, h1
+; CHECK-NEXT: mov h1, v6.h[3]
+; CHECK-NEXT: mov h6, v6.h[1]
+; CHECK-NEXT: mov v19.d[1], x15
+; CHECK-NEXT: mov h7, v7.h[3]
+; CHECK-NEXT: stp q25, q24, [x8, #192]
+; CHECK-NEXT: fmov d24, x13
+; CHECK-NEXT: frintx h20, h20
+; CHECK-NEXT: mov v26.d[1], x11
+; CHECK-NEXT: fcvtzs x11, h22
+; CHECK-NEXT: mov h22, v5.h[1]
+; CHECK-NEXT: mov h5, v5.h[3]
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: mov v24.d[1], x14
+; CHECK-NEXT: mov h25, v4.h[3]
+; CHECK-NEXT: frintx h6, h6
+; CHECK-NEXT: stp q26, q23, [x8, #128]
+; CHECK-NEXT: fmov d23, x12
+; CHECK-NEXT: fcvtzs x12, h20
+; CHECK-NEXT: mov h20, v4.h[1]
+; CHECK-NEXT: frintx h5, h5
+; CHECK-NEXT: fcvtzs x13, h0
+; CHECK-NEXT: stp q19, q24, [x8, #64]
+; CHECK-NEXT: frintx h22, h22
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: fmov d19, x11
+; CHECK-NEXT: frintx h4, h4
+; CHECK-NEXT: fcvtzs x10, h1
+; CHECK-NEXT: frintx h1, h21
+; CHECK-NEXT: frintx h24, h25
+; CHECK-NEXT: fcvtzs x11, h6
+; CHECK-NEXT: frintx h20, h20
+; CHECK-NEXT: frintx h6, h7
+; CHECK-NEXT: fcvtzs x14, h5
+; CHECK-NEXT: mov v19.d[1], x13
+; CHECK-NEXT: frintx h5, h18
+; CHECK-NEXT: fcvtzs x13, h22
+; CHECK-NEXT: mov v0.d[1], x12
+; CHECK-NEXT: fcvtzs x12, h4
+; CHECK-NEXT: mov v23.d[1], x10
+; CHECK-NEXT: fcvtzs x10, h1
+; CHECK-NEXT: fcvtzs x15, h24
+; CHECK-NEXT: mov v16.d[1], x11
+; CHECK-NEXT: fcvtzs x11, h20
+; CHECK-NEXT: mov v17.d[1], x14
+; CHECK-NEXT: fcvtzs x14, h6
+; CHECK-NEXT: mov v2.d[1], x13
+; CHECK-NEXT: fcvtzs x13, h5
+; CHECK-NEXT: fmov d4, x9
+; CHECK-NEXT: stp q0, q19, [x8]
+; CHECK-NEXT: fmov d0, x12
+; CHECK-NEXT: stp q16, q23, [x8, #224]
+; CHECK-NEXT: fmov d1, x10
+; CHECK-NEXT: mov v3.d[1], x15
+; CHECK-NEXT: stp q2, q17, [x8, #160]
+; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: mov v4.d[1], x13
+; CHECK-NEXT: mov v1.d[1], x14
+; CHECK-NEXT: stp q0, q3, [x8, #96]
+; CHECK-NEXT: stp q4, q1, [x8, #32]
+; CHECK-NEXT: ret
+ %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
+
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+; CHECK-LABEL: llrint_v1i64_v1f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: frintx s0, s0
+; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
+
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+; CHECK-LABEL: llrint_v2i64_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: str d0, [x8]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
+
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+; CHECK-LABEL: llrint_v4i64_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp]
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
+
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+; CHECK-LABEL: llrint_v8i64_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: addvl x8, sp, #1
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: str d2, [x8]
+; CHECK-NEXT: addpl x8, sp, #12
+; CHECK-NEXT: str d3, [x8]
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [sp, #2, mul vl]
+; CHECK-NEXT: ld1w { z3.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: ld1w { z4.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp]
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z3.s
+; CHECK-NEXT: movprfx z3, z4
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.s
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
+
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+; CHECK-LABEL: llrint_v16i64_v16f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-4
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: addvl x8, sp, #1
+; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: addvl x8, sp, #2
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: str d2, [x8]
+; CHECK-NEXT: ext v1.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: addvl x8, sp, #3
+; CHECK-NEXT: ext v2.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT: str d3, [x8]
+; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: str d0, [x8]
+; CHECK-NEXT: addpl x8, sp, #12
+; CHECK-NEXT: str d4, [x8]
+; CHECK-NEXT: addpl x8, sp, #20
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: addpl x8, sp, #28
+; CHECK-NEXT: str d2, [x8]
+; CHECK-NEXT: ld1w { z3.d }, p0/z, [sp, #4, mul vl]
+; CHECK-NEXT: ld1w { z5.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT: ld1w { z7.d }, p0/z, [sp, #5, mul vl]
+; CHECK-NEXT: ld1w { z16.d }, p0/z, [sp, #7, mul vl]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp]
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: ld1w { z2.d }, p0/z, [sp, #2, mul vl]
+; CHECK-NEXT: ld1w { z6.d }, p0/z, [sp, #6, mul vl]
+; CHECK-NEXT: movprfx z4, z3
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z3.s
+; CHECK-NEXT: movprfx z3, z5
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s
+; CHECK-NEXT: movprfx z5, z7
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z7.s
+; CHECK-NEXT: movprfx z7, z16
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z16.s
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4
+; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7
+; CHECK-NEXT: addvl sp, sp, #4
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
+
+define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
+; CHECK-LABEL: llrint_v32i64_v32f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-8
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ext v16.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ext v17.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: addpl x9, sp, #4
+; CHECK-NEXT: ext v18.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: ext v19.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT: ext v20.16b, v4.16b, v4.16b, #8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: str d16, [x9]
+; CHECK-NEXT: addpl x9, sp, #12
+; CHECK-NEXT: ext v16.16b, v5.16b, v5.16b, #8
+; CHECK-NEXT: str d17, [x9]
+; CHECK-NEXT: addpl x9, sp, #20
+; CHECK-NEXT: ext v17.16b, v6.16b, v6.16b, #8
+; CHECK-NEXT: str d18, [x9]
+; CHECK-NEXT: addpl x9, sp, #28
+; CHECK-NEXT: ext v18.16b, v7.16b, v7.16b, #8
+; CHECK-NEXT: str d19, [x9]
+; CHECK-NEXT: addpl x9, sp, #31
+; CHECK-NEXT: addpl x9, x9, #5
+; CHECK-NEXT: str d20, [x9]
+; CHECK-NEXT: addpl x9, sp, #31
+; CHECK-NEXT: addpl x9, x9, #13
+; CHECK-NEXT: str d16, [x9]
+; CHECK-NEXT: addpl x9, sp, #31
+; CHECK-NEXT: addpl x9, x9, #21
+; CHECK-NEXT: str d17, [x9]
+; CHECK-NEXT: addpl x9, sp, #31
+; CHECK-NEXT: addpl x9, x9, #29
+; CHECK-NEXT: str d18, [x9]
+; CHECK-NEXT: addvl x9, sp, #1
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: str d1, [x9]
+; CHECK-NEXT: addvl x9, sp, #2
+; CHECK-NEXT: str d2, [x9]
+; CHECK-NEXT: addvl x9, sp, #3
+; CHECK-NEXT: str d3, [x9]
+; CHECK-NEXT: addvl x9, sp, #4
+; CHECK-NEXT: str d4, [x9]
+; CHECK-NEXT: addvl x9, sp, #5
+; CHECK-NEXT: str d5, [x9]
+; CHECK-NEXT: addvl x9, sp, #6
+; CHECK-NEXT: str d6, [x9]
+; CHECK-NEXT: addvl x9, sp, #7
+; CHECK-NEXT: str d7, [x9]
+; CHECK-NEXT: addpl x9, sp, #28
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addvl x9, sp, #4
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: ld1w { z3.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addvl x9, sp, #3
+; CHECK-NEXT: ld1w { z2.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT: ld1w { z5.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addpl x9, sp, #20
+; CHECK-NEXT: ld1w { z4.d }, p0/z, [sp, #5, mul vl]
+; CHECK-NEXT: ld1w { z6.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addvl x9, sp, #1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: ld1w { z16.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addvl x9, sp, #2
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s
+; CHECK-NEXT: ld1w { z17.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addpl x9, sp, #12
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s
+; CHECK-NEXT: ld1w { z18.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s
+; CHECK-NEXT: addpl x9, sp, #4
+; CHECK-NEXT: ld1w { z22.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: ld1w { z7.d }, p0/z, [sp, #7, mul vl]
+; CHECK-NEXT: ld1w { z19.d }, p0/z, [sp]
+; CHECK-NEXT: ld1w { z20.d }, p0/z, [sp, #2, mul vl]
+; CHECK-NEXT: ld1w { z21.d }, p0/z, [sp, #4, mul vl]
+; CHECK-NEXT: ld1w { z23.d }, p0/z, [sp, #6, mul vl]
+; CHECK-NEXT: fcvtzs z18.d, p0/m, z18.s
+; CHECK-NEXT: stp q0, q3, [x8, #224]
+; CHECK-NEXT: movprfx z0, z17
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z17.s
+; CHECK-NEXT: movprfx z3, z22
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z22.s
+; CHECK-NEXT: stp q6, q5, [x8, #192]
+; CHECK-NEXT: movprfx z6, z16
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z16.s
+; CHECK-NEXT: movprfx z5, z23
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z23.s
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s
+; CHECK-NEXT: stp q18, q0, [x8, #160]
+; CHECK-NEXT: movprfx z0, z21
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z21.s
+; CHECK-NEXT: stp q3, q6, [x8, #128]
+; CHECK-NEXT: movprfx z3, z4
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.s
+; CHECK-NEXT: movprfx z4, z20
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z20.s
+; CHECK-NEXT: stp q5, q7, [x8, #96]
+; CHECK-NEXT: movprfx z5, z19
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z19.s
+; CHECK-NEXT: stp q4, q2, [x8, #32]
+; CHECK-NEXT: stp q0, q3, [x8, #64]
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT: stp q5, q0, [x8]
+; CHECK-NEXT: addvl sp, sp, #8
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
+
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+; CHECK-LABEL: llrint_v1i64_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
+
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+; CHECK-LABEL: llrint_v2i64_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
+
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+; CHECK-LABEL: llrint_v4i64_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
+
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+; CHECK-LABEL: llrint_v8i64_v8f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <16 x i64> @llrint_v16f64(<16 x double> %x) {
+; CHECK-LABEL: llrint_v16f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
+; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
+; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
+; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4
+; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
+
+define <32 x i64> @llrint_v32f64(<32 x double> %x) {
+; CHECK-LABEL: llrint_v32f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp q17, q16, [sp, #96]
+; CHECK-NEXT: ldp q19, q18, [sp, #64]
+; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
+; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
+; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
+; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: ldp q21, q20, [sp, #32]
+; CHECK-NEXT: fcvtzs z16.d, p0/m, z16.d
+; CHECK-NEXT: fcvtzs z17.d, p0/m, z17.d
+; CHECK-NEXT: fcvtzs z18.d, p0/m, z18.d
+; CHECK-NEXT: fcvtzs z19.d, p0/m, z19.d
+; CHECK-NEXT: fcvtzs z20.d, p0/m, z20.d
+; CHECK-NEXT: fcvtzs z21.d, p0/m, z21.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: str q16, [x8, #240]
+; CHECK-NEXT: ldp q22, q16, [sp]
+; CHECK-NEXT: stp q18, q17, [x8, #208]
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: stp q5, q6, [x8, #80]
+; CHECK-NEXT: fcvtzs z16.d, p0/m, z16.d
+; CHECK-NEXT: movprfx z17, z22
+; CHECK-NEXT: fcvtzs z17.d, p0/m, z22.d
+; CHECK-NEXT: stp q3, q4, [x8, #48]
+; CHECK-NEXT: stp q20, q19, [x8, #176]
+; CHECK-NEXT: stp q1, q2, [x8, #16]
+; CHECK-NEXT: str q0, [x8]
+; CHECK-NEXT: stp q7, q17, [x8, #112]
+; CHECK-NEXT: stp q16, q21, [x8, #144]
+; CHECK-NEXT: ret
+ %a = call <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/fixed-vector-lrint.ll
new file mode 100644
index 00000000000000..c3fd1fe1d064e1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-lrint.ll
@@ -0,0 +1,747 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+define <1 x i64> @lrint_v1f16(<1 x half> %x) {
+; CHECK-LABEL: lrint_v1f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: fcvtzs x8, h0
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>)
+
+define <2 x i64> @lrint_v2f16(<2 x half> %x) {
+; CHECK-LABEL: lrint_v2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: fcvtzs x8, h0
+; CHECK-NEXT: fcvtzs x9, h1
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>)
+
+define <4 x i64> @lrint_v4f16(<4 x half> %x) {
+; CHECK-LABEL: lrint_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[2]
+; CHECK-NEXT: mov h2, v0.h[1]
+; CHECK-NEXT: mov h3, v0.h[3]
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: frintx h2, h2
+; CHECK-NEXT: frintx h3, h3
+; CHECK-NEXT: fcvtzs x8, h0
+; CHECK-NEXT: fcvtzs x9, h1
+; CHECK-NEXT: fcvtzs x10, h2
+; CHECK-NEXT: fcvtzs x11, h3
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], x10
+; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>)
+
+define <8 x i64> @lrint_v8f16(<8 x half> %x) {
+; CHECK-LABEL: lrint_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: mov h4, v0.h[2]
+; CHECK-NEXT: mov h3, v0.h[1]
+; CHECK-NEXT: mov h7, v0.h[3]
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: mov h2, v1.h[2]
+; CHECK-NEXT: mov h5, v1.h[1]
+; CHECK-NEXT: mov h6, v1.h[3]
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: frintx h4, h4
+; CHECK-NEXT: frintx h3, h3
+; CHECK-NEXT: frintx h7, h7
+; CHECK-NEXT: fcvtzs x9, h0
+; CHECK-NEXT: frintx h2, h2
+; CHECK-NEXT: frintx h5, h5
+; CHECK-NEXT: frintx h6, h6
+; CHECK-NEXT: fcvtzs x8, h1
+; CHECK-NEXT: fcvtzs x12, h4
+; CHECK-NEXT: fcvtzs x11, h3
+; CHECK-NEXT: fcvtzs x15, h7
+; CHECK-NEXT: fmov d0, x9
+; CHECK-NEXT: fcvtzs x10, h2
+; CHECK-NEXT: fcvtzs x13, h5
+; CHECK-NEXT: fcvtzs x14, h6
+; CHECK-NEXT: fmov d2, x8
+; CHECK-NEXT: fmov d1, x12
+; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: fmov d3, x10
+; CHECK-NEXT: mov v2.d[1], x13
+; CHECK-NEXT: mov v1.d[1], x15
+; CHECK-NEXT: mov v3.d[1], x14
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>)
+
+define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
+; CHECK-LABEL: lrint_v16i64_v16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: mov h4, v0.h[1]
+; CHECK-NEXT: frintx h5, h0
+; CHECK-NEXT: mov h18, v0.h[2]
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: frintx h6, h2
+; CHECK-NEXT: mov h7, v2.h[1]
+; CHECK-NEXT: mov h16, v2.h[2]
+; CHECK-NEXT: mov h17, v3.h[2]
+; CHECK-NEXT: frintx h19, h3
+; CHECK-NEXT: frintx h4, h4
+; CHECK-NEXT: fcvtzs x8, h5
+; CHECK-NEXT: mov h5, v1.h[1]
+; CHECK-NEXT: mov h2, v2.h[3]
+; CHECK-NEXT: frintx h18, h18
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: fcvtzs x9, h6
+; CHECK-NEXT: frintx h6, h7
+; CHECK-NEXT: frintx h7, h16
+; CHECK-NEXT: mov h16, v1.h[2]
+; CHECK-NEXT: frintx h17, h17
+; CHECK-NEXT: fcvtzs x10, h19
+; CHECK-NEXT: mov h19, v3.h[1]
+; CHECK-NEXT: fcvtzs x11, h4
+; CHECK-NEXT: mov h4, v1.h[3]
+; CHECK-NEXT: mov h3, v3.h[3]
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: frintx h5, h5
+; CHECK-NEXT: fcvtzs x13, h7
+; CHECK-NEXT: fcvtzs x12, h6
+; CHECK-NEXT: fcvtzs x15, h18
+; CHECK-NEXT: frintx h7, h16
+; CHECK-NEXT: fcvtzs x14, h17
+; CHECK-NEXT: frintx h16, h2
+; CHECK-NEXT: frintx h17, h19
+; CHECK-NEXT: frintx h4, h4
+; CHECK-NEXT: fmov d2, x9
+; CHECK-NEXT: frintx h19, h3
+; CHECK-NEXT: fcvtzs x9, h1
+; CHECK-NEXT: fmov d6, x10
+; CHECK-NEXT: fmov d3, x13
+; CHECK-NEXT: fcvtzs x13, h0
+; CHECK-NEXT: fcvtzs x16, h5
+; CHECK-NEXT: fcvtzs x10, h7
+; CHECK-NEXT: fmov d7, x14
+; CHECK-NEXT: fcvtzs x14, h16
+; CHECK-NEXT: fcvtzs x17, h17
+; CHECK-NEXT: fcvtzs x0, h4
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvtzs x18, h19
+; CHECK-NEXT: fmov d1, x15
+; CHECK-NEXT: fmov d4, x9
+; CHECK-NEXT: mov v2.d[1], x12
+; CHECK-NEXT: fmov d5, x10
+; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: mov v3.d[1], x14
+; CHECK-NEXT: mov v1.d[1], x13
+; CHECK-NEXT: mov v4.d[1], x16
+; CHECK-NEXT: mov v6.d[1], x17
+; CHECK-NEXT: mov v7.d[1], x18
+; CHECK-NEXT: mov v5.d[1], x0
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>)
+
+define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
+; CHECK-LABEL: lrint_v32i64_v32f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: frintx h21, h1
+; CHECK-NEXT: frintx h22, h2
+; CHECK-NEXT: mov h26, v2.h[2]
+; CHECK-NEXT: frintx h19, h0
+; CHECK-NEXT: mov h27, v3.h[2]
+; CHECK-NEXT: mov h20, v2.h[1]
+; CHECK-NEXT: mov h18, v1.h[1]
+; CHECK-NEXT: mov h16, v4.h[2]
+; CHECK-NEXT: mov h17, v5.h[2]
+; CHECK-NEXT: frintx h23, h5
+; CHECK-NEXT: frintx h24, h6
+; CHECK-NEXT: mov h25, v6.h[2]
+; CHECK-NEXT: fcvtzs x9, h21
+; CHECK-NEXT: fcvtzs x11, h22
+; CHECK-NEXT: frintx h22, h7
+; CHECK-NEXT: mov h21, v3.h[3]
+; CHECK-NEXT: fcvtzs x10, h19
+; CHECK-NEXT: frintx h27, h27
+; CHECK-NEXT: frintx h20, h20
+; CHECK-NEXT: frintx h16, h16
+; CHECK-NEXT: frintx h17, h17
+; CHECK-NEXT: fcvtzs x12, h23
+; CHECK-NEXT: fcvtzs x13, h24
+; CHECK-NEXT: frintx h23, h25
+; CHECK-NEXT: frintx h25, h26
+; CHECK-NEXT: mov h26, v3.h[1]
+; CHECK-NEXT: mov h24, v2.h[3]
+; CHECK-NEXT: fmov d19, x9
+; CHECK-NEXT: fcvtzs x9, h22
+; CHECK-NEXT: frintx h22, h3
+; CHECK-NEXT: frintx h21, h21
+; CHECK-NEXT: fcvtzs x14, h16
+; CHECK-NEXT: fcvtzs x15, h17
+; CHECK-NEXT: fmov d2, x12
+; CHECK-NEXT: fmov d16, x13
+; CHECK-NEXT: fcvtzs x12, h23
+; CHECK-NEXT: fcvtzs x13, h25
+; CHECK-NEXT: mov h23, v1.h[2]
+; CHECK-NEXT: frintx h25, h26
+; CHECK-NEXT: frintx h24, h24
+; CHECK-NEXT: mov h1, v1.h[3]
+; CHECK-NEXT: fmov d26, x11
+; CHECK-NEXT: fcvtzs x11, h21
+; CHECK-NEXT: fmov d3, x14
+; CHECK-NEXT: fmov d17, x15
+; CHECK-NEXT: fcvtzs x14, h22
+; CHECK-NEXT: fcvtzs x15, h27
+; CHECK-NEXT: mov h22, v0.h[2]
+; CHECK-NEXT: frintx h18, h18
+; CHECK-NEXT: frintx h21, h23
+; CHECK-NEXT: fmov d23, x13
+; CHECK-NEXT: fcvtzs x13, h25
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: fmov d25, x14
+; CHECK-NEXT: fcvtzs x14, h24
+; CHECK-NEXT: fmov d24, x15
+; CHECK-NEXT: frintx h22, h22
+; CHECK-NEXT: fcvtzs x15, h18
+; CHECK-NEXT: mov h18, v7.h[1]
+; CHECK-NEXT: mov v25.d[1], x13
+; CHECK-NEXT: fcvtzs x13, h21
+; CHECK-NEXT: mov h21, v7.h[2]
+; CHECK-NEXT: mov v24.d[1], x11
+; CHECK-NEXT: fcvtzs x11, h20
+; CHECK-NEXT: mov h20, v0.h[1]
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: mov v23.d[1], x14
+; CHECK-NEXT: fcvtzs x14, h1
+; CHECK-NEXT: mov h1, v6.h[3]
+; CHECK-NEXT: mov h6, v6.h[1]
+; CHECK-NEXT: mov v19.d[1], x15
+; CHECK-NEXT: mov h7, v7.h[3]
+; CHECK-NEXT: stp q25, q24, [x8, #192]
+; CHECK-NEXT: fmov d24, x13
+; CHECK-NEXT: frintx h20, h20
+; CHECK-NEXT: mov v26.d[1], x11
+; CHECK-NEXT: fcvtzs x11, h22
+; CHECK-NEXT: mov h22, v5.h[1]
+; CHECK-NEXT: mov h5, v5.h[3]
+; CHECK-NEXT: frintx h0, h0
+; CHECK-NEXT: frintx h1, h1
+; CHECK-NEXT: mov v24.d[1], x14
+; CHECK-NEXT: mov h25, v4.h[3]
+; CHECK-NEXT: frintx h6, h6
+; CHECK-NEXT: stp q26, q23, [x8, #128]
+; CHECK-NEXT: fmov d23, x12
+; CHECK-NEXT: fcvtzs x12, h20
+; CHECK-NEXT: mov h20, v4.h[1]
+; CHECK-NEXT: frintx h5, h5
+; CHECK-NEXT: fcvtzs x13, h0
+; CHECK-NEXT: stp q19, q24, [x8, #64]
+; CHECK-NEXT: frintx h22, h22
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: fmov d19, x11
+; CHECK-NEXT: frintx h4, h4
+; CHECK-NEXT: fcvtzs x10, h1
+; CHECK-NEXT: frintx h1, h21
+; CHECK-NEXT: frintx h24, h25
+; CHECK-NEXT: fcvtzs x11, h6
+; CHECK-NEXT: frintx h20, h20
+; CHECK-NEXT: frintx h6, h7
+; CHECK-NEXT: fcvtzs x14, h5
+; CHECK-NEXT: mov v19.d[1], x13
+; CHECK-NEXT: frintx h5, h18
+; CHECK-NEXT: fcvtzs x13, h22
+; CHECK-NEXT: mov v0.d[1], x12
+; CHECK-NEXT: fcvtzs x12, h4
+; CHECK-NEXT: mov v23.d[1], x10
+; CHECK-NEXT: fcvtzs x10, h1
+; CHECK-NEXT: fcvtzs x15, h24
+; CHECK-NEXT: mov v16.d[1], x11
+; CHECK-NEXT: fcvtzs x11, h20
+; CHECK-NEXT: mov v17.d[1], x14
+; CHECK-NEXT: fcvtzs x14, h6
+; CHECK-NEXT: mov v2.d[1], x13
+; CHECK-NEXT: fcvtzs x13, h5
+; CHECK-NEXT: fmov d4, x9
+; CHECK-NEXT: stp q0, q19, [x8]
+; CHECK-NEXT: fmov d0, x12
+; CHECK-NEXT: stp q16, q23, [x8, #224]
+; CHECK-NEXT: fmov d1, x10
+; CHECK-NEXT: mov v3.d[1], x15
+; CHECK-NEXT: stp q2, q17, [x8, #160]
+; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: mov v4.d[1], x13
+; CHECK-NEXT: mov v1.d[1], x14
+; CHECK-NEXT: stp q0, q3, [x8, #96]
+; CHECK-NEXT: stp q4, q1, [x8, #32]
+; CHECK-NEXT: ret
+ %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>)
+
+define <1 x i64> @lrint_v1f32(<1 x float> %x) {
+; CHECK-LABEL: lrint_v1f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: frintx s0, s0
+; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>)
+
+define <2 x i64> @lrint_v2f32(<2 x float> %x) {
+; CHECK-LABEL: lrint_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: str d0, [x8]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>)
+
+define <4 x i64> @lrint_v4f32(<4 x float> %x) {
+; CHECK-LABEL: lrint_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp]
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>)
+
+define <8 x i64> @lrint_v8f32(<8 x float> %x) {
+; CHECK-LABEL: lrint_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: addvl x8, sp, #1
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: str d2, [x8]
+; CHECK-NEXT: addpl x8, sp, #12
+; CHECK-NEXT: str d3, [x8]
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [sp, #2, mul vl]
+; CHECK-NEXT: ld1w { z3.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: ld1w { z4.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp]
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z3.s
+; CHECK-NEXT: movprfx z3, z4
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.s
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>)
+
+define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
+; CHECK-LABEL: lrint_v16i64_v16f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-4
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: addvl x8, sp, #1
+; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: addvl x8, sp, #2
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: str d2, [x8]
+; CHECK-NEXT: ext v1.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: addvl x8, sp, #3
+; CHECK-NEXT: ext v2.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT: str d3, [x8]
+; CHECK-NEXT: addpl x8, sp, #4
+; CHECK-NEXT: str d0, [x8]
+; CHECK-NEXT: addpl x8, sp, #12
+; CHECK-NEXT: str d4, [x8]
+; CHECK-NEXT: addpl x8, sp, #20
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: addpl x8, sp, #28
+; CHECK-NEXT: str d2, [x8]
+; CHECK-NEXT: ld1w { z3.d }, p0/z, [sp, #4, mul vl]
+; CHECK-NEXT: ld1w { z5.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT: ld1w { z7.d }, p0/z, [sp, #5, mul vl]
+; CHECK-NEXT: ld1w { z16.d }, p0/z, [sp, #7, mul vl]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp]
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: ld1w { z2.d }, p0/z, [sp, #2, mul vl]
+; CHECK-NEXT: ld1w { z6.d }, p0/z, [sp, #6, mul vl]
+; CHECK-NEXT: movprfx z4, z3
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z3.s
+; CHECK-NEXT: movprfx z3, z5
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s
+; CHECK-NEXT: movprfx z5, z7
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z7.s
+; CHECK-NEXT: movprfx z7, z16
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z16.s
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4
+; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7
+; CHECK-NEXT: addvl sp, sp, #4
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>)
+
+define <32 x i64> @lrint_v32i64_v32f32(<32 x float> %x) {
+; CHECK-LABEL: lrint_v32i64_v32f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-8
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ext v16.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ext v17.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: addpl x9, sp, #4
+; CHECK-NEXT: ext v18.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: ext v19.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT: ext v20.16b, v4.16b, v4.16b, #8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: str d16, [x9]
+; CHECK-NEXT: addpl x9, sp, #12
+; CHECK-NEXT: ext v16.16b, v5.16b, v5.16b, #8
+; CHECK-NEXT: str d17, [x9]
+; CHECK-NEXT: addpl x9, sp, #20
+; CHECK-NEXT: ext v17.16b, v6.16b, v6.16b, #8
+; CHECK-NEXT: str d18, [x9]
+; CHECK-NEXT: addpl x9, sp, #28
+; CHECK-NEXT: ext v18.16b, v7.16b, v7.16b, #8
+; CHECK-NEXT: str d19, [x9]
+; CHECK-NEXT: addpl x9, sp, #31
+; CHECK-NEXT: addpl x9, x9, #5
+; CHECK-NEXT: str d20, [x9]
+; CHECK-NEXT: addpl x9, sp, #31
+; CHECK-NEXT: addpl x9, x9, #13
+; CHECK-NEXT: str d16, [x9]
+; CHECK-NEXT: addpl x9, sp, #31
+; CHECK-NEXT: addpl x9, x9, #21
+; CHECK-NEXT: str d17, [x9]
+; CHECK-NEXT: addpl x9, sp, #31
+; CHECK-NEXT: addpl x9, x9, #29
+; CHECK-NEXT: str d18, [x9]
+; CHECK-NEXT: addvl x9, sp, #1
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: str d1, [x9]
+; CHECK-NEXT: addvl x9, sp, #2
+; CHECK-NEXT: str d2, [x9]
+; CHECK-NEXT: addvl x9, sp, #3
+; CHECK-NEXT: str d3, [x9]
+; CHECK-NEXT: addvl x9, sp, #4
+; CHECK-NEXT: str d4, [x9]
+; CHECK-NEXT: addvl x9, sp, #5
+; CHECK-NEXT: str d5, [x9]
+; CHECK-NEXT: addvl x9, sp, #6
+; CHECK-NEXT: str d6, [x9]
+; CHECK-NEXT: addvl x9, sp, #7
+; CHECK-NEXT: str d7, [x9]
+; CHECK-NEXT: addpl x9, sp, #28
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addvl x9, sp, #4
+; CHECK-NEXT: ld1w { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: ld1w { z3.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addvl x9, sp, #3
+; CHECK-NEXT: ld1w { z2.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT: ld1w { z5.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addpl x9, sp, #20
+; CHECK-NEXT: ld1w { z4.d }, p0/z, [sp, #5, mul vl]
+; CHECK-NEXT: ld1w { z6.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addvl x9, sp, #1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: ld1w { z16.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addvl x9, sp, #2
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s
+; CHECK-NEXT: ld1w { z17.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: addpl x9, sp, #12
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s
+; CHECK-NEXT: ld1w { z18.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s
+; CHECK-NEXT: addpl x9, sp, #4
+; CHECK-NEXT: ld1w { z22.d }, p0/z, [x9, #7, mul vl]
+; CHECK-NEXT: ld1w { z7.d }, p0/z, [sp, #7, mul vl]
+; CHECK-NEXT: ld1w { z19.d }, p0/z, [sp]
+; CHECK-NEXT: ld1w { z20.d }, p0/z, [sp, #2, mul vl]
+; CHECK-NEXT: ld1w { z21.d }, p0/z, [sp, #4, mul vl]
+; CHECK-NEXT: ld1w { z23.d }, p0/z, [sp, #6, mul vl]
+; CHECK-NEXT: fcvtzs z18.d, p0/m, z18.s
+; CHECK-NEXT: stp q0, q3, [x8, #224]
+; CHECK-NEXT: movprfx z0, z17
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z17.s
+; CHECK-NEXT: movprfx z3, z22
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z22.s
+; CHECK-NEXT: stp q6, q5, [x8, #192]
+; CHECK-NEXT: movprfx z6, z16
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z16.s
+; CHECK-NEXT: movprfx z5, z23
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z23.s
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s
+; CHECK-NEXT: stp q18, q0, [x8, #160]
+; CHECK-NEXT: movprfx z0, z21
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z21.s
+; CHECK-NEXT: stp q3, q6, [x8, #128]
+; CHECK-NEXT: movprfx z3, z4
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.s
+; CHECK-NEXT: movprfx z4, z20
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z20.s
+; CHECK-NEXT: stp q5, q7, [x8, #96]
+; CHECK-NEXT: movprfx z5, z19
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z19.s
+; CHECK-NEXT: stp q4, q2, [x8, #32]
+; CHECK-NEXT: stp q0, q3, [x8, #64]
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT: stp q5, q0, [x8]
+; CHECK-NEXT: addvl sp, sp, #8
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float>)
+
+define <1 x i64> @lrint_v1f64(<1 x double> %x) {
+; CHECK-LABEL: lrint_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>)
+
+define <2 x i64> @lrint_v2f64(<2 x double> %x) {
+; CHECK-LABEL: lrint_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)
+
+define <4 x i64> @lrint_v4f64(<4 x double> %x) {
+; CHECK-LABEL: lrint_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>)
+
+define <8 x i64> @lrint_v8f64(<8 x double> %x) {
+; CHECK-LABEL: lrint_v8f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
+
+define <16 x i64> @lrint_v16f64(<16 x double> %x) {
+; CHECK-LABEL: lrint_v16f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
+; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
+; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
+; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4
+; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>)
+
+define <32 x i64> @lrint_v32f64(<32 x double> %x) {
+; CHECK-LABEL: lrint_v32f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp q17, q16, [sp, #96]
+; CHECK-NEXT: ldp q19, q18, [sp, #64]
+; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
+; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
+; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
+; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: ldp q21, q20, [sp, #32]
+; CHECK-NEXT: fcvtzs z16.d, p0/m, z16.d
+; CHECK-NEXT: fcvtzs z17.d, p0/m, z17.d
+; CHECK-NEXT: fcvtzs z18.d, p0/m, z18.d
+; CHECK-NEXT: fcvtzs z19.d, p0/m, z19.d
+; CHECK-NEXT: fcvtzs z20.d, p0/m, z20.d
+; CHECK-NEXT: fcvtzs z21.d, p0/m, z21.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: str q16, [x8, #240]
+; CHECK-NEXT: ldp q22, q16, [sp]
+; CHECK-NEXT: stp q18, q17, [x8, #208]
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: stp q5, q6, [x8, #80]
+; CHECK-NEXT: fcvtzs z16.d, p0/m, z16.d
+; CHECK-NEXT: movprfx z17, z22
+; CHECK-NEXT: fcvtzs z17.d, p0/m, z22.d
+; CHECK-NEXT: stp q3, q4, [x8, #48]
+; CHECK-NEXT: stp q20, q19, [x8, #176]
+; CHECK-NEXT: stp q1, q2, [x8, #16]
+; CHECK-NEXT: str q0, [x8]
+; CHECK-NEXT: stp q7, q17, [x8, #112]
+; CHECK-NEXT: stp q16, q21, [x8, #144]
+; CHECK-NEXT: ret
+  %a = call <32 x i64> @llvm.lrint.v32i64.v32f64(<32 x double> %x)
+ ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.lrint.v32i64.v32f64(<32 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index beb2b6a1346001..51d2e72945b52e 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -1,621 +1,492 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <vscale x 1 x i64> @llrint_v1i64_v1f16(<vscale x 1 x half> %x) {
; CHECK-LABEL: llrint_v1i64_v1f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT: ret
- %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x)
- ret <1 x i64> %a
+ %a = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f16(<vscale x 1 x half> %x)
+ ret <vscale x 1 x i64> %a
}
-declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
+declare <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f16(<vscale x 1 x half>)
-define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+define <vscale x 2 x i64> @llrint_v1i64_v2f16(<vscale x 2 x half> %x) {
; CHECK-LABEL: llrint_v1i64_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT: ret
- %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
- ret <2 x i64> %a
+ %a = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f16(<vscale x 2 x half> %x)
+ ret <vscale x 2 x i64> %a
}
-declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
+declare <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f16(<vscale x 2 x half>)
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <vscale x 4 x i64> @llrint_v4i64_v4f16(<vscale x 4 x half> %x) {
; CHECK-LABEL: llrint_v4i64_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[2]
-; CHECK-NEXT: mov h2, v0.h[1]
-; CHECK-NEXT: mov h3, v0.h[3]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z1.d, z0.s
+; CHECK-NEXT: uunpkhi z2.d, z0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h
+; CHECK-NEXT: movprfx z1, z2
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h
; CHECK-NEXT: ret
- %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
- ret <4 x i64> %a
+ %a = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f16(<vscale x 4 x half> %x)
+ ret <vscale x 4 x i64> %a
}
-declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
+declare <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f16(<vscale x 4 x half>)
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <vscale x 8 x i64> @llrint_v8i64_v8f16(<vscale x 8 x half> %x) {
; CHECK-LABEL: llrint_v8i64_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov h4, v0.h[2]
-; CHECK-NEXT: mov h3, v0.h[1]
-; CHECK-NEXT: mov h7, v0.h[3]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: mov h2, v1.h[2]
-; CHECK-NEXT: mov h5, v1.h[1]
-; CHECK-NEXT: mov h6, v1.h[3]
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvt s7, h7
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: fcvt s6, h6
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x12, s4
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fcvtzs x15, s7
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fcvtzs x13, s5
-; CHECK-NEXT: fcvtzs x14, s6
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: mov v2.d[1], x13
-; CHECK-NEXT: mov v1.d[1], x15
-; CHECK-NEXT: mov v3.d[1], x14
+; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z2.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uunpklo z3.d, z0.s
+; CHECK-NEXT: uunpkhi z4.d, z0.s
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h
+; CHECK-NEXT: movprfx z2, z3
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.h
+; CHECK-NEXT: movprfx z3, z4
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h
; CHECK-NEXT: ret
- %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
- ret <8 x i64> %a
+ %a = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f16(<vscale x 8 x half> %x)
+ ret <vscale x 8 x i64> %a
}
-declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
+declare <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f16(<vscale x 8 x half>)
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
; CHECK-LABEL: llrint_v16i64_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: mov h17, v0.h[1]
-; CHECK-NEXT: mov h19, v0.h[2]
-; CHECK-NEXT: fcvt s18, h0
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: mov h4, v2.h[1]
-; CHECK-NEXT: mov h5, v2.h[2]
-; CHECK-NEXT: fcvt s7, h3
-; CHECK-NEXT: fcvt s6, h2
-; CHECK-NEXT: mov h16, v3.h[2]
-; CHECK-NEXT: mov h2, v2.h[3]
-; CHECK-NEXT: fcvt s17, h17
-; CHECK-NEXT: fcvt s19, h19
-; CHECK-NEXT: frintx s18, s18
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvt s16, h16
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: frintx s17, s17
-; CHECK-NEXT: frintx s19, s19
-; CHECK-NEXT: fcvtzs x13, s18
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: fcvtzs x9, s7
-; CHECK-NEXT: mov h7, v1.h[2]
-; CHECK-NEXT: fcvtzs x8, s6
-; CHECK-NEXT: mov h6, v1.h[1]
-; CHECK-NEXT: frintx s16, s16
-; CHECK-NEXT: fcvtzs x14, s17
-; CHECK-NEXT: fcvtzs x15, s19
-; CHECK-NEXT: fcvtzs x10, s4
-; CHECK-NEXT: mov h4, v3.h[1]
-; CHECK-NEXT: fcvtzs x11, s5
-; CHECK-NEXT: mov h5, v1.h[3]
-; CHECK-NEXT: mov h3, v3.h[3]
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s7, h7
-; CHECK-NEXT: fcvt s6, h6
-; CHECK-NEXT: fcvtzs x12, s16
-; CHECK-NEXT: frintx s16, s2
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: frintx s17, s6
-; CHECK-NEXT: fmov d6, x9
-; CHECK-NEXT: mov v2.d[1], x10
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s18, s3
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x9, s7
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: fcvtzs x11, s0
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzs x12, s16
-; CHECK-NEXT: fcvtzs x16, s17
-; CHECK-NEXT: fcvtzs x17, s4
-; CHECK-NEXT: fmov d0, x13
-; CHECK-NEXT: fmov d1, x15
-; CHECK-NEXT: fcvtzs x18, s18
-; CHECK-NEXT: fcvtzs x0, s5
-; CHECK-NEXT: fmov d4, x8
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: mov v0.d[1], x14
-; CHECK-NEXT: mov v1.d[1], x11
-; CHECK-NEXT: mov v3.d[1], x12
-; CHECK-NEXT: mov v4.d[1], x16
-; CHECK-NEXT: mov v6.d[1], x17
-; CHECK-NEXT: mov v7.d[1], x18
-; CHECK-NEXT: mov v5.d[1], x0
+; CHECK-NEXT: uunpklo z2.s, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z3.s, z1.h
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z4.d, z2.s
+; CHECK-NEXT: uunpkhi z2.d, z2.s
+; CHECK-NEXT: uunpklo z5.d, z0.s
+; CHECK-NEXT: uunpkhi z6.d, z0.s
+; CHECK-NEXT: uunpklo z7.d, z3.s
+; CHECK-NEXT: uunpkhi z24.d, z3.s
+; CHECK-NEXT: uunpklo z25.d, z1.s
+; CHECK-NEXT: uunpkhi z26.d, z1.s
+; CHECK-NEXT: movprfx z0, z4
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h
+; CHECK-NEXT: movprfx z1, z2
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h
+; CHECK-NEXT: movprfx z2, z5
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.h
+; CHECK-NEXT: movprfx z3, z6
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h
+; CHECK-NEXT: movprfx z4, z7
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h
+; CHECK-NEXT: movprfx z5, z24
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z24.h
+; CHECK-NEXT: movprfx z6, z25
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z25.h
+; CHECK-NEXT: movprfx z7, z26
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z26.h
; CHECK-NEXT: ret
- %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
- ret <16 x i64> %a
+ %a = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f16(<vscale x 16 x half> %x)
+ ret <vscale x 16 x i64> %a
}
-declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
+declare <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f16(<vscale x 16 x half>)
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <vscale x 32 x i64> @llrint_v32i64_v32f16(<vscale x 32 x half> %x) {
; CHECK-LABEL: llrint_v32i64_v32f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov h19, v0.h[1]
-; CHECK-NEXT: fcvt s21, h0
-; CHECK-NEXT: mov h23, v1.h[2]
-; CHECK-NEXT: fcvt s22, h1
-; CHECK-NEXT: fcvt s26, h2
-; CHECK-NEXT: mov h27, v2.h[1]
-; CHECK-NEXT: mov h28, v2.h[2]
-; CHECK-NEXT: mov h16, v4.h[2]
-; CHECK-NEXT: fcvt s17, h5
-; CHECK-NEXT: mov h18, v5.h[2]
-; CHECK-NEXT: mov h20, v6.h[2]
-; CHECK-NEXT: fcvt s24, h7
-; CHECK-NEXT: fcvt s25, h6
-; CHECK-NEXT: fcvt s19, h19
-; CHECK-NEXT: frintx s22, s22
-; CHECK-NEXT: fcvt s16, h16
-; CHECK-NEXT: frintx s17, s17
-; CHECK-NEXT: fcvt s18, h18
-; CHECK-NEXT: fcvt s20, h20
-; CHECK-NEXT: frintx s16, s16
-; CHECK-NEXT: fcvtzs x12, s17
-; CHECK-NEXT: frintx s17, s18
-; CHECK-NEXT: frintx s18, s21
-; CHECK-NEXT: fcvt s21, h23
-; CHECK-NEXT: frintx s23, s24
-; CHECK-NEXT: frintx s24, s25
-; CHECK-NEXT: frintx s25, s19
-; CHECK-NEXT: mov h19, v7.h[1]
-; CHECK-NEXT: fcvtzs x13, s16
-; CHECK-NEXT: frintx s16, s20
-; CHECK-NEXT: frintx s20, s26
-; CHECK-NEXT: fcvtzs x9, s23
-; CHECK-NEXT: mov h23, v3.h[2]
-; CHECK-NEXT: fcvt s26, h27
-; CHECK-NEXT: fcvtzs x15, s24
-; CHECK-NEXT: fcvtzs x10, s25
-; CHECK-NEXT: fcvt s24, h28
-; CHECK-NEXT: mov h25, v3.h[3]
-; CHECK-NEXT: fcvtzs x14, s17
-; CHECK-NEXT: frintx s21, s21
-; CHECK-NEXT: fmov d17, x12
-; CHECK-NEXT: fcvtzs x12, s16
-; CHECK-NEXT: fmov d16, x13
-; CHECK-NEXT: fcvtzs x13, s22
-; CHECK-NEXT: fcvt s22, h3
-; CHECK-NEXT: mov h3, v3.h[1]
-; CHECK-NEXT: mov h27, v0.h[2]
-; CHECK-NEXT: mov h28, v2.h[3]
-; CHECK-NEXT: fcvt s23, h23
-; CHECK-NEXT: frintx s26, s26
-; CHECK-NEXT: fcvtzs x16, s20
-; CHECK-NEXT: frintx s20, s24
-; CHECK-NEXT: fcvt s24, h25
-; CHECK-NEXT: fcvtzs x11, s18
-; CHECK-NEXT: fmov d18, x14
-; CHECK-NEXT: fcvtzs x14, s21
-; CHECK-NEXT: frintx s22, s22
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvt s25, h27
-; CHECK-NEXT: fcvt s27, h28
-; CHECK-NEXT: frintx s23, s23
-; CHECK-NEXT: mov h21, v1.h[3]
-; CHECK-NEXT: fmov d2, x15
-; CHECK-NEXT: fcvtzs x15, s26
-; CHECK-NEXT: fmov d26, x13
-; CHECK-NEXT: mov h1, v1.h[1]
-; CHECK-NEXT: fcvtzs x13, s20
-; CHECK-NEXT: frintx s20, s24
-; CHECK-NEXT: fmov d24, x14
-; CHECK-NEXT: fcvtzs x14, s22
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: fmov d22, x16
-; CHECK-NEXT: frintx s27, s27
-; CHECK-NEXT: fcvtzs x16, s23
-; CHECK-NEXT: fcvt s21, h21
-; CHECK-NEXT: frintx s25, s25
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: mov h23, v7.h[2]
-; CHECK-NEXT: mov v22.d[1], x15
-; CHECK-NEXT: fcvtzs x15, s20
-; CHECK-NEXT: fmov d20, x13
-; CHECK-NEXT: fcvtzs x13, s3
-; CHECK-NEXT: fmov d3, x14
-; CHECK-NEXT: fcvtzs x14, s27
-; CHECK-NEXT: fmov d27, x16
-; CHECK-NEXT: frintx s21, s21
-; CHECK-NEXT: mov h7, v7.h[3]
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s23, h23
-; CHECK-NEXT: fcvt s19, h19
-; CHECK-NEXT: mov v27.d[1], x15
-; CHECK-NEXT: fcvtzs x15, s25
-; CHECK-NEXT: mov h25, v6.h[3]
-; CHECK-NEXT: mov h6, v6.h[1]
-; CHECK-NEXT: mov v3.d[1], x13
-; CHECK-NEXT: fcvtzs x13, s21
-; CHECK-NEXT: mov h21, v5.h[1]
-; CHECK-NEXT: mov h5, v5.h[3]
-; CHECK-NEXT: mov v20.d[1], x14
-; CHECK-NEXT: fcvtzs x14, s1
-; CHECK-NEXT: mov h1, v4.h[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvt s25, h25
-; CHECK-NEXT: fcvt s7, h7
-; CHECK-NEXT: stp q3, q27, [x8, #192]
-; CHECK-NEXT: fcvt s6, h6
-; CHECK-NEXT: mov h3, v4.h[3]
-; CHECK-NEXT: stp q22, q20, [x8, #128]
-; CHECK-NEXT: fcvt s21, h21
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: mov v24.d[1], x13
-; CHECK-NEXT: mov v26.d[1], x14
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: frintx s22, s25
-; CHECK-NEXT: fmov d20, x12
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvtzs x12, s0
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s21, s21
-; CHECK-NEXT: fmov d0, x11
-; CHECK-NEXT: stp q26, q24, [x8, #64]
-; CHECK-NEXT: fmov d24, x15
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: fcvtzs x11, s22
-; CHECK-NEXT: frintx s22, s23
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvtzs x13, s6
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: frintx s6, s7
-; CHECK-NEXT: fcvtzs x14, s5
-; CHECK-NEXT: mov v24.d[1], x12
-; CHECK-NEXT: frintx s5, s19
-; CHECK-NEXT: fcvtzs x12, s21
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: fcvtzs x10, s4
-; CHECK-NEXT: mov v20.d[1], x11
-; CHECK-NEXT: fcvtzs x11, s22
-; CHECK-NEXT: mov v2.d[1], x13
-; CHECK-NEXT: fcvtzs x15, s3
-; CHECK-NEXT: fcvtzs x13, s1
-; CHECK-NEXT: mov v18.d[1], x14
-; CHECK-NEXT: fcvtzs x14, s6
-; CHECK-NEXT: stp q0, q24, [x8]
-; CHECK-NEXT: mov v17.d[1], x12
-; CHECK-NEXT: fcvtzs x12, s5
-; CHECK-NEXT: fmov d0, x10
-; CHECK-NEXT: fmov d1, x11
-; CHECK-NEXT: stp q2, q20, [x8, #224]
-; CHECK-NEXT: fmov d2, x9
-; CHECK-NEXT: mov v16.d[1], x15
-; CHECK-NEXT: stp q17, q18, [x8, #160]
-; CHECK-NEXT: mov v0.d[1], x13
-; CHECK-NEXT: mov v1.d[1], x14
-; CHECK-NEXT: mov v2.d[1], x12
-; CHECK-NEXT: stp q0, q16, [x8, #96]
-; CHECK-NEXT: stp q2, q1, [x8, #32]
+; CHECK-NEXT: uunpkhi z4.s, z3.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: rdvl x9, #15
+; CHECK-NEXT: uunpklo z3.s, z3.h
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: uunpkhi z7.s, z2.h
+; CHECK-NEXT: uunpklo z2.s, z2.h
+; CHECK-NEXT: uunpklo z24.s, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpkhi z5.d, z4.s
+; CHECK-NEXT: uunpklo z4.d, z4.s
+; CHECK-NEXT: uunpkhi z6.d, z3.s
+; CHECK-NEXT: uunpklo z3.d, z3.s
+; CHECK-NEXT: uunpkhi z25.d, z2.s
+; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: uunpklo z26.d, z0.s
+; CHECK-NEXT: uunpkhi z0.d, z0.s
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #14
+; CHECK-NEXT: movprfx z5, z6
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h
+; CHECK-NEXT: uunpkhi z6.d, z7.s
+; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9]
+; CHECK-NEXT: uunpkhi z4.s, z1.h
+; CHECK-NEXT: uunpklo z7.d, z7.s
+; CHECK-NEXT: rdvl x9, #13
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #12
+; CHECK-NEXT: movprfx z5, z6
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h
+; CHECK-NEXT: st1b { z3.b }, p1, [x8, x9]
+; CHECK-NEXT: uunpkhi z3.d, z4.s
+; CHECK-NEXT: uunpklo z4.d, z4.s
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h
+; CHECK-NEXT: rdvl x9, #11
+; CHECK-NEXT: uunpkhi z6.d, z24.s
+; CHECK-NEXT: uunpkhi z27.d, z1.s
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #10
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9]
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h
+; CHECK-NEXT: uunpklo z7.d, z24.s
+; CHECK-NEXT: rdvl x9, #9
+; CHECK-NEXT: movprfx z5, z27
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z27.h
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #8
+; CHECK-NEXT: st1b { z2.b }, p1, [x8, x9]
+; CHECK-NEXT: movprfx z2, z26
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z26.h
+; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl]
+; CHECK-NEXT: movprfx z3, z6
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h
+; CHECK-NEXT: st1d { z4.d }, p0, [x8, #6, mul vl]
+; CHECK-NEXT: movprfx z4, z7
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h
+; CHECK-NEXT: st1d { z5.d }, p0, [x8, #5, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [x8, #4, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [x8, #3, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl]
+; CHECK-NEXT: st1d { z3.d }, p0, [x8, #1, mul vl]
+; CHECK-NEXT: st1d { z4.d }, p0, [x8]
; CHECK-NEXT: ret
- %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x)
- ret <32 x i64> %a
+ %a = call <vscale x 32 x i64> @llvm.llrint.nxv32i64.nxv32f16(<vscale x 32 x half> %x)
+ ret <vscale x 32 x i64> %a
}
-declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
+declare <vscale x 32 x i64> @llvm.llrint.nxv32i64.nxv32f16(<vscale x 32 x half>)
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <vscale x 1 x i64> @llrint_v1i64_v1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: llrint_v1i64_v1f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: ret
- %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
- ret <1 x i64> %a
+ %a = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f32(<vscale x 1 x float> %x)
+ ret <vscale x 1 x i64> %a
}
-declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
+declare <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f32(<vscale x 1 x float>)
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <vscale x 2 x i64> @llrint_v2i64_v2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: llrint_v2i64_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: ret
- %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
- ret <2 x i64> %a
+ %a = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x)
+ ret <vscale x 2 x i64> %a
}
-declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
+declare <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float>)
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <vscale x 4 x i64> @llrint_v4i64_v4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: llrint_v4i64_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov s3, v0.s[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: mov s2, v1.s[1]
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v1.d[1], x10
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z1.d, z0.s
+; CHECK-NEXT: uunpkhi z2.d, z0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT: movprfx z1, z2
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s
; CHECK-NEXT: ret
- %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
- ret <4 x i64> %a
+ %a = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x)
+ ret <vscale x 4 x i64> %a
}
-declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
+declare <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float>)
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <vscale x 8 x i64> @llrint_v8i64_v8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: llrint_v8i64_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: mov s4, v0.s[1]
-; CHECK-NEXT: mov s7, v1.s[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: mov s5, v2.s[1]
-; CHECK-NEXT: mov s6, v3.s[1]
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: fcvtzs x12, s1
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvtzs x8, s2
-; CHECK-NEXT: fcvtzs x10, s3
-; CHECK-NEXT: fcvtzs x11, s4
-; CHECK-NEXT: fcvtzs x15, s7
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fmov d2, x12
-; CHECK-NEXT: fcvtzs x13, s5
-; CHECK-NEXT: fcvtzs x14, s6
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v2.d[1], x15
-; CHECK-NEXT: mov v1.d[1], x13
-; CHECK-NEXT: mov v3.d[1], x14
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: uunpkhi z3.d, z0.s
+; CHECK-NEXT: uunpklo z4.d, z1.s
+; CHECK-NEXT: uunpkhi z5.d, z1.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z3.s
+; CHECK-NEXT: movprfx z2, z4
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z4.s
+; CHECK-NEXT: movprfx z3, z5
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s
; CHECK-NEXT: ret
- %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
- ret <8 x i64> %a
+ %a = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x)
+ ret <vscale x 8 x i64> %a
}
-declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
+declare <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float>)
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <vscale x 16 x i64> @llrint_v16i64_v16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: llrint_v16i64_v16f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: frintx s7, s0
-; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: frintx s17, s4
-; CHECK-NEXT: mov s4, v4.s[1]
-; CHECK-NEXT: mov s18, v5.s[1]
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s19, s6
-; CHECK-NEXT: fcvtzs x8, s7
-; CHECK-NEXT: frintx s7, s16
-; CHECK-NEXT: mov s6, v6.s[1]
-; CHECK-NEXT: mov s16, v16.s[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: fcvtzs x9, s17
-; CHECK-NEXT: frintx s17, s1
-; CHECK-NEXT: mov s1, v1.s[1]
-; CHECK-NEXT: frintx s18, s18
-; CHECK-NEXT: fcvtzs x10, s5
-; CHECK-NEXT: mov s5, v2.s[1]
-; CHECK-NEXT: fcvtzs x11, s19
-; CHECK-NEXT: mov s19, v3.s[1]
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: fcvtzs x12, s7
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvtzs x13, s4
-; CHECK-NEXT: frintx s4, s3
-; CHECK-NEXT: frintx s16, s16
-; CHECK-NEXT: fcvtzs x14, s18
-; CHECK-NEXT: frintx s18, s1
-; CHECK-NEXT: fcvtzs x15, s17
-; CHECK-NEXT: frintx s20, s5
-; CHECK-NEXT: frintx s17, s19
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fcvtzs x9, s2
-; CHECK-NEXT: fmov d5, x11
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: fcvtzs x11, s4
-; CHECK-NEXT: fcvtzs x10, s0
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzs x12, s18
-; CHECK-NEXT: fcvtzs x17, s6
-; CHECK-NEXT: fcvtzs x18, s16
-; CHECK-NEXT: fcvtzs x16, s20
-; CHECK-NEXT: fcvtzs x0, s17
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d2, x15
-; CHECK-NEXT: fmov d4, x9
-; CHECK-NEXT: mov v1.d[1], x13
-; CHECK-NEXT: fmov d6, x11
-; CHECK-NEXT: mov v3.d[1], x14
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v5.d[1], x17
-; CHECK-NEXT: mov v7.d[1], x18
-; CHECK-NEXT: mov v2.d[1], x12
-; CHECK-NEXT: mov v4.d[1], x16
-; CHECK-NEXT: mov v6.d[1], x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z4.d, z0.s
+; CHECK-NEXT: uunpkhi z5.d, z0.s
+; CHECK-NEXT: uunpklo z6.d, z1.s
+; CHECK-NEXT: uunpkhi z7.d, z1.s
+; CHECK-NEXT: uunpklo z24.d, z2.s
+; CHECK-NEXT: uunpkhi z25.d, z2.s
+; CHECK-NEXT: uunpklo z26.d, z3.s
+; CHECK-NEXT: uunpkhi z27.d, z3.s
+; CHECK-NEXT: movprfx z0, z4
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.s
+; CHECK-NEXT: movprfx z1, z5
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s
+; CHECK-NEXT: movprfx z2, z6
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s
+; CHECK-NEXT: movprfx z3, z7
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s
+; CHECK-NEXT: movprfx z4, z24
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.s
+; CHECK-NEXT: movprfx z5, z25
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.s
+; CHECK-NEXT: movprfx z6, z26
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z26.s
+; CHECK-NEXT: movprfx z7, z27
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z27.s
; CHECK-NEXT: ret
- %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
- ret <16 x i64> %a
+ %a = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f32(<vscale x 16 x float> %x)
+ ret <vscale x 16 x i64> %a
}
-declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
+declare <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f32(<vscale x 16 x float>)
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <vscale x 32 x i64> @llrint_v32i64_v32f32(<vscale x 32 x float> %x) {
+; CHECK-LABEL: llrint_v32i64_v32f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z24.d, z7.s
+; CHECK-NEXT: uunpklo z7.d, z7.s
+; CHECK-NEXT: uunpkhi z27.d, z6.s
+; CHECK-NEXT: rdvl x9, #15
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: uunpklo z6.d, z6.s
+; CHECK-NEXT: uunpkhi z29.d, z5.s
+; CHECK-NEXT: uunpklo z5.d, z5.s
+; CHECK-NEXT: uunpkhi z31.d, z4.s
+; CHECK-NEXT: uunpklo z30.d, z3.s
+; CHECK-NEXT: uunpkhi z3.d, z3.s
+; CHECK-NEXT: uunpklo z4.d, z4.s
+; CHECK-NEXT: uunpkhi z25.d, z0.s
+; CHECK-NEXT: uunpklo z26.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uunpklo z28.d, z2.s
+; CHECK-NEXT: uunpkhi z2.d, z2.s
+; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s
+; CHECK-NEXT: fcvtzs z27.d, p0/m, z27.s
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s
+; CHECK-NEXT: fcvtzs z29.d, p0/m, z29.s
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s
+; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #14
+; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #13
+; CHECK-NEXT: movprfx z7, z31
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z31.s
+; CHECK-NEXT: st1b { z27.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #12
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #11
+; CHECK-NEXT: movprfx z6, z30
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z30.s
+; CHECK-NEXT: st1b { z29.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #10
+; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #9
+; CHECK-NEXT: movprfx z5, z28
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s
+; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #8
+; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9]
+; CHECK-NEXT: movprfx z4, z25
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z25.s
+; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl]
+; CHECK-NEXT: movprfx z3, z26
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z26.s
+; CHECK-NEXT: st1d { z6.d }, p0, [x8, #6, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl]
+; CHECK-NEXT: st1d { z5.d }, p0, [x8, #4, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl]
+; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl]
+; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [x8]
+; CHECK-NEXT: ret
+ %a = call <vscale x 32 x i64> @llvm.llrint.nxv32i64.nxv32f32(<vscale x 32 x float> %x)
+ ret <vscale x 32 x i64> %a
+}
+declare <vscale x 32 x i64> @llvm.llrint.nxv32i64.nxv32f32(<vscale x 32 x float>)
+
+define <vscale x 1 x i64> @llrint_v1i64_v1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: llrint_v1i64_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: ret
- %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
- ret <1 x i64> %a
+ %a = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f64(<vscale x 1 x double> %x)
+ ret <vscale x 1 x i64> %a
}
-declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
+declare <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f64(<vscale x 1 x double>)
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <vscale x 2 x i64> @llrint_v2i64_v2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: llrint_v2i64_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: frintx d1, d1
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fcvtzs x9, d1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: ret
- %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
- ret <2 x i64> %a
+ %a = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f64(<vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %a
}
-declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
+declare <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f64(<vscale x 2 x double>)
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <vscale x 4 x i64> @llrint_v4i64_v4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: llrint_v4i64_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov d2, v0.d[1]
-; CHECK-NEXT: mov d3, v1.d[1]
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: frintx d1, d1
-; CHECK-NEXT: frintx d2, d2
-; CHECK-NEXT: frintx d3, d3
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fcvtzs x9, d1
-; CHECK-NEXT: fcvtzs x10, d2
-; CHECK-NEXT: fcvtzs x11, d3
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT: ret
- %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
- ret <4 x i64> %a
+ %a = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> %x)
+ ret <vscale x 4 x i64> %a
}
-declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
+declare <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double>)
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <vscale x 8 x i64> @llrint_v8i64_v8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: llrint_v8i64_v8f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov d4, v0.d[1]
-; CHECK-NEXT: mov d5, v1.d[1]
-; CHECK-NEXT: mov d6, v2.d[1]
-; CHECK-NEXT: mov d7, v3.d[1]
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: frintx d1, d1
-; CHECK-NEXT: frintx d2, d2
-; CHECK-NEXT: frintx d3, d3
-; CHECK-NEXT: frintx d4, d4
-; CHECK-NEXT: frintx d5, d5
-; CHECK-NEXT: frintx d6, d6
-; CHECK-NEXT: frintx d7, d7
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fcvtzs x9, d1
-; CHECK-NEXT: fcvtzs x10, d2
-; CHECK-NEXT: fcvtzs x11, d3
-; CHECK-NEXT: fcvtzs x12, d4
-; CHECK-NEXT: fcvtzs x13, d5
-; CHECK-NEXT: fcvtzs x14, d6
-; CHECK-NEXT: fcvtzs x15, d7
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: mov v0.d[1], x12
-; CHECK-NEXT: mov v1.d[1], x13
-; CHECK-NEXT: mov v2.d[1], x14
-; CHECK-NEXT: mov v3.d[1], x15
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: ret
+ %a = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f64(<vscale x 8 x double> %x)
+ ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f64(<vscale x 8 x double>)
+
+define <vscale x 16 x i64> @llrint_v16f64(<vscale x 16 x double> %x) {
+; CHECK-LABEL: llrint_v16f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: ret
+ %a = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f64(<vscale x 16 x double> %x)
+ ret <vscale x 16 x i64> %a
+}
+declare <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f64(<vscale x 16 x double>)
+
+define <vscale x 32 x i64> @llrint_v32f64(<vscale x 32 x double> %x) {
+; CHECK-LABEL: llrint_v32f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: rdvl x9, #15
+; CHECK-NEXT: rdvl x10, #14
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: rdvl x11, #13
+; CHECK-NEXT: rdvl x12, #12
+; CHECK-NEXT: rdvl x13, #11
+; CHECK-NEXT: rdvl x14, #10
+; CHECK-NEXT: rdvl x15, #9
+; CHECK-NEXT: rdvl x16, #8
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x9]
+; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x10]
+; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x11]
+; CHECK-NEXT: ld1b { z3.b }, p0/z, [x0, x12]
+; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x13]
+; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x14]
+; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x15]
+; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16]
+; CHECK-NEXT: ld1d { z24.d }, p1/z, [x0, #7, mul vl]
+; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
+; CHECK-NEXT: ld1d { z25.d }, p1/z, [x0, #6, mul vl]
+; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl]
+; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d
+; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0, #4, mul vl]
+; CHECK-NEXT: ld1d { z28.d }, p1/z, [x0, #3, mul vl]
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d
+; CHECK-NEXT: ld1d { z29.d }, p1/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1d { z30.d }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d
+; CHECK-NEXT: ld1d { z31.d }, p1/z, [x0]
+; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d
+; CHECK-NEXT: st1b { z0.b }, p0, [x8, x9]
+; CHECK-NEXT: movprfx z0, z5
+; CHECK-NEXT: fcvtzs z0.d, p1/m, z5.d
+; CHECK-NEXT: st1b { z1.b }, p0, [x8, x10]
+; CHECK-NEXT: movprfx z1, z6
+; CHECK-NEXT: fcvtzs z1.d, p1/m, z6.d
+; CHECK-NEXT: st1b { z2.b }, p0, [x8, x11]
+; CHECK-NEXT: movprfx z2, z7
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z7.d
+; CHECK-NEXT: st1b { z3.b }, p0, [x8, x12]
+; CHECK-NEXT: movprfx z3, z24
+; CHECK-NEXT: fcvtzs z3.d, p1/m, z24.d
+; CHECK-NEXT: st1b { z4.b }, p0, [x8, x13]
+; CHECK-NEXT: movprfx z4, z25
+; CHECK-NEXT: fcvtzs z4.d, p1/m, z25.d
+; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14]
+; CHECK-NEXT: movprfx z0, z26
+; CHECK-NEXT: fcvtzs z0.d, p1/m, z26.d
+; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15]
+; CHECK-NEXT: movprfx z1, z27
+; CHECK-NEXT: fcvtzs z1.d, p1/m, z27.d
+; CHECK-NEXT: st1b { z2.b }, p0, [x8, x16]
+; CHECK-NEXT: movprfx z2, z28
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z28.d
+; CHECK-NEXT: st1d { z3.d }, p1, [x8, #7, mul vl]
+; CHECK-NEXT: movprfx z3, z29
+; CHECK-NEXT: fcvtzs z3.d, p1/m, z29.d
+; CHECK-NEXT: st1d { z4.d }, p1, [x8, #6, mul vl]
+; CHECK-NEXT: movprfx z4, z30
+; CHECK-NEXT: fcvtzs z4.d, p1/m, z30.d
+; CHECK-NEXT: st1d { z0.d }, p1, [x8, #5, mul vl]
+; CHECK-NEXT: movprfx z0, z31
+; CHECK-NEXT: fcvtzs z0.d, p1/m, z31.d
+; CHECK-NEXT: st1d { z1.d }, p1, [x8, #4, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p1, [x8, #3, mul vl]
+; CHECK-NEXT: st1d { z3.d }, p1, [x8, #2, mul vl]
+; CHECK-NEXT: st1d { z4.d }, p1, [x8, #1, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p1, [x8]
; CHECK-NEXT: ret
- %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
- ret <8 x i64> %a
+  %a = call <vscale x 32 x i64> @llvm.llrint.nxv32i64.nxv32f64(<vscale x 32 x double> %x)
+ ret <vscale x 32 x i64> %a
}
-declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+declare <vscale x 32 x i64> @llvm.llrint.nxv32i64.nxv32f64(<vscale x 32 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index db85b23428216a..d95493688ff8b4 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -1,390 +1,187 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
-; CHECK-GI: warning: Instruction selection used fallback path for lrint_v2f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64
-
-define <1 x i64> @lrint_v1f16(<1 x half> %x) {
+define <vscale x 1 x i64> @lrint_v1f16(<vscale x 1 x half> %x) {
; CHECK-LABEL: lrint_v1f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT: ret
- %a = call <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half> %x)
- ret <1 x i64> %a
+ %a = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f16(<vscale x 1 x half> %x)
+ ret <vscale x 1 x i64> %a
}
-declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>)
+declare <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f16(<vscale x 1 x half>)
-define <2 x i64> @lrint_v2f16(<2 x half> %x) {
+define <vscale x 2 x i64> @lrint_v2f16(<vscale x 2 x half> %x) {
; CHECK-LABEL: lrint_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT: ret
- %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x)
- ret <2 x i64> %a
+ %a = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f16(<vscale x 2 x half> %x)
+ ret <vscale x 2 x i64> %a
}
-declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>)
+declare <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f16(<vscale x 2 x half>)
-define <4 x i64> @lrint_v4f16(<4 x half> %x) {
+define <vscale x 4 x i64> @lrint_v4f16(<vscale x 4 x half> %x) {
; CHECK-LABEL: lrint_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[2]
-; CHECK-NEXT: mov h2, v0.h[1]
-; CHECK-NEXT: mov h3, v0.h[3]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z1.d, z0.s
+; CHECK-NEXT: uunpkhi z2.d, z0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h
+; CHECK-NEXT: movprfx z1, z2
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h
; CHECK-NEXT: ret
- %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x)
- ret <4 x i64> %a
+ %a = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f16(<vscale x 4 x half> %x)
+ ret <vscale x 4 x i64> %a
}
-declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>)
+declare <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f16(<vscale x 4 x half>)
-define <8 x i64> @lrint_v8f16(<8 x half> %x) {
+define <vscale x 8 x i64> @lrint_v8f16(<vscale x 8 x half> %x) {
; CHECK-LABEL: lrint_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov h4, v0.h[2]
-; CHECK-NEXT: mov h3, v0.h[1]
-; CHECK-NEXT: mov h7, v0.h[3]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: mov h2, v1.h[2]
-; CHECK-NEXT: mov h5, v1.h[1]
-; CHECK-NEXT: mov h6, v1.h[3]
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvt s7, h7
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: fcvt s6, h6
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x12, s4
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fcvtzs x15, s7
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fcvtzs x13, s5
-; CHECK-NEXT: fcvtzs x14, s6
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: mov v2.d[1], x13
-; CHECK-NEXT: mov v1.d[1], x15
-; CHECK-NEXT: mov v3.d[1], x14
+; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z2.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uunpklo z3.d, z0.s
+; CHECK-NEXT: uunpkhi z4.d, z0.s
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h
+; CHECK-NEXT: movprfx z2, z3
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.h
+; CHECK-NEXT: movprfx z3, z4
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h
; CHECK-NEXT: ret
- %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x)
- ret <8 x i64> %a
+ %a = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f16(<vscale x 8 x half> %x)
+ ret <vscale x 8 x i64> %a
}
-declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>)
+declare <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f16(<vscale x 8 x half>)
-define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
+define <vscale x 16 x i64> @lrint_v16i64_v16f16(<vscale x 16 x half> %x) {
; CHECK-LABEL: lrint_v16i64_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: mov h17, v0.h[1]
-; CHECK-NEXT: mov h19, v0.h[2]
-; CHECK-NEXT: fcvt s18, h0
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: mov h4, v2.h[1]
-; CHECK-NEXT: mov h5, v2.h[2]
-; CHECK-NEXT: fcvt s7, h3
-; CHECK-NEXT: fcvt s6, h2
-; CHECK-NEXT: mov h16, v3.h[2]
-; CHECK-NEXT: mov h2, v2.h[3]
-; CHECK-NEXT: fcvt s17, h17
-; CHECK-NEXT: fcvt s19, h19
-; CHECK-NEXT: frintx s18, s18
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvt s16, h16
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: frintx s17, s17
-; CHECK-NEXT: frintx s19, s19
-; CHECK-NEXT: fcvtzs x13, s18
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: fcvtzs x9, s7
-; CHECK-NEXT: mov h7, v1.h[2]
-; CHECK-NEXT: fcvtzs x8, s6
-; CHECK-NEXT: mov h6, v1.h[1]
-; CHECK-NEXT: frintx s16, s16
-; CHECK-NEXT: fcvtzs x14, s17
-; CHECK-NEXT: fcvtzs x15, s19
-; CHECK-NEXT: fcvtzs x10, s4
-; CHECK-NEXT: mov h4, v3.h[1]
-; CHECK-NEXT: fcvtzs x11, s5
-; CHECK-NEXT: mov h5, v1.h[3]
-; CHECK-NEXT: mov h3, v3.h[3]
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s7, h7
-; CHECK-NEXT: fcvt s6, h6
-; CHECK-NEXT: fcvtzs x12, s16
-; CHECK-NEXT: frintx s16, s2
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: frintx s17, s6
-; CHECK-NEXT: fmov d6, x9
-; CHECK-NEXT: mov v2.d[1], x10
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s18, s3
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x9, s7
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: fcvtzs x11, s0
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzs x12, s16
-; CHECK-NEXT: fcvtzs x16, s17
-; CHECK-NEXT: fcvtzs x17, s4
-; CHECK-NEXT: fmov d0, x13
-; CHECK-NEXT: fmov d1, x15
-; CHECK-NEXT: fcvtzs x18, s18
-; CHECK-NEXT: fcvtzs x0, s5
-; CHECK-NEXT: fmov d4, x8
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: mov v0.d[1], x14
-; CHECK-NEXT: mov v1.d[1], x11
-; CHECK-NEXT: mov v3.d[1], x12
-; CHECK-NEXT: mov v4.d[1], x16
-; CHECK-NEXT: mov v6.d[1], x17
-; CHECK-NEXT: mov v7.d[1], x18
-; CHECK-NEXT: mov v5.d[1], x0
+; CHECK-NEXT: uunpklo z2.s, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpklo z3.s, z1.h
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z4.d, z2.s
+; CHECK-NEXT: uunpkhi z2.d, z2.s
+; CHECK-NEXT: uunpklo z5.d, z0.s
+; CHECK-NEXT: uunpkhi z6.d, z0.s
+; CHECK-NEXT: uunpklo z7.d, z3.s
+; CHECK-NEXT: uunpkhi z24.d, z3.s
+; CHECK-NEXT: uunpklo z25.d, z1.s
+; CHECK-NEXT: uunpkhi z26.d, z1.s
+; CHECK-NEXT: movprfx z0, z4
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h
+; CHECK-NEXT: movprfx z1, z2
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h
+; CHECK-NEXT: movprfx z2, z5
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.h
+; CHECK-NEXT: movprfx z3, z6
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h
+; CHECK-NEXT: movprfx z4, z7
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h
+; CHECK-NEXT: movprfx z5, z24
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z24.h
+; CHECK-NEXT: movprfx z6, z25
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z25.h
+; CHECK-NEXT: movprfx z7, z26
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z26.h
; CHECK-NEXT: ret
- %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x)
- ret <16 x i64> %a
+ %a = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f16(<vscale x 16 x half> %x)
+ ret <vscale x 16 x i64> %a
}
-declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>)
+declare <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f16(<vscale x 16 x half>)
-define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
+define <vscale x 32 x i64> @lrint_v32i64_v32f16(<vscale x 32 x half> %x) {
; CHECK-LABEL: lrint_v32i64_v32f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov h19, v0.h[1]
-; CHECK-NEXT: fcvt s21, h0
-; CHECK-NEXT: mov h23, v1.h[2]
-; CHECK-NEXT: fcvt s22, h1
-; CHECK-NEXT: fcvt s26, h2
-; CHECK-NEXT: mov h27, v2.h[1]
-; CHECK-NEXT: mov h28, v2.h[2]
-; CHECK-NEXT: mov h16, v4.h[2]
-; CHECK-NEXT: fcvt s17, h5
-; CHECK-NEXT: mov h18, v5.h[2]
-; CHECK-NEXT: mov h20, v6.h[2]
-; CHECK-NEXT: fcvt s24, h7
-; CHECK-NEXT: fcvt s25, h6
-; CHECK-NEXT: fcvt s19, h19
-; CHECK-NEXT: frintx s22, s22
-; CHECK-NEXT: fcvt s16, h16
-; CHECK-NEXT: frintx s17, s17
-; CHECK-NEXT: fcvt s18, h18
-; CHECK-NEXT: fcvt s20, h20
-; CHECK-NEXT: frintx s16, s16
-; CHECK-NEXT: fcvtzs x12, s17
-; CHECK-NEXT: frintx s17, s18
-; CHECK-NEXT: frintx s18, s21
-; CHECK-NEXT: fcvt s21, h23
-; CHECK-NEXT: frintx s23, s24
-; CHECK-NEXT: frintx s24, s25
-; CHECK-NEXT: frintx s25, s19
-; CHECK-NEXT: mov h19, v7.h[1]
-; CHECK-NEXT: fcvtzs x13, s16
-; CHECK-NEXT: frintx s16, s20
-; CHECK-NEXT: frintx s20, s26
-; CHECK-NEXT: fcvtzs x9, s23
-; CHECK-NEXT: mov h23, v3.h[2]
-; CHECK-NEXT: fcvt s26, h27
-; CHECK-NEXT: fcvtzs x15, s24
-; CHECK-NEXT: fcvtzs x10, s25
-; CHECK-NEXT: fcvt s24, h28
-; CHECK-NEXT: mov h25, v3.h[3]
-; CHECK-NEXT: fcvtzs x14, s17
-; CHECK-NEXT: frintx s21, s21
-; CHECK-NEXT: fmov d17, x12
-; CHECK-NEXT: fcvtzs x12, s16
-; CHECK-NEXT: fmov d16, x13
-; CHECK-NEXT: fcvtzs x13, s22
-; CHECK-NEXT: fcvt s22, h3
-; CHECK-NEXT: mov h3, v3.h[1]
-; CHECK-NEXT: mov h27, v0.h[2]
-; CHECK-NEXT: mov h28, v2.h[3]
-; CHECK-NEXT: fcvt s23, h23
-; CHECK-NEXT: frintx s26, s26
-; CHECK-NEXT: fcvtzs x16, s20
-; CHECK-NEXT: frintx s20, s24
-; CHECK-NEXT: fcvt s24, h25
-; CHECK-NEXT: fcvtzs x11, s18
-; CHECK-NEXT: fmov d18, x14
-; CHECK-NEXT: fcvtzs x14, s21
-; CHECK-NEXT: frintx s22, s22
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvt s25, h27
-; CHECK-NEXT: fcvt s27, h28
-; CHECK-NEXT: frintx s23, s23
-; CHECK-NEXT: mov h21, v1.h[3]
-; CHECK-NEXT: fmov d2, x15
-; CHECK-NEXT: fcvtzs x15, s26
-; CHECK-NEXT: fmov d26, x13
-; CHECK-NEXT: mov h1, v1.h[1]
-; CHECK-NEXT: fcvtzs x13, s20
-; CHECK-NEXT: frintx s20, s24
-; CHECK-NEXT: fmov d24, x14
-; CHECK-NEXT: fcvtzs x14, s22
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: fmov d22, x16
-; CHECK-NEXT: frintx s27, s27
-; CHECK-NEXT: fcvtzs x16, s23
-; CHECK-NEXT: fcvt s21, h21
-; CHECK-NEXT: frintx s25, s25
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: mov h23, v7.h[2]
-; CHECK-NEXT: mov v22.d[1], x15
-; CHECK-NEXT: fcvtzs x15, s20
-; CHECK-NEXT: fmov d20, x13
-; CHECK-NEXT: fcvtzs x13, s3
-; CHECK-NEXT: fmov d3, x14
-; CHECK-NEXT: fcvtzs x14, s27
-; CHECK-NEXT: fmov d27, x16
-; CHECK-NEXT: frintx s21, s21
-; CHECK-NEXT: mov h7, v7.h[3]
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s23, h23
-; CHECK-NEXT: fcvt s19, h19
-; CHECK-NEXT: mov v27.d[1], x15
-; CHECK-NEXT: fcvtzs x15, s25
-; CHECK-NEXT: mov h25, v6.h[3]
-; CHECK-NEXT: mov h6, v6.h[1]
-; CHECK-NEXT: mov v3.d[1], x13
-; CHECK-NEXT: fcvtzs x13, s21
-; CHECK-NEXT: mov h21, v5.h[1]
-; CHECK-NEXT: mov h5, v5.h[3]
-; CHECK-NEXT: mov v20.d[1], x14
-; CHECK-NEXT: fcvtzs x14, s1
-; CHECK-NEXT: mov h1, v4.h[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvt s25, h25
-; CHECK-NEXT: fcvt s7, h7
-; CHECK-NEXT: stp q3, q27, [x8, #192]
-; CHECK-NEXT: fcvt s6, h6
-; CHECK-NEXT: mov h3, v4.h[3]
-; CHECK-NEXT: stp q22, q20, [x8, #128]
-; CHECK-NEXT: fcvt s21, h21
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: mov v24.d[1], x13
-; CHECK-NEXT: mov v26.d[1], x14
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: frintx s22, s25
-; CHECK-NEXT: fmov d20, x12
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvtzs x12, s0
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s21, s21
-; CHECK-NEXT: fmov d0, x11
-; CHECK-NEXT: stp q26, q24, [x8, #64]
-; CHECK-NEXT: fmov d24, x15
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: fcvtzs x11, s22
-; CHECK-NEXT: frintx s22, s23
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvtzs x13, s6
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: frintx s6, s7
-; CHECK-NEXT: fcvtzs x14, s5
-; CHECK-NEXT: mov v24.d[1], x12
-; CHECK-NEXT: frintx s5, s19
-; CHECK-NEXT: fcvtzs x12, s21
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: fcvtzs x10, s4
-; CHECK-NEXT: mov v20.d[1], x11
-; CHECK-NEXT: fcvtzs x11, s22
-; CHECK-NEXT: mov v2.d[1], x13
-; CHECK-NEXT: fcvtzs x15, s3
-; CHECK-NEXT: fcvtzs x13, s1
-; CHECK-NEXT: mov v18.d[1], x14
-; CHECK-NEXT: fcvtzs x14, s6
-; CHECK-NEXT: stp q0, q24, [x8]
-; CHECK-NEXT: mov v17.d[1], x12
-; CHECK-NEXT: fcvtzs x12, s5
-; CHECK-NEXT: fmov d0, x10
-; CHECK-NEXT: fmov d1, x11
-; CHECK-NEXT: stp q2, q20, [x8, #224]
-; CHECK-NEXT: fmov d2, x9
-; CHECK-NEXT: mov v16.d[1], x15
-; CHECK-NEXT: stp q17, q18, [x8, #160]
-; CHECK-NEXT: mov v0.d[1], x13
-; CHECK-NEXT: mov v1.d[1], x14
-; CHECK-NEXT: mov v2.d[1], x12
-; CHECK-NEXT: stp q0, q16, [x8, #96]
-; CHECK-NEXT: stp q2, q1, [x8, #32]
+; CHECK-NEXT: uunpkhi z4.s, z3.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: rdvl x9, #15
+; CHECK-NEXT: uunpklo z3.s, z3.h
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: uunpkhi z7.s, z2.h
+; CHECK-NEXT: uunpklo z2.s, z2.h
+; CHECK-NEXT: uunpklo z24.s, z0.h
+; CHECK-NEXT: uunpkhi z0.s, z0.h
+; CHECK-NEXT: uunpkhi z5.d, z4.s
+; CHECK-NEXT: uunpklo z4.d, z4.s
+; CHECK-NEXT: uunpkhi z6.d, z3.s
+; CHECK-NEXT: uunpklo z3.d, z3.s
+; CHECK-NEXT: uunpkhi z25.d, z2.s
+; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: uunpklo z26.d, z0.s
+; CHECK-NEXT: uunpkhi z0.d, z0.s
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #14
+; CHECK-NEXT: movprfx z5, z6
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h
+; CHECK-NEXT: uunpkhi z6.d, z7.s
+; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9]
+; CHECK-NEXT: uunpkhi z4.s, z1.h
+; CHECK-NEXT: uunpklo z7.d, z7.s
+; CHECK-NEXT: rdvl x9, #13
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #12
+; CHECK-NEXT: movprfx z5, z6
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h
+; CHECK-NEXT: st1b { z3.b }, p1, [x8, x9]
+; CHECK-NEXT: uunpkhi z3.d, z4.s
+; CHECK-NEXT: uunpklo z4.d, z4.s
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h
+; CHECK-NEXT: rdvl x9, #11
+; CHECK-NEXT: uunpkhi z6.d, z24.s
+; CHECK-NEXT: uunpkhi z27.d, z1.s
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #10
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h
+; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9]
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h
+; CHECK-NEXT: uunpklo z7.d, z24.s
+; CHECK-NEXT: rdvl x9, #9
+; CHECK-NEXT: movprfx z5, z27
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z27.h
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h
+; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #8
+; CHECK-NEXT: st1b { z2.b }, p1, [x8, x9]
+; CHECK-NEXT: movprfx z2, z26
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z26.h
+; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl]
+; CHECK-NEXT: movprfx z3, z6
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h
+; CHECK-NEXT: st1d { z4.d }, p0, [x8, #6, mul vl]
+; CHECK-NEXT: movprfx z4, z7
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h
+; CHECK-NEXT: st1d { z5.d }, p0, [x8, #5, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [x8, #4, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [x8, #3, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl]
+; CHECK-NEXT: st1d { z3.d }, p0, [x8, #1, mul vl]
+; CHECK-NEXT: st1d { z4.d }, p0, [x8]
; CHECK-NEXT: ret
- %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x)
- ret <32 x i64> %a
+ %a = call <vscale x 32 x i64> @llvm.lrint.nxv32i64.nxv32f16(<vscale x 32 x half> %x)
+ ret <vscale x 32 x i64> %a
}
-declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>)
+declare <vscale x 32 x i64> @llvm.lrint.nxv32i64.nxv32f16(<vscale x 32 x half>)
-define <1 x i64> @lrint_v1f32(<1 x float> %x) {
+define <vscale x 1 x i64> @lrint_v1f32(<vscale x 1 x float> %x) {
; CHECK-SD-LABEL: lrint_v1f32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
@@ -399,244 +196,311 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) {
; CHECK-GI-NEXT: fcvtzs x8, s0
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
- %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x)
- ret <1 x i64> %a
+; CHECK-LABEL: lrint_v1f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f32(<vscale x 1 x float> %x)
+ ret <vscale x 1 x i64> %a
}
-declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>)
+declare <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f32(<vscale x 1 x float>)
-define <2 x i64> @lrint_v2f32(<2 x float> %x) {
+define <vscale x 2 x i64> @lrint_v2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: lrint_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: ret
- %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x)
- ret <2 x i64> %a
+ %a = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x)
+ ret <vscale x 2 x i64> %a
}
-declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>)
+declare <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f32(<vscale x 2 x float>)
-define <4 x i64> @lrint_v4f32(<4 x float> %x) {
+define <vscale x 4 x i64> @lrint_v4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: lrint_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov s3, v0.s[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: mov s2, v1.s[1]
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v1.d[1], x10
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z1.d, z0.s
+; CHECK-NEXT: uunpkhi z2.d, z0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT: movprfx z1, z2
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s
; CHECK-NEXT: ret
- %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x)
- ret <4 x i64> %a
+ %a = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x)
+ ret <vscale x 4 x i64> %a
}
-declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>)
+declare <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f32(<vscale x 4 x float>)
-define <8 x i64> @lrint_v8f32(<8 x float> %x) {
+define <vscale x 8 x i64> @lrint_v8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: lrint_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: mov s4, v0.s[1]
-; CHECK-NEXT: mov s7, v1.s[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: mov s5, v2.s[1]
-; CHECK-NEXT: mov s6, v3.s[1]
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: fcvtzs x12, s1
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvtzs x8, s2
-; CHECK-NEXT: fcvtzs x10, s3
-; CHECK-NEXT: fcvtzs x11, s4
-; CHECK-NEXT: fcvtzs x15, s7
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fmov d2, x12
-; CHECK-NEXT: fcvtzs x13, s5
-; CHECK-NEXT: fcvtzs x14, s6
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v2.d[1], x15
-; CHECK-NEXT: mov v1.d[1], x13
-; CHECK-NEXT: mov v3.d[1], x14
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: uunpkhi z3.d, z0.s
+; CHECK-NEXT: uunpklo z4.d, z1.s
+; CHECK-NEXT: uunpkhi z5.d, z1.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z3.s
+; CHECK-NEXT: movprfx z2, z4
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z4.s
+; CHECK-NEXT: movprfx z3, z5
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s
; CHECK-NEXT: ret
- %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x)
- ret <8 x i64> %a
+ %a = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x)
+ ret <vscale x 8 x i64> %a
}
-declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>)
+declare <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f32(<vscale x 8 x float>)
-define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
+define <vscale x 16 x i64> @lrint_v16i64_v16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: lrint_v16i64_v16f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: frintx s7, s0
-; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: frintx s17, s4
-; CHECK-NEXT: mov s4, v4.s[1]
-; CHECK-NEXT: mov s18, v5.s[1]
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s19, s6
-; CHECK-NEXT: fcvtzs x8, s7
-; CHECK-NEXT: frintx s7, s16
-; CHECK-NEXT: mov s6, v6.s[1]
-; CHECK-NEXT: mov s16, v16.s[1]
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: fcvtzs x9, s17
-; CHECK-NEXT: frintx s17, s1
-; CHECK-NEXT: mov s1, v1.s[1]
-; CHECK-NEXT: frintx s18, s18
-; CHECK-NEXT: fcvtzs x10, s5
-; CHECK-NEXT: mov s5, v2.s[1]
-; CHECK-NEXT: fcvtzs x11, s19
-; CHECK-NEXT: mov s19, v3.s[1]
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: fcvtzs x12, s7
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvtzs x13, s4
-; CHECK-NEXT: frintx s4, s3
-; CHECK-NEXT: frintx s16, s16
-; CHECK-NEXT: fcvtzs x14, s18
-; CHECK-NEXT: frintx s18, s1
-; CHECK-NEXT: fcvtzs x15, s17
-; CHECK-NEXT: frintx s20, s5
-; CHECK-NEXT: frintx s17, s19
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fcvtzs x9, s2
-; CHECK-NEXT: fmov d5, x11
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: fcvtzs x11, s4
-; CHECK-NEXT: fcvtzs x10, s0
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzs x12, s18
-; CHECK-NEXT: fcvtzs x17, s6
-; CHECK-NEXT: fcvtzs x18, s16
-; CHECK-NEXT: fcvtzs x16, s20
-; CHECK-NEXT: fcvtzs x0, s17
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d2, x15
-; CHECK-NEXT: fmov d4, x9
-; CHECK-NEXT: mov v1.d[1], x13
-; CHECK-NEXT: fmov d6, x11
-; CHECK-NEXT: mov v3.d[1], x14
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v5.d[1], x17
-; CHECK-NEXT: mov v7.d[1], x18
-; CHECK-NEXT: mov v2.d[1], x12
-; CHECK-NEXT: mov v4.d[1], x16
-; CHECK-NEXT: mov v6.d[1], x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z4.d, z0.s
+; CHECK-NEXT: uunpkhi z5.d, z0.s
+; CHECK-NEXT: uunpklo z6.d, z1.s
+; CHECK-NEXT: uunpkhi z7.d, z1.s
+; CHECK-NEXT: uunpklo z24.d, z2.s
+; CHECK-NEXT: uunpkhi z25.d, z2.s
+; CHECK-NEXT: uunpklo z26.d, z3.s
+; CHECK-NEXT: uunpkhi z27.d, z3.s
+; CHECK-NEXT: movprfx z0, z4
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.s
+; CHECK-NEXT: movprfx z1, z5
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s
+; CHECK-NEXT: movprfx z2, z6
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s
+; CHECK-NEXT: movprfx z3, z7
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s
+; CHECK-NEXT: movprfx z4, z24
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.s
+; CHECK-NEXT: movprfx z5, z25
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.s
+; CHECK-NEXT: movprfx z6, z26
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z26.s
+; CHECK-NEXT: movprfx z7, z27
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z27.s
+; CHECK-NEXT: ret
+ %a = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f32(<vscale x 16 x float> %x)
+ ret <vscale x 16 x i64> %a
+}
+declare <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f32(<vscale x 16 x float>)
+
+define <vscale x 32 x i64> @lrint_v32i64_v32f32(<vscale x 32 x float> %x) {
+; CHECK-LABEL: lrint_v32i64_v32f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z24.d, z7.s
+; CHECK-NEXT: uunpklo z7.d, z7.s
+; CHECK-NEXT: uunpkhi z27.d, z6.s
+; CHECK-NEXT: rdvl x9, #15
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: uunpklo z6.d, z6.s
+; CHECK-NEXT: uunpkhi z29.d, z5.s
+; CHECK-NEXT: uunpklo z5.d, z5.s
+; CHECK-NEXT: uunpkhi z31.d, z4.s
+; CHECK-NEXT: uunpklo z30.d, z3.s
+; CHECK-NEXT: uunpkhi z3.d, z3.s
+; CHECK-NEXT: uunpklo z4.d, z4.s
+; CHECK-NEXT: uunpkhi z25.d, z0.s
+; CHECK-NEXT: uunpklo z26.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uunpklo z28.d, z2.s
+; CHECK-NEXT: uunpkhi z2.d, z2.s
+; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s
+; CHECK-NEXT: fcvtzs z27.d, p0/m, z27.s
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s
+; CHECK-NEXT: fcvtzs z29.d, p0/m, z29.s
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s
+; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #14
+; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #13
+; CHECK-NEXT: movprfx z7, z31
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z31.s
+; CHECK-NEXT: st1b { z27.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #12
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #11
+; CHECK-NEXT: movprfx z6, z30
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z30.s
+; CHECK-NEXT: st1b { z29.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #10
+; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #9
+; CHECK-NEXT: movprfx z5, z28
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s
+; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9]
+; CHECK-NEXT: rdvl x9, #8
+; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9]
+; CHECK-NEXT: movprfx z4, z25
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z25.s
+; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl]
+; CHECK-NEXT: movprfx z3, z26
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z26.s
+; CHECK-NEXT: st1d { z6.d }, p0, [x8, #6, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl]
+; CHECK-NEXT: st1d { z5.d }, p0, [x8, #4, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl]
+; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl]
+; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x)
- ret <16 x i64> %a
+ %a = call <vscale x 32 x i64> @llvm.lrint.nxv32i64.nxv32f32(<vscale x 32 x float> %x)
+ ret <vscale x 32 x i64> %a
}
-declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>)
+declare <vscale x 32 x i64> @llvm.lrint.nxv32i64.nxv32f32(<vscale x 32 x float>)
-define <1 x i64> @lrint_v1f64(<1 x double> %x) {
+define <vscale x 1 x i64> @lrint_v1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: lrint_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: ret
- %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x)
- ret <1 x i64> %a
+ %a = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f64(<vscale x 1 x double> %x)
+ ret <vscale x 1 x i64> %a
}
-declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>)
+declare <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f64(<vscale x 1 x double>)
-define <2 x i64> @lrint_v2f64(<2 x double> %x) {
+define <vscale x 2 x i64> @lrint_v2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: lrint_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: frintx d1, d1
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fcvtzs x9, d1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: ret
- %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x)
- ret <2 x i64> %a
+ %a = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f64(<vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %a
}
-declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)
+declare <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f64(<vscale x 2 x double>)
-define <4 x i64> @lrint_v4f64(<4 x double> %x) {
+define <vscale x 4 x i64> @lrint_v4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: lrint_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov d2, v0.d[1]
-; CHECK-NEXT: mov d3, v1.d[1]
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: frintx d1, d1
-; CHECK-NEXT: frintx d2, d2
-; CHECK-NEXT: frintx d3, d3
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fcvtzs x9, d1
-; CHECK-NEXT: fcvtzs x10, d2
-; CHECK-NEXT: fcvtzs x11, d3
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT: ret
- %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x)
- ret <4 x i64> %a
+ %a = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> %x)
+ ret <vscale x 4 x i64> %a
}
-declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>)
+declare <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f64(<vscale x 4 x double>)
-define <8 x i64> @lrint_v8f64(<8 x double> %x) {
+define <vscale x 8 x i64> @lrint_v8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: lrint_v8f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov d4, v0.d[1]
-; CHECK-NEXT: mov d5, v1.d[1]
-; CHECK-NEXT: mov d6, v2.d[1]
-; CHECK-NEXT: mov d7, v3.d[1]
-; CHECK-NEXT: frintx d0, d0
-; CHECK-NEXT: frintx d1, d1
-; CHECK-NEXT: frintx d2, d2
-; CHECK-NEXT: frintx d3, d3
-; CHECK-NEXT: frintx d4, d4
-; CHECK-NEXT: frintx d5, d5
-; CHECK-NEXT: frintx d6, d6
-; CHECK-NEXT: frintx d7, d7
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fcvtzs x9, d1
-; CHECK-NEXT: fcvtzs x10, d2
-; CHECK-NEXT: fcvtzs x11, d3
-; CHECK-NEXT: fcvtzs x12, d4
-; CHECK-NEXT: fcvtzs x13, d5
-; CHECK-NEXT: fcvtzs x14, d6
-; CHECK-NEXT: fcvtzs x15, d7
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fmov d2, x10
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: mov v0.d[1], x12
-; CHECK-NEXT: mov v1.d[1], x13
-; CHECK-NEXT: mov v2.d[1], x14
-; CHECK-NEXT: mov v3.d[1], x15
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: ret
+ %a = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f64(<vscale x 8 x double> %x)
+ ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f64(<vscale x 8 x double>)
+
+define <vscale x 16 x i64> @lrint_v16f64(<vscale x 16 x double> %x) {
+; CHECK-LABEL: lrint_v16f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: ret
+ %a = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f64(<vscale x 16 x double> %x)
+ ret <vscale x 16 x i64> %a
+}
+declare <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f64(<vscale x 16 x double>)
+
+define <vscale x 32 x i64> @lrint_v32f64(<vscale x 32 x double> %x) {
+; CHECK-LABEL: lrint_v32f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: rdvl x9, #15
+; CHECK-NEXT: rdvl x10, #14
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: rdvl x11, #13
+; CHECK-NEXT: rdvl x12, #12
+; CHECK-NEXT: rdvl x13, #11
+; CHECK-NEXT: rdvl x14, #10
+; CHECK-NEXT: rdvl x15, #9
+; CHECK-NEXT: rdvl x16, #8
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x9]
+; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x10]
+; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x11]
+; CHECK-NEXT: ld1b { z3.b }, p0/z, [x0, x12]
+; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x13]
+; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x14]
+; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x15]
+; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16]
+; CHECK-NEXT: ld1d { z24.d }, p1/z, [x0, #7, mul vl]
+; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
+; CHECK-NEXT: ld1d { z25.d }, p1/z, [x0, #6, mul vl]
+; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl]
+; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d
+; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0, #4, mul vl]
+; CHECK-NEXT: ld1d { z28.d }, p1/z, [x0, #3, mul vl]
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d
+; CHECK-NEXT: ld1d { z29.d }, p1/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1d { z30.d }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d
+; CHECK-NEXT: ld1d { z31.d }, p1/z, [x0]
+; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d
+; CHECK-NEXT: st1b { z0.b }, p0, [x8, x9]
+; CHECK-NEXT: movprfx z0, z5
+; CHECK-NEXT: fcvtzs z0.d, p1/m, z5.d
+; CHECK-NEXT: st1b { z1.b }, p0, [x8, x10]
+; CHECK-NEXT: movprfx z1, z6
+; CHECK-NEXT: fcvtzs z1.d, p1/m, z6.d
+; CHECK-NEXT: st1b { z2.b }, p0, [x8, x11]
+; CHECK-NEXT: movprfx z2, z7
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z7.d
+; CHECK-NEXT: st1b { z3.b }, p0, [x8, x12]
+; CHECK-NEXT: movprfx z3, z24
+; CHECK-NEXT: fcvtzs z3.d, p1/m, z24.d
+; CHECK-NEXT: st1b { z4.b }, p0, [x8, x13]
+; CHECK-NEXT: movprfx z4, z25
+; CHECK-NEXT: fcvtzs z4.d, p1/m, z25.d
+; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14]
+; CHECK-NEXT: movprfx z0, z26
+; CHECK-NEXT: fcvtzs z0.d, p1/m, z26.d
+; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15]
+; CHECK-NEXT: movprfx z1, z27
+; CHECK-NEXT: fcvtzs z1.d, p1/m, z27.d
+; CHECK-NEXT: st1b { z2.b }, p0, [x8, x16]
+; CHECK-NEXT: movprfx z2, z28
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z28.d
+; CHECK-NEXT: st1d { z3.d }, p1, [x8, #7, mul vl]
+; CHECK-NEXT: movprfx z3, z29
+; CHECK-NEXT: fcvtzs z3.d, p1/m, z29.d
+; CHECK-NEXT: st1d { z4.d }, p1, [x8, #6, mul vl]
+; CHECK-NEXT: movprfx z4, z30
+; CHECK-NEXT: fcvtzs z4.d, p1/m, z30.d
+; CHECK-NEXT: st1d { z0.d }, p1, [x8, #5, mul vl]
+; CHECK-NEXT: movprfx z0, z31
+; CHECK-NEXT: fcvtzs z0.d, p1/m, z31.d
+; CHECK-NEXT: st1d { z1.d }, p1, [x8, #4, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p1, [x8, #3, mul vl]
+; CHECK-NEXT: st1d { z3.d }, p1, [x8, #2, mul vl]
+; CHECK-NEXT: st1d { z4.d }, p1, [x8, #1, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p1, [x8]
; CHECK-NEXT: ret
- %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x)
- ret <8 x i64> %a
+  %a = call <vscale x 32 x i64> @llvm.lrint.nxv32i64.nxv32f64(<vscale x 32 x double> %x)
+ ret <vscale x 32 x i64> %a
}
-declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
+declare <vscale x 32 x i64> @llvm.lrint.nxv32i64.nxv32f64(<vscale x 32 x double>)