[llvm] fa1a682 - [AArch64] Improve fptosi.sat vector lowering
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 15 03:38:07 PDT 2021
Author: David Green
Date: 2021-10-15T11:37:53+01:00
New Revision: fa1a68285e404d3bc9ae363afdff863e5348312b
URL: https://github.com/llvm/llvm-project/commit/fa1a68285e404d3bc9ae363afdff863e5348312b
DIFF: https://github.com/llvm/llvm-project/commit/fa1a68285e404d3bc9ae363afdff863e5348312b.diff
LOG: [AArch64] Improve fptosi.sat vector lowering
Similar to D111236, this improves the lowering of vector fptosi.sat and
fptoui.sat, using legal converts and further saturating from there with
min/max. f64 source vectors are excluded for the moment, as this lowering
produces worse code in places than unrolling does.
Differential Revision: https://reviews.llvm.org/D111787
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5aedd015a804..ca9ddfa08081 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3422,30 +3422,54 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
if (DstVT.isScalableVector())
return SDValue();
- // TODO: Saturate to SatWidth explicitly.
- if (SatWidth != DstElementWidth)
- return SDValue();
-
EVT SrcElementVT = SrcVT.getVectorElementType();
- // In the absence of FP16 support, promote f16 to f32, like
- // LowerVectorFP_TO_INT().
- if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) {
+ // In the absence of FP16 support, promote f16 to f32 and saturate the result.
+ if (SrcElementVT == MVT::f16 &&
+ (!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal),
- Op.getOperand(1));
- }
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
+ SrcVT = F32VT;
+ SrcElementVT = MVT::f32;
+ SrcElementWidth = 32;
+ } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
+ SrcElementVT != MVT::f16)
+ return SDValue();
+ SDLoc DL(Op);
// Cases that we can emit directly.
- if ((SrcElementWidth == DstElementWidth) &&
- (SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 ||
- (SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) {
- return Op;
+ if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
+ return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
+ DAG.getValueType(DstVT.getScalarType()));
+
+ // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
+ // result. This is only valid if the legal cvt is larger than the saturate
+ // width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
+ // (at least until sqxtn is selected).
+ if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
+ return SDValue();
+
+ EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
+ SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
+ DAG.getValueType(IntVT.getScalarType()));
+ SDValue Sat;
+ if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
+ SDValue MinC = DAG.getConstant(
+ APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
+ SDValue MaxC = DAG.getConstant(
+ APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
+ } else {
+ SDValue MinC = DAG.getConstant(
+ APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
}
- // For all other cases, fall back on the expanded form.
- return SDValue();
+ return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 0f49fde7d1fe..964c806ad092 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -582,224 +582,98 @@ define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
}
define <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v2f16_v2i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v2f16_v2i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: fmov s0, w8
-; CHECK-FP16-NEXT: fcvtzs w8, h1
-; CHECK-FP16-NEXT: mov v0.s[1], w8
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v2f16_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptosi.sat.v2f16.v2i32(<2 x half> %f)
ret <2 x i32> %x
}
define <3 x i32> @test_signed_v3f16_v3i32(<3 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v3f16_v3i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v3f16_v3i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v3f16_v3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <3 x i32> @llvm.fptosi.sat.v3f16.v3i32(<3 x half> %f)
ret <3 x i32> %x
}
define <4 x i32> @test_signed_v4f16_v4i32(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v4f16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptosi.sat.v4f16.v4i32(<4 x half> %f)
ret <4 x i32> %x
}
define <5 x i32> @test_signed_v5f16_v5i32(<5 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v5f16_v5i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: mov w1, v1.s[1]
-; CHECK-CVT-NEXT: mov w2, v1.s[2]
-; CHECK-CVT-NEXT: mov w3, v1.s[3]
-; CHECK-CVT-NEXT: fmov w0, s1
-; CHECK-CVT-NEXT: fmov w4, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v5f16_v5i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzs w0, h0
-; CHECK-FP16-NEXT: fcvtzs w1, h1
-; CHECK-FP16-NEXT: fcvtzs w2, h2
-; CHECK-FP16-NEXT: fcvtzs w4, h3
-; CHECK-FP16-NEXT: fcvtzs w3, h4
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v5f16_v5i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: mov w1, v1.s[1]
+; CHECK-NEXT: mov w2, v1.s[2]
+; CHECK-NEXT: mov w3, v1.s[3]
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: ret
%x = call <5 x i32> @llvm.fptosi.sat.v5f16.v5i32(<5 x half> %f)
ret <5 x i32> %x
}
define <6 x i32> @test_signed_v6f16_v6i32(<6 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v6f16_v6i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: mov w1, v1.s[1]
-; CHECK-CVT-NEXT: mov w2, v1.s[2]
-; CHECK-CVT-NEXT: mov w3, v1.s[3]
-; CHECK-CVT-NEXT: mov w5, v0.s[1]
-; CHECK-CVT-NEXT: fmov w0, s1
-; CHECK-CVT-NEXT: fmov w4, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v6f16_v6i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzs w0, h0
-; CHECK-FP16-NEXT: mov h2, v1.h[1]
-; CHECK-FP16-NEXT: fcvtzs w8, h1
-; CHECK-FP16-NEXT: fcvtzs w2, h3
-; CHECK-FP16-NEXT: fcvtzs w3, h4
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzs w5, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: mov v1.s[1], w5
-; CHECK-FP16-NEXT: fcvtzs w1, h2
-; CHECK-FP16-NEXT: fmov w4, s1
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v6f16_v6i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: mov w1, v1.s[1]
+; CHECK-NEXT: mov w2, v1.s[2]
+; CHECK-NEXT: mov w3, v1.s[3]
+; CHECK-NEXT: mov w5, v0.s[1]
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: ret
%x = call <6 x i32> @llvm.fptosi.sat.v6f16.v6i32(<6 x half> %f)
ret <6 x i32> %x
}
define <7 x i32> @test_signed_v7f16_v7i32(<7 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v7f16_v7i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: mov w1, v1.s[1]
-; CHECK-CVT-NEXT: mov w2, v1.s[2]
-; CHECK-CVT-NEXT: mov w3, v1.s[3]
-; CHECK-CVT-NEXT: mov w5, v0.s[1]
-; CHECK-CVT-NEXT: mov w6, v0.s[2]
-; CHECK-CVT-NEXT: fmov w0, s1
-; CHECK-CVT-NEXT: fmov w4, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v7f16_v7i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzs w0, h0
-; CHECK-FP16-NEXT: mov h2, v1.h[1]
-; CHECK-FP16-NEXT: fcvtzs w8, h1
-; CHECK-FP16-NEXT: mov h1, v1.h[2]
-; CHECK-FP16-NEXT: fcvtzs w3, h4
-; CHECK-FP16-NEXT: fmov s3, w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: fcvtzs w6, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov v3.s[1], w8
-; CHECK-FP16-NEXT: fcvtzs w2, h2
-; CHECK-FP16-NEXT: fcvtzs w1, h1
-; CHECK-FP16-NEXT: mov v3.s[2], w6
-; CHECK-FP16-NEXT: mov w5, v3.s[1]
-; CHECK-FP16-NEXT: fmov w4, s3
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v7f16_v7i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: mov w1, v1.s[1]
+; CHECK-NEXT: mov w2, v1.s[2]
+; CHECK-NEXT: mov w3, v1.s[3]
+; CHECK-NEXT: mov w5, v0.s[1]
+; CHECK-NEXT: mov w6, v0.s[2]
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: ret
%x = call <7 x i32> @llvm.fptosi.sat.v7f16.v7i32(<7 x half> %f)
ret <7 x i32> %x
}
define <8 x i32> @test_signed_v8f16_v8i32(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h4, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs w9, h0
-; CHECK-FP16-NEXT: mov h2, v3.h[1]
-; CHECK-FP16-NEXT: fcvtzs w8, h3
-; CHECK-FP16-NEXT: mov h5, v3.h[2]
-; CHECK-FP16-NEXT: mov h3, v3.h[3]
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: fmov s2, w9
-; CHECK-FP16-NEXT: fcvtzs w9, h4
-; CHECK-FP16-NEXT: mov h4, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h5
-; CHECK-FP16-NEXT: mov v2.s[1], w9
-; CHECK-FP16-NEXT: fcvtzs w9, h4
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h3
-; CHECK-FP16-NEXT: mov v2.s[2], w9
-; CHECK-FP16-NEXT: fcvtzs w9, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v2.s[3], w9
-; CHECK-FP16-NEXT: mov v0.16b, v2.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v8f16_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f)
ret <8 x i32> %x
}
@@ -821,21 +695,11 @@ declare <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float>)
define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs w9, s0
-; CHECK-NEXT: fcvtzs w8, s1
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w8, w8, wzr, lt
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csinv w8, w8, wzr, ge
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: csel w9, w9, wzr, lt
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: csinv w9, w9, wzr, ge
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i1> @llvm.fptosi.sat.v2f32.v2i1(<2 x float> %f)
ret <2 x i1> %x
@@ -844,23 +708,11 @@ define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) {
define <2 x i8> @test_signed_v2f32_v2i8(<2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov w8, #127
-; CHECK-NEXT: fcvtzs w10, s0
-; CHECK-NEXT: mov w11, #-128
-; CHECK-NEXT: fcvtzs w9, s1
-; CHECK-NEXT: cmp w9, #127
-; CHECK-NEXT: csel w9, w9, w8, lt
-; CHECK-NEXT: cmn w9, #128
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, #127
-; CHECK-NEXT: csel w8, w10, w8, lt
-; CHECK-NEXT: cmn w8, #128
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2s, #127
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mvni v1.2s, #127
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i8> @llvm.fptosi.sat.v2f32.v2i8(<2 x float> %f)
ret <2 x i8> %x
@@ -869,23 +721,11 @@ define <2 x i8> @test_signed_v2f32_v2i8(<2 x float> %f) {
define <2 x i13> @test_signed_v2f32_v2i13(<2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i13:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov w8, #4095
-; CHECK-NEXT: fcvtzs w10, s0
-; CHECK-NEXT: mov w11, #-4096
-; CHECK-NEXT: fcvtzs w9, s1
-; CHECK-NEXT: cmp w9, #4095
-; CHECK-NEXT: csel w9, w9, w8, lt
-; CHECK-NEXT: cmn w9, #1, lsl #12 // =4096
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, #4095
-; CHECK-NEXT: csel w8, w10, w8, lt
-; CHECK-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2s, #15, msl #8
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mvni v1.2s, #15, msl #8
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i13> @llvm.fptosi.sat.v2f32.v2i13(<2 x float> %f)
ret <2 x i13> %x
@@ -894,23 +734,11 @@ define <2 x i13> @test_signed_v2f32_v2i13(<2 x float> %f) {
define <2 x i16> @test_signed_v2f32_v2i16(<2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov w8, #32767
-; CHECK-NEXT: fcvtzs w10, s0
-; CHECK-NEXT: mov w11, #-32768
-; CHECK-NEXT: fcvtzs w9, s1
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lt
-; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w8, w10, w8, lt
-; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2s, #127, msl #8
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mvni v1.2s, #127, msl #8
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i16> @llvm.fptosi.sat.v2f32.v2i16(<2 x float> %f)
ret <2 x i16> %x
@@ -919,23 +747,11 @@ define <2 x i16> @test_signed_v2f32_v2i16(<2 x float> %f) {
define <2 x i19> @test_signed_v2f32_v2i19(<2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i19:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov w8, #262143
-; CHECK-NEXT: fcvtzs w10, s0
-; CHECK-NEXT: mov w11, #-262144
-; CHECK-NEXT: fcvtzs w9, s1
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lt
-; CHECK-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w8, w10, w8, lt
-; CHECK-NEXT: cmn w8, #64, lsl #12 // =262144
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2s, #3, msl #16
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mvni v1.2s, #3, msl #16
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i19> @llvm.fptosi.sat.v2f32.v2i19(<2 x float> %f)
ret <2 x i19> %x
@@ -1139,34 +955,12 @@ declare <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float>)
define <4 x i1> @test_signed_v4f32_v4i1(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs w9, s0
-; CHECK-NEXT: fcvtzs w8, s1
-; CHECK-NEXT: mov s1, v0.s[2]
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w8, w8, wzr, lt
-; CHECK-NEXT: fcvtzs w10, s1
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: mov s1, v0.s[3]
-; CHECK-NEXT: csinv w8, w8, wzr, ge
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: csel w9, w9, wzr, lt
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: csinv w9, w9, wzr, ge
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: csel w9, w10, wzr, lt
-; CHECK-NEXT: fcvtzs w10, s1
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: csinv w8, w9, wzr, ge
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csel w9, w10, wzr, lt
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: mov v0.h[2], w8
-; CHECK-NEXT: csinv w8, w9, wzr, ge
-; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%x = call <4 x i1> @llvm.fptosi.sat.v4f32.v4i1(<4 x float> %f)
ret <4 x i1> %x
@@ -1175,36 +969,12 @@ define <4 x i1> @test_signed_v4f32_v4i1(<4 x float> %f) {
define <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov w8, #127
-; CHECK-NEXT: fcvtzs w10, s0
-; CHECK-NEXT: mov w11, #-128
-; CHECK-NEXT: fcvtzs w9, s1
-; CHECK-NEXT: mov s1, v0.s[2]
-; CHECK-NEXT: cmp w9, #127
-; CHECK-NEXT: csel w9, w9, w8, lt
-; CHECK-NEXT: fcvtzs w12, s1
-; CHECK-NEXT: cmn w9, #128
-; CHECK-NEXT: mov s1, v0.s[3]
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, #127
-; CHECK-NEXT: csel w10, w10, w8, lt
-; CHECK-NEXT: cmn w10, #128
-; CHECK-NEXT: csel w10, w10, w11, gt
-; CHECK-NEXT: cmp w12, #127
-; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: csel w10, w12, w8, lt
-; CHECK-NEXT: fcvtzs w12, s1
-; CHECK-NEXT: cmn w10, #128
-; CHECK-NEXT: mov v0.h[1], w9
-; CHECK-NEXT: csel w9, w10, w11, gt
-; CHECK-NEXT: cmp w12, #127
-; CHECK-NEXT: csel w8, w12, w8, lt
-; CHECK-NEXT: cmn w8, #128
-; CHECK-NEXT: mov v0.h[2], w9
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.4s, #127
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mvni v1.4s, #127
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%x = call <4 x i8> @llvm.fptosi.sat.v4f32.v4i8(<4 x float> %f)
ret <4 x i8> %x
@@ -1213,36 +983,12 @@ define <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) {
define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i13:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov w8, #4095
-; CHECK-NEXT: fcvtzs w10, s0
-; CHECK-NEXT: mov w11, #-4096
-; CHECK-NEXT: fcvtzs w9, s1
-; CHECK-NEXT: mov s1, v0.s[2]
-; CHECK-NEXT: cmp w9, #4095
-; CHECK-NEXT: csel w9, w9, w8, lt
-; CHECK-NEXT: fcvtzs w12, s1
-; CHECK-NEXT: cmn w9, #1, lsl #12 // =4096
-; CHECK-NEXT: mov s1, v0.s[3]
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, #4095
-; CHECK-NEXT: csel w10, w10, w8, lt
-; CHECK-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-NEXT: csel w10, w10, w11, gt
-; CHECK-NEXT: cmp w12, #4095
-; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: csel w10, w12, w8, lt
-; CHECK-NEXT: fcvtzs w12, s1
-; CHECK-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-NEXT: mov v0.h[1], w9
-; CHECK-NEXT: csel w9, w10, w11, gt
-; CHECK-NEXT: cmp w12, #4095
-; CHECK-NEXT: csel w8, w12, w8, lt
-; CHECK-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-NEXT: mov v0.h[2], w9
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.4s, #15, msl #8
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mvni v1.4s, #15, msl #8
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%x = call <4 x i13> @llvm.fptosi.sat.v4f32.v4i13(<4 x float> %f)
ret <4 x i13> %x
@@ -1251,36 +997,8 @@ define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) {
define <4 x i16> @test_signed_v4f32_v4i16(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov w8, #32767
-; CHECK-NEXT: fcvtzs w10, s0
-; CHECK-NEXT: mov w11, #-32768
-; CHECK-NEXT: fcvtzs w9, s1
-; CHECK-NEXT: mov s1, v0.s[2]
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lt
-; CHECK-NEXT: fcvtzs w12, s1
-; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768
-; CHECK-NEXT: mov s1, v0.s[3]
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w10, w10, w8, lt
-; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w10, w10, w11, gt
-; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: csel w10, w12, w8, lt
-; CHECK-NEXT: fcvtzs w12, s1
-; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-NEXT: mov v0.h[1], w9
-; CHECK-NEXT: csel w9, w10, w11, gt
-; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: csel w8, w12, w8, lt
-; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-NEXT: mov v0.h[2], w9
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: sqxtn v0.4h, v0.4s
; CHECK-NEXT: ret
%x = call <4 x i16> @llvm.fptosi.sat.v4f32.v4i16(<4 x float> %f)
ret <4 x i16> %x
@@ -1289,35 +1007,11 @@ define <4 x i16> @test_signed_v4f32_v4i16(<4 x float> %f) {
define <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i19:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: mov w8, #262143
-; CHECK-NEXT: fcvtzs w10, s0
-; CHECK-NEXT: mov w11, #-262144
-; CHECK-NEXT: fcvtzs w9, s1
-; CHECK-NEXT: mov s1, v0.s[2]
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lt
-; CHECK-NEXT: fcvtzs w12, s1
-; CHECK-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-NEXT: mov s1, v0.s[3]
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w10, w10, w8, lt
-; CHECK-NEXT: cmn w10, #64, lsl #12 // =262144
-; CHECK-NEXT: csel w10, w10, w11, gt
-; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: csel w10, w12, w8, lt
-; CHECK-NEXT: fcvtzs w12, s1
-; CHECK-NEXT: cmn w10, #64, lsl #12 // =262144
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: csel w9, w10, w11, gt
-; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: csel w8, w12, w8, lt
-; CHECK-NEXT: cmn w8, #64, lsl #12 // =262144
-; CHECK-NEXT: mov v0.s[2], w9
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: mov v0.s[3], w8
+; CHECK-NEXT: movi v1.4s, #3, msl #16
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mvni v1.4s, #3, msl #16
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%x = call <4 x i19> @llvm.fptosi.sat.v4f32.v4i19(<4 x float> %f)
ret <4 x i19> %x
@@ -1921,72 +1615,22 @@ declare <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half>)
define <4 x i1> @test_signed_v4f16_v4i1(<4 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v4f16_v4i1:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzs w9, s2
-; CHECK-CVT-NEXT: fcvtzs w8, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: cmp w8, #0
-; CHECK-CVT-NEXT: csel w8, w8, wzr, lt
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: cmp w8, #0
-; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: csel w9, w9, wzr, lt
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: fcvtzs w10, s1
-; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge
-; CHECK-CVT-NEXT: fcvt s1, h0
-; CHECK-CVT-NEXT: cmp w10, #0
-; CHECK-CVT-NEXT: fmov s0, w9
-; CHECK-CVT-NEXT: csel w9, w10, wzr, lt
-; CHECK-CVT-NEXT: fcvtzs w10, s1
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: mov v0.h[1], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge
-; CHECK-CVT-NEXT: cmp w10, #0
-; CHECK-CVT-NEXT: csel w9, w10, wzr, lt
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: mov v0.h[2], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge
-; CHECK-CVT-NEXT: mov v0.h[3], w8
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v4f16_v4i1:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs w9, h0
-; CHECK-FP16-NEXT: fcvtzs w8, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[2]
-; CHECK-FP16-NEXT: cmp w8, #0
-; CHECK-FP16-NEXT: csel w8, w8, wzr, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h1
-; CHECK-FP16-NEXT: cmp w8, #0
-; CHECK-FP16-NEXT: mov h1, v0.h[3]
-; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: csel w9, w9, wzr, lt
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: csinv w9, w9, wzr, ge
-; CHECK-FP16-NEXT: cmp w10, #0
-; CHECK-FP16-NEXT: fmov s0, w9
-; CHECK-FP16-NEXT: csel w9, w10, wzr, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h1
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: mov v0.h[1], w8
-; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge
-; CHECK-FP16-NEXT: cmp w10, #0
-; CHECK-FP16-NEXT: csel w9, w10, wzr, lt
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: mov v0.h[2], w8
-; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge
-; CHECK-FP16-NEXT: mov v0.h[3], w8
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000
+; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h
+; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i1> @llvm.fptosi.sat.v4f16.v4i1(<4 x half> %f)
ret <4 x i1> %x
@@ -1995,76 +1639,22 @@ define <4 x i1> @test_signed_v4f16_v4i1(<4 x half> %f) {
define <4 x i8> @test_signed_v4f16_v4i8(<4 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v4f16_v4i8:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov w8, #127
-; CHECK-CVT-NEXT: mov w11, #-128
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzs w10, s2
-; CHECK-CVT-NEXT: fcvtzs w9, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: cmp w9, #127
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: cmn w9, #128
-; CHECK-CVT-NEXT: csel w9, w9, w11, gt
-; CHECK-CVT-NEXT: cmp w10, #127
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #128
-; CHECK-CVT-NEXT: fcvtzs w12, s1
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: fcvt s1, h0
-; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: fmov s0, w10
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s1
-; CHECK-CVT-NEXT: cmn w10, #128
-; CHECK-CVT-NEXT: mov v0.h[1], w9
-; CHECK-CVT-NEXT: csel w9, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: csel w8, w12, w8, lt
-; CHECK-CVT-NEXT: cmn w8, #128
-; CHECK-CVT-NEXT: mov v0.h[2], w9
-; CHECK-CVT-NEXT: csel w8, w8, w11, gt
-; CHECK-CVT-NEXT: mov v0.h[3], w8
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.4s, #127
+; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: mvni v1.4s, #127
+; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v4f16_v4i8:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov w8, #127
-; CHECK-FP16-NEXT: fcvtzs w10, h0
-; CHECK-FP16-NEXT: mov w11, #-128
-; CHECK-FP16-NEXT: fcvtzs w9, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[2]
-; CHECK-FP16-NEXT: cmp w9, #127
-; CHECK-FP16-NEXT: csel w9, w9, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h1
-; CHECK-FP16-NEXT: cmn w9, #128
-; CHECK-FP16-NEXT: mov h1, v0.h[3]
-; CHECK-FP16-NEXT: csel w9, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w10, #127
-; CHECK-FP16-NEXT: csel w10, w10, w8, lt
-; CHECK-FP16-NEXT: cmn w10, #128
-; CHECK-FP16-NEXT: csel w10, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #127
-; CHECK-FP16-NEXT: fmov s0, w10
-; CHECK-FP16-NEXT: csel w10, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h1
-; CHECK-FP16-NEXT: cmn w10, #128
-; CHECK-FP16-NEXT: mov v0.h[1], w9
-; CHECK-FP16-NEXT: csel w9, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #127
-; CHECK-FP16-NEXT: csel w8, w12, w8, lt
-; CHECK-FP16-NEXT: cmn w8, #128
-; CHECK-FP16-NEXT: mov v0.h[2], w9
-; CHECK-FP16-NEXT: csel w8, w8, w11, gt
-; CHECK-FP16-NEXT: mov v0.h[3], w8
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-FP16-NEXT: movi v1.4h, #127
+; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h
+; CHECK-FP16-NEXT: mvni v1.4h, #127
+; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i8> @llvm.fptosi.sat.v4f16.v4i8(<4 x half> %f)
ret <4 x i8> %x
@@ -2073,76 +1663,22 @@ define <4 x i8> @test_signed_v4f16_v4i8(<4 x half> %f) {
define <4 x i13> @test_signed_v4f16_v4i13(<4 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v4f16_v4i13:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov w8, #4095
-; CHECK-CVT-NEXT: mov w11, #-4096
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzs w10, s2
-; CHECK-CVT-NEXT: fcvtzs w9, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: cmp w9, #4095
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w9, w9, w11, gt
-; CHECK-CVT-NEXT: cmp w10, #4095
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: fcvtzs w12, s1
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: fcvt s1, h0
-; CHECK-CVT-NEXT: cmp w12, #4095
-; CHECK-CVT-NEXT: fmov s0, w10
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s1
-; CHECK-CVT-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: mov v0.h[1], w9
-; CHECK-CVT-NEXT: csel w9, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, #4095
-; CHECK-CVT-NEXT: csel w8, w12, w8, lt
-; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: mov v0.h[2], w9
-; CHECK-CVT-NEXT: csel w8, w8, w11, gt
-; CHECK-CVT-NEXT: mov v0.h[3], w8
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.4s, #15, msl #8
+; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: mvni v1.4s, #15, msl #8
+; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v4f16_v4i13:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov w8, #4095
-; CHECK-FP16-NEXT: fcvtzs w10, h0
-; CHECK-FP16-NEXT: mov w11, #-4096
-; CHECK-FP16-NEXT: fcvtzs w9, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[2]
-; CHECK-FP16-NEXT: cmp w9, #4095
-; CHECK-FP16-NEXT: csel w9, w9, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h1
-; CHECK-FP16-NEXT: cmn w9, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov h1, v0.h[3]
-; CHECK-FP16-NEXT: csel w9, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w10, #4095
-; CHECK-FP16-NEXT: csel w10, w10, w8, lt
-; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: csel w10, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #4095
-; CHECK-FP16-NEXT: fmov s0, w10
-; CHECK-FP16-NEXT: csel w10, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h1
-; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov v0.h[1], w9
-; CHECK-FP16-NEXT: csel w9, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #4095
-; CHECK-FP16-NEXT: csel w8, w12, w8, lt
-; CHECK-FP16-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov v0.h[2], w9
-; CHECK-FP16-NEXT: csel w8, w8, w11, gt
-; CHECK-FP16-NEXT: mov v0.h[3], w8
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8
+; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h
+; CHECK-FP16-NEXT: movi v1.4h, #240, lsl #8
+; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i13> @llvm.fptosi.sat.v4f16.v4i13(<4 x half> %f)
ret <4 x i13> %x
@@ -2152,36 +1688,8 @@ define <4 x i16> @test_signed_v4f16_v4i16(<4 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v4f16_v4i16:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w8, #32767
-; CHECK-CVT-NEXT: mov w11, #-32768
-; CHECK-CVT-NEXT: mov s1, v0.s[1]
-; CHECK-CVT-NEXT: fcvtzs w10, s0
-; CHECK-CVT-NEXT: fcvtzs w9, s1
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s1
-; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov s1, v0.s[3]
-; CHECK-CVT-NEXT: csel w9, w9, w11, gt
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: fmov s0, w10
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s1
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v0.h[1], w9
-; CHECK-CVT-NEXT: csel w9, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: csel w8, w12, w8, lt
-; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v0.h[2], w9
-; CHECK-CVT-NEXT: csel w8, w8, w11, gt
-; CHECK-CVT-NEXT: mov v0.h[3], w8
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT: sqxtn v0.4h, v0.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v4f16_v4i16:
@@ -2193,104 +1701,25 @@ define <4 x i16> @test_signed_v4f16_v4i16(<4 x half> %f) {
}
define <4 x i19> @test_signed_v4f16_v4i19(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i19:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov w8, #262143
-; CHECK-CVT-NEXT: mov w11, #-262144
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzs w10, s2
-; CHECK-CVT-NEXT: fcvtzs w9, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: csel w9, w9, w11, gt
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: fcvtzs w12, s1
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: fcvt s1, h0
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: fmov s0, w10
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s1
-; CHECK-CVT-NEXT: cmn w10, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: mov v0.s[1], w9
-; CHECK-CVT-NEXT: csel w9, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: csel w8, w12, w8, lt
-; CHECK-CVT-NEXT: cmn w8, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: mov v0.s[2], w9
-; CHECK-CVT-NEXT: csel w8, w8, w11, gt
-; CHECK-CVT-NEXT: mov v0.s[3], w8
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i19:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov w8, #262143
-; CHECK-FP16-NEXT: fcvtzs w10, h0
-; CHECK-FP16-NEXT: mov w11, #-262144
-; CHECK-FP16-NEXT: fcvtzs w9, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[2]
-; CHECK-FP16-NEXT: cmp w9, w8
-; CHECK-FP16-NEXT: csel w9, w9, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h1
-; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: mov h1, v0.h[3]
-; CHECK-FP16-NEXT: csel w9, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w10, w10, w8, lt
-; CHECK-FP16-NEXT: cmn w10, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: csel w10, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, w8
-; CHECK-FP16-NEXT: fmov s0, w10
-; CHECK-FP16-NEXT: csel w10, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h1
-; CHECK-FP16-NEXT: cmn w10, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: mov v0.s[1], w9
-; CHECK-FP16-NEXT: csel w9, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, w8
-; CHECK-FP16-NEXT: csel w8, w12, w8, lt
-; CHECK-FP16-NEXT: cmn w8, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: mov v0.s[2], w9
-; CHECK-FP16-NEXT: csel w8, w8, w11, gt
-; CHECK-FP16-NEXT: mov v0.s[3], w8
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v4f16_v4i19:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: movi v1.4s, #3, msl #16
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mvni v1.4s, #3, msl #16
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%x = call <4 x i19> @llvm.fptosi.sat.v4f16.v4i19(<4 x half> %f)
ret <4 x i19> %x
}
define <4 x i32> @test_signed_v4f16_v4i32_duplicate(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v4f16_v4i32_duplicate:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v4f16_v4i32_duplicate:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v4f16_v4i32_duplicate:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptosi.sat.v4f16.v4i32(<4 x half> %f)
ret <4 x i32> %x
}
@@ -2636,130 +2065,75 @@ declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>)
define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v8f16_v8i1:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov h3, v0.h[4]
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzs w9, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[3]
-; CHECK-CVT-NEXT: fcvtzs w8, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: mov s2, v1.s[1]
+; CHECK-CVT-NEXT: fcvtzs w9, s1
+; CHECK-CVT-NEXT: fcvtzs w13, s0
+; CHECK-CVT-NEXT: fcvtzs w8, s2
+; CHECK-CVT-NEXT: mov s2, v1.s[2]
+; CHECK-CVT-NEXT: mov s1, v1.s[3]
; CHECK-CVT-NEXT: cmp w8, #0
; CHECK-CVT-NEXT: csel w8, w8, wzr, lt
-; CHECK-CVT-NEXT: fcvt s1, h1
+; CHECK-CVT-NEXT: fcvtzs w10, s2
; CHECK-CVT-NEXT: cmp w8, #0
+; CHECK-CVT-NEXT: mov s2, v0.s[1]
; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
; CHECK-CVT-NEXT: cmp w9, #0
; CHECK-CVT-NEXT: csel w9, w9, wzr, lt
+; CHECK-CVT-NEXT: fcvtzs w11, s1
; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: fcvtzs w10, s1
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge
; CHECK-CVT-NEXT: cmp w10, #0
-; CHECK-CVT-NEXT: fmov s1, w9
-; CHECK-CVT-NEXT: fcvtzs w9, s2
-; CHECK-CVT-NEXT: fcvt s2, h3
-; CHECK-CVT-NEXT: mov h3, v0.h[5]
; CHECK-CVT-NEXT: csel w10, w10, wzr, lt
-; CHECK-CVT-NEXT: mov v1.b[1], w8
-; CHECK-CVT-NEXT: cmp w10, #0
-; CHECK-CVT-NEXT: csinv w8, w10, wzr, ge
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: fcvtzs w10, s2
-; CHECK-CVT-NEXT: fcvt s2, h3
-; CHECK-CVT-NEXT: csel w9, w9, wzr, lt
-; CHECK-CVT-NEXT: mov h3, v0.h[6]
-; CHECK-CVT-NEXT: mov v1.b[2], w8
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge
-; CHECK-CVT-NEXT: cmp w10, #0
-; CHECK-CVT-NEXT: fcvtzs w9, s2
-; CHECK-CVT-NEXT: csel w10, w10, wzr, lt
-; CHECK-CVT-NEXT: fcvt s2, h3
-; CHECK-CVT-NEXT: mov h0, v0.h[7]
-; CHECK-CVT-NEXT: mov v1.b[3], w8
-; CHECK-CVT-NEXT: cmp w10, #0
-; CHECK-CVT-NEXT: csinv w8, w10, wzr, ge
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: csel w9, w9, wzr, lt
-; CHECK-CVT-NEXT: fcvtzs w10, s2
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov v1.b[4], w8
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge
-; CHECK-CVT-NEXT: cmp w10, #0
-; CHECK-CVT-NEXT: csel w9, w10, wzr, lt
-; CHECK-CVT-NEXT: fcvtzs w10, s0
-; CHECK-CVT-NEXT: mov v1.b[5], w8
-; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge
+; CHECK-CVT-NEXT: fcvtzs w12, s2
; CHECK-CVT-NEXT: cmp w10, #0
-; CHECK-CVT-NEXT: csel w9, w10, wzr, lt
-; CHECK-CVT-NEXT: mov v1.b[6], w8
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
+; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge
+; CHECK-CVT-NEXT: cmp w11, #0
+; CHECK-CVT-NEXT: csel w11, w11, wzr, lt
+; CHECK-CVT-NEXT: fcvtzs w14, s1
+; CHECK-CVT-NEXT: cmp w11, #0
+; CHECK-CVT-NEXT: fmov s2, w9
+; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge
+; CHECK-CVT-NEXT: cmp w12, #0
+; CHECK-CVT-NEXT: csel w12, w12, wzr, lt
+; CHECK-CVT-NEXT: cmp w12, #0
+; CHECK-CVT-NEXT: csinv w12, w12, wzr, ge
+; CHECK-CVT-NEXT: cmp w13, #0
+; CHECK-CVT-NEXT: csel w13, w13, wzr, lt
+; CHECK-CVT-NEXT: cmp w13, #0
+; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge
+; CHECK-CVT-NEXT: cmp w14, #0
+; CHECK-CVT-NEXT: csel w9, w14, wzr, lt
; CHECK-CVT-NEXT: cmp w9, #0
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge
-; CHECK-CVT-NEXT: mov v1.b[7], w8
-; CHECK-CVT-NEXT: fmov d0, d1
+; CHECK-CVT-NEXT: fmov s1, w13
+; CHECK-CVT-NEXT: fcvtzs w13, s0
+; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge
+; CHECK-CVT-NEXT: mov v2.s[1], w8
+; CHECK-CVT-NEXT: mov v1.s[1], w12
+; CHECK-CVT-NEXT: cmp w13, #0
+; CHECK-CVT-NEXT: csel w8, w13, wzr, lt
+; CHECK-CVT-NEXT: cmp w8, #0
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
+; CHECK-CVT-NEXT: mov v1.s[2], w9
+; CHECK-CVT-NEXT: mov v2.s[2], w10
+; CHECK-CVT-NEXT: mov v1.s[3], w8
+; CHECK-CVT-NEXT: mov v2.s[3], w11
+; CHECK-CVT-NEXT: xtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v8f16_v8i1:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs w9, h0
-; CHECK-FP16-NEXT: mov h2, v0.h[3]
-; CHECK-FP16-NEXT: mov h3, v0.h[5]
-; CHECK-FP16-NEXT: fcvtzs w8, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[2]
-; CHECK-FP16-NEXT: cmp w8, #0
-; CHECK-FP16-NEXT: csel w8, w8, wzr, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h1
-; CHECK-FP16-NEXT: cmp w8, #0
-; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: csel w9, w9, wzr, lt
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: csinv w9, w9, wzr, ge
-; CHECK-FP16-NEXT: cmp w10, #0
-; CHECK-FP16-NEXT: fmov s1, w9
-; CHECK-FP16-NEXT: csel w9, w10, wzr, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[4]
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: mov v1.b[1], w8
-; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge
-; CHECK-FP16-NEXT: cmp w10, #0
-; CHECK-FP16-NEXT: fcvtzs w9, h2
-; CHECK-FP16-NEXT: csel w10, w10, wzr, lt
-; CHECK-FP16-NEXT: cmp w10, #0
-; CHECK-FP16-NEXT: mov h2, v0.h[6]
-; CHECK-FP16-NEXT: mov v1.b[2], w8
-; CHECK-FP16-NEXT: csinv w8, w10, wzr, ge
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: fcvtzs w10, h3
-; CHECK-FP16-NEXT: csel w9, w9, wzr, lt
-; CHECK-FP16-NEXT: mov h0, v0.h[7]
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: mov v1.b[3], w8
-; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge
-; CHECK-FP16-NEXT: cmp w10, #0
-; CHECK-FP16-NEXT: csel w9, w10, wzr, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h2
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: mov v1.b[4], w8
-; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge
-; CHECK-FP16-NEXT: cmp w10, #0
-; CHECK-FP16-NEXT: csel w9, w10, wzr, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h0
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: mov v1.b[5], w8
-; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge
-; CHECK-FP16-NEXT: cmp w10, #0
-; CHECK-FP16-NEXT: csel w9, w10, wzr, lt
-; CHECK-FP16-NEXT: cmp w9, #0
-; CHECK-FP16-NEXT: mov v1.b[6], w8
-; CHECK-FP16-NEXT: csinv w8, w9, wzr, ge
-; CHECK-FP16-NEXT: mov v1.b[7], w8
-; CHECK-FP16-NEXT: fmov d0, d1
+; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000
+; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
+; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f)
ret <8 x i1> %x
@@ -2768,127 +2142,73 @@ define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v8f16_v8i8:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
; CHECK-CVT-NEXT: mov w8, #127
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: mov w10, #-128
; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzs w10, s1
+; CHECK-CVT-NEXT: fcvtzs w11, s1
+; CHECK-CVT-NEXT: fcvtzs w15, s0
; CHECK-CVT-NEXT: fcvtzs w9, s2
; CHECK-CVT-NEXT: mov s2, v1.s[2]
+; CHECK-CVT-NEXT: mov s1, v1.s[3]
; CHECK-CVT-NEXT: cmp w9, #127
-; CHECK-CVT-NEXT: csel w11, w9, w8, lt
-; CHECK-CVT-NEXT: mov w9, #-128
-; CHECK-CVT-NEXT: cmn w11, #128
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: csel w11, w11, w9, gt
-; CHECK-CVT-NEXT: cmp w10, #127
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: mov s2, v1.s[3]
-; CHECK-CVT-NEXT: cmn w10, #128
-; CHECK-CVT-NEXT: csel w10, w10, w9, gt
-; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
+; CHECK-CVT-NEXT: csel w9, w9, w8, lt
; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: cmn w10, #128
-; CHECK-CVT-NEXT: csel w10, w10, w9, gt
+; CHECK-CVT-NEXT: cmn w9, #128
; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: mov v1.b[1], w11
-; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: csel w11, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s0
+; CHECK-CVT-NEXT: csel w9, w9, w10, gt
+; CHECK-CVT-NEXT: cmp w11, #127
+; CHECK-CVT-NEXT: csel w11, w11, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w13, s1
; CHECK-CVT-NEXT: cmn w11, #128
-; CHECK-CVT-NEXT: mov v1.b[2], w10
-; CHECK-CVT-NEXT: csel w10, w11, w9, gt
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
+; CHECK-CVT-NEXT: csel w11, w11, w10, gt
; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: fcvtzs w11, s2
; CHECK-CVT-NEXT: csel w12, w12, w8, lt
-; CHECK-CVT-NEXT: mov s2, v0.s[2]
+; CHECK-CVT-NEXT: fcvtzs w14, s2
; CHECK-CVT-NEXT: cmn w12, #128
; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: mov v1.b[3], w10
-; CHECK-CVT-NEXT: csel w10, w12, w9, gt
-; CHECK-CVT-NEXT: cmp w11, #127
-; CHECK-CVT-NEXT: csel w11, w11, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: cmn w11, #128
-; CHECK-CVT-NEXT: mov v1.b[4], w10
-; CHECK-CVT-NEXT: csel w10, w11, w9, gt
-; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: csel w11, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s0
+; CHECK-CVT-NEXT: csel w12, w12, w10, gt
+; CHECK-CVT-NEXT: cmp w13, #127
+; CHECK-CVT-NEXT: csel w13, w13, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w16, s1
+; CHECK-CVT-NEXT: cmn w13, #128
+; CHECK-CVT-NEXT: fmov s2, w11
+; CHECK-CVT-NEXT: csel w13, w13, w10, gt
+; CHECK-CVT-NEXT: cmp w14, #127
+; CHECK-CVT-NEXT: csel w14, w14, w8, lt
+; CHECK-CVT-NEXT: cmn w14, #128
+; CHECK-CVT-NEXT: csel w14, w14, w10, gt
+; CHECK-CVT-NEXT: cmp w15, #127
+; CHECK-CVT-NEXT: csel w15, w15, w8, lt
+; CHECK-CVT-NEXT: cmn w15, #128
+; CHECK-CVT-NEXT: csel w15, w15, w10, gt
+; CHECK-CVT-NEXT: cmp w16, #127
+; CHECK-CVT-NEXT: csel w11, w16, w8, lt
; CHECK-CVT-NEXT: cmn w11, #128
-; CHECK-CVT-NEXT: mov v1.b[5], w10
-; CHECK-CVT-NEXT: csel w10, w11, w9, gt
-; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: csel w8, w12, w8, lt
+; CHECK-CVT-NEXT: fmov s1, w15
+; CHECK-CVT-NEXT: fcvtzs w15, s0
+; CHECK-CVT-NEXT: csel w11, w11, w10, gt
+; CHECK-CVT-NEXT: mov v2.s[1], w9
+; CHECK-CVT-NEXT: mov v1.s[1], w14
+; CHECK-CVT-NEXT: cmp w15, #127
+; CHECK-CVT-NEXT: csel w8, w15, w8, lt
; CHECK-CVT-NEXT: cmn w8, #128
-; CHECK-CVT-NEXT: mov v1.b[6], w10
-; CHECK-CVT-NEXT: csel w8, w8, w9, gt
-; CHECK-CVT-NEXT: mov v1.b[7], w8
-; CHECK-CVT-NEXT: fmov d0, d1
+; CHECK-CVT-NEXT: csel w8, w8, w10, gt
+; CHECK-CVT-NEXT: mov v1.s[2], w11
+; CHECK-CVT-NEXT: mov v2.s[2], w12
+; CHECK-CVT-NEXT: mov v1.s[3], w8
+; CHECK-CVT-NEXT: mov v2.s[3], w13
+; CHECK-CVT-NEXT: xtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v8f16_v8i8:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov w8, #127
-; CHECK-FP16-NEXT: fcvtzs w10, h0
-; CHECK-FP16-NEXT: mov h2, v0.h[3]
-; CHECK-FP16-NEXT: mov h3, v0.h[5]
-; CHECK-FP16-NEXT: fcvtzs w9, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[2]
-; CHECK-FP16-NEXT: cmp w9, #127
-; CHECK-FP16-NEXT: csel w11, w9, w8, lt
-; CHECK-FP16-NEXT: mov w9, #-128
-; CHECK-FP16-NEXT: cmn w11, #128
-; CHECK-FP16-NEXT: fcvtzs w12, h1
-; CHECK-FP16-NEXT: csel w11, w11, w9, gt
-; CHECK-FP16-NEXT: cmp w10, #127
-; CHECK-FP16-NEXT: csel w10, w10, w8, lt
-; CHECK-FP16-NEXT: cmn w10, #128
-; CHECK-FP16-NEXT: csel w10, w10, w9, gt
-; CHECK-FP16-NEXT: cmp w12, #127
-; CHECK-FP16-NEXT: fmov s1, w10
-; CHECK-FP16-NEXT: csel w10, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[4]
-; CHECK-FP16-NEXT: cmn w10, #128
-; CHECK-FP16-NEXT: mov v1.b[1], w11
-; CHECK-FP16-NEXT: csel w10, w10, w9, gt
-; CHECK-FP16-NEXT: cmp w12, #127
-; CHECK-FP16-NEXT: fcvtzs w11, h2
-; CHECK-FP16-NEXT: csel w12, w12, w8, lt
-; CHECK-FP16-NEXT: cmn w12, #128
-; CHECK-FP16-NEXT: mov h2, v0.h[6]
-; CHECK-FP16-NEXT: mov v1.b[2], w10
-; CHECK-FP16-NEXT: csel w10, w12, w9, gt
-; CHECK-FP16-NEXT: cmp w11, #127
-; CHECK-FP16-NEXT: fcvtzs w12, h3
-; CHECK-FP16-NEXT: csel w11, w11, w8, lt
-; CHECK-FP16-NEXT: mov h0, v0.h[7]
-; CHECK-FP16-NEXT: cmn w11, #128
-; CHECK-FP16-NEXT: mov v1.b[3], w10
-; CHECK-FP16-NEXT: csel w10, w11, w9, gt
-; CHECK-FP16-NEXT: cmp w12, #127
-; CHECK-FP16-NEXT: csel w11, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h2
-; CHECK-FP16-NEXT: cmn w11, #128
-; CHECK-FP16-NEXT: mov v1.b[4], w10
-; CHECK-FP16-NEXT: csel w10, w11, w9, gt
-; CHECK-FP16-NEXT: cmp w12, #127
-; CHECK-FP16-NEXT: csel w11, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h0
-; CHECK-FP16-NEXT: cmn w11, #128
-; CHECK-FP16-NEXT: mov v1.b[5], w10
-; CHECK-FP16-NEXT: csel w10, w11, w9, gt
-; CHECK-FP16-NEXT: cmp w12, #127
-; CHECK-FP16-NEXT: csel w8, w12, w8, lt
-; CHECK-FP16-NEXT: cmn w8, #128
-; CHECK-FP16-NEXT: mov v1.b[6], w10
-; CHECK-FP16-NEXT: csel w8, w8, w9, gt
-; CHECK-FP16-NEXT: mov v1.b[7], w8
-; CHECK-FP16-NEXT: fmov d0, d1
+; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
+; CHECK-FP16-NEXT: sqxtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i8> @llvm.fptosi.sat.v8f16.v8i8(<8 x half> %f)
ret <8 x i8> %x
@@ -2897,134 +2217,75 @@ define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v8f16_v8i13:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov w9, #4095
+; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: mov w8, #4095
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-NEXT: mov w10, #-4096
-; CHECK-CVT-NEXT: mov h3, v0.h[4]
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzs w11, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[3]
-; CHECK-CVT-NEXT: fcvtzs w8, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: cmp w8, #4095
-; CHECK-CVT-NEXT: csel w8, w8, w9, lt
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w8, w8, w10, gt
+; CHECK-CVT-NEXT: mov s2, v1.s[1]
+; CHECK-CVT-NEXT: fcvtzs w11, s1
+; CHECK-CVT-NEXT: fcvtzs w15, s0
+; CHECK-CVT-NEXT: fcvtzs w9, s2
+; CHECK-CVT-NEXT: mov s2, v1.s[2]
+; CHECK-CVT-NEXT: mov s1, v1.s[3]
+; CHECK-CVT-NEXT: cmp w9, #4095
+; CHECK-CVT-NEXT: csel w9, w9, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w12, s2
+; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096
+; CHECK-CVT-NEXT: mov s2, v0.s[1]
+; CHECK-CVT-NEXT: csel w9, w9, w10, gt
; CHECK-CVT-NEXT: cmp w11, #4095
-; CHECK-CVT-NEXT: csel w11, w11, w9, lt
+; CHECK-CVT-NEXT: csel w11, w11, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w13, s1
; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: fcvtzs w12, s1
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
; CHECK-CVT-NEXT: csel w11, w11, w10, gt
; CHECK-CVT-NEXT: cmp w12, #4095
-; CHECK-CVT-NEXT: fmov s1, w11
-; CHECK-CVT-NEXT: fcvtzs w11, s2
-; CHECK-CVT-NEXT: fcvt s2, h3
-; CHECK-CVT-NEXT: mov h3, v0.h[5]
-; CHECK-CVT-NEXT: csel w12, w12, w9, lt
-; CHECK-CVT-NEXT: mov v1.h[1], w8
-; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w8, w12, w10, gt
-; CHECK-CVT-NEXT: cmp w11, #4095
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: fcvt s2, h3
-; CHECK-CVT-NEXT: csel w11, w11, w9, lt
-; CHECK-CVT-NEXT: mov h3, v0.h[6]
-; CHECK-CVT-NEXT: mov v1.h[2], w8
-; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w8, w11, w10, gt
-; CHECK-CVT-NEXT: cmp w12, #4095
-; CHECK-CVT-NEXT: fcvtzs w11, s2
-; CHECK-CVT-NEXT: csel w12, w12, w9, lt
-; CHECK-CVT-NEXT: fcvt s2, h3
-; CHECK-CVT-NEXT: mov h0, v0.h[7]
-; CHECK-CVT-NEXT: mov v1.h[3], w8
+; CHECK-CVT-NEXT: csel w12, w12, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w14, s2
; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w8, w12, w10, gt
-; CHECK-CVT-NEXT: cmp w11, #4095
-; CHECK-CVT-NEXT: csel w11, w11, w9, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov v1.h[4], w8
-; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w8, w11, w10, gt
-; CHECK-CVT-NEXT: cmp w12, #4095
-; CHECK-CVT-NEXT: csel w11, w12, w9, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s0
-; CHECK-CVT-NEXT: mov v1.h[5], w8
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
+; CHECK-CVT-NEXT: csel w12, w12, w10, gt
+; CHECK-CVT-NEXT: cmp w13, #4095
+; CHECK-CVT-NEXT: csel w13, w13, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w16, s1
+; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096
+; CHECK-CVT-NEXT: fmov s2, w11
+; CHECK-CVT-NEXT: csel w13, w13, w10, gt
+; CHECK-CVT-NEXT: cmp w14, #4095
+; CHECK-CVT-NEXT: csel w14, w14, w8, lt
+; CHECK-CVT-NEXT: cmn w14, #1, lsl #12 // =4096
+; CHECK-CVT-NEXT: csel w14, w14, w10, gt
+; CHECK-CVT-NEXT: cmp w15, #4095
+; CHECK-CVT-NEXT: csel w15, w15, w8, lt
+; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096
+; CHECK-CVT-NEXT: csel w15, w15, w10, gt
+; CHECK-CVT-NEXT: cmp w16, #4095
+; CHECK-CVT-NEXT: csel w11, w16, w8, lt
; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w8, w11, w10, gt
-; CHECK-CVT-NEXT: cmp w12, #4095
-; CHECK-CVT-NEXT: csel w9, w12, w9, lt
-; CHECK-CVT-NEXT: mov v1.h[6], w8
-; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w8, w9, w10, gt
-; CHECK-CVT-NEXT: mov v1.h[7], w8
-; CHECK-CVT-NEXT: mov v0.16b, v1.16b
+; CHECK-CVT-NEXT: fmov s1, w15
+; CHECK-CVT-NEXT: fcvtzs w15, s0
+; CHECK-CVT-NEXT: csel w11, w11, w10, gt
+; CHECK-CVT-NEXT: mov v2.s[1], w9
+; CHECK-CVT-NEXT: mov v1.s[1], w14
+; CHECK-CVT-NEXT: cmp w15, #4095
+; CHECK-CVT-NEXT: csel w8, w15, w8, lt
+; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096
+; CHECK-CVT-NEXT: csel w8, w8, w10, gt
+; CHECK-CVT-NEXT: mov v1.s[2], w11
+; CHECK-CVT-NEXT: mov v2.s[2], w12
+; CHECK-CVT-NEXT: mov v1.s[3], w8
+; CHECK-CVT-NEXT: mov v2.s[3], w13
+; CHECK-CVT-NEXT: xtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v8f16_v8i13:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov w8, #4095
-; CHECK-FP16-NEXT: fcvtzs w10, h0
-; CHECK-FP16-NEXT: mov w11, #-4096
-; CHECK-FP16-NEXT: mov h2, v0.h[3]
-; CHECK-FP16-NEXT: mov h3, v0.h[5]
-; CHECK-FP16-NEXT: fcvtzs w9, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[2]
-; CHECK-FP16-NEXT: cmp w9, #4095
-; CHECK-FP16-NEXT: csel w9, w9, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h1
-; CHECK-FP16-NEXT: cmn w9, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: csel w9, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w10, #4095
-; CHECK-FP16-NEXT: csel w10, w10, w8, lt
-; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: csel w10, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #4095
-; CHECK-FP16-NEXT: fmov s1, w10
-; CHECK-FP16-NEXT: csel w10, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[4]
-; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov v1.h[1], w9
-; CHECK-FP16-NEXT: csel w9, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #4095
-; CHECK-FP16-NEXT: fcvtzs w10, h2
-; CHECK-FP16-NEXT: csel w12, w12, w8, lt
-; CHECK-FP16-NEXT: cmn w12, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov h2, v0.h[6]
-; CHECK-FP16-NEXT: mov v1.h[2], w9
-; CHECK-FP16-NEXT: csel w9, w12, w11, gt
-; CHECK-FP16-NEXT: cmp w10, #4095
-; CHECK-FP16-NEXT: fcvtzs w12, h3
-; CHECK-FP16-NEXT: csel w10, w10, w8, lt
-; CHECK-FP16-NEXT: mov h0, v0.h[7]
-; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov v1.h[3], w9
-; CHECK-FP16-NEXT: csel w9, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #4095
-; CHECK-FP16-NEXT: csel w10, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h2
-; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov v1.h[4], w9
-; CHECK-FP16-NEXT: csel w9, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #4095
-; CHECK-FP16-NEXT: csel w10, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h0
-; CHECK-FP16-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov v1.h[5], w9
-; CHECK-FP16-NEXT: csel w9, w10, w11, gt
-; CHECK-FP16-NEXT: cmp w12, #4095
-; CHECK-FP16-NEXT: csel w8, w12, w8, lt
-; CHECK-FP16-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-FP16-NEXT: mov v1.h[6], w9
-; CHECK-FP16-NEXT: csel w8, w8, w11, gt
-; CHECK-FP16-NEXT: mov v1.h[7], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
+; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8
+; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
+; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT: movi v1.8h, #240, lsl #8
+; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i13> @llvm.fptosi.sat.v8f16.v8i13(<8 x half> %f)
ret <8 x i13> %x
@@ -3033,65 +2294,66 @@ define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v8f16_v8i16:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
; CHECK-CVT-NEXT: mov w8, #32767
-; CHECK-CVT-NEXT: mov w11, #-32768
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: mov w10, #-32768
; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzs w10, s1
+; CHECK-CVT-NEXT: fcvtzs w11, s1
+; CHECK-CVT-NEXT: fcvtzs w15, s0
; CHECK-CVT-NEXT: fcvtzs w9, s2
; CHECK-CVT-NEXT: mov s2, v1.s[2]
+; CHECK-CVT-NEXT: mov s1, v1.s[3]
; CHECK-CVT-NEXT: cmp w9, w8
; CHECK-CVT-NEXT: csel w9, w9, w8, lt
; CHECK-CVT-NEXT: fcvtzs w12, s2
; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov s2, v1.s[3]
-; CHECK-CVT-NEXT: csel w9, w9, w11, gt
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: mov v1.h[1], w9
-; CHECK-CVT-NEXT: csel w9, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s0
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v1.h[2], w9
-; CHECK-CVT-NEXT: csel w9, w10, w11, gt
+; CHECK-CVT-NEXT: csel w9, w9, w10, gt
+; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: csel w11, w11, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w13, s1
+; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
+; CHECK-CVT-NEXT: csel w11, w11, w10, gt
; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: fcvtzs w10, s2
; CHECK-CVT-NEXT: csel w12, w12, w8, lt
-; CHECK-CVT-NEXT: mov s2, v0.s[2]
+; CHECK-CVT-NEXT: fcvtzs w14, s2
; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768
; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: mov v1.h[3], w9
-; CHECK-CVT-NEXT: csel w9, w12, w11, gt
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v1.h[4], w9
-; CHECK-CVT-NEXT: csel w9, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w12, s0
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v1.h[5], w9
-; CHECK-CVT-NEXT: csel w9, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: csel w8, w12, w8, lt
+; CHECK-CVT-NEXT: csel w12, w12, w10, gt
+; CHECK-CVT-NEXT: cmp w13, w8
+; CHECK-CVT-NEXT: csel w13, w13, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w16, s1
+; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT: fmov s2, w11
+; CHECK-CVT-NEXT: csel w13, w13, w10, gt
+; CHECK-CVT-NEXT: cmp w14, w8
+; CHECK-CVT-NEXT: csel w14, w14, w8, lt
+; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT: csel w14, w14, w10, gt
+; CHECK-CVT-NEXT: cmp w15, w8
+; CHECK-CVT-NEXT: csel w15, w15, w8, lt
+; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT: csel w15, w15, w10, gt
+; CHECK-CVT-NEXT: cmp w16, w8
+; CHECK-CVT-NEXT: csel w11, w16, w8, lt
+; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT: fmov s1, w15
+; CHECK-CVT-NEXT: fcvtzs w15, s0
+; CHECK-CVT-NEXT: csel w11, w11, w10, gt
+; CHECK-CVT-NEXT: mov v2.s[1], w9
+; CHECK-CVT-NEXT: mov v1.s[1], w14
+; CHECK-CVT-NEXT: cmp w15, w8
+; CHECK-CVT-NEXT: csel w8, w15, w8, lt
; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v1.h[6], w9
-; CHECK-CVT-NEXT: csel w8, w8, w11, gt
-; CHECK-CVT-NEXT: mov v1.h[7], w8
-; CHECK-CVT-NEXT: mov v0.16b, v1.16b
+; CHECK-CVT-NEXT: csel w8, w8, w10, gt
+; CHECK-CVT-NEXT: mov v1.s[2], w11
+; CHECK-CVT-NEXT: mov v2.s[2], w12
+; CHECK-CVT-NEXT: mov v1.s[3], w8
+; CHECK-CVT-NEXT: mov v2.s[3], w13
+; CHECK-CVT-NEXT: xtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v8f16_v8i16:
@@ -3103,159 +2365,39 @@ define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
}
define <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i19:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-CVT-NEXT: mov w8, #262143
-; CHECK-CVT-NEXT: mov w12, #-262144
-; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h2, v1.h[1]
-; CHECK-CVT-NEXT: fcvt s3, h1
-; CHECK-CVT-NEXT: mov h4, v1.h[2]
-; CHECK-CVT-NEXT: mov h1, v1.h[3]
-; CHECK-CVT-NEXT: fcvtzs w10, s5
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvtzs w9, s3
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: fcvtzs w11, s2
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: fcvtzs w13, s3
-; CHECK-CVT-NEXT: csel w4, w9, w12, gt
-; CHECK-CVT-NEXT: mov h2, v0.h[1]
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: fcvtzs w9, s1
-; CHECK-CVT-NEXT: csel w11, w11, w8, lt
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: cmn w11, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: csel w5, w11, w12, gt
-; CHECK-CVT-NEXT: cmp w13, w8
-; CHECK-CVT-NEXT: csel w11, w13, w8, lt
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: cmn w11, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: csel w6, w11, w12, gt
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: fcvtzs w11, s2
-; CHECK-CVT-NEXT: csel w7, w9, w12, gt
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w10, s1
-; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: csel w0, w9, w12, gt
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w9, w11, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w11, s0
-; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: csel w1, w9, w12, gt
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lt
-; CHECK-CVT-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: csel w2, w9, w12, gt
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w8, w11, w8, lt
-; CHECK-CVT-NEXT: cmn w8, #64, lsl #12 // =262144
-; CHECK-CVT-NEXT: csel w3, w8, w12, gt
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i19:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov w8, #262143
-; CHECK-FP16-NEXT: mov w11, #-262144
-; CHECK-FP16-NEXT: mov h2, v1.h[1]
-; CHECK-FP16-NEXT: fcvtzs w9, h1
-; CHECK-FP16-NEXT: mov h3, v1.h[2]
-; CHECK-FP16-NEXT: mov h1, v1.h[3]
-; CHECK-FP16-NEXT: cmp w9, w8
-; CHECK-FP16-NEXT: fcvtzs w10, h2
-; CHECK-FP16-NEXT: csel w9, w9, w8, lt
-; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: fcvtzs w12, h3
-; CHECK-FP16-NEXT: csel w4, w9, w11, gt
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w9, w10, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h1
-; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: csel w5, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w12, w8
-; CHECK-FP16-NEXT: csel w9, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h0
-; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: csel w6, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w9, w10, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h1
-; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: csel w7, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w12, w8
-; CHECK-FP16-NEXT: csel w9, w12, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w12, h2
-; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: csel w0, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w9, w10, w8, lt
-; CHECK-FP16-NEXT: fcvtzs w10, h0
-; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: csel w1, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w12, w8
-; CHECK-FP16-NEXT: csel w9, w12, w8, lt
-; CHECK-FP16-NEXT: cmn w9, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: csel w2, w9, w11, gt
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w8, w10, w8, lt
-; CHECK-FP16-NEXT: cmn w8, #64, lsl #12 // =262144
-; CHECK-FP16-NEXT: csel w3, w8, w11, gt
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v8f16_v8i19:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: movi v1.4s, #3, msl #16
+; CHECK-NEXT: mvni v3.4s, #3, msl #16
+; CHECK-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: mov w1, v1.s[1]
+; CHECK-NEXT: mov w2, v1.s[2]
+; CHECK-NEXT: mov w5, v0.s[1]
+; CHECK-NEXT: mov w3, v1.s[3]
+; CHECK-NEXT: mov w6, v0.s[2]
+; CHECK-NEXT: mov w7, v0.s[3]
+; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: ret
%x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f)
ret <8 x i19> %x
}
define <8 x i32> @test_signed_v8f16_v8i32_duplicate(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_signed_v8f16_v8i32_duplicate:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_signed_v8f16_v8i32_duplicate:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h4, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs w9, h0
-; CHECK-FP16-NEXT: mov h2, v3.h[1]
-; CHECK-FP16-NEXT: fcvtzs w8, h3
-; CHECK-FP16-NEXT: mov h5, v3.h[2]
-; CHECK-FP16-NEXT: mov h3, v3.h[3]
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzs w8, h2
-; CHECK-FP16-NEXT: fmov s2, w9
-; CHECK-FP16-NEXT: fcvtzs w9, h4
-; CHECK-FP16-NEXT: mov h4, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h5
-; CHECK-FP16-NEXT: mov v2.s[1], w9
-; CHECK-FP16-NEXT: fcvtzs w9, h4
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzs w8, h3
-; CHECK-FP16-NEXT: mov v2.s[2], w9
-; CHECK-FP16-NEXT: fcvtzs w9, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v2.s[3], w9
-; CHECK-FP16-NEXT: mov v0.16b, v2.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_signed_v8f16_v8i32_duplicate:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f)
ret <8 x i32> %x
}
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 471d8d423545..a903d842ec9b 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -511,224 +511,98 @@ define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
}
define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v2f16_v2i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v2f16_v2i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: fmov s0, w8
-; CHECK-FP16-NEXT: fcvtzu w8, h1
-; CHECK-FP16-NEXT: mov v0.s[1], w8
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v2f16_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f16.v2i32(<2 x half> %f)
ret <2 x i32> %x
}
define <3 x i32> @test_unsigned_v3f16_v3i32(<3 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v3f16_v3i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v3f16_v3i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v3f16_v3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <3 x i32> @llvm.fptoui.sat.v3f16.v3i32(<3 x half> %f)
ret <3 x i32> %x
}
define <4 x i32> @test_unsigned_v4f16_v4i32(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v4f16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptoui.sat.v4f16.v4i32(<4 x half> %f)
ret <4 x i32> %x
}
define <5 x i32> @test_unsigned_v5f16_v5i32(<5 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v5f16_v5i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: mov w1, v1.s[1]
-; CHECK-CVT-NEXT: mov w2, v1.s[2]
-; CHECK-CVT-NEXT: mov w3, v1.s[3]
-; CHECK-CVT-NEXT: fmov w0, s1
-; CHECK-CVT-NEXT: fmov w4, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v5f16_v5i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w0, h0
-; CHECK-FP16-NEXT: fcvtzu w1, h1
-; CHECK-FP16-NEXT: fcvtzu w2, h2
-; CHECK-FP16-NEXT: fcvtzu w4, h3
-; CHECK-FP16-NEXT: fcvtzu w3, h4
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v5f16_v5i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: mov w1, v1.s[1]
+; CHECK-NEXT: mov w2, v1.s[2]
+; CHECK-NEXT: mov w3, v1.s[3]
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: ret
%x = call <5 x i32> @llvm.fptoui.sat.v5f16.v5i32(<5 x half> %f)
ret <5 x i32> %x
}
define <6 x i32> @test_unsigned_v6f16_v6i32(<6 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v6f16_v6i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: mov w1, v1.s[1]
-; CHECK-CVT-NEXT: mov w2, v1.s[2]
-; CHECK-CVT-NEXT: mov w3, v1.s[3]
-; CHECK-CVT-NEXT: mov w5, v0.s[1]
-; CHECK-CVT-NEXT: fmov w0, s1
-; CHECK-CVT-NEXT: fmov w4, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v6f16_v6i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w0, h0
-; CHECK-FP16-NEXT: mov h2, v1.h[1]
-; CHECK-FP16-NEXT: fcvtzu w8, h1
-; CHECK-FP16-NEXT: fcvtzu w2, h3
-; CHECK-FP16-NEXT: fcvtzu w3, h4
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzu w5, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: mov v1.s[1], w5
-; CHECK-FP16-NEXT: fcvtzu w1, h2
-; CHECK-FP16-NEXT: fmov w4, s1
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v6f16_v6i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: mov w1, v1.s[1]
+; CHECK-NEXT: mov w2, v1.s[2]
+; CHECK-NEXT: mov w3, v1.s[3]
+; CHECK-NEXT: mov w5, v0.s[1]
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: ret
%x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f)
ret <6 x i32> %x
}
define <7 x i32> @test_unsigned_v7f16_v7i32(<7 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v7f16_v7i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: mov w1, v1.s[1]
-; CHECK-CVT-NEXT: mov w2, v1.s[2]
-; CHECK-CVT-NEXT: mov w3, v1.s[3]
-; CHECK-CVT-NEXT: mov w5, v0.s[1]
-; CHECK-CVT-NEXT: mov w6, v0.s[2]
-; CHECK-CVT-NEXT: fmov w0, s1
-; CHECK-CVT-NEXT: fmov w4, s0
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v7f16_v7i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h4, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w0, h0
-; CHECK-FP16-NEXT: mov h2, v1.h[1]
-; CHECK-FP16-NEXT: fcvtzu w8, h1
-; CHECK-FP16-NEXT: mov h1, v1.h[2]
-; CHECK-FP16-NEXT: fcvtzu w3, h4
-; CHECK-FP16-NEXT: fmov s3, w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: fcvtzu w6, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: mov v3.s[1], w8
-; CHECK-FP16-NEXT: fcvtzu w2, h2
-; CHECK-FP16-NEXT: fcvtzu w1, h1
-; CHECK-FP16-NEXT: mov v3.s[2], w6
-; CHECK-FP16-NEXT: mov w5, v3.s[1]
-; CHECK-FP16-NEXT: fmov w4, s3
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v7f16_v7i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: mov w1, v1.s[1]
+; CHECK-NEXT: mov w2, v1.s[2]
+; CHECK-NEXT: mov w3, v1.s[3]
+; CHECK-NEXT: mov w5, v0.s[1]
+; CHECK-NEXT: mov w6, v0.s[2]
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: ret
%x = call <7 x i32> @llvm.fptoui.sat.v7f16.v7i32(<7 x half> %f)
ret <7 x i32> %x
}
define <8 x i32> @test_unsigned_v8f16_v8i32(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i32:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i32:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h4, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov h2, v3.h[1]
-; CHECK-FP16-NEXT: fcvtzu w8, h3
-; CHECK-FP16-NEXT: mov h5, v3.h[2]
-; CHECK-FP16-NEXT: mov h3, v3.h[3]
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: fmov s2, w9
-; CHECK-FP16-NEXT: fcvtzu w9, h4
-; CHECK-FP16-NEXT: mov h4, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h5
-; CHECK-FP16-NEXT: mov v2.s[1], w9
-; CHECK-FP16-NEXT: fcvtzu w9, h4
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h3
-; CHECK-FP16-NEXT: mov v2.s[2], w9
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v2.s[3], w9
-; CHECK-FP16-NEXT: mov v0.16b, v2.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v8f16_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f)
ret <8 x i32> %x
}
@@ -750,17 +624,9 @@ declare <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float>)
define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w9, s0
-; CHECK-NEXT: fcvtzu w8, s1
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: csinc w8, w8, wzr, lo
-; CHECK-NEXT: cmp w9, #1
-; CHECK-NEXT: csinc w9, w9, wzr, lo
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2s, #1
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i1> @llvm.fptoui.sat.v2f32.v2i1(<2 x float> %f)
ret <2 x i1> %x
@@ -769,18 +635,9 @@ define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) {
define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov w8, #255
-; CHECK-NEXT: fcvtzu w9, s1
-; CHECK-NEXT: cmp w9, #255
-; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: csel w8, w10, w8, lo
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i8> @llvm.fptoui.sat.v2f32.v2i8(<2 x float> %f)
ret <2 x i8> %x
@@ -789,18 +646,9 @@ define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) {
define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i13:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w9, s0
-; CHECK-NEXT: mov w10, #8191
-; CHECK-NEXT: fcvtzu w8, s1
-; CHECK-NEXT: cmp w8, w10
-; CHECK-NEXT: csel w8, w8, w10, lo
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w9, w9, w10, lo
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2s, #31, msl #8
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i13> @llvm.fptoui.sat.v2f32.v2i13(<2 x float> %f)
ret <2 x i13> %x
@@ -809,18 +657,9 @@ define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) {
define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w9, s0
-; CHECK-NEXT: mov w10, #65535
-; CHECK-NEXT: fcvtzu w8, s1
-; CHECK-NEXT: cmp w8, w10
-; CHECK-NEXT: csel w8, w8, w10, lo
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w9, w9, w10, lo
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i16> @llvm.fptoui.sat.v2f32.v2i16(<2 x float> %f)
ret <2 x i16> %x
@@ -829,18 +668,9 @@ define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) {
define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i19:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w9, s0
-; CHECK-NEXT: mov w10, #524287
-; CHECK-NEXT: fcvtzu w8, s1
-; CHECK-NEXT: cmp w8, w10
-; CHECK-NEXT: csel w8, w8, w10, lo
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w9, w9, w10, lo
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: movi v1.2s, #7, msl #16
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i19> @llvm.fptoui.sat.v2f32.v2i19(<2 x float> %f)
ret <2 x i19> %x
@@ -1007,26 +837,10 @@ declare <4 x i128> @llvm.fptoui.sat.v4f32.v4i128(<4 x float>)
define <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w9, s0
-; CHECK-NEXT: mov s2, v0.s[2]
-; CHECK-NEXT: mov s0, v0.s[3]
-; CHECK-NEXT: fcvtzu w8, s1
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: csinc w8, w8, wzr, lo
-; CHECK-NEXT: cmp w9, #1
-; CHECK-NEXT: csinc w9, w9, wzr, lo
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: fcvtzu w9, s2
-; CHECK-NEXT: mov v1.h[1], w8
-; CHECK-NEXT: cmp w9, #1
-; CHECK-NEXT: csinc w8, w9, wzr, lo
-; CHECK-NEXT: fcvtzu w9, s0
-; CHECK-NEXT: mov v1.h[2], w8
-; CHECK-NEXT: cmp w9, #1
-; CHECK-NEXT: csinc w8, w9, wzr, lo
-; CHECK-NEXT: mov v1.h[3], w8
-; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%x = call <4 x i1> @llvm.fptoui.sat.v4f32.v4i1(<4 x float> %f)
ret <4 x i1> %x
@@ -1035,27 +849,10 @@ define <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) {
define <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov w8, #255
-; CHECK-NEXT: mov s2, v0.s[2]
-; CHECK-NEXT: mov s0, v0.s[3]
-; CHECK-NEXT: fcvtzu w9, s1
-; CHECK-NEXT: cmp w9, #255
-; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: fmov s1, w10
-; CHECK-NEXT: fcvtzu w10, s2
-; CHECK-NEXT: mov v1.h[1], w9
-; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: csel w9, w10, w8, lo
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov v1.h[2], w9
-; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: csel w8, w10, w8, lo
-; CHECK-NEXT: mov v1.h[3], w8
-; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%x = call <4 x i8> @llvm.fptoui.sat.v4f32.v4i8(<4 x float> %f)
ret <4 x i8> %x
@@ -1064,27 +861,10 @@ define <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) {
define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i13:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov w8, #8191
-; CHECK-NEXT: mov s2, v0.s[2]
-; CHECK-NEXT: mov s0, v0.s[3]
-; CHECK-NEXT: fcvtzu w9, s1
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: fmov s1, w10
-; CHECK-NEXT: fcvtzu w10, s2
-; CHECK-NEXT: mov v1.h[1], w9
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w9, w10, w8, lo
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov v1.h[2], w9
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w8, w10, w8, lo
-; CHECK-NEXT: mov v1.h[3], w8
-; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: movi v1.4s, #31, msl #8
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%x = call <4 x i13> @llvm.fptoui.sat.v4f32.v4i13(<4 x float> %f)
ret <4 x i13> %x
@@ -1093,27 +873,8 @@ define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) {
define <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov w8, #65535
-; CHECK-NEXT: mov s2, v0.s[2]
-; CHECK-NEXT: mov s0, v0.s[3]
-; CHECK-NEXT: fcvtzu w9, s1
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: fmov s1, w10
-; CHECK-NEXT: fcvtzu w10, s2
-; CHECK-NEXT: mov v1.h[1], w9
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w9, w10, w8, lo
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov v1.h[2], w9
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w8, w10, w8, lo
-; CHECK-NEXT: mov v1.h[3], w8
-; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
; CHECK-NEXT: ret
%x = call <4 x i16> @llvm.fptoui.sat.v4f32.v4i16(<4 x float> %f)
ret <4 x i16> %x
@@ -1122,27 +883,9 @@ define <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) {
define <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i19:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov w8, #524287
-; CHECK-NEXT: mov s2, v0.s[2]
-; CHECK-NEXT: mov s0, v0.s[3]
-; CHECK-NEXT: fcvtzu w9, s1
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: fmov s1, w10
-; CHECK-NEXT: fcvtzu w10, s2
-; CHECK-NEXT: mov v1.s[1], w9
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w9, w10, w8, lo
-; CHECK-NEXT: fcvtzu w10, s0
-; CHECK-NEXT: mov v1.s[2], w9
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w8, w10, w8, lo
-; CHECK-NEXT: mov v1.s[3], w8
-; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: movi v1.4s, #7, msl #16
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%x = call <4 x i19> @llvm.fptoui.sat.v4f32.v4i19(<4 x float> %f)
ret <4 x i19> %x
@@ -1632,56 +1375,18 @@ declare <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half>)
define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i1:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[3]
-; CHECK-CVT-NEXT: fcvtzu w8, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: cmp w8, #1
-; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo
-; CHECK-CVT-NEXT: fmov s0, w9
-; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: fcvt s1, h2
-; CHECK-CVT-NEXT: mov v0.h[1], w8
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo
-; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: mov v0.h[2], w8
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo
-; CHECK-CVT-NEXT: mov v0.h[3], w8
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.4s, #1
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i1:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w8, h1
-; CHECK-FP16-NEXT: cmp w8, #1
-; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo
-; CHECK-FP16-NEXT: cmp w9, #1
-; CHECK-FP16-NEXT: csinc w9, w9, wzr, lo
-; CHECK-FP16-NEXT: fmov s1, w9
-; CHECK-FP16-NEXT: fcvtzu w9, h2
-; CHECK-FP16-NEXT: mov v1.h[1], w8
-; CHECK-FP16-NEXT: cmp w9, #1
-; CHECK-FP16-NEXT: csinc w8, w9, wzr, lo
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov v1.h[2], w8
-; CHECK-FP16-NEXT: cmp w9, #1
-; CHECK-FP16-NEXT: csinc w8, w9, wzr, lo
-; CHECK-FP16-NEXT: mov v1.h[3], w8
-; CHECK-FP16-NEXT: fmov d0, d1
+; CHECK-FP16-NEXT: movi v1.4h, #1
+; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f)
ret <4 x i1> %x
@@ -1690,58 +1395,18 @@ define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) {
define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i8:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov w8, #255
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[3]
-; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: cmp w9, #255
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: fmov s0, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvt s1, h2
-; CHECK-CVT-NEXT: mov v0.h[1], w9
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: mov v0.h[2], w9
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v0.h[3], w8
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i8:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov w8, #255
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w9, h1
-; CHECK-FP16-NEXT: cmp w9, #255
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: cmp w10, #255
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: fmov s1, w10
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov v1.h[1], w9
-; CHECK-FP16-NEXT: cmp w10, #255
-; CHECK-FP16-NEXT: csel w9, w10, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov v1.h[2], w9
-; CHECK-FP16-NEXT: cmp w10, #255
-; CHECK-FP16-NEXT: csel w8, w10, w8, lo
-; CHECK-FP16-NEXT: mov v1.h[3], w8
-; CHECK-FP16-NEXT: fmov d0, d1
+; CHECK-FP16-NEXT: movi d1, #0xff00ff00ff00ff
+; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f)
ret <4 x i8> %x
@@ -1750,58 +1415,18 @@ define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) {
define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i13:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov w8, #8191
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[3]
-; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: fmov s0, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvt s1, h2
-; CHECK-CVT-NEXT: mov v0.h[1], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: mov v0.h[2], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v0.h[3], w8
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.4s, #31, msl #8
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i13:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov w8, #8191
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w9, h1
-; CHECK-FP16-NEXT: cmp w9, w8
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: fmov s1, w10
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov v1.h[1], w9
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w9, w10, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov v1.h[2], w9
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w8, w10, w8, lo
-; CHECK-FP16-NEXT: mov v1.h[3], w8
-; CHECK-FP16-NEXT: fmov d0, d1
+; CHECK-FP16-NEXT: mvni v1.4h, #224, lsl #8
+; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f)
ret <4 x i13> %x
@@ -1810,28 +1435,9 @@ define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) {
define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i16:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT: mov w8, #65535
-; CHECK-CVT-NEXT: mov s0, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: mov s2, v1.s[2]
-; CHECK-CVT-NEXT: mov s1, v1.s[3]
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: fmov s0, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov v0.h[1], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: mov v0.h[2], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v0.h[3], w8
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v0.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i16:
@@ -1843,87 +1449,23 @@ define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) {
}
define <4 x i19> @test_unsigned_v4f16_v4i19(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i19:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov w8, #524287
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[3]
-; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[2]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: fmov s0, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvt s1, h2
-; CHECK-CVT-NEXT: mov v0.s[1], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: mov v0.s[2], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v0.s[3], w8
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i19:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov w8, #524287
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w9, h1
-; CHECK-FP16-NEXT: cmp w9, w8
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: fmov s1, w10
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov v1.s[1], w9
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w9, w10, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov v1.s[2], w9
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w8, w10, w8, lo
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v4f16_v4i19:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: movi v1.4s, #7, msl #16
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%x = call <4 x i19> @llvm.fptoui.sat.v4f16.v4i19(<4 x half> %f)
ret <4 x i19> %x
}
define <4 x i32> @test_unsigned_v4f16_v4i32_duplicate(<4 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i32_duplicate:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i32_duplicate:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v4f16_v4i32_duplicate:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptoui.sat.v4f16.v4i32(<4 x half> %f)
ret <4 x i32> %x
}
@@ -2207,98 +1749,57 @@ declare <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half>)
define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i1:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: fcvt s2, h3
-; CHECK-CVT-NEXT: fcvtzu w8, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[3]
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[4]
+; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: mov s2, v1.s[1]
+; CHECK-CVT-NEXT: mov s3, v1.s[2]
+; CHECK-CVT-NEXT: mov s4, v1.s[3]
+; CHECK-CVT-NEXT: mov s5, v0.s[1]
+; CHECK-CVT-NEXT: fcvtzu w9, s1
+; CHECK-CVT-NEXT: fcvtzu w10, s0
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
+; CHECK-CVT-NEXT: fcvtzu w8, s2
+; CHECK-CVT-NEXT: fcvtzu w11, s3
+; CHECK-CVT-NEXT: fcvtzu w12, s4
+; CHECK-CVT-NEXT: fcvtzu w13, s5
; CHECK-CVT-NEXT: cmp w8, #1
; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo
; CHECK-CVT-NEXT: cmp w9, #1
; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo
-; CHECK-CVT-NEXT: fcvt s3, h1
+; CHECK-CVT-NEXT: cmp w11, #1
+; CHECK-CVT-NEXT: csinc w11, w11, wzr, lo
+; CHECK-CVT-NEXT: cmp w12, #1
+; CHECK-CVT-NEXT: csinc w12, w12, wzr, lo
+; CHECK-CVT-NEXT: cmp w13, #1
+; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo
; CHECK-CVT-NEXT: cmp w10, #1
-; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo
+; CHECK-CVT-NEXT: fmov s2, w10
+; CHECK-CVT-NEXT: fcvtzu w10, s1
; CHECK-CVT-NEXT: fmov s1, w9
-; CHECK-CVT-NEXT: fcvtzu w9, s3
-; CHECK-CVT-NEXT: mov h3, v0.h[5]
-; CHECK-CVT-NEXT: mov v1.b[1], w8
-; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo
-; CHECK-CVT-NEXT: mov v1.b[2], w8
-; CHECK-CVT-NEXT: fcvtzu w8, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[6]
-; CHECK-CVT-NEXT: fcvtzu w10, s3
-; CHECK-CVT-NEXT: mov h0, v0.h[7]
-; CHECK-CVT-NEXT: cmp w8, #1
-; CHECK-CVT-NEXT: mov v1.b[3], w9
-; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo
-; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: mov v2.s[1], w13
+; CHECK-CVT-NEXT: cmp w10, #1
+; CHECK-CVT-NEXT: csinc w9, w10, wzr, lo
+; CHECK-CVT-NEXT: fcvtzu w10, s0
+; CHECK-CVT-NEXT: mov v1.s[1], w8
+; CHECK-CVT-NEXT: mov v2.s[2], w9
; CHECK-CVT-NEXT: cmp w10, #1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov v1.b[4], w8
; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov v1.b[5], w8
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: mov v1.b[6], w8
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo
-; CHECK-CVT-NEXT: mov v1.b[7], w8
-; CHECK-CVT-NEXT: fmov d0, d1
+; CHECK-CVT-NEXT: mov v1.s[2], w11
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: mov v1.s[3], w12
+; CHECK-CVT-NEXT: xtn v0.4h, v2.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i1:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w8, h1
-; CHECK-FP16-NEXT: cmp w8, #1
-; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo
-; CHECK-FP16-NEXT: cmp w9, #1
-; CHECK-FP16-NEXT: csinc w9, w9, wzr, lo
-; CHECK-FP16-NEXT: fmov s1, w9
-; CHECK-FP16-NEXT: fcvtzu w9, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[4]
-; CHECK-FP16-NEXT: mov v1.b[1], w8
-; CHECK-FP16-NEXT: cmp w9, #1
-; CHECK-FP16-NEXT: fcvtzu w8, h3
-; CHECK-FP16-NEXT: csinc w9, w9, wzr, lo
-; CHECK-FP16-NEXT: mov h3, v0.h[5]
-; CHECK-FP16-NEXT: cmp w8, #1
-; CHECK-FP16-NEXT: mov v1.b[2], w9
-; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo
-; CHECK-FP16-NEXT: fcvtzu w9, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[6]
-; CHECK-FP16-NEXT: mov h0, v0.h[7]
-; CHECK-FP16-NEXT: mov v1.b[3], w8
-; CHECK-FP16-NEXT: cmp w9, #1
-; CHECK-FP16-NEXT: fcvtzu w8, h3
-; CHECK-FP16-NEXT: csinc w9, w9, wzr, lo
-; CHECK-FP16-NEXT: cmp w8, #1
-; CHECK-FP16-NEXT: mov v1.b[4], w9
-; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo
-; CHECK-FP16-NEXT: fcvtzu w9, h2
-; CHECK-FP16-NEXT: mov v1.b[5], w8
-; CHECK-FP16-NEXT: cmp w9, #1
-; CHECK-FP16-NEXT: csinc w8, w9, wzr, lo
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov v1.b[6], w8
-; CHECK-FP16-NEXT: cmp w9, #1
-; CHECK-FP16-NEXT: csinc w8, w9, wzr, lo
-; CHECK-FP16-NEXT: mov v1.b[7], w8
-; CHECK-FP16-NEXT: fmov d0, d1
+; CHECK-FP16-NEXT: movi v1.8h, #1
+; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i1> @llvm.fptoui.sat.v8f16.v8i1(<8 x half> %f)
ret <8 x i1> %x
@@ -2307,93 +1808,56 @@ define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i8:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-NEXT: mov w8, #255
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-CVT-NEXT: mov s2, v1.s[1]
+; CHECK-CVT-NEXT: mov s3, v1.s[2]
+; CHECK-CVT-NEXT: mov s4, v1.s[3]
+; CHECK-CVT-NEXT: mov s5, v0.s[1]
; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: mov s3, v1.s[3]
+; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov s2, v1.s[2]
+; CHECK-CVT-NEXT: fcvtzu w12, s3
+; CHECK-CVT-NEXT: fcvtzu w13, s4
+; CHECK-CVT-NEXT: fcvtzu w14, s5
; CHECK-CVT-NEXT: cmp w9, #255
; CHECK-CVT-NEXT: csel w9, w9, w8, lo
; CHECK-CVT-NEXT: cmp w10, #255
; CHECK-CVT-NEXT: csel w10, w10, w8, lo
+; CHECK-CVT-NEXT: cmp w12, #255
+; CHECK-CVT-NEXT: csel w12, w12, w8, lo
+; CHECK-CVT-NEXT: cmp w13, #255
+; CHECK-CVT-NEXT: csel w13, w13, w8, lo
+; CHECK-CVT-NEXT: cmp w14, #255
+; CHECK-CVT-NEXT: csel w14, w14, w8, lo
+; CHECK-CVT-NEXT: cmp w11, #255
+; CHECK-CVT-NEXT: csel w11, w11, w8, lo
+; CHECK-CVT-NEXT: fmov s2, w11
+; CHECK-CVT-NEXT: fcvtzu w11, s1
; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: mov v1.b[1], w9
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: fcvtzu w9, s3
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: cmp w9, #255
-; CHECK-CVT-NEXT: mov v1.b[2], w10
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s0
-; CHECK-CVT-NEXT: mov v1.b[3], w9
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: mov s2, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: cmp w9, #255
-; CHECK-CVT-NEXT: mov v1.b[4], w10
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov v1.b[5], w9
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s0
-; CHECK-CVT-NEXT: mov v1.b[6], w9
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v1.b[7], w8
-; CHECK-CVT-NEXT: fmov d0, d1
+; CHECK-CVT-NEXT: mov v2.s[1], w14
+; CHECK-CVT-NEXT: cmp w11, #255
+; CHECK-CVT-NEXT: csel w10, w11, w8, lo
+; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: mov v1.s[1], w9
+; CHECK-CVT-NEXT: mov v2.s[2], w10
+; CHECK-CVT-NEXT: cmp w11, #255
+; CHECK-CVT-NEXT: csel w8, w11, w8, lo
+; CHECK-CVT-NEXT: mov v1.s[2], w12
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: mov v1.s[3], w13
+; CHECK-CVT-NEXT: xtn v0.4h, v2.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i8:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov w8, #255
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w9, h1
-; CHECK-FP16-NEXT: cmp w9, #255
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: cmp w10, #255
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: fmov s1, w10
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[4]
-; CHECK-FP16-NEXT: mov v1.b[1], w9
-; CHECK-FP16-NEXT: cmp w10, #255
-; CHECK-FP16-NEXT: fcvtzu w9, h3
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: mov h3, v0.h[5]
-; CHECK-FP16-NEXT: cmp w9, #255
-; CHECK-FP16-NEXT: mov v1.b[2], w10
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[6]
-; CHECK-FP16-NEXT: mov h0, v0.h[7]
-; CHECK-FP16-NEXT: mov v1.b[3], w9
-; CHECK-FP16-NEXT: cmp w10, #255
-; CHECK-FP16-NEXT: fcvtzu w9, h3
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: cmp w9, #255
-; CHECK-FP16-NEXT: mov v1.b[4], w10
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov v1.b[5], w9
-; CHECK-FP16-NEXT: cmp w10, #255
-; CHECK-FP16-NEXT: csel w9, w10, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov v1.b[6], w9
-; CHECK-FP16-NEXT: cmp w10, #255
-; CHECK-FP16-NEXT: csel w8, w10, w8, lo
-; CHECK-FP16-NEXT: mov v1.b[7], w8
-; CHECK-FP16-NEXT: fmov d0, d1
+; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT: uqxtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i8> @llvm.fptoui.sat.v8f16.v8i8(<8 x half> %f)
ret <8 x i8> %x
@@ -2402,100 +1866,56 @@ define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i13:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: mov h1, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s2, h0
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-NEXT: mov w8, #8191
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: fcvt s2, h3
-; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: mov h1, v0.h[3]
-; CHECK-CVT-NEXT: fcvtzu w11, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[4]
+; CHECK-CVT-NEXT: mov s2, v1.s[1]
+; CHECK-CVT-NEXT: mov s3, v1.s[2]
+; CHECK-CVT-NEXT: mov s4, v1.s[3]
+; CHECK-CVT-NEXT: mov s5, v0.s[1]
+; CHECK-CVT-NEXT: fcvtzu w10, s1
+; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
+; CHECK-CVT-NEXT: fcvtzu w9, s2
+; CHECK-CVT-NEXT: fcvtzu w12, s3
+; CHECK-CVT-NEXT: fcvtzu w13, s4
+; CHECK-CVT-NEXT: fcvtzu w14, s5
; CHECK-CVT-NEXT: cmp w9, w8
; CHECK-CVT-NEXT: csel w9, w9, w8, lo
; CHECK-CVT-NEXT: cmp w10, w8
; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: fcvt s3, h1
+; CHECK-CVT-NEXT: cmp w12, w8
+; CHECK-CVT-NEXT: csel w12, w12, w8, lo
+; CHECK-CVT-NEXT: cmp w13, w8
+; CHECK-CVT-NEXT: csel w13, w13, w8, lo
+; CHECK-CVT-NEXT: cmp w14, w8
+; CHECK-CVT-NEXT: csel w14, w14, w8, lo
; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: csel w11, w11, w8, lo
+; CHECK-CVT-NEXT: fmov s2, w11
+; CHECK-CVT-NEXT: fcvtzu w11, s1
; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s3
-; CHECK-CVT-NEXT: mov h3, v0.h[5]
-; CHECK-CVT-NEXT: mov v1.h[1], w9
-; CHECK-CVT-NEXT: csel w9, w11, w8, lo
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: mov v1.h[2], w9
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov h2, v0.h[6]
-; CHECK-CVT-NEXT: fcvtzu w11, s3
-; CHECK-CVT-NEXT: mov h0, v0.h[7]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: mov v1.h[3], w10
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: mov v2.s[1], w14
; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov v1.h[4], w9
-; CHECK-CVT-NEXT: csel w9, w11, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov v1.h[5], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s0
-; CHECK-CVT-NEXT: mov v1.h[6], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v1.h[7], w8
-; CHECK-CVT-NEXT: mov v0.16b, v1.16b
+; CHECK-CVT-NEXT: csel w10, w11, w8, lo
+; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: mov v1.s[1], w9
+; CHECK-CVT-NEXT: mov v2.s[2], w10
+; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: csel w8, w11, w8, lo
+; CHECK-CVT-NEXT: mov v1.s[2], w12
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: mov v1.s[3], w13
+; CHECK-CVT-NEXT: xtn v0.4h, v2.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov w8, #8191
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w9, h1
-; CHECK-FP16-NEXT: cmp w9, w8
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: fmov s1, w10
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[4]
-; CHECK-FP16-NEXT: mov v1.h[1], w9
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: fcvtzu w9, h3
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: mov h3, v0.h[5]
-; CHECK-FP16-NEXT: cmp w9, w8
-; CHECK-FP16-NEXT: mov v1.h[2], w10
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[6]
-; CHECK-FP16-NEXT: mov h0, v0.h[7]
-; CHECK-FP16-NEXT: mov v1.h[3], w9
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: fcvtzu w9, h3
-; CHECK-FP16-NEXT: csel w10, w10, w8, lo
-; CHECK-FP16-NEXT: cmp w9, w8
-; CHECK-FP16-NEXT: mov v1.h[4], w10
-; CHECK-FP16-NEXT: csel w9, w9, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h2
-; CHECK-FP16-NEXT: mov v1.h[5], w9
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w9, w10, w8, lo
-; CHECK-FP16-NEXT: fcvtzu w10, h0
-; CHECK-FP16-NEXT: mov v1.h[6], w9
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: csel w8, w10, w8, lo
-; CHECK-FP16-NEXT: mov v1.h[7], w8
-; CHECK-FP16-NEXT: mov v0.16b, v1.16b
+; CHECK-FP16-NEXT: mvni v1.8h, #224, lsl #8
+; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i13> @llvm.fptoui.sat.v8f16.v8i13(<8 x half> %f)
ret <8 x i13> %x
@@ -2504,48 +1924,49 @@ define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
define <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i16:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-NEXT: mov w8, #65535
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-CVT-NEXT: mov s2, v1.s[1]
+; CHECK-CVT-NEXT: mov s3, v1.s[2]
+; CHECK-CVT-NEXT: mov s4, v1.s[3]
+; CHECK-CVT-NEXT: mov s5, v0.s[1]
; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: mov s3, v1.s[3]
+; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov s2, v1.s[2]
+; CHECK-CVT-NEXT: fcvtzu w12, s3
+; CHECK-CVT-NEXT: fcvtzu w13, s4
+; CHECK-CVT-NEXT: fcvtzu w14, s5
; CHECK-CVT-NEXT: cmp w9, w8
; CHECK-CVT-NEXT: csel w9, w9, w8, lo
; CHECK-CVT-NEXT: cmp w10, w8
; CHECK-CVT-NEXT: csel w10, w10, w8, lo
+; CHECK-CVT-NEXT: cmp w12, w8
+; CHECK-CVT-NEXT: csel w12, w12, w8, lo
+; CHECK-CVT-NEXT: cmp w13, w8
+; CHECK-CVT-NEXT: csel w13, w13, w8, lo
+; CHECK-CVT-NEXT: cmp w14, w8
+; CHECK-CVT-NEXT: csel w14, w14, w8, lo
+; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: csel w11, w11, w8, lo
+; CHECK-CVT-NEXT: fmov s2, w11
+; CHECK-CVT-NEXT: fcvtzu w11, s1
; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: mov v1.h[1], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: fcvtzu w9, s3
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: mov v1.h[2], w10
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s0
-; CHECK-CVT-NEXT: mov v1.h[3], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: mov s2, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: mov v1.h[4], w10
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: mov v1.h[5], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s0
-; CHECK-CVT-NEXT: mov v1.h[6], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v1.h[7], w8
-; CHECK-CVT-NEXT: mov v0.16b, v1.16b
+; CHECK-CVT-NEXT: mov v2.s[1], w14
+; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: csel w10, w11, w8, lo
+; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: mov v1.s[1], w9
+; CHECK-CVT-NEXT: mov v2.s[2], w10
+; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: csel w8, w11, w8, lo
+; CHECK-CVT-NEXT: mov v1.s[2], w12
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: mov v1.s[3], w13
+; CHECK-CVT-NEXT: xtn v0.4h, v2.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16:
@@ -2557,125 +1978,36 @@ define <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
}
define <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i19:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-CVT-NEXT: mov w8, #524287
-; CHECK-CVT-NEXT: mov h2, v0.h[1]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov h4, v1.h[1]
-; CHECK-CVT-NEXT: mov h6, v1.h[2]
-; CHECK-CVT-NEXT: mov h7, v1.h[3]
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: fcvt s5, h5
-; CHECK-CVT-NEXT: fcvt s4, h4
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s0, h7
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvtzu w11, s2
-; CHECK-CVT-NEXT: fcvtzu w12, s3
-; CHECK-CVT-NEXT: fcvtzu w14, s5
-; CHECK-CVT-NEXT: fcvtzu w13, s4
-; CHECK-CVT-NEXT: fcvtzu w15, s6
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: fcvtzu w16, s0
-; CHECK-CVT-NEXT: csel w4, w10, w8, lo
-; CHECK-CVT-NEXT: cmp w13, w8
-; CHECK-CVT-NEXT: csel w5, w13, w8, lo
-; CHECK-CVT-NEXT: cmp w15, w8
-; CHECK-CVT-NEXT: csel w6, w15, w8, lo
-; CHECK-CVT-NEXT: cmp w16, w8
-; CHECK-CVT-NEXT: csel w7, w16, w8, lo
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w0, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w1, w11, w8, lo
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: csel w2, w12, w8, lo
-; CHECK-CVT-NEXT: cmp w14, w8
-; CHECK-CVT-NEXT: csel w3, w14, w8, lo
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i19:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov w8, #524287
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-NEXT: mov h5, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov h4, v1.h[1]
-; CHECK-FP16-NEXT: mov h6, v1.h[2]
-; CHECK-FP16-NEXT: mov h0, v1.h[3]
-; CHECK-FP16-NEXT: fcvtzu w10, h1
-; CHECK-FP16-NEXT: fcvtzu w11, h2
-; CHECK-FP16-NEXT: fcvtzu w12, h3
-; CHECK-FP16-NEXT: fcvtzu w14, h5
-; CHECK-FP16-NEXT: fcvtzu w13, h4
-; CHECK-FP16-NEXT: fcvtzu w15, h6
-; CHECK-FP16-NEXT: cmp w10, w8
-; CHECK-FP16-NEXT: fcvtzu w16, h0
-; CHECK-FP16-NEXT: csel w4, w10, w8, lo
-; CHECK-FP16-NEXT: cmp w13, w8
-; CHECK-FP16-NEXT: csel w5, w13, w8, lo
-; CHECK-FP16-NEXT: cmp w15, w8
-; CHECK-FP16-NEXT: csel w6, w15, w8, lo
-; CHECK-FP16-NEXT: cmp w16, w8
-; CHECK-FP16-NEXT: csel w7, w16, w8, lo
-; CHECK-FP16-NEXT: cmp w9, w8
-; CHECK-FP16-NEXT: csel w0, w9, w8, lo
-; CHECK-FP16-NEXT: cmp w11, w8
-; CHECK-FP16-NEXT: csel w1, w11, w8, lo
-; CHECK-FP16-NEXT: cmp w12, w8
-; CHECK-FP16-NEXT: csel w2, w12, w8, lo
-; CHECK-FP16-NEXT: cmp w14, w8
-; CHECK-FP16-NEXT: csel w3, w14, w8, lo
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v8f16_v8i19:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: movi v1.4s, #7, msl #16
+; CHECK-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mov w1, v2.s[1]
+; CHECK-NEXT: mov w2, v2.s[2]
+; CHECK-NEXT: mov w5, v0.s[1]
+; CHECK-NEXT: mov w3, v2.s[3]
+; CHECK-NEXT: mov w6, v0.s[2]
+; CHECK-NEXT: mov w7, v0.s[3]
+; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: fmov w0, s2
+; CHECK-NEXT: ret
%x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f)
ret <8 x i19> %x
}
define <8 x i32> @test_unsigned_v8f16_v8i32_duplicate(<8 x half> %f) {
-; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i32_duplicate:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i32_duplicate:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h4, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov h2, v3.h[1]
-; CHECK-FP16-NEXT: fcvtzu w8, h3
-; CHECK-FP16-NEXT: mov h5, v3.h[2]
-; CHECK-FP16-NEXT: mov h3, v3.h[3]
-; CHECK-FP16-NEXT: fmov s1, w8
-; CHECK-FP16-NEXT: fcvtzu w8, h2
-; CHECK-FP16-NEXT: fmov s2, w9
-; CHECK-FP16-NEXT: fcvtzu w9, h4
-; CHECK-FP16-NEXT: mov h4, v0.h[2]
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: mov v1.s[1], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h5
-; CHECK-FP16-NEXT: mov v2.s[1], w9
-; CHECK-FP16-NEXT: fcvtzu w9, h4
-; CHECK-FP16-NEXT: mov v1.s[2], w8
-; CHECK-FP16-NEXT: fcvtzu w8, h3
-; CHECK-FP16-NEXT: mov v2.s[2], w9
-; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov v1.s[3], w8
-; CHECK-FP16-NEXT: mov v2.s[3], w9
-; CHECK-FP16-NEXT: mov v0.16b, v2.16b
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: test_unsigned_v8f16_v8i32_duplicate:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
%x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f)
ret <8 x i32> %x
}
More information about the llvm-commits
mailing list