[llvm] 594d359 - [AArch64] Split v8f32 fptosi_sat into two v4f32.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 28 02:47:45 PDT 2024
Author: David Green
Date: 2024-07-28T10:19:44+01:00
New Revision: 594d3593fec2ae15f6dd38c51fba8bb1f9828089
URL: https://github.com/llvm/llvm-project/commit/594d3593fec2ae15f6dd38c51fba8bb1f9828089
DIFF: https://github.com/llvm/llvm-project/commit/594d3593fec2ae15f6dd38c51fba8bb1f9828089.diff
LOG: [AArch64] Split v8f32 fptosi_sat into two v4f32.
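If we produce illegal v8f32 types, the VectorLegalizer will unroll them,
scalarizing the operations. This patch instead pre-splits them into two v4f32
halves during custom legalization, so the conversion and saturation stay
vectorized. The lowering is shared by fptosi_sat and fptoui_sat, so both
benefit.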
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/fcvt_combine.ll
llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d86e52d49000a..1e9da9b819bdd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4508,11 +4508,19 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
EVT SrcElementVT = SrcVT.getVectorElementType();
// In the absence of FP16 support, promote f16 to f32 and saturate the result.
+ SDLoc DL(Op);
+ SDValue SrcVal2;
if ((SrcElementVT == MVT::f16 &&
(!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
SrcElementVT == MVT::bf16) {
MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
- SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F32VT, SrcVal);
+ // If we are extending to a v8f32, split into two v4f32 to produce legal
+ // types.
+ if (F32VT.getSizeInBits() > 128) {
+ std::tie(SrcVal, SrcVal2) = DAG.SplitVector(SrcVal, DL);
+ F32VT = F32VT.getHalfNumVectorElementsVT();
+ }
SrcVT = F32VT;
SrcElementVT = MVT::f32;
SrcElementWidth = 32;
@@ -4520,9 +4528,8 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
return SDValue();
- SDLoc DL(Op);
- // Expand to f64 if we are saturating to i64, to help produce keep the lanes
- // the same width and produce a fcvtzu.
+ // Expand to f64 if we are saturating to i64, to help keep the lanes the same
+ // width and produce a fcvtzu.
if (SatWidth == 64 && SrcElementWidth < 64) {
MVT F64VT = MVT::getVectorVT(MVT::f64, SrcVT.getVectorNumElements());
SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
@@ -4531,9 +4538,16 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
SrcElementWidth = 64;
}
// Cases that we can emit directly.
- if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
- return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
- DAG.getValueType(DstVT.getScalarType()));
+ if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
+ SDValue Res = DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
+ DAG.getValueType(DstVT.getScalarType()));
+ if (SrcVal2) {
+ SDValue Res2 = DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal2,
+ DAG.getValueType(DstVT.getScalarType()));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Res, Res2);
+ }
+ return Res;
+ }
// Otherwise we emit a cvt that saturates to a higher BW, and saturate the
// result. This is only valid if the legal cvt is larger than the saturate
@@ -4545,20 +4559,32 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
DAG.getValueType(IntVT.getScalarType()));
- SDValue Sat;
+ SDValue NativeCvt2 =
+ SrcVal2 ? DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal2,
+ DAG.getValueType(IntVT.getScalarType()))
+ : SDValue();
+ SDValue Sat, Sat2;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
+ SDValue Min2 = SrcVal2 ? DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt2, MinC) : SDValue();
SDValue MaxC = DAG.getConstant(
APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
+ Sat2 = SrcVal2 ? DAG.getNode(ISD::SMAX, DL, IntVT, Min2, MaxC) : SDValue();
} else {
SDValue MinC = DAG.getConstant(
APInt::getAllOnes(SatWidth).zext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
+ Sat2 = SrcVal2 ? DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt2, MinC) : SDValue();
}
+ if (SrcVal2)
+ Sat = DAG.getNode(ISD::CONCAT_VECTORS, DL,
+ IntVT.getDoubleNumVectorElementsVT(*DAG.getContext()),
+ Sat, Sat2);
+
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index 62669a6d99eae..251d7a424175b 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -466,72 +466,19 @@ define <8 x i16> @test_v8f16_sat(<8 x half> %in) {
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: movi v1.8h, #68, lsl #8
; CHECK-NO16-NEXT: fcvtl v2.4s, v0.4h
-; CHECK-NO16-NEXT: mov w8, #32767 // =0x7fff
; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-NO16-NEXT: mov w11, #-32768 // =0xffff8000
; CHECK-NO16-NEXT: fcvtl v3.4s, v1.4h
; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-NO16-NEXT: fmul v2.4s, v2.4s, v3.4s
; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NO16-NEXT: fcvtn v1.4h, v2.4s
; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
-; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
-; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-NO16-NEXT: mov s2, v0.s[1]
-; CHECK-NO16-NEXT: fcvtzs w10, s0
-; CHECK-NO16-NEXT: fcvtzs w15, s1
-; CHECK-NO16-NEXT: fcvtzs w9, s2
-; CHECK-NO16-NEXT: mov s2, v0.s[2]
-; CHECK-NO16-NEXT: mov s0, v0.s[3]
-; CHECK-NO16-NEXT: cmp w9, w8
-; CHECK-NO16-NEXT: fcvtzs w12, s2
-; CHECK-NO16-NEXT: mov s2, v1.s[1]
-; CHECK-NO16-NEXT: csel w9, w9, w8, lt
-; CHECK-NO16-NEXT: fcvtzs w13, s0
-; CHECK-NO16-NEXT: mov s0, v1.s[2]
-; CHECK-NO16-NEXT: cmn w9, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT: csel w9, w9, w11, gt
-; CHECK-NO16-NEXT: cmp w10, w8
-; CHECK-NO16-NEXT: csel w10, w10, w8, lt
-; CHECK-NO16-NEXT: fcvtzs w14, s2
-; CHECK-NO16-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT: fcvtzs w16, s0
-; CHECK-NO16-NEXT: mov s0, v1.s[3]
-; CHECK-NO16-NEXT: csel w10, w10, w11, gt
-; CHECK-NO16-NEXT: cmp w12, w8
-; CHECK-NO16-NEXT: csel w12, w12, w8, lt
-; CHECK-NO16-NEXT: fmov s1, w10
-; CHECK-NO16-NEXT: cmn w12, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT: csel w12, w12, w11, gt
-; CHECK-NO16-NEXT: cmp w13, w8
-; CHECK-NO16-NEXT: csel w13, w13, w8, lt
-; CHECK-NO16-NEXT: mov v1.s[1], w9
-; CHECK-NO16-NEXT: fcvtzs w9, s0
-; CHECK-NO16-NEXT: cmn w13, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT: csel w13, w13, w11, gt
-; CHECK-NO16-NEXT: cmp w14, w8
-; CHECK-NO16-NEXT: csel w14, w14, w8, lt
-; CHECK-NO16-NEXT: cmn w14, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT: mov v1.s[2], w12
-; CHECK-NO16-NEXT: csel w14, w14, w11, gt
-; CHECK-NO16-NEXT: cmp w15, w8
-; CHECK-NO16-NEXT: csel w15, w15, w8, lt
-; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT: csel w10, w15, w11, gt
-; CHECK-NO16-NEXT: cmp w16, w8
-; CHECK-NO16-NEXT: mov v1.s[3], w13
-; CHECK-NO16-NEXT: fmov s2, w10
-; CHECK-NO16-NEXT: csel w10, w16, w8, lt
-; CHECK-NO16-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT: csel w10, w10, w11, gt
-; CHECK-NO16-NEXT: cmp w9, w8
-; CHECK-NO16-NEXT: mov v2.s[1], w14
-; CHECK-NO16-NEXT: csel w8, w9, w8, lt
-; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT: csel w8, w8, w11, gt
-; CHECK-NO16-NEXT: mov v2.s[2], w10
-; CHECK-NO16-NEXT: mov v2.s[3], w8
-; CHECK-NO16-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s
; CHECK-NO16-NEXT: ret
;
; CHECK-FP16-LABEL: test_v8f16_sat:
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 91c8b7f345e32..4626fd7f2b3dd 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -2014,47 +2014,17 @@ declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>)
define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v8f16_v8i1:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzs w9, s1
-; CHECK-CVT-NEXT: fcvtzs w13, s0
-; CHECK-CVT-NEXT: fcvtzs w8, s2
-; CHECK-CVT-NEXT: mov s2, v1.s[2]
-; CHECK-CVT-NEXT: mov s1, v1.s[3]
-; CHECK-CVT-NEXT: ands w8, w8, w8, asr #31
-; CHECK-CVT-NEXT: fcvtzs w10, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: fcvtzs w11, s1
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT: ands w9, w9, w9, asr #31
-; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge
-; CHECK-CVT-NEXT: ands w10, w10, w10, asr #31
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: fcvtzs w14, s1
-; CHECK-CVT-NEXT: fmov s1, w9
-; CHECK-CVT-NEXT: fcvtzs w9, s0
-; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge
-; CHECK-CVT-NEXT: ands w11, w11, w11, asr #31
-; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge
-; CHECK-CVT-NEXT: ands w12, w12, w12, asr #31
-; CHECK-CVT-NEXT: mov v1.s[1], w8
-; CHECK-CVT-NEXT: csinv w12, w12, wzr, ge
-; CHECK-CVT-NEXT: ands w13, w13, w13, asr #31
-; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge
-; CHECK-CVT-NEXT: ands w8, w14, w14, asr #31
-; CHECK-CVT-NEXT: mov v1.s[2], w10
-; CHECK-CVT-NEXT: fmov s2, w13
-; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT: mov v2.s[1], w12
-; CHECK-CVT-NEXT: mov v1.s[3], w11
-; CHECK-CVT-NEXT: mov v2.s[2], w8
-; CHECK-CVT-NEXT: ands w8, w9, w9, asr #31
-; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT: mov v2.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-CVT-NEXT: movi v3.2d, #0xffffffffffffffff
+; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s
+; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
@@ -2074,65 +2044,17 @@ define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v8f16_v8i8:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: mov w8, #127 // =0x7f
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w11, #-128 // =0xffffff80
-; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzs w10, s1
-; CHECK-CVT-NEXT: fcvtzs w15, s0
-; CHECK-CVT-NEXT: fcvtzs w9, s2
-; CHECK-CVT-NEXT: mov s2, v1.s[2]
-; CHECK-CVT-NEXT: mov s1, v1.s[3]
-; CHECK-CVT-NEXT: cmp w9, #127
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w13, s1
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: cmn w9, #128
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: csel w9, w9, w11, gt
-; CHECK-CVT-NEXT: cmp w10, #127
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w14, s2
-; CHECK-CVT-NEXT: cmn w10, #128
-; CHECK-CVT-NEXT: fcvtzs w16, s1
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: csel w12, w12, w8, lt
-; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: cmn w12, #128
-; CHECK-CVT-NEXT: csel w12, w12, w11, gt
-; CHECK-CVT-NEXT: cmp w13, #127
-; CHECK-CVT-NEXT: csel w13, w13, w8, lt
-; CHECK-CVT-NEXT: mov v1.s[1], w9
-; CHECK-CVT-NEXT: fcvtzs w9, s0
-; CHECK-CVT-NEXT: cmn w13, #128
-; CHECK-CVT-NEXT: csel w13, w13, w11, gt
-; CHECK-CVT-NEXT: cmp w14, #127
-; CHECK-CVT-NEXT: csel w14, w14, w8, lt
-; CHECK-CVT-NEXT: cmn w14, #128
-; CHECK-CVT-NEXT: mov v1.s[2], w12
-; CHECK-CVT-NEXT: csel w14, w14, w11, gt
-; CHECK-CVT-NEXT: cmp w15, #127
-; CHECK-CVT-NEXT: csel w15, w15, w8, lt
-; CHECK-CVT-NEXT: cmn w15, #128
-; CHECK-CVT-NEXT: csel w10, w15, w11, gt
-; CHECK-CVT-NEXT: cmp w16, #127
-; CHECK-CVT-NEXT: mov v1.s[3], w13
-; CHECK-CVT-NEXT: fmov s2, w10
-; CHECK-CVT-NEXT: csel w10, w16, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #128
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w9, #127
-; CHECK-CVT-NEXT: mov v2.s[1], w14
-; CHECK-CVT-NEXT: csel w8, w9, w8, lt
-; CHECK-CVT-NEXT: cmn w8, #128
-; CHECK-CVT-NEXT: csel w8, w8, w11, gt
-; CHECK-CVT-NEXT: mov v2.s[2], w10
-; CHECK-CVT-NEXT: mov v2.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT: movi v1.4s, #127
+; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: mvni v1.4s, #127
+; CHECK-CVT-NEXT: smax v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
@@ -2148,65 +2070,17 @@ define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v8f16_v8i13:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: mov w8, #4095 // =0xfff
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w11, #-4096 // =0xfffff000
-; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzs w10, s1
-; CHECK-CVT-NEXT: fcvtzs w15, s0
-; CHECK-CVT-NEXT: fcvtzs w9, s2
-; CHECK-CVT-NEXT: mov s2, v1.s[2]
-; CHECK-CVT-NEXT: mov s1, v1.s[3]
-; CHECK-CVT-NEXT: cmp w9, #4095
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w13, s1
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: csel w9, w9, w11, gt
-; CHECK-CVT-NEXT: cmp w10, #4095
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w14, s2
-; CHECK-CVT-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: fcvtzs w16, s1
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, #4095
-; CHECK-CVT-NEXT: csel w12, w12, w8, lt
-; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w12, w12, w11, gt
-; CHECK-CVT-NEXT: cmp w13, #4095
-; CHECK-CVT-NEXT: csel w13, w13, w8, lt
-; CHECK-CVT-NEXT: mov v1.s[1], w9
-; CHECK-CVT-NEXT: fcvtzs w9, s0
-; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w13, w13, w11, gt
-; CHECK-CVT-NEXT: cmp w14, #4095
-; CHECK-CVT-NEXT: csel w14, w14, w8, lt
-; CHECK-CVT-NEXT: cmn w14, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: mov v1.s[2], w12
-; CHECK-CVT-NEXT: csel w14, w14, w11, gt
-; CHECK-CVT-NEXT: cmp w15, #4095
-; CHECK-CVT-NEXT: csel w15, w15, w8, lt
-; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w10, w15, w11, gt
-; CHECK-CVT-NEXT: cmp w16, #4095
-; CHECK-CVT-NEXT: mov v1.s[3], w13
-; CHECK-CVT-NEXT: fmov s2, w10
-; CHECK-CVT-NEXT: csel w10, w16, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w9, #4095
-; CHECK-CVT-NEXT: mov v2.s[1], w14
-; CHECK-CVT-NEXT: csel w8, w9, w8, lt
-; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT: csel w8, w8, w11, gt
-; CHECK-CVT-NEXT: mov v2.s[2], w10
-; CHECK-CVT-NEXT: mov v2.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT: movi v1.4s, #15, msl #8
+; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: mvni v1.4s, #15, msl #8
+; CHECK-CVT-NEXT: smax v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v8f16_v8i13:
@@ -2224,65 +2098,12 @@ define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v8f16_v8i16:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: mov w8, #32767 // =0x7fff
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w11, #-32768 // =0xffff8000
-; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzs w10, s1
-; CHECK-CVT-NEXT: fcvtzs w15, s0
-; CHECK-CVT-NEXT: fcvtzs w9, s2
-; CHECK-CVT-NEXT: mov s2, v1.s[2]
-; CHECK-CVT-NEXT: mov s1, v1.s[3]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w13, s1
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: csel w9, w9, w11, gt
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w14, s2
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: fcvtzs w16, s1
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: csel w12, w12, w8, lt
-; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w12, w12, w11, gt
-; CHECK-CVT-NEXT: cmp w13, w8
-; CHECK-CVT-NEXT: csel w13, w13, w8, lt
-; CHECK-CVT-NEXT: mov v1.s[1], w9
-; CHECK-CVT-NEXT: fcvtzs w9, s0
-; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w13, w13, w11, gt
-; CHECK-CVT-NEXT: cmp w14, w8
-; CHECK-CVT-NEXT: csel w14, w14, w8, lt
-; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v1.s[2], w12
-; CHECK-CVT-NEXT: csel w14, w14, w11, gt
-; CHECK-CVT-NEXT: cmp w15, w8
-; CHECK-CVT-NEXT: csel w15, w15, w8, lt
-; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w10, w15, w11, gt
-; CHECK-CVT-NEXT: cmp w16, w8
-; CHECK-CVT-NEXT: mov v1.s[3], w13
-; CHECK-CVT-NEXT: fmov s2, w10
-; CHECK-CVT-NEXT: csel w10, w16, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w10, w10, w11, gt
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: mov v2.s[1], w14
-; CHECK-CVT-NEXT: csel w8, w9, w8, lt
-; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w8, w8, w11, gt
-; CHECK-CVT-NEXT: mov v2.s[2], w10
-; CHECK-CVT-NEXT: mov v2.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: fcvtzs v1.4s, v2.4s
+; CHECK-CVT-NEXT: sqxtn2 v0.8h, v1.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v8f16_v8i16:
@@ -2984,123 +2805,27 @@ define <16 x i16> @test_signed_v16f32_v16i16(<16 x float> %f) {
define <16 x i8> @test_signed_v16f16_v16i8(<16 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v16f16_v16i8:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: mov w8, #127 // =0x7f
+; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h
; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: mov s3, v2.s[1]
-; CHECK-CVT-NEXT: fcvtzs w10, s2
-; CHECK-CVT-NEXT: fcvtzs w16, s1
-; CHECK-CVT-NEXT: fcvtzs w9, s3
-; CHECK-CVT-NEXT: mov s3, v2.s[2]
-; CHECK-CVT-NEXT: mov s2, v2.s[3]
-; CHECK-CVT-NEXT: cmp w9, #127
-; CHECK-CVT-NEXT: fcvtzs w12, s3
-; CHECK-CVT-NEXT: mov s3, v1.s[1]
-; CHECK-CVT-NEXT: csel w11, w9, w8, lt
-; CHECK-CVT-NEXT: mov w9, #-128 // =0xffffff80
-; CHECK-CVT-NEXT: fcvtzs w14, s2
-; CHECK-CVT-NEXT: cmn w11, #128
-; CHECK-CVT-NEXT: mov s2, v1.s[2]
-; CHECK-CVT-NEXT: mov s1, v1.s[3]
-; CHECK-CVT-NEXT: csel w11, w11, w9, gt
-; CHECK-CVT-NEXT: cmp w10, #127
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w15, s3
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: cmn w10, #128
+; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: csel w13, w10, w9, gt
-; CHECK-CVT-NEXT: cmp w12, #127
-; CHECK-CVT-NEXT: fcvtzs w17, s1
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #128
-; CHECK-CVT-NEXT: mov s1, v3.s[2]
-; CHECK-CVT-NEXT: fcvtzs w0, s3
-; CHECK-CVT-NEXT: csel w10, w10, w9, gt
-; CHECK-CVT-NEXT: cmp w14, #127
-; CHECK-CVT-NEXT: fcvtzs w4, s0
-; CHECK-CVT-NEXT: csel w12, w14, w8, lt
-; CHECK-CVT-NEXT: cmn w12, #128
-; CHECK-CVT-NEXT: csel w12, w12, w9, gt
-; CHECK-CVT-NEXT: cmp w15, #127
-; CHECK-CVT-NEXT: fcvtzs w1, s1
-; CHECK-CVT-NEXT: csel w14, w15, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w15, s2
-; CHECK-CVT-NEXT: mov s2, v3.s[1]
-; CHECK-CVT-NEXT: cmn w14, #128
-; CHECK-CVT-NEXT: mov s1, v0.s[1]
-; CHECK-CVT-NEXT: csel w14, w14, w9, gt
-; CHECK-CVT-NEXT: cmp w16, #127
-; CHECK-CVT-NEXT: csel w16, w16, w8, lt
-; CHECK-CVT-NEXT: cmn w16, #128
-; CHECK-CVT-NEXT: fcvtzs w18, s2
-; CHECK-CVT-NEXT: mov s2, v3.s[3]
-; CHECK-CVT-NEXT: csel w16, w16, w9, gt
-; CHECK-CVT-NEXT: cmp w15, #127
-; CHECK-CVT-NEXT: fcvtzs w3, s1
-; CHECK-CVT-NEXT: csel w15, w15, w8, lt
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: cmn w15, #128
-; CHECK-CVT-NEXT: csel w15, w15, w9, gt
-; CHECK-CVT-NEXT: cmp w17, #127
-; CHECK-CVT-NEXT: fcvtzs w2, s2
-; CHECK-CVT-NEXT: csel w17, w17, w8, lt
-; CHECK-CVT-NEXT: fmov s2, w13
-; CHECK-CVT-NEXT: cmn w17, #128
-; CHECK-CVT-NEXT: csel w17, w17, w9, gt
-; CHECK-CVT-NEXT: cmp w18, #127
-; CHECK-CVT-NEXT: csel w18, w18, w8, lt
-; CHECK-CVT-NEXT: mov v2.s[1], w11
-; CHECK-CVT-NEXT: cmn w18, #128
-; CHECK-CVT-NEXT: csel w18, w18, w9, gt
-; CHECK-CVT-NEXT: cmp w0, #127
-; CHECK-CVT-NEXT: csel w0, w0, w8, lt
-; CHECK-CVT-NEXT: cmn w0, #128
-; CHECK-CVT-NEXT: mov v2.s[2], w10
-; CHECK-CVT-NEXT: csel w0, w0, w9, gt
-; CHECK-CVT-NEXT: cmp w1, #127
-; CHECK-CVT-NEXT: csel w1, w1, w8, lt
-; CHECK-CVT-NEXT: fmov s3, w0
-; CHECK-CVT-NEXT: cmn w1, #128
-; CHECK-CVT-NEXT: csel w1, w1, w9, gt
-; CHECK-CVT-NEXT: cmp w2, #127
-; CHECK-CVT-NEXT: mov v2.s[3], w12
-; CHECK-CVT-NEXT: csel w2, w2, w8, lt
-; CHECK-CVT-NEXT: mov v3.s[1], w18
-; CHECK-CVT-NEXT: cmn w2, #128
-; CHECK-CVT-NEXT: csel w2, w2, w9, gt
-; CHECK-CVT-NEXT: cmp w3, #127
-; CHECK-CVT-NEXT: csel w3, w3, w8, lt
-; CHECK-CVT-NEXT: cmn w3, #128
-; CHECK-CVT-NEXT: mov v3.s[2], w1
-; CHECK-CVT-NEXT: csel w13, w3, w9, gt
-; CHECK-CVT-NEXT: cmp w4, #127
-; CHECK-CVT-NEXT: csel w3, w4, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w4, s1
-; CHECK-CVT-NEXT: fmov s1, w16
-; CHECK-CVT-NEXT: cmn w3, #128
-; CHECK-CVT-NEXT: csel w11, w3, w9, gt
-; CHECK-CVT-NEXT: mov v3.s[3], w2
-; CHECK-CVT-NEXT: fmov s4, w11
-; CHECK-CVT-NEXT: mov v1.s[1], w14
-; CHECK-CVT-NEXT: fcvtzs w11, s0
-; CHECK-CVT-NEXT: cmp w4, #127
-; CHECK-CVT-NEXT: mov v4.s[1], w13
-; CHECK-CVT-NEXT: csel w13, w4, w8, lt
-; CHECK-CVT-NEXT: cmn w13, #128
-; CHECK-CVT-NEXT: mov v1.s[2], w15
-; CHECK-CVT-NEXT: csel w10, w13, w9, gt
-; CHECK-CVT-NEXT: cmp w11, #127
-; CHECK-CVT-NEXT: csel w8, w11, w8, lt
-; CHECK-CVT-NEXT: mov v4.s[2], w10
-; CHECK-CVT-NEXT: cmn w8, #128
-; CHECK-CVT-NEXT: csel w8, w8, w9, gt
-; CHECK-CVT-NEXT: mov v1.s[3], w17
-; CHECK-CVT-NEXT: mov v4.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h
-; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h
-; CHECK-CVT-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; CHECK-CVT-NEXT: movi v2.4s, #127
+; CHECK-CVT-NEXT: fcvtzs v3.4s, v3.4s
+; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-CVT-NEXT: fcvtzs v4.4s, v4.4s
+; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT: smin v3.4s, v3.4s, v2.4s
+; CHECK-CVT-NEXT: smin v1.4s, v1.4s, v2.4s
+; CHECK-CVT-NEXT: smin v4.4s, v4.4s, v2.4s
+; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-CVT-NEXT: mvni v2.4s, #127
+; CHECK-CVT-NEXT: smax v3.4s, v3.4s, v2.4s
+; CHECK-CVT-NEXT: smax v1.4s, v1.4s, v2.4s
+; CHECK-CVT-NEXT: smax v4.4s, v4.4s, v2.4s
+; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v3.8h
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v4.8h
+; CHECK-CVT-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v16f16_v16i8:
@@ -3117,122 +2842,18 @@ define <16 x i8> @test_signed_v16f16_v16i8(<16 x half> %f) {
define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) {
; CHECK-CVT-LABEL: test_signed_v16f16_v16i16:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: mov w8, #32767 // =0x7fff
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov s3, v2.s[1]
-; CHECK-CVT-NEXT: fcvtzs w10, s2
-; CHECK-CVT-NEXT: fcvtzs w16, s0
-; CHECK-CVT-NEXT: fcvtzs w9, s3
-; CHECK-CVT-NEXT: mov s3, v2.s[2]
-; CHECK-CVT-NEXT: mov s2, v2.s[3]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: fcvtzs w12, s3
-; CHECK-CVT-NEXT: mov s3, v0.s[1]
-; CHECK-CVT-NEXT: csel w11, w9, w8, lt
-; CHECK-CVT-NEXT: mov w9, #-32768 // =0xffff8000
-; CHECK-CVT-NEXT: fcvtzs w14, s2
-; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov s2, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: csel w11, w11, w9, gt
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w15, s3
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: csel w13, w10, w9, gt
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: fcvtzs w17, s0
-; CHECK-CVT-NEXT: csel w10, w12, w8, lt
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov s0, v3.s[2]
-; CHECK-CVT-NEXT: fcvtzs w0, s3
-; CHECK-CVT-NEXT: csel w10, w10, w9, gt
-; CHECK-CVT-NEXT: cmp w14, w8
-; CHECK-CVT-NEXT: fcvtzs w4, s1
-; CHECK-CVT-NEXT: csel w12, w14, w8, lt
-; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w12, w12, w9, gt
-; CHECK-CVT-NEXT: cmp w15, w8
-; CHECK-CVT-NEXT: fcvtzs w1, s0
-; CHECK-CVT-NEXT: csel w14, w15, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w15, s2
-; CHECK-CVT-NEXT: mov s2, v3.s[1]
-; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov s0, v1.s[1]
-; CHECK-CVT-NEXT: csel w14, w14, w9, gt
-; CHECK-CVT-NEXT: cmp w16, w8
-; CHECK-CVT-NEXT: csel w16, w16, w8, lt
-; CHECK-CVT-NEXT: cmn w16, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: fcvtzs w18, s2
-; CHECK-CVT-NEXT: mov s2, v3.s[3]
-; CHECK-CVT-NEXT: csel w16, w16, w9, gt
-; CHECK-CVT-NEXT: cmp w15, w8
-; CHECK-CVT-NEXT: fcvtzs w3, s0
-; CHECK-CVT-NEXT: csel w15, w15, w8, lt
-; CHECK-CVT-NEXT: mov s0, v1.s[2]
-; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w15, w15, w9, gt
-; CHECK-CVT-NEXT: cmp w17, w8
-; CHECK-CVT-NEXT: fcvtzs w2, s2
-; CHECK-CVT-NEXT: csel w17, w17, w8, lt
-; CHECK-CVT-NEXT: fmov s2, w13
-; CHECK-CVT-NEXT: cmn w17, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w17, w17, w9, gt
-; CHECK-CVT-NEXT: cmp w18, w8
-; CHECK-CVT-NEXT: csel w18, w18, w8, lt
-; CHECK-CVT-NEXT: mov v2.s[1], w11
-; CHECK-CVT-NEXT: cmn w18, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w18, w18, w9, gt
-; CHECK-CVT-NEXT: cmp w0, w8
-; CHECK-CVT-NEXT: csel w0, w0, w8, lt
-; CHECK-CVT-NEXT: cmn w0, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v2.s[2], w10
-; CHECK-CVT-NEXT: csel w0, w0, w9, gt
-; CHECK-CVT-NEXT: cmp w1, w8
-; CHECK-CVT-NEXT: csel w1, w1, w8, lt
-; CHECK-CVT-NEXT: fmov s3, w0
-; CHECK-CVT-NEXT: cmn w1, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w1, w1, w9, gt
-; CHECK-CVT-NEXT: cmp w2, w8
-; CHECK-CVT-NEXT: mov v2.s[3], w12
-; CHECK-CVT-NEXT: csel w2, w2, w8, lt
-; CHECK-CVT-NEXT: mov v3.s[1], w18
-; CHECK-CVT-NEXT: cmn w2, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w2, w2, w9, gt
-; CHECK-CVT-NEXT: cmp w3, w8
-; CHECK-CVT-NEXT: csel w3, w3, w8, lt
-; CHECK-CVT-NEXT: cmn w3, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v3.s[2], w1
-; CHECK-CVT-NEXT: csel w13, w3, w9, gt
-; CHECK-CVT-NEXT: cmp w4, w8
-; CHECK-CVT-NEXT: csel w3, w4, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w4, s0
-; CHECK-CVT-NEXT: mov s0, v1.s[3]
-; CHECK-CVT-NEXT: cmn w3, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: fmov s1, w16
-; CHECK-CVT-NEXT: csel w11, w3, w9, gt
-; CHECK-CVT-NEXT: mov v3.s[3], w2
-; CHECK-CVT-NEXT: fmov s4, w11
-; CHECK-CVT-NEXT: mov v1.s[1], w14
-; CHECK-CVT-NEXT: cmp w4, w8
-; CHECK-CVT-NEXT: fcvtzs w11, s0
-; CHECK-CVT-NEXT: mov v4.s[1], w13
-; CHECK-CVT-NEXT: csel w13, w4, w8, lt
-; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w10, w13, w9, gt
-; CHECK-CVT-NEXT: mov v1.s[2], w15
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w8, w11, w8, lt
-; CHECK-CVT-NEXT: mov v4.s[2], w10
-; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w8, w8, w9, gt
-; CHECK-CVT-NEXT: mov v1.s[3], w17
-; CHECK-CVT-NEXT: mov v4.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h
-; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h
+; CHECK-CVT-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl2 v5.4s, v1.8h
+; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzs v1.4s, v3.4s
+; CHECK-CVT-NEXT: fcvtzs v3.4s, v5.4s
+; CHECK-CVT-NEXT: sqxtn v0.4h, v2.4s
+; CHECK-CVT-NEXT: fcvtzs v2.4s, v4.4s
+; CHECK-CVT-NEXT: sqxtn v1.4h, v1.4s
+; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT: sqxtn2 v1.8h, v3.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v16f16_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 6089d76f7820c..a3b94bcf18ab4 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -1708,47 +1708,14 @@ declare <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half>)
define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i1:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: mov s3, v1.s[2]
-; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: fcvtzu w13, s0
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: fcvtzu w8, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: fcvtzu w10, s3
-; CHECK-CVT-NEXT: fcvtzu w11, s4
-; CHECK-CVT-NEXT: fcvtzu w14, s1
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: cmp w8, #1
-; CHECK-CVT-NEXT: fcvtzu w12, s2
-; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo
-; CHECK-CVT-NEXT: cmp w10, #1
-; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo
-; CHECK-CVT-NEXT: cmp w11, #1
-; CHECK-CVT-NEXT: fmov s1, w9
-; CHECK-CVT-NEXT: csinc w11, w11, wzr, lo
-; CHECK-CVT-NEXT: cmp w12, #1
-; CHECK-CVT-NEXT: csinc w12, w12, wzr, lo
-; CHECK-CVT-NEXT: cmp w13, #1
-; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo
-; CHECK-CVT-NEXT: mov v1.s[1], w8
-; CHECK-CVT-NEXT: cmp w14, #1
-; CHECK-CVT-NEXT: fmov s2, w13
-; CHECK-CVT-NEXT: fcvtzu w8, s0
-; CHECK-CVT-NEXT: csinc w9, w14, wzr, lo
-; CHECK-CVT-NEXT: mov v2.s[1], w12
-; CHECK-CVT-NEXT: mov v1.s[2], w10
-; CHECK-CVT-NEXT: cmp w8, #1
-; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo
-; CHECK-CVT-NEXT: mov v2.s[2], w9
-; CHECK-CVT-NEXT: mov v1.s[3], w11
-; CHECK-CVT-NEXT: mov v2.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT: movi v1.4s, #1
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
@@ -1766,48 +1733,14 @@ define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i8:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w8, #255 // =0xff
-; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: mov s3, v1.s[2]
-; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvtzu w14, s0
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: fcvtzu w11, s3
-; CHECK-CVT-NEXT: fcvtzu w12, s4
-; CHECK-CVT-NEXT: fcvtzu w15, s1
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: cmp w9, #255
-; CHECK-CVT-NEXT: fcvtzu w13, s2
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: cmp w11, #255
-; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: cmp w12, #255
-; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: csel w12, w12, w8, lo
-; CHECK-CVT-NEXT: cmp w13, #255
-; CHECK-CVT-NEXT: csel w13, w13, w8, lo
-; CHECK-CVT-NEXT: cmp w14, #255
-; CHECK-CVT-NEXT: csel w14, w14, w8, lo
-; CHECK-CVT-NEXT: mov v1.s[1], w9
-; CHECK-CVT-NEXT: cmp w15, #255
-; CHECK-CVT-NEXT: fmov s2, w14
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: csel w10, w15, w8, lo
-; CHECK-CVT-NEXT: mov v2.s[1], w13
-; CHECK-CVT-NEXT: mov v1.s[2], w11
-; CHECK-CVT-NEXT: cmp w9, #255
-; CHECK-CVT-NEXT: csel w8, w9, w8, lo
-; CHECK-CVT-NEXT: mov v2.s[2], w10
-; CHECK-CVT-NEXT: mov v1.s[3], w12
-; CHECK-CVT-NEXT: mov v2.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
@@ -1823,48 +1756,14 @@ define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i13:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w8, #8191 // =0x1fff
-; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: mov s3, v1.s[2]
-; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvtzu w14, s0
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: fcvtzu w11, s3
-; CHECK-CVT-NEXT: fcvtzu w12, s4
-; CHECK-CVT-NEXT: fcvtzu w15, s1
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: fcvtzu w13, s2
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: csel w12, w12, w8, lo
-; CHECK-CVT-NEXT: cmp w13, w8
-; CHECK-CVT-NEXT: csel w13, w13, w8, lo
-; CHECK-CVT-NEXT: cmp w14, w8
-; CHECK-CVT-NEXT: csel w14, w14, w8, lo
-; CHECK-CVT-NEXT: mov v1.s[1], w9
-; CHECK-CVT-NEXT: cmp w15, w8
-; CHECK-CVT-NEXT: fmov s2, w14
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: csel w10, w15, w8, lo
-; CHECK-CVT-NEXT: mov v2.s[1], w13
-; CHECK-CVT-NEXT: mov v1.s[2], w11
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w8, w9, w8, lo
-; CHECK-CVT-NEXT: mov v2.s[2], w10
-; CHECK-CVT-NEXT: mov v1.s[3], w12
-; CHECK-CVT-NEXT: mov v2.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT: movi v1.4s, #31, msl #8
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13:
@@ -1880,48 +1779,12 @@ define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
define <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i16:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w8, #65535 // =0xffff
-; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: mov s3, v1.s[2]
-; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvtzu w14, s0
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: fcvtzu w11, s3
-; CHECK-CVT-NEXT: fcvtzu w12, s4
-; CHECK-CVT-NEXT: fcvtzu w15, s1
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: fcvtzu w13, s2
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: fmov s1, w10
-; CHECK-CVT-NEXT: csel w12, w12, w8, lo
-; CHECK-CVT-NEXT: cmp w13, w8
-; CHECK-CVT-NEXT: csel w13, w13, w8, lo
-; CHECK-CVT-NEXT: cmp w14, w8
-; CHECK-CVT-NEXT: csel w14, w14, w8, lo
-; CHECK-CVT-NEXT: mov v1.s[1], w9
-; CHECK-CVT-NEXT: cmp w15, w8
-; CHECK-CVT-NEXT: fmov s2, w14
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: csel w10, w15, w8, lo
-; CHECK-CVT-NEXT: mov v2.s[1], w13
-; CHECK-CVT-NEXT: mov v1.s[2], w11
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w8, w9, w8, lo
-; CHECK-CVT-NEXT: mov v2.s[2], w10
-; CHECK-CVT-NEXT: mov v1.s[3], w12
-; CHECK-CVT-NEXT: mov v2.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: fcvtzu v1.4s, v2.4s
+; CHECK-CVT-NEXT: uqxtn2 v0.8h, v1.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16:
@@ -2509,90 +2372,22 @@ define <16 x i16> @test_unsigned_v16f32_v16i16(<16 x float> %f) {
define <16 x i8> @test_unsigned_v16f16_v16i8(<16 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v16f16_v16i8:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h
; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: mov w8, #255 // =0xff
-; CHECK-CVT-NEXT: mov s3, v2.s[1]
-; CHECK-CVT-NEXT: mov s4, v2.s[2]
-; CHECK-CVT-NEXT: mov s5, v2.s[3]
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtzu w13, s1
+; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu w9, s3
-; CHECK-CVT-NEXT: mov s3, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzu w11, s4
-; CHECK-CVT-NEXT: mov s4, v1.s[2]
-; CHECK-CVT-NEXT: fcvtzu w12, s5
-; CHECK-CVT-NEXT: mov s1, v1.s[3]
-; CHECK-CVT-NEXT: fcvtzu w18, s2
-; CHECK-CVT-NEXT: fcvtzu w3, s0
-; CHECK-CVT-NEXT: fcvtzu w14, s3
-; CHECK-CVT-NEXT: cmp w9, #255
-; CHECK-CVT-NEXT: mov s3, v2.s[1]
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: fcvtzu w15, s4
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: cmp w11, #255
-; CHECK-CVT-NEXT: mov s4, v2.s[2]
-; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: cmp w12, #255
-; CHECK-CVT-NEXT: fcvtzu w16, s1
-; CHECK-CVT-NEXT: mov s1, v2.s[3]
-; CHECK-CVT-NEXT: csel w12, w12, w8, lo
-; CHECK-CVT-NEXT: cmp w14, #255
-; CHECK-CVT-NEXT: fcvtzu w17, s3
-; CHECK-CVT-NEXT: mov s3, v0.s[1]
-; CHECK-CVT-NEXT: csel w14, w14, w8, lo
-; CHECK-CVT-NEXT: cmp w13, #255
-; CHECK-CVT-NEXT: fcvtzu w0, s4
-; CHECK-CVT-NEXT: fmov s2, w10
-; CHECK-CVT-NEXT: csel w13, w13, w8, lo
-; CHECK-CVT-NEXT: cmp w15, #255
-; CHECK-CVT-NEXT: csel w15, w15, w8, lo
-; CHECK-CVT-NEXT: cmp w16, #255
-; CHECK-CVT-NEXT: fcvtzu w1, s1
-; CHECK-CVT-NEXT: csel w16, w16, w8, lo
-; CHECK-CVT-NEXT: cmp w17, #255
-; CHECK-CVT-NEXT: fcvtzu w2, s3
-; CHECK-CVT-NEXT: csel w17, w17, w8, lo
-; CHECK-CVT-NEXT: cmp w18, #255
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: csel w18, w18, w8, lo
-; CHECK-CVT-NEXT: cmp w0, #255
-; CHECK-CVT-NEXT: mov v2.s[1], w9
-; CHECK-CVT-NEXT: csel w0, w0, w8, lo
-; CHECK-CVT-NEXT: cmp w1, #255
-; CHECK-CVT-NEXT: fmov s3, w18
-; CHECK-CVT-NEXT: csel w10, w1, w8, lo
-; CHECK-CVT-NEXT: cmp w2, #255
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: csel w9, w2, w8, lo
-; CHECK-CVT-NEXT: cmp w3, #255
-; CHECK-CVT-NEXT: fcvtzu w2, s1
-; CHECK-CVT-NEXT: csel w1, w3, w8, lo
-; CHECK-CVT-NEXT: fmov s1, w13
-; CHECK-CVT-NEXT: mov v3.s[1], w17
-; CHECK-CVT-NEXT: fmov s4, w1
-; CHECK-CVT-NEXT: mov v2.s[2], w11
-; CHECK-CVT-NEXT: mov v1.s[1], w14
-; CHECK-CVT-NEXT: cmp w2, #255
-; CHECK-CVT-NEXT: mov v4.s[1], w9
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: csel w11, w2, w8, lo
-; CHECK-CVT-NEXT: mov v3.s[2], w0
-; CHECK-CVT-NEXT: mov v2.s[3], w12
-; CHECK-CVT-NEXT: mov v1.s[2], w15
-; CHECK-CVT-NEXT: mov v4.s[2], w11
-; CHECK-CVT-NEXT: cmp w9, #255
-; CHECK-CVT-NEXT: csel w8, w9, w8, lo
-; CHECK-CVT-NEXT: mov v3.s[3], w10
-; CHECK-CVT-NEXT: mov v1.s[3], w16
-; CHECK-CVT-NEXT: mov v4.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h
-; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h
-; CHECK-CVT-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; CHECK-CVT-NEXT: movi v2.2d, #0x0000ff000000ff
+; CHECK-CVT-NEXT: fcvtzu v3.4s, v3.4s
+; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-NEXT: fcvtzu v4.4s, v4.4s
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v3.4s, v3.4s, v2.4s
+; CHECK-CVT-NEXT: umin v1.4s, v1.4s, v2.4s
+; CHECK-CVT-NEXT: umin v4.4s, v4.4s, v2.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v3.8h
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v4.8h
+; CHECK-CVT-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i8:
@@ -2609,89 +2404,18 @@ define <16 x i8> @test_unsigned_v16f16_v16i8(<16 x half> %f) {
define <16 x i16> @test_unsigned_v16f16_v16i16(<16 x half> %f) {
; CHECK-CVT-LABEL: test_unsigned_v16f16_v16i16:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w8, #65535 // =0xffff
-; CHECK-CVT-NEXT: mov s3, v2.s[1]
-; CHECK-CVT-NEXT: mov s4, v2.s[2]
-; CHECK-CVT-NEXT: mov s5, v2.s[3]
-; CHECK-CVT-NEXT: fcvtzu w10, s2
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtzu w13, s0
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtzu w9, s3
-; CHECK-CVT-NEXT: mov s3, v0.s[1]
-; CHECK-CVT-NEXT: fcvtzu w11, s4
-; CHECK-CVT-NEXT: mov s4, v0.s[2]
-; CHECK-CVT-NEXT: fcvtzu w12, s5
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: fcvtzu w18, s2
-; CHECK-CVT-NEXT: fcvtzu w3, s1
-; CHECK-CVT-NEXT: fcvtzu w14, s3
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: mov s3, v2.s[1]
-; CHECK-CVT-NEXT: csel w9, w9, w8, lo
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: fcvtzu w15, s4
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: mov s4, v2.s[2]
-; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: fcvtzu w16, s0
-; CHECK-CVT-NEXT: mov s0, v2.s[3]
-; CHECK-CVT-NEXT: csel w12, w12, w8, lo
-; CHECK-CVT-NEXT: cmp w14, w8
-; CHECK-CVT-NEXT: fcvtzu w17, s3
-; CHECK-CVT-NEXT: mov s3, v1.s[1]
-; CHECK-CVT-NEXT: csel w14, w14, w8, lo
-; CHECK-CVT-NEXT: cmp w13, w8
-; CHECK-CVT-NEXT: fcvtzu w0, s4
-; CHECK-CVT-NEXT: fmov s2, w10
-; CHECK-CVT-NEXT: csel w13, w13, w8, lo
-; CHECK-CVT-NEXT: cmp w15, w8
-; CHECK-CVT-NEXT: csel w15, w15, w8, lo
-; CHECK-CVT-NEXT: cmp w16, w8
-; CHECK-CVT-NEXT: fcvtzu w1, s0
-; CHECK-CVT-NEXT: csel w16, w16, w8, lo
-; CHECK-CVT-NEXT: cmp w17, w8
-; CHECK-CVT-NEXT: fcvtzu w2, s3
-; CHECK-CVT-NEXT: csel w17, w17, w8, lo
-; CHECK-CVT-NEXT: cmp w18, w8
-; CHECK-CVT-NEXT: mov s0, v1.s[2]
-; CHECK-CVT-NEXT: csel w18, w18, w8, lo
-; CHECK-CVT-NEXT: cmp w0, w8
-; CHECK-CVT-NEXT: mov v2.s[1], w9
-; CHECK-CVT-NEXT: csel w0, w0, w8, lo
-; CHECK-CVT-NEXT: cmp w1, w8
-; CHECK-CVT-NEXT: fmov s3, w18
-; CHECK-CVT-NEXT: csel w10, w1, w8, lo
-; CHECK-CVT-NEXT: cmp w2, w8
-; CHECK-CVT-NEXT: csel w9, w2, w8, lo
-; CHECK-CVT-NEXT: cmp w3, w8
-; CHECK-CVT-NEXT: fcvtzu w2, s0
-; CHECK-CVT-NEXT: csel w1, w3, w8, lo
-; CHECK-CVT-NEXT: mov s0, v1.s[3]
-; CHECK-CVT-NEXT: fmov s1, w13
-; CHECK-CVT-NEXT: fmov s4, w1
-; CHECK-CVT-NEXT: mov v3.s[1], w17
-; CHECK-CVT-NEXT: mov v2.s[2], w11
-; CHECK-CVT-NEXT: mov v1.s[1], w14
-; CHECK-CVT-NEXT: cmp w2, w8
-; CHECK-CVT-NEXT: mov v4.s[1], w9
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: csel w11, w2, w8, lo
-; CHECK-CVT-NEXT: mov v3.s[2], w0
-; CHECK-CVT-NEXT: mov v2.s[3], w12
-; CHECK-CVT-NEXT: mov v1.s[2], w15
-; CHECK-CVT-NEXT: mov v4.s[2], w11
-; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: csel w8, w9, w8, lo
-; CHECK-CVT-NEXT: mov v3.s[3], w10
-; CHECK-CVT-NEXT: mov v1.s[3], w16
-; CHECK-CVT-NEXT: mov v4.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h
-; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h
+; CHECK-CVT-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl2 v5.4s, v1.8h
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v1.4s, v3.4s
+; CHECK-CVT-NEXT: fcvtzu v3.4s, v5.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v4.4s
+; CHECK-CVT-NEXT: uqxtn v1.4h, v1.4s
+; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT: uqxtn2 v1.8h, v3.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i16:
More information about the llvm-commits
mailing list