[llvm] 594d359 - [AArch64] Split v8f32 fptosi_sat into two v4f32.

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 28 02:47:45 PDT 2024


Author: David Green
Date: 2024-07-28T10:19:44+01:00
New Revision: 594d3593fec2ae15f6dd38c51fba8bb1f9828089

URL: https://github.com/llvm/llvm-project/commit/594d3593fec2ae15f6dd38c51fba8bb1f9828089
DIFF: https://github.com/llvm/llvm-project/commit/594d3593fec2ae15f6dd38c51fba8bb1f9828089.diff

LOG: [AArch64] Split v8f32 fptosi_sat into two v4f32.

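When the f16/bf16 source of a saturating fp-to-int conversion (fptosi_sat or
fptoui_sat) is promoted to f32, a v8f16 input produces an illegal v8f32 type.
If we produce illegal v8f32 types, the VectorLegalizer will unroll them,
scalarizing the operations. In this patch we pre-split them into two v4f32
halves during custom legalization to produce better results.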

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/fcvt_combine.ll
    llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d86e52d49000a..1e9da9b819bdd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4508,11 +4508,19 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
   EVT SrcElementVT = SrcVT.getVectorElementType();
 
   // In the absence of FP16 support, promote f16 to f32 and saturate the result.
+  SDLoc DL(Op);
+  SDValue SrcVal2;
   if ((SrcElementVT == MVT::f16 &&
        (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
       SrcElementVT == MVT::bf16) {
     MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
-    SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
+    SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F32VT, SrcVal);
+    // If we are extending to a v8f32, split into two v4f32 to produce legal
+    // types.
+    if (F32VT.getSizeInBits() > 128) {
+      std::tie(SrcVal, SrcVal2) = DAG.SplitVector(SrcVal, DL);
+      F32VT = F32VT.getHalfNumVectorElementsVT();
+    }
     SrcVT = F32VT;
     SrcElementVT = MVT::f32;
     SrcElementWidth = 32;
@@ -4520,9 +4528,8 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
              SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
     return SDValue();
 
-  SDLoc DL(Op);
-  // Expand to f64 if we are saturating to i64, to help produce keep the lanes
-  // the same width and produce a fcvtzu.
+  // Expand to f64 if we are saturating to i64, to help keep the lanes the same
+  // width and produce a fcvtzu.
   if (SatWidth == 64 && SrcElementWidth < 64) {
     MVT F64VT = MVT::getVectorVT(MVT::f64, SrcVT.getVectorNumElements());
     SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
@@ -4531,9 +4538,16 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
     SrcElementWidth = 64;
   }
   // Cases that we can emit directly.
-  if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
-    return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
-                       DAG.getValueType(DstVT.getScalarType()));
+  if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
+    SDValue Res = DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
+                              DAG.getValueType(DstVT.getScalarType()));
+    if (SrcVal2) {
+      SDValue Res2 = DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal2,
+                                 DAG.getValueType(DstVT.getScalarType()));
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Res, Res2);
+    }
+    return Res;
+  }
 
   // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
   // result. This is only valid if the legal cvt is larger than the saturate
@@ -4545,20 +4559,32 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
   EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
   SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
                                   DAG.getValueType(IntVT.getScalarType()));
-  SDValue Sat;
+  SDValue NativeCvt2 =
+      SrcVal2 ? DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal2,
+                            DAG.getValueType(IntVT.getScalarType()))
+              : SDValue();
+  SDValue Sat, Sat2;
   if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
     SDValue MinC = DAG.getConstant(
         APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
     SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
+    SDValue Min2 = SrcVal2 ? DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt2, MinC) : SDValue();
     SDValue MaxC = DAG.getConstant(
         APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
     Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
+    Sat2 = SrcVal2 ? DAG.getNode(ISD::SMAX, DL, IntVT, Min2, MaxC) : SDValue();
   } else {
     SDValue MinC = DAG.getConstant(
         APInt::getAllOnes(SatWidth).zext(SrcElementWidth), DL, IntVT);
     Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
+    Sat2 = SrcVal2 ? DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt2, MinC) : SDValue();
   }
 
+  if (SrcVal2)
+    Sat = DAG.getNode(ISD::CONCAT_VECTORS, DL,
+                      IntVT.getDoubleNumVectorElementsVT(*DAG.getContext()),
+                      Sat, Sat2);
+
   return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index 62669a6d99eae..251d7a424175b 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -466,72 +466,19 @@ define <8 x i16> @test_v8f16_sat(<8 x half> %in) {
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    movi v1.8h, #68, lsl #8
 ; CHECK-NO16-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-NO16-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NO16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-NO16-NEXT:    mov w11, #-32768 // =0xffff8000
 ; CHECK-NO16-NEXT:    fcvtl v3.4s, v1.4h
 ; CHECK-NO16-NEXT:    fcvtl2 v1.4s, v1.8h
 ; CHECK-NO16-NEXT:    fmul v2.4s, v2.4s, v3.4s
 ; CHECK-NO16-NEXT:    fmul v0.4s, v0.4s, v1.4s
 ; CHECK-NO16-NEXT:    fcvtn v1.4h, v2.4s
 ; CHECK-NO16-NEXT:    fcvtn2 v1.8h, v0.4s
-; CHECK-NO16-NEXT:    fcvtl2 v0.4s, v1.8h
-; CHECK-NO16-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-NO16-NEXT:    mov s2, v0.s[1]
-; CHECK-NO16-NEXT:    fcvtzs w10, s0
-; CHECK-NO16-NEXT:    fcvtzs w15, s1
-; CHECK-NO16-NEXT:    fcvtzs w9, s2
-; CHECK-NO16-NEXT:    mov s2, v0.s[2]
-; CHECK-NO16-NEXT:    mov s0, v0.s[3]
-; CHECK-NO16-NEXT:    cmp w9, w8
-; CHECK-NO16-NEXT:    fcvtzs w12, s2
-; CHECK-NO16-NEXT:    mov s2, v1.s[1]
-; CHECK-NO16-NEXT:    csel w9, w9, w8, lt
-; CHECK-NO16-NEXT:    fcvtzs w13, s0
-; CHECK-NO16-NEXT:    mov s0, v1.s[2]
-; CHECK-NO16-NEXT:    cmn w9, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    csel w9, w9, w11, gt
-; CHECK-NO16-NEXT:    cmp w10, w8
-; CHECK-NO16-NEXT:    csel w10, w10, w8, lt
-; CHECK-NO16-NEXT:    fcvtzs w14, s2
-; CHECK-NO16-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    fcvtzs w16, s0
-; CHECK-NO16-NEXT:    mov s0, v1.s[3]
-; CHECK-NO16-NEXT:    csel w10, w10, w11, gt
-; CHECK-NO16-NEXT:    cmp w12, w8
-; CHECK-NO16-NEXT:    csel w12, w12, w8, lt
-; CHECK-NO16-NEXT:    fmov s1, w10
-; CHECK-NO16-NEXT:    cmn w12, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    csel w12, w12, w11, gt
-; CHECK-NO16-NEXT:    cmp w13, w8
-; CHECK-NO16-NEXT:    csel w13, w13, w8, lt
-; CHECK-NO16-NEXT:    mov v1.s[1], w9
-; CHECK-NO16-NEXT:    fcvtzs w9, s0
-; CHECK-NO16-NEXT:    cmn w13, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    csel w13, w13, w11, gt
-; CHECK-NO16-NEXT:    cmp w14, w8
-; CHECK-NO16-NEXT:    csel w14, w14, w8, lt
-; CHECK-NO16-NEXT:    cmn w14, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    mov v1.s[2], w12
-; CHECK-NO16-NEXT:    csel w14, w14, w11, gt
-; CHECK-NO16-NEXT:    cmp w15, w8
-; CHECK-NO16-NEXT:    csel w15, w15, w8, lt
-; CHECK-NO16-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    csel w10, w15, w11, gt
-; CHECK-NO16-NEXT:    cmp w16, w8
-; CHECK-NO16-NEXT:    mov v1.s[3], w13
-; CHECK-NO16-NEXT:    fmov s2, w10
-; CHECK-NO16-NEXT:    csel w10, w16, w8, lt
-; CHECK-NO16-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    csel w10, w10, w11, gt
-; CHECK-NO16-NEXT:    cmp w9, w8
-; CHECK-NO16-NEXT:    mov v2.s[1], w14
-; CHECK-NO16-NEXT:    csel w8, w9, w8, lt
-; CHECK-NO16-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    csel w8, w8, w11, gt
-; CHECK-NO16-NEXT:    mov v2.s[2], w10
-; CHECK-NO16-NEXT:    mov v2.s[3], w8
-; CHECK-NO16-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-NO16-NEXT:    fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT:    sqxtn2 v0.8h, v1.4s
 ; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_v8f16_sat:

diff  --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 91c8b7f345e32..4626fd7f2b3dd 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -2014,47 +2014,17 @@ declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>)
 define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
 ; CHECK-CVT-LABEL: test_signed_v8f16_v8i1:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov s2, v1.s[1]
-; CHECK-CVT-NEXT:    fcvtzs w9, s1
-; CHECK-CVT-NEXT:    fcvtzs w13, s0
-; CHECK-CVT-NEXT:    fcvtzs w8, s2
-; CHECK-CVT-NEXT:    mov s2, v1.s[2]
-; CHECK-CVT-NEXT:    mov s1, v1.s[3]
-; CHECK-CVT-NEXT:    ands w8, w8, w8, asr #31
-; CHECK-CVT-NEXT:    fcvtzs w10, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    fcvtzs w11, s1
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT:    ands w9, w9, w9, asr #31
-; CHECK-CVT-NEXT:    csinv w9, w9, wzr, ge
-; CHECK-CVT-NEXT:    ands w10, w10, w10, asr #31
-; CHECK-CVT-NEXT:    fcvtzs w12, s2
-; CHECK-CVT-NEXT:    fcvtzs w14, s1
-; CHECK-CVT-NEXT:    fmov s1, w9
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    csinv w10, w10, wzr, ge
-; CHECK-CVT-NEXT:    ands w11, w11, w11, asr #31
-; CHECK-CVT-NEXT:    csinv w11, w11, wzr, ge
-; CHECK-CVT-NEXT:    ands w12, w12, w12, asr #31
-; CHECK-CVT-NEXT:    mov v1.s[1], w8
-; CHECK-CVT-NEXT:    csinv w12, w12, wzr, ge
-; CHECK-CVT-NEXT:    ands w13, w13, w13, asr #31
-; CHECK-CVT-NEXT:    csinv w13, w13, wzr, ge
-; CHECK-CVT-NEXT:    ands w8, w14, w14, asr #31
-; CHECK-CVT-NEXT:    mov v1.s[2], w10
-; CHECK-CVT-NEXT:    fmov s2, w13
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT:    mov v2.s[1], w12
-; CHECK-CVT-NEXT:    mov v1.s[3], w11
-; CHECK-CVT-NEXT:    mov v2.s[2], w8
-; CHECK-CVT-NEXT:    ands w8, w9, w9, asr #31
-; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-CVT-NEXT:    movi v3.2d, #0xffffffffffffffff
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    smax v1.4s, v2.4s, v3.4s
+; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v3.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
@@ -2074,65 +2044,17 @@ define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
 define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
 ; CHECK-CVT-LABEL: test_signed_v8f16_v8i8:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT:    mov w8, #127 // =0x7f
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov w11, #-128 // =0xffffff80
-; CHECK-CVT-NEXT:    mov s2, v1.s[1]
-; CHECK-CVT-NEXT:    fcvtzs w10, s1
-; CHECK-CVT-NEXT:    fcvtzs w15, s0
-; CHECK-CVT-NEXT:    fcvtzs w9, s2
-; CHECK-CVT-NEXT:    mov s2, v1.s[2]
-; CHECK-CVT-NEXT:    mov s1, v1.s[3]
-; CHECK-CVT-NEXT:    cmp w9, #127
-; CHECK-CVT-NEXT:    fcvtzs w12, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    csel w9, w9, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w13, s1
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    cmn w9, #128
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    csel w9, w9, w11, gt
-; CHECK-CVT-NEXT:    cmp w10, #127
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w14, s2
-; CHECK-CVT-NEXT:    cmn w10, #128
-; CHECK-CVT-NEXT:    fcvtzs w16, s1
-; CHECK-CVT-NEXT:    csel w10, w10, w11, gt
-; CHECK-CVT-NEXT:    cmp w12, #127
-; CHECK-CVT-NEXT:    csel w12, w12, w8, lt
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    cmn w12, #128
-; CHECK-CVT-NEXT:    csel w12, w12, w11, gt
-; CHECK-CVT-NEXT:    cmp w13, #127
-; CHECK-CVT-NEXT:    csel w13, w13, w8, lt
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    cmn w13, #128
-; CHECK-CVT-NEXT:    csel w13, w13, w11, gt
-; CHECK-CVT-NEXT:    cmp w14, #127
-; CHECK-CVT-NEXT:    csel w14, w14, w8, lt
-; CHECK-CVT-NEXT:    cmn w14, #128
-; CHECK-CVT-NEXT:    mov v1.s[2], w12
-; CHECK-CVT-NEXT:    csel w14, w14, w11, gt
-; CHECK-CVT-NEXT:    cmp w15, #127
-; CHECK-CVT-NEXT:    csel w15, w15, w8, lt
-; CHECK-CVT-NEXT:    cmn w15, #128
-; CHECK-CVT-NEXT:    csel w10, w15, w11, gt
-; CHECK-CVT-NEXT:    cmp w16, #127
-; CHECK-CVT-NEXT:    mov v1.s[3], w13
-; CHECK-CVT-NEXT:    fmov s2, w10
-; CHECK-CVT-NEXT:    csel w10, w16, w8, lt
-; CHECK-CVT-NEXT:    cmn w10, #128
-; CHECK-CVT-NEXT:    csel w10, w10, w11, gt
-; CHECK-CVT-NEXT:    cmp w9, #127
-; CHECK-CVT-NEXT:    mov v2.s[1], w14
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lt
-; CHECK-CVT-NEXT:    cmn w8, #128
-; CHECK-CVT-NEXT:    csel w8, w8, w11, gt
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT:    movi v1.4s, #127
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    mvni v1.4s, #127
+; CHECK-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
@@ -2148,65 +2070,17 @@ define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
 define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
 ; CHECK-CVT-LABEL: test_signed_v8f16_v8i13:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT:    mov w8, #4095 // =0xfff
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov w11, #-4096 // =0xfffff000
-; CHECK-CVT-NEXT:    mov s2, v1.s[1]
-; CHECK-CVT-NEXT:    fcvtzs w10, s1
-; CHECK-CVT-NEXT:    fcvtzs w15, s0
-; CHECK-CVT-NEXT:    fcvtzs w9, s2
-; CHECK-CVT-NEXT:    mov s2, v1.s[2]
-; CHECK-CVT-NEXT:    mov s1, v1.s[3]
-; CHECK-CVT-NEXT:    cmp w9, #4095
-; CHECK-CVT-NEXT:    fcvtzs w12, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    csel w9, w9, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w13, s1
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    cmn w9, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    csel w9, w9, w11, gt
-; CHECK-CVT-NEXT:    cmp w10, #4095
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w14, s2
-; CHECK-CVT-NEXT:    cmn w10, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    fcvtzs w16, s1
-; CHECK-CVT-NEXT:    csel w10, w10, w11, gt
-; CHECK-CVT-NEXT:    cmp w12, #4095
-; CHECK-CVT-NEXT:    csel w12, w12, w8, lt
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    cmn w12, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    csel w12, w12, w11, gt
-; CHECK-CVT-NEXT:    cmp w13, #4095
-; CHECK-CVT-NEXT:    csel w13, w13, w8, lt
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    cmn w13, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    csel w13, w13, w11, gt
-; CHECK-CVT-NEXT:    cmp w14, #4095
-; CHECK-CVT-NEXT:    csel w14, w14, w8, lt
-; CHECK-CVT-NEXT:    cmn w14, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    mov v1.s[2], w12
-; CHECK-CVT-NEXT:    csel w14, w14, w11, gt
-; CHECK-CVT-NEXT:    cmp w15, #4095
-; CHECK-CVT-NEXT:    csel w15, w15, w8, lt
-; CHECK-CVT-NEXT:    cmn w15, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    csel w10, w15, w11, gt
-; CHECK-CVT-NEXT:    cmp w16, #4095
-; CHECK-CVT-NEXT:    mov v1.s[3], w13
-; CHECK-CVT-NEXT:    fmov s2, w10
-; CHECK-CVT-NEXT:    csel w10, w16, w8, lt
-; CHECK-CVT-NEXT:    cmn w10, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    csel w10, w10, w11, gt
-; CHECK-CVT-NEXT:    cmp w9, #4095
-; CHECK-CVT-NEXT:    mov v2.s[1], w14
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lt
-; CHECK-CVT-NEXT:    cmn w8, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    csel w8, w8, w11, gt
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT:    movi v1.4s, #15, msl #8
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    mvni v1.4s, #15, msl #8
+; CHECK-CVT-NEXT:    smax v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13:
@@ -2224,65 +2098,12 @@ define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
 define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
 ; CHECK-CVT-LABEL: test_signed_v8f16_v8i16:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov w11, #-32768 // =0xffff8000
-; CHECK-CVT-NEXT:    mov s2, v1.s[1]
-; CHECK-CVT-NEXT:    fcvtzs w10, s1
-; CHECK-CVT-NEXT:    fcvtzs w15, s0
-; CHECK-CVT-NEXT:    fcvtzs w9, s2
-; CHECK-CVT-NEXT:    mov s2, v1.s[2]
-; CHECK-CVT-NEXT:    mov s1, v1.s[3]
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    fcvtzs w12, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    csel w9, w9, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w13, s1
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    cmn w9, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    csel w9, w9, w11, gt
-; CHECK-CVT-NEXT:    cmp w10, w8
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w14, s2
-; CHECK-CVT-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    fcvtzs w16, s1
-; CHECK-CVT-NEXT:    csel w10, w10, w11, gt
-; CHECK-CVT-NEXT:    cmp w12, w8
-; CHECK-CVT-NEXT:    csel w12, w12, w8, lt
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    cmn w12, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w12, w12, w11, gt
-; CHECK-CVT-NEXT:    cmp w13, w8
-; CHECK-CVT-NEXT:    csel w13, w13, w8, lt
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    fcvtzs w9, s0
-; CHECK-CVT-NEXT:    cmn w13, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w13, w13, w11, gt
-; CHECK-CVT-NEXT:    cmp w14, w8
-; CHECK-CVT-NEXT:    csel w14, w14, w8, lt
-; CHECK-CVT-NEXT:    cmn w14, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    mov v1.s[2], w12
-; CHECK-CVT-NEXT:    csel w14, w14, w11, gt
-; CHECK-CVT-NEXT:    cmp w15, w8
-; CHECK-CVT-NEXT:    csel w15, w15, w8, lt
-; CHECK-CVT-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w10, w15, w11, gt
-; CHECK-CVT-NEXT:    cmp w16, w8
-; CHECK-CVT-NEXT:    mov v1.s[3], w13
-; CHECK-CVT-NEXT:    fmov s2, w10
-; CHECK-CVT-NEXT:    csel w10, w16, w8, lt
-; CHECK-CVT-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w10, w10, w11, gt
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    mov v2.s[1], w14
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lt
-; CHECK-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w8, w8, w11, gt
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-CVT-NEXT:    sqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT:    fcvtzs v1.4s, v2.4s
+; CHECK-CVT-NEXT:    sqxtn2 v0.8h, v1.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16:
@@ -2984,123 +2805,27 @@ define <16 x i16> @test_signed_v16f32_v16i16(<16 x float> %f) {
 define <16 x i8> @test_signed_v16f16_v16i8(<16 x half> %f) {
 ; CHECK-CVT-LABEL: test_signed_v16f16_v16i8:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT:    mov w8, #127 // =0x7f
+; CHECK-CVT-NEXT:    fcvtl2 v3.4s, v1.8h
 ; CHECK-CVT-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT:    mov s3, v2.s[1]
-; CHECK-CVT-NEXT:    fcvtzs w10, s2
-; CHECK-CVT-NEXT:    fcvtzs w16, s1
-; CHECK-CVT-NEXT:    fcvtzs w9, s3
-; CHECK-CVT-NEXT:    mov s3, v2.s[2]
-; CHECK-CVT-NEXT:    mov s2, v2.s[3]
-; CHECK-CVT-NEXT:    cmp w9, #127
-; CHECK-CVT-NEXT:    fcvtzs w12, s3
-; CHECK-CVT-NEXT:    mov s3, v1.s[1]
-; CHECK-CVT-NEXT:    csel w11, w9, w8, lt
-; CHECK-CVT-NEXT:    mov w9, #-128 // =0xffffff80
-; CHECK-CVT-NEXT:    fcvtzs w14, s2
-; CHECK-CVT-NEXT:    cmn w11, #128
-; CHECK-CVT-NEXT:    mov s2, v1.s[2]
-; CHECK-CVT-NEXT:    mov s1, v1.s[3]
-; CHECK-CVT-NEXT:    csel w11, w11, w9, gt
-; CHECK-CVT-NEXT:    cmp w10, #127
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w15, s3
-; CHECK-CVT-NEXT:    fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT:    cmn w10, #128
+; CHECK-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    csel w13, w10, w9, gt
-; CHECK-CVT-NEXT:    cmp w12, #127
-; CHECK-CVT-NEXT:    fcvtzs w17, s1
-; CHECK-CVT-NEXT:    csel w10, w12, w8, lt
-; CHECK-CVT-NEXT:    cmn w10, #128
-; CHECK-CVT-NEXT:    mov s1, v3.s[2]
-; CHECK-CVT-NEXT:    fcvtzs w0, s3
-; CHECK-CVT-NEXT:    csel w10, w10, w9, gt
-; CHECK-CVT-NEXT:    cmp w14, #127
-; CHECK-CVT-NEXT:    fcvtzs w4, s0
-; CHECK-CVT-NEXT:    csel w12, w14, w8, lt
-; CHECK-CVT-NEXT:    cmn w12, #128
-; CHECK-CVT-NEXT:    csel w12, w12, w9, gt
-; CHECK-CVT-NEXT:    cmp w15, #127
-; CHECK-CVT-NEXT:    fcvtzs w1, s1
-; CHECK-CVT-NEXT:    csel w14, w15, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w15, s2
-; CHECK-CVT-NEXT:    mov s2, v3.s[1]
-; CHECK-CVT-NEXT:    cmn w14, #128
-; CHECK-CVT-NEXT:    mov s1, v0.s[1]
-; CHECK-CVT-NEXT:    csel w14, w14, w9, gt
-; CHECK-CVT-NEXT:    cmp w16, #127
-; CHECK-CVT-NEXT:    csel w16, w16, w8, lt
-; CHECK-CVT-NEXT:    cmn w16, #128
-; CHECK-CVT-NEXT:    fcvtzs w18, s2
-; CHECK-CVT-NEXT:    mov s2, v3.s[3]
-; CHECK-CVT-NEXT:    csel w16, w16, w9, gt
-; CHECK-CVT-NEXT:    cmp w15, #127
-; CHECK-CVT-NEXT:    fcvtzs w3, s1
-; CHECK-CVT-NEXT:    csel w15, w15, w8, lt
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    cmn w15, #128
-; CHECK-CVT-NEXT:    csel w15, w15, w9, gt
-; CHECK-CVT-NEXT:    cmp w17, #127
-; CHECK-CVT-NEXT:    fcvtzs w2, s2
-; CHECK-CVT-NEXT:    csel w17, w17, w8, lt
-; CHECK-CVT-NEXT:    fmov s2, w13
-; CHECK-CVT-NEXT:    cmn w17, #128
-; CHECK-CVT-NEXT:    csel w17, w17, w9, gt
-; CHECK-CVT-NEXT:    cmp w18, #127
-; CHECK-CVT-NEXT:    csel w18, w18, w8, lt
-; CHECK-CVT-NEXT:    mov v2.s[1], w11
-; CHECK-CVT-NEXT:    cmn w18, #128
-; CHECK-CVT-NEXT:    csel w18, w18, w9, gt
-; CHECK-CVT-NEXT:    cmp w0, #127
-; CHECK-CVT-NEXT:    csel w0, w0, w8, lt
-; CHECK-CVT-NEXT:    cmn w0, #128
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    csel w0, w0, w9, gt
-; CHECK-CVT-NEXT:    cmp w1, #127
-; CHECK-CVT-NEXT:    csel w1, w1, w8, lt
-; CHECK-CVT-NEXT:    fmov s3, w0
-; CHECK-CVT-NEXT:    cmn w1, #128
-; CHECK-CVT-NEXT:    csel w1, w1, w9, gt
-; CHECK-CVT-NEXT:    cmp w2, #127
-; CHECK-CVT-NEXT:    mov v2.s[3], w12
-; CHECK-CVT-NEXT:    csel w2, w2, w8, lt
-; CHECK-CVT-NEXT:    mov v3.s[1], w18
-; CHECK-CVT-NEXT:    cmn w2, #128
-; CHECK-CVT-NEXT:    csel w2, w2, w9, gt
-; CHECK-CVT-NEXT:    cmp w3, #127
-; CHECK-CVT-NEXT:    csel w3, w3, w8, lt
-; CHECK-CVT-NEXT:    cmn w3, #128
-; CHECK-CVT-NEXT:    mov v3.s[2], w1
-; CHECK-CVT-NEXT:    csel w13, w3, w9, gt
-; CHECK-CVT-NEXT:    cmp w4, #127
-; CHECK-CVT-NEXT:    csel w3, w4, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w4, s1
-; CHECK-CVT-NEXT:    fmov s1, w16
-; CHECK-CVT-NEXT:    cmn w3, #128
-; CHECK-CVT-NEXT:    csel w11, w3, w9, gt
-; CHECK-CVT-NEXT:    mov v3.s[3], w2
-; CHECK-CVT-NEXT:    fmov s4, w11
-; CHECK-CVT-NEXT:    mov v1.s[1], w14
-; CHECK-CVT-NEXT:    fcvtzs w11, s0
-; CHECK-CVT-NEXT:    cmp w4, #127
-; CHECK-CVT-NEXT:    mov v4.s[1], w13
-; CHECK-CVT-NEXT:    csel w13, w4, w8, lt
-; CHECK-CVT-NEXT:    cmn w13, #128
-; CHECK-CVT-NEXT:    mov v1.s[2], w15
-; CHECK-CVT-NEXT:    csel w10, w13, w9, gt
-; CHECK-CVT-NEXT:    cmp w11, #127
-; CHECK-CVT-NEXT:    csel w8, w11, w8, lt
-; CHECK-CVT-NEXT:    mov v4.s[2], w10
-; CHECK-CVT-NEXT:    cmn w8, #128
-; CHECK-CVT-NEXT:    csel w8, w8, w9, gt
-; CHECK-CVT-NEXT:    mov v1.s[3], w17
-; CHECK-CVT-NEXT:    mov v4.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
-; CHECK-CVT-NEXT:    uzp1 v1.8h, v4.8h, v3.8h
-; CHECK-CVT-NEXT:    uzp1 v0.16b, v1.16b, v0.16b
+; CHECK-CVT-NEXT:    movi v2.4s, #127
+; CHECK-CVT-NEXT:    fcvtzs v3.4s, v3.4s
+; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-CVT-NEXT:    fcvtzs v4.4s, v4.4s
+; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT:    smin v3.4s, v3.4s, v2.4s
+; CHECK-CVT-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-CVT-NEXT:    smin v4.4s, v4.4s, v2.4s
+; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v2.4s
+; CHECK-CVT-NEXT:    mvni v2.4s, #127
+; CHECK-CVT-NEXT:    smax v3.4s, v3.4s, v2.4s
+; CHECK-CVT-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-CVT-NEXT:    smax v4.4s, v4.4s, v2.4s
+; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v2.4s
+; CHECK-CVT-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
+; CHECK-CVT-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v16f16_v16i8:
@@ -3117,122 +2842,18 @@ define <16 x i8> @test_signed_v16f16_v16i8(<16 x half> %f) {
 define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) {
 ; CHECK-CVT-LABEL: test_signed_v16f16_v16i16:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov s3, v2.s[1]
-; CHECK-CVT-NEXT:    fcvtzs w10, s2
-; CHECK-CVT-NEXT:    fcvtzs w16, s0
-; CHECK-CVT-NEXT:    fcvtzs w9, s3
-; CHECK-CVT-NEXT:    mov s3, v2.s[2]
-; CHECK-CVT-NEXT:    mov s2, v2.s[3]
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    fcvtzs w12, s3
-; CHECK-CVT-NEXT:    mov s3, v0.s[1]
-; CHECK-CVT-NEXT:    csel w11, w9, w8, lt
-; CHECK-CVT-NEXT:    mov w9, #-32768 // =0xffff8000
-; CHECK-CVT-NEXT:    fcvtzs w14, s2
-; CHECK-CVT-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    mov s2, v0.s[2]
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    csel w11, w11, w9, gt
-; CHECK-CVT-NEXT:    cmp w10, w8
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w15, s3
-; CHECK-CVT-NEXT:    fcvtl2 v3.4s, v1.8h
-; CHECK-CVT-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT:    csel w13, w10, w9, gt
-; CHECK-CVT-NEXT:    cmp w12, w8
-; CHECK-CVT-NEXT:    fcvtzs w17, s0
-; CHECK-CVT-NEXT:    csel w10, w12, w8, lt
-; CHECK-CVT-NEXT:    cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    mov s0, v3.s[2]
-; CHECK-CVT-NEXT:    fcvtzs w0, s3
-; CHECK-CVT-NEXT:    csel w10, w10, w9, gt
-; CHECK-CVT-NEXT:    cmp w14, w8
-; CHECK-CVT-NEXT:    fcvtzs w4, s1
-; CHECK-CVT-NEXT:    csel w12, w14, w8, lt
-; CHECK-CVT-NEXT:    cmn w12, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w12, w12, w9, gt
-; CHECK-CVT-NEXT:    cmp w15, w8
-; CHECK-CVT-NEXT:    fcvtzs w1, s0
-; CHECK-CVT-NEXT:    csel w14, w15, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w15, s2
-; CHECK-CVT-NEXT:    mov s2, v3.s[1]
-; CHECK-CVT-NEXT:    cmn w14, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    mov s0, v1.s[1]
-; CHECK-CVT-NEXT:    csel w14, w14, w9, gt
-; CHECK-CVT-NEXT:    cmp w16, w8
-; CHECK-CVT-NEXT:    csel w16, w16, w8, lt
-; CHECK-CVT-NEXT:    cmn w16, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    fcvtzs w18, s2
-; CHECK-CVT-NEXT:    mov s2, v3.s[3]
-; CHECK-CVT-NEXT:    csel w16, w16, w9, gt
-; CHECK-CVT-NEXT:    cmp w15, w8
-; CHECK-CVT-NEXT:    fcvtzs w3, s0
-; CHECK-CVT-NEXT:    csel w15, w15, w8, lt
-; CHECK-CVT-NEXT:    mov s0, v1.s[2]
-; CHECK-CVT-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w15, w15, w9, gt
-; CHECK-CVT-NEXT:    cmp w17, w8
-; CHECK-CVT-NEXT:    fcvtzs w2, s2
-; CHECK-CVT-NEXT:    csel w17, w17, w8, lt
-; CHECK-CVT-NEXT:    fmov s2, w13
-; CHECK-CVT-NEXT:    cmn w17, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w17, w17, w9, gt
-; CHECK-CVT-NEXT:    cmp w18, w8
-; CHECK-CVT-NEXT:    csel w18, w18, w8, lt
-; CHECK-CVT-NEXT:    mov v2.s[1], w11
-; CHECK-CVT-NEXT:    cmn w18, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w18, w18, w9, gt
-; CHECK-CVT-NEXT:    cmp w0, w8
-; CHECK-CVT-NEXT:    csel w0, w0, w8, lt
-; CHECK-CVT-NEXT:    cmn w0, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    csel w0, w0, w9, gt
-; CHECK-CVT-NEXT:    cmp w1, w8
-; CHECK-CVT-NEXT:    csel w1, w1, w8, lt
-; CHECK-CVT-NEXT:    fmov s3, w0
-; CHECK-CVT-NEXT:    cmn w1, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w1, w1, w9, gt
-; CHECK-CVT-NEXT:    cmp w2, w8
-; CHECK-CVT-NEXT:    mov v2.s[3], w12
-; CHECK-CVT-NEXT:    csel w2, w2, w8, lt
-; CHECK-CVT-NEXT:    mov v3.s[1], w18
-; CHECK-CVT-NEXT:    cmn w2, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w2, w2, w9, gt
-; CHECK-CVT-NEXT:    cmp w3, w8
-; CHECK-CVT-NEXT:    csel w3, w3, w8, lt
-; CHECK-CVT-NEXT:    cmn w3, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    mov v3.s[2], w1
-; CHECK-CVT-NEXT:    csel w13, w3, w9, gt
-; CHECK-CVT-NEXT:    cmp w4, w8
-; CHECK-CVT-NEXT:    csel w3, w4, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w4, s0
-; CHECK-CVT-NEXT:    mov s0, v1.s[3]
-; CHECK-CVT-NEXT:    cmn w3, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    fmov s1, w16
-; CHECK-CVT-NEXT:    csel w11, w3, w9, gt
-; CHECK-CVT-NEXT:    mov v3.s[3], w2
-; CHECK-CVT-NEXT:    fmov s4, w11
-; CHECK-CVT-NEXT:    mov v1.s[1], w14
-; CHECK-CVT-NEXT:    cmp w4, w8
-; CHECK-CVT-NEXT:    fcvtzs w11, s0
-; CHECK-CVT-NEXT:    mov v4.s[1], w13
-; CHECK-CVT-NEXT:    csel w13, w4, w8, lt
-; CHECK-CVT-NEXT:    cmn w13, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w10, w13, w9, gt
-; CHECK-CVT-NEXT:    mov v1.s[2], w15
-; CHECK-CVT-NEXT:    cmp w11, w8
-; CHECK-CVT-NEXT:    csel w8, w11, w8, lt
-; CHECK-CVT-NEXT:    mov v4.s[2], w10
-; CHECK-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w8, w8, w9, gt
-; CHECK-CVT-NEXT:    mov v1.s[3], w17
-; CHECK-CVT-NEXT:    mov v4.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
-; CHECK-CVT-NEXT:    uzp1 v1.8h, v4.8h, v3.8h
+; CHECK-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-CVT-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v5.4s, v1.8h
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzs v1.4s, v3.4s
+; CHECK-CVT-NEXT:    fcvtzs v3.4s, v5.4s
+; CHECK-CVT-NEXT:    sqxtn v0.4h, v2.4s
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v4.4s
+; CHECK-CVT-NEXT:    sqxtn v1.4h, v1.4s
+; CHECK-CVT-NEXT:    sqxtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    sqxtn2 v1.8h, v3.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v16f16_v16i16:

diff  --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 6089d76f7820c..a3b94bcf18ab4 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -1708,47 +1708,14 @@ declare <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half>)
 define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
 ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i1:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov s2, v1.s[1]
-; CHECK-CVT-NEXT:    mov s3, v1.s[2]
-; CHECK-CVT-NEXT:    mov s4, v1.s[3]
-; CHECK-CVT-NEXT:    fcvtzu w9, s1
-; CHECK-CVT-NEXT:    fcvtzu w13, s0
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    fcvtzu w8, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    fcvtzu w10, s3
-; CHECK-CVT-NEXT:    fcvtzu w11, s4
-; CHECK-CVT-NEXT:    fcvtzu w14, s1
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    cmp w8, #1
-; CHECK-CVT-NEXT:    fcvtzu w12, s2
-; CHECK-CVT-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-CVT-NEXT:    cmp w9, #1
-; CHECK-CVT-NEXT:    csinc w9, w9, wzr, lo
-; CHECK-CVT-NEXT:    cmp w10, #1
-; CHECK-CVT-NEXT:    csinc w10, w10, wzr, lo
-; CHECK-CVT-NEXT:    cmp w11, #1
-; CHECK-CVT-NEXT:    fmov s1, w9
-; CHECK-CVT-NEXT:    csinc w11, w11, wzr, lo
-; CHECK-CVT-NEXT:    cmp w12, #1
-; CHECK-CVT-NEXT:    csinc w12, w12, wzr, lo
-; CHECK-CVT-NEXT:    cmp w13, #1
-; CHECK-CVT-NEXT:    csinc w13, w13, wzr, lo
-; CHECK-CVT-NEXT:    mov v1.s[1], w8
-; CHECK-CVT-NEXT:    cmp w14, #1
-; CHECK-CVT-NEXT:    fmov s2, w13
-; CHECK-CVT-NEXT:    fcvtzu w8, s0
-; CHECK-CVT-NEXT:    csinc w9, w14, wzr, lo
-; CHECK-CVT-NEXT:    mov v2.s[1], w12
-; CHECK-CVT-NEXT:    mov v1.s[2], w10
-; CHECK-CVT-NEXT:    cmp w8, #1
-; CHECK-CVT-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-CVT-NEXT:    mov v2.s[2], w9
-; CHECK-CVT-NEXT:    mov v1.s[3], w11
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT:    movi v1.4s, #1
+; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
@@ -1766,48 +1733,14 @@ define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
 define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
 ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i8:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov w8, #255 // =0xff
-; CHECK-CVT-NEXT:    mov s2, v1.s[1]
-; CHECK-CVT-NEXT:    mov s3, v1.s[2]
-; CHECK-CVT-NEXT:    mov s4, v1.s[3]
-; CHECK-CVT-NEXT:    fcvtzu w10, s1
-; CHECK-CVT-NEXT:    fcvtzu w14, s0
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    fcvtzu w9, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    fcvtzu w11, s3
-; CHECK-CVT-NEXT:    fcvtzu w12, s4
-; CHECK-CVT-NEXT:    fcvtzu w15, s1
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    cmp w9, #255
-; CHECK-CVT-NEXT:    fcvtzu w13, s2
-; CHECK-CVT-NEXT:    csel w9, w9, w8, lo
-; CHECK-CVT-NEXT:    cmp w10, #255
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lo
-; CHECK-CVT-NEXT:    cmp w11, #255
-; CHECK-CVT-NEXT:    csel w11, w11, w8, lo
-; CHECK-CVT-NEXT:    cmp w12, #255
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    csel w12, w12, w8, lo
-; CHECK-CVT-NEXT:    cmp w13, #255
-; CHECK-CVT-NEXT:    csel w13, w13, w8, lo
-; CHECK-CVT-NEXT:    cmp w14, #255
-; CHECK-CVT-NEXT:    csel w14, w14, w8, lo
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    cmp w15, #255
-; CHECK-CVT-NEXT:    fmov s2, w14
-; CHECK-CVT-NEXT:    fcvtzu w9, s0
-; CHECK-CVT-NEXT:    csel w10, w15, w8, lo
-; CHECK-CVT-NEXT:    mov v2.s[1], w13
-; CHECK-CVT-NEXT:    mov v1.s[2], w11
-; CHECK-CVT-NEXT:    cmp w9, #255
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lo
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    mov v1.s[3], w12
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT:    movi v1.2d, #0x0000ff000000ff
+; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
@@ -1823,48 +1756,14 @@ define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
 define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
 ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i13:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov w8, #8191 // =0x1fff
-; CHECK-CVT-NEXT:    mov s2, v1.s[1]
-; CHECK-CVT-NEXT:    mov s3, v1.s[2]
-; CHECK-CVT-NEXT:    mov s4, v1.s[3]
-; CHECK-CVT-NEXT:    fcvtzu w10, s1
-; CHECK-CVT-NEXT:    fcvtzu w14, s0
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    fcvtzu w9, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    fcvtzu w11, s3
-; CHECK-CVT-NEXT:    fcvtzu w12, s4
-; CHECK-CVT-NEXT:    fcvtzu w15, s1
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    fcvtzu w13, s2
-; CHECK-CVT-NEXT:    csel w9, w9, w8, lo
-; CHECK-CVT-NEXT:    cmp w10, w8
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lo
-; CHECK-CVT-NEXT:    cmp w11, w8
-; CHECK-CVT-NEXT:    csel w11, w11, w8, lo
-; CHECK-CVT-NEXT:    cmp w12, w8
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    csel w12, w12, w8, lo
-; CHECK-CVT-NEXT:    cmp w13, w8
-; CHECK-CVT-NEXT:    csel w13, w13, w8, lo
-; CHECK-CVT-NEXT:    cmp w14, w8
-; CHECK-CVT-NEXT:    csel w14, w14, w8, lo
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    cmp w15, w8
-; CHECK-CVT-NEXT:    fmov s2, w14
-; CHECK-CVT-NEXT:    fcvtzu w9, s0
-; CHECK-CVT-NEXT:    csel w10, w15, w8, lo
-; CHECK-CVT-NEXT:    mov v2.s[1], w13
-; CHECK-CVT-NEXT:    mov v1.s[2], w11
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lo
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    mov v1.s[3], w12
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT:    movi v1.4s, #31, msl #8
+; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13:
@@ -1880,48 +1779,12 @@ define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
 define <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
 ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i16:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov w8, #65535 // =0xffff
-; CHECK-CVT-NEXT:    mov s2, v1.s[1]
-; CHECK-CVT-NEXT:    mov s3, v1.s[2]
-; CHECK-CVT-NEXT:    mov s4, v1.s[3]
-; CHECK-CVT-NEXT:    fcvtzu w10, s1
-; CHECK-CVT-NEXT:    fcvtzu w14, s0
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    fcvtzu w9, s2
-; CHECK-CVT-NEXT:    mov s2, v0.s[1]
-; CHECK-CVT-NEXT:    fcvtzu w11, s3
-; CHECK-CVT-NEXT:    fcvtzu w12, s4
-; CHECK-CVT-NEXT:    fcvtzu w15, s1
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    fcvtzu w13, s2
-; CHECK-CVT-NEXT:    csel w9, w9, w8, lo
-; CHECK-CVT-NEXT:    cmp w10, w8
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lo
-; CHECK-CVT-NEXT:    cmp w11, w8
-; CHECK-CVT-NEXT:    csel w11, w11, w8, lo
-; CHECK-CVT-NEXT:    cmp w12, w8
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    csel w12, w12, w8, lo
-; CHECK-CVT-NEXT:    cmp w13, w8
-; CHECK-CVT-NEXT:    csel w13, w13, w8, lo
-; CHECK-CVT-NEXT:    cmp w14, w8
-; CHECK-CVT-NEXT:    csel w14, w14, w8, lo
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    cmp w15, w8
-; CHECK-CVT-NEXT:    fmov s2, w14
-; CHECK-CVT-NEXT:    fcvtzu w9, s0
-; CHECK-CVT-NEXT:    csel w10, w15, w8, lo
-; CHECK-CVT-NEXT:    mov v2.s[1], w13
-; CHECK-CVT-NEXT:    mov v1.s[2], w11
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lo
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    mov v1.s[3], w12
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v2.8h, v1.8h
+; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-CVT-NEXT:    uqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT:    fcvtzu v1.4s, v2.4s
+; CHECK-CVT-NEXT:    uqxtn2 v0.8h, v1.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16:
@@ -2509,90 +2372,22 @@ define <16 x i16> @test_unsigned_v16f32_v16i16(<16 x float> %f) {
 define <16 x i8> @test_unsigned_v16f16_v16i8(<16 x half> %f) {
 ; CHECK-CVT-LABEL: test_unsigned_v16f16_v16i8:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-NEXT:    fcvtl2 v3.4s, v1.8h
 ; CHECK-CVT-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT:    mov w8, #255 // =0xff
-; CHECK-CVT-NEXT:    mov s3, v2.s[1]
-; CHECK-CVT-NEXT:    mov s4, v2.s[2]
-; CHECK-CVT-NEXT:    mov s5, v2.s[3]
-; CHECK-CVT-NEXT:    fcvtzu w10, s2
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtzu w13, s1
+; CHECK-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
 ; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtzu w9, s3
-; CHECK-CVT-NEXT:    mov s3, v1.s[1]
-; CHECK-CVT-NEXT:    fcvtzu w11, s4
-; CHECK-CVT-NEXT:    mov s4, v1.s[2]
-; CHECK-CVT-NEXT:    fcvtzu w12, s5
-; CHECK-CVT-NEXT:    mov s1, v1.s[3]
-; CHECK-CVT-NEXT:    fcvtzu w18, s2
-; CHECK-CVT-NEXT:    fcvtzu w3, s0
-; CHECK-CVT-NEXT:    fcvtzu w14, s3
-; CHECK-CVT-NEXT:    cmp w9, #255
-; CHECK-CVT-NEXT:    mov s3, v2.s[1]
-; CHECK-CVT-NEXT:    csel w9, w9, w8, lo
-; CHECK-CVT-NEXT:    cmp w10, #255
-; CHECK-CVT-NEXT:    fcvtzu w15, s4
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lo
-; CHECK-CVT-NEXT:    cmp w11, #255
-; CHECK-CVT-NEXT:    mov s4, v2.s[2]
-; CHECK-CVT-NEXT:    csel w11, w11, w8, lo
-; CHECK-CVT-NEXT:    cmp w12, #255
-; CHECK-CVT-NEXT:    fcvtzu w16, s1
-; CHECK-CVT-NEXT:    mov s1, v2.s[3]
-; CHECK-CVT-NEXT:    csel w12, w12, w8, lo
-; CHECK-CVT-NEXT:    cmp w14, #255
-; CHECK-CVT-NEXT:    fcvtzu w17, s3
-; CHECK-CVT-NEXT:    mov s3, v0.s[1]
-; CHECK-CVT-NEXT:    csel w14, w14, w8, lo
-; CHECK-CVT-NEXT:    cmp w13, #255
-; CHECK-CVT-NEXT:    fcvtzu w0, s4
-; CHECK-CVT-NEXT:    fmov s2, w10
-; CHECK-CVT-NEXT:    csel w13, w13, w8, lo
-; CHECK-CVT-NEXT:    cmp w15, #255
-; CHECK-CVT-NEXT:    csel w15, w15, w8, lo
-; CHECK-CVT-NEXT:    cmp w16, #255
-; CHECK-CVT-NEXT:    fcvtzu w1, s1
-; CHECK-CVT-NEXT:    csel w16, w16, w8, lo
-; CHECK-CVT-NEXT:    cmp w17, #255
-; CHECK-CVT-NEXT:    fcvtzu w2, s3
-; CHECK-CVT-NEXT:    csel w17, w17, w8, lo
-; CHECK-CVT-NEXT:    cmp w18, #255
-; CHECK-CVT-NEXT:    mov s1, v0.s[2]
-; CHECK-CVT-NEXT:    csel w18, w18, w8, lo
-; CHECK-CVT-NEXT:    cmp w0, #255
-; CHECK-CVT-NEXT:    mov v2.s[1], w9
-; CHECK-CVT-NEXT:    csel w0, w0, w8, lo
-; CHECK-CVT-NEXT:    cmp w1, #255
-; CHECK-CVT-NEXT:    fmov s3, w18
-; CHECK-CVT-NEXT:    csel w10, w1, w8, lo
-; CHECK-CVT-NEXT:    cmp w2, #255
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    csel w9, w2, w8, lo
-; CHECK-CVT-NEXT:    cmp w3, #255
-; CHECK-CVT-NEXT:    fcvtzu w2, s1
-; CHECK-CVT-NEXT:    csel w1, w3, w8, lo
-; CHECK-CVT-NEXT:    fmov s1, w13
-; CHECK-CVT-NEXT:    mov v3.s[1], w17
-; CHECK-CVT-NEXT:    fmov s4, w1
-; CHECK-CVT-NEXT:    mov v2.s[2], w11
-; CHECK-CVT-NEXT:    mov v1.s[1], w14
-; CHECK-CVT-NEXT:    cmp w2, #255
-; CHECK-CVT-NEXT:    mov v4.s[1], w9
-; CHECK-CVT-NEXT:    fcvtzu w9, s0
-; CHECK-CVT-NEXT:    csel w11, w2, w8, lo
-; CHECK-CVT-NEXT:    mov v3.s[2], w0
-; CHECK-CVT-NEXT:    mov v2.s[3], w12
-; CHECK-CVT-NEXT:    mov v1.s[2], w15
-; CHECK-CVT-NEXT:    mov v4.s[2], w11
-; CHECK-CVT-NEXT:    cmp w9, #255
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lo
-; CHECK-CVT-NEXT:    mov v3.s[3], w10
-; CHECK-CVT-NEXT:    mov v1.s[3], w16
-; CHECK-CVT-NEXT:    mov v4.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
-; CHECK-CVT-NEXT:    uzp1 v1.8h, v4.8h, v3.8h
-; CHECK-CVT-NEXT:    uzp1 v0.16b, v1.16b, v0.16b
+; CHECK-CVT-NEXT:    movi v2.2d, #0x0000ff000000ff
+; CHECK-CVT-NEXT:    fcvtzu v3.4s, v3.4s
+; CHECK-CVT-NEXT:    fcvtzu v1.4s, v1.4s
+; CHECK-CVT-NEXT:    fcvtzu v4.4s, v4.4s
+; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT:    umin v3.4s, v3.4s, v2.4s
+; CHECK-CVT-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-CVT-NEXT:    umin v4.4s, v4.4s, v2.4s
+; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v2.4s
+; CHECK-CVT-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
+; CHECK-CVT-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i8:
@@ -2609,89 +2404,18 @@ define <16 x i8> @test_unsigned_v16f16_v16i8(<16 x half> %f) {
 define <16 x i16> @test_unsigned_v16f16_v16i16(<16 x half> %f) {
 ; CHECK-CVT-LABEL: test_unsigned_v16f16_v16i16:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    mov w8, #65535 // =0xffff
-; CHECK-CVT-NEXT:    mov s3, v2.s[1]
-; CHECK-CVT-NEXT:    mov s4, v2.s[2]
-; CHECK-CVT-NEXT:    mov s5, v2.s[3]
-; CHECK-CVT-NEXT:    fcvtzu w10, s2
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT:    fcvtzu w13, s0
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT:    fcvtzu w9, s3
-; CHECK-CVT-NEXT:    mov s3, v0.s[1]
-; CHECK-CVT-NEXT:    fcvtzu w11, s4
-; CHECK-CVT-NEXT:    mov s4, v0.s[2]
-; CHECK-CVT-NEXT:    fcvtzu w12, s5
-; CHECK-CVT-NEXT:    mov s0, v0.s[3]
-; CHECK-CVT-NEXT:    fcvtzu w18, s2
-; CHECK-CVT-NEXT:    fcvtzu w3, s1
-; CHECK-CVT-NEXT:    fcvtzu w14, s3
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    mov s3, v2.s[1]
-; CHECK-CVT-NEXT:    csel w9, w9, w8, lo
-; CHECK-CVT-NEXT:    cmp w10, w8
-; CHECK-CVT-NEXT:    fcvtzu w15, s4
-; CHECK-CVT-NEXT:    csel w10, w10, w8, lo
-; CHECK-CVT-NEXT:    cmp w11, w8
-; CHECK-CVT-NEXT:    mov s4, v2.s[2]
-; CHECK-CVT-NEXT:    csel w11, w11, w8, lo
-; CHECK-CVT-NEXT:    cmp w12, w8
-; CHECK-CVT-NEXT:    fcvtzu w16, s0
-; CHECK-CVT-NEXT:    mov s0, v2.s[3]
-; CHECK-CVT-NEXT:    csel w12, w12, w8, lo
-; CHECK-CVT-NEXT:    cmp w14, w8
-; CHECK-CVT-NEXT:    fcvtzu w17, s3
-; CHECK-CVT-NEXT:    mov s3, v1.s[1]
-; CHECK-CVT-NEXT:    csel w14, w14, w8, lo
-; CHECK-CVT-NEXT:    cmp w13, w8
-; CHECK-CVT-NEXT:    fcvtzu w0, s4
-; CHECK-CVT-NEXT:    fmov s2, w10
-; CHECK-CVT-NEXT:    csel w13, w13, w8, lo
-; CHECK-CVT-NEXT:    cmp w15, w8
-; CHECK-CVT-NEXT:    csel w15, w15, w8, lo
-; CHECK-CVT-NEXT:    cmp w16, w8
-; CHECK-CVT-NEXT:    fcvtzu w1, s0
-; CHECK-CVT-NEXT:    csel w16, w16, w8, lo
-; CHECK-CVT-NEXT:    cmp w17, w8
-; CHECK-CVT-NEXT:    fcvtzu w2, s3
-; CHECK-CVT-NEXT:    csel w17, w17, w8, lo
-; CHECK-CVT-NEXT:    cmp w18, w8
-; CHECK-CVT-NEXT:    mov s0, v1.s[2]
-; CHECK-CVT-NEXT:    csel w18, w18, w8, lo
-; CHECK-CVT-NEXT:    cmp w0, w8
-; CHECK-CVT-NEXT:    mov v2.s[1], w9
-; CHECK-CVT-NEXT:    csel w0, w0, w8, lo
-; CHECK-CVT-NEXT:    cmp w1, w8
-; CHECK-CVT-NEXT:    fmov s3, w18
-; CHECK-CVT-NEXT:    csel w10, w1, w8, lo
-; CHECK-CVT-NEXT:    cmp w2, w8
-; CHECK-CVT-NEXT:    csel w9, w2, w8, lo
-; CHECK-CVT-NEXT:    cmp w3, w8
-; CHECK-CVT-NEXT:    fcvtzu w2, s0
-; CHECK-CVT-NEXT:    csel w1, w3, w8, lo
-; CHECK-CVT-NEXT:    mov s0, v1.s[3]
-; CHECK-CVT-NEXT:    fmov s1, w13
-; CHECK-CVT-NEXT:    fmov s4, w1
-; CHECK-CVT-NEXT:    mov v3.s[1], w17
-; CHECK-CVT-NEXT:    mov v2.s[2], w11
-; CHECK-CVT-NEXT:    mov v1.s[1], w14
-; CHECK-CVT-NEXT:    cmp w2, w8
-; CHECK-CVT-NEXT:    mov v4.s[1], w9
-; CHECK-CVT-NEXT:    fcvtzu w9, s0
-; CHECK-CVT-NEXT:    csel w11, w2, w8, lo
-; CHECK-CVT-NEXT:    mov v3.s[2], w0
-; CHECK-CVT-NEXT:    mov v2.s[3], w12
-; CHECK-CVT-NEXT:    mov v1.s[2], w15
-; CHECK-CVT-NEXT:    mov v4.s[2], w11
-; CHECK-CVT-NEXT:    cmp w9, w8
-; CHECK-CVT-NEXT:    csel w8, w9, w8, lo
-; CHECK-CVT-NEXT:    mov v3.s[3], w10
-; CHECK-CVT-NEXT:    mov v1.s[3], w16
-; CHECK-CVT-NEXT:    mov v4.s[3], w8
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
-; CHECK-CVT-NEXT:    uzp1 v1.8h, v4.8h, v3.8h
+; CHECK-CVT-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-CVT-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-CVT-NEXT:    fcvtl2 v4.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v5.4s, v1.8h
+; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzu v1.4s, v3.4s
+; CHECK-CVT-NEXT:    fcvtzu v3.4s, v5.4s
+; CHECK-CVT-NEXT:    uqxtn v0.4h, v2.4s
+; CHECK-CVT-NEXT:    fcvtzu v2.4s, v4.4s
+; CHECK-CVT-NEXT:    uqxtn v1.4h, v1.4s
+; CHECK-CVT-NEXT:    uqxtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    uqxtn2 v1.8h, v3.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i16:


        


More information about the llvm-commits mailing list