[llvm] 65033ef - [AArch64] Add a tablegen pattern for UZP1.

Alexandros Lamprineas via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 14 03:56:34 PST 2021


Author: Alexandros Lamprineas
Date: 2021-12-14T11:51:05Z
New Revision: 65033ef9e8a6ecf7c2ad80c4e3e0c339b44e61b4

URL: https://github.com/llvm/llvm-project/commit/65033ef9e8a6ecf7c2ad80c4e3e0c339b44e61b4
DIFF: https://github.com/llvm/llvm-project/commit/65033ef9e8a6ecf7c2ad80c4e3e0c339b44e61b4.diff

LOG: [AArch64] Add a tablegen pattern for UZP1.

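Converts concat_vectors(V64 (trunc V128), V64 (trunc V128)), which
would otherwise be lowered as xtn followed by xtn2, to a single uzp1.
This is valid because truncating each element keeps its low half, i.e.
the even-numbered narrow lanes of the source register, and uzp1
concatenates the even-numbered lanes of its two operands.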

Differential Revision: https://reviews.llvm.org/D115435

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
    llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
    llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll
    llvm/test/CodeGen/AArch64/fcvt_combine.ll
    llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
    llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
    llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
    llvm/test/CodeGen/AArch64/neon-truncstore.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index decee117d2d5..bcaf6cd3195e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5288,6 +5288,16 @@ defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
 defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
 defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
 
+def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
+                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
+          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
+                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
+          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
+                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
+          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
+
 //----------------------------------------------------------------------------
 // AdvSIMD TBL/TBX instructions
 //----------------------------------------------------------------------------

diff  --git a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index b45388c1045d..9910a4e0f739 100644
--- a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -57,9 +57,8 @@ define <4 x half> @uitofp_v4i64_to_v4f16(<4 x i64>* %ptr) {
 define <4 x i16> @trunc_v4i64_to_v4i16(<4 x i64>* %ptr) {
 ; CHECK-LABEL: trunc_v4i64_to_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q0, q1, [x0]
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    ldp q1, q0, [x0]
+; CHECK-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
   %tmp1 = load <4 x i64>, <4 x i64>* %ptr

diff  --git a/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll b/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
index b478b54f10b2..6b788b4e1d9a 100644
--- a/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
+++ b/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
@@ -16,6 +16,28 @@ entry:
   ret <4 x i16> %shuffle
 }
 
+define <4 x i32> @test_concat_truncate_v2i64_to_v4i32(<2 x i64> %a, <2 x i64> %b) #0 {
+entry:
+; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i32:
+; CHECK-NEXT: uzp1.4s v0, v0, v1
+; CHECK-NEXT: ret
+  %at = trunc <2 x i64> %a to <2 x i32>
+  %bt = trunc <2 x i64> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %at, <2 x i32> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i16> @test_concat_truncate_v2i32_to_v4i16(<2 x i32> %a, <2 x i32> %b) #0 {
+entry:
+; CHECK-LABEL: test_concat_truncate_v2i32_to_v4i16:
+; CHECK-NEXT: uzp1.4h v0, v0, v1
+; CHECK-NEXT: ret
+  %at = trunc <2 x i32> %a to <2 x i16>
+  %bt = trunc <2 x i32> %b to <2 x i16>
+  %shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle
+}
+
 define <8 x i8> @test_concat_truncate_v4i32_to_v8i8(<4 x i32> %a, <4 x i32> %b) #0 {
 entry:
 ; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i8:
@@ -31,8 +53,7 @@ entry:
 define <8 x i16> @test_concat_truncate_v4i32_to_v8i16(<4 x i32> %a, <4 x i32> %b) #0 {
 entry:
 ; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i16:
-; CHECK-NEXT: xtn.4h v0, v0
-; CHECK-NEXT: xtn2.8h v0, v1
+; CHECK-NEXT: uzp1.8h v0, v0, v1
 ; CHECK-NEXT: ret
   %at = trunc <4 x i32> %a to <4 x i16>
   %bt = trunc <4 x i32> %b to <4 x i16>
@@ -40,6 +61,27 @@ entry:
   ret <8 x i16> %shuffle
 }
 
+define <8 x i8> @test_concat_truncate_v4i16_to_v8i8(<4 x i16> %a, <4 x i16> %b) #0 {
+entry:
+; CHECK-LABEL: test_concat_truncate_v4i16_to_v8i8:
+; CHECK-NEXT: uzp1.8b v0, v0, v1
+; CHECK-NEXT: ret
+  %at = trunc <4 x i16> %a to <4 x i8>
+  %bt = trunc <4 x i16> %b to <4 x i8>
+  %shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle
+}
+
+define <16 x i8> @test_concat_truncate_v8i16_to_v16i8(<8 x i16> %a, <8 x i16> %b) #0 {
+entry:
+; CHECK-LABEL: test_concat_truncate_v8i16_to_v16i8:
+; CHECK-NEXT: uzp1.16b v0, v0, v1
+; CHECK-NEXT: ret
+  %at = trunc <8 x i16> %a to <8 x i8>
+  %bt = trunc <8 x i16> %b to <8 x i8>
+  %shuffle = shufflevector <8 x i8> %at, <8 x i8> %bt, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32  9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %shuffle
+}
 
 ; The concat_vectors operation in this test is introduced when splitting
 ; the fptrunc operation due to the split <vscale x 4 x double> input operand.

diff  --git a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll
index 072f94a25034..4fccf4d07405 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll
@@ -14,8 +14,7 @@ define void @no_combine(i32 %p) local_unnamed_addr {
 ; CHECK-NEXT:    dup v1.4s, w0
 ; CHECK-NEXT:    xtn v1.4h, v1.4s
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-NEXT:    xtn v0.8b, v1.8h
-; CHECK-NEXT:    xtn2 v0.16b, v1.8h
+; CHECK-NEXT:    uzp1 v0.16b, v1.16b, v1.16b
 ; CHECK-NEXT:    str q0, [x8]
 ; CHECK-NEXT:    ret
 

diff  --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index 2eb38ec3118d..eecf92a26a5c 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -180,50 +180,49 @@ define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) {
 define <8 x i16> @test_v8f16(<8 x half> %in) {
 ; CHECK-NO16-LABEL: test_v8f16:
 ; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    mov h2, v0.h[1]
-; CHECK-NO16-NEXT:    mov h3, v0.h[4]
-; CHECK-NO16-NEXT:    mov h4, v0.h[5]
+; CHECK-NO16-NEXT:    mov h2, v0.h[4]
+; CHECK-NO16-NEXT:    mov h3, v0.h[5]
+; CHECK-NO16-NEXT:    mov h4, v0.h[1]
 ; CHECK-NO16-NEXT:    mov h5, v0.h[2]
-; CHECK-NO16-NEXT:    fcvt s6, h0
+; CHECK-NO16-NEXT:    mov h6, v0.h[6]
+; CHECK-NO16-NEXT:    fcvt s7, h0
 ; CHECK-NO16-NEXT:    fmov s1, #4.00000000
-; CHECK-NO16-NEXT:    mov h7, v0.h[6]
 ; CHECK-NO16-NEXT:    mov h16, v0.h[3]
 ; CHECK-NO16-NEXT:    fcvt s2, h2
 ; CHECK-NO16-NEXT:    fcvt s3, h3
 ; CHECK-NO16-NEXT:    fcvt s4, h4
-; CHECK-NO16-NEXT:    fcvt s5, h5
-; CHECK-NO16-NEXT:    fmul s6, s6, s1
 ; CHECK-NO16-NEXT:    mov h0, v0.h[7]
-; CHECK-NO16-NEXT:    fcvt s7, h7
+; CHECK-NO16-NEXT:    fcvt s5, h5
+; CHECK-NO16-NEXT:    fcvt s6, h6
+; CHECK-NO16-NEXT:    fmul s7, s7, s1
 ; CHECK-NO16-NEXT:    fcvt s16, h16
 ; CHECK-NO16-NEXT:    fmul s2, s2, s1
 ; CHECK-NO16-NEXT:    fmul s3, s3, s1
 ; CHECK-NO16-NEXT:    fmul s4, s4, s1
-; CHECK-NO16-NEXT:    fmul s5, s5, s1
-; CHECK-NO16-NEXT:    fcvt h6, s6
 ; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s7, s7, s1
+; CHECK-NO16-NEXT:    fmul s5, s5, s1
+; CHECK-NO16-NEXT:    fmul s6, s6, s1
+; CHECK-NO16-NEXT:    fcvt h7, s7
 ; CHECK-NO16-NEXT:    fmul s16, s16, s1
 ; CHECK-NO16-NEXT:    fcvt h2, s2
 ; CHECK-NO16-NEXT:    fcvt h3, s3
 ; CHECK-NO16-NEXT:    fcvt h4, s4
-; CHECK-NO16-NEXT:    fcvt h5, s5
 ; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h1, s7
-; CHECK-NO16-NEXT:    mov v6.h[1], v2.h[0]
-; CHECK-NO16-NEXT:    fcvt h2, s16
-; CHECK-NO16-NEXT:    mov v3.h[1], v4.h[0]
+; CHECK-NO16-NEXT:    fcvt h1, s5
+; CHECK-NO16-NEXT:    fcvt h5, s6
+; CHECK-NO16-NEXT:    mov v2.h[1], v3.h[0]
+; CHECK-NO16-NEXT:    fcvt h3, s16
+; CHECK-NO16-NEXT:    mov v7.h[1], v4.h[0]
 ; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    mov v6.h[2], v5.h[0]
-; CHECK-NO16-NEXT:    mov v3.h[2], v1.h[0]
-; CHECK-NO16-NEXT:    mov v6.h[3], v2.h[0]
-; CHECK-NO16-NEXT:    mov v3.h[3], v0.h[0]
-; CHECK-NO16-NEXT:    fcvtl v0.4s, v6.4h
-; CHECK-NO16-NEXT:    fcvtl v1.4s, v3.4h
+; CHECK-NO16-NEXT:    mov v2.h[2], v5.h[0]
+; CHECK-NO16-NEXT:    mov v7.h[2], v1.h[0]
+; CHECK-NO16-NEXT:    mov v2.h[3], v0.h[0]
+; CHECK-NO16-NEXT:    mov v7.h[3], v3.h[0]
+; CHECK-NO16-NEXT:    fcvtl v0.4s, v2.4h
+; CHECK-NO16-NEXT:    fcvtl v1.4s, v7.4h
 ; CHECK-NO16-NEXT:    fcvtzs v0.4s, v0.4s
 ; CHECK-NO16-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-NO16-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NO16-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_v8f16:
@@ -547,31 +546,30 @@ define <8 x i16> @test_v8f16_sat(<8 x half> %in) {
 ; CHECK-NO16-NEXT:    cmp w14, w9
 ; CHECK-NO16-NEXT:    csel w14, w14, w9, lt
 ; CHECK-NO16-NEXT:    cmn w14, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    fcvtzs w16, s0
 ; CHECK-NO16-NEXT:    csel w14, w14, w10, gt
 ; CHECK-NO16-NEXT:    cmp w15, w9
 ; CHECK-NO16-NEXT:    csel w15, w15, w9, lt
-; CHECK-NO16-NEXT:    mov s0, v1.s[3]
 ; CHECK-NO16-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    csel w15, w15, w10, gt
-; CHECK-NO16-NEXT:    cmp w16, w9
-; CHECK-NO16-NEXT:    csel w11, w16, w9, lt
-; CHECK-NO16-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-NO16-NEXT:    fmov s1, w15
+; CHECK-NO16-NEXT:    csel w11, w15, w10, gt
 ; CHECK-NO16-NEXT:    fcvtzs w15, s0
-; CHECK-NO16-NEXT:    csel w11, w11, w10, gt
+; CHECK-NO16-NEXT:    mov s0, v1.s[3]
 ; CHECK-NO16-NEXT:    mov v2.s[1], w8
-; CHECK-NO16-NEXT:    mov v1.s[1], w14
+; CHECK-NO16-NEXT:    fmov s1, w11
 ; CHECK-NO16-NEXT:    cmp w15, w9
 ; CHECK-NO16-NEXT:    csel w8, w15, w9, lt
+; CHECK-NO16-NEXT:    fcvtzs w11, s0
 ; CHECK-NO16-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-NO16-NEXT:    mov v1.s[1], w14
 ; CHECK-NO16-NEXT:    csel w8, w8, w10, gt
-; CHECK-NO16-NEXT:    mov v1.s[2], w11
 ; CHECK-NO16-NEXT:    mov v2.s[2], w12
-; CHECK-NO16-NEXT:    mov v1.s[3], w8
+; CHECK-NO16-NEXT:    cmp w11, w9
+; CHECK-NO16-NEXT:    csel w9, w11, w9, lt
+; CHECK-NO16-NEXT:    mov v1.s[2], w8
+; CHECK-NO16-NEXT:    cmn w9, #8, lsl #12 // =32768
+; CHECK-NO16-NEXT:    csel w8, w9, w10, gt
 ; CHECK-NO16-NEXT:    mov v2.s[3], w13
-; CHECK-NO16-NEXT:    xtn v0.4h, v1.4s
-; CHECK-NO16-NEXT:    xtn2 v0.8h, v2.4s
+; CHECK-NO16-NEXT:    mov v1.s[3], w8
+; CHECK-NO16-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
 ; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_v8f16_sat:

diff  --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 15d486ba40ae..5ae8b5775d33 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -661,13 +661,12 @@ define void @test_insert_at_zero(half %a, <8 x half>* %b) #0 {
 define <8 x i8> @fptosi_i8(<8 x half> %a) #0 {
 ; CHECK-CVT-LABEL: fptosi_i8:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
 ; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    xtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    xtn2 v1.8h, v0.4s
-; CHECK-CVT-NEXT:    xtn v0.8b, v1.8h
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: fptosi_i8:
@@ -689,12 +688,11 @@ define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
 ; CHECK-COMMON_NEXT:      ret
 ; CHECK-CVT-LABEL: fptosi_i16:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT:    fcvtzs v2.4s, v0.4s
-; CHECK-CVT-NEXT:    xtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: fptosi_i16:
@@ -708,13 +706,12 @@ define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
 define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
 ; CHECK-CVT-LABEL: fptoui_i8:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-NEXT:    fcvtzu v1.4s, v1.4s
 ; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    xtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    xtn2 v1.8h, v0.4s
-; CHECK-CVT-NEXT:    xtn v0.8b, v1.8h
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: fptoui_i8:
@@ -729,12 +726,11 @@ define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
 define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
 ; CHECK-CVT-LABEL: fptoui_i16:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT:    fcvtzu v2.4s, v0.4s
-; CHECK-CVT-NEXT:    xtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: fptoui_i16:

diff  --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 0a40493a3826..0030e2fa94b0 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -80,8 +80,8 @@ entry:
 define <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v2.2d, v0.2s
-; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
+; CHECK-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
 ; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
 ; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
 ; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
@@ -90,9 +90,8 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-NEXT:    and v2.16b, v2.16b, v3.16b
 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    orn v2.16b, v2.16b, v3.16b
-; CHECK-NEXT:    orn v1.16b, v0.16b, v1.16b
-; CHECK-NEXT:    xtn v0.2s, v2.2d
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    orn v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
@@ -138,54 +137,52 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CVT-NEXT:    mov h2, v0.h[2]
-; CHECK-CVT-NEXT:    mov h3, v0.h[1]
-; CHECK-CVT-NEXT:    fcvt s4, h0
-; CHECK-CVT-NEXT:    mov h0, v0.h[3]
+; CHECK-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-CVT-NEXT:    mov h4, v0.h[1]
+; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    movi v1.2d, #0x000000ffffffff
 ; CHECK-CVT-NEXT:    fcvt s2, h2
 ; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvtzu x8, s4
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu x9, s2
-; CHECK-CVT-NEXT:    fmov d2, x8
+; CHECK-CVT-NEXT:    fcvtzu x9, s0
+; CHECK-CVT-NEXT:    fcvtzu x8, s2
+; CHECK-CVT-NEXT:    fcvt s2, h4
+; CHECK-CVT-NEXT:    fmov d0, x8
 ; CHECK-CVT-NEXT:    fcvtzu x8, s3
 ; CHECK-CVT-NEXT:    fmov d3, x9
-; CHECK-CVT-NEXT:    fcvtzu x9, s0
-; CHECK-CVT-NEXT:    mov v2.d[1], x8
+; CHECK-CVT-NEXT:    fcvtzu x9, s2
+; CHECK-CVT-NEXT:    mov v0.d[1], x8
 ; CHECK-CVT-NEXT:    mov v3.d[1], x9
-; CHECK-CVT-NEXT:    cmhi v0.2d, v1.2d, v2.2d
+; CHECK-CVT-NEXT:    cmhi v2.2d, v1.2d, v0.2d
 ; CHECK-CVT-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-CVT-NEXT:    and v2.16b, v2.16b, v0.16b
+; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-CVT-NEXT:    and v3.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT:    orn v0.16b, v2.16b, v0.16b
+; CHECK-CVT-NEXT:    orn v0.16b, v0.16b, v2.16b
 ; CHECK-CVT-NEXT:    orn v1.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT:    xtn v0.2s, v0.2d
-; CHECK-CVT-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-CVT-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: utesth_f16i32:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-FP16-NEXT:    mov h3, v0.h[1]
-; CHECK-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-FP16-NEXT:    mov h0, v0.h[3]
+; CHECK-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-FP16-NEXT:    fcvtzu x9, h0
 ; CHECK-FP16-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-NEXT:    fcvtzu x9, h2
-; CHECK-FP16-NEXT:    fmov d2, x8
+; CHECK-FP16-NEXT:    fcvtzu x8, h2
+; CHECK-FP16-NEXT:    mov h2, v0.h[1]
+; CHECK-FP16-NEXT:    fmov d0, x8
 ; CHECK-FP16-NEXT:    fcvtzu x8, h3
 ; CHECK-FP16-NEXT:    fmov d3, x9
-; CHECK-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-FP16-NEXT:    mov v2.d[1], x8
+; CHECK-FP16-NEXT:    fcvtzu x9, h2
+; CHECK-FP16-NEXT:    mov v0.d[1], x8
 ; CHECK-FP16-NEXT:    mov v3.d[1], x9
-; CHECK-FP16-NEXT:    cmhi v0.2d, v1.2d, v2.2d
+; CHECK-FP16-NEXT:    cmhi v2.2d, v1.2d, v0.2d
 ; CHECK-FP16-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-FP16-NEXT:    and v2.16b, v2.16b, v0.16b
+; CHECK-FP16-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-FP16-NEXT:    and v3.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT:    orn v0.16b, v2.16b, v0.16b
+; CHECK-FP16-NEXT:    orn v0.16b, v0.16b, v2.16b
 ; CHECK-FP16-NEXT:    orn v1.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT:    xtn v0.2s, v0.2d
-; CHECK-FP16-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-FP16-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
 ; CHECK-FP16-NEXT:    ret
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
@@ -324,10 +321,11 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-CVT-NEXT:    mvni v3.4s, #127, msl #8
 ; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
 ; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v1.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    sqxtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    smax v1.4s, v1.4s, v3.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    smax v1.4s, v2.4s, v3.4s
+; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v3.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: stest_f16i16:
@@ -352,9 +350,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
 ; CHECK-NEXT:    fcvtzu v2.4s, v2.4s
 ; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <8 x half> %x to <8 x i32>
@@ -372,9 +370,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-CVT-NEXT:    movi v1.2d, #0x00ffff0000ffff
 ; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
 ; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v1.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: ustest_f16i16:
@@ -813,8 +811,8 @@ entry:
 define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl v2.2d, v0.2s
-; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
+; CHECK-NEXT:    fcvtl2 v2.2d, v0.4s
+; CHECK-NEXT:    fcvtl v0.2d, v0.2s
 ; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
 ; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
 ; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
@@ -823,9 +821,8 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-NEXT:    and v2.16b, v2.16b, v3.16b
 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    orn v2.16b, v2.16b, v3.16b
-; CHECK-NEXT:    orn v1.16b, v0.16b, v1.16b
-; CHECK-NEXT:    xtn v0.2s, v2.2d
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    orn v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
@@ -866,54 +863,52 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CVT-NEXT:    mov h2, v0.h[2]
-; CHECK-CVT-NEXT:    mov h3, v0.h[1]
-; CHECK-CVT-NEXT:    fcvt s4, h0
-; CHECK-CVT-NEXT:    mov h0, v0.h[3]
+; CHECK-CVT-NEXT:    mov h3, v0.h[3]
+; CHECK-CVT-NEXT:    mov h4, v0.h[1]
+; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    movi v1.2d, #0x000000ffffffff
 ; CHECK-CVT-NEXT:    fcvt s2, h2
 ; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvtzu x8, s4
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    fcvtzu x9, s2
-; CHECK-CVT-NEXT:    fmov d2, x8
+; CHECK-CVT-NEXT:    fcvtzu x9, s0
+; CHECK-CVT-NEXT:    fcvtzu x8, s2
+; CHECK-CVT-NEXT:    fcvt s2, h4
+; CHECK-CVT-NEXT:    fmov d0, x8
 ; CHECK-CVT-NEXT:    fcvtzu x8, s3
 ; CHECK-CVT-NEXT:    fmov d3, x9
-; CHECK-CVT-NEXT:    fcvtzu x9, s0
-; CHECK-CVT-NEXT:    mov v2.d[1], x8
+; CHECK-CVT-NEXT:    fcvtzu x9, s2
+; CHECK-CVT-NEXT:    mov v0.d[1], x8
 ; CHECK-CVT-NEXT:    mov v3.d[1], x9
-; CHECK-CVT-NEXT:    cmhi v0.2d, v1.2d, v2.2d
+; CHECK-CVT-NEXT:    cmhi v2.2d, v1.2d, v0.2d
 ; CHECK-CVT-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-CVT-NEXT:    and v2.16b, v2.16b, v0.16b
+; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-CVT-NEXT:    and v3.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT:    orn v0.16b, v2.16b, v0.16b
+; CHECK-CVT-NEXT:    orn v0.16b, v0.16b, v2.16b
 ; CHECK-CVT-NEXT:    orn v1.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT:    xtn v0.2s, v0.2d
-; CHECK-CVT-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-CVT-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: utesth_f16i32_mm:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-FP16-NEXT:    mov h3, v0.h[1]
-; CHECK-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-FP16-NEXT:    mov h0, v0.h[3]
+; CHECK-FP16-NEXT:    mov h3, v0.h[3]
+; CHECK-FP16-NEXT:    fcvtzu x9, h0
 ; CHECK-FP16-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-NEXT:    fcvtzu x9, h2
-; CHECK-FP16-NEXT:    fmov d2, x8
+; CHECK-FP16-NEXT:    fcvtzu x8, h2
+; CHECK-FP16-NEXT:    mov h2, v0.h[1]
+; CHECK-FP16-NEXT:    fmov d0, x8
 ; CHECK-FP16-NEXT:    fcvtzu x8, h3
 ; CHECK-FP16-NEXT:    fmov d3, x9
-; CHECK-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-FP16-NEXT:    mov v2.d[1], x8
+; CHECK-FP16-NEXT:    fcvtzu x9, h2
+; CHECK-FP16-NEXT:    mov v0.d[1], x8
 ; CHECK-FP16-NEXT:    mov v3.d[1], x9
-; CHECK-FP16-NEXT:    cmhi v0.2d, v1.2d, v2.2d
+; CHECK-FP16-NEXT:    cmhi v2.2d, v1.2d, v0.2d
 ; CHECK-FP16-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-FP16-NEXT:    and v2.16b, v2.16b, v0.16b
+; CHECK-FP16-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-FP16-NEXT:    and v3.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT:    orn v0.16b, v2.16b, v0.16b
+; CHECK-FP16-NEXT:    orn v0.16b, v0.16b, v2.16b
 ; CHECK-FP16-NEXT:    orn v1.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT:    xtn v0.2s, v0.2d
-; CHECK-FP16-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-FP16-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
 ; CHECK-FP16-NEXT:    ret
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
@@ -1039,10 +1034,11 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-CVT-NEXT:    mvni v3.4s, #127, msl #8
 ; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
 ; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v1.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    sqxtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    smax v1.4s, v1.4s, v3.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    smax v1.4s, v2.4s, v3.4s
+; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v3.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: stest_f16i16_mm:
@@ -1065,9 +1061,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
 ; CHECK-NEXT:    fcvtzu v2.4s, v2.4s
 ; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    umin v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <8 x half> %x to <8 x i32>
@@ -1084,9 +1080,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-CVT-NEXT:    movi v1.2d, #0x00ffff0000ffff
 ; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
 ; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT:    umin v1.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: ustest_f16i16_mm:

diff  --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 964c806ad092..55e018783f04 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -2093,9 +2093,8 @@ define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    csinv w10, w10, wzr, ge
 ; CHECK-CVT-NEXT:    cmp w11, #0
 ; CHECK-CVT-NEXT:    csel w11, w11, wzr, lt
-; CHECK-CVT-NEXT:    fcvtzs w14, s1
-; CHECK-CVT-NEXT:    cmp w11, #0
 ; CHECK-CVT-NEXT:    fmov s2, w9
+; CHECK-CVT-NEXT:    cmp w11, #0
 ; CHECK-CVT-NEXT:    csinv w11, w11, wzr, ge
 ; CHECK-CVT-NEXT:    cmp w12, #0
 ; CHECK-CVT-NEXT:    csel w12, w12, wzr, lt
@@ -2104,25 +2103,25 @@ define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    cmp w13, #0
 ; CHECK-CVT-NEXT:    csel w13, w13, wzr, lt
 ; CHECK-CVT-NEXT:    cmp w13, #0
-; CHECK-CVT-NEXT:    csinv w13, w13, wzr, ge
-; CHECK-CVT-NEXT:    cmp w14, #0
-; CHECK-CVT-NEXT:    csel w9, w14, wzr, lt
-; CHECK-CVT-NEXT:    cmp w9, #0
-; CHECK-CVT-NEXT:    fmov s1, w13
-; CHECK-CVT-NEXT:    fcvtzs w13, s0
-; CHECK-CVT-NEXT:    csinv w9, w9, wzr, ge
+; CHECK-CVT-NEXT:    csinv w9, w13, wzr, ge
+; CHECK-CVT-NEXT:    fcvtzs w13, s1
 ; CHECK-CVT-NEXT:    mov v2.s[1], w8
-; CHECK-CVT-NEXT:    mov v1.s[1], w12
+; CHECK-CVT-NEXT:    fmov s1, w9
 ; CHECK-CVT-NEXT:    cmp w13, #0
 ; CHECK-CVT-NEXT:    csel w8, w13, wzr, lt
+; CHECK-CVT-NEXT:    fcvtzs w9, s0
 ; CHECK-CVT-NEXT:    cmp w8, #0
+; CHECK-CVT-NEXT:    mov v1.s[1], w12
 ; CHECK-CVT-NEXT:    csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT:    mov v1.s[2], w9
+; CHECK-CVT-NEXT:    cmp w9, #0
+; CHECK-CVT-NEXT:    csel w9, w9, wzr, lt
 ; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    mov v1.s[3], w8
+; CHECK-CVT-NEXT:    cmp w9, #0
+; CHECK-CVT-NEXT:    mov v1.s[2], w8
+; CHECK-CVT-NEXT:    csinv w8, w9, wzr, ge
 ; CHECK-CVT-NEXT:    mov v2.s[3], w11
-; CHECK-CVT-NEXT:    xtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    mov v1.s[3], w8
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
@@ -2172,9 +2171,8 @@ define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    csel w12, w12, w10, gt
 ; CHECK-CVT-NEXT:    cmp w13, #127
 ; CHECK-CVT-NEXT:    csel w13, w13, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w16, s1
-; CHECK-CVT-NEXT:    cmn w13, #128
 ; CHECK-CVT-NEXT:    fmov s2, w11
+; CHECK-CVT-NEXT:    cmn w13, #128
 ; CHECK-CVT-NEXT:    csel w13, w13, w10, gt
 ; CHECK-CVT-NEXT:    cmp w14, #127
 ; CHECK-CVT-NEXT:    csel w14, w14, w8, lt
@@ -2183,25 +2181,25 @@ define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    cmp w15, #127
 ; CHECK-CVT-NEXT:    csel w15, w15, w8, lt
 ; CHECK-CVT-NEXT:    cmn w15, #128
-; CHECK-CVT-NEXT:    csel w15, w15, w10, gt
-; CHECK-CVT-NEXT:    cmp w16, #127
-; CHECK-CVT-NEXT:    csel w11, w16, w8, lt
-; CHECK-CVT-NEXT:    cmn w11, #128
-; CHECK-CVT-NEXT:    fmov s1, w15
-; CHECK-CVT-NEXT:    fcvtzs w15, s0
-; CHECK-CVT-NEXT:    csel w11, w11, w10, gt
+; CHECK-CVT-NEXT:    csel w11, w15, w10, gt
+; CHECK-CVT-NEXT:    fcvtzs w15, s1
 ; CHECK-CVT-NEXT:    mov v2.s[1], w9
-; CHECK-CVT-NEXT:    mov v1.s[1], w14
+; CHECK-CVT-NEXT:    fmov s1, w11
 ; CHECK-CVT-NEXT:    cmp w15, #127
-; CHECK-CVT-NEXT:    csel w8, w15, w8, lt
+; CHECK-CVT-NEXT:    csel w9, w15, w8, lt
+; CHECK-CVT-NEXT:    fcvtzs w11, s0
+; CHECK-CVT-NEXT:    cmn w9, #128
+; CHECK-CVT-NEXT:    mov v1.s[1], w14
+; CHECK-CVT-NEXT:    csel w9, w9, w10, gt
+; CHECK-CVT-NEXT:    cmp w11, #127
+; CHECK-CVT-NEXT:    csel w8, w11, w8, lt
+; CHECK-CVT-NEXT:    mov v2.s[2], w12
 ; CHECK-CVT-NEXT:    cmn w8, #128
+; CHECK-CVT-NEXT:    mov v1.s[2], w9
 ; CHECK-CVT-NEXT:    csel w8, w8, w10, gt
-; CHECK-CVT-NEXT:    mov v1.s[2], w11
-; CHECK-CVT-NEXT:    mov v2.s[2], w12
-; CHECK-CVT-NEXT:    mov v1.s[3], w8
 ; CHECK-CVT-NEXT:    mov v2.s[3], w13
-; CHECK-CVT-NEXT:    xtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    mov v1.s[3], w8
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
@@ -2247,9 +2245,8 @@ define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    csel w12, w12, w10, gt
 ; CHECK-CVT-NEXT:    cmp w13, #4095
 ; CHECK-CVT-NEXT:    csel w13, w13, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w16, s1
-; CHECK-CVT-NEXT:    cmn w13, #1, lsl #12 // =4096
 ; CHECK-CVT-NEXT:    fmov s2, w11
+; CHECK-CVT-NEXT:    cmn w13, #1, lsl #12 // =4096
 ; CHECK-CVT-NEXT:    csel w13, w13, w10, gt
 ; CHECK-CVT-NEXT:    cmp w14, #4095
 ; CHECK-CVT-NEXT:    csel w14, w14, w8, lt
@@ -2258,25 +2255,25 @@ define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    cmp w15, #4095
 ; CHECK-CVT-NEXT:    csel w15, w15, w8, lt
 ; CHECK-CVT-NEXT:    cmn w15, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    csel w15, w15, w10, gt
-; CHECK-CVT-NEXT:    cmp w16, #4095
-; CHECK-CVT-NEXT:    csel w11, w16, w8, lt
-; CHECK-CVT-NEXT:    cmn w11, #1, lsl #12 // =4096
-; CHECK-CVT-NEXT:    fmov s1, w15
-; CHECK-CVT-NEXT:    fcvtzs w15, s0
-; CHECK-CVT-NEXT:    csel w11, w11, w10, gt
+; CHECK-CVT-NEXT:    csel w11, w15, w10, gt
+; CHECK-CVT-NEXT:    fcvtzs w15, s1
 ; CHECK-CVT-NEXT:    mov v2.s[1], w9
-; CHECK-CVT-NEXT:    mov v1.s[1], w14
+; CHECK-CVT-NEXT:    fmov s1, w11
 ; CHECK-CVT-NEXT:    cmp w15, #4095
-; CHECK-CVT-NEXT:    csel w8, w15, w8, lt
+; CHECK-CVT-NEXT:    csel w9, w15, w8, lt
+; CHECK-CVT-NEXT:    fcvtzs w11, s0
+; CHECK-CVT-NEXT:    cmn w9, #1, lsl #12 // =4096
+; CHECK-CVT-NEXT:    mov v1.s[1], w14
+; CHECK-CVT-NEXT:    csel w9, w9, w10, gt
+; CHECK-CVT-NEXT:    cmp w11, #4095
+; CHECK-CVT-NEXT:    csel w8, w11, w8, lt
+; CHECK-CVT-NEXT:    mov v2.s[2], w12
 ; CHECK-CVT-NEXT:    cmn w8, #1, lsl #12 // =4096
+; CHECK-CVT-NEXT:    mov v1.s[2], w9
 ; CHECK-CVT-NEXT:    csel w8, w8, w10, gt
-; CHECK-CVT-NEXT:    mov v1.s[2], w11
-; CHECK-CVT-NEXT:    mov v2.s[2], w12
-; CHECK-CVT-NEXT:    mov v1.s[3], w8
 ; CHECK-CVT-NEXT:    mov v2.s[3], w13
-; CHECK-CVT-NEXT:    xtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    mov v1.s[3], w8
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13:
@@ -2324,9 +2321,8 @@ define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    csel w12, w12, w10, gt
 ; CHECK-CVT-NEXT:    cmp w13, w8
 ; CHECK-CVT-NEXT:    csel w13, w13, w8, lt
-; CHECK-CVT-NEXT:    fcvtzs w16, s1
-; CHECK-CVT-NEXT:    cmn w13, #8, lsl #12 // =32768
 ; CHECK-CVT-NEXT:    fmov s2, w11
+; CHECK-CVT-NEXT:    cmn w13, #8, lsl #12 // =32768
 ; CHECK-CVT-NEXT:    csel w13, w13, w10, gt
 ; CHECK-CVT-NEXT:    cmp w14, w8
 ; CHECK-CVT-NEXT:    csel w14, w14, w8, lt
@@ -2335,25 +2331,25 @@ define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    cmp w15, w8
 ; CHECK-CVT-NEXT:    csel w15, w15, w8, lt
 ; CHECK-CVT-NEXT:    cmn w15, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    csel w15, w15, w10, gt
-; CHECK-CVT-NEXT:    cmp w16, w8
-; CHECK-CVT-NEXT:    csel w11, w16, w8, lt
-; CHECK-CVT-NEXT:    cmn w11, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT:    fmov s1, w15
-; CHECK-CVT-NEXT:    fcvtzs w15, s0
-; CHECK-CVT-NEXT:    csel w11, w11, w10, gt
+; CHECK-CVT-NEXT:    csel w11, w15, w10, gt
+; CHECK-CVT-NEXT:    fcvtzs w15, s1
 ; CHECK-CVT-NEXT:    mov v2.s[1], w9
-; CHECK-CVT-NEXT:    mov v1.s[1], w14
+; CHECK-CVT-NEXT:    fmov s1, w11
 ; CHECK-CVT-NEXT:    cmp w15, w8
-; CHECK-CVT-NEXT:    csel w8, w15, w8, lt
+; CHECK-CVT-NEXT:    csel w9, w15, w8, lt
+; CHECK-CVT-NEXT:    fcvtzs w11, s0
+; CHECK-CVT-NEXT:    cmn w9, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT:    mov v1.s[1], w14
+; CHECK-CVT-NEXT:    csel w9, w9, w10, gt
+; CHECK-CVT-NEXT:    cmp w11, w8
+; CHECK-CVT-NEXT:    csel w8, w11, w8, lt
+; CHECK-CVT-NEXT:    mov v2.s[2], w12
 ; CHECK-CVT-NEXT:    cmn w8, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT:    mov v1.s[2], w9
 ; CHECK-CVT-NEXT:    csel w8, w8, w10, gt
-; CHECK-CVT-NEXT:    mov v1.s[2], w11
-; CHECK-CVT-NEXT:    mov v2.s[2], w12
-; CHECK-CVT-NEXT:    mov v1.s[3], w8
 ; CHECK-CVT-NEXT:    mov v2.s[3], w13
-; CHECK-CVT-NEXT:    xtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    mov v1.s[3], w8
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16:

diff  --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index a903d842ec9b..017845d3624a 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -1775,22 +1775,21 @@ define <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    csinc w13, w13, wzr, lo
 ; CHECK-CVT-NEXT:    cmp w10, #1
 ; CHECK-CVT-NEXT:    csinc w10, w10, wzr, lo
-; CHECK-CVT-NEXT:    fmov s2, w10
-; CHECK-CVT-NEXT:    fcvtzu w10, s1
-; CHECK-CVT-NEXT:    fmov s1, w9
-; CHECK-CVT-NEXT:    mov v2.s[1], w13
-; CHECK-CVT-NEXT:    cmp w10, #1
-; CHECK-CVT-NEXT:    csinc w9, w10, wzr, lo
-; CHECK-CVT-NEXT:    fcvtzu w10, s0
-; CHECK-CVT-NEXT:    mov v1.s[1], w8
-; CHECK-CVT-NEXT:    mov v2.s[2], w9
-; CHECK-CVT-NEXT:    cmp w10, #1
-; CHECK-CVT-NEXT:    csinc w8, w10, wzr, lo
-; CHECK-CVT-NEXT:    mov v1.s[2], w11
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    mov v1.s[3], w12
-; CHECK-CVT-NEXT:    xtn v0.4h, v2.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT:    fmov s2, w9
+; CHECK-CVT-NEXT:    fcvtzu w9, s1
+; CHECK-CVT-NEXT:    fmov s3, w10
+; CHECK-CVT-NEXT:    mov v2.s[1], w8
+; CHECK-CVT-NEXT:    cmp w9, #1
+; CHECK-CVT-NEXT:    csinc w8, w9, wzr, lo
+; CHECK-CVT-NEXT:    fcvtzu w9, s0
+; CHECK-CVT-NEXT:    mov v3.s[1], w13
+; CHECK-CVT-NEXT:    mov v2.s[2], w11
+; CHECK-CVT-NEXT:    cmp w9, #1
+; CHECK-CVT-NEXT:    mov v3.s[2], w8
+; CHECK-CVT-NEXT:    csinc w8, w9, wzr, lo
+; CHECK-CVT-NEXT:    mov v2.s[3], w12
+; CHECK-CVT-NEXT:    mov v3.s[3], w8
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v3.8h, v2.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
@@ -1835,22 +1834,21 @@ define <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    csel w14, w14, w8, lo
 ; CHECK-CVT-NEXT:    cmp w11, #255
 ; CHECK-CVT-NEXT:    csel w11, w11, w8, lo
-; CHECK-CVT-NEXT:    fmov s2, w11
-; CHECK-CVT-NEXT:    fcvtzu w11, s1
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    mov v2.s[1], w14
-; CHECK-CVT-NEXT:    cmp w11, #255
-; CHECK-CVT-NEXT:    csel w10, w11, w8, lo
-; CHECK-CVT-NEXT:    fcvtzu w11, s0
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    cmp w11, #255
-; CHECK-CVT-NEXT:    csel w8, w11, w8, lo
-; CHECK-CVT-NEXT:    mov v1.s[2], w12
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    mov v1.s[3], w13
-; CHECK-CVT-NEXT:    xtn v0.4h, v2.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT:    fmov s2, w10
+; CHECK-CVT-NEXT:    fcvtzu w10, s1
+; CHECK-CVT-NEXT:    fmov s3, w11
+; CHECK-CVT-NEXT:    mov v2.s[1], w9
+; CHECK-CVT-NEXT:    cmp w10, #255
+; CHECK-CVT-NEXT:    csel w9, w10, w8, lo
+; CHECK-CVT-NEXT:    fcvtzu w10, s0
+; CHECK-CVT-NEXT:    mov v3.s[1], w14
+; CHECK-CVT-NEXT:    mov v2.s[2], w12
+; CHECK-CVT-NEXT:    cmp w10, #255
+; CHECK-CVT-NEXT:    csel w8, w10, w8, lo
+; CHECK-CVT-NEXT:    mov v3.s[2], w9
+; CHECK-CVT-NEXT:    mov v2.s[3], w13
+; CHECK-CVT-NEXT:    mov v3.s[3], w8
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v3.8h, v2.8h
 ; CHECK-CVT-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-NEXT:    ret
 ;
@@ -1893,22 +1891,21 @@ define <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    csel w14, w14, w8, lo
 ; CHECK-CVT-NEXT:    cmp w11, w8
 ; CHECK-CVT-NEXT:    csel w11, w11, w8, lo
-; CHECK-CVT-NEXT:    fmov s2, w11
-; CHECK-CVT-NEXT:    fcvtzu w11, s1
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    mov v2.s[1], w14
-; CHECK-CVT-NEXT:    cmp w11, w8
-; CHECK-CVT-NEXT:    csel w10, w11, w8, lo
-; CHECK-CVT-NEXT:    fcvtzu w11, s0
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    cmp w11, w8
-; CHECK-CVT-NEXT:    csel w8, w11, w8, lo
-; CHECK-CVT-NEXT:    mov v1.s[2], w12
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    mov v1.s[3], w13
-; CHECK-CVT-NEXT:    xtn v0.4h, v2.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT:    fmov s2, w10
+; CHECK-CVT-NEXT:    fcvtzu w10, s1
+; CHECK-CVT-NEXT:    fmov s3, w11
+; CHECK-CVT-NEXT:    mov v2.s[1], w9
+; CHECK-CVT-NEXT:    cmp w10, w8
+; CHECK-CVT-NEXT:    csel w9, w10, w8, lo
+; CHECK-CVT-NEXT:    fcvtzu w10, s0
+; CHECK-CVT-NEXT:    mov v3.s[1], w14
+; CHECK-CVT-NEXT:    mov v2.s[2], w12
+; CHECK-CVT-NEXT:    cmp w10, w8
+; CHECK-CVT-NEXT:    csel w8, w10, w8, lo
+; CHECK-CVT-NEXT:    mov v3.s[2], w9
+; CHECK-CVT-NEXT:    mov v2.s[3], w13
+; CHECK-CVT-NEXT:    mov v3.s[3], w8
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v3.8h, v2.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13:
@@ -1951,22 +1948,21 @@ define <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
 ; CHECK-CVT-NEXT:    csel w14, w14, w8, lo
 ; CHECK-CVT-NEXT:    cmp w11, w8
 ; CHECK-CVT-NEXT:    csel w11, w11, w8, lo
-; CHECK-CVT-NEXT:    fmov s2, w11
-; CHECK-CVT-NEXT:    fcvtzu w11, s1
-; CHECK-CVT-NEXT:    fmov s1, w10
-; CHECK-CVT-NEXT:    mov v2.s[1], w14
-; CHECK-CVT-NEXT:    cmp w11, w8
-; CHECK-CVT-NEXT:    csel w10, w11, w8, lo
-; CHECK-CVT-NEXT:    fcvtzu w11, s0
-; CHECK-CVT-NEXT:    mov v1.s[1], w9
-; CHECK-CVT-NEXT:    mov v2.s[2], w10
-; CHECK-CVT-NEXT:    cmp w11, w8
-; CHECK-CVT-NEXT:    csel w8, w11, w8, lo
-; CHECK-CVT-NEXT:    mov v1.s[2], w12
-; CHECK-CVT-NEXT:    mov v2.s[3], w8
-; CHECK-CVT-NEXT:    mov v1.s[3], w13
-; CHECK-CVT-NEXT:    xtn v0.4h, v2.4s
-; CHECK-CVT-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT:    fmov s2, w10
+; CHECK-CVT-NEXT:    fcvtzu w10, s1
+; CHECK-CVT-NEXT:    fmov s3, w11
+; CHECK-CVT-NEXT:    mov v2.s[1], w9
+; CHECK-CVT-NEXT:    cmp w10, w8
+; CHECK-CVT-NEXT:    csel w9, w10, w8, lo
+; CHECK-CVT-NEXT:    fcvtzu w10, s0
+; CHECK-CVT-NEXT:    mov v3.s[1], w14
+; CHECK-CVT-NEXT:    mov v2.s[2], w12
+; CHECK-CVT-NEXT:    cmp w10, w8
+; CHECK-CVT-NEXT:    csel w8, w10, w8, lo
+; CHECK-CVT-NEXT:    mov v3.s[2], w9
+; CHECK-CVT-NEXT:    mov v2.s[3], w13
+; CHECK-CVT-NEXT:    mov v3.s[3], w8
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v3.8h, v2.8h
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16:

diff  --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll
index 595fd2b93a3a..923e33731710 100644
--- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll
+++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll
@@ -18,8 +18,7 @@ define void @v2i64_v2i32(<2 x i64> %a, <2 x i32>* %result) {
 define void @v4i64_v4i32(<4 x i64> %a, <4 x i32>* %result) {
 ; CHECK-LABEL: v4i64_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %b = trunc <4 x i64> %a to <4 x i32>
@@ -30,10 +29,8 @@ define void @v4i64_v4i32(<4 x i64> %a, <4 x i32>* %result) {
 define void @v8i64_v8i32(<8 x i64> %a, <8 x i32>* %result) {
 ; CHECK-LABEL: v8i64_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    xtn2 v2.4s, v3.2d
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    uzp1 v2.4s, v2.4s, v3.4s
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    stp q0, q2, [x0]
 ; CHECK-NEXT:    ret
   %b = trunc <8 x i64> %a to <8 x i32>
@@ -69,8 +66,7 @@ define void @v4i32_v4i16(<4 x i32> %a, <4 x i16>* %result) {
 define void @v8i32_v8i16(<8 x i32> %a, <8 x i16>* %result) {
 ; CHECK-LABEL: v8i32_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %b = trunc <8 x i32> %a to <8 x i16>
@@ -81,10 +77,8 @@ define void @v8i32_v8i16(<8 x i32> %a, <8 x i16>* %result) {
 define void @v16i32_v16i16(<16 x i32> %a, <16 x i16>* %result) {
 ; CHECK-LABEL: v16i32_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn2 v2.8h, v3.4s
-; CHECK-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    stp q0, q2, [x0]
 ; CHECK-NEXT:    ret
   %b = trunc <16 x i32> %a to <16 x i16>
@@ -121,8 +115,7 @@ define void @v4i32_v4i8(<4 x i32> %a, <4 x i8>* %result) {
 define void @v8i32_v8i8(<8 x i32> %a, <8 x i8>* %result) {
 ; CHECK-LABEL: v8i32_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -134,12 +127,9 @@ define void @v8i32_v8i8(<8 x i32> %a, <8 x i8>* %result) {
 define void @v16i32_v16i8(<16 x i32> %a, <16 x i8>* %result) {
 ; CHECK-LABEL: v16i32_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    xtn2 v0.8h, v1.4s
-; CHECK-NEXT:    xtn2 v2.8h, v3.4s
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    xtn2 v0.16b, v2.8h
+; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %b = trunc <16 x i32> %a to <16 x i8>
@@ -150,18 +140,12 @@ define void @v16i32_v16i8(<16 x i32> %a, <16 x i8>* %result) {
 define void @v32i32_v32i8(<32 x i32> %a, <32 x i8>* %result) {
 ; CHECK-LABEL: v32i32_v32i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v4.4h, v4.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn v6.4h, v6.4s
-; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    xtn2 v4.8h, v5.4s
-; CHECK-NEXT:    xtn2 v0.8h, v1.4s
-; CHECK-NEXT:    xtn2 v6.8h, v7.4s
-; CHECK-NEXT:    xtn2 v2.8h, v3.4s
-; CHECK-NEXT:    xtn v1.8b, v4.8h
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    xtn2 v1.16b, v6.8h
-; CHECK-NEXT:    xtn2 v0.16b, v2.8h
+; CHECK-NEXT:    uzp1 v6.8h, v6.8h, v7.8h
+; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp1 v3.8h, v4.8h, v5.8h
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp1 v1.16b, v3.16b, v6.16b
+; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
   %b = trunc <32 x i32> %a to <32 x i8>
@@ -209,8 +193,7 @@ define void @v8i16_v8i8(<8 x i16> %a, <8 x i8>* %result) {
 define void @v16i16_v16i8(<16 x i16> %a, <16 x i8>* %result) {
 ; CHECK-LABEL: v16i16_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    xtn2 v0.16b, v1.8h
+; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %b = trunc <16 x i16> %a to <16 x i8>
@@ -221,10 +204,8 @@ define void @v16i16_v16i8(<16 x i16> %a, <16 x i8>* %result) {
 define void @v32i16_v32i8(<32 x i16> %a, <32 x i8>* %result) {
 ; CHECK-LABEL: v32i16_v32i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v2.8b, v2.8h
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    xtn2 v2.16b, v3.8h
-; CHECK-NEXT:    xtn2 v0.16b, v1.8h
+; CHECK-NEXT:    uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    stp q0, q2, [x0]
 ; CHECK-NEXT:    ret
   %b = trunc <32 x i16> %a to <32 x i8>


        


More information about the llvm-commits mailing list