[llvm] 5686f06 - [AArch64][GlobalISel] Select USHLL2 Instruction

Tuan Chuong Goh via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 23 05:55:37 PDT 2023


Author: Tuan Chuong Goh
Date: 2023-08-23T13:41:47+01:00
New Revision: 5686f06d7fc02b7e2ab1eceb56f3830b6fdf7301

URL: https://github.com/llvm/llvm-project/commit/5686f06d7fc02b7e2ab1eceb56f3830b6fdf7301
DIFF: https://github.com/llvm/llvm-project/commit/5686f06d7fc02b7e2ab1eceb56f3830b6fdf7301.diff

LOG: [AArch64][GlobalISel] Select USHLL2 Instruction

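Select the ushll2/sshll2 instructions when extending the high half of a 128-bit vector under GlobalISel, instead of a mov of the high half followed by ushll/sshll.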

Differential Revision: https://reviews.llvm.org/D158420

Added: 
    

Modified: 
    llvm/include/llvm/Target/GlobalISel/Target.td
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/aarch64-addv.ll
    llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
    llvm/test/CodeGen/AArch64/arm64-vabs.ll
    llvm/test/CodeGen/AArch64/sext.ll
    llvm/test/CodeGen/AArch64/zext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Target/GlobalISel/Target.td b/llvm/include/llvm/Target/GlobalISel/Target.td
index 4cb3fd1bf79c14..dd21efe0774165 100644
--- a/llvm/include/llvm/Target/GlobalISel/Target.td
+++ b/llvm/include/llvm/Target/GlobalISel/Target.td
@@ -24,6 +24,7 @@ def s32 : LLT;
 def s64 : LLT;
 def v2s32 : LLT;
 def v4s16 : LLT;
+def v8s8 : LLT;
 
 // Defines a matcher for complex operands. This is analogous to ComplexPattern
 // from SelectionDAG.

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 1ff52f52009678..885b70a50121f3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -133,6 +133,16 @@ def extract_high_v4i32 :
 def extract_high_v2i64 :
     ComplexPattern<v1i64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
 
+def gi_extract_high_v16i8 :
+  GIComplexOperandMatcher<v8s8, "selectExtractHigh">,
+  GIComplexPatternEquiv<extract_high_v16i8>;
+def gi_extract_high_v8i16 :
+  GIComplexOperandMatcher<v4s16, "selectExtractHigh">,
+  GIComplexPatternEquiv<extract_high_v8i16>;
+def gi_extract_high_v4i32 :
+  GIComplexOperandMatcher<v2s32, "selectExtractHigh">,
+  GIComplexPatternEquiv<extract_high_v4i32>;
+
 def extract_high_v8f16 :
     ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
 def extract_high_v4f32 :

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index bd9c2f655aed3d..5bdb1d9ffc6d9b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7285,23 +7285,23 @@ def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>
 def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
 def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
 // Also match an extend from the upper half of a 128 bit source register.
-def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
           (USHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v8i16 (zext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (zext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
           (USHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v8i16 (sext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (sext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
           (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
           (USHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (zext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (zext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
           (USHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (sext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (sext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
           (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
           (USHLLv4i32_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (zext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (zext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
           (USHLLv4i32_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (sext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (sext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
           (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
 
 // Vector shift sxtl aliases

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index e3bf87c7bb79c3..6cab309d7c094c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -85,21 +85,19 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias
 ; GISEL-NEXT:    movi v0.2d, #0000000000000000
 ; GISEL-NEXT:    ushll v1.8h, v1.8b, #0
 ; GISEL-NEXT:    ushll v2.8h, v2.8b, #0
-; GISEL-NEXT:    mov d3, v1.d[1]
-; GISEL-NEXT:    mov d4, v2.d[1]
-; GISEL-NEXT:    usubl v1.4s, v1.4h, v2.4h
-; GISEL-NEXT:    usubl v2.4s, v3.4h, v4.4h
-; GISEL-NEXT:    cmgt v3.4s, v0.4s, v1.4s
-; GISEL-NEXT:    neg v4.4s, v1.4s
-; GISEL-NEXT:    shl v3.4s, v3.4s, #31
-; GISEL-NEXT:    cmgt v0.4s, v0.4s, v2.4s
-; GISEL-NEXT:    neg v5.4s, v2.4s
-; GISEL-NEXT:    sshr v3.4s, v3.4s, #31
+; GISEL-NEXT:    usubl v3.4s, v1.4h, v2.4h
+; GISEL-NEXT:    usubl2 v1.4s, v1.8h, v2.8h
+; GISEL-NEXT:    cmgt v2.4s, v0.4s, v3.4s
+; GISEL-NEXT:    cmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    neg v4.4s, v3.4s
+; GISEL-NEXT:    neg v5.4s, v1.4s
+; GISEL-NEXT:    shl v2.4s, v2.4s, #31
 ; GISEL-NEXT:    shl v0.4s, v0.4s, #31
-; GISEL-NEXT:    bit v1.16b, v4.16b, v3.16b
+; GISEL-NEXT:    sshr v2.4s, v2.4s, #31
 ; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
-; GISEL-NEXT:    bsl v0.16b, v5.16b, v2.16b
-; GISEL-NEXT:    add v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    bsl v2.16b, v4.16b, v3.16b
+; GISEL-NEXT:    bsl v0.16b, v5.16b, v1.16b
+; GISEL-NEXT:    add v0.4s, v2.4s, v0.4s
 ; GISEL-NEXT:    addv s0, v0.4s
 ; GISEL-NEXT:    fmov w0, s0
 ; GISEL-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 43d9eb7f368979..37c6202cda4a1f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -41,9 +41,9 @@ define <16 x i16> @func3(<16 x i8> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: func3:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
-; CHECK-GI-NEXT:    ushll.8h v1, v1, #0
+; CHECK-GI-NEXT:    ushll.8h v2, v0, #0
+; CHECK-GI-NEXT:    ushll2.8h v1, v0, #0
+; CHECK-GI-NEXT:    mov.16b v0, v2
 ; CHECK-GI-NEXT:    ret
   %r = zext <16 x i8> %v0 to <16 x i16>
   ret <16 x i16> %r
@@ -58,9 +58,9 @@ define <16 x i16> @func4(<16 x i8> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: func4:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
-; CHECK-GI-NEXT:    sshll.8h v1, v1, #0
+; CHECK-GI-NEXT:    sshll.8h v2, v0, #0
+; CHECK-GI-NEXT:    sshll2.8h v1, v0, #0
+; CHECK-GI-NEXT:    mov.16b v0, v2
 ; CHECK-GI-NEXT:    ret
   %r = sext <16 x i8> %v0 to <16 x i16>
   ret <16 x i16> %r
@@ -97,9 +97,9 @@ define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: afunc3:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
-; CHECK-GI-NEXT:    ushll.4s v1, v1, #0
+; CHECK-GI-NEXT:    ushll.4s v2, v0, #0
+; CHECK-GI-NEXT:    ushll2.4s v1, v0, #0
+; CHECK-GI-NEXT:    mov.16b v0, v2
 ; CHECK-GI-NEXT:    ret
   %r = zext <8 x i16> %v0 to <8 x i32>
   ret <8 x i32> %r
@@ -114,9 +114,9 @@ define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: afunc4:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    sshll.4s v0, v0, #0
-; CHECK-GI-NEXT:    sshll.4s v1, v1, #0
+; CHECK-GI-NEXT:    sshll.4s v2, v0, #0
+; CHECK-GI-NEXT:    sshll2.4s v1, v0, #0
+; CHECK-GI-NEXT:    mov.16b v0, v2
 ; CHECK-GI-NEXT:    ret
   %r = sext <8 x i16> %v0 to <8 x i32>
   ret <8 x i32> %r
@@ -132,10 +132,9 @@ define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: bfunc1:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
-; CHECK-GI-NEXT:    ushll.4s v1, v1, #0
+; CHECK-GI-NEXT:    ushll.8h v1, v0, #0
+; CHECK-GI-NEXT:    ushll.4s v0, v1, #0
+; CHECK-GI-NEXT:    ushll2.4s v1, v1, #0
 ; CHECK-GI-NEXT:    ret
   %r = zext <8 x i8> %v0 to <8 x i32>
   ret <8 x i32> %r
@@ -151,10 +150,9 @@ define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: bfunc2:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    sshll.4s v0, v0, #0
-; CHECK-GI-NEXT:    sshll.4s v1, v1, #0
+; CHECK-GI-NEXT:    sshll.8h v1, v0, #0
+; CHECK-GI-NEXT:    sshll.4s v0, v1, #0
+; CHECK-GI-NEXT:    sshll2.4s v1, v1, #0
 ; CHECK-GI-NEXT:    ret
   %r = sext <8 x i8> %v0 to <8 x i32>
   ret <8 x i32> %r
@@ -173,9 +171,9 @@ define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: zfunc1:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
-; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
+; CHECK-GI-NEXT:    ushll.2d v2, v0, #0
+; CHECK-GI-NEXT:    ushll2.2d v1, v0, #0
+; CHECK-GI-NEXT:    mov.16b v0, v2
 ; CHECK-GI-NEXT:    ret
   %r = zext <4 x i32> %v0 to <4 x i64>
   ret <4 x i64> %r
@@ -190,9 +188,9 @@ define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: zfunc2:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    sshll.2d v0, v0, #0
-; CHECK-GI-NEXT:    sshll.2d v1, v1, #0
+; CHECK-GI-NEXT:    sshll.2d v2, v0, #0
+; CHECK-GI-NEXT:    sshll2.2d v1, v0, #0
+; CHECK-GI-NEXT:    mov.16b v0, v2
 ; CHECK-GI-NEXT:    ret
   %r = sext <4 x i32> %v0 to <4 x i64>
   ret <4 x i64> %r
@@ -208,10 +206,9 @@ define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: bfunc3:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
-; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
+; CHECK-GI-NEXT:    ushll.4s v1, v0, #0
+; CHECK-GI-NEXT:    ushll.2d v0, v1, #0
+; CHECK-GI-NEXT:    ushll2.2d v1, v1, #0
 ; CHECK-GI-NEXT:    ret
   %r = zext <4 x i16> %v0 to <4 x i64>
   ret <4 x i64> %r
@@ -227,10 +224,9 @@ define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
 ;
 ; CHECK-GI-LABEL: cfunc4:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll.4s v0, v0, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    sshll.2d v0, v0, #0
-; CHECK-GI-NEXT:    sshll.2d v1, v1, #0
+; CHECK-GI-NEXT:    sshll.4s v1, v0, #0
+; CHECK-GI-NEXT:    sshll.2d v0, v1, #0
+; CHECK-GI-NEXT:    sshll2.2d v1, v1, #0
 ; CHECK-GI-NEXT:    ret
   %r = sext <4 x i16> %v0 to <4 x i64>
   ret <4 x i64> %r
@@ -249,12 +245,11 @@ define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
-; CHECK-GI-NEXT:    and.16b v0, v0, v2
-; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
-; CHECK-GI-NEXT:    and.16b v1, v1, v2
+; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT:    ushll.2d v1, v0, #0
+; CHECK-GI-NEXT:    ushll2.2d v2, v0, #0
+; CHECK-GI-NEXT:    and.16b v0, v1, v3
+; CHECK-GI-NEXT:    and.16b v1, v2, v3
 ; CHECK-GI-NEXT:    ret
   %r = zext <4 x i8> %v0 to <4 x i64>
   ret <4 x i64> %r
@@ -275,13 +270,12 @@ define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
 ; CHECK-GI-LABEL: sext_v4i8_to_v4i64:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
-; CHECK-GI-NEXT:    shl.2d v0, v0, #56
-; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
-; CHECK-GI-NEXT:    sshr.2d v0, v0, #56
+; CHECK-GI-NEXT:    ushll.2d v1, v0, #0
+; CHECK-GI-NEXT:    ushll2.2d v0, v0, #0
 ; CHECK-GI-NEXT:    shl.2d v1, v1, #56
-; CHECK-GI-NEXT:    sshr.2d v1, v1, #56
+; CHECK-GI-NEXT:    shl.2d v2, v0, #56
+; CHECK-GI-NEXT:    sshr.2d v0, v1, #56
+; CHECK-GI-NEXT:    sshr.2d v1, v2, #56
 ; CHECK-GI-NEXT:    ret
   %r = sext <4 x i8> %v0 to <4 x i64>
   ret <4 x i64> %r
@@ -302,15 +296,12 @@ define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
 ; CHECK-GI-LABEL: zext_v8i8_to_v8i64:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
-; CHECK-GI-NEXT:    ushll.4s v2, v1, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
-; CHECK-GI-NEXT:    mov d3, v2[1]
-; CHECK-GI-NEXT:    ushll.2d v2, v2, #0
-; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
-; CHECK-GI-NEXT:    ushll.2d v3, v3, #0
+; CHECK-GI-NEXT:    ushll.4s v1, v0, #0
+; CHECK-GI-NEXT:    ushll2.4s v3, v0, #0
+; CHECK-GI-NEXT:    ushll.2d v0, v1, #0
+; CHECK-GI-NEXT:    ushll2.2d v1, v1, #0
+; CHECK-GI-NEXT:    ushll.2d v2, v3, #0
+; CHECK-GI-NEXT:    ushll2.2d v3, v3, #0
 ; CHECK-GI-NEXT:    ret
   %r = zext <8 x i8> %v0 to <8 x i64>
   ret <8 x i64> %r
@@ -331,15 +322,12 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
 ; CHECK-GI-LABEL: sext_v8i8_to_v8i64:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    sshll.4s v0, v0, #0
-; CHECK-GI-NEXT:    sshll.4s v2, v1, #0
-; CHECK-GI-NEXT:    mov d1, v0[1]
-; CHECK-GI-NEXT:    sshll.2d v0, v0, #0
-; CHECK-GI-NEXT:    mov d3, v2[1]
-; CHECK-GI-NEXT:    sshll.2d v2, v2, #0
-; CHECK-GI-NEXT:    sshll.2d v1, v1, #0
-; CHECK-GI-NEXT:    sshll.2d v3, v3, #0
+; CHECK-GI-NEXT:    sshll.4s v1, v0, #0
+; CHECK-GI-NEXT:    sshll2.4s v3, v0, #0
+; CHECK-GI-NEXT:    sshll.2d v0, v1, #0
+; CHECK-GI-NEXT:    sshll2.2d v1, v1, #0
+; CHECK-GI-NEXT:    sshll.2d v2, v3, #0
+; CHECK-GI-NEXT:    sshll2.2d v3, v3, #0
 ; CHECK-GI-NEXT:    ret
   %r = sext <8 x i8> %v0 to <8 x i64>
   ret <8 x i64> %r

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 1cc9040b5a2dc3..18efdc86e16aa8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -287,44 +287,38 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: uabd16b_rdx_i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d3, v0[1]
-; CHECK-GI-NEXT:    mov d4, v1[1]
-; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
-; CHECK-GI-NEXT:    ushll.8h v1, v1, #0
+; CHECK-GI-NEXT:    ushll.8h v3, v0, #0
+; CHECK-GI-NEXT:    ushll.8h v4, v1, #0
+; CHECK-GI-NEXT:    ushll2.8h v0, v0, #0
+; CHECK-GI-NEXT:    ushll2.8h v1, v1, #0
 ; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT:    mov d5, v0[1]
-; CHECK-GI-NEXT:    ushll.8h v3, v3, #0
-; CHECK-GI-NEXT:    ushll.8h v4, v4, #0
-; CHECK-GI-NEXT:    mov d7, v1[1]
-; CHECK-GI-NEXT:    usubl.4s v0, v0, v1
-; CHECK-GI-NEXT:    mov d6, v3[1]
-; CHECK-GI-NEXT:    mov d16, v4[1]
-; CHECK-GI-NEXT:    usubl.4s v3, v3, v4
-; CHECK-GI-NEXT:    usubl.4s v1, v5, v7
-; CHECK-GI-NEXT:    cmgt.4s v5, v2, v0
-; CHECK-GI-NEXT:    usubl.4s v4, v6, v16
-; CHECK-GI-NEXT:    cmgt.4s v7, v2, v3
-; CHECK-GI-NEXT:    neg.4s v16, v0
-; CHECK-GI-NEXT:    cmgt.4s v6, v2, v1
-; CHECK-GI-NEXT:    shl.4s v5, v5, #31
-; CHECK-GI-NEXT:    neg.4s v17, v1
-; CHECK-GI-NEXT:    neg.4s v18, v3
-; CHECK-GI-NEXT:    shl.4s v7, v7, #31
-; CHECK-GI-NEXT:    cmgt.4s v2, v2, v4
+; CHECK-GI-NEXT:    usubl.4s v5, v3, v4
+; CHECK-GI-NEXT:    usubl2.4s v3, v3, v4
+; CHECK-GI-NEXT:    usubl.4s v4, v0, v1
+; CHECK-GI-NEXT:    usubl2.4s v0, v0, v1
+; CHECK-GI-NEXT:    cmgt.4s v1, v2, v5
+; CHECK-GI-NEXT:    cmgt.4s v6, v2, v3
+; CHECK-GI-NEXT:    neg.4s v16, v5
+; CHECK-GI-NEXT:    cmgt.4s v7, v2, v4
+; CHECK-GI-NEXT:    cmgt.4s v2, v2, v0
+; CHECK-GI-NEXT:    neg.4s v17, v3
+; CHECK-GI-NEXT:    neg.4s v18, v4
+; CHECK-GI-NEXT:    neg.4s v19, v0
+; CHECK-GI-NEXT:    shl.4s v1, v1, #31
 ; CHECK-GI-NEXT:    shl.4s v6, v6, #31
-; CHECK-GI-NEXT:    neg.4s v19, v4
-; CHECK-GI-NEXT:    sshr.4s v5, v5, #31
-; CHECK-GI-NEXT:    sshr.4s v7, v7, #31
+; CHECK-GI-NEXT:    shl.4s v7, v7, #31
 ; CHECK-GI-NEXT:    shl.4s v2, v2, #31
+; CHECK-GI-NEXT:    sshr.4s v1, v1, #31
 ; CHECK-GI-NEXT:    sshr.4s v6, v6, #31
-; CHECK-GI-NEXT:    bit.16b v0, v16, v5
-; CHECK-GI-NEXT:    bit.16b v3, v18, v7
+; CHECK-GI-NEXT:    sshr.4s v7, v7, #31
 ; CHECK-GI-NEXT:    sshr.4s v2, v2, #31
-; CHECK-GI-NEXT:    bit.16b v1, v17, v6
-; CHECK-GI-NEXT:    bsl.16b v2, v19, v4
-; CHECK-GI-NEXT:    add.4s v0, v0, v1
-; CHECK-GI-NEXT:    add.4s v1, v3, v2
-; CHECK-GI-NEXT:    add.4s v0, v0, v1
+; CHECK-GI-NEXT:    bsl.16b v1, v16, v5
+; CHECK-GI-NEXT:    bit.16b v3, v17, v6
+; CHECK-GI-NEXT:    bit.16b v4, v18, v7
+; CHECK-GI-NEXT:    bit.16b v0, v19, v2
+; CHECK-GI-NEXT:    add.4s v1, v1, v3
+; CHECK-GI-NEXT:    add.4s v0, v4, v0
+; CHECK-GI-NEXT:    add.4s v0, v1, v0
 ; CHECK-GI-NEXT:    addv.4s s0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -349,44 +343,38 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: sabd16b_rdx_i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d3, v0[1]
-; CHECK-GI-NEXT:    mov d4, v1[1]
-; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
-; CHECK-GI-NEXT:    sshll.8h v1, v1, #0
+; CHECK-GI-NEXT:    sshll.8h v3, v0, #0
+; CHECK-GI-NEXT:    sshll.8h v4, v1, #0
+; CHECK-GI-NEXT:    sshll2.8h v0, v0, #0
+; CHECK-GI-NEXT:    sshll2.8h v1, v1, #0
 ; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT:    mov d5, v0[1]
-; CHECK-GI-NEXT:    sshll.8h v3, v3, #0
-; CHECK-GI-NEXT:    sshll.8h v4, v4, #0
-; CHECK-GI-NEXT:    mov d7, v1[1]
-; CHECK-GI-NEXT:    ssubl.4s v0, v0, v1
-; CHECK-GI-NEXT:    mov d6, v3[1]
-; CHECK-GI-NEXT:    mov d16, v4[1]
-; CHECK-GI-NEXT:    ssubl.4s v3, v3, v4
-; CHECK-GI-NEXT:    ssubl.4s v1, v5, v7
-; CHECK-GI-NEXT:    cmgt.4s v5, v2, v0
-; CHECK-GI-NEXT:    ssubl.4s v4, v6, v16
-; CHECK-GI-NEXT:    cmgt.4s v7, v2, v3
-; CHECK-GI-NEXT:    neg.4s v16, v0
-; CHECK-GI-NEXT:    cmgt.4s v6, v2, v1
-; CHECK-GI-NEXT:    shl.4s v5, v5, #31
-; CHECK-GI-NEXT:    neg.4s v17, v1
-; CHECK-GI-NEXT:    neg.4s v18, v3
-; CHECK-GI-NEXT:    shl.4s v7, v7, #31
-; CHECK-GI-NEXT:    cmgt.4s v2, v2, v4
+; CHECK-GI-NEXT:    ssubl.4s v5, v3, v4
+; CHECK-GI-NEXT:    ssubl2.4s v3, v3, v4
+; CHECK-GI-NEXT:    ssubl.4s v4, v0, v1
+; CHECK-GI-NEXT:    ssubl2.4s v0, v0, v1
+; CHECK-GI-NEXT:    cmgt.4s v1, v2, v5
+; CHECK-GI-NEXT:    cmgt.4s v6, v2, v3
+; CHECK-GI-NEXT:    neg.4s v16, v5
+; CHECK-GI-NEXT:    cmgt.4s v7, v2, v4
+; CHECK-GI-NEXT:    cmgt.4s v2, v2, v0
+; CHECK-GI-NEXT:    neg.4s v17, v3
+; CHECK-GI-NEXT:    neg.4s v18, v4
+; CHECK-GI-NEXT:    neg.4s v19, v0
+; CHECK-GI-NEXT:    shl.4s v1, v1, #31
 ; CHECK-GI-NEXT:    shl.4s v6, v6, #31
-; CHECK-GI-NEXT:    neg.4s v19, v4
-; CHECK-GI-NEXT:    sshr.4s v5, v5, #31
-; CHECK-GI-NEXT:    sshr.4s v7, v7, #31
+; CHECK-GI-NEXT:    shl.4s v7, v7, #31
 ; CHECK-GI-NEXT:    shl.4s v2, v2, #31
+; CHECK-GI-NEXT:    sshr.4s v1, v1, #31
 ; CHECK-GI-NEXT:    sshr.4s v6, v6, #31
-; CHECK-GI-NEXT:    bit.16b v0, v16, v5
-; CHECK-GI-NEXT:    bit.16b v3, v18, v7
+; CHECK-GI-NEXT:    sshr.4s v7, v7, #31
 ; CHECK-GI-NEXT:    sshr.4s v2, v2, #31
-; CHECK-GI-NEXT:    bit.16b v1, v17, v6
-; CHECK-GI-NEXT:    bsl.16b v2, v19, v4
-; CHECK-GI-NEXT:    add.4s v0, v0, v1
-; CHECK-GI-NEXT:    add.4s v1, v3, v2
-; CHECK-GI-NEXT:    add.4s v0, v0, v1
+; CHECK-GI-NEXT:    bsl.16b v1, v16, v5
+; CHECK-GI-NEXT:    bit.16b v3, v17, v6
+; CHECK-GI-NEXT:    bit.16b v4, v18, v7
+; CHECK-GI-NEXT:    bit.16b v0, v19, v2
+; CHECK-GI-NEXT:    add.4s v1, v1, v3
+; CHECK-GI-NEXT:    add.4s v0, v4, v0
+; CHECK-GI-NEXT:    add.4s v0, v1, v0
 ; CHECK-GI-NEXT:    addv.4s s0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -419,21 +407,19 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
 ; CHECK-GI-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT:    mov d3, v1[1]
-; CHECK-GI-NEXT:    mov d4, v2[1]
-; CHECK-GI-NEXT:    usubl.4s v1, v1, v2
-; CHECK-GI-NEXT:    usubl.4s v2, v3, v4
-; CHECK-GI-NEXT:    cmgt.4s v3, v0, v1
-; CHECK-GI-NEXT:    neg.4s v4, v1
-; CHECK-GI-NEXT:    shl.4s v3, v3, #31
-; CHECK-GI-NEXT:    cmgt.4s v0, v0, v2
-; CHECK-GI-NEXT:    neg.4s v5, v2
-; CHECK-GI-NEXT:    sshr.4s v3, v3, #31
+; CHECK-GI-NEXT:    usubl.4s v3, v1, v2
+; CHECK-GI-NEXT:    usubl2.4s v1, v1, v2
+; CHECK-GI-NEXT:    cmgt.4s v2, v0, v3
+; CHECK-GI-NEXT:    cmgt.4s v0, v0, v1
+; CHECK-GI-NEXT:    neg.4s v4, v3
+; CHECK-GI-NEXT:    neg.4s v5, v1
+; CHECK-GI-NEXT:    shl.4s v2, v2, #31
 ; CHECK-GI-NEXT:    shl.4s v0, v0, #31
-; CHECK-GI-NEXT:    bit.16b v1, v4, v3
+; CHECK-GI-NEXT:    sshr.4s v2, v2, #31
 ; CHECK-GI-NEXT:    sshr.4s v0, v0, #31
-; CHECK-GI-NEXT:    bsl.16b v0, v5, v2
-; CHECK-GI-NEXT:    add.4s v0, v1, v0
+; CHECK-GI-NEXT:    bsl.16b v2, v4, v3
+; CHECK-GI-NEXT:    bsl.16b v0, v5, v1
+; CHECK-GI-NEXT:    add.4s v0, v2, v0
 ; CHECK-GI-NEXT:    addv.4s s0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -459,22 +445,20 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
 ;
 ; CHECK-GI-LABEL: sabd8h_rdx:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov d3, v0[1]
-; CHECK-GI-NEXT:    mov d4, v1[1]
 ; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT:    ssubl.4s v0, v0, v1
-; CHECK-GI-NEXT:    ssubl.4s v1, v3, v4
-; CHECK-GI-NEXT:    cmgt.4s v3, v2, v0
-; CHECK-GI-NEXT:    neg.4s v4, v0
-; CHECK-GI-NEXT:    cmgt.4s v2, v2, v1
-; CHECK-GI-NEXT:    shl.4s v3, v3, #31
-; CHECK-GI-NEXT:    neg.4s v5, v1
+; CHECK-GI-NEXT:    ssubl.4s v3, v0, v1
+; CHECK-GI-NEXT:    ssubl2.4s v0, v0, v1
+; CHECK-GI-NEXT:    cmgt.4s v1, v2, v3
+; CHECK-GI-NEXT:    cmgt.4s v2, v2, v0
+; CHECK-GI-NEXT:    neg.4s v4, v3
+; CHECK-GI-NEXT:    neg.4s v5, v0
+; CHECK-GI-NEXT:    shl.4s v1, v1, #31
 ; CHECK-GI-NEXT:    shl.4s v2, v2, #31
-; CHECK-GI-NEXT:    sshr.4s v3, v3, #31
+; CHECK-GI-NEXT:    sshr.4s v1, v1, #31
 ; CHECK-GI-NEXT:    sshr.4s v2, v2, #31
-; CHECK-GI-NEXT:    bit.16b v0, v4, v3
-; CHECK-GI-NEXT:    bit.16b v1, v5, v2
-; CHECK-GI-NEXT:    add.4s v0, v0, v1
+; CHECK-GI-NEXT:    bsl.16b v1, v4, v3
+; CHECK-GI-NEXT:    bit.16b v0, v5, v2
+; CHECK-GI-NEXT:    add.4s v0, v1, v0
 ; CHECK-GI-NEXT:    addv.4s s0, v0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index fd991104e43465..4d26228caf62e9 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -474,13 +474,12 @@ define <4 x i64> @sext_v4i8_v4i64(<4 x i8> %a) {
 ; CHECK-GI-LABEL: sext_v4i8_v4i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #56
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-GI-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v0.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    shl v1.2d, v1.2d, #56
-; CHECK-GI-NEXT:    sshr v1.2d, v1.2d, #56
+; CHECK-GI-NEXT:    shl v2.2d, v0.2d, #56
+; CHECK-GI-NEXT:    sshr v0.2d, v1.2d, #56
+; CHECK-GI-NEXT:    sshr v1.2d, v2.2d, #56
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <4 x i8> %a to <4 x i64>
@@ -507,10 +506,9 @@ define <4 x i64> @sext_v4i16_v4i64(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v4i16_v4i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll2 v1.2d, v1.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <4 x i16> %a to <4 x i64>
@@ -526,9 +524,9 @@ define <4 x i64> @sext_v4i32_v4i64(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v4i32_v4i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll2 v1.2d, v0.4s, #0
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <4 x i32> %a to <4 x i64>
@@ -573,13 +571,12 @@ define <4 x i64> @sext_v4i10_v4i64(<4 x i10> %a) {
 ; CHECK-GI-LABEL: sext_v4i10_v4i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #54
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v0.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    shl v1.2d, v1.2d, #54
-; CHECK-GI-NEXT:    sshr v1.2d, v1.2d, #54
+; CHECK-GI-NEXT:    shl v2.2d, v0.2d, #54
+; CHECK-GI-NEXT:    sshr v0.2d, v1.2d, #54
+; CHECK-GI-NEXT:    sshr v1.2d, v2.2d, #54
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <4 x i10> %a to <4 x i64>
@@ -606,10 +603,9 @@ define <8 x i32> @sext_v8i8_v8i32(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v8i8_v8i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    sshll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll2 v1.4s, v1.8h, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <8 x i8> %a to <8 x i32>
@@ -631,15 +627,12 @@ define <8 x i64> @sext_v8i8_v8i64(<8 x i8> %a) {
 ; CHECK-GI-LABEL: sext_v8i8_v8i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    mov d3, v2.d[1]
-; CHECK-GI-NEXT:    sshll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    sshll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT:    sshll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll2 v3.2d, v3.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <8 x i8> %a to <8 x i64>
@@ -655,9 +648,9 @@ define <8 x i32> @sext_v8i16_v8i32(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v8i16_v8i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <8 x i16> %a to <8 x i32>
@@ -677,15 +670,12 @@ define <8 x i64> @sext_v8i16_v8i64(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v8i16_v8i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    mov d3, v2.d[1]
-; CHECK-GI-NEXT:    sshll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    sshll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT:    sshll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll2 v3.2d, v3.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <8 x i16> %a to <8 x i64>
@@ -705,13 +695,12 @@ define <8 x i64> @sext_v8i32_v8i64(<8 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v8i32_v8i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT:    sshll v4.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll2 v5.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    sshll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT:    sshll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT:    mov v1.16b, v4.16b
+; CHECK-GI-NEXT:    sshll2 v3.2d, v1.4s, #0
+; CHECK-GI-NEXT:    mov v0.16b, v4.16b
+; CHECK-GI-NEXT:    mov v1.16b, v5.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <8 x i32> %a to <8 x i64>
@@ -742,13 +731,12 @@ define <8 x i32> @sext_v8i10_v8i32(<8 x i10> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v8i10_v8i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #22
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #22
+; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
 ; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #22
-; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #22
+; CHECK-GI-NEXT:    shl v2.4s, v0.4s, #22
+; CHECK-GI-NEXT:    sshr v0.4s, v1.4s, #22
+; CHECK-GI-NEXT:    sshr v1.4s, v2.4s, #22
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <8 x i10> %a to <8 x i32>
@@ -776,23 +764,20 @@ define <8 x i64> @sext_v8i10_v8i64(<8 x i10> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v8i10_v8i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #54
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT:    ushll v3.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v0.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    shl v2.2d, v2.2d, #54
-; CHECK-GI-NEXT:    shl v4.2d, v1.2d, #54
-; CHECK-GI-NEXT:    ushll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT:    sshr v1.2d, v2.2d, #54
-; CHECK-GI-NEXT:    sshr v2.2d, v4.2d, #54
+; CHECK-GI-NEXT:    shl v1.2d, v1.2d, #54
 ; CHECK-GI-NEXT:    shl v3.2d, v3.2d, #54
-; CHECK-GI-NEXT:    sshr v3.2d, v3.2d, #54
+; CHECK-GI-NEXT:    shl v4.2d, v0.2d, #54
+; CHECK-GI-NEXT:    sshr v0.2d, v2.2d, #54
+; CHECK-GI-NEXT:    sshr v1.2d, v1.2d, #54
+; CHECK-GI-NEXT:    sshr v2.2d, v3.2d, #54
+; CHECK-GI-NEXT:    sshr v3.2d, v4.2d, #54
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <8 x i10> %a to <8 x i64>
@@ -808,9 +793,9 @@ define <16 x i16> @sext_v16i8_v16i16(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v16i8_v16i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    sshll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <16 x i8> %a to <16 x i16>
@@ -830,15 +815,12 @@ define <16 x i32> @sext_v16i8_v16i32(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v16i8_v16i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    sshll v2.8h, v1.8b, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    mov d3, v2.d[1]
-; CHECK-GI-NEXT:    sshll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT:    sshll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    sshll v3.4s, v3.4h, #0
+; CHECK-GI-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    sshll2 v3.8h, v0.16b, #0
+; CHECK-GI-NEXT:    sshll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-NEXT:    sshll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT:    sshll2 v3.4s, v3.8h, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <16 x i8> %a to <16 x i32>
@@ -866,27 +848,20 @@ define <16 x i64> @sext_v16i8_v16i64(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v16i8_v16i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    sshll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT:    sshll v4.4s, v1.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    mov d5, v4.d[1]
-; CHECK-GI-NEXT:    sshll v4.2d, v4.2s, #0
-; CHECK-GI-NEXT:    sshll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT:    mov d3, v2.d[1]
-; CHECK-GI-NEXT:    sshll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    mov d7, v6.d[1]
-; CHECK-GI-NEXT:    sshll v5.2d, v5.2s, #0
-; CHECK-GI-NEXT:    sshll v6.2d, v6.2s, #0
-; CHECK-GI-NEXT:    sshll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT:    sshll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-NEXT:    sshll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll2 v3.4s, v1.8h, #0
+; CHECK-GI-NEXT:    sshll v5.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v7.4s, v0.8h, #0
+; CHECK-GI-NEXT:    sshll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT:    sshll2 v1.2d, v2.4s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll2 v3.2d, v3.4s, #0
+; CHECK-GI-NEXT:    sshll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT:    sshll2 v5.2d, v5.4s, #0
+; CHECK-GI-NEXT:    sshll v6.2d, v7.2s, #0
+; CHECK-GI-NEXT:    sshll2 v7.2d, v7.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <16 x i8> %a to <16 x i64>
@@ -906,13 +881,12 @@ define <16 x i32> @sext_v16i16_v16i32(<16 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v16i16_v16i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    sshll v4.4s, v2.4h, #0
+; CHECK-GI-NEXT:    sshll v4.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v5.4s, v0.8h, #0
 ; CHECK-GI-NEXT:    sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT:    sshll v3.4s, v3.4h, #0
-; CHECK-GI-NEXT:    mov v1.16b, v4.16b
+; CHECK-GI-NEXT:    sshll2 v3.4s, v1.8h, #0
+; CHECK-GI-NEXT:    mov v0.16b, v4.16b
+; CHECK-GI-NEXT:    mov v1.16b, v5.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <16 x i16> %a to <16 x i32>
@@ -938,24 +912,18 @@ define <16 x i64> @sext_v16i16_v16i64(<16 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v16i16_v16i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    sshll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    sshll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT:    sshll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT:    mov d3, v0.d[1]
-; CHECK-GI-NEXT:    mov d7, v1.d[1]
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    sshll v4.2d, v1.2s, #0
-; CHECK-GI-NEXT:    mov d5, v2.d[1]
-; CHECK-GI-NEXT:    mov d16, v6.d[1]
-; CHECK-GI-NEXT:    sshll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v3.2s, #0
-; CHECK-GI-NEXT:    sshll v6.2d, v6.2s, #0
-; CHECK-GI-NEXT:    sshll v3.2d, v5.2s, #0
-; CHECK-GI-NEXT:    sshll v5.2d, v7.2s, #0
-; CHECK-GI-NEXT:    sshll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT:    sshll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll2 v7.4s, v1.8h, #0
+; CHECK-GI-NEXT:    sshll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT:    sshll2 v1.2d, v2.4s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll2 v3.2d, v3.4s, #0
+; CHECK-GI-NEXT:    sshll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT:    sshll2 v5.2d, v5.4s, #0
+; CHECK-GI-NEXT:    sshll v6.2d, v7.2s, #0
+; CHECK-GI-NEXT:    sshll2 v7.2d, v7.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <16 x i16> %a to <16 x i64>
@@ -980,20 +948,18 @@ define <16 x i64> @sext_v16i32_v16i64(<16 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v16i32_v16i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d6, v1.d[1]
-; CHECK-GI-NEXT:    mov d5, v0.d[1]
-; CHECK-GI-NEXT:    mov d7, v2.d[1]
-; CHECK-GI-NEXT:    mov d18, v3.d[1]
-; CHECK-GI-NEXT:    sshll v16.2d, v1.2s, #0
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll2 v17.2d, v0.4s, #0
+; CHECK-GI-NEXT:    sshll v18.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll2 v19.2d, v1.4s, #0
 ; CHECK-GI-NEXT:    sshll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT:    sshll v17.2d, v6.2s, #0
-; CHECK-GI-NEXT:    sshll v1.2d, v5.2s, #0
+; CHECK-GI-NEXT:    sshll2 v5.2d, v2.4s, #0
 ; CHECK-GI-NEXT:    sshll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT:    sshll v5.2d, v7.2s, #0
-; CHECK-GI-NEXT:    sshll v7.2d, v18.2s, #0
-; CHECK-GI-NEXT:    mov v2.16b, v16.16b
-; CHECK-GI-NEXT:    mov v3.16b, v17.16b
+; CHECK-GI-NEXT:    sshll2 v7.2d, v3.4s, #0
+; CHECK-GI-NEXT:    mov v0.16b, v16.16b
+; CHECK-GI-NEXT:    mov v1.16b, v17.16b
+; CHECK-GI-NEXT:    mov v2.16b, v18.16b
+; CHECK-GI-NEXT:    mov v3.16b, v19.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <16 x i32> %a to <16 x i64>

diff  --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 6499cc9e7a282e..979631adfe2a77 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -597,12 +597,11 @@ define <4 x i64> @zext_v4i8_v4i64(<4 x i8> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v2.2d, v0.4s, #0
+; CHECK-GI-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT:    and v1.16b, v2.16b, v3.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <4 x i8> %a to <4 x i64>
@@ -629,10 +628,9 @@ define <4 x i64> @zext_v4i16_v4i64(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v4i16_v4i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll2 v1.2d, v1.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <4 x i16> %a to <4 x i64>
@@ -648,9 +646,9 @@ define <4 x i64> @zext_v4i32_v4i64(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v4i32_v4i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <4 x i32> %a to <4 x i64>
@@ -706,12 +704,11 @@ define <4 x i64> @zext_v4i10_v4i64(<4 x i10> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI36_0]
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v2.2d, v0.4s, #0
+; CHECK-GI-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT:    and v1.16b, v2.16b, v3.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <4 x i10> %a to <4 x i64>
@@ -738,10 +735,9 @@ define <8 x i32> @zext_v8i8_v8i32(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v8i8_v8i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ushll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll2 v1.4s, v1.8h, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <8 x i8> %a to <8 x i32>
@@ -763,15 +759,12 @@ define <8 x i64> @zext_v8i8_v8i64(<8 x i8> %a) {
 ; CHECK-GI-LABEL: zext_v8i8_v8i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    mov d3, v2.d[1]
-; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll2 v3.2d, v3.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <8 x i8> %a to <8 x i64>
@@ -787,9 +780,9 @@ define <8 x i32> @zext_v8i16_v8i32(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v8i16_v8i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <8 x i16> %a to <8 x i32>
@@ -809,15 +802,12 @@ define <8 x i64> @zext_v8i16_v8i64(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v8i16_v8i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    mov d3, v2.d[1]
-; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll2 v3.2d, v3.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <8 x i16> %a to <8 x i64>
@@ -837,13 +827,12 @@ define <8 x i64> @zext_v8i32_v8i64(<8 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v8i32_v8i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ushll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll v4.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v5.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    ushll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT:    ushll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT:    mov v1.16b, v4.16b
+; CHECK-GI-NEXT:    ushll2 v3.2d, v1.4s, #0
+; CHECK-GI-NEXT:    mov v0.16b, v4.16b
+; CHECK-GI-NEXT:    mov v1.16b, v5.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <8 x i32> %a to <8 x i64>
@@ -877,13 +866,12 @@ define <8 x i32> @zext_v8i10_v8i32(<8 x i10> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v8i10_v8i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI44_0
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI44_0]
-; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NEXT:    and v0.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT:    and v1.16b, v2.16b, v3.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <8 x i10> %a to <8 x i32>
@@ -904,21 +892,18 @@ define <8 x i64> @zext_v8i10_v8i64(<8 x i10> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v8i10_v8i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI45_0
-; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI45_0]
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    and v0.16b, v0.16b, v4.16b
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    ushll v5.2d, v1.2s, #0
-; CHECK-GI-NEXT:    and v1.16b, v2.16b, v4.16b
-; CHECK-GI-NEXT:    and v2.16b, v5.16b, v4.16b
-; CHECK-GI-NEXT:    ushll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT:    and v3.16b, v3.16b, v4.16b
+; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI45_0]
+; CHECK-GI-NEXT:    ushll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT:    ushll v4.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v5.2d, v0.4s, #0
+; CHECK-GI-NEXT:    and v0.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT:    and v2.16b, v4.16b, v3.16b
+; CHECK-GI-NEXT:    and v3.16b, v5.16b, v3.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <8 x i10> %a to <8 x i64>
@@ -934,9 +919,9 @@ define <16 x i16> @zext_v16i8_v16i16(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v16i8_v16i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ushll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <16 x i8> %a to <16 x i16>
@@ -956,15 +941,12 @@ define <16 x i32> @zext_v16i8_v16i32(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v16i8_v16i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    ushll v2.8h, v1.8b, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    mov d3, v2.d[1]
-; CHECK-GI-NEXT:    ushll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    ushll v3.4s, v3.4h, #0
+; CHECK-GI-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ushll2 v3.8h, v0.16b, #0
+; CHECK-GI-NEXT:    ushll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-NEXT:    ushll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT:    ushll2 v3.4s, v3.8h, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <16 x i8> %a to <16 x i32>
@@ -992,27 +974,20 @@ define <16 x i64> @zext_v16i8_v16i64(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v16i8_v16i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    ushll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT:    ushll v4.4s, v1.4h, #0
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    mov d5, v4.d[1]
-; CHECK-GI-NEXT:    ushll v4.2d, v4.2s, #0
-; CHECK-GI-NEXT:    ushll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT:    mov d3, v2.d[1]
-; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT:    mov d7, v6.d[1]
-; CHECK-GI-NEXT:    ushll v5.2d, v5.2s, #0
-; CHECK-GI-NEXT:    ushll v6.2d, v6.2s, #0
-; CHECK-GI-NEXT:    ushll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT:    ushll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-NEXT:    ushll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll2 v3.4s, v1.8h, #0
+; CHECK-GI-NEXT:    ushll v5.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v7.4s, v0.8h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll2 v1.2d, v2.4s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll2 v3.2d, v3.4s, #0
+; CHECK-GI-NEXT:    ushll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT:    ushll2 v5.2d, v5.4s, #0
+; CHECK-GI-NEXT:    ushll v6.2d, v7.2s, #0
+; CHECK-GI-NEXT:    ushll2 v7.2d, v7.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <16 x i8> %a to <16 x i64>
@@ -1032,13 +1007,12 @@ define <16 x i32> @zext_v16i16_v16i32(<16 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v16i16_v16i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ushll v4.4s, v2.4h, #0
+; CHECK-GI-NEXT:    ushll v4.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v5.4s, v0.8h, #0
 ; CHECK-GI-NEXT:    ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT:    ushll v3.4s, v3.4h, #0
-; CHECK-GI-NEXT:    mov v1.16b, v4.16b
+; CHECK-GI-NEXT:    ushll2 v3.4s, v1.8h, #0
+; CHECK-GI-NEXT:    mov v0.16b, v4.16b
+; CHECK-GI-NEXT:    mov v1.16b, v5.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <16 x i16> %a to <16 x i32>
@@ -1064,24 +1038,18 @@ define <16 x i64> @zext_v16i16_v16i64(<16 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v16i16_v16i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d2, v0.d[1]
-; CHECK-GI-NEXT:    mov d3, v1.d[1]
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    ushll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT:    ushll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT:    mov d3, v0.d[1]
-; CHECK-GI-NEXT:    mov d7, v1.d[1]
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ushll v4.2d, v1.2s, #0
-; CHECK-GI-NEXT:    mov d5, v2.d[1]
-; CHECK-GI-NEXT:    mov d16, v6.d[1]
-; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT:    ushll v1.2d, v3.2s, #0
-; CHECK-GI-NEXT:    ushll v6.2d, v6.2s, #0
-; CHECK-GI-NEXT:    ushll v3.2d, v5.2s, #0
-; CHECK-GI-NEXT:    ushll v5.2d, v7.2s, #0
-; CHECK-GI-NEXT:    ushll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT:    ushll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll2 v7.4s, v1.8h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll2 v1.2d, v2.4s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll2 v3.2d, v3.4s, #0
+; CHECK-GI-NEXT:    ushll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT:    ushll2 v5.2d, v5.4s, #0
+; CHECK-GI-NEXT:    ushll v6.2d, v7.2s, #0
+; CHECK-GI-NEXT:    ushll2 v7.2d, v7.4s, #0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <16 x i16> %a to <16 x i64>
@@ -1106,20 +1074,18 @@ define <16 x i64> @zext_v16i32_v16i64(<16 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v16i32_v16i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d6, v1.d[1]
-; CHECK-GI-NEXT:    mov d5, v0.d[1]
-; CHECK-GI-NEXT:    mov d7, v2.d[1]
-; CHECK-GI-NEXT:    mov d18, v3.d[1]
-; CHECK-GI-NEXT:    ushll v16.2d, v1.2s, #0
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v17.2d, v0.4s, #0
+; CHECK-GI-NEXT:    ushll v18.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll2 v19.2d, v1.4s, #0
 ; CHECK-GI-NEXT:    ushll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT:    ushll v17.2d, v6.2s, #0
-; CHECK-GI-NEXT:    ushll v1.2d, v5.2s, #0
+; CHECK-GI-NEXT:    ushll2 v5.2d, v2.4s, #0
 ; CHECK-GI-NEXT:    ushll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT:    ushll v5.2d, v7.2s, #0
-; CHECK-GI-NEXT:    ushll v7.2d, v18.2s, #0
-; CHECK-GI-NEXT:    mov v2.16b, v16.16b
-; CHECK-GI-NEXT:    mov v3.16b, v17.16b
+; CHECK-GI-NEXT:    ushll2 v7.2d, v3.4s, #0
+; CHECK-GI-NEXT:    mov v0.16b, v16.16b
+; CHECK-GI-NEXT:    mov v1.16b, v17.16b
+; CHECK-GI-NEXT:    mov v2.16b, v18.16b
+; CHECK-GI-NEXT:    mov v3.16b, v19.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <16 x i32> %a to <16 x i64>


        


More information about the llvm-commits mailing list