[llvm] a40c984 - [AArch64][GlobalISel] Support more legal types for EXTEND

Tuan Chuong Goh via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 21 02:03:48 PDT 2023


Author: Tuan Chuong Goh
Date: 2023-08-21T09:51:17+01:00
New Revision: a40c984976d9cfead9800132720986f73b9f442d

URL: https://github.com/llvm/llvm-project/commit/a40c984976d9cfead9800132720986f73b9f442d
DIFF: https://github.com/llvm/llvm-project/commit/a40c984976d9cfead9800132720986f73b9f442d.diff

LOG: [AArch64][GlobalISel] Support more legal types for EXTEND

Expand (s/z/any)ext instructions to be compatible with more
types for GlobalISel.
This patch mainly focuses on 64-bit and 128-bit vectors with
element size of powers of 2.
It also notably handles larger-than-legal vectors.

Differential Revision: https://reviews.llvm.org/D157113

Added: 
    llvm/test/CodeGen/AArch64/sext.ll
    llvm/test/CodeGen/AArch64/zext.ll

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
    llvm/test/CodeGen/AArch64/aarch64-addv.ll
    llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
    llvm/test/CodeGen/AArch64/arm64-vabs.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 9288091874cf26..d36f27ea6e5af3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -375,6 +375,7 @@ class LegalizerHelper {
   LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
   LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
   LegalizeResult lowerFunnelShift(MachineInstr &MI);
+  LegalizeResult lowerEXT(MachineInstr &MI);
   LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI);
   LegalizeResult lowerRotate(MachineInstr &MI);
 

diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 69927227f837e7..4b059f334cfcd0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3601,6 +3601,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     return lowerMemCpyFamily(MI);
   case G_MEMCPY_INLINE:
     return lowerMemcpyInline(MI);
+  case G_ZEXT:
+  case G_SEXT:
+  case G_ANYEXT:
+    return lowerEXT(MI);
   GISEL_VECREDUCE_CASES_NONSEQ
     return lowerVectorReduction(MI);
   }
@@ -5955,6 +5959,48 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
   return Result;
 }
 
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
+  auto [Dst, Src] = MI.getFirst2Regs();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+
+  uint32_t DstTySize = DstTy.getSizeInBits();
+  uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
+  uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
+
+  if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
+      !isPowerOf2_32(SrcTyScalarSize))
+    return UnableToLegalize;
+
+  // The destination element is more than twice as wide as the source element:
+  // extend once to double width, then extend each half of that result again.
+  if (SrcTyScalarSize * 2 < DstTyScalarSize) {
+    LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
+    // Shape of the lowering (every extend reuses MI's own opcode):
+    //   ext x -> merge(ext(h0), ext(h1)) where h0, h1 = unmerge(ext x to MidTy)
+    auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
+    // Split the intermediate into two pieces with half the element count each
+    LLT EltTy = MidTy.changeElementCount(
+        MidTy.getElementCount().divideCoefficientBy(2));
+    auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
+
+    // Extend each piece to DstTy's element type (G_ZEXT/G_SEXT/G_ANYEXT as MI)
+    LLT ZExtResTy = DstTy.changeElementCount(
+        DstTy.getElementCount().divideCoefficientBy(2));
+    auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
+                                          {UnmergeSrc.getReg(0)});
+    auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
+                                          {UnmergeSrc.getReg(1)});
+
+    // Recombine the two extended pieces into Dst, replacing the original MI
+    MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  return UnableToLegalize;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 61f1350c5eeb43..0d6cbe7d88311a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -491,14 +491,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   auto ExtLegalFunc = [=](const LegalityQuery &Query) {
     unsigned DstSize = Query.Types[0].getSizeInBits();
 
-    if (DstSize == 128 && !Query.Types[0].isVector())
-      return false; // Extending to a scalar s128 needs narrowing.
-
-    // Make sure that we have something that will fit in a register, and
-    // make sure it's a power of 2.
-    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
+    // Handle legal vectors using legalFor
+    if (Query.Types[0].isVector())
       return false;
 
+    if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
+      return false; // Extending to a scalar s128 needs narrowing.
+
     const LLT &SrcTy = Query.Types[1];
 
     // Make sure we fit in a register otherwise. Don't bother checking that
@@ -512,7 +511,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   };
   getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
       .legalIf(ExtLegalFunc)
-      .clampScalar(0, s64, s64); // Just for s128, others are handled above.
+      .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
+      .clampScalar(0, s64, s64) // Just for s128, others are handled above.
+      .moreElementsToNextPow2(1)
+      .clampMaxNumElements(1, s8, 8)
+      .clampMaxNumElements(1, s16, 4)
+      .clampMaxNumElements(1, s32, 2)
+      // Tries to convert a large EXTEND into two smaller EXTENDs
+      .lowerIf([=](const LegalityQuery &Query) {
+        return (Query.Types[0].getScalarSizeInBits() >
+                Query.Types[1].getScalarSizeInBits() * 2) &&
+               Query.Types[0].isVector() &&
+               (Query.Types[1].getScalarSizeInBits() == 8 ||
+                Query.Types[1].getScalarSizeInBits() == 16);
+      });
 
   getActionDefinitionsBuilder(G_TRUNC)
       .minScalarOrEltIf(

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
index 0f3dc2b5f392ef..fa1700ac4fc52d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
@@ -243,15 +243,15 @@ body:             |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-    ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
     ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16)
-    ; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+    ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
     ; CHECK-NEXT: RET_ReallyLR implicit $b0
     %1:_(s8) = COPY $b0
     %2:_(s8) = COPY $b1

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
index 433a98afda413c..c9556e27c6349a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
@@ -129,8 +129,8 @@ body: |
     %0:_(s16) = COPY $h0
     %1:_(s16) = COPY $h1
     %2:_(<2 x s16>) = G_BUILD_VECTOR %0(s16), %1(s16)
-    %ext:_(<2 x s32>) = G_ANYEXT %2(<2 x s16>)
-    $d0 = COPY %ext(<2 x s32>)
+    %3:_(<2 x s32>) = G_ANYEXT %2(<2 x s16>)
+    $d0 = COPY %3(<2 x s32>)
     RET_ReallyLR
 ...
 
@@ -141,14 +141,14 @@ body: |
     ; CHECK-LABEL: name: widen_v2s8
     ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
-    ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: %3:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: $d0 = COPY %3(<2 x s32>)
     ; CHECK-NEXT: RET_ReallyLR
     %0:_(s8) = G_IMPLICIT_DEF
     %1:_(s8) = G_IMPLICIT_DEF
     %2:_(<2 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8)
-    %ext:_(<2 x s32>) = G_ANYEXT %2(<2 x s8>)
-    $d0 = COPY %ext(<2 x s32>)
+    %3:_(<2 x s32>) = G_ANYEXT %2(<2 x s8>)
+    $d0 = COPY %3(<2 x s32>)
     RET_ReallyLR
 ...
 
@@ -169,7 +169,7 @@ body: |
     %2:_(s8) = G_IMPLICIT_DEF
     %3:_(s8) = G_IMPLICIT_DEF
     %4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8)
-    %ext:_(<4 x s16>) = G_ANYEXT %4(<4 x s8>)
-    $d0 = COPY %ext(<4 x s16>)
+    %5:_(<4 x s16>) = G_ANYEXT %4(<4 x s8>)
+    $d0 = COPY %5(<4 x s16>)
     RET_ReallyLR
 ...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
index fe9dd22fdb33cb..8c6a30aaed0483 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
@@ -274,13 +274,12 @@ body: |
     ; CHECK-LABEL: name: test_uitofp_v2s64_v2i1
     ; CHECK: liveins: $q0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[BUILD_VECTOR]](<2 x s32>)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[ANYEXT]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
     ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<2 x s64>) = G_UITOFP [[AND]](<2 x s64>)
     ; CHECK-NEXT: $q0 = COPY [[UITOFP]](<2 x s64>)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF
@@ -296,11 +295,10 @@ body: |
     ; CHECK-LABEL: name: test_sitofp_v2s64_v2i1
     ; CHECK: liveins: $q0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[BUILD_VECTOR]](<2 x s32>)
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s64>) = G_SEXT_INREG [[ANYEXT]], 1
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s64>) = G_SEXT_INREG [[BUILD_VECTOR]], 1
     ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(<2 x s64>) = G_SITOFP [[SEXT_INREG]](<2 x s64>)
     ; CHECK-NEXT: $q0 = COPY [[SITOFP]](<2 x s64>)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index 38548e760ac9f8..1c05fe737883ce 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -69,14 +69,40 @@ define i64 @add_D(ptr %arr)  {
 declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
 
 define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias nocapture readonly %arg2) {
-; CHECK-LABEL: oversized_ADDV_256:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    ldr d1, [x1]
-; CHECK-NEXT:    uabdl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT:    uaddlv s0, v0.8h
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; SDAG-LABEL: oversized_ADDV_256:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    ldr d0, [x0]
+; SDAG-NEXT:    ldr d1, [x1]
+; SDAG-NEXT:    uabdl v0.8h, v0.8b, v1.8b
+; SDAG-NEXT:    uaddlv s0, v0.8h
+; SDAG-NEXT:    fmov w0, s0
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: oversized_ADDV_256:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    ldr d1, [x0]
+; GISEL-NEXT:    ldr d2, [x1]
+; GISEL-NEXT:    movi v0.2d, #0000000000000000
+; GISEL-NEXT:    ushll v1.8h, v1.8b, #0
+; GISEL-NEXT:    ushll v2.8h, v2.8b, #0
+; GISEL-NEXT:    mov d3, v1.d[1]
+; GISEL-NEXT:    mov d4, v2.d[1]
+; GISEL-NEXT:    usubl v1.4s, v1.4h, v2.4h
+; GISEL-NEXT:    usubl v2.4s, v3.4h, v4.4h
+; GISEL-NEXT:    cmgt v3.4s, v0.4s, v1.4s
+; GISEL-NEXT:    neg v4.4s, v1.4s
+; GISEL-NEXT:    cmgt v0.4s, v0.4s, v2.4s
+; GISEL-NEXT:    shl v3.4s, v3.4s, #31
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    neg v5.4s, v2.4s
+; GISEL-NEXT:    sshr v3.4s, v3.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    bit v1.16b, v4.16b, v3.16b
+; GISEL-NEXT:    bsl v0.16b, v5.16b, v2.16b
+; GISEL-NEXT:    add v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    addv s0, v0.4s
+; GISEL-NEXT:    fmov w0, s0
+; GISEL-NEXT:    ret
 entry:
   %0 = load <8 x i8>, ptr %arg1, align 1
   %1 = zext <8 x i8> %0 to <8 x i32>
@@ -93,16 +119,16 @@ entry:
 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
 
 define i32 @oversized_ADDV_512(ptr %arr)  {
-; SDAG-LABEL:  oversized_ADDV_512:
-; SDAG:        // %bb.0:
-; SDAG-NEXT:     ldp q0, q1, [x0, #32]
-; SDAG-NEXT:     ldp q3, q2, [x0]
-; SDAG-NEXT:     add v0.4s, v3.4s, v0.4s
-; SDAG-NEXT:     add v1.4s, v2.4s, v1.4s
-; SDAG-NEXT:     add v0.4s, v0.4s, v1.4s
-; SDAG-NEXT:     addv s0, v0.4s
-; SDAG-NEXT:     fmov w0, s0
-; SDAG-NEXT:     ret
+; SDAG-LABEL: oversized_ADDV_512:
+; SDAG:       // %bb.0:
+; SDAG-NEXT:    ldp q0, q1, [x0, #32]
+; SDAG-NEXT:    ldp q3, q2, [x0]
+; SDAG-NEXT:    add v0.4s, v3.4s, v0.4s
+; SDAG-NEXT:    add v1.4s, v2.4s, v1.4s
+; SDAG-NEXT:    add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT:    addv s0, v0.4s
+; SDAG-NEXT:    fmov w0, s0
+; SDAG-NEXT:    ret
 ;
 ; GISEL-LABEL: oversized_ADDV_512:
 ; GISEL:       // %bb.0:
@@ -148,19 +174,19 @@ entry:
 }
 
 define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {
-; SDAG-LABEL:  addv_combine_i32:
-; SDAG:        // %bb.0: // %entry
-; SDAG-NEXT:     add v0.4s, v0.4s, v1.4s
-; SDAG-NEXT:     addv s0, v0.4s
-; SDAG-NEXT:     fmov w0, s0
-; SDAG-NEXT:     ret
+; SDAG-LABEL: addv_combine_i32:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT:    addv s0, v0.4s
+; SDAG-NEXT:    fmov w0, s0
+; SDAG-NEXT:    ret
 ;
 ; GISEL-LABEL: addv_combine_i32:
 ; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    addv  s0, v0.4s
-; GISEL-NEXT:    addv  s1, v1.4s
-; GISEL-NEXT:    fmov  w8, s0
-; GISEL-NEXT:    fmov  w9, s1
+; GISEL-NEXT:    addv s0, v0.4s
+; GISEL-NEXT:    addv s1, v1.4s
+; GISEL-NEXT:    fmov w8, s0
+; GISEL-NEXT:    fmov w9, s1
 ; GISEL-NEXT:    add w0, w8, w9
 ; GISEL-NEXT:    ret
 entry:
@@ -171,19 +197,19 @@ entry:
 }
 
 define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {
-; SDAG-LABEL:  addv_combine_i64:
-; SDAG:        // %bb.0: // %entry
-; SDAG-NEXT:     add v0.2d, v0.2d, v1.2d
-; SDAG-NEXT:     addp d0, v0.2d
-; SDAG-NEXT:     fmov x0, d0
-; SDAG-NEXT:     ret
+; SDAG-LABEL: addv_combine_i64:
+; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    add v0.2d, v0.2d, v1.2d
+; SDAG-NEXT:    addp d0, v0.2d
+; SDAG-NEXT:    fmov x0, d0
+; SDAG-NEXT:    ret
 ;
 ; GISEL-LABEL: addv_combine_i64:
 ; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    addp  d0, v0.2d
-; GISEL-NEXT:    addp  d1, v1.2d
-; GISEL-NEXT:    fmov  x8, d0
-; GISEL-NEXT:    fmov  x9, d1
+; GISEL-NEXT:    addp d0, v0.2d
+; GISEL-NEXT:    addp d1, v1.2d
+; GISEL-NEXT:    fmov x8, d0
+; GISEL-NEXT:    fmov x9, d1
 ; GISEL-NEXT:    add x0, x8, x9
 ; GISEL-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 6a13d950d3b14b..60c2dada6b0627 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -5,21 +5,7 @@
 ; Test efficient codegen of vector extends up from legal type to 128 bit
 ; and 256 bit vector types.
 
-; CHECK-GI:       warning: Instruction selection used fallback path for func3
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for func4
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for afunc3
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for afunc4
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for bfunc1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for bfunc2
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for zfunc1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for zfunc2
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for bfunc3
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cfunc4
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for zext_v4i8_to_v4i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sext_v4i8_to_v4i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for zext_v8i8_to_v8i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sext_v8i8_to_v8i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for zext_v32i1
+; CHECK-GI:        warning: Instruction selection used fallback path for zext_v32i1
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sext_v32i1
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for zext_v64i1
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sext_v64i1
@@ -47,21 +33,35 @@ define <8 x i16> @func2(<8 x i8> %v0) nounwind {
 }
 
 define <16 x i16> @func3(<16 x i8> %v0) nounwind {
-; CHECK-LABEL: func3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll2.8h v1, v0, #0
-; CHECK-NEXT:    ushll.8h v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: func3:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll2.8h v1, v0, #0
+; CHECK-SD-NEXT:    ushll.8h v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: func3:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
+; CHECK-GI-NEXT:    ushll.8h v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = zext <16 x i8> %v0 to <16 x i16>
   ret <16 x i16> %r
 }
 
 define <16 x i16> @func4(<16 x i8> %v0) nounwind {
-; CHECK-LABEL: func4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll2.8h v1, v0, #0
-; CHECK-NEXT:    sshll.8h v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: func4:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll2.8h v1, v0, #0
+; CHECK-SD-NEXT:    sshll.8h v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: func4:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
+; CHECK-GI-NEXT:    sshll.8h v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = sext <16 x i8> %v0 to <16 x i16>
   ret <16 x i16> %r
 }
@@ -89,43 +89,73 @@ define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
 }
 
 define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll2.4s v1, v0, #0
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: afunc3:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll2.4s v1, v0, #0
+; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: afunc3:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
+; CHECK-GI-NEXT:    ushll.4s v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = zext <8 x i16> %v0 to <8 x i32>
   ret <8 x i32> %r
 }
 
 define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll2.4s v1, v0, #0
-; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: afunc4:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll2.4s v1, v0, #0
+; CHECK-SD-NEXT:    sshll.4s v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: afunc4:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    sshll.4s v0, v0, #0
+; CHECK-GI-NEXT:    sshll.4s v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = sext <8 x i16> %v0 to <8 x i32>
   ret <8 x i32> %r
 }
 
 define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: bfunc1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll.8h v0, v0, #0
-; CHECK-NEXT:    ushll2.4s v1, v0, #0
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: bfunc1:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll.8h v0, v0, #0
+; CHECK-SD-NEXT:    ushll2.4s v1, v0, #0
+; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: bfunc1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
+; CHECK-GI-NEXT:    ushll.4s v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = zext <8 x i8> %v0 to <8 x i32>
   ret <8 x i32> %r
 }
 
 define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: bfunc2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll.8h v0, v0, #0
-; CHECK-NEXT:    sshll2.4s v1, v0, #0
-; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: bfunc2:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll.8h v0, v0, #0
+; CHECK-SD-NEXT:    sshll2.4s v1, v0, #0
+; CHECK-SD-NEXT:    sshll.4s v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: bfunc2:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    sshll.4s v0, v0, #0
+; CHECK-GI-NEXT:    sshll.4s v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = sext <8 x i8> %v0 to <8 x i32>
   ret <8 x i32> %r
 }
@@ -135,100 +165,182 @@ define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
 ;-----
 
 define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
-; CHECK-LABEL: zfunc1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll2.2d v1, v0, #0
-; CHECK-NEXT:    ushll.2d v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: zfunc1:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll2.2d v1, v0, #0
+; CHECK-SD-NEXT:    ushll.2d v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zfunc1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
+; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = zext <4 x i32> %v0 to <4 x i64>
   ret <4 x i64> %r
 }
 
 define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
-; CHECK-LABEL: zfunc2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll2.2d v1, v0, #0
-; CHECK-NEXT:    sshll.2d v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: zfunc2:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll2.2d v1, v0, #0
+; CHECK-SD-NEXT:    sshll.2d v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zfunc2:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    sshll.2d v0, v0, #0
+; CHECK-GI-NEXT:    sshll.2d v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = sext <4 x i32> %v0 to <4 x i64>
   ret <4 x i64> %r
 }
 
 define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
-; CHECK-LABEL: bfunc3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ushll2.2d v1, v0, #0
-; CHECK-NEXT:    ushll.2d v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: bfunc3:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
+; CHECK-SD-NEXT:    ushll2.2d v1, v0, #0
+; CHECK-SD-NEXT:    ushll.2d v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: bfunc3:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
+; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = zext <4 x i16> %v0 to <4 x i64>
   ret <4 x i64> %r
 }
 
 define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
-; CHECK-LABEL: cfunc4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    sshll2.2d v1, v0, #0
-; CHECK-NEXT:    sshll.2d v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: cfunc4:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll.4s v0, v0, #0
+; CHECK-SD-NEXT:    sshll2.2d v1, v0, #0
+; CHECK-SD-NEXT:    sshll.2d v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: cfunc4:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll.4s v0, v0, #0
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    sshll.2d v0, v0, #0
+; CHECK-GI-NEXT:    sshll.2d v1, v1, #0
+; CHECK-GI-NEXT:    ret
   %r = sext <4 x i16> %v0 to <4 x i64>
   ret <4 x i64> %r
 }
 
 define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
-; CHECK-LABEL: zext_v4i8_to_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    bic.4h v0, #255, lsl #8
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ushll2.2d v1, v0, #0
-; CHECK-NEXT:    ushll.2d v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: zext_v4i8_to_v4i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    bic.4h v0, #255, lsl #8
+; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
+; CHECK-SD-NEXT:    ushll2.2d v1, v0, #0
+; CHECK-SD-NEXT:    ushll.2d v0, v0, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i8_to_v4i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
+; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
+; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    and.16b v1, v1, v2
+; CHECK-GI-NEXT:    ret
   %r = zext <4 x i8> %v0 to <4 x i64>
   ret <4 x i64> %r
 }
 
 define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
-; CHECK-LABEL: sext_v4i8_to_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ushll.2d v1, v0, #0
-; CHECK-NEXT:    ushll2.2d v0, v0, #0
-; CHECK-NEXT:    shl.2d v2, v1, #56
-; CHECK-NEXT:    shl.2d v0, v0, #56
-; CHECK-NEXT:    sshr.2d v1, v0, #56
-; CHECK-NEXT:    sshr.2d v0, v2, #56
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: sext_v4i8_to_v4i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
+; CHECK-SD-NEXT:    ushll.2d v1, v0, #0
+; CHECK-SD-NEXT:    ushll2.2d v0, v0, #0
+; CHECK-SD-NEXT:    shl.2d v2, v1, #56
+; CHECK-SD-NEXT:    shl.2d v0, v0, #56
+; CHECK-SD-NEXT:    sshr.2d v1, v0, #56
+; CHECK-SD-NEXT:    sshr.2d v0, v2, #56
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v4i8_to_v4i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
+; CHECK-GI-NEXT:    shl.2d v0, v0, #56
+; CHECK-GI-NEXT:    ushll.2d v1, v1, #0
+; CHECK-GI-NEXT:    sshr.2d v0, v0, #56
+; CHECK-GI-NEXT:    shl.2d v1, v1, #56
+; CHECK-GI-NEXT:    sshr.2d v1, v1, #56
+; CHECK-GI-NEXT:    ret
   %r = sext <4 x i8> %v0 to <4 x i64>
   ret <4 x i64> %r
 }
 
 define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: zext_v8i8_to_v8i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll.8h v0, v0, #0
-; CHECK-NEXT:    ushll2.4s v2, v0, #0
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ushll2.2d v3, v2, #0
-; CHECK-NEXT:    ushll2.2d v1, v0, #0
-; CHECK-NEXT:    ushll.2d v0, v0, #0
-; CHECK-NEXT:    ushll.2d v2, v2, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: zext_v8i8_to_v8i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll.8h v0, v0, #0
+; CHECK-SD-NEXT:    ushll2.4s v2, v0, #0
+; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
+; CHECK-SD-NEXT:    ushll2.2d v3, v2, #0
+; CHECK-SD-NEXT:    ushll2.2d v1, v0, #0
+; CHECK-SD-NEXT:    ushll.2d v0, v0, #0
+; CHECK-SD-NEXT:    ushll.2d v2, v2, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i8_to_v8i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
+; CHECK-GI-NEXT:    mov d2, v0[1]
+; CHECK-GI-NEXT:    ushll.4s v3, v1, #0
+; CHECK-GI-NEXT:    ushll.2d v0, v0, #0
+; CHECK-GI-NEXT:    mov d4, v3[1]
+; CHECK-GI-NEXT:    ushll.2d v1, v2, #0
+; CHECK-GI-NEXT:    ushll.2d v2, v3, #0
+; CHECK-GI-NEXT:    ushll.2d v3, v4, #0
+; CHECK-GI-NEXT:    ret
   %r = zext <8 x i8> %v0 to <8 x i64>
   ret <8 x i64> %r
 }
 
 define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: sext_v8i8_to_v8i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll.8h v0, v0, #0
-; CHECK-NEXT:    sshll2.4s v2, v0, #0
-; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    sshll2.2d v3, v2, #0
-; CHECK-NEXT:    sshll2.2d v1, v0, #0
-; CHECK-NEXT:    sshll.2d v0, v0, #0
-; CHECK-NEXT:    sshll.2d v2, v2, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: sext_v8i8_to_v8i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll.8h v0, v0, #0
+; CHECK-SD-NEXT:    sshll2.4s v2, v0, #0
+; CHECK-SD-NEXT:    sshll.4s v0, v0, #0
+; CHECK-SD-NEXT:    sshll2.2d v3, v2, #0
+; CHECK-SD-NEXT:    sshll2.2d v1, v0, #0
+; CHECK-SD-NEXT:    sshll.2d v0, v0, #0
+; CHECK-SD-NEXT:    sshll.2d v2, v2, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v8i8_to_v8i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
+; CHECK-GI-NEXT:    mov d1, v0[1]
+; CHECK-GI-NEXT:    sshll.4s v0, v0, #0
+; CHECK-GI-NEXT:    mov d2, v0[1]
+; CHECK-GI-NEXT:    sshll.4s v3, v1, #0
+; CHECK-GI-NEXT:    sshll.2d v0, v0, #0
+; CHECK-GI-NEXT:    mov d4, v3[1]
+; CHECK-GI-NEXT:    sshll.2d v1, v2, #0
+; CHECK-GI-NEXT:    sshll.2d v2, v3, #0
+; CHECK-GI-NEXT:    sshll.2d v3, v4, #0
+; CHECK-GI-NEXT:    ret
   %r = sext <8 x i8> %v0 to <8 x i64>
   ret <8 x i64> %r
 }

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 831acd242221b0..3003e4c1c411ee 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -3,10 +3,6 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; CHECK-GI:       warning: Instruction selection used fallback path for uabd16b_rdx
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uabd16b_rdx_i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sabd16b_rdx_i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uabd8h_rdx
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sabd8h_rdx
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uabd4s_rdx
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sabd4s_rdx
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_8b
@@ -281,13 +277,58 @@ define i16 @uabd16b_rdx(ptr %a, ptr %b) {
 }
 
 define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: uabd16b_rdx_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabdl.8h v2, v0, v1
-; CHECK-NEXT:    uabal2.8h v2, v0, v1
-; CHECK-NEXT:    uaddlv.8h s0, v2
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: uabd16b_rdx_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabdl.8h v2, v0, v1
+; CHECK-SD-NEXT:    uabal2.8h v2, v0, v1
+; CHECK-SD-NEXT:    uaddlv.8h s0, v2
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: uabd16b_rdx_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d3, v0[1]
+; CHECK-GI-NEXT:    ushll.8h v4, v1, #0
+; CHECK-GI-NEXT:    mov d1, v1[1]
+; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
+; CHECK-GI-NEXT:    mov d6, v4[1]
+; CHECK-GI-NEXT:    ushll.8h v3, v3, #0
+; CHECK-GI-NEXT:    mov d5, v0[1]
+; CHECK-GI-NEXT:    ushll.8h v1, v1, #0
+; CHECK-GI-NEXT:    mov d7, v3[1]
+; CHECK-GI-NEXT:    mov d16, v1[1]
+; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT:    usubl.4s v0, v0, v4
+; CHECK-GI-NEXT:    usubl.4s v5, v5, v6
+; CHECK-GI-NEXT:    usubl.4s v1, v3, v1
+; CHECK-GI-NEXT:    usubl.4s v3, v7, v16
+; CHECK-GI-NEXT:    cmgt.4s v4, v2, v0
+; CHECK-GI-NEXT:    cmgt.4s v6, v2, v5
+; CHECK-GI-NEXT:    cmgt.4s v7, v2, v1
+; CHECK-GI-NEXT:    cmgt.4s v2, v2, v3
+; CHECK-GI-NEXT:    shl.4s v4, v4, #31
+; CHECK-GI-NEXT:    shl.4s v6, v6, #31
+; CHECK-GI-NEXT:    shl.4s v7, v7, #31
+; CHECK-GI-NEXT:    shl.4s v2, v2, #31
+; CHECK-GI-NEXT:    sshr.4s v4, v4, #31
+; CHECK-GI-NEXT:    neg.4s v17, v0
+; CHECK-GI-NEXT:    sshr.4s v6, v6, #31
+; CHECK-GI-NEXT:    neg.4s v16, v5
+; CHECK-GI-NEXT:    neg.4s v18, v1
+; CHECK-GI-NEXT:    neg.4s v19, v3
+; CHECK-GI-NEXT:    sshr.4s v7, v7, #31
+; CHECK-GI-NEXT:    sshr.4s v2, v2, #31
+; CHECK-GI-NEXT:    bit.16b v0, v17, v4
+; CHECK-GI-NEXT:    mov.16b v4, v6
+; CHECK-GI-NEXT:    bsl.16b v4, v16, v5
+; CHECK-GI-NEXT:    bit.16b v1, v18, v7
+; CHECK-GI-NEXT:    bsl.16b v2, v19, v3
+; CHECK-GI-NEXT:    add.4s v0, v0, v4
+; CHECK-GI-NEXT:    add.4s v1, v1, v2
+; CHECK-GI-NEXT:    add.4s v0, v0, v1
+; CHECK-GI-NEXT:    addv.4s s0, v0
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
   %aext = zext <16 x i8> %a to <16 x i32>
   %bext = zext <16 x i8> %b to <16 x i32>
   %abdiff = sub nsw <16 x i32> %aext, %bext
@@ -299,13 +340,58 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
 }
 
 define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: sabd16b_rdx_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabdl.8h v2, v0, v1
-; CHECK-NEXT:    sabal2.8h v2, v0, v1
-; CHECK-NEXT:    uaddlv.8h s0, v2
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: sabd16b_rdx_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabdl.8h v2, v0, v1
+; CHECK-SD-NEXT:    sabal2.8h v2, v0, v1
+; CHECK-SD-NEXT:    uaddlv.8h s0, v2
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sabd16b_rdx_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d3, v0[1]
+; CHECK-GI-NEXT:    sshll.8h v4, v1, #0
+; CHECK-GI-NEXT:    mov d1, v1[1]
+; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
+; CHECK-GI-NEXT:    mov d6, v4[1]
+; CHECK-GI-NEXT:    sshll.8h v3, v3, #0
+; CHECK-GI-NEXT:    mov d5, v0[1]
+; CHECK-GI-NEXT:    sshll.8h v1, v1, #0
+; CHECK-GI-NEXT:    mov d7, v3[1]
+; CHECK-GI-NEXT:    mov d16, v1[1]
+; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT:    ssubl.4s v0, v0, v4
+; CHECK-GI-NEXT:    ssubl.4s v5, v5, v6
+; CHECK-GI-NEXT:    ssubl.4s v1, v3, v1
+; CHECK-GI-NEXT:    ssubl.4s v3, v7, v16
+; CHECK-GI-NEXT:    cmgt.4s v4, v2, v0
+; CHECK-GI-NEXT:    cmgt.4s v6, v2, v5
+; CHECK-GI-NEXT:    cmgt.4s v7, v2, v1
+; CHECK-GI-NEXT:    cmgt.4s v2, v2, v3
+; CHECK-GI-NEXT:    shl.4s v4, v4, #31
+; CHECK-GI-NEXT:    shl.4s v6, v6, #31
+; CHECK-GI-NEXT:    shl.4s v7, v7, #31
+; CHECK-GI-NEXT:    shl.4s v2, v2, #31
+; CHECK-GI-NEXT:    sshr.4s v4, v4, #31
+; CHECK-GI-NEXT:    neg.4s v17, v0
+; CHECK-GI-NEXT:    sshr.4s v6, v6, #31
+; CHECK-GI-NEXT:    neg.4s v16, v5
+; CHECK-GI-NEXT:    neg.4s v18, v1
+; CHECK-GI-NEXT:    neg.4s v19, v3
+; CHECK-GI-NEXT:    sshr.4s v7, v7, #31
+; CHECK-GI-NEXT:    sshr.4s v2, v2, #31
+; CHECK-GI-NEXT:    bit.16b v0, v17, v4
+; CHECK-GI-NEXT:    mov.16b v4, v6
+; CHECK-GI-NEXT:    bsl.16b v4, v16, v5
+; CHECK-GI-NEXT:    bit.16b v1, v18, v7
+; CHECK-GI-NEXT:    bsl.16b v2, v19, v3
+; CHECK-GI-NEXT:    add.4s v0, v0, v4
+; CHECK-GI-NEXT:    add.4s v1, v1, v2
+; CHECK-GI-NEXT:    add.4s v0, v0, v1
+; CHECK-GI-NEXT:    addv.4s s0, v0
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
   %aext = sext <16 x i8> %a to <16 x i32>
   %bext = sext <16 x i8> %b to <16 x i32>
   %abdiff = sub nsw <16 x i32> %aext, %bext
@@ -321,14 +407,38 @@ declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 
 define i32 @uabd8h_rdx(ptr %a, ptr %b) {
-; CHECK-LABEL: uabd8h_rdx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    ldr q1, [x1]
-; CHECK-NEXT:    uabd.8h v0, v0, v1
-; CHECK-NEXT:    uaddlv.8h s0, v0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: uabd8h_rdx:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ldr q0, [x0]
+; CHECK-SD-NEXT:    ldr q1, [x1]
+; CHECK-SD-NEXT:    uabd.8h v0, v0, v1
+; CHECK-SD-NEXT:    uaddlv.8h s0, v0
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: uabd8h_rdx:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr q1, [x0]
+; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    movi.2d v0, #0000000000000000
+; CHECK-GI-NEXT:    mov d3, v1[1]
+; CHECK-GI-NEXT:    mov d4, v2[1]
+; CHECK-GI-NEXT:    usubl.4s v1, v1, v2
+; CHECK-GI-NEXT:    usubl.4s v2, v3, v4
+; CHECK-GI-NEXT:    cmgt.4s v3, v0, v1
+; CHECK-GI-NEXT:    neg.4s v4, v1
+; CHECK-GI-NEXT:    cmgt.4s v0, v0, v2
+; CHECK-GI-NEXT:    shl.4s v3, v3, #31
+; CHECK-GI-NEXT:    shl.4s v0, v0, #31
+; CHECK-GI-NEXT:    neg.4s v5, v2
+; CHECK-GI-NEXT:    sshr.4s v3, v3, #31
+; CHECK-GI-NEXT:    sshr.4s v0, v0, #31
+; CHECK-GI-NEXT:    bit.16b v1, v4, v3
+; CHECK-GI-NEXT:    bsl.16b v0, v5, v2
+; CHECK-GI-NEXT:    add.4s v0, v1, v0
+; CHECK-GI-NEXT:    addv.4s s0, v0
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
   %aload = load <8 x i16>, ptr %a, align 1
   %bload = load <8 x i16>, ptr %b, align 1
   %aext = zext <8 x i16> %aload to <8 x i32>
@@ -342,12 +452,34 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
 }
 
 define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: sabd8h_rdx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd.8h v0, v0, v1
-; CHECK-NEXT:    uaddlv.8h s0, v0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: sabd8h_rdx:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd.8h v0, v0, v1
+; CHECK-SD-NEXT:    uaddlv.8h s0, v0
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sabd8h_rdx:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov d3, v0[1]
+; CHECK-GI-NEXT:    mov d4, v1[1]
+; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT:    ssubl.4s v0, v0, v1
+; CHECK-GI-NEXT:    ssubl.4s v1, v3, v4
+; CHECK-GI-NEXT:    cmgt.4s v3, v2, v0
+; CHECK-GI-NEXT:    neg.4s v4, v0
+; CHECK-GI-NEXT:    cmgt.4s v2, v2, v1
+; CHECK-GI-NEXT:    shl.4s v3, v3, #31
+; CHECK-GI-NEXT:    shl.4s v2, v2, #31
+; CHECK-GI-NEXT:    neg.4s v5, v1
+; CHECK-GI-NEXT:    sshr.4s v3, v3, #31
+; CHECK-GI-NEXT:    sshr.4s v2, v2, #31
+; CHECK-GI-NEXT:    bit.16b v0, v4, v3
+; CHECK-GI-NEXT:    bit.16b v1, v5, v2
+; CHECK-GI-NEXT:    add.4s v0, v0, v1
+; CHECK-GI-NEXT:    addv.4s s0, v0
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
   %aext = sext <8 x i16> %a to <8 x i32>
   %bext = sext <8 x i16> %b to <8 x i32>
   %abdiff = sub nsw <8 x i32> %aext, %bext

diff  --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
new file mode 100644
index 00000000000000..d794991895b3c4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -0,0 +1,1216 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:      warning: Instruction selection used fallback path for sext_v3i8_v3i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i8_v3i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v16i10_v16i16
+
+define i16 @sext_i8_to_i16(i8 %a) {
+; CHECK-LABEL: sext_i8_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sxtb w0, w0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i8 %a to i16
+  ret i16 %c
+}
+
+define i32 @sext_i8_to_i32(i8 %a) {
+; CHECK-LABEL: sext_i8_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sxtb w0, w0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i8 %a to i32
+  ret i32 %c
+}
+
+define i64 @sext_i8_to_i64(i8 %a) {
+; CHECK-LABEL: sext_i8_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtb x0, w0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i8 %a to i64
+  ret i64 %c
+}
+
+define i10 @sext_i8_to_i10(i8 %a) {
+; CHECK-LABEL: sext_i8_to_i10:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sxtb w0, w0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i8 %a to i10
+  ret i10 %c
+}
+
+define i32 @sext_i16_to_i32(i16 %a) {
+; CHECK-LABEL: sext_i16_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sxth w0, w0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i16 %a to i32
+  ret i32 %c
+}
+
+define i64 @sext_i16_to_i64(i16 %a) {
+; CHECK-LABEL: sext_i16_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxth x0, w0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i16 %a to i64
+  ret i64 %c
+}
+
+define i64 @sext_i32_to_i64(i32 %a) {
+; CHECK-LABEL: sext_i32_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i32 %a to i64
+  ret i64 %c
+}
+
+define i16 @sext_i10_to_i16(i10 %a) {
+; CHECK-LABEL: sext_i10_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sbfx w0, w0, #0, #10
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i10 %a to i16
+  ret i16 %c
+}
+
+define i32 @sext_i10_to_i32(i10 %a) {
+; CHECK-LABEL: sext_i10_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sbfx w0, w0, #0, #10
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i10 %a to i32
+  ret i32 %c
+}
+
+define i64 @sext_i10_to_i64(i10 %a) {
+; CHECK-LABEL: sext_i10_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sbfx x0, x0, #0, #10
+; CHECK-NEXT:    ret
+entry:
+  %c = sext i10 %a to i64
+  ret i64 %c
+}
+
+define <2 x i16> @sext_v2i8_v2i16(<2 x i8> %a) {
+; CHECK-LABEL: sext_v2i8_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.2s, v0.2s, #24
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i8> %a to <2 x i16>
+  ret <2 x i16> %c
+}
+
+define <2 x i32> @sext_v2i8_v2i32(<2 x i8> %a) {
+; CHECK-LABEL: sext_v2i8_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.2s, v0.2s, #24
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i8> %a to <2 x i32>
+  ret <2 x i32> %c
+}
+
+define <2 x i64> @sext_v2i8_v2i64(<2 x i8> %a) {
+; CHECK-LABEL: sext_v2i8_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i8> %a to <2 x i64>
+  ret <2 x i64> %c
+}
+
+define <2 x i32> @sext_v2i16_v2i32(<2 x i16> %a) {
+; CHECK-LABEL: sext_v2i16_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.2s, v0.2s, #16
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i16> %a to <2 x i32>
+  ret <2 x i32> %c
+}
+
+define <2 x i64> @sext_v2i16_v2i64(<2 x i16> %a) {
+; CHECK-LABEL: sext_v2i16_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #48
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #48
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i16> %a to <2 x i64>
+  ret <2 x i64> %c
+}
+
+define <2 x i64> @sext_v2i32_v2i64(<2 x i32> %a) {
+; CHECK-LABEL: sext_v2i32_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i32> %a to <2 x i64>
+  ret <2 x i64> %c
+}
+
+define <2 x i16> @sext_v2i10_v2i16(<2 x i10> %a) {
+; CHECK-LABEL: sext_v2i10_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.2s, v0.2s, #22
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #22
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i10> %a to <2 x i16>
+  ret <2 x i16> %c
+}
+
+define <2 x i32> @sext_v2i10_v2i32(<2 x i10> %a) {
+; CHECK-LABEL: sext_v2i10_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.2s, v0.2s, #22
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #22
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i10> %a to <2 x i32>
+  ret <2 x i32> %c
+}
+
+define <2 x i64> @sext_v2i10_v2i64(<2 x i10> %a) {
+; CHECK-LABEL: sext_v2i10_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #54
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #54
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <2 x i10> %a to <2 x i64>
+  ret <2 x i64> %c
+}
+
+define <3 x i16> @sext_v3i8_v3i16(<3 x i8> %a) {
+; CHECK-LABEL: sext_v3i8_v3i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    mov v0.h[1], w1
+; CHECK-NEXT:    mov v0.h[2], w2
+; CHECK-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <3 x i8> %a to <3 x i16>
+  ret <3 x i16> %c
+}
+
+define <3 x i32> @sext_v3i8_v3i32(<3 x i8> %a) {
+; CHECK-LABEL: sext_v3i8_v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    mov v0.h[1], w1
+; CHECK-NEXT:    mov v0.h[2], w2
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <3 x i8> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <3 x i64> @sext_v3i8_v3i64(<3 x i8> %a) {
+; CHECK-SD-LABEL: sext_v3i8_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s1, w0
+; CHECK-SD-NEXT:    fmov s0, w2
+; CHECK-SD-NEXT:    mov v1.s[1], w1
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    shl v2.2d, v0.2d, #56
+; CHECK-SD-NEXT:    ushll v0.2d, v1.2s, #0
+; CHECK-SD-NEXT:    sshr v2.2d, v2.2d, #56
+; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v3i8_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    fmov d0, x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-GI-NEXT:    lsl x8, x2, #56
+; CHECK-GI-NEXT:    asr x8, x8, #56
+; CHECK-GI-NEXT:    mov v0.d[1], x1
+; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <3 x i8> %a to <3 x i64>
+  ret <3 x i64> %c
+}
+
+define <3 x i32> @sext_v3i16_v3i32(<3 x i16> %a) {
+; CHECK-SD-LABEL: sext_v3i16_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v3i16_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NEXT:    sxth w8, w8
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    sxth w9, w9
+; CHECK-GI-NEXT:    sxth w8, w8
+; CHECK-GI-NEXT:    mov v0.s[1], w9
+; CHECK-GI-NEXT:    mov v0.s[2], w8
+; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <3 x i16> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <3 x i64> @sext_v3i16_v3i64(<3 x i16> %a) {
+; CHECK-SD-LABEL: sext_v3i16_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v2.2s, #0
+; CHECK-SD-NEXT:    sshll2 v2.2d, v2.4s, #0
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v3i16_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    fmov w10, s2
+; CHECK-GI-NEXT:    sxth x8, w8
+; CHECK-GI-NEXT:    sxth x9, w9
+; CHECK-GI-NEXT:    sxth x10, w10
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    fmov d1, x9
+; CHECK-GI-NEXT:    fmov d2, x10
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <3 x i16> %a to <3 x i64>
+  ret <3 x i64> %c
+}
+
+define <3 x i64> @sext_v3i32_v3i64(<3 x i32> %a) {
+; CHECK-SD-LABEL: sext_v3i32_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll v3.2d, v0.2s, #0
+; CHECK-SD-NEXT:    sshll2 v2.2d, v0.4s, #0
+; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v3i32_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    fmov w10, s2
+; CHECK-GI-NEXT:    sxtw x8, w8
+; CHECK-GI-NEXT:    sxtw x9, w9
+; CHECK-GI-NEXT:    sxtw x10, w10
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    fmov d1, x9
+; CHECK-GI-NEXT:    fmov d2, x10
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <3 x i32> %a to <3 x i64>
+  ret <3 x i64> %c
+}
+
+define <3 x i16> @sext_v3i10_v3i16(<3 x i10> %a) {
+; CHECK-LABEL: sext_v3i10_v3i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    mov v0.h[1], w1
+; CHECK-NEXT:    mov v0.h[2], w2
+; CHECK-NEXT:    shl v0.4h, v0.4h, #6
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #6
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <3 x i10> %a to <3 x i16>
+  ret <3 x i16> %c
+}
+
+define <3 x i32> @sext_v3i10_v3i32(<3 x i10> %a) {
+; CHECK-LABEL: sext_v3i10_v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    mov v0.h[1], w1
+; CHECK-NEXT:    mov v0.h[2], w2
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #22
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #22
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <3 x i10> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <3 x i64> @sext_v3i10_v3i64(<3 x i10> %a) {
+; CHECK-SD-LABEL: sext_v3i10_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s1, w0
+; CHECK-SD-NEXT:    fmov s0, w2
+; CHECK-SD-NEXT:    mov v1.s[1], w1
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    shl v2.2d, v0.2d, #54
+; CHECK-SD-NEXT:    ushll v0.2d, v1.2s, #0
+; CHECK-SD-NEXT:    sshr v2.2d, v2.2d, #54
+; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #54
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    sshr v0.2d, v0.2d, #54
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v3i10_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    fmov d0, x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-GI-NEXT:    lsl x8, x2, #54
+; CHECK-GI-NEXT:    asr x8, x8, #54
+; CHECK-GI-NEXT:    mov v0.d[1], x1
+; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #54
+; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <3 x i10> %a to <3 x i64>
+  ret <3 x i64> %c
+}
+
+define <4 x i16> @sext_v4i8_v4i16(<4 x i8> %a) {
+; CHECK-LABEL: sext_v4i8_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <4 x i8> %a to <4 x i16>
+  ret <4 x i16> %c
+}
+
+define <4 x i32> @sext_v4i8_v4i32(<4 x i8> %a) {
+; CHECK-LABEL: sext_v4i8_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <4 x i8> %a to <4 x i32>
+  ret <4 x i32> %c
+}
+
+define <4 x i64> @sext_v4i8_v4i64(<4 x i8> %a) {
+; CHECK-SD-LABEL: sext_v4i8_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-NEXT:    shl v2.2d, v1.2d, #56
+; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-SD-NEXT:    sshr v1.2d, v0.2d, #56
+; CHECK-SD-NEXT:    sshr v0.2d, v2.2d, #56
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v4i8_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-GI-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-GI-NEXT:    sshr v1.2d, v1.2d, #56
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <4 x i8> %a to <4 x i64>
+  ret <4 x i64> %c
+}
+
+define <4 x i32> @sext_v4i16_v4i32(<4 x i16> %a) {
+; CHECK-LABEL: sext_v4i16_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <4 x i16> %a to <4 x i32>
+  ret <4 x i32> %c
+}
+
+define <4 x i64> @sext_v4i16_v4i64(<4 x i16> %a) {
+; CHECK-SD-LABEL: sext_v4i16_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v4i16_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <4 x i16> %a to <4 x i64>
+  ret <4 x i64> %c
+}
+
+define <4 x i64> @sext_v4i32_v4i64(<4 x i32> %a) {
+; CHECK-SD-LABEL: sext_v4i32_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v4i32_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <4 x i32> %a to <4 x i64>
+  ret <4 x i64> %c
+}
+
+define <4 x i16> @sext_v4i10_v4i16(<4 x i10> %a) {
+; CHECK-LABEL: sext_v4i10_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.4h, v0.4h, #6
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #6
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <4 x i10> %a to <4 x i16>
+  ret <4 x i16> %c
+}
+
+define <4 x i32> @sext_v4i10_v4i32(<4 x i10> %a) {
+; CHECK-LABEL: sext_v4i10_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #22
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #22
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <4 x i10> %a to <4 x i32>
+  ret <4 x i32> %c
+}
+
+define <4 x i64> @sext_v4i10_v4i64(<4 x i10> %a) {
+; CHECK-SD-LABEL: sext_v4i10_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-NEXT:    shl v2.2d, v1.2d, #54
+; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #54
+; CHECK-SD-NEXT:    sshr v1.2d, v0.2d, #54
+; CHECK-SD-NEXT:    sshr v0.2d, v2.2d, #54
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v4i10_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #54
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT:    shl v1.2d, v1.2d, #54
+; CHECK-GI-NEXT:    sshr v1.2d, v1.2d, #54
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <4 x i10> %a to <4 x i64>
+  ret <4 x i64> %c
+}
+
+define <8 x i16> @sext_v8i8_v8i16(<8 x i8> %a) {
+; CHECK-LABEL: sext_v8i8_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <8 x i8> %a to <8 x i16>
+  ret <8 x i16> %c
+}
+
+define <8 x i32> @sext_v8i8_v8i32(<8 x i8> %a) {
+; CHECK-SD-LABEL: sext_v8i8_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v8i8_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <8 x i8> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <8 x i64> @sext_v8i8_v8i64(<8 x i8> %a) {
+; CHECK-SD-LABEL: sext_v8i8_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT:    sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    sshll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v8i8_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    sshll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov d4, v3.d[1]
+; CHECK-GI-NEXT:    sshll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <8 x i8> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+define <8 x i32> @sext_v8i16_v8i32(<8 x i16> %a) {
+; CHECK-SD-LABEL: sext_v8i16_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v8i16_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <8 x i64> @sext_v8i16_v8i64(<8 x i16> %a) {
+; CHECK-SD-LABEL: sext_v8i16_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT:    sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    sshll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v8i16_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    sshll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov d4, v3.d[1]
+; CHECK-GI-NEXT:    sshll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <8 x i16> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+define <8 x i64> @sext_v8i32_v8i64(<8 x i32> %a) {
+; CHECK-SD-LABEL: sext_v8i32_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v4.2d, v0.4s, #0
+; CHECK-SD-NEXT:    sshll2 v3.2d, v1.4s, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    sshll v2.2d, v1.2s, #0
+; CHECK-SD-NEXT:    mov v1.16b, v4.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v8i32_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <8 x i32> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+define <8 x i16> @sext_v8i10_v8i16(<8 x i10> %a) {
+; CHECK-LABEL: sext_v8i10_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.8h, v0.8h, #6
+; CHECK-NEXT:    sshr v0.8h, v0.8h, #6
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <8 x i10> %a to <8 x i16>
+  ret <8 x i16> %c
+}
+
+define <8 x i32> @sext_v8i10_v8i32(<8 x i10> %a) {
+; CHECK-SD-LABEL: sext_v8i10_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-SD-NEXT:    shl v2.4s, v1.4s, #22
+; CHECK-SD-NEXT:    shl v0.4s, v0.4s, #22
+; CHECK-SD-NEXT:    sshr v1.4s, v0.4s, #22
+; CHECK-SD-NEXT:    sshr v0.4s, v2.4s, #22
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v8i10_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #22
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #22
+; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #22
+; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #22
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <8 x i10> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <8 x i64> @sext_v8i10_v8i64(<8 x i10> %a) {
+; CHECK-SD-LABEL: sext_v8i10_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v1.2s, #0
+; CHECK-SD-NEXT:    ushll v3.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll2 v1.2d, v1.4s, #0
+; CHECK-SD-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-NEXT:    shl v1.2d, v1.2d, #54
+; CHECK-SD-NEXT:    shl v2.2d, v2.2d, #54
+; CHECK-SD-NEXT:    shl v4.2d, v0.2d, #54
+; CHECK-SD-NEXT:    shl v5.2d, v3.2d, #54
+; CHECK-SD-NEXT:    sshr v1.2d, v1.2d, #54
+; CHECK-SD-NEXT:    sshr v0.2d, v2.2d, #54
+; CHECK-SD-NEXT:    sshr v3.2d, v4.2d, #54
+; CHECK-SD-NEXT:    sshr v2.2d, v5.2d, #54
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v8i10_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov d3, v1.d[1]
+; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #54
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    shl v2.2d, v2.2d, #54
+; CHECK-GI-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT:    shl v4.2d, v1.2d, #54
+; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT:    shl v3.2d, v3.2d, #54
+; CHECK-GI-NEXT:    sshr v1.2d, v2.2d, #54
+; CHECK-GI-NEXT:    sshr v2.2d, v4.2d, #54
+; CHECK-GI-NEXT:    sshr v3.2d, v3.2d, #54
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <8 x i10> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+define <16 x i16> @sext_v16i8_v16i16(<16 x i8> %a) {
+; CHECK-SD-LABEL: sext_v16i8_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v16i8_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <16 x i8> %a to <16 x i16>
+  ret <16 x i16> %c
+}
+
+define <16 x i32> @sext_v16i8_v16i32(<16 x i8> %a) {
+; CHECK-SD-LABEL: sext_v16i8_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v2.8h, v0.16b, #0
+; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    sshll2 v3.4s, v2.8h, #0
+; CHECK-SD-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v16i8_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    sshll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d4, v3.d[1]
+; CHECK-GI-NEXT:    sshll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT:    sshll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT:    sshll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <16 x i8> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+define <16 x i64> @sext_v16i8_v16i64(<16 x i8> %a) {
+; CHECK-SD-LABEL: sext_v16i8_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    sshll2 v2.4s, v1.8h, #0
+; CHECK-SD-NEXT:    sshll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll v16.4s, v1.4h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll2 v7.2d, v2.4s, #0
+; CHECK-SD-NEXT:    sshll2 v3.2d, v4.4s, #0
+; CHECK-SD-NEXT:    sshll2 v5.2d, v16.4s, #0
+; CHECK-SD-NEXT:    sshll v6.2d, v2.2s, #0
+; CHECK-SD-NEXT:    sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    sshll v2.2d, v4.2s, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    sshll v4.2d, v16.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v16i8_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    sshll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT:    mov d1, v1.d[1]
+; CHECK-GI-NEXT:    sshll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d5, v2.d[1]
+; CHECK-GI-NEXT:    sshll v4.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll v6.4s, v2.4h, #0
+; CHECK-GI-NEXT:    sshll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v16.4s, v5.4h, #0
+; CHECK-GI-NEXT:    mov d3, v4.d[1]
+; CHECK-GI-NEXT:    mov d7, v6.d[1]
+; CHECK-GI-NEXT:    mov d17, v16.d[1]
+; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v4.2s, #0
+; CHECK-GI-NEXT:    sshll v4.2d, v6.2s, #0
+; CHECK-GI-NEXT:    sshll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v5.2d, v7.2s, #0
+; CHECK-GI-NEXT:    sshll v6.2d, v16.2s, #0
+; CHECK-GI-NEXT:    sshll v7.2d, v17.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <16 x i8> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+define <16 x i32> @sext_v16i16_v16i32(<16 x i16> %a) {
+; CHECK-SD-LABEL: sext_v16i16_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll2 v3.4s, v1.8h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll v2.4s, v1.4h, #0
+; CHECK-SD-NEXT:    mov v1.16b, v4.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v16i16_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT:    sshll v1.4s, v3.4h, #0
+; CHECK-GI-NEXT:    sshll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <16 x i16> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+define <16 x i64> @sext_v16i16_v16i64(<16 x i16> %a) {
+; CHECK-SD-LABEL: sext_v16i16_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll2 v4.4s, v1.8h, #0
+; CHECK-SD-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT:    sshll2 v16.2d, v0.4s, #0
+; CHECK-SD-NEXT:    sshll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT:    sshll2 v7.2d, v4.4s, #0
+; CHECK-SD-NEXT:    sshll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    sshll2 v5.2d, v1.4s, #0
+; CHECK-SD-NEXT:    sshll v6.2d, v4.2s, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    sshll v4.2d, v1.2s, #0
+; CHECK-SD-NEXT:    mov v1.16b, v16.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v16i16_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT:    mov d1, v2.d[1]
+; CHECK-GI-NEXT:    sshll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT:    mov d6, v5.d[1]
+; CHECK-GI-NEXT:    sshll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT:    sshll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT:    mov d7, v2.d[1]
+; CHECK-GI-NEXT:    mov d16, v3.d[1]
+; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT:    sshll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT:    sshll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT:    sshll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v3.2d, v7.2s, #0
+; CHECK-GI-NEXT:    sshll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <16 x i16> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+define <16 x i64> @sext_v16i32_v16i64(<16 x i32> %a) {
+; CHECK-SD-LABEL: sext_v16i32_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll2 v17.2d, v0.4s, #0
+; CHECK-SD-NEXT:    sshll2 v18.2d, v1.4s, #0
+; CHECK-SD-NEXT:    sshll v16.2d, v1.2s, #0
+; CHECK-SD-NEXT:    sshll2 v5.2d, v2.4s, #0
+; CHECK-SD-NEXT:    sshll2 v7.2d, v3.4s, #0
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    sshll v4.2d, v2.2s, #0
+; CHECK-SD-NEXT:    sshll v6.2d, v3.2s, #0
+; CHECK-SD-NEXT:    mov v1.16b, v17.16b
+; CHECK-SD-NEXT:    mov v2.16b, v16.16b
+; CHECK-SD-NEXT:    mov v3.16b, v18.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v16i32_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d5, v1.d[1]
+; CHECK-GI-NEXT:    mov d6, v2.d[1]
+; CHECK-GI-NEXT:    sshll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    sshll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT:    mov d2, v3.d[1]
+; CHECK-GI-NEXT:    sshll v17.2d, v1.2s, #0
+; CHECK-GI-NEXT:    sshll v18.2d, v5.2s, #0
+; CHECK-GI-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT:    sshll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT:    sshll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT:    sshll v7.2d, v2.2s, #0
+; CHECK-GI-NEXT:    mov v0.16b, v16.16b
+; CHECK-GI-NEXT:    mov v2.16b, v17.16b
+; CHECK-GI-NEXT:    mov v3.16b, v18.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <16 x i32> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+define <16 x i16> @sext_v16i10_v16i16(<16 x i10> %a) {
+; CHECK-LABEL: sext_v16i10_v16i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w8, [sp]
+; CHECK-NEXT:    fmov s1, w0
+; CHECK-NEXT:    ldr w9, [sp, #16]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ldr w8, [sp, #8]
+; CHECK-NEXT:    mov v1.h[1], w1
+; CHECK-NEXT:    mov v0.h[1], w8
+; CHECK-NEXT:    ldr w8, [sp, #24]
+; CHECK-NEXT:    mov v1.h[2], w2
+; CHECK-NEXT:    mov v0.h[2], w9
+; CHECK-NEXT:    ldr w9, [sp, #32]
+; CHECK-NEXT:    mov v1.h[3], w3
+; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    ldr w8, [sp, #40]
+; CHECK-NEXT:    mov v1.h[4], w4
+; CHECK-NEXT:    mov v0.h[4], w9
+; CHECK-NEXT:    ldr w9, [sp, #48]
+; CHECK-NEXT:    mov v1.h[5], w5
+; CHECK-NEXT:    mov v0.h[5], w8
+; CHECK-NEXT:    ldr w8, [sp, #56]
+; CHECK-NEXT:    mov v1.h[6], w6
+; CHECK-NEXT:    mov v0.h[6], w9
+; CHECK-NEXT:    mov v1.h[7], w7
+; CHECK-NEXT:    mov v0.h[7], w8
+; CHECK-NEXT:    shl v1.8h, v1.8h, #6
+; CHECK-NEXT:    shl v2.8h, v0.8h, #6
+; CHECK-NEXT:    sshr v0.8h, v1.8h, #6
+; CHECK-NEXT:    sshr v1.8h, v2.8h, #6
+; CHECK-NEXT:    ret
+entry:
+  %c = sext <16 x i10> %a to <16 x i16>
+  ret <16 x i16> %c
+}
+
+define <16 x i32> @sext_v16i10_v16i32(<16 x i10> %a) {
+; CHECK-SD-LABEL: sext_v16i10_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldr w11, [sp, #32]
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    ldr w12, [sp]
+; CHECK-SD-NEXT:    fmov s1, w4
+; CHECK-SD-NEXT:    ldr w10, [sp, #40]
+; CHECK-SD-NEXT:    ldr w15, [sp, #8]
+; CHECK-SD-NEXT:    fmov s3, w11
+; CHECK-SD-NEXT:    fmov s2, w12
+; CHECK-SD-NEXT:    ldr w9, [sp, #48]
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    ldr w14, [sp, #16]
+; CHECK-SD-NEXT:    mov v1.h[1], w5
+; CHECK-SD-NEXT:    ldr w8, [sp, #56]
+; CHECK-SD-NEXT:    mov v2.h[1], w15
+; CHECK-SD-NEXT:    ldr w13, [sp, #24]
+; CHECK-SD-NEXT:    mov v3.h[1], w10
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    mov v1.h[2], w6
+; CHECK-SD-NEXT:    mov v2.h[2], w14
+; CHECK-SD-NEXT:    mov v3.h[2], w9
+; CHECK-SD-NEXT:    mov v0.h[3], w3
+; CHECK-SD-NEXT:    mov v1.h[3], w7
+; CHECK-SD-NEXT:    mov v2.h[3], w13
+; CHECK-SD-NEXT:    mov v3.h[3], w8
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT:    ushll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT:    ushll v3.4s, v3.4h, #0
+; CHECK-SD-NEXT:    shl v0.4s, v0.4s, #22
+; CHECK-SD-NEXT:    shl v1.4s, v1.4s, #22
+; CHECK-SD-NEXT:    shl v2.4s, v2.4s, #22
+; CHECK-SD-NEXT:    shl v3.4s, v3.4s, #22
+; CHECK-SD-NEXT:    sshr v0.4s, v0.4s, #22
+; CHECK-SD-NEXT:    sshr v1.4s, v1.4s, #22
+; CHECK-SD-NEXT:    sshr v2.4s, v2.4s, #22
+; CHECK-SD-NEXT:    sshr v3.4s, v3.4s, #22
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v16i10_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [sp]
+; CHECK-GI-NEXT:    fmov s7, w0
+; CHECK-GI-NEXT:    ldr s1, [sp, #8]
+; CHECK-GI-NEXT:    fmov s17, w4
+; CHECK-GI-NEXT:    ldr s4, [sp, #32]
+; CHECK-GI-NEXT:    ldr s5, [sp, #40]
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    ldr s2, [sp, #16]
+; CHECK-GI-NEXT:    mov v7.s[1], w1
+; CHECK-GI-NEXT:    ldr s6, [sp, #48]
+; CHECK-GI-NEXT:    mov v17.s[1], w5
+; CHECK-GI-NEXT:    ldr s3, [sp, #24]
+; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT:    ldr s16, [sp, #56]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v7.s[2], w2
+; CHECK-GI-NEXT:    mov v17.s[2], w6
+; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    mov v7.s[3], w3
+; CHECK-GI-NEXT:    mov v17.s[3], w7
+; CHECK-GI-NEXT:    mov v4.s[3], v16.s[0]
+; CHECK-GI-NEXT:    shl v3.4s, v0.4s, #22
+; CHECK-GI-NEXT:    shl v1.4s, v7.4s, #22
+; CHECK-GI-NEXT:    shl v2.4s, v17.4s, #22
+; CHECK-GI-NEXT:    shl v4.4s, v4.4s, #22
+; CHECK-GI-NEXT:    sshr v0.4s, v1.4s, #22
+; CHECK-GI-NEXT:    sshr v1.4s, v2.4s, #22
+; CHECK-GI-NEXT:    sshr v2.4s, v3.4s, #22
+; CHECK-GI-NEXT:    sshr v3.4s, v4.4s, #22
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <16 x i10> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+define <16 x i64> @sext_v16i10_v16i64(<16 x i10> %a) {
+; CHECK-SD-LABEL: sext_v16i10_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldr s0, [sp]
+; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    fmov s1, w0
+; CHECK-SD-NEXT:    fmov s2, w2
+; CHECK-SD-NEXT:    fmov s3, w4
+; CHECK-SD-NEXT:    fmov s4, w6
+; CHECK-SD-NEXT:    ld1 { v0.s }[1], [x8]
+; CHECK-SD-NEXT:    add x8, sp, #24
+; CHECK-SD-NEXT:    ldr s5, [sp, #16]
+; CHECK-SD-NEXT:    add x9, sp, #40
+; CHECK-SD-NEXT:    ldr s6, [sp, #32]
+; CHECK-SD-NEXT:    add x10, sp, #56
+; CHECK-SD-NEXT:    ldr s7, [sp, #48]
+; CHECK-SD-NEXT:    mov v1.s[1], w1
+; CHECK-SD-NEXT:    ld1 { v5.s }[1], [x8]
+; CHECK-SD-NEXT:    mov v2.s[1], w3
+; CHECK-SD-NEXT:    ld1 { v6.s }[1], [x9]
+; CHECK-SD-NEXT:    mov v3.s[1], w5
+; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x10]
+; CHECK-SD-NEXT:    mov v4.s[1], w7
+; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT:    ushll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT:    ushll v16.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v5.2d, v5.2s, #0
+; CHECK-SD-NEXT:    ushll v6.2d, v6.2s, #0
+; CHECK-SD-NEXT:    ushll v7.2d, v7.2s, #0
+; CHECK-SD-NEXT:    shl v0.2d, v1.2d, #54
+; CHECK-SD-NEXT:    shl v1.2d, v2.2d, #54
+; CHECK-SD-NEXT:    shl v2.2d, v3.2d, #54
+; CHECK-SD-NEXT:    shl v3.2d, v4.2d, #54
+; CHECK-SD-NEXT:    shl v4.2d, v16.2d, #54
+; CHECK-SD-NEXT:    shl v5.2d, v5.2d, #54
+; CHECK-SD-NEXT:    shl v6.2d, v6.2d, #54
+; CHECK-SD-NEXT:    shl v7.2d, v7.2d, #54
+; CHECK-SD-NEXT:    sshr v0.2d, v0.2d, #54
+; CHECK-SD-NEXT:    sshr v1.2d, v1.2d, #54
+; CHECK-SD-NEXT:    sshr v2.2d, v2.2d, #54
+; CHECK-SD-NEXT:    sshr v3.2d, v3.2d, #54
+; CHECK-SD-NEXT:    sshr v4.2d, v4.2d, #54
+; CHECK-SD-NEXT:    sshr v5.2d, v5.2d, #54
+; CHECK-SD-NEXT:    sshr v6.2d, v6.2d, #54
+; CHECK-SD-NEXT:    sshr v7.2d, v7.2d, #54
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v16i10_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [sp]
+; CHECK-GI-NEXT:    fmov s6, w0
+; CHECK-GI-NEXT:    ldr s1, [sp, #8]
+; CHECK-GI-NEXT:    fmov s16, w2
+; CHECK-GI-NEXT:    ldr s2, [sp, #16]
+; CHECK-GI-NEXT:    fmov s18, w4
+; CHECK-GI-NEXT:    ldr s3, [sp, #24]
+; CHECK-GI-NEXT:    fmov s19, w6
+; CHECK-GI-NEXT:    ldr s4, [sp, #32]
+; CHECK-GI-NEXT:    ldr s5, [sp, #40]
+; CHECK-GI-NEXT:    ldr s7, [sp, #48]
+; CHECK-GI-NEXT:    ldr s17, [sp, #56]
+; CHECK-GI-NEXT:    mov v6.s[1], w1
+; CHECK-GI-NEXT:    mov v16.s[1], w3
+; CHECK-GI-NEXT:    mov v18.s[1], w5
+; CHECK-GI-NEXT:    mov v19.s[1], w7
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    mov v2.s[1], v3.s[0]
+; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT:    mov v7.s[1], v17.s[0]
+; CHECK-GI-NEXT:    ushll v1.2d, v6.2s, #0
+; CHECK-GI-NEXT:    ushll v3.2d, v16.2s, #0
+; CHECK-GI-NEXT:    ushll v5.2d, v18.2s, #0
+; CHECK-GI-NEXT:    ushll v6.2d, v19.2s, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll v4.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ushll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT:    shl v1.2d, v1.2d, #54
+; CHECK-GI-NEXT:    shl v3.2d, v3.2d, #54
+; CHECK-GI-NEXT:    shl v5.2d, v5.2d, #54
+; CHECK-GI-NEXT:    shl v6.2d, v6.2d, #54
+; CHECK-GI-NEXT:    shl v16.2d, v0.2d, #54
+; CHECK-GI-NEXT:    shl v17.2d, v2.2d, #54
+; CHECK-GI-NEXT:    shl v18.2d, v4.2d, #54
+; CHECK-GI-NEXT:    shl v7.2d, v7.2d, #54
+; CHECK-GI-NEXT:    sshr v0.2d, v1.2d, #54
+; CHECK-GI-NEXT:    sshr v1.2d, v3.2d, #54
+; CHECK-GI-NEXT:    sshr v2.2d, v5.2d, #54
+; CHECK-GI-NEXT:    sshr v3.2d, v6.2d, #54
+; CHECK-GI-NEXT:    sshr v4.2d, v16.2d, #54
+; CHECK-GI-NEXT:    sshr v5.2d, v17.2d, #54
+; CHECK-GI-NEXT:    sshr v6.2d, v18.2d, #54
+; CHECK-GI-NEXT:    sshr v7.2d, v7.2d, #54
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = sext <16 x i10> %a to <16 x i64>
+  ret <16 x i64> %c
+}

diff  --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
new file mode 100644
index 00000000000000..8ac9dd8fdc62bc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -0,0 +1,1345 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:      warning: Instruction selection used fallback path for zext_v16i10_v16i16
+
+define i16 @zext_i8_to_i16(i8 %a) {
+; CHECK-LABEL: zext_i8_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w0, w0, #0xff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i8 %a to i16
+  ret i16 %c
+}
+
+define i32 @zext_i8_to_i32(i8 %a) {
+; CHECK-LABEL: zext_i8_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w0, w0, #0xff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i8 %a to i32
+  ret i32 %c
+}
+
+define i64 @zext_i8_to_i64(i8 %a) {
+; CHECK-LABEL: zext_i8_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    and x0, x0, #0xff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i8 %a to i64
+  ret i64 %c
+}
+
+define i10 @zext_i8_to_i10(i8 %a) {
+; CHECK-LABEL: zext_i8_to_i10:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w0, w0, #0xff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i8 %a to i10
+  ret i10 %c
+}
+
+define i32 @zext_i16_to_i32(i16 %a) {
+; CHECK-LABEL: zext_i16_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w0, w0, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i16 %a to i32
+  ret i32 %c
+}
+
+define i64 @zext_i16_to_i64(i16 %a) {
+; CHECK-LABEL: zext_i16_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    and x0, x0, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i16 %a to i64
+  ret i64 %c
+}
+
+define i64 @zext_i32_to_i64(i32 %a) {
+; CHECK-LABEL: zext_i32_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w0
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i32 %a to i64
+  ret i64 %c
+}
+
+define i16 @zext_i10_to_i16(i10 %a) {
+; CHECK-LABEL: zext_i10_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w0, w0, #0x3ff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i10 %a to i16
+  ret i16 %c
+}
+
+define i32 @zext_i10_to_i32(i10 %a) {
+; CHECK-LABEL: zext_i10_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w0, w0, #0x3ff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i10 %a to i32
+  ret i32 %c
+}
+
+define i64 @zext_i10_to_i64(i10 %a) {
+; CHECK-LABEL: zext_i10_to_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    and x0, x0, #0x3ff
+; CHECK-NEXT:    ret
+entry:
+  %c = zext i10 %a to i64
+  ret i64 %c
+}
+
+define <2 x i16> @zext_v2i8_v2i16(<2 x i8> %a) {
+; CHECK-SD-LABEL: zext_v2i8_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v2i8_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <2 x i8> %a to <2 x i16>
+  ret <2 x i16> %c
+}
+
+define <2 x i32> @zext_v2i8_v2i32(<2 x i8> %a) {
+; CHECK-SD-LABEL: zext_v2i8_v2i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v2i8_v2i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <2 x i8> %a to <2 x i32>
+  ret <2 x i32> %c
+}
+
+define <2 x i64> @zext_v2i8_v2i64(<2 x i8> %a) {
+; CHECK-SD-LABEL: zext_v2i8_v2i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v2i8_v2i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <2 x i8> %a to <2 x i64>
+  ret <2 x i64> %c
+}
+
+define <2 x i32> @zext_v2i16_v2i32(<2 x i16> %a) {
+; CHECK-SD-LABEL: zext_v2i16_v2i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v2i16_v2i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <2 x i16> %a to <2 x i32>
+  ret <2 x i32> %c
+}
+
+define <2 x i64> @zext_v2i16_v2i64(<2 x i16> %a) {
+; CHECK-SD-LABEL: zext_v2i16_v2i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v2i16_v2i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <2 x i16> %a to <2 x i64>
+  ret <2 x i64> %c
+}
+
+define <2 x i64> @zext_v2i32_v2i64(<2 x i32> %a) {
+; CHECK-LABEL: zext_v2i32_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ret
+entry:
+  %c = zext <2 x i32> %a to <2 x i64>
+  ret <2 x i64> %c
+}
+
+define <2 x i16> @zext_v2i10_v2i16(<2 x i10> %a) {
+; CHECK-SD-LABEL: zext_v2i10_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi v1.2s, #3, msl #8
+; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v2i10_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI16_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <2 x i10> %a to <2 x i16>
+  ret <2 x i16> %c
+}
+
+define <2 x i32> @zext_v2i10_v2i32(<2 x i10> %a) {
+; CHECK-SD-LABEL: zext_v2i10_v2i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi v1.2s, #3, msl #8
+; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v2i10_v2i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI17_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <2 x i10> %a to <2 x i32>
+  ret <2 x i32> %c
+}
+
+define <2 x i64> @zext_v2i10_v2i64(<2 x i10> %a) {
+; CHECK-SD-LABEL: zext_v2i10_v2i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi v1.2s, #3, msl #8
+; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v2i10_v2i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI18_0
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <2 x i10> %a to <2 x i64>
+  ret <2 x i64> %c
+}
+
+define <3 x i16> @zext_v3i8_v3i16(<3 x i8> %a) {
+; CHECK-SD-LABEL: zext_v3i8_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i8_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #255 // =0xff
+; CHECK-GI-NEXT:    fmov s1, w0
+; CHECK-GI-NEXT:    fmov s2, w1
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT:    fmov s2, w2
+; CHECK-GI-NEXT:    mov v3.16b, v0.16b
+; CHECK-GI-NEXT:    mov v3.h[1], v0.h[0]
+; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NEXT:    mov v3.h[2], v0.h[0]
+; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
+; CHECK-GI-NEXT:    mov v3.h[3], v0.h[0]
+; CHECK-GI-NEXT:    and v0.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i8> %a to <3 x i16>
+  ret <3 x i16> %c
+}
+
+define <3 x i32> @zext_v3i8_v3i32(<3 x i8> %a) {
+; CHECK-SD-LABEL: zext_v3i8_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    movi v1.2d, #0x0000ff000000ff
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i8_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #255 // =0xff
+; CHECK-GI-NEXT:    fmov s0, w0
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v0.s[1], w1
+; CHECK-GI-NEXT:    mov v1.s[1], w8
+; CHECK-GI-NEXT:    mov v0.s[2], w2
+; CHECK-GI-NEXT:    mov v1.s[2], w8
+; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    mov v1.s[3], w8
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i8> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <3 x i64> @zext_v3i8_v3i64(<3 x i8> %a) {
+; CHECK-SD-LABEL: zext_v3i8_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s1, w0
+; CHECK-SD-NEXT:    fmov s3, w2
+; CHECK-SD-NEXT:    movi v0.2d, #0x000000000000ff
+; CHECK-SD-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT:    mov v1.s[1], w1
+; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT:    mov v2.b[0], v3.b[0]
+; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i8_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    fmov d0, x0
+; CHECK-GI-NEXT:    adrp x8, .LCPI21_0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-GI-NEXT:    mov v0.d[1], x1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-NEXT:    and x8, x2, #0xff
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i8> %a to <3 x i64>
+  ret <3 x i64> %c
+}
+
+define <3 x i32> @zext_v3i16_v3i32(<3 x i16> %a) {
+; CHECK-SD-LABEL: zext_v3i16_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i16_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NEXT:    uxth w8, w8
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    uxth w9, w9
+; CHECK-GI-NEXT:    uxth w8, w8
+; CHECK-GI-NEXT:    mov v0.s[1], w9
+; CHECK-GI-NEXT:    mov v0.s[2], w8
+; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i16> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <3 x i64> @zext_v3i16_v3i64(<3 x i16> %a) {
+; CHECK-SD-LABEL: zext_v3i16_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ushll2 v2.2d, v2.4s, #0
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i16_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    fmov w10, s2
+; CHECK-GI-NEXT:    ubfx x8, x8, #0, #16
+; CHECK-GI-NEXT:    ubfx x9, x9, #0, #16
+; CHECK-GI-NEXT:    ubfx x10, x10, #0, #16
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    fmov d1, x9
+; CHECK-GI-NEXT:    fmov d2, x10
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i16> %a to <3 x i64>
+  ret <3 x i64> %c
+}
+
+define <3 x i64> @zext_v3i32_v3i64(<3 x i32> %a) {
+; CHECK-SD-LABEL: zext_v3i32_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v3.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll2 v2.2d, v0.4s, #0
+; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i32_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    fmov w10, s2
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    fmov d1, x9
+; CHECK-GI-NEXT:    fmov d2, x10
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i32> %a to <3 x i64>
+  ret <3 x i64> %c
+}
+
+define <3 x i16> @zext_v3i10_v3i16(<3 x i10> %a) {
+; CHECK-SD-LABEL: zext_v3i10_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i10_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #1023 // =0x3ff
+; CHECK-GI-NEXT:    fmov s1, w0
+; CHECK-GI-NEXT:    fmov s2, w1
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT:    fmov s2, w2
+; CHECK-GI-NEXT:    mov v3.16b, v0.16b
+; CHECK-GI-NEXT:    mov v3.h[1], v0.h[0]
+; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-GI-NEXT:    mov v3.h[2], v0.h[0]
+; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
+; CHECK-GI-NEXT:    mov v3.h[3], v0.h[0]
+; CHECK-GI-NEXT:    and v0.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NEXT:    mov h2, v0.h[2]
+; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT:    mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i10> %a to <3 x i16>
+  ret <3 x i16> %c
+}
+
+define <3 x i32> @zext_v3i10_v3i32(<3 x i10> %a) {
+; CHECK-SD-LABEL: zext_v3i10_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    movi v1.4s, #3, msl #8
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i10_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #1023 // =0x3ff
+; CHECK-GI-NEXT:    fmov s0, w0
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v0.s[1], w1
+; CHECK-GI-NEXT:    mov v1.s[1], w8
+; CHECK-GI-NEXT:    mov v0.s[2], w2
+; CHECK-GI-NEXT:    mov v1.s[2], w8
+; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    mov v1.s[3], w8
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    mov s2, v0.s[2]
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i10> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <3 x i64> @zext_v3i10_v3i64(<3 x i10> %a) {
+; CHECK-SD-LABEL: zext_v3i10_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov w8, #1023 // =0x3ff
+; CHECK-SD-NEXT:    fmov s3, w2
+; CHECK-SD-NEXT:    mov v0.s[1], w1
+; CHECK-SD-NEXT:    dup v2.2d, x8
+; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    and v2.8b, v3.8b, v2.8b
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v3i10_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    fmov d0, x0
+; CHECK-GI-NEXT:    adrp x8, .LCPI27_0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-GI-NEXT:    mov v0.d[1], x1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI27_0]
+; CHECK-GI-NEXT:    and x8, x2, #0x3ff
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <3 x i10> %a to <3 x i64>
+  ret <3 x i64> %c
+}
+
+define <4 x i16> @zext_v4i8_v4i16(<4 x i8> %a) {
+; CHECK-SD-LABEL: zext_v4i8_v4i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i8_v4i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI28_0
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI28_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <4 x i8> %a to <4 x i16>
+  ret <4 x i16> %c
+}
+
+define <4 x i32> @zext_v4i8_v4i32(<4 x i8> %a) {
+; CHECK-SD-LABEL: zext_v4i8_v4i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i8_v4i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI29_0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI29_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <4 x i8> %a to <4 x i32>
+  ret <4 x i32> %c
+}
+
+define <4 x i64> @zext_v4i8_v4i64(<4 x i8> %a) {
+; CHECK-SD-LABEL: zext_v4i8_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i8_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <4 x i8> %a to <4 x i64>
+  ret <4 x i64> %c
+}
+
+define <4 x i32> @zext_v4i16_v4i32(<4 x i16> %a) {
+; CHECK-LABEL: zext_v4i16_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
+entry:
+  %c = zext <4 x i16> %a to <4 x i32>
+  ret <4 x i32> %c
+}
+
+define <4 x i64> @zext_v4i16_v4i64(<4 x i16> %a) {
+; CHECK-SD-LABEL: zext_v4i16_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i16_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <4 x i16> %a to <4 x i64>
+  ret <4 x i64> %c
+}
+
+define <4 x i64> @zext_v4i32_v4i64(<4 x i32> %a) {
+; CHECK-SD-LABEL: zext_v4i32_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i32_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <4 x i32> %a to <4 x i64>
+  ret <4 x i64> %c
+}
+
+define <4 x i16> @zext_v4i10_v4i16(<4 x i10> %a) {
+; CHECK-SD-LABEL: zext_v4i10_v4i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i10_v4i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <4 x i10> %a to <4 x i16>
+  ret <4 x i16> %c
+}
+
+define <4 x i32> @zext_v4i10_v4i32(<4 x i10> %a) {
+; CHECK-SD-LABEL: zext_v4i10_v4i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i10_v4i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <4 x i10> %a to <4 x i32>
+  ret <4 x i32> %c
+}
+
+define <4 x i64> @zext_v4i10_v4i64(<4 x i10> %a) {
+; CHECK-SD-LABEL: zext_v4i10_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v4i10_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <4 x i10> %a to <4 x i64>
+  ret <4 x i64> %c
+}
+
+define <8 x i16> @zext_v8i8_v8i16(<8 x i8> %a) {
+; CHECK-LABEL: zext_v8i8_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ret
+entry:
+  %c = zext <8 x i8> %a to <8 x i16>
+  ret <8 x i16> %c
+}
+
+define <8 x i32> @zext_v8i8_v8i32(<8 x i8> %a) {
+; CHECK-SD-LABEL: zext_v8i8_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i8_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <8 x i8> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <8 x i64> @zext_v8i8_v8i64(<8 x i8> %a) {
+; CHECK-SD-LABEL: zext_v8i8_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i8_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    ushll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov d4, v3.d[1]
+; CHECK-GI-NEXT:    ushll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <8 x i8> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+define <8 x i32> @zext_v8i16_v8i32(<8 x i16> %a) {
+; CHECK-SD-LABEL: zext_v8i16_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i16_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <8 x i64> @zext_v8i16_v8i64(<8 x i16> %a) {
+; CHECK-SD-LABEL: zext_v8i16_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i16_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    ushll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov d4, v3.d[1]
+; CHECK-GI-NEXT:    ushll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <8 x i16> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+define <8 x i64> @zext_v8i32_v8i64(<8 x i32> %a) {
+; CHECK-SD-LABEL: zext_v8i32_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v4.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll2 v3.2d, v1.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v1.2s, #0
+; CHECK-SD-NEXT:    mov v1.16b, v4.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i32_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <8 x i32> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+define <8 x i16> @zext_v8i10_v8i16(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI43_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI43_0]
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <8 x i10> %a to <8 x i16>
+  ret <8 x i16> %c
+}
+
+define <8 x i32> @zext_v8i10_v8i32(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI44_0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <8 x i10> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <8 x i64> @zext_v8i10_v8i64(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    adrp x8, .LCPI45_0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI45_0]
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll v5.2d, v1.2s, #0
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT:    ushll v4.2d, v4.2s, #0
+; CHECK-GI-NEXT:    and v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    and v2.16b, v5.16b, v3.16b
+; CHECK-GI-NEXT:    and v3.16b, v4.16b, v3.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <8 x i10> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+define <16 x i16> @zext_v16i8_v16i16(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <16 x i8> %a to <16 x i16>
+  ret <16 x i16> %c
+}
+
+define <16 x i32> @zext_v16i8_v16i32(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v2.8h, v0.16b, #0
+; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    ushll2 v3.4s, v2.8h, #0
+; CHECK-SD-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    ushll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov d4, v3.d[1]
+; CHECK-GI-NEXT:    ushll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT:    ushll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT:    ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <16 x i8> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+define <16 x i64> @zext_v16i8_v16i64(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    ushll2 v2.4s, v1.8h, #0
+; CHECK-SD-NEXT:    ushll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v16.4s, v1.4h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v7.2d, v2.4s, #0
+; CHECK-SD-NEXT:    ushll2 v3.2d, v4.4s, #0
+; CHECK-SD-NEXT:    ushll2 v5.2d, v16.4s, #0
+; CHECK-SD-NEXT:    ushll v6.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v4.2s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v4.2d, v16.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ushll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT:    mov d1, v1.d[1]
+; CHECK-GI-NEXT:    ushll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d5, v2.d[1]
+; CHECK-GI-NEXT:    ushll v4.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v6.4s, v2.4h, #0
+; CHECK-GI-NEXT:    ushll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v16.4s, v5.4h, #0
+; CHECK-GI-NEXT:    mov d3, v4.d[1]
+; CHECK-GI-NEXT:    mov d7, v6.d[1]
+; CHECK-GI-NEXT:    mov d17, v16.d[1]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ushll v4.2d, v6.2s, #0
+; CHECK-GI-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v5.2d, v7.2s, #0
+; CHECK-GI-NEXT:    ushll v6.2d, v16.2s, #0
+; CHECK-GI-NEXT:    ushll v7.2d, v17.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <16 x i8> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+define <16 x i32> @zext_v16i16_v16i32(<16 x i16> %a) {
+; CHECK-SD-LABEL: zext_v16i16_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll2 v3.4s, v1.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll v2.4s, v1.4h, #0
+; CHECK-SD-NEXT:    mov v1.16b, v4.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v16i16_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v1.4s, v3.4h, #0
+; CHECK-GI-NEXT:    ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <16 x i16> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+define <16 x i64> @zext_v16i16_v16i64(<16 x i16> %a) {
+; CHECK-SD-LABEL: zext_v16i16_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll2 v4.4s, v1.8h, #0
+; CHECK-SD-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT:    ushll2 v16.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT:    ushll2 v7.2d, v4.4s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ushll2 v5.2d, v1.4s, #0
+; CHECK-SD-NEXT:    ushll v6.2d, v4.2s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v4.2d, v1.2s, #0
+; CHECK-SD-NEXT:    mov v1.16b, v16.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v16i16_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    mov d4, v1.d[1]
+; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT:    mov d1, v2.d[1]
+; CHECK-GI-NEXT:    ushll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT:    mov d6, v5.d[1]
+; CHECK-GI-NEXT:    ushll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT:    ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT:    mov d7, v2.d[1]
+; CHECK-GI-NEXT:    mov d16, v3.d[1]
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT:    ushll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT:    ushll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v3.2d, v7.2s, #0
+; CHECK-GI-NEXT:    ushll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <16 x i16> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+define <16 x i64> @zext_v16i32_v16i64(<16 x i32> %a) {
+; CHECK-SD-LABEL: zext_v16i32_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll2 v17.2d, v0.4s, #0
+; CHECK-SD-NEXT:    ushll2 v18.2d, v1.4s, #0
+; CHECK-SD-NEXT:    ushll v16.2d, v1.2s, #0
+; CHECK-SD-NEXT:    ushll2 v5.2d, v2.4s, #0
+; CHECK-SD-NEXT:    ushll2 v7.2d, v3.4s, #0
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v4.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ushll v6.2d, v3.2s, #0
+; CHECK-SD-NEXT:    mov v1.16b, v17.16b
+; CHECK-SD-NEXT:    mov v2.16b, v16.16b
+; CHECK-SD-NEXT:    mov v3.16b, v18.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v16i32_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov d5, v1.d[1]
+; CHECK-GI-NEXT:    mov d6, v2.d[1]
+; CHECK-GI-NEXT:    ushll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    ushll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT:    mov d2, v3.d[1]
+; CHECK-GI-NEXT:    ushll v17.2d, v1.2s, #0
+; CHECK-GI-NEXT:    ushll v18.2d, v5.2s, #0
+; CHECK-GI-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT:    ushll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT:    ushll v7.2d, v2.2s, #0
+; CHECK-GI-NEXT:    mov v0.16b, v16.16b
+; CHECK-GI-NEXT:    mov v2.16b, v17.16b
+; CHECK-GI-NEXT:    mov v3.16b, v18.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <16 x i32> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+define <16 x i16> @zext_v16i10_v16i16(<16 x i10> %a) {
+; CHECK-LABEL: zext_v16i10_v16i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w8, [sp]
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    ldr w8, [sp, #8]
+; CHECK-NEXT:    mov v0.h[1], w1
+; CHECK-NEXT:    mov v1.h[1], w8
+; CHECK-NEXT:    ldr w8, [sp, #16]
+; CHECK-NEXT:    mov v0.h[2], w2
+; CHECK-NEXT:    mov v1.h[2], w8
+; CHECK-NEXT:    ldr w8, [sp, #24]
+; CHECK-NEXT:    mov v0.h[3], w3
+; CHECK-NEXT:    mov v1.h[3], w8
+; CHECK-NEXT:    ldr w8, [sp, #32]
+; CHECK-NEXT:    mov v0.h[4], w4
+; CHECK-NEXT:    mov v1.h[4], w8
+; CHECK-NEXT:    ldr w8, [sp, #40]
+; CHECK-NEXT:    mov v0.h[5], w5
+; CHECK-NEXT:    mov v1.h[5], w8
+; CHECK-NEXT:    ldr w8, [sp, #48]
+; CHECK-NEXT:    mov v0.h[6], w6
+; CHECK-NEXT:    mov v1.h[6], w8
+; CHECK-NEXT:    ldr w8, [sp, #56]
+; CHECK-NEXT:    mov v0.h[7], w7
+; CHECK-NEXT:    mov v1.h[7], w8
+; CHECK-NEXT:    bic v0.8h, #252, lsl #8
+; CHECK-NEXT:    bic v1.8h, #252, lsl #8
+; CHECK-NEXT:    ret
+entry:
+  %c = zext <16 x i10> %a to <16 x i16>
+  ret <16 x i16> %c
+}
+
+define <16 x i32> @zext_v16i10_v16i32(<16 x i10> %a) {
+; CHECK-SD-LABEL: zext_v16i10_v16i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldr w11, [sp, #32]
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    ldr w13, [sp]
+; CHECK-SD-NEXT:    fmov s1, w4
+; CHECK-SD-NEXT:    ldr w10, [sp, #40]
+; CHECK-SD-NEXT:    ldr w15, [sp, #8]
+; CHECK-SD-NEXT:    fmov s3, w11
+; CHECK-SD-NEXT:    fmov s2, w13
+; CHECK-SD-NEXT:    ldr w9, [sp, #48]
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    ldr w14, [sp, #16]
+; CHECK-SD-NEXT:    mov v1.h[1], w5
+; CHECK-SD-NEXT:    ldr w8, [sp, #56]
+; CHECK-SD-NEXT:    mov v2.h[1], w15
+; CHECK-SD-NEXT:    ldr w12, [sp, #24]
+; CHECK-SD-NEXT:    mov v3.h[1], w10
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    mov v1.h[2], w6
+; CHECK-SD-NEXT:    mov v2.h[2], w14
+; CHECK-SD-NEXT:    mov v3.h[2], w9
+; CHECK-SD-NEXT:    mov v0.h[3], w3
+; CHECK-SD-NEXT:    mov v1.h[3], w7
+; CHECK-SD-NEXT:    mov v2.h[3], w12
+; CHECK-SD-NEXT:    mov v3.h[3], w8
+; CHECK-SD-NEXT:    movi v4.4s, #3, msl #8
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT:    ushll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT:    ushll v3.4s, v3.4h, #0
+; CHECK-SD-NEXT:    and v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT:    and v1.16b, v1.16b, v4.16b
+; CHECK-SD-NEXT:    and v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT:    and v3.16b, v3.16b, v4.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v16i10_v16i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [sp]
+; CHECK-GI-NEXT:    fmov s16, w0
+; CHECK-GI-NEXT:    ldr s1, [sp, #8]
+; CHECK-GI-NEXT:    fmov s17, w4
+; CHECK-GI-NEXT:    ldr s4, [sp, #32]
+; CHECK-GI-NEXT:    adrp x8, .LCPI53_0
+; CHECK-GI-NEXT:    ldr s5, [sp, #40]
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    ldr s2, [sp, #16]
+; CHECK-GI-NEXT:    mov v16.s[1], w1
+; CHECK-GI-NEXT:    ldr s6, [sp, #48]
+; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT:    ldr s3, [sp, #24]
+; CHECK-GI-NEXT:    mov v17.s[1], w5
+; CHECK-GI-NEXT:    ldr s7, [sp, #56]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI53_0]
+; CHECK-GI-NEXT:    mov v16.s[2], w2
+; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
+; CHECK-GI-NEXT:    mov v17.s[2], w6
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT:    mov v16.s[3], w3
+; CHECK-GI-NEXT:    mov v4.s[3], v7.s[0]
+; CHECK-GI-NEXT:    mov v17.s[3], w7
+; CHECK-GI-NEXT:    and v2.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    and v0.16b, v16.16b, v1.16b
+; CHECK-GI-NEXT:    and v3.16b, v4.16b, v1.16b
+; CHECK-GI-NEXT:    and v1.16b, v17.16b, v1.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <16 x i10> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+define <16 x i64> @zext_v16i10_v16i64(<16 x i10> %a) {
+; CHECK-SD-LABEL: zext_v16i10_v16i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov w8, #1023 // =0x3ff
+; CHECK-SD-NEXT:    ldr s4, [sp]
+; CHECK-SD-NEXT:    ldr s5, [sp, #16]
+; CHECK-SD-NEXT:    add x9, sp, #24
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    fmov s1, w2
+; CHECK-SD-NEXT:    dup v7.2d, x8
+; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    fmov s2, w4
+; CHECK-SD-NEXT:    fmov s3, w6
+; CHECK-SD-NEXT:    ld1 { v5.s }[1], [x9]
+; CHECK-SD-NEXT:    add x9, sp, #56
+; CHECK-SD-NEXT:    ld1 { v4.s }[1], [x8]
+; CHECK-SD-NEXT:    add x8, sp, #40
+; CHECK-SD-NEXT:    ldr s6, [sp, #32]
+; CHECK-SD-NEXT:    ldr s16, [sp, #48]
+; CHECK-SD-NEXT:    mov v0.s[1], w1
+; CHECK-SD-NEXT:    mov v1.s[1], w3
+; CHECK-SD-NEXT:    ld1 { v6.s }[1], [x8]
+; CHECK-SD-NEXT:    mov v2.s[1], w5
+; CHECK-SD-NEXT:    ld1 { v16.s }[1], [x9]
+; CHECK-SD-NEXT:    mov v3.s[1], w7
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT:    ushll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT:    ushll v5.2d, v5.2s, #0
+; CHECK-SD-NEXT:    ushll v6.2d, v6.2s, #0
+; CHECK-SD-NEXT:    ushll v16.2d, v16.2s, #0
+; CHECK-SD-NEXT:    and v0.16b, v0.16b, v7.16b
+; CHECK-SD-NEXT:    and v1.16b, v1.16b, v7.16b
+; CHECK-SD-NEXT:    and v2.16b, v2.16b, v7.16b
+; CHECK-SD-NEXT:    and v3.16b, v3.16b, v7.16b
+; CHECK-SD-NEXT:    and v4.16b, v4.16b, v7.16b
+; CHECK-SD-NEXT:    and v5.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT:    and v6.16b, v6.16b, v7.16b
+; CHECK-SD-NEXT:    and v7.16b, v16.16b, v7.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: zext_v16i10_v16i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [sp]
+; CHECK-GI-NEXT:    fmov s6, w0
+; CHECK-GI-NEXT:    ldr s1, [sp, #8]
+; CHECK-GI-NEXT:    fmov s16, w2
+; CHECK-GI-NEXT:    ldr s2, [sp, #16]
+; CHECK-GI-NEXT:    fmov s18, w4
+; CHECK-GI-NEXT:    ldr s3, [sp, #24]
+; CHECK-GI-NEXT:    fmov s19, w6
+; CHECK-GI-NEXT:    ldr s4, [sp, #32]
+; CHECK-GI-NEXT:    adrp x8, .LCPI54_0
+; CHECK-GI-NEXT:    ldr s5, [sp, #40]
+; CHECK-GI-NEXT:    ldr s7, [sp, #48]
+; CHECK-GI-NEXT:    ldr s17, [sp, #56]
+; CHECK-GI-NEXT:    mov v6.s[1], w1
+; CHECK-GI-NEXT:    mov v16.s[1], w3
+; CHECK-GI-NEXT:    mov v18.s[1], w5
+; CHECK-GI-NEXT:    mov v19.s[1], w7
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    mov v2.s[1], v3.s[0]
+; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT:    mov v7.s[1], v17.s[0]
+; CHECK-GI-NEXT:    ldr q17, [x8, :lo12:.LCPI54_0]
+; CHECK-GI-NEXT:    ushll v1.2d, v6.2s, #0
+; CHECK-GI-NEXT:    ushll v3.2d, v16.2s, #0
+; CHECK-GI-NEXT:    ushll v5.2d, v18.2s, #0
+; CHECK-GI-NEXT:    ushll v6.2d, v19.2s, #0
+; CHECK-GI-NEXT:    ushll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll v18.2d, v2.2s, #0
+; CHECK-GI-NEXT:    ushll v19.2d, v4.2s, #0
+; CHECK-GI-NEXT:    ushll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT:    and v0.16b, v1.16b, v17.16b
+; CHECK-GI-NEXT:    and v1.16b, v3.16b, v17.16b
+; CHECK-GI-NEXT:    and v2.16b, v5.16b, v17.16b
+; CHECK-GI-NEXT:    and v3.16b, v6.16b, v17.16b
+; CHECK-GI-NEXT:    and v4.16b, v16.16b, v17.16b
+; CHECK-GI-NEXT:    and v5.16b, v18.16b, v17.16b
+; CHECK-GI-NEXT:    and v6.16b, v19.16b, v17.16b
+; CHECK-GI-NEXT:    and v7.16b, v7.16b, v17.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = zext <16 x i10> %a to <16 x i64>
+  ret <16 x i64> %c
+}


        


More information about the llvm-commits mailing list