[llvm] a40c984 - [AArch64][GlobalISel] Support more legal types for EXTEND
Tuan Chuong Goh via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 21 02:03:48 PDT 2023
Author: Tuan Chuong Goh
Date: 2023-08-21T09:51:17+01:00
New Revision: a40c984976d9cfead9800132720986f73b9f442d
URL: https://github.com/llvm/llvm-project/commit/a40c984976d9cfead9800132720986f73b9f442d
DIFF: https://github.com/llvm/llvm-project/commit/a40c984976d9cfead9800132720986f73b9f442d.diff
LOG: [AArch64][GlobalISel] Support more legal types for EXTEND
Expand (s/z/any)ext instructions to be compatible with more
types for GlobalISel.
This patch mainly focuses on 64-bit and 128-bit vectors with
element size of powers of 2.
It also notably handles larger than legal vectors.
Differential Revision: https://reviews.llvm.org/D157113
Added:
llvm/test/CodeGen/AArch64/sext.ll
llvm/test/CodeGen/AArch64/zext.ll
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
llvm/test/CodeGen/AArch64/aarch64-addv.ll
llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
llvm/test/CodeGen/AArch64/arm64-vabs.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 9288091874cf26..d36f27ea6e5af3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -375,6 +375,7 @@ class LegalizerHelper {
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
LegalizeResult lowerFunnelShift(MachineInstr &MI);
+ LegalizeResult lowerEXT(MachineInstr &MI);
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI);
LegalizeResult lowerRotate(MachineInstr &MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 69927227f837e7..4b059f334cfcd0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3601,6 +3601,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerMemCpyFamily(MI);
case G_MEMCPY_INLINE:
return lowerMemcpyInline(MI);
+ case G_ZEXT:
+ case G_SEXT:
+ case G_ANYEXT:
+ return lowerEXT(MI);
GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
}
@@ -5955,6 +5959,48 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
return Result;
}
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) { // Lowers a G_ZEXT/G_SEXT/G_ANYEXT whose elements widen by more than 2x.
+ auto [Dst, Src] = MI.getFirst2Regs();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ uint32_t DstTySize = DstTy.getSizeInBits();
+ uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
+ uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
+
+ if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
+ !isPowerOf2_32(SrcTyScalarSize))
+ return UnableToLegalize; // Only power-of-2 total and element sizes are handled.
+
+ // The destination element is more than twice as wide as the source element:
+ // extend in two steps through a type with double the source element size.
+ if (SrcTyScalarSize * 2 < DstTyScalarSize) {
+ LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
+ // The split below is unconditional; no legality check is made here.
+ // ext x -> merge(ext(a), ext(b)), where a, b = unmerge(ext x to MidTy)
+ auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
+ // Unmerge the intermediate result into pieces with half its element count
+ LLT EltTy = MidTy.changeElementCount(
+ MidTy.getElementCount().divideCoefficientBy(2)); // NOTE(review): presumably requires a vector type with an even element count - confirm
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
+
+ // Extend unmerged pieces 0 and 1 to the destination element size with MI's opcode
+ LLT ZExtResTy = DstTy.changeElementCount(
+ DstTy.getElementCount().divideCoefficientBy(2));
+ auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
+ {UnmergeSrc.getReg(0)});
+ auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
+ {UnmergeSrc.getReg(1)});
+
+ // Merge the two extended pieces into Dst
+ MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
+
+ MI.eraseFromParent(); // The new instructions above replace MI.
+ return Legalized;
+ }
+ return UnableToLegalize; // Widening by 2x or less is not handled by this lowering.
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 61f1350c5eeb43..0d6cbe7d88311a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -491,14 +491,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
unsigned DstSize = Query.Types[0].getSizeInBits();
- if (DstSize == 128 && !Query.Types[0].isVector())
- return false; // Extending to a scalar s128 needs narrowing.
-
- // Make sure that we have something that will fit in a register, and
- // make sure it's a power of 2.
- if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
+ // Handle legal vectors using legalFor
+ if (Query.Types[0].isVector())
return false;
+ if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
+ return false; // Extending to a scalar s128 needs narrowing.
+
const LLT &SrcTy = Query.Types[1];
// Make sure we fit in a register otherwise. Don't bother checking that
@@ -512,7 +511,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
};
getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
.legalIf(ExtLegalFunc)
- .clampScalar(0, s64, s64); // Just for s128, others are handled above.
+ .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
+ .clampScalar(0, s64, s64) // Just for s128, others are handled above.
+ .moreElementsToNextPow2(1)
+ .clampMaxNumElements(1, s8, 8)
+ .clampMaxNumElements(1, s16, 4)
+ .clampMaxNumElements(1, s32, 2)
+ // Tries to convert a large EXTEND into two smaller EXTENDs
+ .lowerIf([=](const LegalityQuery &Query) {
+ return (Query.Types[0].getScalarSizeInBits() >
+ Query.Types[1].getScalarSizeInBits() * 2) &&
+ Query.Types[0].isVector() &&
+ (Query.Types[1].getScalarSizeInBits() == 8 ||
+ Query.Types[1].getScalarSizeInBits() == 16);
+ });
getActionDefinitionsBuilder(G_TRUNC)
.minScalarOrEltIf(
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
index 0f3dc2b5f392ef..fa1700ac4fc52d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
@@ -243,15 +243,15 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
- ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
- ; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR]]
- ; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16)
- ; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+ ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
; CHECK-NEXT: RET_ReallyLR implicit $b0
%1:_(s8) = COPY $b0
%2:_(s8) = COPY $b1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
index 433a98afda413c..c9556e27c6349a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
@@ -129,8 +129,8 @@ body: |
%0:_(s16) = COPY $h0
%1:_(s16) = COPY $h1
%2:_(<2 x s16>) = G_BUILD_VECTOR %0(s16), %1(s16)
- %ext:_(<2 x s32>) = G_ANYEXT %2(<2 x s16>)
- $d0 = COPY %ext(<2 x s32>)
+ %3:_(<2 x s32>) = G_ANYEXT %2(<2 x s16>)
+ $d0 = COPY %3(<2 x s32>)
RET_ReallyLR
...
@@ -141,14 +141,14 @@ body: |
; CHECK-LABEL: name: widen_v2s8
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
- ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: %3:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: $d0 = COPY %3(<2 x s32>)
; CHECK-NEXT: RET_ReallyLR
%0:_(s8) = G_IMPLICIT_DEF
%1:_(s8) = G_IMPLICIT_DEF
%2:_(<2 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8)
- %ext:_(<2 x s32>) = G_ANYEXT %2(<2 x s8>)
- $d0 = COPY %ext(<2 x s32>)
+ %3:_(<2 x s32>) = G_ANYEXT %2(<2 x s8>)
+ $d0 = COPY %3(<2 x s32>)
RET_ReallyLR
...
@@ -169,7 +169,7 @@ body: |
%2:_(s8) = G_IMPLICIT_DEF
%3:_(s8) = G_IMPLICIT_DEF
%4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8)
- %ext:_(<4 x s16>) = G_ANYEXT %4(<4 x s8>)
- $d0 = COPY %ext(<4 x s16>)
+ %5:_(<4 x s16>) = G_ANYEXT %4(<4 x s8>)
+ $d0 = COPY %5(<4 x s16>)
RET_ReallyLR
...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
index fe9dd22fdb33cb..8c6a30aaed0483 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
@@ -274,13 +274,12 @@ body: |
; CHECK-LABEL: name: test_uitofp_v2s64_v2i1
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[ANYEXT]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<2 x s64>) = G_UITOFP [[AND]](<2 x s64>)
; CHECK-NEXT: $q0 = COPY [[UITOFP]](<2 x s64>)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
@@ -296,11 +295,10 @@ body: |
; CHECK-LABEL: name: test_sitofp_v2s64_v2i1
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s64>) = G_SEXT_INREG [[ANYEXT]], 1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64)
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s64>) = G_SEXT_INREG [[BUILD_VECTOR]], 1
; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(<2 x s64>) = G_SITOFP [[SEXT_INREG]](<2 x s64>)
; CHECK-NEXT: $q0 = COPY [[SITOFP]](<2 x s64>)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index 38548e760ac9f8..1c05fe737883ce 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -69,14 +69,40 @@ define i64 @add_D(ptr %arr) {
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias nocapture readonly %arg2) {
-; CHECK-LABEL: oversized_ADDV_256:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: uabdl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT: uaddlv s0, v0.8h
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: oversized_ADDV_256:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: ldr d0, [x0]
+; SDAG-NEXT: ldr d1, [x1]
+; SDAG-NEXT: uabdl v0.8h, v0.8b, v1.8b
+; SDAG-NEXT: uaddlv s0, v0.8h
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: oversized_ADDV_256:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: ldr d1, [x0]
+; GISEL-NEXT: ldr d2, [x1]
+; GISEL-NEXT: movi v0.2d, #0000000000000000
+; GISEL-NEXT: ushll v1.8h, v1.8b, #0
+; GISEL-NEXT: ushll v2.8h, v2.8b, #0
+; GISEL-NEXT: mov d3, v1.d[1]
+; GISEL-NEXT: mov d4, v2.d[1]
+; GISEL-NEXT: usubl v1.4s, v1.4h, v2.4h
+; GISEL-NEXT: usubl v2.4s, v3.4h, v4.4h
+; GISEL-NEXT: cmgt v3.4s, v0.4s, v1.4s
+; GISEL-NEXT: neg v4.4s, v1.4s
+; GISEL-NEXT: cmgt v0.4s, v0.4s, v2.4s
+; GISEL-NEXT: shl v3.4s, v3.4s, #31
+; GISEL-NEXT: shl v0.4s, v0.4s, #31
+; GISEL-NEXT: neg v5.4s, v2.4s
+; GISEL-NEXT: sshr v3.4s, v3.4s, #31
+; GISEL-NEXT: sshr v0.4s, v0.4s, #31
+; GISEL-NEXT: bit v1.16b, v4.16b, v3.16b
+; GISEL-NEXT: bsl v0.16b, v5.16b, v2.16b
+; GISEL-NEXT: add v0.4s, v1.4s, v0.4s
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: fmov w0, s0
+; GISEL-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %arg1, align 1
%1 = zext <8 x i8> %0 to <8 x i32>
@@ -93,16 +119,16 @@ entry:
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
define i32 @oversized_ADDV_512(ptr %arr) {
-; SDAG-LABEL: oversized_ADDV_512:
-; SDAG: // %bb.0:
-; SDAG-NEXT: ldp q0, q1, [x0, #32]
-; SDAG-NEXT: ldp q3, q2, [x0]
-; SDAG-NEXT: add v0.4s, v3.4s, v0.4s
-; SDAG-NEXT: add v1.4s, v2.4s, v1.4s
-; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
-; SDAG-NEXT: addv s0, v0.4s
-; SDAG-NEXT: fmov w0, s0
-; SDAG-NEXT: ret
+; SDAG-LABEL: oversized_ADDV_512:
+; SDAG: // %bb.0:
+; SDAG-NEXT: ldp q0, q1, [x0, #32]
+; SDAG-NEXT: ldp q3, q2, [x0]
+; SDAG-NEXT: add v0.4s, v3.4s, v0.4s
+; SDAG-NEXT: add v1.4s, v2.4s, v1.4s
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
;
; GISEL-LABEL: oversized_ADDV_512:
; GISEL: // %bb.0:
@@ -148,19 +174,19 @@ entry:
}
define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {
-; SDAG-LABEL: addv_combine_i32:
-; SDAG: // %bb.0: // %entry
-; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
-; SDAG-NEXT: addv s0, v0.4s
-; SDAG-NEXT: fmov w0, s0
-; SDAG-NEXT: ret
+; SDAG-LABEL: addv_combine_i32:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
;
; GISEL-LABEL: addv_combine_i32:
; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: addv s0, v0.4s
-; GISEL-NEXT: addv s1, v1.4s
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: addv s1, v1.4s
+; GISEL-NEXT: fmov w8, s0
+; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: add w0, w8, w9
; GISEL-NEXT: ret
entry:
@@ -171,19 +197,19 @@ entry:
}
define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {
-; SDAG-LABEL: addv_combine_i64:
-; SDAG: // %bb.0: // %entry
-; SDAG-NEXT: add v0.2d, v0.2d, v1.2d
-; SDAG-NEXT: addp d0, v0.2d
-; SDAG-NEXT: fmov x0, d0
-; SDAG-NEXT: ret
+; SDAG-LABEL: addv_combine_i64:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.2d, v0.2d, v1.2d
+; SDAG-NEXT: addp d0, v0.2d
+; SDAG-NEXT: fmov x0, d0
+; SDAG-NEXT: ret
;
; GISEL-LABEL: addv_combine_i64:
; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: addp d0, v0.2d
-; GISEL-NEXT: addp d1, v1.2d
-; GISEL-NEXT: fmov x8, d0
-; GISEL-NEXT: fmov x9, d1
+; GISEL-NEXT: addp d0, v0.2d
+; GISEL-NEXT: addp d1, v1.2d
+; GISEL-NEXT: fmov x8, d0
+; GISEL-NEXT: fmov x9, d1
; GISEL-NEXT: add x0, x8, x9
; GISEL-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 6a13d950d3b14b..60c2dada6b0627 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -5,21 +5,7 @@
; Test efficient codegen of vector extends up from legal type to 128 bit
; and 256 bit vector types.
-; CHECK-GI: warning: Instruction selection used fallback path for func3
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for func4
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for afunc3
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for afunc4
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bfunc1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bfunc2
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zfunc1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zfunc2
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bfunc3
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cfunc4
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v4i8_to_v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v4i8_to_v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v8i8_to_v8i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v8i8_to_v8i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v32i1
+; CHECK-GI: warning: Instruction selection used fallback path for zext_v32i1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v32i1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v64i1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v64i1
@@ -47,21 +33,35 @@ define <8 x i16> @func2(<8 x i8> %v0) nounwind {
}
define <16 x i16> @func3(<16 x i8> %v0) nounwind {
-; CHECK-LABEL: func3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll2.8h v1, v0, #0
-; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: func3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll2.8h v1, v0, #0
+; CHECK-SD-NEXT: ushll.8h v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: func3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: ushll.8h v0, v0, #0
+; CHECK-GI-NEXT: ushll.8h v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = zext <16 x i8> %v0 to <16 x i16>
ret <16 x i16> %r
}
define <16 x i16> @func4(<16 x i8> %v0) nounwind {
-; CHECK-LABEL: func4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll2.8h v1, v0, #0
-; CHECK-NEXT: sshll.8h v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: func4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll2.8h v1, v0, #0
+; CHECK-SD-NEXT: sshll.8h v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: func4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: sshll.8h v0, v0, #0
+; CHECK-GI-NEXT: sshll.8h v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = sext <16 x i8> %v0 to <16 x i16>
ret <16 x i16> %r
}
@@ -89,43 +89,73 @@ define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
}
define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll2.4s v1, v0, #0
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: afunc3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll2.4s v1, v0, #0
+; CHECK-SD-NEXT: ushll.4s v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: afunc3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: ushll.4s v0, v0, #0
+; CHECK-GI-NEXT: ushll.4s v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = zext <8 x i16> %v0 to <8 x i32>
ret <8 x i32> %r
}
define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll2.4s v1, v0, #0
-; CHECK-NEXT: sshll.4s v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: afunc4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll2.4s v1, v0, #0
+; CHECK-SD-NEXT: sshll.4s v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: afunc4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: sshll.4s v0, v0, #0
+; CHECK-GI-NEXT: sshll.4s v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = sext <8 x i16> %v0 to <8 x i32>
ret <8 x i32> %r
}
define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: bfunc1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: ushll2.4s v1, v0, #0
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bfunc1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll.8h v0, v0, #0
+; CHECK-SD-NEXT: ushll2.4s v1, v0, #0
+; CHECK-SD-NEXT: ushll.4s v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bfunc1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll.8h v0, v0, #0
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: ushll.4s v0, v0, #0
+; CHECK-GI-NEXT: ushll.4s v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = zext <8 x i8> %v0 to <8 x i32>
ret <8 x i32> %r
}
define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: bfunc2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll.8h v0, v0, #0
-; CHECK-NEXT: sshll2.4s v1, v0, #0
-; CHECK-NEXT: sshll.4s v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bfunc2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll.8h v0, v0, #0
+; CHECK-SD-NEXT: sshll2.4s v1, v0, #0
+; CHECK-SD-NEXT: sshll.4s v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bfunc2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll.8h v0, v0, #0
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: sshll.4s v0, v0, #0
+; CHECK-GI-NEXT: sshll.4s v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = sext <8 x i8> %v0 to <8 x i32>
ret <8 x i32> %r
}
@@ -135,100 +165,182 @@ define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
;-----
define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
-; CHECK-LABEL: zfunc1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll2.2d v1, v0, #0
-; CHECK-NEXT: ushll.2d v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: zfunc1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
+; CHECK-SD-NEXT: ushll.2d v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zfunc1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: ushll.2d v0, v0, #0
+; CHECK-GI-NEXT: ushll.2d v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = zext <4 x i32> %v0 to <4 x i64>
ret <4 x i64> %r
}
define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
-; CHECK-LABEL: zfunc2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll2.2d v1, v0, #0
-; CHECK-NEXT: sshll.2d v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: zfunc2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
+; CHECK-SD-NEXT: sshll.2d v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zfunc2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: sshll.2d v0, v0, #0
+; CHECK-GI-NEXT: sshll.2d v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = sext <4 x i32> %v0 to <4 x i64>
ret <4 x i64> %r
}
define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
-; CHECK-LABEL: bfunc3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ushll2.2d v1, v0, #0
-; CHECK-NEXT: ushll.2d v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bfunc3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll.4s v0, v0, #0
+; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
+; CHECK-SD-NEXT: ushll.2d v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bfunc3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll.4s v0, v0, #0
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: ushll.2d v0, v0, #0
+; CHECK-GI-NEXT: ushll.2d v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = zext <4 x i16> %v0 to <4 x i64>
ret <4 x i64> %r
}
define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
-; CHECK-LABEL: cfunc4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll.4s v0, v0, #0
-; CHECK-NEXT: sshll2.2d v1, v0, #0
-; CHECK-NEXT: sshll.2d v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cfunc4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll.4s v0, v0, #0
+; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
+; CHECK-SD-NEXT: sshll.2d v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cfunc4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll.4s v0, v0, #0
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: sshll.2d v0, v0, #0
+; CHECK-GI-NEXT: sshll.2d v1, v1, #0
+; CHECK-GI-NEXT: ret
%r = sext <4 x i16> %v0 to <4 x i64>
ret <4 x i64> %r
}
define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
-; CHECK-LABEL: zext_v4i8_to_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: bic.4h v0, #255, lsl #8
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ushll2.2d v1, v0, #0
-; CHECK-NEXT: ushll.2d v0, v0, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: zext_v4i8_to_v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
+; CHECK-SD-NEXT: ushll.4s v0, v0, #0
+; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
+; CHECK-SD-NEXT: ushll.2d v0, v0, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i8_to_v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll.4s v0, v0, #0
+; CHECK-GI-NEXT: adrp x8, .LCPI14_0
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT: ushll.2d v0, v0, #0
+; CHECK-GI-NEXT: ushll.2d v1, v1, #0
+; CHECK-GI-NEXT: and.16b v0, v0, v2
+; CHECK-GI-NEXT: and.16b v1, v1, v2
+; CHECK-GI-NEXT: ret
%r = zext <4 x i8> %v0 to <4 x i64>
ret <4 x i64> %r
}
define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
-; CHECK-LABEL: sext_v4i8_to_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ushll.2d v1, v0, #0
-; CHECK-NEXT: ushll2.2d v0, v0, #0
-; CHECK-NEXT: shl.2d v2, v1, #56
-; CHECK-NEXT: shl.2d v0, v0, #56
-; CHECK-NEXT: sshr.2d v1, v0, #56
-; CHECK-NEXT: sshr.2d v0, v2, #56
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sext_v4i8_to_v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll.4s v0, v0, #0
+; CHECK-SD-NEXT: ushll.2d v1, v0, #0
+; CHECK-SD-NEXT: ushll2.2d v0, v0, #0
+; CHECK-SD-NEXT: shl.2d v2, v1, #56
+; CHECK-SD-NEXT: shl.2d v0, v0, #56
+; CHECK-SD-NEXT: sshr.2d v1, v0, #56
+; CHECK-SD-NEXT: sshr.2d v0, v2, #56
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v4i8_to_v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll.4s v0, v0, #0
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: ushll.2d v0, v0, #0
+; CHECK-GI-NEXT: shl.2d v0, v0, #56
+; CHECK-GI-NEXT: ushll.2d v1, v1, #0
+; CHECK-GI-NEXT: sshr.2d v0, v0, #56
+; CHECK-GI-NEXT: shl.2d v1, v1, #56
+; CHECK-GI-NEXT: sshr.2d v1, v1, #56
+; CHECK-GI-NEXT: ret
%r = sext <4 x i8> %v0 to <4 x i64>
ret <4 x i64> %r
}
define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: zext_v8i8_to_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: ushll2.4s v2, v0, #0
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ushll2.2d v3, v2, #0
-; CHECK-NEXT: ushll2.2d v1, v0, #0
-; CHECK-NEXT: ushll.2d v0, v0, #0
-; CHECK-NEXT: ushll.2d v2, v2, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: zext_v8i8_to_v8i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll.8h v0, v0, #0
+; CHECK-SD-NEXT: ushll2.4s v2, v0, #0
+; CHECK-SD-NEXT: ushll.4s v0, v0, #0
+; CHECK-SD-NEXT: ushll2.2d v3, v2, #0
+; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
+; CHECK-SD-NEXT: ushll.2d v0, v0, #0
+; CHECK-SD-NEXT: ushll.2d v2, v2, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i8_to_v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll.8h v0, v0, #0
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: ushll.4s v0, v0, #0
+; CHECK-GI-NEXT: mov d2, v0[1]
+; CHECK-GI-NEXT: ushll.4s v3, v1, #0
+; CHECK-GI-NEXT: ushll.2d v0, v0, #0
+; CHECK-GI-NEXT: mov d4, v3[1]
+; CHECK-GI-NEXT: ushll.2d v1, v2, #0
+; CHECK-GI-NEXT: ushll.2d v2, v3, #0
+; CHECK-GI-NEXT: ushll.2d v3, v4, #0
+; CHECK-GI-NEXT: ret
%r = zext <8 x i8> %v0 to <8 x i64>
ret <8 x i64> %r
}
define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: sext_v8i8_to_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll.8h v0, v0, #0
-; CHECK-NEXT: sshll2.4s v2, v0, #0
-; CHECK-NEXT: sshll.4s v0, v0, #0
-; CHECK-NEXT: sshll2.2d v3, v2, #0
-; CHECK-NEXT: sshll2.2d v1, v0, #0
-; CHECK-NEXT: sshll.2d v0, v0, #0
-; CHECK-NEXT: sshll.2d v2, v2, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sext_v8i8_to_v8i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll.8h v0, v0, #0
+; CHECK-SD-NEXT: sshll2.4s v2, v0, #0
+; CHECK-SD-NEXT: sshll.4s v0, v0, #0
+; CHECK-SD-NEXT: sshll2.2d v3, v2, #0
+; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
+; CHECK-SD-NEXT: sshll.2d v0, v0, #0
+; CHECK-SD-NEXT: sshll.2d v2, v2, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v8i8_to_v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll.8h v0, v0, #0
+; CHECK-GI-NEXT: mov d1, v0[1]
+; CHECK-GI-NEXT: sshll.4s v0, v0, #0
+; CHECK-GI-NEXT: mov d2, v0[1]
+; CHECK-GI-NEXT: sshll.4s v3, v1, #0
+; CHECK-GI-NEXT: sshll.2d v0, v0, #0
+; CHECK-GI-NEXT: mov d4, v3[1]
+; CHECK-GI-NEXT: sshll.2d v1, v2, #0
+; CHECK-GI-NEXT: sshll.2d v2, v3, #0
+; CHECK-GI-NEXT: sshll.2d v3, v4, #0
+; CHECK-GI-NEXT: ret
%r = sext <8 x i8> %v0 to <8 x i64>
ret <8 x i64> %r
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 831acd242221b0..3003e4c1c411ee 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -3,10 +3,6 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for uabd16b_rdx
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd16b_rdx_i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sabd16b_rdx_i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd8h_rdx
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sabd8h_rdx
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd4s_rdx
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sabd4s_rdx
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_8b
@@ -281,13 +277,58 @@ define i16 @uabd16b_rdx(ptr %a, ptr %b) {
}
define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: uabd16b_rdx_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabdl.8h v2, v0, v1
-; CHECK-NEXT: uabal2.8h v2, v0, v1
-; CHECK-NEXT: uaddlv.8h s0, v2
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: uabd16b_rdx_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabdl.8h v2, v0, v1
+; CHECK-SD-NEXT: uabal2.8h v2, v0, v1
+; CHECK-SD-NEXT: uaddlv.8h s0, v2
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: uabd16b_rdx_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d3, v0[1]
+; CHECK-GI-NEXT: ushll.8h v4, v1, #0
+; CHECK-GI-NEXT: mov d1, v1[1]
+; CHECK-GI-NEXT: ushll.8h v0, v0, #0
+; CHECK-GI-NEXT: mov d6, v4[1]
+; CHECK-GI-NEXT: ushll.8h v3, v3, #0
+; CHECK-GI-NEXT: mov d5, v0[1]
+; CHECK-GI-NEXT: ushll.8h v1, v1, #0
+; CHECK-GI-NEXT: mov d7, v3[1]
+; CHECK-GI-NEXT: mov d16, v1[1]
+; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT: usubl.4s v0, v0, v4
+; CHECK-GI-NEXT: usubl.4s v5, v5, v6
+; CHECK-GI-NEXT: usubl.4s v1, v3, v1
+; CHECK-GI-NEXT: usubl.4s v3, v7, v16
+; CHECK-GI-NEXT: cmgt.4s v4, v2, v0
+; CHECK-GI-NEXT: cmgt.4s v6, v2, v5
+; CHECK-GI-NEXT: cmgt.4s v7, v2, v1
+; CHECK-GI-NEXT: cmgt.4s v2, v2, v3
+; CHECK-GI-NEXT: shl.4s v4, v4, #31
+; CHECK-GI-NEXT: shl.4s v6, v6, #31
+; CHECK-GI-NEXT: shl.4s v7, v7, #31
+; CHECK-GI-NEXT: shl.4s v2, v2, #31
+; CHECK-GI-NEXT: sshr.4s v4, v4, #31
+; CHECK-GI-NEXT: neg.4s v17, v0
+; CHECK-GI-NEXT: sshr.4s v6, v6, #31
+; CHECK-GI-NEXT: neg.4s v16, v5
+; CHECK-GI-NEXT: neg.4s v18, v1
+; CHECK-GI-NEXT: neg.4s v19, v3
+; CHECK-GI-NEXT: sshr.4s v7, v7, #31
+; CHECK-GI-NEXT: sshr.4s v2, v2, #31
+; CHECK-GI-NEXT: bit.16b v0, v17, v4
+; CHECK-GI-NEXT: mov.16b v4, v6
+; CHECK-GI-NEXT: bsl.16b v4, v16, v5
+; CHECK-GI-NEXT: bit.16b v1, v18, v7
+; CHECK-GI-NEXT: bsl.16b v2, v19, v3
+; CHECK-GI-NEXT: add.4s v0, v0, v4
+; CHECK-GI-NEXT: add.4s v1, v1, v2
+; CHECK-GI-NEXT: add.4s v0, v0, v1
+; CHECK-GI-NEXT: addv.4s s0, v0
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%aext = zext <16 x i8> %a to <16 x i32>
%bext = zext <16 x i8> %b to <16 x i32>
%abdiff = sub nsw <16 x i32> %aext, %bext
@@ -299,13 +340,58 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
}
define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: sabd16b_rdx_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabdl.8h v2, v0, v1
-; CHECK-NEXT: sabal2.8h v2, v0, v1
-; CHECK-NEXT: uaddlv.8h s0, v2
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabd16b_rdx_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabdl.8h v2, v0, v1
+; CHECK-SD-NEXT: sabal2.8h v2, v0, v1
+; CHECK-SD-NEXT: uaddlv.8h s0, v2
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabd16b_rdx_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d3, v0[1]
+; CHECK-GI-NEXT: sshll.8h v4, v1, #0
+; CHECK-GI-NEXT: mov d1, v1[1]
+; CHECK-GI-NEXT: sshll.8h v0, v0, #0
+; CHECK-GI-NEXT: mov d6, v4[1]
+; CHECK-GI-NEXT: sshll.8h v3, v3, #0
+; CHECK-GI-NEXT: mov d5, v0[1]
+; CHECK-GI-NEXT: sshll.8h v1, v1, #0
+; CHECK-GI-NEXT: mov d7, v3[1]
+; CHECK-GI-NEXT: mov d16, v1[1]
+; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT: ssubl.4s v0, v0, v4
+; CHECK-GI-NEXT: ssubl.4s v5, v5, v6
+; CHECK-GI-NEXT: ssubl.4s v1, v3, v1
+; CHECK-GI-NEXT: ssubl.4s v3, v7, v16
+; CHECK-GI-NEXT: cmgt.4s v4, v2, v0
+; CHECK-GI-NEXT: cmgt.4s v6, v2, v5
+; CHECK-GI-NEXT: cmgt.4s v7, v2, v1
+; CHECK-GI-NEXT: cmgt.4s v2, v2, v3
+; CHECK-GI-NEXT: shl.4s v4, v4, #31
+; CHECK-GI-NEXT: shl.4s v6, v6, #31
+; CHECK-GI-NEXT: shl.4s v7, v7, #31
+; CHECK-GI-NEXT: shl.4s v2, v2, #31
+; CHECK-GI-NEXT: sshr.4s v4, v4, #31
+; CHECK-GI-NEXT: neg.4s v17, v0
+; CHECK-GI-NEXT: sshr.4s v6, v6, #31
+; CHECK-GI-NEXT: neg.4s v16, v5
+; CHECK-GI-NEXT: neg.4s v18, v1
+; CHECK-GI-NEXT: neg.4s v19, v3
+; CHECK-GI-NEXT: sshr.4s v7, v7, #31
+; CHECK-GI-NEXT: sshr.4s v2, v2, #31
+; CHECK-GI-NEXT: bit.16b v0, v17, v4
+; CHECK-GI-NEXT: mov.16b v4, v6
+; CHECK-GI-NEXT: bsl.16b v4, v16, v5
+; CHECK-GI-NEXT: bit.16b v1, v18, v7
+; CHECK-GI-NEXT: bsl.16b v2, v19, v3
+; CHECK-GI-NEXT: add.4s v0, v0, v4
+; CHECK-GI-NEXT: add.4s v1, v1, v2
+; CHECK-GI-NEXT: add.4s v0, v0, v1
+; CHECK-GI-NEXT: addv.4s s0, v0
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%aext = sext <16 x i8> %a to <16 x i32>
%bext = sext <16 x i8> %b to <16 x i32>
%abdiff = sub nsw <16 x i32> %aext, %bext
@@ -321,14 +407,38 @@ declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
define i32 @uabd8h_rdx(ptr %a, ptr %b) {
-; CHECK-LABEL: uabd8h_rdx:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: uabd.8h v0, v0, v1
-; CHECK-NEXT: uaddlv.8h s0, v0
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: uabd8h_rdx:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr q0, [x0]
+; CHECK-SD-NEXT: ldr q1, [x1]
+; CHECK-SD-NEXT: uabd.8h v0, v0, v1
+; CHECK-SD-NEXT: uaddlv.8h s0, v0
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: uabd8h_rdx:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
+; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
+; CHECK-GI-NEXT: mov d3, v1[1]
+; CHECK-GI-NEXT: mov d4, v2[1]
+; CHECK-GI-NEXT: usubl.4s v1, v1, v2
+; CHECK-GI-NEXT: usubl.4s v2, v3, v4
+; CHECK-GI-NEXT: cmgt.4s v3, v0, v1
+; CHECK-GI-NEXT: neg.4s v4, v1
+; CHECK-GI-NEXT: cmgt.4s v0, v0, v2
+; CHECK-GI-NEXT: shl.4s v3, v3, #31
+; CHECK-GI-NEXT: shl.4s v0, v0, #31
+; CHECK-GI-NEXT: neg.4s v5, v2
+; CHECK-GI-NEXT: sshr.4s v3, v3, #31
+; CHECK-GI-NEXT: sshr.4s v0, v0, #31
+; CHECK-GI-NEXT: bit.16b v1, v4, v3
+; CHECK-GI-NEXT: bsl.16b v0, v5, v2
+; CHECK-GI-NEXT: add.4s v0, v1, v0
+; CHECK-GI-NEXT: addv.4s s0, v0
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%aload = load <8 x i16>, ptr %a, align 1
%bload = load <8 x i16>, ptr %b, align 1
%aext = zext <8 x i16> %aload to <8 x i32>
@@ -342,12 +452,34 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
}
define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: sabd8h_rdx:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd.8h v0, v0, v1
-; CHECK-NEXT: uaddlv.8h s0, v0
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabd8h_rdx:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd.8h v0, v0, v1
+; CHECK-SD-NEXT: uaddlv.8h s0, v0
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabd8h_rdx:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d3, v0[1]
+; CHECK-GI-NEXT: mov d4, v1[1]
+; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT: ssubl.4s v0, v0, v1
+; CHECK-GI-NEXT: ssubl.4s v1, v3, v4
+; CHECK-GI-NEXT: cmgt.4s v3, v2, v0
+; CHECK-GI-NEXT: neg.4s v4, v0
+; CHECK-GI-NEXT: cmgt.4s v2, v2, v1
+; CHECK-GI-NEXT: shl.4s v3, v3, #31
+; CHECK-GI-NEXT: shl.4s v2, v2, #31
+; CHECK-GI-NEXT: neg.4s v5, v1
+; CHECK-GI-NEXT: sshr.4s v3, v3, #31
+; CHECK-GI-NEXT: sshr.4s v2, v2, #31
+; CHECK-GI-NEXT: bit.16b v0, v4, v3
+; CHECK-GI-NEXT: bit.16b v1, v5, v2
+; CHECK-GI-NEXT: add.4s v0, v0, v1
+; CHECK-GI-NEXT: addv.4s s0, v0
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%aext = sext <8 x i16> %a to <8 x i32>
%bext = sext <8 x i16> %b to <8 x i32>
%abdiff = sub nsw <8 x i32> %aext, %bext
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
new file mode 100644
index 00000000000000..d794991895b3c4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -0,0 +1,1216 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for sext_v3i8_v3i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i8_v3i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v16i10_v16i16
+
+define i16 @sext_i8_to_i16(i8 %a) {
+; CHECK-LABEL: sext_i8_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w0, w0
+; CHECK-NEXT: ret
+entry:
+ %c = sext i8 %a to i16
+ ret i16 %c
+}
+
+define i32 @sext_i8_to_i32(i8 %a) {
+; CHECK-LABEL: sext_i8_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w0, w0
+; CHECK-NEXT: ret
+entry:
+ %c = sext i8 %a to i32
+ ret i32 %c
+}
+
+define i64 @sext_i8_to_i64(i8 %a) {
+; CHECK-LABEL: sext_i8_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtb x0, w0
+; CHECK-NEXT: ret
+entry:
+ %c = sext i8 %a to i64
+ ret i64 %c
+}
+
+define i10 @sext_i8_to_i10(i8 %a) {
+; CHECK-LABEL: sext_i8_to_i10:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w0, w0
+; CHECK-NEXT: ret
+entry:
+ %c = sext i8 %a to i10
+ ret i10 %c
+}
+
+define i32 @sext_i16_to_i32(i16 %a) {
+; CHECK-LABEL: sext_i16_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxth w0, w0
+; CHECK-NEXT: ret
+entry:
+ %c = sext i16 %a to i32
+ ret i32 %c
+}
+
+define i64 @sext_i16_to_i64(i16 %a) {
+; CHECK-LABEL: sext_i16_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxth x0, w0
+; CHECK-NEXT: ret
+entry:
+ %c = sext i16 %a to i64
+ ret i64 %c
+}
+
+define i64 @sext_i32_to_i64(i32 %a) {
+; CHECK-LABEL: sext_i32_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtw x0, w0
+; CHECK-NEXT: ret
+entry:
+ %c = sext i32 %a to i64
+ ret i64 %c
+}
+
+define i16 @sext_i10_to_i16(i10 %a) {
+; CHECK-LABEL: sext_i10_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sbfx w0, w0, #0, #10
+; CHECK-NEXT: ret
+entry:
+ %c = sext i10 %a to i16
+ ret i16 %c
+}
+
+define i32 @sext_i10_to_i32(i10 %a) {
+; CHECK-LABEL: sext_i10_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sbfx w0, w0, #0, #10
+; CHECK-NEXT: ret
+entry:
+ %c = sext i10 %a to i32
+ ret i32 %c
+}
+
+define i64 @sext_i10_to_i64(i10 %a) {
+; CHECK-LABEL: sext_i10_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sbfx x0, x0, #0, #10
+; CHECK-NEXT: ret
+entry:
+ %c = sext i10 %a to i64
+ ret i64 %c
+}
+
+define <2 x i16> @sext_v2i8_v2i16(<2 x i8> %a) {
+; CHECK-LABEL: sext_v2i8_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-NEXT: sshr v0.2s, v0.2s, #24
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i8> %a to <2 x i16>
+ ret <2 x i16> %c
+}
+
+define <2 x i32> @sext_v2i8_v2i32(<2 x i8> %a) {
+; CHECK-LABEL: sext_v2i8_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-NEXT: sshr v0.2s, v0.2s, #24
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i8> %a to <2 x i32>
+ ret <2 x i32> %c
+}
+
+define <2 x i64> @sext_v2i8_v2i64(<2 x i8> %a) {
+; CHECK-LABEL: sext_v2i8_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i8> %a to <2 x i64>
+ ret <2 x i64> %c
+}
+
+define <2 x i32> @sext_v2i16_v2i32(<2 x i16> %a) {
+; CHECK-LABEL: sext_v2i16_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.2s, v0.2s, #16
+; CHECK-NEXT: sshr v0.2s, v0.2s, #16
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i16> %a to <2 x i32>
+ ret <2 x i32> %c
+}
+
+define <2 x i64> @sext_v2i16_v2i64(<2 x i16> %a) {
+; CHECK-LABEL: sext_v2i16_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: shl v0.2d, v0.2d, #48
+; CHECK-NEXT: sshr v0.2d, v0.2d, #48
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i16> %a to <2 x i64>
+ ret <2 x i64> %c
+}
+
+define <2 x i64> @sext_v2i32_v2i64(<2 x i32> %a) {
+; CHECK-LABEL: sext_v2i32_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %c
+}
+
+define <2 x i16> @sext_v2i10_v2i16(<2 x i10> %a) {
+; CHECK-LABEL: sext_v2i10_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.2s, v0.2s, #22
+; CHECK-NEXT: sshr v0.2s, v0.2s, #22
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i10> %a to <2 x i16>
+ ret <2 x i16> %c
+}
+
+define <2 x i32> @sext_v2i10_v2i32(<2 x i10> %a) {
+; CHECK-LABEL: sext_v2i10_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.2s, v0.2s, #22
+; CHECK-NEXT: sshr v0.2s, v0.2s, #22
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i10> %a to <2 x i32>
+ ret <2 x i32> %c
+}
+
+define <2 x i64> @sext_v2i10_v2i64(<2 x i10> %a) {
+; CHECK-LABEL: sext_v2i10_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: shl v0.2d, v0.2d, #54
+; CHECK-NEXT: sshr v0.2d, v0.2d, #54
+; CHECK-NEXT: ret
+entry:
+ %c = sext <2 x i10> %a to <2 x i64>
+ ret <2 x i64> %c
+}
+
+define <3 x i16> @sext_v3i8_v3i16(<3 x i8> %a) {
+; CHECK-LABEL: sext_v3i8_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov v0.h[1], w1
+; CHECK-NEXT: mov v0.h[2], w2
+; CHECK-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-NEXT: ret
+entry:
+ %c = sext <3 x i8> %a to <3 x i16>
+ ret <3 x i16> %c
+}
+
+define <3 x i32> @sext_v3i8_v3i32(<3 x i8> %a) {
+; CHECK-LABEL: sext_v3i8_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov v0.h[1], w1
+; CHECK-NEXT: mov v0.h[2], w2
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-NEXT: ret
+entry:
+ %c = sext <3 x i8> %a to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <3 x i64> @sext_v3i8_v3i64(<3 x i8> %a) {
+; CHECK-SD-LABEL: sext_v3i8_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s0, w2
+; CHECK-SD-NEXT: mov v1.s[1], w1
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: shl v2.2d, v0.2d, #56
+; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0
+; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #56
+; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v3i8_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-GI-NEXT: lsl x8, x2, #56
+; CHECK-GI-NEXT: asr x8, x8, #56
+; CHECK-GI-NEXT: mov v0.d[1], x1
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <3 x i8> %a to <3 x i64>
+ ret <3 x i64> %c
+}
+
+define <3 x i32> @sext_v3i16_v3i32(<3 x i16> %a) {
+; CHECK-SD-LABEL: sext_v3i16_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v3i16_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: sxth w8, w8
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov w8, s2
+; CHECK-GI-NEXT: sxth w9, w9
+; CHECK-GI-NEXT: sxth w8, w8
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: mov v0.s[2], w8
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <3 x i16> %a to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <3 x i64> @sext_v3i16_v3i64(<3 x i16> %a) {
+; CHECK-SD-LABEL: sext_v3i16_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll v0.2d, v2.2s, #0
+; CHECK-SD-NEXT: sshll2 v2.2d, v2.4s, #0
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v3i16_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: sxth x8, w8
+; CHECK-GI-NEXT: sxth x9, w9
+; CHECK-GI-NEXT: sxth x10, w10
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: fmov d1, x9
+; CHECK-GI-NEXT: fmov d2, x10
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <3 x i16> %a to <3 x i64>
+ ret <3 x i64> %c
+}
+
+define <3 x i64> @sext_v3i32_v3i64(<3 x i32> %a) {
+; CHECK-SD-LABEL: sext_v3i32_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll2 v2.2d, v0.4s, #0
+; CHECK-SD-NEXT: fmov d0, d3
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v3i32_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: sxtw x8, w8
+; CHECK-GI-NEXT: sxtw x9, w9
+; CHECK-GI-NEXT: sxtw x10, w10
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: fmov d1, x9
+; CHECK-GI-NEXT: fmov d2, x10
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <3 x i32> %a to <3 x i64>
+ ret <3 x i64> %c
+}
+
+define <3 x i16> @sext_v3i10_v3i16(<3 x i10> %a) {
+; CHECK-LABEL: sext_v3i10_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov v0.h[1], w1
+; CHECK-NEXT: mov v0.h[2], w2
+; CHECK-NEXT: shl v0.4h, v0.4h, #6
+; CHECK-NEXT: sshr v0.4h, v0.4h, #6
+; CHECK-NEXT: ret
+entry:
+ %c = sext <3 x i10> %a to <3 x i16>
+ ret <3 x i16> %c
+}
+
+define <3 x i32> @sext_v3i10_v3i32(<3 x i10> %a) {
+; CHECK-LABEL: sext_v3i10_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov v0.h[1], w1
+; CHECK-NEXT: mov v0.h[2], w2
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: shl v0.4s, v0.4s, #22
+; CHECK-NEXT: sshr v0.4s, v0.4s, #22
+; CHECK-NEXT: ret
+entry:
+ %c = sext <3 x i10> %a to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <3 x i64> @sext_v3i10_v3i64(<3 x i10> %a) {
+; CHECK-SD-LABEL: sext_v3i10_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s0, w2
+; CHECK-SD-NEXT: mov v1.s[1], w1
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: shl v2.2d, v0.2d, #54
+; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0
+; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #54
+; CHECK-SD-NEXT: shl v0.2d, v0.2d, #54
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #54
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v3i10_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-GI-NEXT: lsl x8, x2, #54
+; CHECK-GI-NEXT: asr x8, x8, #54
+; CHECK-GI-NEXT: mov v0.d[1], x1
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <3 x i10> %a to <3 x i64>
+ ret <3 x i64> %c
+}
+
+define <4 x i16> @sext_v4i8_v4i16(<4 x i8> %a) {
+; CHECK-LABEL: sext_v4i8_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-NEXT: ret
+entry:
+ %c = sext <4 x i8> %a to <4 x i16>
+ ret <4 x i16> %c
+}
+
+define <4 x i32> @sext_v4i8_v4i32(<4 x i8> %a) {
+; CHECK-LABEL: sext_v4i8_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-NEXT: ret
+entry:
+ %c = sext <4 x i8> %a to <4 x i32>
+ ret <4 x i32> %c
+}
+
+define <4 x i64> @sext_v4i8_v4i64(<4 x i8> %a) {
+; CHECK-SD-LABEL: sext_v4i8_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-NEXT: shl v2.2d, v1.2d, #56
+; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-NEXT: sshr v1.2d, v0.2d, #56
+; CHECK-SD-NEXT: sshr v0.2d, v2.2d, #56
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v4i8_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-GI-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <4 x i8> %a to <4 x i64>
+ ret <4 x i64> %c
+}
+
+define <4 x i32> @sext_v4i16_v4i32(<4 x i16> %a) {
+; CHECK-LABEL: sext_v4i16_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+entry:
+ %c = sext <4 x i16> %a to <4 x i32>
+ ret <4 x i32> %c
+}
+
+define <4 x i64> @sext_v4i16_v4i64(<4 x i16> %a) {
+; CHECK-SD-LABEL: sext_v4i16_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v4i16_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <4 x i16> %a to <4 x i64>
+ ret <4 x i64> %c
+}
+
+define <4 x i64> @sext_v4i32_v4i64(<4 x i32> %a) {
+; CHECK-SD-LABEL: sext_v4i32_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v4i32_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <4 x i32> %a to <4 x i64>
+ ret <4 x i64> %c
+}
+
+define <4 x i16> @sext_v4i10_v4i16(<4 x i10> %a) {
+; CHECK-LABEL: sext_v4i10_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.4h, v0.4h, #6
+; CHECK-NEXT: sshr v0.4h, v0.4h, #6
+; CHECK-NEXT: ret
+entry:
+ %c = sext <4 x i10> %a to <4 x i16>
+ ret <4 x i16> %c
+}
+
+define <4 x i32> @sext_v4i10_v4i32(<4 x i10> %a) {
+; CHECK-LABEL: sext_v4i10_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: shl v0.4s, v0.4s, #22
+; CHECK-NEXT: sshr v0.4s, v0.4s, #22
+; CHECK-NEXT: ret
+entry:
+ %c = sext <4 x i10> %a to <4 x i32>
+ ret <4 x i32> %c
+}
+
+define <4 x i64> @sext_v4i10_v4i64(<4 x i10> %a) {
+; CHECK-SD-LABEL: sext_v4i10_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-NEXT: shl v2.2d, v1.2d, #54
+; CHECK-SD-NEXT: shl v0.2d, v0.2d, #54
+; CHECK-SD-NEXT: sshr v1.2d, v0.2d, #54
+; CHECK-SD-NEXT: sshr v0.2d, v2.2d, #54
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v4i10_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54
+; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #54
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <4 x i10> %a to <4 x i64>
+ ret <4 x i64> %c
+}
+
+define <8 x i16> @sext_v8i8_v8i16(<8 x i8> %a) {
+; CHECK-LABEL: sext_v8i8_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+entry:
+ %c = sext <8 x i8> %a to <8 x i16>
+ ret <8 x i16> %c
+}
+
+define <8 x i32> @sext_v8i8_v8i32(<8 x i8> %a) {
+; CHECK-SD-LABEL: sext_v8i8_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v8i8_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <8 x i8> %a to <8 x i32>
+ ret <8 x i32> %c
+}
+
+define <8 x i64> @sext_v8i8_v8i64(<8 x i8> %a) {
+; CHECK-SD-LABEL: sext_v8i8_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v8i8_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: sshll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <8 x i8> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <8 x i32> @sext_v8i16_v8i32(<8 x i16> %a) {
+; CHECK-SD-LABEL: sext_v8i16_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v8i16_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %c
+}
+
+define <8 x i64> @sext_v8i16_v8i64(<8 x i16> %a) {
+; CHECK-SD-LABEL: sext_v8i16_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v8i16_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: sshll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <8 x i16> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <8 x i64> @sext_v8i32_v8i64(<8 x i32> %a) {
+; CHECK-SD-LABEL: sext_v8i32_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v4.2d, v0.4s, #0
+; CHECK-SD-NEXT: sshll2 v3.2d, v1.4s, #0
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v8i32_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <8 x i32> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <8 x i16> @sext_v8i10_v8i16(<8 x i10> %a) {
+; CHECK-LABEL: sext_v8i10_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.8h, v0.8h, #6
+; CHECK-NEXT: sshr v0.8h, v0.8h, #6
+; CHECK-NEXT: ret
+entry:
+ %c = sext <8 x i10> %a to <8 x i16>
+ ret <8 x i16> %c
+}
+
+define <8 x i32> @sext_v8i10_v8i32(<8 x i10> %a) {
+; CHECK-SD-LABEL: sext_v8i10_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-SD-NEXT: shl v2.4s, v1.4s, #22
+; CHECK-SD-NEXT: shl v0.4s, v0.4s, #22
+; CHECK-SD-NEXT: sshr v1.4s, v0.4s, #22
+; CHECK-SD-NEXT: sshr v0.4s, v2.4s, #22
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v8i10_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: shl v0.4s, v0.4s, #22
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #22
+; CHECK-GI-NEXT: shl v1.4s, v1.4s, #22
+; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #22
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <8 x i10> %a to <8 x i32>
+ ret <8 x i32> %c
+}
+
+define <8 x i64> @sext_v8i10_v8i64(<8 x i10> %a) {
+; CHECK-SD-LABEL: sext_v8i10_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-NEXT: shl v1.2d, v1.2d, #54
+; CHECK-SD-NEXT: shl v2.2d, v2.2d, #54
+; CHECK-SD-NEXT: shl v4.2d, v0.2d, #54
+; CHECK-SD-NEXT: shl v5.2d, v3.2d, #54
+; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #54
+; CHECK-SD-NEXT: sshr v0.2d, v2.2d, #54
+; CHECK-SD-NEXT: sshr v3.2d, v4.2d, #54
+; CHECK-SD-NEXT: sshr v2.2d, v5.2d, #54
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v8i10_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: shl v2.2d, v2.2d, #54
+; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT: shl v4.2d, v1.2d, #54
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT: shl v3.2d, v3.2d, #54
+; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #54
+; CHECK-GI-NEXT: sshr v2.2d, v4.2d, #54
+; CHECK-GI-NEXT: sshr v3.2d, v3.2d, #54
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <8 x i10> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <16 x i16> @sext_v16i8_v16i16(<16 x i8> %a) {
+; CHECK-SD-LABEL: sext_v16i8_v16i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i8_v16i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <16 x i8> %a to <16 x i16>
+ ret <16 x i16> %c
+}
+
+define <16 x i32> @sext_v16i8_v16i32(<16 x i8> %a) {
+; CHECK-SD-LABEL: sext_v16i8_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v2.8h, v0.16b, #0
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll2 v3.4s, v2.8h, #0
+; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i8_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT: sshll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <16 x i8> %a to <16 x i32>
+ ret <16 x i32> %c
+}
+
+define <16 x i64> @sext_v16i8_v16i64(<16 x i8> %a) {
+; CHECK-SD-LABEL: sext_v16i8_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll2 v2.4s, v1.8h, #0
+; CHECK-SD-NEXT: sshll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v16.4s, v1.4h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll2 v7.2d, v2.4s, #0
+; CHECK-SD-NEXT: sshll2 v3.2d, v4.4s, #0
+; CHECK-SD-NEXT: sshll2 v5.2d, v16.4s, #0
+; CHECK-SD-NEXT: sshll v6.2d, v2.2s, #0
+; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: sshll v2.2d, v4.2s, #0
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll v4.2d, v16.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i8_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d5, v2.d[1]
+; CHECK-GI-NEXT: sshll v4.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll v6.4s, v2.4h, #0
+; CHECK-GI-NEXT: sshll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v16.4s, v5.4h, #0
+; CHECK-GI-NEXT: mov d3, v4.d[1]
+; CHECK-GI-NEXT: mov d7, v6.d[1]
+; CHECK-GI-NEXT: mov d17, v16.d[1]
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v4.2s, #0
+; CHECK-GI-NEXT: sshll v4.2d, v6.2s, #0
+; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v5.2d, v7.2s, #0
+; CHECK-GI-NEXT: sshll v6.2d, v16.2s, #0
+; CHECK-GI-NEXT: sshll v7.2d, v17.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <16 x i8> %a to <16 x i64>
+ ret <16 x i64> %c
+}
+
+define <16 x i32> @sext_v16i16_v16i32(<16 x i16> %a) {
+; CHECK-SD-LABEL: sext_v16i16_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll2 v3.4s, v1.8h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-SD-NEXT: mov v1.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i16_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll v1.4s, v3.4h, #0
+; CHECK-GI-NEXT: sshll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <16 x i16> %a to <16 x i32>
+ ret <16 x i32> %c
+}
+
+define <16 x i64> @sext_v16i16_v16i64(<16 x i16> %a) {
+; CHECK-SD-LABEL: sext_v16i16_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll2 v4.4s, v1.8h, #0
+; CHECK-SD-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT: sshll2 v16.2d, v0.4s, #0
+; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: sshll2 v7.2d, v4.4s, #0
+; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: sshll2 v5.2d, v1.4s, #0
+; CHECK-SD-NEXT: sshll v6.2d, v4.2s, #0
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll v4.2d, v1.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v16.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i16_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov d1, v2.d[1]
+; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov d6, v5.d[1]
+; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT: sshll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: mov d7, v2.d[1]
+; CHECK-GI-NEXT: mov d16, v3.d[1]
+; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT: sshll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v3.2d, v7.2s, #0
+; CHECK-GI-NEXT: sshll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <16 x i16> %a to <16 x i64>
+ ret <16 x i64> %c
+}
+
+define <16 x i64> @sext_v16i32_v16i64(<16 x i32> %a) {
+; CHECK-SD-LABEL: sext_v16i32_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sshll2 v17.2d, v0.4s, #0
+; CHECK-SD-NEXT: sshll2 v18.2d, v1.4s, #0
+; CHECK-SD-NEXT: sshll v16.2d, v1.2s, #0
+; CHECK-SD-NEXT: sshll2 v5.2d, v2.4s, #0
+; CHECK-SD-NEXT: sshll2 v7.2d, v3.4s, #0
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-SD-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v17.16b
+; CHECK-SD-NEXT: mov v2.16b, v16.16b
+; CHECK-SD-NEXT: mov v3.16b, v18.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i32_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d5, v1.d[1]
+; CHECK-GI-NEXT: mov d6, v2.d[1]
+; CHECK-GI-NEXT: sshll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov d2, v3.d[1]
+; CHECK-GI-NEXT: sshll v17.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll v18.2d, v5.2s, #0
+; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v7.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov v0.16b, v16.16b
+; CHECK-GI-NEXT: mov v2.16b, v17.16b
+; CHECK-GI-NEXT: mov v3.16b, v18.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <16 x i32> %a to <16 x i64>
+ ret <16 x i64> %c
+}
+
+define <16 x i16> @sext_v16i10_v16i16(<16 x i10> %a) {
+; CHECK-LABEL: sext_v16i10_v16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr w8, [sp]
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: ldr w9, [sp, #16]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ldr w8, [sp, #8]
+; CHECK-NEXT: mov v1.h[1], w1
+; CHECK-NEXT: mov v0.h[1], w8
+; CHECK-NEXT: ldr w8, [sp, #24]
+; CHECK-NEXT: mov v1.h[2], w2
+; CHECK-NEXT: mov v0.h[2], w9
+; CHECK-NEXT: ldr w9, [sp, #32]
+; CHECK-NEXT: mov v1.h[3], w3
+; CHECK-NEXT: mov v0.h[3], w8
+; CHECK-NEXT: ldr w8, [sp, #40]
+; CHECK-NEXT: mov v1.h[4], w4
+; CHECK-NEXT: mov v0.h[4], w9
+; CHECK-NEXT: ldr w9, [sp, #48]
+; CHECK-NEXT: mov v1.h[5], w5
+; CHECK-NEXT: mov v0.h[5], w8
+; CHECK-NEXT: ldr w8, [sp, #56]
+; CHECK-NEXT: mov v1.h[6], w6
+; CHECK-NEXT: mov v0.h[6], w9
+; CHECK-NEXT: mov v1.h[7], w7
+; CHECK-NEXT: mov v0.h[7], w8
+; CHECK-NEXT: shl v1.8h, v1.8h, #6
+; CHECK-NEXT: shl v2.8h, v0.8h, #6
+; CHECK-NEXT: sshr v0.8h, v1.8h, #6
+; CHECK-NEXT: sshr v1.8h, v2.8h, #6
+; CHECK-NEXT: ret
+entry:
+ %c = sext <16 x i10> %a to <16 x i16>
+ ret <16 x i16> %c
+}
+
+define <16 x i32> @sext_v16i10_v16i32(<16 x i10> %a) {
+; CHECK-SD-LABEL: sext_v16i10_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr w11, [sp, #32]
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: ldr w12, [sp]
+; CHECK-SD-NEXT: fmov s1, w4
+; CHECK-SD-NEXT: ldr w10, [sp, #40]
+; CHECK-SD-NEXT: ldr w15, [sp, #8]
+; CHECK-SD-NEXT: fmov s3, w11
+; CHECK-SD-NEXT: fmov s2, w12
+; CHECK-SD-NEXT: ldr w9, [sp, #48]
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: ldr w14, [sp, #16]
+; CHECK-SD-NEXT: mov v1.h[1], w5
+; CHECK-SD-NEXT: ldr w8, [sp, #56]
+; CHECK-SD-NEXT: mov v2.h[1], w15
+; CHECK-SD-NEXT: ldr w13, [sp, #24]
+; CHECK-SD-NEXT: mov v3.h[1], w10
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: mov v1.h[2], w6
+; CHECK-SD-NEXT: mov v2.h[2], w14
+; CHECK-SD-NEXT: mov v3.h[2], w9
+; CHECK-SD-NEXT: mov v0.h[3], w3
+; CHECK-SD-NEXT: mov v1.h[3], w7
+; CHECK-SD-NEXT: mov v2.h[3], w13
+; CHECK-SD-NEXT: mov v3.h[3], w8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-SD-NEXT: shl v0.4s, v0.4s, #22
+; CHECK-SD-NEXT: shl v1.4s, v1.4s, #22
+; CHECK-SD-NEXT: shl v2.4s, v2.4s, #22
+; CHECK-SD-NEXT: shl v3.4s, v3.4s, #22
+; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #22
+; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #22
+; CHECK-SD-NEXT: sshr v2.4s, v2.4s, #22
+; CHECK-SD-NEXT: sshr v3.4s, v3.4s, #22
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i10_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [sp]
+; CHECK-GI-NEXT: fmov s7, w0
+; CHECK-GI-NEXT: ldr s1, [sp, #8]
+; CHECK-GI-NEXT: fmov s17, w4
+; CHECK-GI-NEXT: ldr s4, [sp, #32]
+; CHECK-GI-NEXT: ldr s5, [sp, #40]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: ldr s2, [sp, #16]
+; CHECK-GI-NEXT: mov v7.s[1], w1
+; CHECK-GI-NEXT: ldr s6, [sp, #48]
+; CHECK-GI-NEXT: mov v17.s[1], w5
+; CHECK-GI-NEXT: ldr s3, [sp, #24]
+; CHECK-GI-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT: ldr s16, [sp, #56]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v7.s[2], w2
+; CHECK-GI-NEXT: mov v17.s[2], w6
+; CHECK-GI-NEXT: mov v4.s[2], v6.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT: mov v7.s[3], w3
+; CHECK-GI-NEXT: mov v17.s[3], w7
+; CHECK-GI-NEXT: mov v4.s[3], v16.s[0]
+; CHECK-GI-NEXT: shl v3.4s, v0.4s, #22
+; CHECK-GI-NEXT: shl v1.4s, v7.4s, #22
+; CHECK-GI-NEXT: shl v2.4s, v17.4s, #22
+; CHECK-GI-NEXT: shl v4.4s, v4.4s, #22
+; CHECK-GI-NEXT: sshr v0.4s, v1.4s, #22
+; CHECK-GI-NEXT: sshr v1.4s, v2.4s, #22
+; CHECK-GI-NEXT: sshr v2.4s, v3.4s, #22
+; CHECK-GI-NEXT: sshr v3.4s, v4.4s, #22
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <16 x i10> %a to <16 x i32>
+ ret <16 x i32> %c
+}
+
+define <16 x i64> @sext_v16i10_v16i64(<16 x i10> %a) {
+; CHECK-SD-LABEL: sext_v16i10_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr s0, [sp]
+; CHECK-SD-NEXT: add x8, sp, #8
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s2, w2
+; CHECK-SD-NEXT: fmov s3, w4
+; CHECK-SD-NEXT: fmov s4, w6
+; CHECK-SD-NEXT: ld1 { v0.s }[1], [x8]
+; CHECK-SD-NEXT: add x8, sp, #24
+; CHECK-SD-NEXT: ldr s5, [sp, #16]
+; CHECK-SD-NEXT: add x9, sp, #40
+; CHECK-SD-NEXT: ldr s6, [sp, #32]
+; CHECK-SD-NEXT: add x10, sp, #56
+; CHECK-SD-NEXT: ldr s7, [sp, #48]
+; CHECK-SD-NEXT: mov v1.s[1], w1
+; CHECK-SD-NEXT: ld1 { v5.s }[1], [x8]
+; CHECK-SD-NEXT: mov v2.s[1], w3
+; CHECK-SD-NEXT: ld1 { v6.s }[1], [x9]
+; CHECK-SD-NEXT: mov v3.s[1], w5
+; CHECK-SD-NEXT: ld1 { v7.s }[1], [x10]
+; CHECK-SD-NEXT: mov v4.s[1], w7
+; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT: ushll v16.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v5.2d, v5.2s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
+; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-SD-NEXT: shl v0.2d, v1.2d, #54
+; CHECK-SD-NEXT: shl v1.2d, v2.2d, #54
+; CHECK-SD-NEXT: shl v2.2d, v3.2d, #54
+; CHECK-SD-NEXT: shl v3.2d, v4.2d, #54
+; CHECK-SD-NEXT: shl v4.2d, v16.2d, #54
+; CHECK-SD-NEXT: shl v5.2d, v5.2d, #54
+; CHECK-SD-NEXT: shl v6.2d, v6.2d, #54
+; CHECK-SD-NEXT: shl v7.2d, v7.2d, #54
+; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #54
+; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #54
+; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #54
+; CHECK-SD-NEXT: sshr v3.2d, v3.2d, #54
+; CHECK-SD-NEXT: sshr v4.2d, v4.2d, #54
+; CHECK-SD-NEXT: sshr v5.2d, v5.2d, #54
+; CHECK-SD-NEXT: sshr v6.2d, v6.2d, #54
+; CHECK-SD-NEXT: sshr v7.2d, v7.2d, #54
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i10_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [sp]
+; CHECK-GI-NEXT: fmov s6, w0
+; CHECK-GI-NEXT: ldr s1, [sp, #8]
+; CHECK-GI-NEXT: fmov s16, w2
+; CHECK-GI-NEXT: ldr s2, [sp, #16]
+; CHECK-GI-NEXT: fmov s18, w4
+; CHECK-GI-NEXT: ldr s3, [sp, #24]
+; CHECK-GI-NEXT: fmov s19, w6
+; CHECK-GI-NEXT: ldr s4, [sp, #32]
+; CHECK-GI-NEXT: ldr s5, [sp, #40]
+; CHECK-GI-NEXT: ldr s7, [sp, #48]
+; CHECK-GI-NEXT: ldr s17, [sp, #56]
+; CHECK-GI-NEXT: mov v6.s[1], w1
+; CHECK-GI-NEXT: mov v16.s[1], w3
+; CHECK-GI-NEXT: mov v18.s[1], w5
+; CHECK-GI-NEXT: mov v19.s[1], w7
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v2.s[1], v3.s[0]
+; CHECK-GI-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT: mov v7.s[1], v17.s[0]
+; CHECK-GI-NEXT: ushll v1.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v16.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v18.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v19.2s, #0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54
+; CHECK-GI-NEXT: shl v3.2d, v3.2d, #54
+; CHECK-GI-NEXT: shl v5.2d, v5.2d, #54
+; CHECK-GI-NEXT: shl v6.2d, v6.2d, #54
+; CHECK-GI-NEXT: shl v16.2d, v0.2d, #54
+; CHECK-GI-NEXT: shl v17.2d, v2.2d, #54
+; CHECK-GI-NEXT: shl v18.2d, v4.2d, #54
+; CHECK-GI-NEXT: shl v7.2d, v7.2d, #54
+; CHECK-GI-NEXT: sshr v0.2d, v1.2d, #54
+; CHECK-GI-NEXT: sshr v1.2d, v3.2d, #54
+; CHECK-GI-NEXT: sshr v2.2d, v5.2d, #54
+; CHECK-GI-NEXT: sshr v3.2d, v6.2d, #54
+; CHECK-GI-NEXT: sshr v4.2d, v16.2d, #54
+; CHECK-GI-NEXT: sshr v5.2d, v17.2d, #54
+; CHECK-GI-NEXT: sshr v6.2d, v18.2d, #54
+; CHECK-GI-NEXT: sshr v7.2d, v7.2d, #54
+; CHECK-GI-NEXT: ret
+entry:
+ %c = sext <16 x i10> %a to <16 x i64>
+ ret <16 x i64> %c
+}
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
new file mode 100644
index 00000000000000..8ac9dd8fdc62bc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -0,0 +1,1345 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for zext_v16i10_v16i16
+
+define i16 @zext_i8_to_i16(i8 %a) {
+; CHECK-LABEL: zext_i8_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w0, w0, #0xff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i8 %a to i16
+ ret i16 %c
+}
+
+define i32 @zext_i8_to_i32(i8 %a) {
+; CHECK-LABEL: zext_i8_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w0, w0, #0xff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i8 %a to i32
+ ret i32 %c
+}
+
+define i64 @zext_i8_to_i64(i8 %a) {
+; CHECK-LABEL: zext_i8_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x0, x0, #0xff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i8 %a to i64
+ ret i64 %c
+}
+
+define i10 @zext_i8_to_i10(i8 %a) {
+; CHECK-LABEL: zext_i8_to_i10:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w0, w0, #0xff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i8 %a to i10
+ ret i10 %c
+}
+
+define i32 @zext_i16_to_i32(i16 %a) {
+; CHECK-LABEL: zext_i16_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w0, w0, #0xffff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i16 %a to i32
+ ret i32 %c
+}
+
+define i64 @zext_i16_to_i64(i16 %a) {
+; CHECK-LABEL: zext_i16_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x0, x0, #0xffff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i16 %a to i64
+ ret i64 %c
+}
+
+define i64 @zext_i32_to_i64(i32 %a) {
+; CHECK-LABEL: zext_i32_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, w0
+; CHECK-NEXT: ret
+entry:
+ %c = zext i32 %a to i64
+ ret i64 %c
+}
+
+define i16 @zext_i10_to_i16(i10 %a) {
+; CHECK-LABEL: zext_i10_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w0, w0, #0x3ff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i10 %a to i16
+ ret i16 %c
+}
+
+define i32 @zext_i10_to_i32(i10 %a) {
+; CHECK-LABEL: zext_i10_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w0, w0, #0x3ff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i10 %a to i32
+ ret i32 %c
+}
+
+define i64 @zext_i10_to_i64(i10 %a) {
+; CHECK-LABEL: zext_i10_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x0, x0, #0x3ff
+; CHECK-NEXT: ret
+entry:
+ %c = zext i10 %a to i64
+ ret i64 %c
+}
+
+define <2 x i16> @zext_v2i8_v2i16(<2 x i8> %a) {
+; CHECK-SD-LABEL: zext_v2i8_v2i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i8_v2i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI10_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <2 x i8> %a to <2 x i16>
+ ret <2 x i16> %c
+}
+
+define <2 x i32> @zext_v2i8_v2i32(<2 x i8> %a) {
+; CHECK-SD-LABEL: zext_v2i8_v2i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i8_v2i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI11_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <2 x i8> %a to <2 x i32>
+ ret <2 x i32> %c
+}
+
+define <2 x i64> @zext_v2i8_v2i64(<2 x i8> %a) {
+; CHECK-SD-LABEL: zext_v2i8_v2i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i8_v2i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI12_0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <2 x i8> %a to <2 x i64>
+ ret <2 x i64> %c
+}
+
+define <2 x i32> @zext_v2i16_v2i32(<2 x i16> %a) {
+; CHECK-SD-LABEL: zext_v2i16_v2i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i16_v2i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI13_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <2 x i16> %a to <2 x i32>
+ ret <2 x i32> %c
+}
+
+define <2 x i64> @zext_v2i16_v2i64(<2 x i16> %a) {
+; CHECK-SD-LABEL: zext_v2i16_v2i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i16_v2i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI14_0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <2 x i16> %a to <2 x i64>
+ ret <2 x i64> %c
+}
+
+define <2 x i64> @zext_v2i32_v2i64(<2 x i32> %a) {
+; CHECK-LABEL: zext_v2i32_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+entry:
+ %c = zext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %c
+}
+
+define <2 x i16> @zext_v2i10_v2i16(<2 x i10> %a) {
+; CHECK-SD-LABEL: zext_v2i10_v2i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v1.2s, #3, msl #8
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i10_v2i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI16_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI16_0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <2 x i10> %a to <2 x i16>
+ ret <2 x i16> %c
+}
+
+define <2 x i32> @zext_v2i10_v2i32(<2 x i10> %a) {
+; CHECK-SD-LABEL: zext_v2i10_v2i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v1.2s, #3, msl #8
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i10_v2i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI17_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <2 x i10> %a to <2 x i32>
+ ret <2 x i32> %c
+}
+
+define <2 x i64> @zext_v2i10_v2i64(<2 x i10> %a) {
+; CHECK-SD-LABEL: zext_v2i10_v2i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v1.2s, #3, msl #8
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i10_v2i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI18_0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <2 x i10> %a to <2 x i64>
+ ret <2 x i64> %c
+}
+
+define <3 x i16> @zext_v3i8_v3i16(<3 x i8> %a) {
+; CHECK-SD-LABEL: zext_v3i8_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i8_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #255 // =0xff
+; CHECK-GI-NEXT: fmov s1, w0
+; CHECK-GI-NEXT: fmov s2, w1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT: fmov s2, w2
+; CHECK-GI-NEXT: mov v3.16b, v0.16b
+; CHECK-GI-NEXT: mov v3.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NEXT: mov v3.h[2], v0.h[0]
+; CHECK-GI-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NEXT: mov v3.h[3], v0.h[0]
+; CHECK-GI-NEXT: and v0.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i8> %a to <3 x i16>
+ ret <3 x i16> %c
+}
+
+define <3 x i32> @zext_v3i8_v3i32(<3 x i8> %a) {
+; CHECK-SD-LABEL: zext_v3i8_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i8_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #255 // =0xff
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v0.s[1], w1
+; CHECK-GI-NEXT: mov v1.s[1], w8
+; CHECK-GI-NEXT: mov v0.s[2], w2
+; CHECK-GI-NEXT: mov v1.s[2], w8
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: mov v1.s[3], w8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i8> %a to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <3 x i64> @zext_v3i8_v3i64(<3 x i8> %a) {
+; CHECK-SD-LABEL: zext_v3i8_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s3, w2
+; CHECK-SD-NEXT: movi v0.2d, #0x000000000000ff
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: mov v1.s[1], w1
+; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: mov v2.b[0], v3.b[0]
+; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i8_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: adrp x8, .LCPI21_0
+; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-GI-NEXT: mov v0.d[1], x1
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-NEXT: and x8, x2, #0xff
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i8> %a to <3 x i64>
+ ret <3 x i64> %c
+}
+
+define <3 x i32> @zext_v3i16_v3i32(<3 x i16> %a) {
+; CHECK-SD-LABEL: zext_v3i16_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i16_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: uxth w8, w8
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov w8, s2
+; CHECK-GI-NEXT: uxth w9, w9
+; CHECK-GI-NEXT: uxth w8, w8
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: mov v0.s[2], w8
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i16> %a to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <3 x i64> @zext_v3i16_v3i64(<3 x i16> %a) {
+; CHECK-SD-LABEL: zext_v3i16_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll2 v2.2d, v2.4s, #0
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i16_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: ubfx x8, x8, #0, #16
+; CHECK-GI-NEXT: ubfx x9, x9, #0, #16
+; CHECK-GI-NEXT: ubfx x10, x10, #0, #16
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: fmov d1, x9
+; CHECK-GI-NEXT: fmov d2, x10
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i16> %a to <3 x i64>
+ ret <3 x i64> %c
+}
+
+define <3 x i64> @zext_v3i32_v3i64(<3 x i32> %a) {
+; CHECK-SD-LABEL: zext_v3i32_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll2 v2.2d, v0.4s, #0
+; CHECK-SD-NEXT: fmov d0, d3
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i32_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: fmov d1, x9
+; CHECK-GI-NEXT: fmov d2, x10
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i32> %a to <3 x i64>
+ ret <3 x i64> %c
+}
+
+define <3 x i16> @zext_v3i10_v3i16(<3 x i10> %a) {
+; CHECK-SD-LABEL: zext_v3i10_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i10_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1023 // =0x3ff
+; CHECK-GI-NEXT: fmov s1, w0
+; CHECK-GI-NEXT: fmov s2, w1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT: fmov s2, w2
+; CHECK-GI-NEXT: mov v3.16b, v0.16b
+; CHECK-GI-NEXT: mov v3.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-GI-NEXT: mov v3.h[2], v0.h[0]
+; CHECK-GI-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NEXT: mov v3.h[3], v0.h[0]
+; CHECK-GI-NEXT: and v0.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i10> %a to <3 x i16>
+ ret <3 x i16> %c
+}
+
+define <3 x i32> @zext_v3i10_v3i32(<3 x i10> %a) {
+; CHECK-SD-LABEL: zext_v3i10_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: movi v1.4s, #3, msl #8
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i10_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1023 // =0x3ff
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v0.s[1], w1
+; CHECK-GI-NEXT: mov v1.s[1], w8
+; CHECK-GI-NEXT: mov v0.s[2], w2
+; CHECK-GI-NEXT: mov v1.s[2], w8
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: mov v1.s[3], w8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i10> %a to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <3 x i64> @zext_v3i10_v3i64(<3 x i10> %a) {
+; CHECK-SD-LABEL: zext_v3i10_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov w8, #1023 // =0x3ff
+; CHECK-SD-NEXT: fmov s3, w2
+; CHECK-SD-NEXT: mov v0.s[1], w1
+; CHECK-SD-NEXT: dup v2.2d, x8
+; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v2.8b, v3.8b, v2.8b
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v3i10_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: adrp x8, .LCPI27_0
+; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-GI-NEXT: mov v0.d[1], x1
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI27_0]
+; CHECK-GI-NEXT: and x8, x2, #0x3ff
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <3 x i10> %a to <3 x i64>
+ ret <3 x i64> %c
+}
+
+define <4 x i16> @zext_v4i8_v4i16(<4 x i8> %a) {
+; CHECK-SD-LABEL: zext_v4i8_v4i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i8_v4i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI28_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI28_0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <4 x i8> %a to <4 x i16>
+ ret <4 x i16> %c
+}
+
+define <4 x i32> @zext_v4i8_v4i32(<4 x i8> %a) {
+; CHECK-SD-LABEL: zext_v4i8_v4i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i8_v4i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI29_0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI29_0]
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <4 x i8> %a to <4 x i32>
+ ret <4 x i32> %c
+}
+
+define <4 x i64> @zext_v4i8_v4i64(<4 x i8> %a) {
+; CHECK-SD-LABEL: zext_v4i8_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i8_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: adrp x8, .LCPI30_0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <4 x i8> %a to <4 x i64>
+ ret <4 x i64> %c
+}
+
+define <4 x i32> @zext_v4i16_v4i32(<4 x i16> %a) {
+; CHECK-LABEL: zext_v4i16_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+entry:
+ %c = zext <4 x i16> %a to <4 x i32>
+ ret <4 x i32> %c
+}
+
+define <4 x i64> @zext_v4i16_v4i64(<4 x i16> %a) {
+; CHECK-SD-LABEL: zext_v4i16_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i16_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <4 x i16> %a to <4 x i64>
+ ret <4 x i64> %c
+}
+
+define <4 x i64> @zext_v4i32_v4i64(<4 x i32> %a) {
+; CHECK-SD-LABEL: zext_v4i32_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i32_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <4 x i32> %a to <4 x i64>
+ ret <4 x i64> %c
+}
+
+define <4 x i16> @zext_v4i10_v4i16(<4 x i10> %a) {
+; CHECK-SD-LABEL: zext_v4i10_v4i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i10_v4i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <4 x i10> %a to <4 x i16>
+ ret <4 x i16> %c
+}
+
+define <4 x i32> @zext_v4i10_v4i32(<4 x i10> %a) {
+; CHECK-SD-LABEL: zext_v4i10_v4i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i10_v4i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <4 x i10> %a to <4 x i32>
+ ret <4 x i32> %c
+}
+
+define <4 x i64> @zext_v4i10_v4i64(<4 x i10> %a) {
+; CHECK-SD-LABEL: zext_v4i10_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v4i10_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: adrp x8, .LCPI36_0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <4 x i10> %a to <4 x i64>
+ ret <4 x i64> %c
+}
+
+define <8 x i16> @zext_v8i8_v8i16(<8 x i8> %a) {
+; CHECK-LABEL: zext_v8i8_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+entry:
+ %c = zext <8 x i8> %a to <8 x i16>
+ ret <8 x i16> %c
+}
+
+define <8 x i32> @zext_v8i8_v8i32(<8 x i8> %a) {
+; CHECK-SD-LABEL: zext_v8i8_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i8_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <8 x i8> %a to <8 x i32>
+ ret <8 x i32> %c
+}
+
+define <8 x i64> @zext_v8i8_v8i64(<8 x i8> %a) {
+; CHECK-SD-LABEL: zext_v8i8_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i8_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: ushll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <8 x i8> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <8 x i32> @zext_v8i16_v8i32(<8 x i16> %a) {
+; CHECK-SD-LABEL: zext_v8i16_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i16_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %c
+}
+
+define <8 x i64> @zext_v8i16_v8i64(<8 x i16> %a) {
+; CHECK-SD-LABEL: zext_v8i16_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i16_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: ushll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <8 x i16> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <8 x i64> @zext_v8i32_v8i64(<8 x i32> %a) {
+; CHECK-SD-LABEL: zext_v8i32_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v4.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v1.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i32_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <8 x i32> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <8 x i16> @zext_v8i10_v8i16(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI43_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI43_0]
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <8 x i10> %a to <8 x i16>
+ ret <8 x i16> %c
+}
+
+define <8 x i32> @zext_v8i10_v8i32(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI44_0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <8 x i10> %a to <8 x i32>
+ ret <8 x i32> %c
+}
+
+define <8 x i64> @zext_v8i10_v8i64(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: adrp x8, .LCPI45_0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI45_0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v1.2s, #0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-GI-NEXT: and v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT: and v2.16b, v5.16b, v3.16b
+; CHECK-GI-NEXT: and v3.16b, v4.16b, v3.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <8 x i10> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <16 x i16> @zext_v16i8_v16i16(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <16 x i8> %a to <16 x i16>
+ ret <16 x i16> %c
+}
+
+define <16 x i32> @zext_v16i8_v16i32(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v2.8h, v0.16b, #0
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll2 v3.4s, v2.8h, #0
+; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <16 x i8> %a to <16 x i32>
+ ret <16 x i32> %c
+}
+
+define <16 x i64> @zext_v16i8_v16i64(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-SD-NEXT: ushll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v16.4s, v1.4h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v7.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v4.4s, #0
+; CHECK-SD-NEXT: ushll2 v5.2d, v16.4s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v4.2s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v16.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d5, v2.d[1]
+; CHECK-GI-NEXT: ushll v4.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v6.4s, v2.4h, #0
+; CHECK-GI-NEXT: ushll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v16.4s, v5.4h, #0
+; CHECK-GI-NEXT: mov d3, v4.d[1]
+; CHECK-GI-NEXT: mov d7, v6.d[1]
+; CHECK-GI-NEXT: mov d17, v16.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v4.2s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v7.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v16.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v17.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <16 x i8> %a to <16 x i64>
+ ret <16 x i64> %c
+}
+
+define <16 x i32> @zext_v16i16_v16i32(<16 x i16> %a) {
+; CHECK-SD-LABEL: zext_v16i16_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-SD-NEXT: mov v1.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i16_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v1.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <16 x i16> %a to <16 x i32>
+ ret <16 x i32> %c
+}
+
+define <16 x i64> @zext_v16i16_v16i64(<16 x i16> %a) {
+; CHECK-SD-LABEL: zext_v16i16_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v4.4s, v1.8h, #0
+; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT: ushll2 v16.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v7.2d, v4.4s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll2 v5.2d, v1.4s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v4.2s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v1.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v16.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i16_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov d1, v2.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov d6, v5.d[1]
+; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: mov d7, v2.d[1]
+; CHECK-GI-NEXT: mov d16, v3.d[1]
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v7.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <16 x i16> %a to <16 x i64>
+ ret <16 x i64> %c
+}
+
+define <16 x i64> @zext_v16i32_v16i64(<16 x i32> %a) {
+; CHECK-SD-LABEL: zext_v16i32_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v17.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll2 v18.2d, v1.4s, #0
+; CHECK-SD-NEXT: ushll v16.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll2 v5.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v7.2d, v3.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v17.16b
+; CHECK-SD-NEXT: mov v2.16b, v16.16b
+; CHECK-SD-NEXT: mov v3.16b, v18.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i32_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d5, v1.d[1]
+; CHECK-GI-NEXT: mov d6, v2.d[1]
+; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov d2, v3.d[1]
+; CHECK-GI-NEXT: ushll v17.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll v18.2d, v5.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov v0.16b, v16.16b
+; CHECK-GI-NEXT: mov v2.16b, v17.16b
+; CHECK-GI-NEXT: mov v3.16b, v18.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <16 x i32> %a to <16 x i64>
+ ret <16 x i64> %c
+}
+
+define <16 x i16> @zext_v16i10_v16i16(<16 x i10> %a) {
+; CHECK-LABEL: zext_v16i10_v16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr w8, [sp]
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: ldr w8, [sp, #8]
+; CHECK-NEXT: mov v0.h[1], w1
+; CHECK-NEXT: mov v1.h[1], w8
+; CHECK-NEXT: ldr w8, [sp, #16]
+; CHECK-NEXT: mov v0.h[2], w2
+; CHECK-NEXT: mov v1.h[2], w8
+; CHECK-NEXT: ldr w8, [sp, #24]
+; CHECK-NEXT: mov v0.h[3], w3
+; CHECK-NEXT: mov v1.h[3], w8
+; CHECK-NEXT: ldr w8, [sp, #32]
+; CHECK-NEXT: mov v0.h[4], w4
+; CHECK-NEXT: mov v1.h[4], w8
+; CHECK-NEXT: ldr w8, [sp, #40]
+; CHECK-NEXT: mov v0.h[5], w5
+; CHECK-NEXT: mov v1.h[5], w8
+; CHECK-NEXT: ldr w8, [sp, #48]
+; CHECK-NEXT: mov v0.h[6], w6
+; CHECK-NEXT: mov v1.h[6], w8
+; CHECK-NEXT: ldr w8, [sp, #56]
+; CHECK-NEXT: mov v0.h[7], w7
+; CHECK-NEXT: mov v1.h[7], w8
+; CHECK-NEXT: bic v0.8h, #252, lsl #8
+; CHECK-NEXT: bic v1.8h, #252, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %c = zext <16 x i10> %a to <16 x i16>
+ ret <16 x i16> %c
+}
+
+define <16 x i32> @zext_v16i10_v16i32(<16 x i10> %a) {
+; CHECK-SD-LABEL: zext_v16i10_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr w11, [sp, #32]
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: ldr w13, [sp]
+; CHECK-SD-NEXT: fmov s1, w4
+; CHECK-SD-NEXT: ldr w10, [sp, #40]
+; CHECK-SD-NEXT: ldr w15, [sp, #8]
+; CHECK-SD-NEXT: fmov s3, w11
+; CHECK-SD-NEXT: fmov s2, w13
+; CHECK-SD-NEXT: ldr w9, [sp, #48]
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: ldr w14, [sp, #16]
+; CHECK-SD-NEXT: mov v1.h[1], w5
+; CHECK-SD-NEXT: ldr w8, [sp, #56]
+; CHECK-SD-NEXT: mov v2.h[1], w15
+; CHECK-SD-NEXT: ldr w12, [sp, #24]
+; CHECK-SD-NEXT: mov v3.h[1], w10
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: mov v1.h[2], w6
+; CHECK-SD-NEXT: mov v2.h[2], w14
+; CHECK-SD-NEXT: mov v3.h[2], w9
+; CHECK-SD-NEXT: mov v0.h[3], w3
+; CHECK-SD-NEXT: mov v1.h[3], w7
+; CHECK-SD-NEXT: mov v2.h[3], w12
+; CHECK-SD-NEXT: mov v3.h[3], w8
+; CHECK-SD-NEXT: movi v4.4s, #3, msl #8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v4.16b
+; CHECK-SD-NEXT: and v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT: and v3.16b, v3.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i10_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [sp]
+; CHECK-GI-NEXT: fmov s16, w0
+; CHECK-GI-NEXT: ldr s1, [sp, #8]
+; CHECK-GI-NEXT: fmov s17, w4
+; CHECK-GI-NEXT: ldr s4, [sp, #32]
+; CHECK-GI-NEXT: adrp x8, .LCPI53_0
+; CHECK-GI-NEXT: ldr s5, [sp, #40]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: ldr s2, [sp, #16]
+; CHECK-GI-NEXT: mov v16.s[1], w1
+; CHECK-GI-NEXT: ldr s6, [sp, #48]
+; CHECK-GI-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT: ldr s3, [sp, #24]
+; CHECK-GI-NEXT: mov v17.s[1], w5
+; CHECK-GI-NEXT: ldr s7, [sp, #56]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI53_0]
+; CHECK-GI-NEXT: mov v16.s[2], w2
+; CHECK-GI-NEXT: mov v4.s[2], v6.s[0]
+; CHECK-GI-NEXT: mov v17.s[2], w6
+; CHECK-GI-NEXT: mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT: mov v16.s[3], w3
+; CHECK-GI-NEXT: mov v4.s[3], v7.s[0]
+; CHECK-GI-NEXT: mov v17.s[3], w7
+; CHECK-GI-NEXT: and v2.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v16.16b, v1.16b
+; CHECK-GI-NEXT: and v3.16b, v4.16b, v1.16b
+; CHECK-GI-NEXT: and v1.16b, v17.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <16 x i10> %a to <16 x i32>
+ ret <16 x i32> %c
+}
+
+define <16 x i64> @zext_v16i10_v16i64(<16 x i10> %a) {
+; CHECK-SD-LABEL: zext_v16i10_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1023 // =0x3ff
+; CHECK-SD-NEXT: ldr s4, [sp]
+; CHECK-SD-NEXT: ldr s5, [sp, #16]
+; CHECK-SD-NEXT: add x9, sp, #24
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: fmov s1, w2
+; CHECK-SD-NEXT: dup v7.2d, x8
+; CHECK-SD-NEXT: add x8, sp, #8
+; CHECK-SD-NEXT: fmov s2, w4
+; CHECK-SD-NEXT: fmov s3, w6
+; CHECK-SD-NEXT: ld1 { v5.s }[1], [x9]
+; CHECK-SD-NEXT: add x9, sp, #56
+; CHECK-SD-NEXT: ld1 { v4.s }[1], [x8]
+; CHECK-SD-NEXT: add x8, sp, #40
+; CHECK-SD-NEXT: ldr s6, [sp, #32]
+; CHECK-SD-NEXT: ldr s16, [sp, #48]
+; CHECK-SD-NEXT: mov v0.s[1], w1
+; CHECK-SD-NEXT: mov v1.s[1], w3
+; CHECK-SD-NEXT: ld1 { v6.s }[1], [x8]
+; CHECK-SD-NEXT: mov v2.s[1], w5
+; CHECK-SD-NEXT: ld1 { v16.s }[1], [x9]
+; CHECK-SD-NEXT: mov v3.s[1], w7
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT: ushll v5.2d, v5.2s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
+; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v7.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v7.16b
+; CHECK-SD-NEXT: and v2.16b, v2.16b, v7.16b
+; CHECK-SD-NEXT: and v3.16b, v3.16b, v7.16b
+; CHECK-SD-NEXT: and v4.16b, v4.16b, v7.16b
+; CHECK-SD-NEXT: and v5.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT: and v6.16b, v6.16b, v7.16b
+; CHECK-SD-NEXT: and v7.16b, v16.16b, v7.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i10_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [sp]
+; CHECK-GI-NEXT: fmov s6, w0
+; CHECK-GI-NEXT: ldr s1, [sp, #8]
+; CHECK-GI-NEXT: fmov s16, w2
+; CHECK-GI-NEXT: ldr s2, [sp, #16]
+; CHECK-GI-NEXT: fmov s18, w4
+; CHECK-GI-NEXT: ldr s3, [sp, #24]
+; CHECK-GI-NEXT: fmov s19, w6
+; CHECK-GI-NEXT: ldr s4, [sp, #32]
+; CHECK-GI-NEXT: adrp x8, .LCPI54_0
+; CHECK-GI-NEXT: ldr s5, [sp, #40]
+; CHECK-GI-NEXT: ldr s7, [sp, #48]
+; CHECK-GI-NEXT: ldr s17, [sp, #56]
+; CHECK-GI-NEXT: mov v6.s[1], w1
+; CHECK-GI-NEXT: mov v16.s[1], w3
+; CHECK-GI-NEXT: mov v18.s[1], w5
+; CHECK-GI-NEXT: mov v19.s[1], w7
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v2.s[1], v3.s[0]
+; CHECK-GI-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT: mov v7.s[1], v17.s[0]
+; CHECK-GI-NEXT: ldr q17, [x8, :lo12:.LCPI54_0]
+; CHECK-GI-NEXT: ushll v1.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v16.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v18.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v19.2s, #0
+; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v18.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v19.2d, v4.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v17.16b
+; CHECK-GI-NEXT: and v1.16b, v3.16b, v17.16b
+; CHECK-GI-NEXT: and v2.16b, v5.16b, v17.16b
+; CHECK-GI-NEXT: and v3.16b, v6.16b, v17.16b
+; CHECK-GI-NEXT: and v4.16b, v16.16b, v17.16b
+; CHECK-GI-NEXT: and v5.16b, v18.16b, v17.16b
+; CHECK-GI-NEXT: and v6.16b, v19.16b, v17.16b
+; CHECK-GI-NEXT: and v7.16b, v7.16b, v17.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = zext <16 x i10> %a to <16 x i64>
+ ret <16 x i64> %c
+}
More information about the llvm-commits
mailing list