[llvm] d88d983 - [AArch64][GlobalISel] Support more types for TRUNC (#66927)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 11 08:05:31 PDT 2023
Author: chuongg3
Date: 2023-10-11T16:05:25+01:00
New Revision: d88d9834e985ad1feede6d9ce16b6d5c412b30d1
URL: https://github.com/llvm/llvm-project/commit/d88d9834e985ad1feede6d9ce16b6d5c412b30d1
DIFF: https://github.com/llvm/llvm-project/commit/d88d9834e985ad1feede6d9ce16b6d5c412b30d1.diff
LOG: [AArch64][GlobalISel] Support more types for TRUNC (#66927)
G_TRUNC will get lowered into trunc(merge(trunc(unmerge),
trunc(unmerge))) if the source vector is larger than 128 bits or if the
truncation reduces the element size by more than half.
Now mirrors ZEXT/SEXT code more closely for vector types.
Added:
llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
llvm/test/CodeGen/AArch64/xtn.ll
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
llvm/test/CodeGen/AArch64/zext.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 8516a28d0052d21..86d3cb2bedb95b6 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -384,6 +384,7 @@ class LegalizerHelper {
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
LegalizeResult lowerFunnelShift(MachineInstr &MI);
LegalizeResult lowerEXT(MachineInstr &MI);
+ LegalizeResult lowerTRUNC(MachineInstr &MI);
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI);
LegalizeResult lowerRotate(MachineInstr &MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index ec501083aaefae2..196da03733c7d00 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3766,6 +3766,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_SEXT:
case G_ANYEXT:
return lowerEXT(MI);
+ case G_TRUNC:
+ return lowerTRUNC(MI);
GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
}
@@ -5110,13 +5112,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_TRUNC: {
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_TRUNC:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FPEXT: {
if (TypeIdx != 0)
@@ -6165,6 +6161,63 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
return UnableToLegalize;
}
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
+ // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // Similar to how operand splitting is done in SelectionDAG, we can handle
+ // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+ // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+ // %lo16(<4 x s16>) = G_TRUNC %inlo
+ // %hi16(<4 x s16>) = G_TRUNC %inhi
+ // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+ // %res(<8 x s8>) = G_TRUNC %in16
+
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
+ isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
+ isPowerOf2_32(SrcTy.getNumElements()) &&
+ isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
+ // Split input type.
+ LLT SplitSrcTy = SrcTy.changeElementCount(
+ SrcTy.getElementCount().divideCoefficientBy(2));
+
+ // First, split the source into two smaller vectors.
+ SmallVector<Register, 2> SplitSrcs;
+ extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
+
+ // Truncate the splits into intermediate narrower elements.
+ LLT InterTy;
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+ else
+ InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
+ for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
+ SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+ }
+
+ // Combine the new truncates into one vector
+ auto Merge = MIRBuilder.buildMergeLikeInstr(
+ DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
+
+ // Truncate the new vector to the final result type
+ if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
+ else
+ MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
+
+ MI.eraseFromParent();
+
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e75f6e891adc334..378a8d0da4925d9 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -536,14 +536,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
});
getActionDefinitionsBuilder(G_TRUNC)
+ .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+ .moreElementsToNextPow2(0)
+ .clampMaxNumElements(0, s8, 8)
+ .clampMaxNumElements(0, s16, 4)
+ .clampMaxNumElements(0, s32, 2)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
0, s8)
- .customIf([=](const LegalityQuery &Query) {
+ .lowerIf([=](const LegalityQuery &Query) {
LLT DstTy = Query.Types[0];
LLT SrcTy = Query.Types[1];
- return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
+ return DstTy.isVector() && (SrcTy.getSizeInBits() > 128 ||
+ (DstTy.getScalarSizeInBits() * 2 <
+ SrcTy.getScalarSizeInBits()));
})
+
.alwaysLegal();
getActionDefinitionsBuilder(G_SEXT_INREG)
@@ -1002,8 +1010,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
case TargetOpcode::G_GLOBAL_VALUE:
return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
- case TargetOpcode::G_TRUNC:
- return legalizeVectorTrunc(MI, Helper);
case TargetOpcode::G_SBFX:
case TargetOpcode::G_UBFX:
return legalizeBitfieldExtract(MI, MRI, Helper);
@@ -1102,54 +1108,6 @@ bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
return true;
}
-static void extractParts(Register Reg, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
- SmallVectorImpl<Register> &VRegs) {
- for (int I = 0; I < NumParts; ++I)
- VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- MIRBuilder.buildUnmerge(VRegs, Reg);
-}
-
-bool AArch64LegalizerInfo::legalizeVectorTrunc(
- MachineInstr &MI, LegalizerHelper &Helper) const {
- MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
- // Similar to how operand splitting is done in SelectiondDAG, we can handle
- // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
- // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
- // %lo16(<4 x s16>) = G_TRUNC %inlo
- // %hi16(<4 x s16>) = G_TRUNC %inhi
- // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
- // %res(<8 x s8>) = G_TRUNC %in16
-
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- assert(llvm::has_single_bit<uint32_t>(DstTy.getSizeInBits()) &&
- llvm::has_single_bit<uint32_t>(SrcTy.getSizeInBits()));
-
- // Split input type.
- LLT SplitSrcTy =
- SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
- // First, split the source into two smaller vectors.
- SmallVector<Register, 2> SplitSrcs;
- extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
-
- // Truncate the splits into intermediate narrower elements.
- LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
- for (unsigned I = 0; I < SplitSrcs.size(); ++I)
- SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
-
- auto Concat = MIRBuilder.buildConcatVectors(
- DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
-
- Helper.Observer.changingInstr(MI);
- MI.getOperand(1).setReg(Concat.getReg(0));
- Helper.Observer.changedInstr(MI);
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const {
@@ -1319,6 +1277,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return true;
}
+ case Intrinsic::experimental_vector_reverse:
+ // TODO: Add support for vector_reverse
+ return false;
}
return true;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 853d5a2305ac68a..e6c9182da912dba 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -46,7 +46,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
- bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeBitfieldExtract(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
new file mode 100644
index 000000000000000..16b780a83973471
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
@@ -0,0 +1,531 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -O0 -mtriple=arm64-unknown-unknown -global-isel -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
+
+---
+name: xtn_v2i64_v2i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: xtn_v2i64_v2i8
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s8>) = G_TRUNC %0(<2 x s64>)
+ %2:_(<2 x s32>) = G_ANYEXT %1(<2 x s8>)
+ $d0 = COPY %2(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v2i128_v2i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: xtn_v2i128_v2i8
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR]](<2 x s64>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %1:_(s64) = COPY $x0
+ %2:_(s64) = COPY $x1
+ %3:_(s64) = COPY $x2
+ %4:_(s64) = COPY $x3
+ %5:_(s128) = G_MERGE_VALUES %1(s64), %2(s64)
+ %6:_(s128) = G_MERGE_VALUES %3(s64), %4(s64)
+ %0:_(<2 x s128>) = G_BUILD_VECTOR %5(s128), %6(s128)
+ %7:_(<2 x s8>) = G_TRUNC %0(<2 x s128>)
+ %8:_(<2 x s32>) = G_ANYEXT %7(<2 x s8>)
+ $d0 = COPY %8(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v2i64_v2i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: xtn_v2i64_v2i16
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s16>) = G_TRUNC %0(<2 x s64>)
+ %2:_(<2 x s32>) = G_ANYEXT %1(<2 x s16>)
+ $d0 = COPY %2(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v2i128_v2i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: xtn_v2i128_v2i16
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR]](<2 x s64>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %1:_(s64) = COPY $x0
+ %2:_(s64) = COPY $x1
+ %3:_(s64) = COPY $x2
+ %4:_(s64) = COPY $x3
+ %5:_(s128) = G_MERGE_VALUES %1(s64), %2(s64)
+ %6:_(s128) = G_MERGE_VALUES %3(s64), %4(s64)
+ %0:_(<2 x s128>) = G_BUILD_VECTOR %5(s128), %6(s128)
+ %7:_(<2 x s16>) = G_TRUNC %0(<2 x s128>)
+ %8:_(<2 x s32>) = G_ANYEXT %7(<2 x s16>)
+ $d0 = COPY %8(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v2i128_v2i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: xtn_v2i128_v2i32
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR]](<2 x s64>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %1:_(s64) = COPY $x0
+ %2:_(s64) = COPY $x1
+ %3:_(s64) = COPY $x2
+ %4:_(s64) = COPY $x3
+ %5:_(s128) = G_MERGE_VALUES %1(s64), %2(s64)
+ %6:_(s128) = G_MERGE_VALUES %3(s64), %4(s64)
+ %0:_(<2 x s128>) = G_BUILD_VECTOR %5(s128), %6(s128)
+ %7:_(<2 x s32>) = G_TRUNC %0(<2 x s128>)
+ $d0 = COPY %7(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v2i128_v2i64
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: xtn_v2i128_v2i64
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[COPY2]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(s64) = COPY $x0
+ %2:_(s64) = COPY $x1
+ %3:_(s64) = COPY $x2
+ %4:_(s64) = COPY $x3
+ %5:_(s128) = G_MERGE_VALUES %1(s64), %2(s64)
+ %6:_(s128) = G_MERGE_VALUES %3(s64), %4(s64)
+ %0:_(<2 x s128>) = G_BUILD_VECTOR %5(s128), %6(s128)
+ %7:_(<2 x s64>) = G_TRUNC %0(<2 x s128>)
+ $q0 = COPY %7(<2 x s64>)
+ RET_ReallyLR implicit $q0
+
+...
+
+---
+name: xtn_v3i16_v3i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: xtn_v3i16_v3i8
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[ANYEXT1]](s32)
+ ; CHECK-NEXT: $w2 = COPY [[ANYEXT2]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1, implicit $w2
+ %1:_(<4 x s16>) = COPY $d0
+ %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1(<4 x s16>)
+ %0:_(<3 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16)
+ %6:_(<3 x s8>) = G_TRUNC %0(<3 x s16>)
+ %10:_(s8), %11:_(s8), %12:_(s8) = G_UNMERGE_VALUES %6(<3 x s8>)
+ %7:_(s32) = G_ANYEXT %10(s8)
+ %8:_(s32) = G_ANYEXT %11(s8)
+ %9:_(s32) = G_ANYEXT %12(s8)
+ $w0 = COPY %7(s32)
+ $w1 = COPY %8(s32)
+ $w2 = COPY %9(s32)
+ RET_ReallyLR implicit $w0, implicit $w1, implicit $w2
+
+...
+
+---
+name: xtn_v3i32_v3i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: xtn_v3i32_v3i8
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $w2 = COPY [[UV2]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1, implicit $w2
+ %1:_(<4 x s32>) = COPY $q0
+ %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<4 x s32>)
+ %0:_(<3 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32)
+ %6:_(<3 x s8>) = G_TRUNC %0(<3 x s32>)
+ %10:_(s8), %11:_(s8), %12:_(s8) = G_UNMERGE_VALUES %6(<3 x s8>)
+ %7:_(s32) = G_ANYEXT %10(s8)
+ %8:_(s32) = G_ANYEXT %11(s8)
+ %9:_(s32) = G_ANYEXT %12(s8)
+ $w0 = COPY %7(s32)
+ $w1 = COPY %8(s32)
+ $w2 = COPY %9(s32)
+ RET_ReallyLR implicit $w0, implicit $w1, implicit $w2
+
+...
+
+---
+name: xtn_v3i64_v3i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1, $d2
+
+ ; CHECK-LABEL: name: xtn_v3i64_v3i8
+ ; CHECK: liveins: $d0, $d1, $d2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[TRUNC1]](s32)
+ ; CHECK-NEXT: $w2 = COPY [[TRUNC2]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1, implicit $w2
+ %1:_(s64) = COPY $d0
+ %2:_(s64) = COPY $d1
+ %3:_(s64) = COPY $d2
+ %0:_(<3 x s64>) = G_BUILD_VECTOR %1(s64), %2(s64), %3(s64)
+ %4:_(<3 x s8>) = G_TRUNC %0(<3 x s64>)
+ %8:_(s8), %9:_(s8), %10:_(s8) = G_UNMERGE_VALUES %4(<3 x s8>)
+ %5:_(s32) = G_ANYEXT %8(s8)
+ %6:_(s32) = G_ANYEXT %9(s8)
+ %7:_(s32) = G_ANYEXT %10(s8)
+ $w0 = COPY %5(s32)
+ $w1 = COPY %6(s32)
+ $w2 = COPY %7(s32)
+ RET_ReallyLR implicit $w0, implicit $w1, implicit $w2
+
+...
+
+---
+name: xtn_v3i64_v3i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1, $d2
+
+ ; CHECK-LABEL: name: xtn_v3i64_v3i16
+ ; CHECK: liveins: $d0, $d1, $d2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[DEF]](s16)
+ ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %1:_(s64) = COPY $d0
+ %2:_(s64) = COPY $d1
+ %3:_(s64) = COPY $d2
+ %0:_(<3 x s64>) = G_BUILD_VECTOR %1(s64), %2(s64), %3(s64)
+ %4:_(<3 x s16>) = G_TRUNC %0(<3 x s64>)
+ %5:_(s16), %6:_(s16), %7:_(s16) = G_UNMERGE_VALUES %4(<3 x s16>)
+ %8:_(s16) = G_IMPLICIT_DEF
+ %9:_(<4 x s16>) = G_BUILD_VECTOR %5(s16), %6(s16), %7(s16), %8(s16)
+ $d0 = COPY %9(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v3i64_v3i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1, $d2
+
+ ; CHECK-LABEL: name: xtn_v3i64_v3i32
+ ; CHECK: liveins: $d0, $d1, $d2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32), [[TRUNC2]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(s64) = COPY $d0
+ %2:_(s64) = COPY $d1
+ %3:_(s64) = COPY $d2
+ %0:_(<3 x s64>) = G_BUILD_VECTOR %1(s64), %2(s64), %3(s64)
+ %4:_(<3 x s32>) = G_TRUNC %0(<3 x s64>)
+ %5:_(s32), %6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %4(<3 x s32>)
+ %8:_(s32) = G_IMPLICIT_DEF
+ %9:_(<4 x s32>) = G_BUILD_VECTOR %5(s32), %6(s32), %7(s32), %8(s32)
+ $q0 = COPY %9(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+
+---
+name: xtn_v4i32_v4i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: xtn_v4i32_v4i8
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC]](<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
+ %2:_(<4 x s16>) = G_ANYEXT %1(<4 x s8>)
+ $d0 = COPY %2(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v4i64_v4i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: xtn_v4i64_v4i8
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[COPY1]](<2 x s64>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s32>), [[TRUNC1]](<2 x s32>)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s32>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC2]](<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %1:_(<2 x s64>) = COPY $q0
+ %2:_(<2 x s64>) = COPY $q1
+ %0:_(<4 x s64>) = G_CONCAT_VECTORS %1(<2 x s64>), %2(<2 x s64>)
+ %3:_(<4 x s8>) = G_TRUNC %0(<4 x s64>)
+ %4:_(<4 x s16>) = G_ANYEXT %3(<4 x s8>)
+ $d0 = COPY %4(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v4i64_v4i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: xtn_v4i64_v4i16
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[COPY1]](<2 x s64>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s32>), [[TRUNC1]](<2 x s32>)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s32>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC2]](<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %1:_(<2 x s64>) = COPY $q0
+ %2:_(<2 x s64>) = COPY $q1
+ %0:_(<4 x s64>) = G_CONCAT_VECTORS %1(<2 x s64>), %2(<2 x s64>)
+ %3:_(<4 x s16>) = G_TRUNC %0(<4 x s64>)
+ $d0 = COPY %3(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v4i64_v4i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: xtn_v4i64_v4i32
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[COPY1]](<2 x s64>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s32>), [[TRUNC1]](<2 x s32>)
+ ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<2 x s64>) = COPY $q0
+ %2:_(<2 x s64>) = COPY $q1
+ %0:_(<4 x s64>) = G_CONCAT_VECTORS %1(<2 x s64>), %2(<2 x s64>)
+ %3:_(<4 x s32>) = G_TRUNC %0(<4 x s64>)
+ $q0 = COPY %3(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+
+---
+name: xtn_v8i32_v8i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: xtn_v8i32_v8i8
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+ ; CHECK-NEXT: $d0 = COPY [[TRUNC2]](<8 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %1:_(<4 x s32>) = COPY $q0
+ %2:_(<4 x s32>) = COPY $q1
+ %0:_(<8 x s32>) = G_CONCAT_VECTORS %1(<4 x s32>), %2(<4 x s32>)
+ %3:_(<8 x s8>) = G_TRUNC %0(<8 x s32>)
+ $d0 = COPY %3(<8 x s8>)
+ RET_ReallyLR implicit $d0
+
+...
+
+---
+name: xtn_v8i32_v8i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: xtn_v8i32_v8i16
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+ ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<4 x s32>) = COPY $q0
+ %2:_(<4 x s32>) = COPY $q1
+ %0:_(<8 x s32>) = G_CONCAT_VECTORS %1(<4 x s32>), %2(<4 x s32>)
+ %3:_(<8 x s16>) = G_TRUNC %0(<8 x s32>)
+ $q0 = COPY %3(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+
+---
+name: xtn_v16i16_v16i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: xtn_v16i16_v16i8
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[COPY]](<8 x s16>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[COPY1]](<8 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>)
+ ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<8 x s16>) = COPY $q0
+ %2:_(<8 x s16>) = COPY $q1
+ %0:_(<16 x s16>) = G_CONCAT_VECTORS %1(<8 x s16>), %2(<8 x s16>)
+ %3:_(<16 x s8>) = G_TRUNC %0(<16 x s16>)
+ $q0 = COPY %3(<16 x s8>)
+ RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll
new file mode 100644
index 000000000000000..0dd4e3644b78356
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/xtn.ll
@@ -0,0 +1,473 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i8 @xtn_i16_to_i8(i16 %a) {
+; CHECK-LABEL: xtn_i16_to_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i16 %a to i8
+ ret i8 %arg1
+}
+
+define i8 @xtn_i32_to_i8(i32 %a) {
+; CHECK-LABEL: xtn_i32_to_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i32 %a to i8
+ ret i8 %arg1
+}
+
+define i8 @xtn_i64_to_i8(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i64 %a to i8
+ ret i8 %arg1
+}
+
+define i8 @xtn_i128_to_i8(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i128 %a to i8
+ ret i8 %arg1
+}
+
+define i16 @xtn_i32_to_i16(i32 %a) {
+; CHECK-LABEL: xtn_i32_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i32 %a to i16
+ ret i16 %arg1
+}
+
+define i16 @xtn_i64_to_i16(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i64 %a to i16
+ ret i16 %arg1
+}
+
+define i16 @xtn_i128_to_i16(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i128 %a to i16
+ ret i16 %arg1
+}
+
+define i32 @xtn_i64_to_i32(i64 %a) {
+; CHECK-LABEL: xtn_i64_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i64 %a to i32
+ ret i32 %arg1
+}
+
+define i32 @xtn_i128_to_i32(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i128 %a to i32
+ ret i32 %arg1
+}
+
+define i64 @xtn_i128_to_i64(i128 %a) {
+; CHECK-LABEL: xtn_i128_to_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc i128 %a to i64
+ ret i64 %arg1
+}
+
+define <2 x i8> @xtn_v2i16_v2i8(<2 x i16> %a) {
+; CHECK-LABEL: xtn_v2i16_v2i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i16> %a to <2 x i8>
+ ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i32_v2i8(<2 x i32> %a) {
+; CHECK-LABEL: xtn_v2i32_v2i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i32> %a to <2 x i8>
+ ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i64_v2i8(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i64> %a to <2 x i8>
+ ret <2 x i8> %arg1
+}
+
+define <2 x i8> @xtn_v2i128_v2i8(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.s[1], w2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: mov v0.d[1], x2
+; CHECK-GI-NEXT: xtn v0.2s, v0.2d
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i128> %a to <2 x i8>
+ ret <2 x i8> %arg1
+}
+
+define <2 x i16> @xtn_v2i32_v2i16(<2 x i32> %a) {
+; CHECK-LABEL: xtn_v2i32_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i32> %a to <2 x i16>
+ ret <2 x i16> %arg1
+}
+
+define <2 x i16> @xtn_v2i64_v2i16(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i64> %a to <2 x i16>
+ ret <2 x i16> %arg1
+}
+
+define <2 x i16> @xtn_v2i128_v2i16(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.s[1], w2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: mov v0.d[1], x2
+; CHECK-GI-NEXT: xtn v0.2s, v0.2d
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i128> %a to <2 x i16>
+ ret <2 x i16> %arg1
+}
+
+define <2 x i32> @xtn_v2i64_v2i32(<2 x i64> %a) {
+; CHECK-LABEL: xtn_v2i64_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i64> %a to <2 x i32>
+ ret <2 x i32> %arg1
+}
+
+define <2 x i32> @xtn_v2i128_v2i32(<2 x i128> %a) {
+; CHECK-SD-LABEL: xtn_v2i128_v2i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.s[1], w2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v2i128_v2i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: mov v0.d[1], x2
+; CHECK-GI-NEXT: xtn v0.2s, v0.2d
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i128> %a to <2 x i32>
+ ret <2 x i32> %arg1
+}
+
+define <2 x i64> @xtn_v2i128_v2i64(<2 x i128> %a) {
+; CHECK-LABEL: xtn_v2i128_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: mov v0.d[1], x2
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <2 x i128> %a to <2 x i64>
+ ret <2 x i64> %arg1
+}
+
+define <3 x i8> @xtn_v3i16_v3i8(<3 x i16> %a) {
+; CHECK-SD-LABEL: xtn_v3i16_v3i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: umov w0, v0.h[0]
+; CHECK-SD-NEXT: umov w1, v0.h[1]
+; CHECK-SD-NEXT: umov w2, v0.h[2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i16_v3i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: fmov w1, s1
+; CHECK-GI-NEXT: fmov w2, s2
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i16> %a to <3 x i8>
+ ret <3 x i8> %arg1
+}
+
+define <3 x i8> @xtn_v3i32_v3i8(<3 x i32> %a) {
+; CHECK-SD-LABEL: xtn_v3i32_v3i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: umov w0, v0.h[0]
+; CHECK-SD-NEXT: umov w1, v0.h[1]
+; CHECK-SD-NEXT: umov w2, v0.h[2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i32_v3i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: fmov w1, s1
+; CHECK-GI-NEXT: fmov w2, s2
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i32> %a to <3 x i8>
+ ret <3 x i8> %arg1
+}
+
+define <3 x i8> @xtn_v3i64_v3i8(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: xtn v1.2s, v2.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: fmov w2, s1
+; CHECK-SD-NEXT: mov w1, v0.s[1]
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: fmov x1, d1
+; CHECK-GI-NEXT: fmov x2, d2
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT: // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT: // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i64> %a to <3 x i8>
+ ret <3 x i8> %arg1
+}
+
+define <3 x i16> @xtn_v3i32_v3i16(<3 x i32> %a) {
+; CHECK-SD-LABEL: xtn_v3i32_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i32_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i32> %a to <3 x i16>
+ ret <3 x i16> %arg1
+}
+
+define <3 x i16> @xtn_v3i64_v3i16(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i64> %a to <3 x i16>
+ ret <3 x i16> %arg1
+}
+
+define <3 x i32> @xtn_v3i64_v3i32(<3 x i64> %a) {
+; CHECK-SD-LABEL: xtn_v3i64_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: xtn_v3i64_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: mov v0.s[2], w8
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: ret
+entry:
+ %arg1 = trunc <3 x i64> %a to <3 x i32>
+ ret <3 x i32> %arg1
+}
+
+define <4 x i8> @xtn_v4i16_v4i8(<4 x i16> %a) {
+; CHECK-LABEL: xtn_v4i16_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <4 x i16> %a to <4 x i8>
+ ret <4 x i8> %arg1
+}
+
+define <4 x i8> @xtn_v4i32_v4i8(<4 x i32> %a) {
+; CHECK-LABEL: xtn_v4i32_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <4 x i32> %a to <4 x i8>
+ ret <4 x i8> %arg1
+}
+
+define <4 x i8> @xtn_v4i64_v4i8(<4 x i64> %a) {
+; CHECK-LABEL: xtn_v4i64_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <4 x i64> %a to <4 x i8>
+ ret <4 x i8> %arg1
+}
+
+define <4 x i16> @xtn_v4i32_v4i16(<4 x i32> %a) {
+; CHECK-LABEL: xtn_v4i32_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <4 x i32> %a to <4 x i16>
+ ret <4 x i16> %arg1
+}
+
+define <4 x i16> @xtn_v4i64_v4i16(<4 x i64> %a) {
+; CHECK-LABEL: xtn_v4i64_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <4 x i64> %a to <4 x i16>
+ ret <4 x i16> %arg1
+}
+
+define <4 x i32> @xtn_v4i64_v4i32(<4 x i64> %a) {
+; CHECK-LABEL: xtn_v4i64_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <4 x i64> %a to <4 x i32>
+ ret <4 x i32> %arg1
+}
+
+define <8 x i8> @xtn_v8i16_v8i8(<8 x i16> %a) {
+; CHECK-LABEL: xtn_v8i16_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <8 x i16> %a to <8 x i8>
+ ret <8 x i8> %arg1
+}
+
+define <8 x i8> @xtn_v8i32_v8i8(<8 x i32> %a) {
+; CHECK-LABEL: xtn_v8i32_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <8 x i32> %a to <8 x i8>
+ ret <8 x i8> %arg1
+}
+
+define <8 x i16> @xtn_v8i32_v8i16(<8 x i32> %a) {
+; CHECK-LABEL: xtn_v8i32_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <8 x i32> %a to <8 x i16>
+ ret <8 x i16> %arg1
+}
+
+define <16 x i8> @xtn_v16i16_v16i8(<16 x i16> %a) {
+; CHECK-LABEL: xtn_v16i16_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %arg1 = trunc <16 x i16> %a to <16 x i8>
+ ret <16 x i8> %arg1
+}
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 8b30ee257ad2055..8573a8a2d2571d6 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -2,8 +2,6 @@
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for zext_v16i10_v16i16
-
define i16 @zext_i8_to_i16(i8 %a) {
; CHECK-LABEL: zext_i8_to_i16:
; CHECK: // %bb.0: // %entry
@@ -242,19 +240,18 @@ define <3 x i16> @zext_v3i8_v3i16(<3 x i8> %a) {
;
; CHECK-GI-LABEL: zext_v3i8_v3i16:
; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: mov w8, #255 // =0xff
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: fmov s2, w2
-; CHECK-GI-NEXT: mov v3.16b, v0.16b
-; CHECK-GI-NEXT: mov v3.h[1], v0.h[0]
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: mov v3.h[2], v0.h[0]
-; CHECK-GI-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT: mov v3.h[3], v0.h[0]
-; CHECK-GI-NEXT: and v0.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v0.s[1], w1
+; CHECK-GI-NEXT: mov v2.16b, v1.16b
+; CHECK-GI-NEXT: mov v0.s[2], w2
+; CHECK-GI-NEXT: mov v2.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: mov v2.h[2], v1.h[0]
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: mov v2.h[3], v0.h[0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-GI-NEXT: ret
entry:
%c = zext <3 x i8> %a to <3 x i16>
@@ -425,19 +422,18 @@ define <3 x i16> @zext_v3i10_v3i16(<3 x i10> %a) {
;
; CHECK-GI-LABEL: zext_v3i10_v3i16:
; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: mov w8, #1023 // =0x3ff
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: fmov s2, w2
-; CHECK-GI-NEXT: mov v3.16b, v0.16b
-; CHECK-GI-NEXT: mov v3.h[1], v0.h[0]
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: mov v3.h[2], v0.h[0]
-; CHECK-GI-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT: mov v3.h[3], v0.h[0]
-; CHECK-GI-NEXT: and v0.8b, v1.8b, v3.8b
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v0.s[1], w1
+; CHECK-GI-NEXT: mov v2.16b, v1.16b
+; CHECK-GI-NEXT: mov v0.s[2], w2
+; CHECK-GI-NEXT: mov v2.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: mov v2.h[2], v1.h[0]
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: mov v2.h[3], v0.h[0]
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-GI-NEXT: ret
entry:
%c = zext <3 x i10> %a to <3 x i16>
More information about the llvm-commits
mailing list