[llvm] [AArch64][GlobalISel] Prefer to use Vector Truncate (PR #105692)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 11:09:02 PDT 2024
https://github.com/chuongg3 updated https://github.com/llvm/llvm-project/pull/105692
From c896301d1b77839df964d98e8a8803a2e6642d61 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Wed, 21 Aug 2024 09:23:38 +0000
Subject: [PATCH] [AArch64][GlobalISel] Prefer to use Vector Truncate
Try to combine scalarised truncates into vector truncate
operations, and prefer G_CONCAT_VECTORS instructions
over G_BUILD_VECTOR instructions.
EXAMPLE:
%a(s32), %b(s32) = G_UNMERGE_VALUES %src(<2 x s32>)
%T_a(s16) = G_TRUNC %a(s32)
%T_b(s16) = G_TRUNC %b(s32)
%Undef(s16) = G_IMPLICIT_DEF
%dst(<4 x s16>) = G_BUILD_VECTOR %T_a(s16), %T_b(s16), %Undef(s16), %Undef(s16)
===>
%Undef(<2 x s32>) = G_IMPLICIT_DEF
%Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %Undef(<2 x s32>)
%dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
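For reference, a minimal IR-level reproducer (taken from the xtn.ll test updated
in this patch) that produces the pattern above once the <3 x i16> result is
legalised; per the updated CHECK lines, GlobalISel now selects a single xtn for
it instead of per-lane moves:

define <3 x i16> @xtn_v3i32_v3i16(<3 x i32> %a) {
entry:
  %arg1 = trunc <3 x i32> %a to <3 x i16>
  ret <3 x i16> %arg1
}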
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 +
.../include/llvm/Target/GlobalISel/Combine.td | 10 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 106 ++++++++++++++++++
llvm/lib/Target/AArch64/AArch64Combine.td | 2 +-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 3 +-
.../AArch64/GlobalISel/legalize-freeze.mir | 24 +---
.../GlobalISel/legalize-insert-vector-elt.mir | 11 +-
llvm/test/CodeGen/AArch64/bswap.ll | 4 +-
llvm/test/CodeGen/AArch64/concat-vector.ll | 16 +--
.../AArch64/fixed-vector-interleave.ll | 22 +---
llvm/test/CodeGen/AArch64/fptoi.ll | 52 ++-------
llvm/test/CodeGen/AArch64/itofp.ll | 11 +-
llvm/test/CodeGen/AArch64/shift.ll | 90 ++-------------
llvm/test/CodeGen/AArch64/shufflevector.ll | 45 +-------
llvm/test/CodeGen/AArch64/xtn.ll | 17 +--
15 files changed, 178 insertions(+), 238 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 828532dcffb7d3..786ba0cfac5daa 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -599,6 +599,9 @@ class CombinerHelper {
bool matchRotateOutOfRange(MachineInstr &MI);
void applyRotateOutOfRange(MachineInstr &MI);
+ bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo);
+ void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo);
+
/// \returns true if a G_ICMP instruction \p MI can be replaced with a true
/// or false constant based off of KnownBits information.
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a595a51d7b01ff..daad30fe575f7f 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1602,6 +1602,13 @@ def insert_vector_elt_oob : GICombineRule<
[{ return Helper.matchInsertVectorElementOOB(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+// Combine v8i8 (buildvector i8 (trunc(unmerge)), i8 (trunc), i8 (trunc), i8 (trunc), undef, undef, undef, undef)
+def combine_use_vector_truncate : GICombineRule<
+ (defs root:$root, register_matchinfo:$matchinfo),
+ (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
+ [{ return Helper.matchUseVectorTruncate(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyUseVectorTruncate(*${root}, ${matchinfo}); }])>;
+
def add_of_vscale : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_VSCALE $left, $imm1),
@@ -2001,7 +2008,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
- sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
+ sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat,
+ combine_use_vector_truncate]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index df9c12bc9c97bd..c279289f9161bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3320,6 +3320,112 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
isConstTrueVal(TLI, Cst, IsVector, IsFP);
}
+// This combine tries to reduce the number of scalarised G_TRUNC instructions by
+// using vector truncates instead
+//
+// EXAMPLE:
+// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
+// %T_a(i16) = G_TRUNC %a(i32)
+// %T_b(i16) = G_TRUNC %b(i32)
+// %Undef(i16) = G_IMPLICIT_DEF(i16)
+// %dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
+//
+// ===>
+// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
+// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
+// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
+//
+// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
+bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
+ Register &MatchInfo) {
+ auto BuildMI = cast<GBuildVector>(&MI);
+ unsigned NumOperands = BuildMI->getNumSources();
+ LLT DstTy = MRI.getType(BuildMI->getReg(0));
+
+ // Check the G_BUILD_VECTOR sources
+ unsigned I;
+ MachineInstr *UnmergeMI = nullptr;
+
+ // Check all source TRUNCs come from the same UNMERGE instruction
+ for (I = 0; I < NumOperands; ++I) {
+ auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
+ auto SrcMIOpc = SrcMI->getOpcode();
+
+ // Check if the G_TRUNC instructions all come from the same MI
+ if (SrcMIOpc == TargetOpcode::G_TRUNC) {
+ if (!UnmergeMI) {
+ UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
+ if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ return false;
+ } else {
+ auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
+ if (UnmergeMI != UnmergeSrcMI)
+ return false;
+ }
+ } else {
+ break;
+ }
+ }
+ if (I < 2)
+ return false;
+
+ // Check the remaining source elements are only G_IMPLICIT_DEF
+ for (; I < NumOperands; ++I) {
+ auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
+ auto SrcMIOpc = SrcMI->getOpcode();
+
+ if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
+ return false;
+ }
+
+ // Check the size of unmerge source
+ MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
+ LLT UnmergeSrcTy = MRI.getType(MatchInfo);
+ if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
+ return false;
+
+ // Only generate legal instructions post-legalizer
+ if (!IsPreLegalize) {
+ LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
+
+ if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
+ !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
+ return false;
+
+ if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
+ return false;
+ }
+
+ return true;
+}
+
+void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,
+ Register &MatchInfo) {
+ Register MidReg;
+ auto BuildMI = cast<GBuildVector>(&MI);
+ Register DstReg = BuildMI->getReg(0);
+ LLT DstTy = MRI.getType(DstReg);
+ LLT UnmergeSrcTy = MRI.getType(MatchInfo);
+ unsigned DstTyNumElt = DstTy.getNumElements();
+ unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
+
+ // No need to pad vector if only G_TRUNC is needed
+ if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
+ MidReg = MatchInfo;
+ } else {
+ Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
+ SmallVector<Register> ConcatRegs = {MatchInfo};
+ for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
+ ConcatRegs.push_back(UndefReg);
+
+ auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
+ MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
+ }
+
+ Builder.buildTrunc(DstReg, MidReg);
+ MI.eraseFromParent();
+}
+
bool CombinerHelper::matchNotCmp(MachineInstr &MI,
SmallVectorImpl<Register> &RegsToNegate) {
assert(MI.getOpcode() == TargetOpcode::G_XOR);
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 25989fb5988954..ead6455ddd5278 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -330,5 +330,5 @@ def AArch64PostLegalizerCombiner
select_to_minmax, or_to_bsp, combine_concat_vector,
commute_constant_to_rhs,
push_freeze_to_prevent_poison_from_propagating,
- combine_mul_cmlt]> {
+ combine_mul_cmlt, combine_use_vector_truncate]> {
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index db5cd1d32d73d0..28ee781e60761a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -95,7 +95,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
.legalFor({p0, s8, s16, s32, s64})
- .legalFor(PackedVectorAllTypeList)
+ .legalFor({v16s8, v8s16, v4s32, v2s64, v2p0, v8s8, v4s16, v2s32, v4s8,
+ v2s16, v2s8})
.widenScalarToNextPow2(0)
.clampScalar(0, s8, s64)
.moreElementsToNextPow2(0)
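As an IR-level illustration of why the extra small-vector types matter, a sketch
equivalent to the shl_v2i16 test in shift.ll below (argument names here are
illustrative); per the updated CHECK-GI lines it now lowers to uzp1 + ushl rather
than scalarised lane moves:

define <2 x i16> @shl_v2i16(<2 x i16> %a, <2 x i16> %b) {
  %r = shl <2 x i16> %a, %b
  ret <2 x i16> %r
}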
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
index 3e768c4d7a267c..03c28efe7e09fb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
@@ -159,25 +159,13 @@ body: |
; CHECK-LABEL: name: test_freeze_v3s8
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[UV4]]
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s8>) = G_FREEZE [[DEF]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[FREEZE]](<4 x s8>)
; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK-NEXT: %ext0:_(s32) = G_AND [[ANYEXT1]], [[C]]
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
- ; CHECK-NEXT: %ext1:_(s32) = G_AND [[ANYEXT2]], [[C]]
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
- ; CHECK-NEXT: %ext2:_(s32) = G_AND [[ANYEXT3]], [[C]]
+ ; CHECK-NEXT: %ext0:_(s32) = G_ZEXT [[UV]](s8)
+ ; CHECK-NEXT: %ext1:_(s32) = G_ZEXT [[UV1]](s8)
+ ; CHECK-NEXT: %ext2:_(s32) = G_ZEXT [[UV2]](s8)
; CHECK-NEXT: %res:_(<4 x s32>) = G_BUILD_VECTOR %ext0(s32), %ext1(s32), %ext2(s32), %undef(s32)
; CHECK-NEXT: $q0 = COPY %res(<4 x s32>)
%x:_(<3 x s8>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
index 9a8697c1d9b866..11c6c7fb40faa1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
@@ -248,13 +248,10 @@ body: |
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
- ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
- ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF2]](<4 x s8>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>)
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>)
diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index e90014be21deb3..b14f1a43b7dcfd 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -177,9 +177,7 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){
;
; CHECK-GI-LABEL: bswap_v2i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: rev16 v0.8b, v0.8b
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index 18570b2d793ff6..eee917e8acb0d7 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -183,15 +183,12 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
;
; CHECK-GI-LABEL: concat_v8s16_v2s16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: ldr h2, [x0, #2]
-; CHECK-GI-NEXT: dup v0.4s, w8
-; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
-; CHECK-GI-NEXT: xtn v2.4h, v0.4s
-; CHECK-GI-NEXT: xtn v1.4h, v1.4s
-; CHECK-GI-NEXT: fmov w8, s1
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr h1, [x0, #2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: mov v0.s[0], w8
-; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: mov v0.s[3], w8
@@ -209,10 +206,7 @@ define <16 x i8> @concat_v16s8_v4s8(ptr %ptr) {
;
; CHECK-GI-LABEL: concat_v16s8_v4s8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: dup v0.8h, w8
-; CHECK-GI-NEXT: xtn v1.8b, v0.8h
; CHECK-GI-NEXT: ldr s0, [x0]
-; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: mov v0.s[3], w8
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index aa20304e52a951..a9618fdc2dec30 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -3,24 +3,10 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
-; CHECK-SD-LABEL: interleave2_v4f16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: zip1 v0.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: interleave2_v4f16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: dup v2.4s, w8
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: xtn v0.4h, v2.4s
-; CHECK-GI-NEXT: mov v1.s[0], w8
-; CHECK-GI-NEXT: mov v2.s[0], w9
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov v1.s[1], w8
-; CHECK-GI-NEXT: mov v2.s[1], w8
-; CHECK-GI-NEXT: zip1 v0.4h, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: interleave2_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
%retval = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %vec0, <2 x half> %vec1)
ret <4 x half> %retval
}
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 20b5567e973d09..f72a49f6ab7c89 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -3172,42 +3172,22 @@ entry:
}
define <3 x i16> @fptos_v3f32_v3i16(<3 x float> %a) {
-; CHECK-SD-LABEL: fptos_v3f32_v3i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptos_v3f32_v3i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-GI-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NEXT: mov w9, v0.s[2]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v0.h[2], w9
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptos_v3f32_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
entry:
%c = fptosi <3 x float> %a to <3 x i16>
ret <3 x i16> %c
}
define <3 x i16> @fptou_v3f32_v3i16(<3 x float> %a) {
-; CHECK-SD-LABEL: fptou_v3f32_v3i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptou_v3f32_v3i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-GI-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NEXT: mov w9, v0.s[2]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v0.h[2], w9
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptou_v3f32_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
entry:
%c = fptoui <3 x float> %a to <3 x i16>
ret <3 x i16> %c
@@ -6077,11 +6057,7 @@ define <3 x i16> @fptos_v3f16_v3i16(<3 x half> %a) {
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NOFP16-NEXT: mov w9, v0.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w8
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w9
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i16:
@@ -6110,11 +6086,7 @@ define <3 x i16> @fptou_v3f16_v3i16(<3 x half> %a) {
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NOFP16-NEXT: mov w9, v0.s[2]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w8
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w9
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i16:
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 4ac04798e15481..f70ec0f35cb586 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -7450,9 +7450,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
;
; CHECK-GI-FP16-LABEL: stofp_v2i16_v2f16:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
+; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
@@ -7493,9 +7491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
;
; CHECK-GI-FP16-LABEL: utofp_v2i16_v2f16:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
+; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
@@ -8059,8 +8055,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
+; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll
index 951458da17c07e..a087e1e5763ea0 100644
--- a/llvm/test/CodeGen/AArch64/shift.ll
+++ b/llvm/test/CodeGen/AArch64/shift.ll
@@ -531,26 +531,8 @@ define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1){
;
; CHECK-GI-LABEL: shl_v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov h2, v1.h[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov h3, v1.h[3]
-; CHECK-GI-NEXT: mov v0.b[1], w8
-; CHECK-GI-NEXT: mov v1.b[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.b[2], w8
-; CHECK-GI-NEXT: mov v1.b[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.b[3], w8
-; CHECK-GI-NEXT: mov v1.b[3], w9
+; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: mov b1, v0.b[1]
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
@@ -592,12 +574,8 @@ define <2 x i16> @shl_v2i16(<2 x i16> %0, <2 x i16> %1){
;
; CHECK-GI-LABEL: shl_v2i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NEXT: mov w9, v1.s[1]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
@@ -686,26 +664,8 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){
;
; CHECK-GI-LABEL: ashr_v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov h2, v1.h[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h3, v0.h[1]
-; CHECK-GI-NEXT: mov h4, v1.h[2]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov h2, v1.h[3]
-; CHECK-GI-NEXT: fmov w9, s4
-; CHECK-GI-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NEXT: mov v1.b[1], w8
-; CHECK-GI-NEXT: fmov w8, s3
-; CHECK-GI-NEXT: mov h3, v0.h[2]
-; CHECK-GI-NEXT: mov v0.b[1], w8
-; CHECK-GI-NEXT: fmov w8, s3
-; CHECK-GI-NEXT: mov v1.b[2], w9
-; CHECK-GI-NEXT: mov v0.b[2], w8
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov v1.b[3], w8
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: mov v0.b[3], w8
+; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: neg v1.8b, v1.8b
; CHECK-GI-NEXT: sshl v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: mov b1, v0.b[1]
@@ -747,12 +707,8 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %0, <2 x i16> %1){
;
; CHECK-GI-LABEL: ashr_v2i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov w8, v1.s[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov w9, v0.s[1]
-; CHECK-GI-NEXT: mov v1.h[1], w8
-; CHECK-GI-NEXT: mov v0.h[1], w9
+; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: neg v1.4h, v1.4h
; CHECK-GI-NEXT: sshl v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: mov h1, v0.h[1]
@@ -830,26 +786,8 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){
;
; CHECK-GI-LABEL: lshr_v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov h2, v1.h[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h3, v0.h[1]
-; CHECK-GI-NEXT: mov h4, v1.h[2]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov h2, v1.h[3]
-; CHECK-GI-NEXT: fmov w9, s4
-; CHECK-GI-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NEXT: mov v1.b[1], w8
-; CHECK-GI-NEXT: fmov w8, s3
-; CHECK-GI-NEXT: mov h3, v0.h[2]
-; CHECK-GI-NEXT: mov v0.b[1], w8
-; CHECK-GI-NEXT: fmov w8, s3
-; CHECK-GI-NEXT: mov v1.b[2], w9
-; CHECK-GI-NEXT: mov v0.b[2], w8
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov v1.b[3], w8
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: mov v0.b[3], w8
+; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: neg v1.8b, v1.8b
; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: mov b1, v0.b[1]
@@ -890,12 +828,8 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %0, <2 x i16> %1){
;
; CHECK-GI-LABEL: lshr_v2i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov w8, v1.s[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov w9, v0.s[1]
-; CHECK-GI-NEXT: mov v1.h[1], w8
-; CHECK-GI-NEXT: mov v0.h[1], w9
+; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: neg v1.4h, v1.4h
; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: mov h1, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index 954458e4459749..5f4ff1e64673bb 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -209,27 +209,9 @@ define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
;
; CHECK-GI-LABEL: shufflevector_v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NEXT: mov h5, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov h2, v1.h[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov h3, v1.h[3]
-; CHECK-GI-NEXT: mov v0.b[1], w8
-; CHECK-GI-NEXT: mov v1.b[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.b[2], w8
-; CHECK-GI-NEXT: mov v1.b[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.b[3], w8
-; CHECK-GI-NEXT: mov v1.b[3], w9
+; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
+; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
@@ -284,13 +266,9 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
;
; CHECK-GI-LABEL: shufflevector_v2i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NEXT: mov w9, v1.s[1]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: adrp x8, .LCPI17_0
+; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
@@ -403,16 +381,7 @@ define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
;
; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: mov h1, v0.h[3]
-; CHECK-GI-NEXT: mov v0.b[1], w8
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov v0.b[2], w8
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: mov v0.b[3], w8
+; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -448,9 +417,7 @@ define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
;
; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll
index ead790203f9496..fb3f8ebd7d1413 100644
--- a/llvm/test/CodeGen/AArch64/xtn.ll
+++ b/llvm/test/CodeGen/AArch64/xtn.ll
@@ -294,19 +294,10 @@ entry:
}
define <3 x i16> @xtn_v3i32_v3i16(<3 x i32> %a) {
-; CHECK-SD-LABEL: xtn_v3i32_v3i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: xtn_v3i32_v3i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, v0.s[1]
-; CHECK-GI-NEXT: mov w9, v0.s[2]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v0.h[2], w9
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: xtn_v3i32_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
entry:
%arg1 = trunc <3 x i32> %a to <3 x i16>
ret <3 x i16> %arg1