[llvm] [AArch64][GlobalISel] Combine G_UNMERGE(G_DUPLANE16) -> G_DUPLANE16 (PR #142731)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 00:20:52 PDT 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/142731
We generate G_UNMERGE(G_DUPLANE16) when legalizing shuffle vector splats with mismatching vector sizes. The G_DUPLANE operations can handle different vector sizes (128-bit and 64-bit outputs, for example), so we can combine away the unmerge.
From 383a02f94e30e787c28a35d1c81556ca3876fe50 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 4 Jun 2025 08:17:10 +0100
Subject: [PATCH] [AArch64][GlobalISel] Combine G_UNMERGE(G_DUPLANE16) ->
G_DUPLANE16
We generate G_UNMERGE(G_DUPLANE16) when legalizing shuffle vector splats
with mismatching vector sizes. The G_DUPLANE operations can handle different
vector sizes (128-bit and 64-bit outputs, for example), so we can combine
away the unmerge.
---
llvm/lib/Target/AArch64/AArch64Combine.td | 17 +-
llvm/test/CodeGen/AArch64/arm64-dup.ll | 14 +-
.../test/CodeGen/AArch64/arm64-neon-2velem.ll | 784 +++++-------------
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 42 +-
llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll | 56 +-
5 files changed, 276 insertions(+), 637 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f84e83816bf33..9fe331d5370de 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -172,6 +172,20 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;
+// Clean up G_UNMERGE(G_DUPLANE16) -> G_DUPLANE16
+class unmerge_duplane<Instruction Op> : GICombineRule <
+ (defs root:$root),
+ (match (Op $a, $src, $c),
+ (G_UNMERGE_VALUES $d1, $d2, $a):$root,
+ [{ return MRI.getType(${d1}.getReg()).getSizeInBits() == 64; }]),
+ (apply (GIReplaceReg $d2, $d1), (Op $d1, $src, $c))
+>;
+def unmerge_duplane8 : unmerge_duplane<G_DUPLANE8>;
+def unmerge_duplane16 : unmerge_duplane<G_DUPLANE16>;
+def unmerge_duplane32 : unmerge_duplane<G_DUPLANE32>;
+def unmerge_duplane64 : unmerge_duplane<G_DUPLANE64>;
+def unmerge_duplanes : GICombineGroup<[unmerge_duplane8, unmerge_duplane16, unmerge_duplane32, unmerge_duplane64]>;
+
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
form_duplane, shuf_to_ins]>;
@@ -325,7 +339,8 @@ def AArch64PostLegalizerLowering
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift,
unmerge_ext_to_unmerge, lower_mulv2s64,
- vector_unmerge_lowering, insertelt_nonconst]> {
+ vector_unmerge_lowering, insertelt_nonconst,
+ unmerge_duplanes]> {
}
// Post-legalization combines which are primarily optimizations.
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 4c28ea7592202..12bf09e02aaf9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -422,16 +422,10 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
; the formation of an indexed-by-7 MLS.
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
-; CHECK-SD-LABEL: test_high_splat:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mls.4h v0, v1, v2[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_high_splat:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup.8h v2, v2[7]
-; CHECK-GI-NEXT: mls.4h v0, v2, v1
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_high_splat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mls.4h v0, v1, v2[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
index c3ad3b4192cf9..85d8b7c3e2866 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -159,16 +159,10 @@ entry:
}
define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmla_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mla v0.4h, v1.4h, v2.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmla_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: mla v0.4h, v2.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmla_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %b
@@ -189,16 +183,10 @@ entry:
}
define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmla_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mla v0.2s, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmla_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: mla v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmla_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%mul = mul <2 x i32> %shuffle, %b
@@ -271,16 +259,10 @@ entry:
}
define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmls_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmls_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: mls v0.4h, v2.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmls_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %b
@@ -301,16 +283,10 @@ entry:
}
define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmls_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmls_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmls_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%mul = mul <2 x i32> %shuffle, %b
@@ -427,16 +403,10 @@ entry:
}
define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
-; CHECK-GI-NEXT: mul v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %a
@@ -455,16 +425,10 @@ entry:
}
define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%mul = mul <2 x i32> %shuffle, %a
@@ -483,16 +447,10 @@ entry:
}
define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_u16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_u16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
-; CHECK-GI-NEXT: mul v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %a
@@ -511,16 +469,10 @@ entry:
}
define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_u32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_u32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%mul = mul <2 x i32> %shuffle, %a
@@ -567,16 +519,10 @@ entry:
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK-SD-LABEL: test_vfma_laneq_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmla v0.2s, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vfma_laneq_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: fmla v0.2s, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vfma_laneq_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
%0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -834,16 +780,10 @@ entry:
}
define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmlal_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smlal v0.4s, v1.4h, v2.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlal_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlal_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -852,16 +792,10 @@ entry:
}
define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmlal_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smlal v0.2d, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlal_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlal_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -920,8 +854,7 @@ define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16
; CHECK-GI-LABEL: test_vmlal_high_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.h[7]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -940,8 +873,7 @@ define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32
; CHECK-GI-LABEL: test_vmlal_high_laneq_s32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.s[3]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -978,16 +910,10 @@ entry:
}
define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmlsl_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smlsl v0.4s, v1.4h, v2.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlsl_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlsl_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -996,16 +922,10 @@ entry:
}
define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmlsl_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smlsl v0.2d, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlsl_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlsl_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1064,8 +984,7 @@ define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16
; CHECK-GI-LABEL: test_vmlsl_high_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.h[7]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -1084,8 +1003,7 @@ define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32
; CHECK-GI-LABEL: test_vmlsl_high_laneq_s32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.s[3]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -1122,16 +1040,10 @@ entry:
}
define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmlal_laneq_u16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umlal v0.4s, v1.4h, v2.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlal_laneq_u16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlal_laneq_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1140,16 +1052,10 @@ entry:
}
define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmlal_laneq_u32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umlal v0.2d, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlal_laneq_u32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlal_laneq_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1208,8 +1114,7 @@ define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16
; CHECK-GI-LABEL: test_vmlal_high_laneq_u16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.h[7]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -1228,8 +1133,7 @@ define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32
; CHECK-GI-LABEL: test_vmlal_high_laneq_u32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.s[3]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -1266,16 +1170,10 @@ entry:
}
define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmlsl_laneq_u16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umlsl v0.4s, v1.4h, v2.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlsl_laneq_u16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlsl_laneq_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1284,16 +1182,10 @@ entry:
}
define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmlsl_laneq_u32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umlsl v0.2d, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlsl_laneq_u32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlsl_laneq_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1352,8 +1244,7 @@ define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16
; CHECK-GI-LABEL: test_vmlsl_high_laneq_u16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.h[7]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -1372,8 +1263,7 @@ define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32
; CHECK-GI-LABEL: test_vmlsl_high_laneq_u32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.s[3]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -1512,16 +1402,10 @@ entry:
}
define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmull_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smull v0.4s, v0.4h, v1.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmull_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
-; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmull_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1529,16 +1413,10 @@ entry:
}
define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmull_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smull v0.2d, v0.2s, v1.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmull_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmull_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1546,16 +1424,10 @@ entry:
}
define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmull_laneq_u16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull v0.4s, v0.4h, v1.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmull_laneq_u16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
-; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmull_laneq_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[7]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1563,16 +1435,10 @@ entry:
}
define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmull_laneq_u32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull v0.2d, v0.2s, v1.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmull_laneq_u32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmull_laneq_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1588,8 +1454,7 @@ define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-GI-LABEL: test_vmull_high_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
-; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.h[7]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -1607,8 +1472,7 @@ define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-GI-LABEL: test_vmull_high_laneq_s32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.s[3]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -1626,8 +1490,7 @@ define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-GI-LABEL: test_vmull_high_laneq_u16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
-; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.h[7]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -1645,8 +1508,7 @@ define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-GI-LABEL: test_vmull_high_laneq_u32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.s[3]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -1816,16 +1678,10 @@ entry:
}
define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vqdmull_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmull_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[3]
-; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmull_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1833,16 +1689,10 @@ entry:
}
define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vqdmull_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sqdmull v0.2d, v0.2s, v1.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmull_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmull_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1898,8 +1748,7 @@ define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-GI-LABEL: test_vqdmull_high_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
-; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.h[7]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -1917,8 +1766,7 @@ define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-GI-LABEL: test_vqdmull_high_laneq_s32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.s[3]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -2322,16 +2170,10 @@ entry:
}
define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: fmul v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
%mul = fmul <2 x float> %shuffle, %a
@@ -2553,16 +2395,10 @@ entry:
}
define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK-SD-LABEL: test_vmulx_laneq_f32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmulx v0.2s, v0.2s, v1.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmulx_laneq_f32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
-; CHECK-GI-NEXT: fmulx v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmulx_laneq_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[3]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
%vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -2657,16 +2493,10 @@ entry:
}
define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmla_laneq_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mla v0.4h, v1.4h, v2.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmla_laneq_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: mla v0.4h, v2.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmla_laneq_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %shuffle, %b
@@ -2687,16 +2517,10 @@ entry:
}
define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmla_laneq_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mla v0.2s, v1.2s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmla_laneq_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: mla v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmla_laneq_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%mul = mul <2 x i32> %shuffle, %b
@@ -2769,16 +2593,10 @@ entry:
}
define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmls_laneq_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmls_laneq_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: mls v0.4h, v2.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmls_laneq_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %shuffle, %b
@@ -2799,16 +2617,10 @@ entry:
}
define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmls_laneq_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmls_laneq_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmls_laneq_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%mul = mul <2 x i32> %shuffle, %b
@@ -2925,16 +2737,10 @@ entry:
}
define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: mul v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %shuffle, %a
@@ -2953,16 +2759,10 @@ entry:
}
define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%mul = mul <2 x i32> %shuffle, %a
@@ -2981,16 +2781,10 @@ entry:
}
define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_u16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_u16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: mul v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_u16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %shuffle, %a
@@ -3009,16 +2803,10 @@ entry:
}
define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_u32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_u32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_u32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%mul = mul <2 x i32> %shuffle, %a
@@ -3061,16 +2849,10 @@ entry:
}
define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK-SD-LABEL: test_vfma_laneq_f32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmla v0.2s, v1.2s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vfma_laneq_f32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: fmla v0.2s, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vfma_laneq_f32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
%0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -3188,16 +2970,10 @@ entry:
}
define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmlal_laneq_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smlal v0.4s, v1.4h, v2.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlal_laneq_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlal_laneq_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -3206,16 +2982,10 @@ entry:
}
define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmlal_laneq_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smlal v0.2d, v1.2s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlal_laneq_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlal_laneq_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -3274,8 +3044,7 @@ define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i
; CHECK-GI-LABEL: test_vmlal_high_laneq_s16_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.h[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -3294,8 +3063,7 @@ define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i
; CHECK-GI-LABEL: test_vmlal_high_laneq_s32_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.s[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -3332,16 +3100,10 @@ entry:
}
define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmlsl_laneq_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlsl_laneq_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlsl_laneq_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -3350,16 +3112,10 @@ entry:
}
define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmlsl_laneq_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlsl_laneq_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlsl_laneq_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -3418,8 +3174,7 @@ define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i
; CHECK-GI-LABEL: test_vmlsl_high_laneq_s16_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -3438,8 +3193,7 @@ define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i
; CHECK-GI-LABEL: test_vmlsl_high_laneq_s32_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -3476,16 +3230,10 @@ entry:
}
define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmlal_laneq_u16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umlal v0.4s, v1.4h, v2.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlal_laneq_u16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlal_laneq_u16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -3494,16 +3242,10 @@ entry:
}
define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmlal_laneq_u32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umlal v0.2d, v1.2s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlal_laneq_u32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlal_laneq_u32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -3562,8 +3304,7 @@ define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i
; CHECK-GI-LABEL: test_vmlal_high_laneq_u16_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.h[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -3582,8 +3323,7 @@ define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i
; CHECK-GI-LABEL: test_vmlal_high_laneq_u32_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.s[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -3620,16 +3360,10 @@ entry:
}
define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmlsl_laneq_u16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlsl_laneq_u16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlsl_laneq_u16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -3638,16 +3372,10 @@ entry:
}
define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmlsl_laneq_u32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmlsl_laneq_u32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmlsl_laneq_u32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -3706,8 +3434,7 @@ define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i
; CHECK-GI-LABEL: test_vmlsl_high_laneq_u16_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
-; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -3726,8 +3453,7 @@ define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i
; CHECK-GI-LABEL: test_vmlsl_high_laneq_u32_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
-; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -3866,16 +3592,10 @@ entry:
}
define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmull_laneq_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smull v0.4s, v0.4h, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmull_laneq_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmull_laneq_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -3883,16 +3603,10 @@ entry:
}
define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmull_laneq_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smull v0.2d, v0.2s, v1.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmull_laneq_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmull_laneq_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -3900,16 +3614,10 @@ entry:
}
define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vmull_laneq_u16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull v0.4s, v0.4h, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmull_laneq_u16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmull_laneq_u16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -3917,16 +3625,10 @@ entry:
}
define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vmull_laneq_u32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull v0.2d, v0.2s, v1.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmull_laneq_u32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmull_laneq_u32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -3942,8 +3644,7 @@ define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-GI-LABEL: test_vmull_high_laneq_s16_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.h[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -3961,8 +3662,7 @@ define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-GI-LABEL: test_vmull_high_laneq_s32_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.s[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -3980,8 +3680,7 @@ define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-GI-LABEL: test_vmull_high_laneq_u16_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.h[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -3999,8 +3698,7 @@ define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-GI-LABEL: test_vmull_high_laneq_u32_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.s[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -4170,16 +3868,10 @@ entry:
}
define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vqdmull_laneq_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmull_laneq_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmull_laneq_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -4187,16 +3879,10 @@ entry:
}
define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vqdmull_laneq_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmull_laneq_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmull_laneq_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -4252,8 +3938,7 @@ define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-GI-LABEL: test_vqdmull_high_laneq_s16_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -4271,8 +3956,7 @@ define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-GI-LABEL: test_vqdmull_high_laneq_s32_0:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -4402,16 +4086,10 @@ entry:
}
define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK-SD-LABEL: test_vmul_laneq_f32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmul_laneq_f32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: fmul v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmul_laneq_f32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
%mul = fmul <2 x float> %shuffle, %a
@@ -4498,16 +4176,10 @@ entry:
}
define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK-SD-LABEL: test_vmulx_laneq_f32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vmulx_laneq_f32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
-; CHECK-GI-NEXT: fmulx v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vmulx_laneq_f32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
%vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index 51f1351a5edf4..96a20f653e5a1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -807,46 +807,28 @@ define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
}
define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
-; CHECK-SD-LABEL: test_vdup_laneq_s8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: dup v0.8b, v0.b[5]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vdup_laneq_s8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: dup v0.16b, v0.b[5]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vdup_laneq_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8b, v0.b[5]
+; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i8> %shuffle
}
define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
-; CHECK-SD-LABEL: test_vdup_laneq_s16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: dup v0.4h, v0.h[2]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vdup_laneq_s16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: dup v0.8h, v0.h[2]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vdup_laneq_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.4h, v0.h[2]
+; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
ret <4 x i16> %shuffle
}
define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
-; CHECK-SD-LABEL: test_vdup_laneq_s32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: dup v0.2s, v0.s[1]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vdup_laneq_s32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: dup v0.4s, v0.s[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vdup_laneq_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.2s, v0.s[1]
+; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
ret <2 x i32> %shuffle
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index bbea8f7b93f02..c5630966118b8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -589,16 +589,10 @@ define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
; Using sqrdmlah intrinsics
define <4 x i16> @test_vqrdmlah_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vqrdmlah_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlah_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: sqrdmlah v0.4h, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlah_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
entry:
%lane = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vqrdmlah_v3.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %lane) #4
@@ -606,16 +600,10 @@ entry:
}
define <2 x i32> @test_vqrdmlah_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vqrdmlah_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlah_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: sqrdmlah v0.2s, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlah_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%lane = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 3, i32 3>
%vqrdmlah_v3.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %lane) #4
@@ -769,16 +757,10 @@ entry:
}
define <4 x i16> @test_vqrdmlsh_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-SD-LABEL: test_vqrdmlsh_laneq_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[7]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlsh_laneq_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
-; CHECK-GI-NEXT: sqrdmlsh v0.4h, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlsh_laneq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
entry:
%lane = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vqrdmlsh_v3.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %lane) #4
@@ -786,16 +768,10 @@ entry:
}
define <2 x i32> @test_vqrdmlsh_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-SD-LABEL: test_vqrdmlsh_laneq_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlsh_laneq_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
-; CHECK-GI-NEXT: sqrdmlsh v0.2s, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlsh_laneq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[3]
+; CHECK-NEXT: ret
entry:
%lane = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 3, i32 3>
%vqrdmlsh_v3.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %lane) #4
More information about the llvm-commits
mailing list