[llvm] 0878dd1 - [AArch64][GlobalISel] Add coverage for arm64-neon-2velem.ll. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 02:40:50 PDT 2025
Author: David Green
Date: 2025-03-17T09:40:46Z
New Revision: 0878dd14b20579dd127396787ec81ba7239b3366
URL: https://github.com/llvm/llvm-project/commit/0878dd14b20579dd127396787ec81ba7239b3366
DIFF: https://github.com/llvm/llvm-project/commit/0878dd14b20579dd127396787ec81ba7239b3366.diff
LOG: [AArch64][GlobalISel] Add coverage for arm64-neon-2velem.ll. NFC
Added:
    (none)
Modified:
    llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
Removed:
    (none)
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
index cb87ba9a4ed6c..c3ad3b4192cf9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -1,6 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefix=CHECK
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m3 | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -fp-contract=fast -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for test_vfms_lane_f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsq_lane_f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfms_laneq_f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsq_laneq_f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsq_lane_f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsq_laneq_f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsd_lane_f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmss_lane_f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmss_laneq_f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsd_laneq_f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsd_lane_f64_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmss_lane_f32_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmss_laneq_f32_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsd_laneq_f64_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_lane_s16_intrinsic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_laneq_s16_intrinsic_lo
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_laneq_s16_intrinsic_hi
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulhq_lane_s16_intrinsic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulhq_laneq_s16_intrinsic_lo
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulhq_laneq_s16_intrinsic_hi
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_lane_s32_intrinsic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_laneq_s32_intrinsic_lo
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_laneq_s32_intrinsic_hi
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulhq_lane_s32_intrinsic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulhq_laneq_s32_intrinsic_lo
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulhq_laneq_s32_intrinsic_hi
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulh_lane_s16_intrinsic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulh_laneq_s16_intrinsic_lo
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulh_laneq_s16_intrinsic_hi
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulhq_lane_s16_intrinsic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulhq_laneq_s16_intrinsic_lo
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulhq_laneq_s16_intrinsic_hi
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulh_lane_s32_intrinsic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulh_laneq_s32_intrinsic_lo
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulh_laneq_s32_intrinsic_hi
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulhq_lane_s32_intrinsic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulhq_laneq_s32_intrinsic_lo
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmulhq_laneq_s32_intrinsic_hi
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfms_lane_f32_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsq_lane_f32_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfms_laneq_f32_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsq_laneq_f32_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsq_laneq_f64_0
declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
@@ -115,10 +159,16 @@ entry:
}
define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmla_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmla_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mla v0.4h, v1.4h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmla_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: mla v0.4h, v2.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %b
@@ -139,10 +189,16 @@ entry:
}
define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmla_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmla_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mla v0.2s, v1.2s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmla_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: mla v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%mul = mul <2 x i32> %shuffle, %b
@@ -215,10 +271,16 @@ entry:
}
define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmls_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmls_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmls_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: mls v0.4h, v2.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %b
@@ -239,10 +301,16 @@ entry:
}
define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmls_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmls_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmls_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%mul = mul <2 x i32> %shuffle, %b
@@ -359,10 +427,16 @@ entry:
}
define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmul_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
+; CHECK-GI-NEXT: mul v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %a
@@ -381,10 +455,16 @@ entry:
}
define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmul_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%mul = mul <2 x i32> %shuffle, %a
@@ -403,10 +483,16 @@ entry:
}
define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmul_laneq_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
+; CHECK-GI-NEXT: mul v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%mul = mul <4 x i16> %shuffle, %a
@@ -425,10 +511,16 @@ entry:
}
define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmul_laneq_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%mul = mul <2 x i32> %shuffle, %a
@@ -475,10 +567,16 @@ entry:
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfma_laneq_f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vfma_laneq_f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmla v0.2s, v1.2s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vfma_laneq_f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: fmla v0.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
%0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -736,10 +834,16 @@ entry:
}
define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlal v0.4s, v1.4h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -748,10 +852,16 @@ entry:
}
define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlal v0.2d, v1.2s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -760,11 +870,18 @@ entry:
}
define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_lane_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: smlal2 v0.4s, v1.8h, v2.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_lane_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -774,11 +891,18 @@ entry:
}
define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_lane_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: smlal2 v0.2d, v1.4s, v2.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_lane_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -788,10 +912,17 @@ entry:
}
define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlal2 v0.4s, v1.8h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -801,10 +932,17 @@ entry:
}
define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlal2 v0.2d, v1.4s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -840,10 +978,16 @@ entry:
}
define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlsl v0.4s, v1.4h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -852,10 +996,16 @@ entry:
}
define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlsl v0.2d, v1.2s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -864,11 +1014,18 @@ entry:
}
define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_lane_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: smlsl2 v0.4s, v1.8h, v2.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_lane_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -878,11 +1035,18 @@ entry:
}
define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_lane_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: smlsl2 v0.2d, v1.4s, v2.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_lane_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -892,10 +1056,17 @@ entry:
}
define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlsl2 v0.4s, v1.8h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -905,10 +1076,17 @@ entry:
}
define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlsl2 v0.2d, v1.4s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -944,10 +1122,16 @@ entry:
}
define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_laneq_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_laneq_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlal v0.4s, v1.4h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_laneq_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -956,10 +1140,16 @@ entry:
}
define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_laneq_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_laneq_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlal v0.2d, v1.2s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_laneq_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -968,11 +1158,18 @@ entry:
}
define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_lane_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: umlal2 v0.4s, v1.8h, v2.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_lane_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -982,11 +1179,18 @@ entry:
}
define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_lane_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: umlal2 v0.2d, v1.4s, v2.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_lane_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -996,10 +1200,17 @@ entry:
}
define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_laneq_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlal2 v0.4s, v1.8h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_laneq_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -1009,10 +1220,17 @@ entry:
}
define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_laneq_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlal2 v0.2d, v1.4s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_laneq_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -1048,10 +1266,16 @@ entry:
}
define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_laneq_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlsl v0.4s, v1.4h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_laneq_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1060,10 +1284,16 @@ entry:
}
define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_laneq_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlsl v0.2d, v1.2s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_laneq_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1072,11 +1302,18 @@ entry:
}
define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_lane_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: umlsl2 v0.4s, v1.8h, v2.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_lane_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1086,11 +1323,18 @@ entry:
}
define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_lane_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: umlsl2 v0.2d, v1.4s, v2.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_lane_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1100,10 +1344,17 @@ entry:
}
define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_laneq_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlsl2 v0.4s, v1.8h, v2.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_laneq_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
+; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -1113,10 +1364,17 @@ entry:
}
define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_laneq_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlsl2 v0.2d, v1.4s, v2.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_laneq_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
+; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -1174,11 +1432,18 @@ entry:
}
define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_lane_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_lane_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: smull2 v0.4s, v0.8h, v1.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_lane_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1187,11 +1452,18 @@ entry:
}
define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_lane_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_lane_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: smull2 v0.2d, v0.4s, v1.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_lane_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1200,11 +1472,18 @@ entry:
}
define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_lane_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_lane_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: umull2 v0.4s, v0.8h, v1.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_lane_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1213,11 +1492,18 @@ entry:
}
define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_lane_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_lane_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: umull2 v0.2d, v0.4s, v1.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_lane_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1226,10 +1512,16 @@ entry:
}
define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smull v0.4s, v0.4h, v1.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1237,10 +1529,16 @@ entry:
}
define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smull v0.2d, v0.2s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1248,10 +1546,16 @@ entry:
}
define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_laneq_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_laneq_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umull v0.4s, v0.4h, v1.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_laneq_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1259,10 +1563,16 @@ entry:
}
define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_laneq_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_laneq_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umull v0.2d, v0.2s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_laneq_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1270,10 +1580,17 @@ entry:
}
define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smull2 v0.4s, v0.8h, v1.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -1282,10 +1599,17 @@ entry:
}
define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smull2 v0.2d, v0.4s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -1294,10 +1618,17 @@ entry:
}
define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_laneq_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umull2 v0.4s, v0.8h, v1.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_laneq_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -1306,10 +1637,17 @@ entry:
}
define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_laneq_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umull2 v0.2d, v0.4s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_laneq_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -1344,11 +1682,18 @@ entry:
}
define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlal_high_lane_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlal_high_lane_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlal v0.4s, v1.4h, v2.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1358,11 +1703,18 @@ entry:
}
define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlal_high_lane_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlal_high_lane_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1398,11 +1750,18 @@ entry:
}
define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1412,11 +1771,18 @@ entry:
}
define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1450,10 +1816,16 @@ entry:
}
define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[3]
+; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1461,10 +1833,16 @@ entry:
}
define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sqdmull v0.2d, v0.2s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1472,11 +1850,18 @@ entry:
}
define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_high_lane_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_high_lane_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_high_lane_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1485,11 +1870,18 @@ entry:
}
define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_high_lane_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_high_lane_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_high_lane_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1498,10 +1890,17 @@ entry:
}
define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_high_laneq_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[7]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_high_laneq_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[7]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_high_laneq_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[7]
+; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -1510,10 +1909,17 @@ entry:
}
define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_high_laneq_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_high_laneq_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_high_laneq_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -1916,10 +2322,16 @@ entry:
}
define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmul_laneq_f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: fmul v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
%mul = fmul <2 x float> %shuffle, %a
@@ -1927,10 +2339,16 @@ entry:
}
define <2 x float> @test_vmul_laneq3_f32_bitcast(<2 x float> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vmul_laneq3_f32_bitcast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq3_f32_bitcast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq3_f32_bitcast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.s[1]
+; CHECK-GI-NEXT: ret
%extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
%bc = bitcast <1 x double> %extract to <2 x float>
%splat = shufflevector <2 x float> %bc, <2 x float> undef, <2 x i32> <i32 1, i32 1>
@@ -1939,10 +2357,16 @@ define <2 x float> @test_vmul_laneq3_f32_bitcast(<2 x float> %a, <2 x double> %v
}
define <2 x float> @test_vmul_laneq2_f32_bitcast(<2 x float> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vmul_laneq2_f32_bitcast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq2_f32_bitcast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.s[2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq2_f32_bitcast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.s[0]
+; CHECK-GI-NEXT: ret
%extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
%bc = bitcast <1 x double> %extract to <2 x float>
%splat = shufflevector <2 x float> %bc, <2 x float> undef, <2 x i32> <i32 0, i32 0>
@@ -1951,11 +2375,18 @@ define <2 x float> @test_vmul_laneq2_f32_bitcast(<2 x float> %a, <2 x double> %v
}
define <4 x i16> @test_vadd_laneq5_i16_bitcast(<4 x i16> %a, <2 x double> %v) {
-; CHECK-LABEL: test_vadd_laneq5_i16_bitcast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.4h, v1.h[5]
-; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vadd_laneq5_i16_bitcast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4h, v1.h[5]
+; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vadd_laneq5_i16_bitcast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v1.4h, v1.h[1]
+; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
%extract = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
%bc = bitcast <1 x double> %extract to <4 x i16>
%splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1966,12 +2397,19 @@ define <4 x i16> @test_vadd_laneq5_i16_bitcast(<4 x i16> %a, <2 x double> %v) {
; TODO: The pattern in LowerVECTOR_SHUFFLE does not match what we are looking for.
define <4 x i16> @test_vadd_lane2_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x i8> %v) {
-; CHECK-LABEL: test_vadd_lane2_i16_bitcast_bigger_aligned:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.4h, v1.h[2]
-; CHECK-NEXT: dup v1.4h, v1.h[1]
-; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vadd_lane2_i16_bitcast_bigger_aligned:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4h, v1.h[2]
+; CHECK-SD-NEXT: dup v1.4h, v1.h[1]
+; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vadd_lane2_i16_bitcast_bigger_aligned:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ext v1.16b, v1.16b, v0.16b, #2
+; CHECK-GI-NEXT: dup v1.4h, v1.h[1]
+; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
%extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
%bc = bitcast <8 x i8> %extract to <4 x i16>
%splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1980,11 +2418,18 @@ define <4 x i16> @test_vadd_lane2_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x
}
define <4 x i16> @test_vadd_lane5_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x i8> %v) {
-; CHECK-LABEL: test_vadd_lane5_i16_bitcast_bigger_aligned:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.4h, v1.h[5]
-; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vadd_lane5_i16_bitcast_bigger_aligned:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4h, v1.h[5]
+; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vadd_lane5_i16_bitcast_bigger_aligned:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v1.4h, v1.h[1]
+; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
%extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%bc = bitcast <8 x i8> %extract to <4 x i16>
%splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1995,12 +2440,19 @@ define <4 x i16> @test_vadd_lane5_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x
; Negative test - can't dup bytes {3,4} of v8i16.
define <4 x i16> @test_vadd_lane_i16_bitcast_bigger_unaligned(<4 x i16> %a, <16 x i8> %v) {
-; CHECK-LABEL: test_vadd_lane_i16_bitcast_bigger_unaligned:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.8b, v1.8b, v0.8b, #1
-; CHECK-NEXT: dup v1.4h, v1.h[1]
-; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vadd_lane_i16_bitcast_bigger_unaligned:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.8b, v1.8b, v0.8b, #1
+; CHECK-SD-NEXT: dup v1.4h, v1.h[1]
+; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vadd_lane_i16_bitcast_bigger_unaligned:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ext v1.16b, v1.16b, v0.16b, #1
+; CHECK-GI-NEXT: dup v1.4h, v1.h[1]
+; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
%extract = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%bc = bitcast <8 x i8> %extract to <4 x i16>
%splat = shufflevector <4 x i16> %bc, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -2082,11 +2534,18 @@ entry:
}
define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
-; CHECK-LABEL: test_vmulxq_lane_f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmulxq_lane_f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmulxq_lane_f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: dup v1.2d, v1.d[0]
+; CHECK-GI-NEXT: fmulx v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
%vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
@@ -2094,10 +2553,16 @@ entry:
}
define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmulx_laneq_f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmulx_laneq_f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmulx v0.2s, v0.2s, v1.s[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmulx_laneq_f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[3]
+; CHECK-GI-NEXT: fmulx v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
%vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -2192,10 +2657,16 @@ entry:
}
define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmla_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmla_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mla v0.4h, v1.4h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmla_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: mla v0.4h, v2.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %shuffle, %b
@@ -2216,10 +2687,16 @@ entry:
}
define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmla_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmla_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mla v0.2s, v1.2s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmla_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: mla v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%mul = mul <2 x i32> %shuffle, %b
@@ -2292,10 +2769,16 @@ entry:
}
define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmls_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmls_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmls_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: mls v0.4h, v2.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %shuffle, %b
@@ -2316,10 +2799,16 @@ entry:
}
define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmls_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmls_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmls_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%mul = mul <2 x i32> %shuffle, %b
@@ -2436,10 +2925,16 @@ entry:
}
define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmul_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: mul v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %shuffle, %a
@@ -2458,10 +2953,16 @@ entry:
}
define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmul_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%mul = mul <2 x i32> %shuffle, %a
@@ -2480,10 +2981,16 @@ entry:
}
define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmul_laneq_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: mul v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %shuffle, %a
@@ -2502,10 +3009,16 @@ entry:
}
define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmul_laneq_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%mul = mul <2 x i32> %shuffle, %a
@@ -2548,10 +3061,16 @@ entry:
}
define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfma_laneq_f32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vfma_laneq_f32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmla v0.2s, v1.2s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vfma_laneq_f32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: fmla v0.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
%0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -2669,10 +3188,16 @@ entry:
}
define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlal v0.4s, v1.4h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -2681,10 +3206,16 @@ entry:
}
define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlal v0.2d, v1.2s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -2693,11 +3224,18 @@ entry:
}
define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_lane_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: smlal2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_lane_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2707,11 +3245,18 @@ entry:
}
define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_lane_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: smlal2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_lane_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2721,10 +3266,17 @@ entry:
}
define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlal2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -2734,10 +3286,17 @@ entry:
}
define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlal2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -2773,10 +3332,16 @@ entry:
}
define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -2785,10 +3350,16 @@ entry:
}
define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -2797,11 +3368,18 @@ entry:
}
define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_lane_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_lane_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2811,11 +3389,18 @@ entry:
}
define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_lane_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_lane_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2825,10 +3410,17 @@ entry:
}
define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -2838,10 +3430,17 @@ entry:
}
define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -2877,10 +3476,16 @@ entry:
}
define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_laneq_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_laneq_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlal v0.4s, v1.4h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_laneq_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -2889,10 +3494,16 @@ entry:
}
define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_laneq_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_laneq_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlal v0.2d, v1.2s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_laneq_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -2901,11 +3512,18 @@ entry:
}
define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_lane_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: umlal2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_lane_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2915,11 +3533,18 @@ entry:
}
define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_lane_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_lane_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: umlal2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_lane_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2929,10 +3554,17 @@ entry:
}
define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_laneq_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlal2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_laneq_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: umlal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -2942,10 +3574,17 @@ entry:
}
define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlal_high_laneq_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlal2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlal_high_laneq_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -2981,10 +3620,16 @@ entry:
}
define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_laneq_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_laneq_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -2993,10 +3638,16 @@ entry:
}
define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_laneq_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_laneq_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_laneq_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -3005,11 +3656,18 @@ entry:
}
define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_lane_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_lane_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -3019,11 +3677,18 @@ entry:
}
define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_lane_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_lane_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -3033,10 +3698,17 @@ entry:
}
define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_laneq_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_laneq_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.8h, v2.h[0]
+; CHECK-GI-NEXT: umlsl v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -3046,10 +3718,17 @@ entry:
}
define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmlsl_high_laneq_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmlsl_high_laneq_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: dup v2.4s, v2.s[0]
+; CHECK-GI-NEXT: umlsl v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -3107,11 +3786,18 @@ entry:
}
define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_lane_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_lane_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: smull2 v0.4s, v0.8h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_lane_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -3120,11 +3806,18 @@ entry:
}
define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_lane_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_lane_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: smull2 v0.2d, v0.4s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_lane_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -3133,11 +3826,18 @@ entry:
}
define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_lane_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_lane_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: umull2 v0.4s, v0.8h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_lane_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -3146,11 +3846,18 @@ entry:
}
define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_lane_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_lane_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: umull2 v0.2d, v0.4s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_lane_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -3159,10 +3866,16 @@ entry:
}
define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smull v0.4s, v0.4h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -3170,10 +3883,16 @@ entry:
}
define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smull v0.2d, v0.2s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -3181,10 +3900,16 @@ entry:
}
define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_laneq_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_laneq_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umull v0.4s, v0.4h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_laneq_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -3192,10 +3917,16 @@ entry:
}
define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_laneq_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_laneq_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umull v0.2d, v0.2s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_laneq_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -3203,10 +3934,17 @@ entry:
}
define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smull2 v0.4s, v0.8h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -3215,10 +3953,17 @@ entry:
}
define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: smull2 v0.2d, v0.4s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -3227,10 +3972,17 @@ entry:
}
define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_u16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_laneq_u16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umull2 v0.4s, v0.8h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_laneq_u16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -3239,10 +3991,17 @@ entry:
}
define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vmull_high_laneq_u32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_laneq_u32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: umull2 v0.2d, v0.4s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_laneq_u32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -3277,11 +4036,18 @@ entry:
}
define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlal_high_lane_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlal_high_lane_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlal v0.4s, v1.4h, v2.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -3291,11 +4057,18 @@ entry:
}
define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlal_high_lane_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlal_high_lane_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -3331,11 +4104,18 @@ entry:
}
define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -3345,11 +4125,18 @@ entry:
}
define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -3383,10 +4170,16 @@ entry:
}
define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
%vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -3394,10 +4187,16 @@ entry:
}
define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
%vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -3405,11 +4204,18 @@ entry:
}
define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_high_lane_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_high_lane_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -3418,11 +4224,18 @@ entry:
}
define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_high_lane_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_high_lane_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -3431,10 +4244,17 @@ entry:
}
define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_high_laneq_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_high_laneq_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -3443,10 +4263,17 @@ entry:
}
define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmull_high_laneq_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmull_high_laneq_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: sqdmull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -3575,10 +4402,16 @@ entry:
}
define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmul_laneq_f32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmul_laneq_f32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmul_laneq_f32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: fmul v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
%mul = fmul <2 x float> %shuffle, %a
@@ -3646,11 +4479,18 @@ entry:
}
define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
-; CHECK-LABEL: test_vmulxq_lane_f64_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmulxq_lane_f64_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmulxq_lane_f64_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: dup v1.2d, v1.d[0]
+; CHECK-GI-NEXT: fmulx v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
%vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
@@ -3658,10 +4498,16 @@ entry:
}
define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK-LABEL: test_vmulx_laneq_f32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmulx_laneq_f32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmulx_laneq_f32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: fmulx v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
%vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
More information about the llvm-commits mailing list