[llvm] 752819e - [AArch64][ARM] Remove load from dup and vmul tests. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 20 07:23:43 PST 2022
Author: David Green
Date: 2022-12-20T15:23:38Z
New Revision: 752819e813d1de1e76b4b509ad6fbb97b52d2d03
URL: https://github.com/llvm/llvm-project/commit/752819e813d1de1e76b4b509ad6fbb97b52d2d03
DIFF: https://github.com/llvm/llvm-project/commit/752819e813d1de1e76b4b509ad6fbb97b52d2d03.diff
LOG: [AArch64][ARM] Remove load from dup and vmul tests. NFC
These tests needn't use loads in their testing of dup and mul
instructions, and as the handling of loads changes, the tests may no
longer test what they are intended to test (as in D140069).
Added:
Modified:
llvm/test/CodeGen/AArch64/arm64-dup.ll
llvm/test/CodeGen/AArch64/arm64-vmul.ll
llvm/test/CodeGen/ARM/vdup.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 6613f911f8258..0fdb60cc08b0e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -6,15 +6,15 @@ define <8 x i8> @v_dup8(i8 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.8b v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
- %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
- %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
- %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
- %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
- %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
- %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
- %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
- ret <8 x i8> %tmp8
+ %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
+ %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
+ %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
+ %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
+ %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
+ %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
+ %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
+ ret <8 x i8> %tmp8
}
define <4 x i16> @v_dup16(i16 %A) nounwind {
@@ -22,11 +22,11 @@ define <4 x i16> @v_dup16(i16 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.4h v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
- %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
- %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
- %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
- ret <4 x i16> %tmp4
+ %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
+ %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
+ %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
+ ret <4 x i16> %tmp4
}
define <2 x i32> @v_dup32(i32 %A) nounwind {
@@ -34,9 +34,9 @@ define <2 x i32> @v_dup32(i32 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.2s v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
- %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
- ret <2 x i32> %tmp2
+ %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
+ ret <2 x i32> %tmp2
}
define <2 x float> @v_dupfloat(float %A) nounwind {
@@ -45,9 +45,9 @@ define <2 x float> @v_dupfloat(float %A) nounwind {
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup.2s v0, v0[0]
; CHECK-NEXT: ret
- %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
- %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
- ret <2 x float> %tmp2
+ %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
+ %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
+ ret <2 x float> %tmp2
}
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
@@ -55,23 +55,23 @@ define <16 x i8> @v_dupQ8(i8 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.16b v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
- %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
- %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
- %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
- %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
- %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
- %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
- %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
- %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
- %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
- %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
- %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
- %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
- %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
- %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
- %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
- ret <16 x i8> %tmp16
+ %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
+ %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
+ %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
+ %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
+ %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
+ %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
+ %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
+ %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
+ %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
+ %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
+ %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
+ %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
+ %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
+ %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
+ %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
+ ret <16 x i8> %tmp16
}
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
@@ -79,15 +79,15 @@ define <8 x i16> @v_dupQ16(i16 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.8h v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
- %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
- %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
- %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
- %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
- %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
- %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
- %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
- ret <8 x i16> %tmp8
+ %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
+ %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
+ %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
+ %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
+ %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
+ %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
+ %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
+ ret <8 x i16> %tmp8
}
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
@@ -95,11 +95,11 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.4s v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
- %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
- %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
- %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
- ret <4 x i32> %tmp4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
+ %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
+ ret <4 x i32> %tmp4
}
define <4 x float> @v_dupQfloat(float %A) nounwind {
@@ -108,11 +108,11 @@ define <4 x float> @v_dupQfloat(float %A) nounwind {
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup.4s v0, v0[0]
; CHECK-NEXT: ret
- %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
- %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
- %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
- %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
- ret <4 x float> %tmp4
+ %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
+ %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
+ %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
+ %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
+ ret <4 x float> %tmp4
}
; Check to make sure it works with shuffles, too.
@@ -122,9 +122,9 @@ define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.8b v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %tmp2
+ %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %tmp2
}
define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
@@ -132,9 +132,9 @@ define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.4h v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %tmp2
+ %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %tmp2
}
define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
@@ -142,9 +142,9 @@ define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.2s v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
- ret <2 x i32> %tmp2
+ %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+ ret <2 x i32> %tmp2
}
define <2 x float> @v_shuffledupfloat(float %A) nounwind {
@@ -153,9 +153,9 @@ define <2 x float> @v_shuffledupfloat(float %A) nounwind {
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup.2s v0, v0[0]
; CHECK-NEXT: ret
- %tmp1 = insertelement <2 x float> undef, float %A, i32 0
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
- ret <2 x float> %tmp2
+ %tmp1 = insertelement <2 x float> undef, float %A, i32 0
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %tmp2
}
define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
@@ -163,9 +163,9 @@ define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.16b v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
- %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %tmp2
+ %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %tmp2
}
define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
@@ -173,9 +173,9 @@ define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.8h v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
- %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %tmp2
+ %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp2
}
define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
@@ -183,9 +183,9 @@ define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: dup.4s v0, w0
; CHECK-NEXT: ret
- %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
- %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %tmp2
+ %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %tmp2
}
define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
@@ -194,97 +194,89 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup.4s v0, v0[0]
; CHECK-NEXT: ret
- %tmp1 = insertelement <4 x float> undef, float %A, i32 0
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
- ret <4 x float> %tmp2
+ %tmp1 = insertelement <4 x float> undef, float %A, i32 0
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp2
}
-define <8 x i8> @vduplane8(ptr %A) nounwind {
+define <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
; CHECK-LABEL: vduplane8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup.8b v0, v0[1]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i8>, ptr %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <8 x i8> %tmp2
+ %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i8> %tmp2
}
-define <4 x i16> @vduplane16(ptr %A) nounwind {
+define <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
; CHECK-LABEL: vduplane16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup.4h v0, v0[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x i16> %tmp2
+ %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i16> %tmp2
}
-define <2 x i32> @vduplane32(ptr %A) nounwind {
+define <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
; CHECK-LABEL: vduplane32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup.2s v0, v0[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
- ret <2 x i32> %tmp2
+ %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x i32> %tmp2
}
-define <2 x float> @vduplanefloat(ptr %A) nounwind {
+define <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
; CHECK-LABEL: vduplanefloat:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup.2s v0, v0[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x float>, ptr %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
- ret <2 x float> %tmp2
+ %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x float> %tmp2
}
-define <16 x i8> @vduplaneQ8(ptr %A) nounwind {
+define <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
; CHECK-LABEL: vduplaneQ8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup.16b v0, v0[1]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i8>, ptr %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <16 x i8> %tmp2
+ %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <16 x i8> %tmp2
}
-define <8 x i16> @vduplaneQ16(ptr %A) nounwind {
+define <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
; CHECK-LABEL: vduplaneQ16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup.8h v0, v0[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <8 x i16> %tmp2
+ %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i16> %tmp2
}
-define <4 x i32> @vduplaneQ32(ptr %A) nounwind {
+define <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
; CHECK-LABEL: vduplaneQ32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup.4s v0, v0[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x i32> %tmp2
+ %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i32> %tmp2
}
-define <4 x float> @vduplaneQfloat(ptr %A) nounwind {
+define <4 x float> @vduplaneQfloat(<2 x float> %A) nounwind {
; CHECK-LABEL: vduplaneQfloat:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup.4s v0, v0[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x float>, ptr %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x float> %tmp2
+ %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x float> %tmp2
}
define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 0a29d6b86659e..7f743f605f255 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -1081,59 +1081,45 @@ declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x f
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
-define <4 x i16> @mul_4h(ptr %A, ptr %B) nounwind {
+define <4 x i16> @mul_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
; CHECK-LABEL: mul_4h:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mul.4h v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = mul <4 x i16> %tmp1, %tmp3
+ %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = mul <4 x i16> %A, %tmp3
ret <4 x i16> %tmp4
}
-define <8 x i16> @mul_8h(ptr %A, ptr %B) nounwind {
+define <8 x i16> @mul_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: mul_8h:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: mul.8h v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i16>, ptr %A
- %tmp2 = load <8 x i16>, ptr %B
- %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %tmp4 = mul <8 x i16> %tmp1, %tmp3
+ %tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = mul <8 x i16> %A, %tmp3
ret <8 x i16> %tmp4
}
-define <2 x i32> @mul_2s(ptr %A, ptr %B) nounwind {
+define <2 x i32> @mul_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
; CHECK-LABEL: mul_2s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mul.2s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = mul <2 x i32> %tmp1, %tmp3
+ %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = mul <2 x i32> %A, %tmp3
ret <2 x i32> %tmp4
}
-define <4 x i32> @mul_4s(ptr %A, ptr %B) nounwind {
+define <4 x i32> @mul_4s(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: mul_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: mul.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i32>, ptr %A
- %tmp2 = load <4 x i32>, ptr %B
- %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = mul <4 x i32> %tmp1, %tmp3
+ %tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = mul <4 x i32> %A, %tmp3
ret <4 x i32> %tmp4
}
@@ -1153,45 +1139,34 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind {
ret <2 x i64> %tmp1
}
-define <2 x float> @fmul_lane_2s(ptr %A, ptr %B) nounwind {
+define <2 x float> @fmul_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
; CHECK-LABEL: fmul_lane_2s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmul.2s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x float>, ptr %A
- %tmp2 = load <2 x float>, ptr %B
- %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = fmul <2 x float> %tmp1, %tmp3
+ %tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = fmul <2 x float> %A, %tmp3
ret <2 x float> %tmp4
}
-define <4 x float> @fmul_lane_4s(ptr %A, ptr %B) nounwind {
+define <4 x float> @fmul_lane_4s(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-LABEL: fmul_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: fmul.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x float>, ptr %A
- %tmp2 = load <4 x float>, ptr %B
- %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = fmul <4 x float> %tmp1, %tmp3
+ %tmp3 = shufflevector <4 x float> %B, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = fmul <4 x float> %A, %tmp3
ret <4 x float> %tmp4
}
-define <2 x double> @fmul_lane_2d(ptr %A, ptr %B) nounwind {
+define <2 x double> @fmul_lane_2d(<2 x double> %A, <2 x double> %B) nounwind {
; CHECK-LABEL: fmul_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: fmul.2d v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x double>, ptr %A
- %tmp2 = load <2 x double>, ptr %B
- %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = fmul <2 x double> %tmp1, %tmp3
+ %tmp3 = shufflevector <2 x double> %B, <2 x double> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = fmul <2 x double> %A, %tmp3
ret <2 x double> %tmp4
}
@@ -1217,101 +1192,76 @@ define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
-define <2 x float> @fmulx_lane_2s(ptr %A, ptr %B) nounwind {
+define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
; CHECK-LABEL: fmulx_lane_2s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx.2s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x float>, ptr %A
- %tmp2 = load <2 x float>, ptr %B
- %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3)
+ %tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %A, <2 x float> %tmp3)
ret <2 x float> %tmp4
}
-define <4 x float> @fmulx_lane_4s(ptr %A, ptr %B) nounwind {
+define <4 x float> @fmulx_lane_4s(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-LABEL: fmulx_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: fmulx.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x float>, ptr %A
- %tmp2 = load <4 x float>, ptr %B
- %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3)
+ %tmp3 = shufflevector <4 x float> %B, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %A, <4 x float> %tmp3)
ret <4 x float> %tmp4
}
-define <2 x double> @fmulx_lane_2d(ptr %A, ptr %B) nounwind {
+define <2 x double> @fmulx_lane_2d(<2 x double> %A, <2 x double> %B) nounwind {
; CHECK-LABEL: fmulx_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: fmulx.2d v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x double>, ptr %A
- %tmp2 = load <2 x double>, ptr %B
- %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3)
+ %tmp3 = shufflevector <2 x double> %B, <2 x double> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %A, <2 x double> %tmp3)
ret <2 x double> %tmp4
}
-define <4 x i16> @sqdmulh_lane_4h(ptr %A, ptr %B) nounwind {
+define <4 x i16> @sqdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
; CHECK-LABEL: sqdmulh_lane_4h:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmulh.4h v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %A, <4 x i16> %tmp3)
ret <4 x i16> %tmp4
}
-define <8 x i16> @sqdmulh_lane_8h(ptr %A, ptr %B) nounwind {
+define <8 x i16> @sqdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: sqdmulh_lane_8h:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqdmulh.8h v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i16>, ptr %A
- %tmp2 = load <8 x i16>, ptr %B
- %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
+ %tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %A, <8 x i16> %tmp3)
ret <8 x i16> %tmp4
}
-define <2 x i32> @sqdmulh_lane_2s(ptr %A, ptr %B) nounwind {
+define <2 x i32> @sqdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
; CHECK-LABEL: sqdmulh_lane_2s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmulh.2s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %A, <2 x i32> %tmp3)
ret <2 x i32> %tmp4
}
-define <4 x i32> @sqdmulh_lane_4s(ptr %A, ptr %B) nounwind {
+define <4 x i32> @sqdmulh_lane_4s(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: sqdmulh_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqdmulh.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i32>, ptr %A
- %tmp2 = load <4 x i32>, ptr %B
- %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
+ %tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %A, <4 x i32> %tmp3)
ret <4 x i32> %tmp4
}
@@ -1327,59 +1277,45 @@ define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
ret i32 %tmp2
}
-define <4 x i16> @sqrdmulh_lane_4h(ptr %A, ptr %B) nounwind {
+define <4 x i16> @sqrdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
; CHECK-LABEL: sqrdmulh_lane_4h:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqrdmulh.4h v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %A, <4 x i16> %tmp3)
ret <4 x i16> %tmp4
}
-define <8 x i16> @sqrdmulh_lane_8h(ptr %A, ptr %B) nounwind {
+define <8 x i16> @sqrdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: sqrdmulh_lane_8h:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrdmulh.8h v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i16>, ptr %A
- %tmp2 = load <8 x i16>, ptr %B
- %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
+ %tmp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %A, <8 x i16> %tmp3)
ret <8 x i16> %tmp4
}
-define <2 x i32> @sqrdmulh_lane_2s(ptr %A, ptr %B) nounwind {
+define <2 x i32> @sqrdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
; CHECK-LABEL: sqrdmulh_lane_2s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqrdmulh.2s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %A, <2 x i32> %tmp3)
ret <2 x i32> %tmp4
}
-define <4 x i32> @sqrdmulh_lane_4s(ptr %A, ptr %B) nounwind {
+define <4 x i32> @sqrdmulh_lane_4s(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: sqrdmulh_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrdmulh.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i32>, ptr %A
- %tmp2 = load <4 x i32>, ptr %B
- %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
+ %tmp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %A, <4 x i32> %tmp3)
ret <4 x i32> %tmp4
}
@@ -1395,221 +1331,169 @@ define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
ret i32 %tmp2
}
-define <4 x i32> @sqdmull_lane_4s(ptr %A, ptr %B) nounwind {
+define <4 x i32> @sqdmull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
; CHECK-LABEL: sqdmull_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmull.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp3)
ret <4 x i32> %tmp4
}
-define <2 x i64> @sqdmull_lane_2d(ptr %A, ptr %B) nounwind {
+define <2 x i64> @sqdmull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
; CHECK-LABEL: sqdmull_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmull.2d v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp3)
ret <2 x i64> %tmp4
}
-define <4 x i32> @sqdmull2_lane_4s(ptr %A, ptr %B) nounwind {
+define <4 x i32> @sqdmull2_lane_4s(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: sqdmull2_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #8]
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: sqdmull.4s v0, v0, v1[1]
+; CHECK-NEXT: sqdmull2.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %load1 = load <8 x i16>, ptr %A
- %load2 = load <8 x i16>, ptr %B
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i32> %tmp4
}
-define <2 x i64> @sqdmull2_lane_2d(ptr %A, ptr %B) nounwind {
+define <2 x i64> @sqdmull2_lane_2d(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: sqdmull2_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #8]
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: sqdmull.2d v0, v0, v1[1]
+; CHECK-NEXT: sqdmull2.2d v0, v0, v1[1]
; CHECK-NEXT: ret
- %load1 = load <4 x i32>, ptr %A
- %load2 = load <4 x i32>, ptr %B
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i64> %tmp4
}
-define <4 x i32> @umull_lane_4s(ptr %A, ptr %B) nounwind {
+define <4 x i32> @umull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
; CHECK-LABEL: umull_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: umull.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp3)
ret <4 x i32> %tmp4
}
-define <2 x i64> @umull_lane_2d(ptr %A, ptr %B) nounwind {
+define <2 x i64> @umull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
; CHECK-LABEL: umull_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: umull.2d v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp3)
ret <2 x i64> %tmp4
}
-define <4 x i32> @smull_lane_4s(ptr %A, ptr %B) nounwind {
+define <4 x i32> @smull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
; CHECK-LABEL: smull_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: smull.4s v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp3)
ret <4 x i32> %tmp4
}
-define <2 x i64> @smull_lane_2d(ptr %A, ptr %B) nounwind {
+define <2 x i64> @smull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
; CHECK-LABEL: smull_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: smull.2d v0, v0, v1[1]
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp3)
ret <2 x i64> %tmp4
}
-define <4 x i32> @smlal_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
+define <4 x i32> @smlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: smlal_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: smlal.4s v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: smlal.4s v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = load <4 x i32>, ptr %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = add <4 x i32> %tmp3, %tmp5
+ %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
+ %tmp6 = add <4 x i32> %C, %tmp5
ret <4 x i32> %tmp6
}
-define <2 x i64> @smlal_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
+define <2 x i64> @smlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: smlal_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: smlal.2d v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: smlal.2d v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = load <2 x i64>, ptr %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = add <2 x i64> %tmp3, %tmp5
+ %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
+ %tmp6 = add <2 x i64> %C, %tmp5
ret <2 x i64> %tmp6
}
-define <4 x i32> @sqdmlal_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
+define <4 x i32> @sqdmlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: sqdmlal_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: sqdmlal.4s v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: sqdmlal.4s v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = load <4 x i32>, ptr %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+ %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
+ %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %C, <4 x i32> %tmp5)
ret <4 x i32> %tmp6
}
-define <2 x i64> @sqdmlal_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
+define <2 x i64> @sqdmlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: sqdmlal_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: sqdmlal.2d v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: sqdmlal.2d v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = load <2 x i64>, ptr %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+ %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
+ %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %C, <2 x i64> %tmp5)
ret <2 x i64> %tmp6
}
-define <4 x i32> @sqdmlal2_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
+define <4 x i32> @sqdmlal2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: sqdmlal2_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1]
-; CHECK-NEXT: sqdmlal.4s v0, v1, v2[1]
+; CHECK-NEXT: sqdmlal2.4s v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %load1 = load <8 x i16>, ptr %A
- %load2 = load <8 x i16>, ptr %B
- %tmp3 = load <4 x i32>, ptr %C
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+ %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %C, <4 x i32> %tmp5)
ret <4 x i32> %tmp6
}
-define <2 x i64> @sqdmlal2_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
+define <2 x i64> @sqdmlal2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: sqdmlal2_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1]
-; CHECK-NEXT: sqdmlal.2d v0, v1, v2[1]
+; CHECK-NEXT: sqdmlal2.2d v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %load1 = load <4 x i32>, ptr %A
- %load2 = load <4 x i32>, ptr %B
- %tmp3 = load <2 x i64>, ptr %C
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+ %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %C, <2 x i64> %tmp5)
ret <2 x i64> %tmp6
}
@@ -1715,176 +1599,134 @@ define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64)
-define <4 x i32> @umlal_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
+define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: umlal_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: umlal.4s v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umlal.4s v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = load <4 x i32>, ptr %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = add <4 x i32> %tmp3, %tmp5
+ %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
+ %tmp6 = add <4 x i32> %C, %tmp5
ret <4 x i32> %tmp6
}
-define <2 x i64> @umlal_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
+define <2 x i64> @umlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: umlal_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: umlal.2d v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umlal.2d v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = load <2 x i64>, ptr %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = add <2 x i64> %tmp3, %tmp5
+ %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
+ %tmp6 = add <2 x i64> %C, %tmp5
ret <2 x i64> %tmp6
}
-define <4 x i32> @smlsl_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
+define <4 x i32> @smlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: smlsl_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: smlsl.4s v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: smlsl.4s v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = load <4 x i32>, ptr %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = sub <4 x i32> %tmp3, %tmp5
+ %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
+ %tmp6 = sub <4 x i32> %C, %tmp5
ret <4 x i32> %tmp6
}
-define <2 x i64> @smlsl_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
+define <2 x i64> @smlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: smlsl_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: smlsl.2d v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: smlsl.2d v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = load <2 x i64>, ptr %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = sub <2 x i64> %tmp3, %tmp5
+ %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
+ %tmp6 = sub <2 x i64> %C, %tmp5
ret <2 x i64> %tmp6
}
-define <4 x i32> @sqdmlsl_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
+define <4 x i32> @sqdmlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: sqdmlsl_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: sqdmlsl.4s v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: sqdmlsl.4s v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = load <4 x i32>, ptr %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+ %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
+ %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %C, <4 x i32> %tmp5)
ret <4 x i32> %tmp6
}
-define <2 x i64> @sqdmlsl_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
+define <2 x i64> @sqdmlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: sqdmlsl_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: sqdmlsl.2d v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: sqdmlsl.2d v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = load <2 x i64>, ptr %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+ %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
+ %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %C, <2 x i64> %tmp5)
ret <2 x i64> %tmp6
}
-define <4 x i32> @sqdmlsl2_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
+define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: sqdmlsl2_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1]
-; CHECK-NEXT: sqdmlsl.4s v0, v1, v2[1]
+; CHECK-NEXT: sqdmlsl2.4s v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %load1 = load <8 x i16>, ptr %A
- %load2 = load <8 x i16>, ptr %B
- %tmp3 = load <4 x i32>, ptr %C
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+ %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %C, <4 x i32> %tmp5)
ret <4 x i32> %tmp6
}
-define <2 x i64> @sqdmlsl2_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
+define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: sqdmlsl2_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1]
-; CHECK-NEXT: sqdmlsl.2d v0, v1, v2[1]
+; CHECK-NEXT: sqdmlsl2.2d v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %load1 = load <4 x i32>, ptr %A
- %load2 = load <4 x i32>, ptr %B
- %tmp3 = load <2 x i64>, ptr %C
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %tmp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+ %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %C, <2 x i64> %tmp5)
ret <2 x i64> %tmp6
}
-define <4 x i32> @umlsl_lane_4s(ptr %A, ptr %B, ptr %C) nounwind {
+define <4 x i32> @umlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: umlsl_lane_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: umlsl.4s v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umlsl.4s v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = load <4 x i16>, ptr %B
- %tmp3 = load <4 x i32>, ptr %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = sub <4 x i32> %tmp3, %tmp5
+ %tmp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %tmp4)
+ %tmp6 = sub <4 x i32> %C, %tmp5
ret <4 x i32> %tmp6
}
-define <2 x i64> @umlsl_lane_2d(ptr %A, ptr %B, ptr %C) nounwind {
+define <2 x i64> @umlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: umlsl_lane_2d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr d2, [x0]
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: umlsl.2d v0, v2, v1[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umlsl.2d v2, v0, v1[1]
+; CHECK-NEXT: mov.16b v0, v2
; CHECK-NEXT: ret
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = load <2 x i32>, ptr %B
- %tmp3 = load <2 x i64>, ptr %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = sub <2 x i64> %tmp3, %tmp5
+ %tmp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %tmp4)
+ %tmp6 = sub <2 x i64> %C, %tmp5
ret <2 x i64> %tmp6
}
diff --git a/llvm/test/CodeGen/ARM/vdup.ll b/llvm/test/CodeGen/ARM/vdup.ll
index 9a792035a469b..3f6d38c929b98 100644
--- a/llvm/test/CodeGen/ARM/vdup.ll
+++ b/llvm/test/CodeGen/ARM/vdup.ll
@@ -219,103 +219,79 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
ret <4 x float> %tmp2
}
-define <8 x i8> @vduplane8(ptr %A) nounwind {
+define arm_aapcs_vfpcc <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
; CHECK-LABEL: vduplane8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vdup.8 d16, d16[1]
-; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vdup.8 d0, d0[1]
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <8 x i8>, ptr %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <8 x i8> %tmp2
}
-define <4 x i16> @vduplane16(ptr %A) nounwind {
+define arm_aapcs_vfpcc <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
; CHECK-LABEL: vduplane16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vdup.16 d16, d16[1]
-; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vdup.16 d0, d0[1]
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x i16> %tmp2
}
-define <2 x i32> @vduplane32(ptr %A) nounwind {
+define arm_aapcs_vfpcc <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
; CHECK-LABEL: vduplane32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vdup.32 d16, d16[1]
-; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vdup.32 d0, d0[1]
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
ret <2 x i32> %tmp2
}
-define <2 x float> @vduplanefloat(ptr %A) nounwind {
+define arm_aapcs_vfpcc <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
; CHECK-LABEL: vduplanefloat:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vdup.32 d16, d16[1]
-; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vdup.32 d0, d0[1]
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <2 x float>, ptr %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+ %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
ret <2 x float> %tmp2
}
-define <16 x i8> @vduplaneQ8(ptr %A) nounwind {
+define arm_aapcs_vfpcc <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
; CHECK-LABEL: vduplaneQ8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vdup.8 q8, d16[1]
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: vdup.8 q0, d0[1]
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <8 x i8>, ptr %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <16 x i8> %tmp2
}
-define <8 x i16> @vduplaneQ16(ptr %A) nounwind {
+define arm_aapcs_vfpcc <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
; CHECK-LABEL: vduplaneQ16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vdup.16 q8, d16[1]
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: vdup.16 q0, d0[1]
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <8 x i16> %tmp2
}
-define <4 x i32> @vduplaneQ32(ptr %A) nounwind {
+define arm_aapcs_vfpcc <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
; CHECK-LABEL: vduplaneQ32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vdup.32 q8, d16[1]
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: vdup.32 q0, d0[1]
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x i32> %tmp2
}
-define <4 x float> @vduplaneQfloat(ptr %A) nounwind {
+define arm_aapcs_vfpcc <4 x float> @vduplaneQfloat(<2 x float> %A) nounwind {
; CHECK-LABEL: vduplaneQfloat:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vdup.32 q8, d16[1]
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: vdup.32 q0, d0[1]
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <2 x float>, ptr %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x float> %tmp2
}
More information about the llvm-commits mailing list