[llvm] r293602 - [X86] Add test cases for AVX1 broadcast fallback patterns when load can't be folded.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 30 21:18:27 PST 2017
Author: ctopper
Date: Mon Jan 30 23:18:27 2017
New Revision: 293602
URL: http://llvm.org/viewvc/llvm-project?rev=293602&view=rev
Log:
[X86] Add test cases for AVX1 broadcast fallback patterns when load can't be folded.
Also add test cases for the 8-element vector tests that use insertelement to populate all eight elements.
Modified:
llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
Modified: llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll?rev=293602&r1=293601&r2=293602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll Mon Jan 30 23:18:27 2017
@@ -28,6 +28,40 @@ entry:
ret <4 x i64> %vecinit6.i
}
+define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp { ; splat a loaded i64 into all 4 lanes; %q is also stored, so the fallback (non-vbroadcast) sequence is expected
+; X32-LABEL: A2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%ecx), %edx
+; X32-NEXT: movl 4(%ecx), %ecx
+; X32-NEXT: movl %ecx, 4(%eax)
+; X32-NEXT: movl %edx, (%eax)
+; X32-NEXT: vmovd %edx, %xmm0
+; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: A2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: movq %rax, (%rsi)
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
+entry:
+ %q = load i64, i64* %ptr, align 8
+ store i64 %q, i64* %ptr2, align 8 ; extra use of %q, so the load cannot be folded into a broadcast. NOTE(review): function is marked readnone yet stores -- confirm the attribute
+ %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+ %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
+ %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
+ %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
+ ret <4 x i64> %vecinit6.i
+}
+
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: B:
; X32: ## BB#0: ## %entry
@@ -48,6 +82,64 @@ entry:
ret <8 x i32> %vecinit6.i
}
+define <8 x i32> @B2(i32* %ptr) nounwind uwtable readnone ssp { ; splat a loaded i32 via insertelement into all 8 lanes; %q has no other use, and a vbroadcastss from memory is expected
+; X32-LABEL: B2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: B2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %ymm0
+; X64-NEXT: retq
+entry:
+ %q = load i32, i32* %ptr, align 4
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
+ %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
+ %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
+ %vecinit8.i = insertelement <8 x i32> %vecinit6.i, i32 %q, i32 4
+ %vecinit10.i = insertelement <8 x i32> %vecinit8.i, i32 %q, i32 5
+ %vecinit12.i = insertelement <8 x i32> %vecinit10.i, i32 %q, i32 6
+ %vecinit14.i = insertelement <8 x i32> %vecinit12.i, i32 %q, i32 7
+ ret <8 x i32> %vecinit14.i
+}
+
+define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp { ; splat a loaded i32 into all 8 lanes; %q is also stored, so the fallback (non-vbroadcast) sequence is expected
+; X32-LABEL: B3:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%ecx), %ecx
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: B3:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: movl %eax, (%rsi)
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
+entry:
+ %q = load i32, i32* %ptr, align 4
+ store i32 %q, i32* %ptr2, align 4 ; extra use of %q, so the load cannot be folded into a broadcast. NOTE(review): function is marked readnone yet stores -- confirm the attribute
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
+ %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
+ %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
+ %vecinit8.i = insertelement <8 x i32> %vecinit6.i, i32 %q, i32 4
+ %vecinit10.i = insertelement <8 x i32> %vecinit8.i, i32 %q, i32 5
+ %vecinit12.i = insertelement <8 x i32> %vecinit10.i, i32 %q, i32 6
+ %vecinit14.i = insertelement <8 x i32> %vecinit12.i, i32 %q, i32 7
+ ret <8 x i32> %vecinit14.i
+}
+
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: C:
; X32: ## BB#0: ## %entry
@@ -68,6 +160,34 @@ entry:
ret <4 x double> %vecinit6.i
}
+define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp { ; splat a loaded double into all 4 lanes; %q is also stored, so the fallback (non-vbroadcast) sequence is expected
+; X32-LABEL: C2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vmovsd %xmm0, (%eax)
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: C2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vmovsd %xmm0, (%rsi)
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
+entry:
+ %q = load double, double* %ptr, align 8
+ store double %q, double* %ptr2, align 8 ; extra use of %q, so the load cannot be folded into a broadcast. NOTE(review): function is marked readnone yet stores -- confirm the attribute
+ %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
+ %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
+ %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
+ %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
+ ret <4 x double> %vecinit6.i
+}
+
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D:
; X32: ## BB#0: ## %entry
@@ -88,6 +208,62 @@ entry:
ret <8 x float> %vecinit6.i
}
+define <8 x float> @D2(float* %ptr) nounwind uwtable readnone ssp { ; splat a loaded float via insertelement into all 8 lanes; %q has no other use, and a vbroadcastss from memory is expected
+; X32-LABEL: D2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: D2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %ymm0
+; X64-NEXT: retq
+entry:
+ %q = load float, float* %ptr, align 4
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
+ %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
+ %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
+ %vecinit8.i = insertelement <8 x float> %vecinit6.i, float %q, i32 4
+ %vecinit10.i = insertelement <8 x float> %vecinit8.i, float %q, i32 5
+ %vecinit12.i = insertelement <8 x float> %vecinit10.i, float %q, i32 6
+ %vecinit14.i = insertelement <8 x float> %vecinit12.i, float %q, i32 7
+ ret <8 x float> %vecinit14.i
+}
+
+define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp { ; splat a loaded float into all 8 lanes; %q is also stored, so the fallback (non-vbroadcast) sequence is expected
+; X32-LABEL: D3:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vmovd %xmm0, (%eax)
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: D3:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vmovd %xmm0, (%rsi)
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
+entry:
+ %q = load float, float* %ptr, align 4
+ store float %q, float* %ptr2, align 4 ; extra use of %q, so the load cannot be folded into a broadcast. NOTE(review): function is marked readnone yet stores -- confirm the attribute
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
+ %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
+ %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
+ %vecinit8.i = insertelement <8 x float> %vecinit6.i, float %q, i32 4
+ %vecinit10.i = insertelement <8 x float> %vecinit8.i, float %q, i32 5
+ %vecinit12.i = insertelement <8 x float> %vecinit10.i, float %q, i32 6
+ %vecinit14.i = insertelement <8 x float> %vecinit12.i, float %q, i32 7
+ ret <8 x float> %vecinit14.i
+}
+
;;;; 128-bit versions
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
@@ -110,6 +286,32 @@ entry:
ret <4 x float> %vecinit6.i
}
+define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp { ; 128-bit case: splat a loaded float into all 4 lanes; %q is also stored, so the fallback (non-vbroadcast) sequence is expected
+; X32-LABEL: e2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vmovd %xmm0, (%eax)
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: e2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vmovd %xmm0, (%rsi)
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT: retq
+entry:
+ %q = load float, float* %ptr, align 4
+ store float %q, float* %ptr2, align 4 ; extra use of %q, so the load cannot be folded into a broadcast. NOTE(review): function is marked readnone yet stores -- confirm the attribute
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+ %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+ %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+ ret <4 x float> %vecinit6.i
+}
+
; Don't broadcast constants on pre-AVX2 hardware.
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
@@ -150,6 +352,34 @@ entry:
ret <4 x i32> %vecinit6.i
}
+define <4 x i32> @F2(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp { ; 128-bit case: splat a loaded i32 into all 4 lanes; %q is also stored, so the fallback (non-vbroadcast) sequence is expected
+; X32-LABEL: F2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%ecx), %ecx
+; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: F2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, (%rsi)
+; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT: retq
+entry:
+ %q = load i32, i32* %ptr, align 4
+ store i32 %q, i32* %ptr2, align 4 ; extra use of %q, so the load cannot be folded into a broadcast. NOTE(review): function is marked readnone yet stores -- confirm the attribute
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
+ %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
+ %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
+ ret <4 x i32> %vecinit6.i
+}
+
; FIXME: Pointer adjusted broadcasts
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
@@ -382,6 +612,36 @@ entry:
ret <2 x i64> %vecinit2.i
}
+define <2 x i64> @G2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp { ; 128-bit case: splat a loaded i64 into both lanes; %q is also stored, so the fallback (non-vbroadcast) sequence is expected
+; X32-LABEL: G2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%ecx), %edx
+; X32-NEXT: movl 4(%ecx), %ecx
+; X32-NEXT: movl %ecx, 4(%eax)
+; X32-NEXT: movl %edx, (%eax)
+; X32-NEXT: vmovd %edx, %xmm0
+; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: G2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, (%rsi)
+; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT: retq
+entry:
+ %q = load i64, i64* %ptr, align 8
+ store i64 %q, i64* %ptr2, align 8 ; extra use of %q, so the load cannot be folded into a broadcast. NOTE(review): function is marked readnone yet stores -- confirm the attribute
+ %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+ %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
+ ret <2 x i64> %vecinit2.i
+}
+
define <4 x i32> @H(<4 x i32> %a) {
; X32-LABEL: H:
; X32: ## BB#0: ## %entry
@@ -413,6 +673,30 @@ entry:
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
ret <2 x double> %vecinit2.i
+}
+
+define <2 x double> @I2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp { ; 128-bit case: splat a loaded double into both lanes; %q is also stored, so a register vmovddup is expected rather than a load-folded form
+; X32-LABEL: I2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vmovsd %xmm0, (%eax)
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: I2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vmovsd %xmm0, (%rsi)
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT: retq
+entry:
+ %q = load double, double* %ptr, align 4 ; NOTE(review): align 4 on a double, whereas @C2 uses align 8 -- confirm this is intentional
+ store double %q, double* %ptr2, align 4 ; extra use of %q, so the load cannot be folded into a broadcast. NOTE(review): function is marked readnone yet stores -- confirm the attribute
+ %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+ %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+ ret <2 x double> %vecinit2.i
}
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
More information about the llvm-commits mailing list