[llvm] r293602 - [X86] Add test cases for AVX1 broadcast fallback patterns when load can't be folded.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 30 21:18:27 PST 2017


Author: ctopper
Date: Mon Jan 30 23:18:27 2017
New Revision: 293602

URL: http://llvm.org/viewvc/llvm-project?rev=293602&view=rev
Log:
[X86] Add test cases for AVX1 broadcast fallback patterns when load can't be folded.

Also add test cases that do an insertelement to all elements for the 8 element vector tests.

Modified:
    llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll

Modified: llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll?rev=293602&r1=293601&r2=293602&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll Mon Jan 30 23:18:27 2017
@@ -28,6 +28,40 @@ entry:
   ret <4 x i64> %vecinit6.i
 }
 
+define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
+; X32-LABEL: A2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl (%ecx), %edx
+; X32-NEXT:    movl 4(%ecx), %ecx
+; X32-NEXT:    movl %ecx, 4(%eax)
+; X32-NEXT:    movl %edx, (%eax)
+; X32-NEXT:    vmovd %edx, %xmm0
+; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X32-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
+; X32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
+; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: A2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    vmovq %rax, %xmm0
+; X64-NEXT:    movq %rax, (%rsi)
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    retq
+entry:
+  %q = load i64, i64* %ptr, align 8
+  store i64 %q, i64* %ptr2, align 8 ; to create a chain to prevent broadcast
+  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
+  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
+  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
+  ret <4 x i64> %vecinit6.i
+}
+
 define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
 ; X32-LABEL: B:
 ; X32:       ## BB#0: ## %entry
@@ -48,6 +82,64 @@ entry:
   ret <8 x i32> %vecinit6.i
 }
 
+define <8 x i32> @B2(i32* %ptr) nounwind uwtable readnone ssp {
+; X32-LABEL: B2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vbroadcastss (%eax), %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: B2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    vbroadcastss (%rdi), %ymm0
+; X64-NEXT:    retq
+entry:
+  %q = load i32, i32* %ptr, align 4
+  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
+  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
+  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
+  %vecinit8.i = insertelement <8 x i32> %vecinit6.i, i32 %q, i32 4
+  %vecinit10.i = insertelement <8 x i32> %vecinit8.i, i32 %q, i32 5
+  %vecinit12.i = insertelement <8 x i32> %vecinit10.i, i32 %q, i32 6
+  %vecinit14.i = insertelement <8 x i32> %vecinit12.i, i32 %q, i32 7
+  ret <8 x i32> %vecinit14.i
+}
+
+define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
+; X32-LABEL: B3:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl (%ecx), %ecx
+; X32-NEXT:    vmovd %ecx, %xmm0
+; X32-NEXT:    movl %ecx, (%eax)
+; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: B3:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    vmovd %eax, %xmm0
+; X64-NEXT:    movl %eax, (%rsi)
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    retq
+entry:
+  %q = load i32, i32* %ptr, align 4
+  store i32 %q, i32* %ptr2, align 4 ; to create a chain to prevent broadcast
+  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
+  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
+  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
+  %vecinit8.i = insertelement <8 x i32> %vecinit6.i, i32 %q, i32 4
+  %vecinit10.i = insertelement <8 x i32> %vecinit8.i, i32 %q, i32 5
+  %vecinit12.i = insertelement <8 x i32> %vecinit10.i, i32 %q, i32 6
+  %vecinit14.i = insertelement <8 x i32> %vecinit12.i, i32 %q, i32 7
+  ret <8 x i32> %vecinit14.i
+}
+
 define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
 ; X32-LABEL: C:
 ; X32:       ## BB#0: ## %entry
@@ -68,6 +160,34 @@ entry:
   ret <4 x double> %vecinit6.i
 }
 
+define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
+; X32-LABEL: C2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT:    vmovsd %xmm0, (%eax)
+; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: C2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    vmovsd %xmm0, (%rsi)
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    retq
+entry:
+  %q = load double, double* %ptr, align 8
+  store double %q, double* %ptr2, align 8 ; to create a chain to prevent broadcast
+  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
+  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
+  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
+  ret <4 x double> %vecinit6.i
+}
+
 define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
 ; X32-LABEL: D:
 ; X32:       ## BB#0: ## %entry
@@ -88,6 +208,62 @@ entry:
   ret <8 x float> %vecinit6.i
 }
 
+define <8 x float> @D2(float* %ptr) nounwind uwtable readnone ssp {
+; X32-LABEL: D2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vbroadcastss (%eax), %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: D2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    vbroadcastss (%rdi), %ymm0
+; X64-NEXT:    retq
+entry:
+  %q = load float, float* %ptr, align 4
+  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
+  %vecinit8.i = insertelement <8 x float> %vecinit6.i, float %q, i32 4
+  %vecinit10.i = insertelement <8 x float> %vecinit8.i, float %q, i32 5
+  %vecinit12.i = insertelement <8 x float> %vecinit10.i, float %q, i32 6
+  %vecinit14.i = insertelement <8 x float> %vecinit12.i, float %q, i32 7
+  ret <8 x float> %vecinit14.i
+}
+
+define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
+; X32-LABEL: D3:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    vmovd %xmm0, (%eax)
+; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: D3:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    vmovd %xmm0, (%rsi)
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    retq
+entry:
+  %q = load float, float* %ptr, align 4
+  store float %q, float* %ptr2, align 4 ; to create a chain to prevent broadcast
+  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
+  %vecinit8.i = insertelement <8 x float> %vecinit6.i, float %q, i32 4
+  %vecinit10.i = insertelement <8 x float> %vecinit8.i, float %q, i32 5
+  %vecinit12.i = insertelement <8 x float> %vecinit10.i, float %q, i32 6
+  %vecinit14.i = insertelement <8 x float> %vecinit12.i, float %q, i32 7
+  ret <8 x float> %vecinit14.i
+}
+
 ;;;; 128-bit versions
 
 define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
@@ -110,6 +286,32 @@ entry:
   ret <4 x float> %vecinit6.i
 }
 
+define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
+; X32-LABEL: e2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    vmovd %xmm0, (%eax)
+; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: e2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    vmovd %xmm0, (%rsi)
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT:    retq
+entry:
+  %q = load float, float* %ptr, align 4
+  store float %q, float* %ptr2, align 4 ; to create a chain to prevent broadcast
+  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+  ret <4 x float> %vecinit6.i
+}
+
 ; Don't broadcast constants on pre-AVX2 hardware.
 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
 ; X32-LABEL: _e2:
@@ -150,6 +352,34 @@ entry:
   ret <4 x i32> %vecinit6.i
 }
 
+define <4 x i32> @F2(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
+; X32-LABEL: F2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl (%ecx), %ecx
+; X32-NEXT:    movl %ecx, (%eax)
+; X32-NEXT:    vmovd %ecx, %xmm0
+; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: F2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl %eax, (%rsi)
+; X64-NEXT:    vmovd %eax, %xmm0
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT:    retq
+entry:
+  %q = load i32, i32* %ptr, align 4
+  store i32 %q, i32* %ptr2, align 4 ; to create a chain to prevent broadcast
+  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
+  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
+  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
+  ret <4 x i32> %vecinit6.i
+}
+
 ; FIXME: Pointer adjusted broadcasts
 
 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
@@ -382,6 +612,36 @@ entry:
   ret <2 x i64> %vecinit2.i
 }
 
+define <2 x i64> @G2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
+; X32-LABEL: G2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl (%ecx), %edx
+; X32-NEXT:    movl 4(%ecx), %ecx
+; X32-NEXT:    movl %ecx, 4(%eax)
+; X32-NEXT:    movl %edx, (%eax)
+; X32-NEXT:    vmovd %edx, %xmm0
+; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X32-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
+; X32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: G2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq %rax, (%rsi)
+; X64-NEXT:    vmovq %rax, %xmm0
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT:    retq
+entry:
+  %q = load i64, i64* %ptr, align 8
+  store i64 %q, i64* %ptr2, align 8 ; to create a chain to prevent broadcast
+  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
+  ret <2 x i64> %vecinit2.i
+}
+
 define <4 x i32> @H(<4 x i32> %a) {
 ; X32-LABEL: H:
 ; X32:       ## BB#0: ## %entry
@@ -413,6 +673,30 @@ entry:
   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
   ret <2 x double> %vecinit2.i
+}
+
+define <2 x double> @I2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
+; X32-LABEL: I2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT:    vmovsd %xmm0, (%eax)
+; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: I2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    vmovsd %xmm0, (%rsi)
+; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT:    retq
+entry:
+  %q = load double, double* %ptr, align 4
+  store double %q, double* %ptr2, align 4 ; to create a chain to prevent broadcast
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
 }
 
 define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {




More information about the llvm-commits mailing list