[llvm-branch-commits] [llvm] c1899cd - [X86][AVX] Add PR48908 shuffle test case

Tom Stellard via llvm-branch-commits <llvm-branch-commits at lists.llvm.org>
Wed Feb 3 11:27:53 PST 2021


Author: Simon Pilgrim
Date: 2021-02-03T11:26:33-08:00
New Revision: c1899cd5102dbdacd006fdb33db075319ccc933f

URL: https://github.com/llvm/llvm-project/commit/c1899cd5102dbdacd006fdb33db075319ccc933f
DIFF: https://github.com/llvm/llvm-project/commit/c1899cd5102dbdacd006fdb33db075319ccc933f.diff

LOG: [X86][AVX] Add PR48908 shuffle test case

(cherry picked from commit da8845fc3d3bb0b0e133f020931440511fa72723)
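
The test reduces PR48908 to a 3-source interleave: the three <4 x double>
inputs are treated as a 4x3 matrix of doubles and stored back transposed
across the three outputs (see the shufflevector IR at the end of the diff
below). A minimal scalar sketch of the same element mapping, assuming plain
C arrays in place of the vector types (an illustration, not part of the
commit):

/* Scalar model of @PR48908: out[k] takes element k/3 of vector k%3,
 * i.e. a 4x3 -> 3x4 transpose of the twelve input doubles. */
#include <stdio.h>

static void pr48908_scalar(const double v0[4], const double v1[4],
                           const double v2[4], double out0[4],
                           double out1[4], double out2[4]) {
    const double *v[3] = {v0, v1, v2};
    double out[12];
    for (int k = 0; k < 12; k++)
        out[k] = v[k % 3][k / 3];
    for (int i = 0; i < 4; i++) {
        out0[i] = out[i];
        out1[i] = out[4 + i];
        out2[i] = out[8 + i];
    }
}

int main(void) {
    double v0[4] = {0, 1, 2, 3}, v1[4] = {4, 5, 6, 7}, v2[4] = {8, 9, 10, 11};
    double o0[4], o1[4], o2[4];
    pr48908_scalar(v0, v1, v2, o0, o1, o2);
    /* Expect rows: 0 4 8 1 / 5 9 2 6 / 10 3 7 11, matching the
     * shuffle masks in the IR below. */
    for (int i = 0; i < 4; i++)
        printf("%g %g %g\n", o0[i], o1[i], o2[i]);
    return 0;
}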

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 979c365acfd7..3da83b25d363 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -436,6 +436,157 @@ entry:
   unreachable
 }
 
+define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double>* noalias %out0, <4 x double>* noalias %out1, <4 x double>* noalias %out2) {
+; X86-AVX1-LABEL: PR48908:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
+; X86-AVX1-NEXT:    vmovapd %ymm4, (%edx)
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
+; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
+; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X86-AVX1-NEXT:    vmovapd %ymm3, (%ecx)
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X86-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X86-AVX1-NEXT:    vmovapd %ymm0, (%eax)
+; X86-AVX1-NEXT:    vzeroupper
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: PR48908:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X86-AVX2-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
+; X86-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1]
+; X86-AVX2-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
+; X86-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3]
+; X86-AVX2-NEXT:    vmovapd %ymm3, (%edx)
+; X86-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3]
+; X86-AVX2-NEXT:    vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0]
+; X86-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X86-AVX2-NEXT:    vmovapd %ymm3, (%ecx)
+; X86-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X86-AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X86-AVX2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X86-AVX2-NEXT:    vmovapd %ymm0, (%eax)
+; X86-AVX2-NEXT:    vzeroupper
+; X86-AVX2-NEXT:    retl
+;
+; X86-AVX512-LABEL: PR48908:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
+; X86-AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2]
+; X86-AVX512-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3]
+; X86-AVX512-NEXT:    vmovapd {{.*#+}} ymm5 = [0,0,3,0,8,0,1,0]
+; X86-AVX512-NEXT:    vpermt2pd %zmm2, %zmm5, %zmm3
+; X86-AVX512-NEXT:    vmovapd %ymm3, (%edx)
+; X86-AVX512-NEXT:    vmovapd {{.*#+}} ymm3 = [0,0,3,0,10,0,1,0]
+; X86-AVX512-NEXT:    vpermt2pd %zmm0, %zmm3, %zmm4
+; X86-AVX512-NEXT:    vmovapd %ymm4, (%ecx)
+; X86-AVX512-NEXT:    vmovapd {{.*#+}} ymm3 = <3,0,11,0,u,u,u,u>
+; X86-AVX512-NEXT:    vpermi2pd %zmm1, %zmm0, %zmm3
+; X86-AVX512-NEXT:    vmovapd {{.*#+}} ymm0 = [2,0,8,0,9,0,3,0]
+; X86-AVX512-NEXT:    vpermi2pd %zmm3, %zmm2, %zmm0
+; X86-AVX512-NEXT:    vmovapd %ymm0, (%eax)
+; X86-AVX512-NEXT:    vzeroupper
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX1-LABEL: PR48908:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
+; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
+; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
+; X64-AVX1-NEXT:    vmovapd %ymm4, (%rdi)
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
+; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
+; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X64-AVX1-NEXT:    vmovapd %ymm3, (%rsi)
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X64-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X64-AVX1-NEXT:    vmovapd %ymm0, (%rdx)
+; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: PR48908:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X64-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X64-AVX2-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
+; X64-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1]
+; X64-AVX2-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
+; X64-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3]
+; X64-AVX2-NEXT:    vmovapd %ymm3, (%rdi)
+; X64-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3]
+; X64-AVX2-NEXT:    vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0]
+; X64-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X64-AVX2-NEXT:    vmovapd %ymm3, (%rsi)
+; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X64-AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X64-AVX2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X64-AVX2-NEXT:    vmovapd %ymm0, (%rdx)
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: PR48908:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
+; X64-AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; X64-AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; X64-AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X64-AVX512-NEXT:    vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2]
+; X64-AVX512-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X64-AVX512-NEXT:    vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3]
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm5 = [0,3,8,1]
+; X64-AVX512-NEXT:    vpermt2pd %zmm2, %zmm5, %zmm3
+; X64-AVX512-NEXT:    vmovapd %ymm3, (%rdi)
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm3 = [0,3,10,1]
+; X64-AVX512-NEXT:    vpermt2pd %zmm0, %zmm3, %zmm4
+; X64-AVX512-NEXT:    vmovapd %ymm4, (%rsi)
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm3 = <3,11,u,u>
+; X64-AVX512-NEXT:    vpermi2pd %zmm1, %zmm0, %zmm3
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm0 = [2,8,9,3]
+; X64-AVX512-NEXT:    vpermi2pd %zmm3, %zmm2, %zmm0
+; X64-AVX512-NEXT:    vmovapd %ymm0, (%rdx)
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %t0 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+  %t1 = shufflevector <4 x double> %v1, <4 x double> %v2, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
+  %r0 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> <i32 0, i32 3, i32 6, i32 1>
+  store <4 x double> %r0, <4 x double>* %out0, align 32
+  %r1 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> <i32 4, i32 7, i32 2, i32 5>
+  store <4 x double> %r1, <4 x double>* %out1, align 32
+  %t2 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
+  %r2 = shufflevector <4 x double> %t2, <4 x double> %v2, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+  store <4 x double> %r2, <4 x double>* %out2, align 32
+  ret void
+}
+
 define <4 x i64> @concat_self_v4i64(<2 x i64> %x) {
 ; AVX1-LABEL: concat_self_v4i64:
 ; AVX1:       # %bb.0:
