[llvm] r293663 - [X86][XOP] Add test showing failure to combine build vector to vpermil2ps shuffle

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 31 10:10:35 PST 2017


Author: rksimon
Date: Tue Jan 31 12:10:34 2017
New Revision: 293663

URL: http://llvm.org/viewvc/llvm-project?rev=293663&view=rev
Log:
[X86][XOP] Add test showing failure to combine build vector to vpermil2ps shuffle

Modified:
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=293663&r1=293662&r2=293663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Tue Jan 31 12:10:34 2017
@@ -341,6 +341,34 @@ define void @buildvector_v4f32_0404(floa
   ret void
 }
 
+define void @buildvector_v4f32_07z6(float %a, <4 x float> %b, <4 x float>* %ptr) { ; builds <%a, %b[3], 0.0, %b[2]> and stores it to %ptr; the "07z6" suffix presumably encodes that lane pattern (TODO confirm naming)
+; X32-LABEL: buildvector_v4f32_07z6:
+; X32:       # BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; X32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],zero,xmm0[2]
+; X32-NEXT:    vmovaps %xmm0, (%eax)
+; X32-NEXT:    retl
+;
+; X64-LABEL: buildvector_v4f32_07z6:
+; X64:       # BB#0:
+; X64-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; X64-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm1[2]
+; X64-NEXT:    vmovaps %xmm0, (%rdi)
+; X64-NEXT:    retq
+  %b2 = extractelement <4 x float> %b, i32 2 ; scalar from element 2 of %b
+  %b3 = extractelement <4 x float> %b, i32 3 ; scalar from element 3 of %b
+  %v0 = insertelement <4 x float> undef, float  %a, i32 0 ; element 0 = %a, remaining elements still undef
+  %v1 = insertelement <4 x float> %v0,   float %b3, i32 1 ; element 1 = %b[3]
+  %v2 = insertelement <4 x float> %v1,   float 0.0, i32 2 ; element 2 = constant 0.0
+  %v3 = insertelement <4 x float> %v2,   float %b2, i32 3 ; element 3 = %b[2]; all four elements now defined
+  store <4 x float> %v3, <4 x float>* %ptr ; write the assembled vector through %ptr
+  ret void
+}
+
 define <2 x double> @constant_fold_vpermil2pd() {
 ; X32-LABEL: constant_fold_vpermil2pd:
 ; X32:       # BB#0:




More information about the llvm-commits mailing list