[llvm] r267339 - [X86][SSE] Demonstrate issue with decoding shuffle masks that have been lowered as rematerialized constants on scalar unit

Sun Apr 24 06:45:30 PDT 2016

Author: rksimon
Date: Sun Apr 24 08:45:30 2016
New Revision: 267339

URL: http://llvm.org/viewvc/llvm-project?rev=267339&view=rev
Log:
[X86][SSE] Demonstrate issue with decoding shuffle masks that have been lowered as rematerialized constants on scalar unit

Found whilst investigating PR27472

Modified:
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=267339&r1=267338&r2=267339&view=diff
==============================================================================

--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Sun Apr 24 08:45:30 2016
@@ -9,6 +9,28 @@
 
 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
 
+define <16 x i8> @combine_vpshufb_zero(<16 x i8> %a0) {
+; SSE-LABEL: combine_vpshufb_zero:
+; SSE:       # BB#0:
+; SSE-NEXT:    movl $128, %eax
+; SSE-NEXT:    movd %eax, %xmm1
+; SSE-NEXT:    pshufb %xmm1, %xmm0
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_vpshufb_zero:
+; AVX:       # BB#0:
+; AVX-NEXT:    movl $128, %eax
+; AVX-NEXT:    vmovd %eax, %xmm1
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %res2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res1, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
+  ret <16 x i8> %res2
+}
+
 define <4 x float> @combine_pshufb_movddup(<4 x float> %a0) {
 ; SSE-LABEL: combine_pshufb_movddup:
 ; SSE:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=267339&r1=267338&r2=267339&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Sun Apr 24 08:45:30 2016
@@ -20,6 +20,21 @@ define <16 x i8> @combine_vpperm_identit
   ret <16 x i8> %res1
 }
 
+define <16 x i8> @combine_vpperm_zero(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: combine_vpperm_zero:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl $128, %eax
+; CHECK-NEXT:    vmovd %eax, %xmm2
+; CHECK-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    retq
+  %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %res2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res1, <16 x i8> undef, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
+  ret <16 x i8> %res2
+}
+
 define <16 x i8> @combine_vpperm_identity_bitcast(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: combine_vpperm_identity_bitcast:
 ; CHECK:       # BB#0: