[llvm] r321291 - [X86] Promote v8i1 shuffles to v8i32 instead of v8i64 if we have VLX.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 21 10:44:06 PST 2017


Author: ctopper
Date: Thu Dec 21 10:44:06 2017
New Revision: 321291

URL: http://llvm.org/viewvc/llvm-project?rev=321291&view=rev
Log:
[X86] Promote v8i1 shuffles to v8i32 instead of v8i64 if we have VLX.

We should have equally good shuffle options for v8i32 with VLX. This was spotted during my attempts to remove 512-bit vectors from SKX.

We still use 512-bits for v16i1, v32i1, and v64i1. I'm less sure we can handle those well with narrower vectors. i32 and i64 element sizes get the best shuffle support.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
    llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=321291&r1=321290&r2=321291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 21 10:44:06 2017
@@ -14219,7 +14219,9 @@ static SDValue lower1BitVectorShuffle(co
     ExtVT = MVT::v4i32;
     break;
   case MVT::v8i1:
-    ExtVT = MVT::v8i64; // Take 512-bit type, more shuffles on KNL
+    // Take 512-bit type, more shuffles on KNL. If we have VLX use a 256-bit
+    // shuffle.
+    ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64;
     break;
   case MVT::v16i1:
     ExtVT = MVT::v16i32;

Modified: llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll?rev=321291&r1=321290&r2=321291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll Thu Dec 21 10:44:06 2017
@@ -249,9 +249,9 @@ define void @load_v32i1_broadcast_16_v8i
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    kmovd (%rdi), %k0
 ; AVX512-NEXT:    kshiftrd $16, %k0, %k0
-; AVX512-NEXT:    vpmovm2q %k0, %zmm2
-; AVX512-NEXT:    vpbroadcastq %xmm2, %zmm2
-; AVX512-NEXT:    vpmovq2m %zmm2, %k1
+; AVX512-NEXT:    vpmovm2d %k0, %ymm2
+; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
+; AVX512-NEXT:    vpmovd2m %ymm2, %k1
 ; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
 ; AVX512-NEXT:    vzeroupper
@@ -261,10 +261,11 @@ define void @load_v32i1_broadcast_16_v8i
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
 ; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
-; AVX512NOTDQ-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %zmm2
-; AVX512NOTDQ-NEXT:    vpsllq $63, %zmm2, %zmm2
-; AVX512NOTDQ-NEXT:    vptestmq %zmm2, %zmm2, %k1
+; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
+; AVX512NOTDQ-NEXT:    vpslld $31, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
 ; AVX512NOTDQ-NEXT:    vzeroupper
@@ -340,10 +341,10 @@ define void @load_v32i1_broadcast_31_v8i
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    kmovd (%rdi), %k0
 ; AVX512-NEXT:    kshiftrd $24, %k0, %k0
-; AVX512-NEXT:    vpmovm2q %k0, %zmm2
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7]
-; AVX512-NEXT:    vpermq %zmm2, %zmm3, %zmm2
-; AVX512-NEXT:    vpmovq2m %zmm2, %k1
+; AVX512-NEXT:    vpmovm2d %k0, %ymm2
+; AVX512-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
+; AVX512-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512-NEXT:    vpmovd2m %ymm2, %k1
 ; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
 ; AVX512-NEXT:    vzeroupper
@@ -353,11 +354,12 @@ define void @load_v32i1_broadcast_31_v8i
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
 ; AVX512NOTDQ-NEXT:    kshiftrd $24, %k0, %k1
-; AVX512NOTDQ-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7]
-; AVX512NOTDQ-NEXT:    vpermq %zmm2, %zmm3, %zmm2
-; AVX512NOTDQ-NEXT:    vpsllq $63, %zmm2, %zmm2
-; AVX512NOTDQ-NEXT:    vptestmq %zmm2, %zmm2, %k1
+; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
+; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512NOTDQ-NEXT:    vpslld $31, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
 ; AVX512NOTDQ-NEXT:    vzeroupper
@@ -433,9 +435,9 @@ define void @load_v64i1_broadcast_32_v8i
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    kmovq (%rdi), %k0
 ; AVX512-NEXT:    kshiftrq $32, %k0, %k0
-; AVX512-NEXT:    vpmovm2q %k0, %zmm2
-; AVX512-NEXT:    vpbroadcastq %xmm2, %zmm2
-; AVX512-NEXT:    vpmovq2m %zmm2, %k1
+; AVX512-NEXT:    vpmovm2d %k0, %ymm2
+; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
+; AVX512-NEXT:    vpmovd2m %ymm2, %k1
 ; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
 ; AVX512-NEXT:    vzeroupper
@@ -445,10 +447,11 @@ define void @load_v64i1_broadcast_32_v8i
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
 ; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
-; AVX512NOTDQ-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm2, %zmm2
-; AVX512NOTDQ-NEXT:    vpsllq $63, %zmm2, %zmm2
-; AVX512NOTDQ-NEXT:    vptestmq %zmm2, %zmm2, %k1
+; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
+; AVX512NOTDQ-NEXT:    vpslld $31, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
 ; AVX512NOTDQ-NEXT:    vzeroupper
@@ -555,10 +558,10 @@ define void @load_v64i1_broadcast_63_v8i
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    kmovq (%rdi), %k0
 ; AVX512-NEXT:    kshiftrq $56, %k0, %k0
-; AVX512-NEXT:    vpmovm2q %k0, %zmm2
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7]
-; AVX512-NEXT:    vpermq %zmm2, %zmm3, %zmm2
-; AVX512-NEXT:    vpmovq2m %zmm2, %k1
+; AVX512-NEXT:    vpmovm2d %k0, %ymm2
+; AVX512-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
+; AVX512-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512-NEXT:    vpmovd2m %ymm2, %k1
 ; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
 ; AVX512-NEXT:    vzeroupper
@@ -568,11 +571,12 @@ define void @load_v64i1_broadcast_63_v8i
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
 ; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
-; AVX512NOTDQ-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7]
-; AVX512NOTDQ-NEXT:    vpermq %zmm2, %zmm3, %zmm2
-; AVX512NOTDQ-NEXT:    vpsllq $63, %zmm2, %zmm2
-; AVX512NOTDQ-NEXT:    vptestmq %zmm2, %zmm2, %k1
+; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
+; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512NOTDQ-NEXT:    vpslld $31, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
 ; AVX512NOTDQ-NEXT:    vzeroupper
@@ -1054,9 +1058,9 @@ define void @load_v32i1_broadcast_16_v8i
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    kmovd (%rdi), %k0
 ; AVX512-NEXT:    kshiftrd $16, %k0, %k0
-; AVX512-NEXT:    vpmovm2q %k0, %zmm0
-; AVX512-NEXT:    vpbroadcastq %xmm0, %zmm0
-; AVX512-NEXT:    vpmovq2m %zmm0, %k0
+; AVX512-NEXT:    vpmovm2d %k0, %ymm0
+; AVX512-NEXT:    vpbroadcastd %xmm0, %ymm0
+; AVX512-NEXT:    vpmovd2m %ymm0, %k0
 ; AVX512-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1065,10 +1069,11 @@ define void @load_v32i1_broadcast_16_v8i
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
 ; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
-; AVX512NOTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm0, %zmm0
-; AVX512NOTDQ-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512NOTDQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm0, %ymm0
+; AVX512NOTDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
 ; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
 ; AVX512NOTDQ-NEXT:    vzeroupper
@@ -1159,10 +1164,10 @@ define void @load_v32i1_broadcast_31_v8i
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    kmovd (%rdi), %k0
 ; AVX512-NEXT:    kshiftrd $24, %k0, %k0
-; AVX512-NEXT:    vpmovm2q %k0, %zmm0
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
-; AVX512-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512-NEXT:    vpmovq2m %zmm0, %k0
+; AVX512-NEXT:    vpmovm2d %k0, %ymm0
+; AVX512-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
+; AVX512-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512-NEXT:    vpmovd2m %ymm0, %k0
 ; AVX512-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1171,11 +1176,12 @@ define void @load_v32i1_broadcast_31_v8i
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
 ; AVX512NOTDQ-NEXT:    kshiftrd $24, %k0, %k1
-; AVX512NOTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
-; AVX512NOTDQ-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512NOTDQ-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512NOTDQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
+; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512NOTDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
 ; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
 ; AVX512NOTDQ-NEXT:    vzeroupper
@@ -1266,9 +1272,9 @@ define void @load_v64i1_broadcast_32_v8i
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    kmovq (%rdi), %k0
 ; AVX512-NEXT:    kshiftrq $32, %k0, %k0
-; AVX512-NEXT:    vpmovm2q %k0, %zmm0
-; AVX512-NEXT:    vpbroadcastq %xmm0, %zmm0
-; AVX512-NEXT:    vpmovq2m %zmm0, %k0
+; AVX512-NEXT:    vpmovm2d %k0, %ymm0
+; AVX512-NEXT:    vpbroadcastd %xmm0, %ymm0
+; AVX512-NEXT:    vpmovd2m %ymm0, %k0
 ; AVX512-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1277,10 +1283,11 @@ define void @load_v64i1_broadcast_32_v8i
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
 ; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
-; AVX512NOTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpbroadcastq %xmm0, %zmm0
-; AVX512NOTDQ-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512NOTDQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm0, %ymm0
+; AVX512NOTDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
 ; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
 ; AVX512NOTDQ-NEXT:    vzeroupper
@@ -1399,10 +1406,10 @@ define void @load_v64i1_broadcast_63_v8i
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    kmovq (%rdi), %k0
 ; AVX512-NEXT:    kshiftrq $56, %k0, %k0
-; AVX512-NEXT:    vpmovm2q %k0, %zmm0
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
-; AVX512-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512-NEXT:    vpmovq2m %zmm0, %k0
+; AVX512-NEXT:    vpmovm2d %k0, %ymm0
+; AVX512-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
+; AVX512-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512-NEXT:    vpmovd2m %ymm0, %k0
 ; AVX512-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1411,11 +1418,12 @@ define void @load_v64i1_broadcast_63_v8i
 ; AVX512NOTDQ:       # %bb.0:
 ; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
 ; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
-; AVX512NOTDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
-; AVX512NOTDQ-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512NOTDQ-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512NOTDQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
+; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512NOTDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
 ; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
 ; AVX512NOTDQ-NEXT:    vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll?rev=321291&r1=321290&r2=321291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll Thu Dec 21 10:44:06 2017
@@ -30,10 +30,9 @@ define <8 x i1> @test2(<2 x i1> %a) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
 ; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
-; CHECK-NEXT:    vpmovm2q %k0, %zmm0
-; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; CHECK-NEXT:    vpmovq2m %zmm0, %k0
+; CHECK-NEXT:    vpmovm2d %k0, %ymm0
+; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; CHECK-NEXT:    vpmovd2m %ymm0, %k0
 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=321291&r1=321290&r2=321291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Thu Dec 21 10:44:06 2017
@@ -123,12 +123,12 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0
 ; AVX512VL-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
-; AVX512VL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
-; AVX512VL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512VL-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k1
 ; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,6,1,0,3,7,7,0]
+; AVX512VL-NEXT:    vpermd %ymm1, %ymm2, %ymm1
+; AVX512VL-NEXT:    vpslld $31, %ymm1, %ymm1
+; AVX512VL-NEXT:    vptestmd %ymm1, %ymm1, %k1
 ; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
@@ -137,10 +137,10 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0
 ; VL_BW_DQ-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
-; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
-; VL_BW_DQ-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; VL_BW_DQ-NEXT:    vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT:    vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,6,1,0,3,7,7,0]
+; VL_BW_DQ-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; VL_BW_DQ-NEXT:    vpmovd2m %ymm0, %k0
 ; VL_BW_DQ-NEXT:    vpmovm2w %k0, %xmm0
 ; VL_BW_DQ-NEXT:    vzeroupper
 ; VL_BW_DQ-NEXT:    retq
@@ -250,12 +250,12 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
 ; AVX512VL-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    kmovw %edi, %k1
-; AVX512VL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; AVX512VL-NEXT:    vpbroadcastq %xmm0, %zmm0
-; AVX512VL-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k1
 ; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; AVX512VL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT:    vpslld $31, %ymm1, %ymm1
+; AVX512VL-NEXT:    vptestmd %ymm1, %ymm1, %k1
 ; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
@@ -264,10 +264,10 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
 ; VL_BW_DQ-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    kmovd %edi, %k0
-; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; VL_BW_DQ-NEXT:    vpbroadcastq %xmm0, %zmm0
-; VL_BW_DQ-NEXT:    vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT:    vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; VL_BW_DQ-NEXT:    vpbroadcastq %xmm0, %ymm0
+; VL_BW_DQ-NEXT:    vpmovd2m %ymm0, %k0
 ; VL_BW_DQ-NEXT:    vpmovm2w %k0, %xmm0
 ; VL_BW_DQ-NEXT:    vzeroupper
 ; VL_BW_DQ-NEXT:    retq
@@ -294,12 +294,14 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
 ; AVX512VL-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    kmovw %edi, %k1
-; AVX512VL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,2,3]
+; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
 ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
-; AVX512VL-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512VL-NEXT:    vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3,4,5,6,7]
+; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovw %k0, %eax
 ; AVX512VL-NEXT:    # kill: def %al killed %al killed %eax
 ; AVX512VL-NEXT:    vzeroupper
@@ -308,11 +310,12 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
 ; VL_BW_DQ-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    kmovd %edi, %k0
-; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT:    vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,2,3]
+; VL_BW_DQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
 ; VL_BW_DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
-; VL_BW_DQ-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; VL_BW_DQ-NEXT:    vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3,4,5,6,7]
+; VL_BW_DQ-NEXT:    vpmovd2m %ymm0, %k0
 ; VL_BW_DQ-NEXT:    kmovd %k0, %eax
 ; VL_BW_DQ-NEXT:    # kill: def %al killed %al killed %eax
 ; VL_BW_DQ-NEXT:    vzeroupper
@@ -339,10 +342,11 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a
 ; AVX512VL-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    kmovw %edi, %k1
-; AVX512VL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
-; AVX512VL-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovw %k0, %eax
 ; AVX512VL-NEXT:    # kill: def %al killed %al killed %eax
 ; AVX512VL-NEXT:    vzeroupper
@@ -351,9 +355,9 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a
 ; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    kmovd %edi, %k0
-; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
-; VL_BW_DQ-NEXT:    vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT:    vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; VL_BW_DQ-NEXT:    vpmovd2m %ymm0, %k0
 ; VL_BW_DQ-NEXT:    kmovd %k0, %eax
 ; VL_BW_DQ-NEXT:    # kill: def %al killed %al killed %eax
 ; VL_BW_DQ-NEXT:    vzeroupper
@@ -382,12 +386,13 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a
 ; AVX512VL-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    kmovw %edi, %k1
-; AVX512VL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
-; AVX512VL-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512VL-NEXT:    vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
+; AVX512VL-NEXT:    vpermi2d %ymm1, %ymm0, %ymm2
+; AVX512VL-NEXT:    vpslld $31, %ymm2, %ymm0
+; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovw %k0, %eax
 ; AVX512VL-NEXT:    # kill: def %al killed %al killed %eax
 ; AVX512VL-NEXT:    vzeroupper
@@ -396,11 +401,11 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a
 ; VL_BW_DQ-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    kmovd %edi, %k0
-; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT:    vpmovm2d %k0, %ymm0
 ; VL_BW_DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
-; VL_BW_DQ-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; VL_BW_DQ-NEXT:    vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
+; VL_BW_DQ-NEXT:    vpermi2d %ymm1, %ymm0, %ymm2
+; VL_BW_DQ-NEXT:    vpmovd2m %ymm2, %k0
 ; VL_BW_DQ-NEXT:    kmovd %k0, %eax
 ; VL_BW_DQ-NEXT:    # kill: def %al killed %al killed %eax
 ; VL_BW_DQ-NEXT:    vzeroupper
@@ -429,12 +434,13 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %
 ; AVX512VL-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    kmovw %edi, %k1
-; AVX512VL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
-; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; AVX512VL-NEXT:    vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
+; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
+; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovw %k0, %eax
 ; AVX512VL-NEXT:    # kill: def %al killed %al killed %eax
 ; AVX512VL-NEXT:    vzeroupper
@@ -443,11 +449,11 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %
 ; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    kmovd %edi, %k0
-; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
-; VL_BW_DQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; VL_BW_DQ-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; VL_BW_DQ-NEXT:    vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT:    vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
+; VL_BW_DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; VL_BW_DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
+; VL_BW_DQ-NEXT:    vpmovd2m %ymm0, %k0
 ; VL_BW_DQ-NEXT:    kmovd %k0, %eax
 ; VL_BW_DQ-NEXT:    # kill: def %al killed %al killed %eax
 ; VL_BW_DQ-NEXT:    vzeroupper
@@ -478,14 +484,16 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8
 ; AVX512VL-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    kmovw %edi, %k1
+; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512VL-NEXT:    movb $51, %al
 ; AVX512VL-NEXT:    kmovw %eax, %k2
-; AVX512VL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; AVX512VL-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; AVX512VL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512VL-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512VL-NEXT:    vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,1]
+; AVX512VL-NEXT:    vpermi2d %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT:    vpslld $31, %ymm2, %ymm0
+; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovw %k0, %eax
 ; AVX512VL-NEXT:    # kill: def %al killed %al killed %eax
 ; AVX512VL-NEXT:    vzeroupper
@@ -494,11 +502,12 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8
 ; VL_BW_DQ-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    kmovd %edi, %k0
-; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,6,1,0,3,7,7,1]
-; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,18446744073709551615,18446744073709551615,0,0]
-; VL_BW_DQ-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; VL_BW_DQ-NEXT:    vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT:    vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; VL_BW_DQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [8,6,1,0,3,7,7,1]
+; VL_BW_DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [4294967295,4294967295,0,0,4294967295,4294967295,0,0]
+; VL_BW_DQ-NEXT:    vpermt2d %ymm0, %ymm1, %ymm2
+; VL_BW_DQ-NEXT:    vpmovd2m %ymm2, %k0
 ; VL_BW_DQ-NEXT:    kmovd %k0, %eax
 ; VL_BW_DQ-NEXT:    # kill: def %al killed %al killed %eax
 ; VL_BW_DQ-NEXT:    vzeroupper
@@ -531,12 +540,12 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_
 ; AVX512VL-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512VL-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; AVX512VL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; AVX512VL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512VL-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; AVX512VL-NEXT:    vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
+; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovw %k0, %eax
 ; AVX512VL-NEXT:    # kill: def %al killed %al killed %eax
 ; AVX512VL-NEXT:    vzeroupper
@@ -546,11 +555,11 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    vpsllw $15, %xmm0, %xmm0
 ; VL_BW_DQ-NEXT:    vpmovw2m %xmm0, %k0
-; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; VL_BW_DQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
-; VL_BW_DQ-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; VL_BW_DQ-NEXT:    vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT:    vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; VL_BW_DQ-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
+; VL_BW_DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; VL_BW_DQ-NEXT:    vpmovd2m %ymm0, %k0
 ; VL_BW_DQ-NEXT:    kmovd %k0, %eax
 ; VL_BW_DQ-NEXT:    # kill: def %al killed %al killed %eax
 ; VL_BW_DQ-NEXT:    vzeroupper




More information about the llvm-commits mailing list