[llvm] r321291 - [X86] Promote v8i1 shuffles to v8i32 instead of v8i64 if we have VLX.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 21 10:44:06 PST 2017
Author: ctopper
Date: Thu Dec 21 10:44:06 2017
New Revision: 321291
URL: http://llvm.org/viewvc/llvm-project?rev=321291&view=rev
Log:
[X86] Promote v8i1 shuffles to v8i32 instead of v8i64 if we have VLX.
We should have equally good shuffle options for v8i32 with VLX. This was spotted during my attempts to remove 512-bit vectors from SKX.
We still use 512 bits for v16i1, v32i1, and v64i1; I'm less sure we can handle those well with narrower vectors. The i32 and i64 element sizes get the best shuffle support.
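For illustration, here is a minimal LLVM IR sketch (the function name is hypothetical) of the kind of v8i1 shuffle this change affects; the mask mirrors the shuf8i1_3_6_1_0_3_7_7_0 test updated below. With VLX, the i1 mask is now extended to v8i32 and shuffled with a 256-bit (ymm) vpermd rather than being extended to v8i64 and shuffled with a 512-bit (zmm) vpermq:

  define <8 x i1> @shuffle_v8i1_sketch(<8 x i1> %a, <8 x i1> %b) {
    ; On a VLX target this now lowers via a v8i32 (ymm) shuffle
    ; instead of a v8i64 (zmm) shuffle.
    %r = shufflevector <8 x i1> %a, <8 x i1> %b,
         <8 x i32> <i32 3, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
    ret <8 x i1> %r
  }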
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=321291&r1=321290&r2=321291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 21 10:44:06 2017
@@ -14219,7 +14219,9 @@ static SDValue lower1BitVectorShuffle(co
ExtVT = MVT::v4i32;
break;
case MVT::v8i1:
- ExtVT = MVT::v8i64; // Take 512-bit type, more shuffles on KNL
+ // Take 512-bit type, more shuffles on KNL. If we have VLX use a 256-bit
+ // shuffle.
+ ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64;
break;
case MVT::v16i1:
ExtVT = MVT::v16i32;
Modified: llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll?rev=321291&r1=321290&r2=321291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll Thu Dec 21 10:44:06 2017
@@ -249,9 +249,9 @@ define void @load_v32i1_broadcast_16_v8i
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $16, %k0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %zmm2
-; AVX512-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512-NEXT: vpmovq2m %zmm2, %k1
+; AVX512-NEXT: vpmovm2d %k0, %ymm2
+; AVX512-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX512-NEXT: vpmovd2m %ymm2, %k1
; AVX512-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovaps %ymm1, (%rsi)
; AVX512-NEXT: vzeroupper
@@ -261,10 +261,11 @@ define void @load_v32i1_broadcast_16_v8i
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
-; AVX512NOTDQ-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512NOTDQ-NEXT: vpsllq $63, %zmm2, %zmm2
-; AVX512NOTDQ-NEXT: vptestmq %zmm2, %zmm2, %k1
+; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX512NOTDQ-NEXT: vpslld $31, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
@@ -340,10 +341,10 @@ define void @load_v32i1_broadcast_31_v8i
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $24, %k0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %zmm2
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7]
-; AVX512-NEXT: vpermq %zmm2, %zmm3, %zmm2
-; AVX512-NEXT: vpmovq2m %zmm2, %k1
+; AVX512-NEXT: vpmovm2d %k0, %ymm2
+; AVX512-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
+; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512-NEXT: vpmovd2m %ymm2, %k1
; AVX512-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovaps %ymm1, (%rsi)
; AVX512-NEXT: vzeroupper
@@ -353,11 +354,12 @@ define void @load_v32i1_broadcast_31_v8i
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
-; AVX512NOTDQ-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7]
-; AVX512NOTDQ-NEXT: vpermq %zmm2, %zmm3, %zmm2
-; AVX512NOTDQ-NEXT: vpsllq $63, %zmm2, %zmm2
-; AVX512NOTDQ-NEXT: vptestmq %zmm2, %zmm2, %k1
+; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
+; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512NOTDQ-NEXT: vpslld $31, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
@@ -433,9 +435,9 @@ define void @load_v64i1_broadcast_32_v8i
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $32, %k0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %zmm2
-; AVX512-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512-NEXT: vpmovq2m %zmm2, %k1
+; AVX512-NEXT: vpmovm2d %k0, %ymm2
+; AVX512-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX512-NEXT: vpmovd2m %ymm2, %k1
; AVX512-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovaps %ymm1, (%rsi)
; AVX512-NEXT: vzeroupper
@@ -445,10 +447,11 @@ define void @load_v64i1_broadcast_32_v8i
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
-; AVX512NOTDQ-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512NOTDQ-NEXT: vpsllq $63, %zmm2, %zmm2
-; AVX512NOTDQ-NEXT: vptestmq %zmm2, %zmm2, %k1
+; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX512NOTDQ-NEXT: vpslld $31, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
@@ -555,10 +558,10 @@ define void @load_v64i1_broadcast_63_v8i
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $56, %k0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %zmm2
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7]
-; AVX512-NEXT: vpermq %zmm2, %zmm3, %zmm2
-; AVX512-NEXT: vpmovq2m %zmm2, %k1
+; AVX512-NEXT: vpmovm2d %k0, %ymm2
+; AVX512-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
+; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512-NEXT: vpmovd2m %ymm2, %k1
; AVX512-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovaps %ymm1, (%rsi)
; AVX512-NEXT: vzeroupper
@@ -568,11 +571,12 @@ define void @load_v64i1_broadcast_63_v8i
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
-; AVX512NOTDQ-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7]
-; AVX512NOTDQ-NEXT: vpermq %zmm2, %zmm3, %zmm2
-; AVX512NOTDQ-NEXT: vpsllq $63, %zmm2, %zmm2
-; AVX512NOTDQ-NEXT: vptestmq %zmm2, %zmm2, %k1
+; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
+; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512NOTDQ-NEXT: vpslld $31, %ymm2, %ymm2
+; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
@@ -1054,9 +1058,9 @@ define void @load_v32i1_broadcast_16_v8i
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $16, %k0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %zmm0
-; AVX512-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512-NEXT: vpmovq2m %zmm0, %k0
+; AVX512-NEXT: vpmovm2d %k0, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1065,10 +1069,11 @@ define void @load_v32i1_broadcast_16_v8i
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
-; AVX512NOTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512NOTDQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512NOTDQ-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
@@ -1159,10 +1164,10 @@ define void @load_v32i1_broadcast_31_v8i
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $24, %k0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %zmm0
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
-; AVX512-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512-NEXT: vpmovq2m %zmm0, %k0
+; AVX512-NEXT: vpmovm2d %k0, %ymm0
+; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
+; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1171,11 +1176,12 @@ define void @load_v32i1_broadcast_31_v8i
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
-; AVX512NOTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
-; AVX512NOTDQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512NOTDQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512NOTDQ-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
+; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
@@ -1266,9 +1272,9 @@ define void @load_v64i1_broadcast_32_v8i
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $32, %k0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %zmm0
-; AVX512-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512-NEXT: vpmovq2m %zmm0, %k0
+; AVX512-NEXT: vpmovm2d %k0, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1277,10 +1283,11 @@ define void @load_v64i1_broadcast_32_v8i
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
-; AVX512NOTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512NOTDQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512NOTDQ-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
@@ -1399,10 +1406,10 @@ define void @load_v64i1_broadcast_63_v8i
; AVX512: # %bb.0:
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $56, %k0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %zmm0
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
-; AVX512-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512-NEXT: vpmovq2m %zmm0, %k0
+; AVX512-NEXT: vpmovm2d %k0, %ymm0
+; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
+; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1411,11 +1418,12 @@ define void @load_v64i1_broadcast_63_v8i
; AVX512NOTDQ: # %bb.0:
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
-; AVX512NOTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
-; AVX512NOTDQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512NOTDQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512NOTDQ-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
+; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
; AVX512NOTDQ-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll?rev=321291&r1=321290&r2=321291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll Thu Dec 21 10:44:06 2017
@@ -30,10 +30,9 @@ define <8 x i1> @test2(<2 x i1> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpmovm2q %k0, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; CHECK-NEXT: vpmovq2m %zmm0, %k0
+; CHECK-NEXT: vpmovm2d %k0, %ymm0
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; CHECK-NEXT: vpmovd2m %ymm0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=321291&r1=321290&r2=321291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Thu Dec 21 10:44:06 2017
@@ -123,12 +123,12 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0
; AVX512VL-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
-; AVX512VL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
-; AVX512VL-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,1,0,3,7,7,0]
+; AVX512VL-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1
+; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
@@ -137,10 +137,10 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0
; VL_BW_DQ-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
-; VL_BW_DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [3,6,1,0,3,7,7,0]
+; VL_BW_DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
@@ -250,12 +250,12 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
; AVX512VL-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
-; AVX512VL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1
+; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
@@ -264,10 +264,10 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
; VL_BW_DQ-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
-; VL_BW_DQ-NEXT: vpbroadcastq %xmm0, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; VL_BW_DQ-NEXT: vpbroadcastq %xmm0, %ymm0
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
@@ -294,12 +294,14 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
; AVX512VL-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
-; AVX512VL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,2,3]
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
-; AVX512VL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512VL-NEXT: vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3,4,5,6,7]
+; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
; AVX512VL-NEXT: vzeroupper
@@ -308,11 +310,12 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
; VL_BW_DQ-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,2,3]
+; VL_BW_DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
-; VL_BW_DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3,4,5,6,7]
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def %al killed %al killed %eax
; VL_BW_DQ-NEXT: vzeroupper
@@ -339,10 +342,11 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a
; AVX512VL-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
-; AVX512VL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
-; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
; AVX512VL-NEXT: vzeroupper
@@ -351,9 +355,9 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a
; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def %al killed %al killed %eax
; VL_BW_DQ-NEXT: vzeroupper
@@ -382,12 +386,13 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a
; AVX512VL-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
-; AVX512VL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
-; AVX512VL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512VL-NEXT: vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
+; AVX512VL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
+; AVX512VL-NEXT: vpslld $31, %ymm2, %ymm0
+; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
; AVX512VL-NEXT: vzeroupper
@@ -396,11 +401,11 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
-; VL_BW_DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
+; VL_BW_DQ-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
+; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def %al killed %al killed %eax
; VL_BW_DQ-NEXT: vzeroupper
@@ -429,12 +434,13 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %
; AVX512VL-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
-; AVX512VL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
-; AVX512VL-NEXT: vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
+; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
; AVX512VL-NEXT: vzeroupper
@@ -443,11 +449,11 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
-; VL_BW_DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
-; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
+; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def %al killed %al killed %eax
; VL_BW_DQ-NEXT: vzeroupper
@@ -478,14 +484,16 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8
; AVX512VL-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
+; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: movb $51, %al
; AVX512VL-NEXT: kmovw %eax, %k2
-; AVX512VL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; AVX512VL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512VL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512VL-NEXT: vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,1]
+; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vpslld $31, %ymm2, %ymm0
+; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
; AVX512VL-NEXT: vzeroupper
@@ -494,11 +502,12 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8
; VL_BW_DQ-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,6,1,0,3,7,7,1]
-; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,18446744073709551615,18446744073709551615,0,0]
-; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
-; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,1,0,3,7,7,1]
+; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [4294967295,4294967295,0,0,4294967295,4294967295,0,0]
+; VL_BW_DQ-NEXT: vpermt2d %ymm0, %ymm1, %ymm2
+; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def %al killed %al killed %eax
; VL_BW_DQ-NEXT: vzeroupper
@@ -531,12 +540,12 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_
; AVX512VL-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; AVX512VL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; AVX512VL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512VL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
-; AVX512VL-NEXT: vpsllq $63, %zmm2, %zmm0
-; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
; AVX512VL-NEXT: vzeroupper
@@ -546,11 +555,11 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpsllw $15, %xmm0, %xmm0
; VL_BW_DQ-NEXT: vpmovw2m %xmm0, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; VL_BW_DQ-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
-; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
-; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
+; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; VL_BW_DQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def %al killed %al killed %eax
; VL_BW_DQ-NEXT: vzeroupper