[llvm] r323201 - [X86] Legalize v32i1 without BWI via splitting to v16i1 rather than the default of promoting to v32i8.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 23 06:25:39 PST 2018
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Tue Jan 23 06:25:39 2018
@@ -214,20 +214,40 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7
define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i1> %a) {
; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1
+; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; AVX512VL-NEXT: vptestmd %zmm2, %zmm2, %k1
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
@@ -250,25 +270,19 @@ define <32 x i16> @shuf32i1_3_6_22_12_3_
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512F-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm1
-; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm0
-; AVX512F-NEXT: vpandn %ymm4, %ymm0, %ymm4
-; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT: vpor %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1
-; AVX512F-NEXT: vpandn %ymm5, %ymm1, %ymm2
-; AVX512F-NEXT: vpand %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm0
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm6
+; AVX512F-NEXT: vptestmd %zmm6, %zmm6, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm1
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16:
@@ -276,25 +290,19 @@ define <32 x i16> @shuf32i1_3_6_22_12_3_
; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm1
-; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512VL-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm1
-; AVX512VL-NEXT: vpmovsxbw %xmm1, %ymm0
-; AVX512VL-NEXT: vpandn %ymm4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm0, %ymm2, %ymm0
-; AVX512VL-NEXT: vpor %ymm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX512VL-NEXT: vpmovsxbw %xmm1, %ymm1
-; AVX512VL-NEXT: vpandn %ymm5, %ymm1, %ymm2
-; AVX512VL-NEXT: vpand %ymm1, %ymm3, %ymm1
-; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm0
+; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm6
+; AVX512VL-NEXT: vptestmd %zmm6, %zmm6, %k1
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VL-NEXT: vpmovdw %zmm0, %ymm1
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16:
@@ -318,11 +326,19 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm3
+; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermi2d %zmm0, %zmm3, %zmm4
+; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT: retq
;
@@ -330,11 +346,19 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm3
+; AVX512VL-NEXT: vptestmd %zmm3, %zmm3, %k1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512VL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512VL-NEXT: vpermi2d %zmm0, %zmm3, %zmm4
+; AVX512VL-NEXT: vptestmd %zmm4, %zmm4, %k1
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
@@ -360,24 +384,14 @@ define <32 x i16> @shuf32i1_3_6_22_12_3_
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT: vpcmpeqd %zmm6, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd %zmm6, %zmm1, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm6
+; AVX512F-NEXT: vptestmd %zmm6, %zmm6, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512F-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm1
-; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512F-NEXT: vpsllw $15, %ymm0, %ymm0
-; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm0
-; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm4, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512F-NEXT: vpsllw $15, %ymm1, %ymm1
-; AVX512F-NEXT: vpsraw $15, %ymm1, %ymm1
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm1
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1
; AVX512F-NEXT: retq
;
@@ -386,24 +400,14 @@ define <32 x i16> @shuf32i1_3_6_22_12_3_
; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512VL-NEXT: vpcmpeqd %zmm6, %zmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %zmm6, %zmm1, %k2
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm6
+; AVX512VL-NEXT: vptestmd %zmm6, %zmm6, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512VL-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm1
-; AVX512VL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512VL-NEXT: vpsllw $15, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsraw $15, %ymm0, %ymm0
-; AVX512VL-NEXT: vpblendvb %ymm0, %ymm2, %ymm4, %ymm0
-; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX512VL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512VL-NEXT: vpsllw $15, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsraw $15, %ymm1, %ymm1
+; AVX512VL-NEXT: vpmovdw %zmm0, %ymm1
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1
; AVX512VL-NEXT: retq
;
@@ -433,18 +437,14 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpcmpeqd %zmm4, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd %zmm4, %zmm1, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm4
+; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512F-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
; AVX512F-NEXT: retq
;
@@ -453,18 +453,14 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqd %zmm4, %zmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %zmm4, %zmm1, %k2
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm4
+; AVX512VL-NEXT: vptestmd %zmm4, %zmm4, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512VL-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
; AVX512VL-NEXT: retq
;
@@ -852,65 +848,33 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0
define i64 @shuf64i1_zero(i64 %a) {
; AVX512F-LABEL: shuf64i1_zero:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $96, %rsp
-; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, (%rsp)
-; AVX512F-NEXT: movl (%rsp), %ecx
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: kmovw %k0, %ecx
+; AVX512F-NEXT: shll $16, %ecx
+; AVX512F-NEXT: orl %eax, %ecx
; AVX512F-NEXT: movq %rcx, %rax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf64i1_zero:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: pushq %rbp
-; AVX512VL-NEXT: .cfi_def_cfa_offset 16
-; AVX512VL-NEXT: .cfi_offset %rbp, -16
-; AVX512VL-NEXT: movq %rsp, %rbp
-; AVX512VL-NEXT: .cfi_def_cfa_register %rbp
-; AVX512VL-NEXT: andq $-32, %rsp
-; AVX512VL-NEXT: subq $96, %rsp
-; AVX512VL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512VL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512VL-NEXT: kmovw %k0, (%rsp)
-; AVX512VL-NEXT: movl (%rsp), %ecx
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: kmovw %k0, %ecx
+; AVX512VL-NEXT: shll $16, %ecx
+; AVX512VL-NEXT: orl %eax, %ecx
; AVX512VL-NEXT: movq %rcx, %rax
; AVX512VL-NEXT: shlq $32, %rax
; AVX512VL-NEXT: orq %rcx, %rax
-; AVX512VL-NEXT: movq %rbp, %rsp
-; AVX512VL-NEXT: popq %rbp
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
More information about the llvm-commits mailing list