[llvm] r342830 - [X86] Add isel pattern for (v8i16 (sext (v8i1))) with DQI and no BWI.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 22 23:49:49 PDT 2018


Author: ctopper
Date: Sat Sep 22 23:49:48 2018
New Revision: 342830

URL: http://llvm.org/viewvc/llvm-project?rev=342830&view=rev
Log:
[X86] Add isel pattern for (v8i16 (sext (v8i1))) with DQI and no BWI.

Our lowering that tries to avoid this sign extend can be defeated by the DAG combine folding it with a truncate.

The pattern needs to extend to a v8i32 then truncate back down to v8i16.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-ext.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=342830&r1=342829&r2=342830&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Sep 22 23:49:48 2018
@@ -9687,6 +9687,11 @@ let Predicates = [HasDQI, NoBWI] in {
             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
 }
 
+let Predicates = [HasDQI, NoBWI, HasVLX] in {
+  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
+            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
+}
+
 //===----------------------------------------------------------------------===//
 // AVX-512 - COMPRESS and EXPAND
 //

Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=342830&r1=342829&r2=342830&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Sat Sep 22 23:49:48 2018
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=AVX512DQNOBW
 
 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
 ; KNL-LABEL: zext_8x8mem_to_8x16:
@@ -17,6 +18,14 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX512DQNOBW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512DQNOBW-NEXT:    vpsraw $15, %xmm0, %xmm0
+; AVX512DQNOBW-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   %x   = zext <8 x i8> %a to <8 x i16>
   %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
@@ -38,6 +47,14 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %xmm1
+; AVX512DQNOBW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512DQNOBW-NEXT:    vpsraw $15, %xmm0, %xmm0
+; AVX512DQNOBW-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   %x   = sext <8 x i8> %a to <8 x i16>
   %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
@@ -61,6 +78,15 @@ define <16 x i16> @zext_16x8mem_to_16x16
 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <16 x i8>,<16 x i8> *%i,align 1
   %x   = zext <16 x i8> %a to <16 x i16>
   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
@@ -83,6 +109,15 @@ define <16 x i16> @sext_16x8mem_to_16x16
 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %ymm1
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <16 x i8>,<16 x i8> *%i,align 1
   %x   = sext <16 x i8> %a to <16 x i16>
   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
@@ -114,6 +149,15 @@ define <16 x i16> @zext_16x8_to_16x16_ma
 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX512DQNOBW-NEXT:    retq
   %x   = zext <16 x i8> %a to <16 x i16>
   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
   ret <16 x i16> %ret
@@ -144,6 +188,15 @@ define <16 x i16> @sext_16x8_to_16x16_ma
 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
 ; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX512DQNOBW-NEXT:    retq
   %x   = sext <16 x i8> %a to <16 x i16>
   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
   ret <16 x i16> %ret
@@ -171,6 +224,21 @@ define <32 x i16> @zext_32x8mem_to_32x16
 ; SKX-NEXT:    vpmovb2m %ymm0, %k1
 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <32 x i8>,<32 x i8> *%i,align 1
   %x   = zext <32 x i8> %a to <32 x i16>
   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
@@ -199,6 +267,21 @@ define <32 x i16> @sext_32x8mem_to_32x16
 ; SKX-NEXT:    vpmovb2m %ymm0, %k1
 ; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vpmovsxbw 16(%rdi), %ymm2
+; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %ymm3
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <32 x i8>,<32 x i8> *%i,align 1
   %x   = sext <32 x i8> %a to <32 x i16>
   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
@@ -218,6 +301,14 @@ define <32 x i16> @zext_32x8_to_32x16(<3
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_32x8_to_32x16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512DQNOBW-NEXT:    retq
   %x   = zext <32 x i8> %a to <32 x i16>
   ret <32 x i16> %x
 }
@@ -245,6 +336,22 @@ define <32 x i16> @zext_32x8_to_32x16_ma
 ; SKX-NEXT:    vpmovb2m %ymm1, %k1
 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm2, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm3, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    retq
   %x   = zext <32 x i8> %a to <32 x i16>
   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
   ret <32 x i16> %ret
@@ -263,6 +370,14 @@ define <32 x i16> @sext_32x8_to_32x16(<3
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_32x8_to_32x16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm2
+; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm1
+; AVX512DQNOBW-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512DQNOBW-NEXT:    retq
   %x   = sext <32 x i8> %a to <32 x i16>
   ret <32 x i16> %x
 }
@@ -290,6 +405,22 @@ define <32 x i16> @sext_32x8_to_32x16_ma
 ; SKX-NEXT:    vpmovb2m %ymm1, %k1
 ; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm3, %ymm3
+; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm2, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm3, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    retq
   %x   = sext <32 x i8> %a to <32 x i16>
   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
   ret <32 x i16> %ret
@@ -306,12 +437,12 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_4x8mem_to_4x32:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_4x8mem_to_4x32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   %x   = zext <4 x i8> %a to <4 x i32>
   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
@@ -329,12 +460,12 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_4x8mem_to_4x32:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_4x8mem_to_4x32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   %x   = sext <4 x i8> %a to <4 x i32>
   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
@@ -358,6 +489,14 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   %x   = zext <8 x i8> %a to <8 x i32>
   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -381,6 +520,14 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   %x   = sext <8 x i8> %a to <8 x i32>
   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -402,6 +549,14 @@ define <16 x i32> @zext_16x8mem_to_16x32
 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <16 x i8>,<16 x i8> *%i,align 1
   %x   = zext <16 x i8> %a to <16 x i32>
   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
@@ -423,6 +578,14 @@ define <16 x i32> @sext_16x8mem_to_16x32
 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <16 x i8>,<16 x i8> *%i,align 1
   %x   = sext <16 x i8> %a to <16 x i32>
   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
@@ -444,6 +607,14 @@ define <16 x i32> @zext_16x8_to_16x32_ma
 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm1, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512DQNOBW-NEXT:    retq
   %x   = zext <16 x i8> %a to <16 x i32>
   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   ret <16 x i32> %ret
@@ -464,6 +635,14 @@ define <16 x i32> @sext_16x8_to_16x32_ma
 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
 ; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm1, %k1
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
+; AVX512DQNOBW-NEXT:    retq
   %x   = sext <16 x i8> %a to <16 x i32>
   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   ret <16 x i32> %ret
@@ -498,12 +677,12 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_2x8mem_to_2x64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vpmovq2m %xmm0, %k1
-; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_2x8mem_to_2x64:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT:    retq
   %a   = load <2 x i8>,<2 x i8> *%i,align 1
   %x   = zext <2 x i8> %a to <2 x i64>
   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
@@ -520,12 +699,12 @@ define <2 x i64> @sext_2x8mem_to_2x64mas
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_2x8mem_to_2x64mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vpmovq2m %xmm0, %k1
-; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
+; AVX512DQ-NEXT:    retq
   %a   = load <2 x i8>,<2 x i8> *%i,align 1
   %x   = sext <2 x i8> %a to <2 x i64>
   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
@@ -551,12 +730,12 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_4x8mem_to_4x64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_4x8mem_to_4x64:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   %x   = zext <4 x i8> %a to <4 x i64>
   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
@@ -573,12 +752,12 @@ define <4 x i64> @sext_4x8mem_to_4x64mas
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_4x8mem_to_4x64mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   %x   = sext <4 x i8> %a to <4 x i64>
   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
@@ -610,6 +789,14 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   %x   = zext <8 x i8> %a to <8 x i64>
   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
@@ -631,6 +818,14 @@ define <8 x i64> @sext_8x8mem_to_8x64mas
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   %x   = sext <8 x i8> %a to <8 x i64>
   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
@@ -658,12 +853,12 @@ define <4 x i32> @zext_4x16mem_to_4x32(<
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_4x16mem_to_4x32:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_4x16mem_to_4x32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   %x   = zext <4 x i16> %a to <4 x i32>
   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
@@ -681,12 +876,12 @@ define <4 x i32> @sext_4x16mem_to_4x32ma
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_4x16mem_to_4x32mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   %x   = sext <4 x i16> %a to <4 x i32>
   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
@@ -721,6 +916,14 @@ define <8 x i32> @zext_8x16mem_to_8x32(<
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   %x   = zext <8 x i16> %a to <8 x i32>
   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -744,6 +947,14 @@ define <8 x i32> @sext_8x16mem_to_8x32ma
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   %x   = sext <8 x i16> %a to <8 x i32>
   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
@@ -777,6 +988,14 @@ define <8 x i32> @zext_8x16_to_8x32mask(
 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm1, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512DQNOBW-NEXT:    retq
   %x   = zext <8 x i16> %a to <8 x i32>
   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
   ret <8 x i32> %ret
@@ -806,6 +1025,14 @@ define <16 x i32> @zext_16x16mem_to_16x3
 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <16 x i16>,<16 x i16> *%i,align 1
   %x   = zext <16 x i16> %a to <16 x i32>
   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
@@ -827,6 +1054,14 @@ define <16 x i32> @sext_16x16mem_to_16x3
 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <16 x i16>,<16 x i16> *%i,align 1
   %x   = sext <16 x i16> %a to <16 x i32>
   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
@@ -857,6 +1092,14 @@ define <16 x i32> @zext_16x16_to_16x32ma
 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm1, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512DQNOBW-NEXT:    retq
   %x   = zext <16 x i16> %a to <16 x i32>
   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   ret <16 x i32> %ret
@@ -882,12 +1125,12 @@ define <2 x i64> @zext_2x16mem_to_2x64(<
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_2x16mem_to_2x64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vpmovq2m %xmm0, %k1
-; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_2x16mem_to_2x64:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; AVX512DQ-NEXT:    retq
   %a   = load <2 x i16>,<2 x i16> *%i,align 1
   %x   = zext <2 x i16> %a to <2 x i64>
   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
@@ -905,12 +1148,12 @@ define <2 x i64> @sext_2x16mem_to_2x64ma
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_2x16mem_to_2x64mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vpmovq2m %xmm0, %k1
-; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z}
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z}
+; AVX512DQ-NEXT:    retq
   %a   = load <2 x i16>,<2 x i16> *%i,align 1
   %x   = sext <2 x i16> %a to <2 x i64>
   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
@@ -937,12 +1180,12 @@ define <4 x i64> @zext_4x16mem_to_4x64(<
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_4x16mem_to_4x64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_4x16mem_to_4x64:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   %x   = zext <4 x i16> %a to <4 x i64>
   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
@@ -959,12 +1202,12 @@ define <4 x i64> @sext_4x16mem_to_4x64ma
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_4x16mem_to_4x64mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z}
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z}
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   %x   = sext <4 x i16> %a to <4 x i64>
   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
@@ -996,6 +1239,14 @@ define <8 x i64> @zext_8x16mem_to_8x64(<
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   %x   = zext <8 x i16> %a to <8 x i64>
   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
@@ -1017,6 +1268,14 @@ define <8 x i64> @sext_8x16mem_to_8x64ma
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   %x   = sext <8 x i16> %a to <8 x i64>
   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
@@ -1048,6 +1307,14 @@ define <8 x i64> @zext_8x16_to_8x64mask(
 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm1, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512DQNOBW-NEXT:    retq
   %x   = zext <8 x i16> %a to <8 x i64>
   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   ret <8 x i64> %ret
@@ -1073,12 +1340,12 @@ define <2 x i64> @zext_2x32mem_to_2x64(<
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_2x32mem_to_2x64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vpmovq2m %xmm0, %k1
-; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_2x32mem_to_2x64:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
+; AVX512DQ-NEXT:    retq
   %a   = load <2 x i32>,<2 x i32> *%i,align 1
   %x   = zext <2 x i32> %a to <2 x i64>
   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
@@ -1096,12 +1363,12 @@ define <2 x i64> @sext_2x32mem_to_2x64ma
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_2x32mem_to_2x64mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vpmovq2m %xmm0, %k1
-; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
+; AVX512DQ-NEXT:    retq
   %a   = load <2 x i32>,<2 x i32> *%i,align 1
   %x   = sext <2 x i32> %a to <2 x i64>
   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
@@ -1128,12 +1395,12 @@ define <4 x i64> @zext_4x32mem_to_4x64(<
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_4x32mem_to_4x64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_4x32mem_to_4x64:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i32>,<4 x i32> *%i,align 1
   %x   = zext <4 x i32> %a to <4 x i64>
   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
@@ -1150,12 +1417,12 @@ define <4 x i64> @sext_4x32mem_to_4x64ma
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_4x32mem_to_4x64mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vpmovd2m %xmm0, %k1
-; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
+; AVX512DQ-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
+; AVX512DQ-NEXT:    retq
   %a   = load <4 x i32>,<4 x i32> *%i,align 1
   %x   = sext <4 x i32> %a to <4 x i64>
   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
@@ -1191,12 +1458,12 @@ define <4 x i64> @zext_4x32_to_4x64mask(
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: zext_4x32_to_4x64mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX-NEXT:    vpmovd2m %xmm1, %k1
-; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: zext_4x32_to_4x64mask:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpslld $31, %xmm1, %xmm1
+; AVX512DQ-NEXT:    vpmovd2m %xmm1, %k1
+; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512DQ-NEXT:    retq
   %x   = zext <4 x i32> %a to <4 x i64>
   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   ret <4 x i64> %ret
@@ -1217,6 +1484,14 @@ define <8 x i64> @zext_8x32mem_to_8x64(<
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i32>,<8 x i32> *%i,align 1
   %x   = zext <8 x i32> %a to <8 x i64>
   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
@@ -1238,6 +1513,14 @@ define <8 x i64> @sext_8x32mem_to_8x64ma
 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
+; AVX512DQNOBW-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
+; AVX512DQNOBW-NEXT:    retq
   %a   = load <8 x i32>,<8 x i32> *%i,align 1
   %x   = sext <8 x i32> %a to <8 x i64>
   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
@@ -1278,6 +1561,14 @@ define <8 x i64> @zext_8x32_to_8x64mask(
 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm1, %k1
+; AVX512DQNOBW-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; AVX512DQNOBW-NEXT:    retq
   %x   = zext <8 x i32> %a to <8 x i64>
   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   ret <8 x i64> %ret
@@ -1314,6 +1605,13 @@ define   <16 x i32> @zext_16i1_to_16xi32
 ; SKX-NEXT:    vpmovm2d %k0, %zmm0
 ; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
+; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm0
+; AVX512DQNOBW-NEXT:    vpsrld $31, %zmm0, %zmm0
+; AVX512DQNOBW-NEXT:    retq
   %a = bitcast i16 %b to <16 x i1>
   %c = zext <16 x i1> %a to <16 x i32>
   ret <16 x i32> %c
@@ -1333,6 +1631,13 @@ define   <8 x i64> @zext_8i1_to_8xi64(i8
 ; SKX-NEXT:    vpmovm2q %k0, %zmm0
 ; SKX-NEXT:    vpsrlq $63, %zmm0, %zmm0
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
+; AVX512DQNOBW-NEXT:    vpmovm2q %k0, %zmm0
+; AVX512DQNOBW-NEXT:    vpsrlq $63, %zmm0, %zmm0
+; AVX512DQNOBW-NEXT:    retq
   %a = bitcast i8 %b to <8 x i1>
   %c = zext <8 x i1> %a to <8 x i64>
   ret <8 x i64> %c
@@ -1356,6 +1661,16 @@ define i16 @trunc_16i8_to_16i1(<16 x i8>
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: trunc_16i8_to_16i1:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
+; AVX512DQNOBW-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX512DQNOBW-NEXT:    vzeroupper
+; AVX512DQNOBW-NEXT:    retq
   %mask_b = trunc <16 x i8>%a to <16 x i1>
   %mask = bitcast <16 x i1> %mask_b to i16
   ret i16 %mask
@@ -1379,6 +1694,15 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
+; AVX512DQNOBW-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX512DQNOBW-NEXT:    vzeroupper
+; AVX512DQNOBW-NEXT:    retq
   %mask_b = trunc <16 x i32>%a to <16 x i1>
   %mask = bitcast <16 x i1> %mask_b to i16
   ret i16 %mask
@@ -1417,6 +1741,16 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
+; AVX512DQNOBW-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512DQNOBW-NEXT:    vzeroupper
+; AVX512DQNOBW-NEXT:    retq
   %mask_b = trunc <8 x i16>%a to <8 x i1>
   %mask = bitcast <8 x i1> %mask_b to i8
   ret i8 %mask
@@ -1430,11 +1764,11 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_8i1_8i32:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
-; SKX-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_8i1_8i32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0
+; AVX512DQ-NEXT:    retq
   %x = icmp slt <8 x i32> %a1, %a2
   %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
   %y = sext <8 x i1> %x1 to <8 x i32>
@@ -1468,6 +1802,19 @@ define i16 @trunc_i32_to_i1(i32 %a) {
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    movw $-4, %ax
+; AVX512DQNOBW-NEXT:    kmovw %eax, %k0
+; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
+; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
+; AVX512DQNOBW-NEXT:    andl $1, %edi
+; AVX512DQNOBW-NEXT:    kmovw %edi, %k1
+; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
+; AVX512DQNOBW-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX512DQNOBW-NEXT:    retq
   %a_i = trunc i32 %a to i1
   %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
   %res = bitcast <16 x i1> %maskv to i16
@@ -1489,6 +1836,14 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32
 ; SKX-NEXT:    vpmovm2w %k0, %xmm0
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: sext_8i1_8i16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
+; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovdw %ymm0, %xmm0
+; AVX512DQNOBW-NEXT:    vzeroupper
+; AVX512DQNOBW-NEXT:    retq
   %x = icmp slt <8 x i32> %a1, %a2
   %y = sext <8 x i1> %x to <8 x i16>
   ret <8 x i16> %y
@@ -1501,11 +1856,11 @@ define <16 x i32> @sext_16i1_16i32(<16 x
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_16i1_16i32:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
-; SKX-NEXT:    vpmovm2d %k0, %zmm0
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_16i1_16i32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
+; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT:    retq
   %x = icmp slt <16 x i32> %a1, %a2
   %y = sext <16 x i1> %x to <16 x i32>
   ret <16 x i32> %y
@@ -1518,11 +1873,11 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32
 ; KNL-NEXT:    vpmovsxdq %ymm0, %zmm0
 ; KNL-NEXT:    retq
 ;
-; SKX-LABEL: sext_8i1_8i64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
-; SKX-NEXT:    vpmovm2q %k0, %zmm0
-; SKX-NEXT:    retq
+; AVX512DQ-LABEL: sext_8i1_8i64:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
+; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
+; AVX512DQ-NEXT:    retq
   %x = icmp slt <8 x i32> %a1, %a2
   %y = sext <8 x i1> %x to <8 x i64>
   ret <8 x i64> %y
@@ -1570,6 +1925,26 @@ define <64 x i16> @test21(<64 x i16> %x
 ; SKX-NEXT:    kshiftrq $32, %k1, %k1
 ; SKX-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: test21:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm4, %ymm4
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm4, %ymm4
+; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm4, %ymm0
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm5, %ymm4
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm4, %ymm4
+; AVX512DQNOBW-NEXT:    vpand %ymm1, %ymm4, %ymm1
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm6, %ymm4
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm4, %ymm4
+; AVX512DQNOBW-NEXT:    vpand %ymm2, %ymm4, %ymm2
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm7, %ymm4
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm4, %ymm4
+; AVX512DQNOBW-NEXT:    vpand %ymm3, %ymm4, %ymm3
+; AVX512DQNOBW-NEXT:    retq
   %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
   ret <64 x i16> %ret
 }
@@ -1600,6 +1975,15 @@ define <16 x i16> @shuffle_zext_16x8_to_
 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX512DQNOBW-NEXT:    retq
   %x   = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
   %bc  = bitcast <32 x i8> %x to <16 x i16>
   %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
@@ -1681,6 +2065,15 @@ define <64 x i8> @zext_64xi1_to_64xi8(<6
 ; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
 ; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512DQNOBW-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    retq
   %mask = icmp eq <64 x i8> %x, %y
   %1 = zext <64 x i1> %mask to <64 x i8>
   ret <64 x i8> %1
@@ -1701,6 +2094,14 @@ define <32 x i16> @zext_32xi1_to_32xi16(
 ; SKX-NEXT:    vpmovm2w %k0, %zmm0
 ; SKX-NEXT:    vpsrlw $15, %zmm0, %zmm0
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpsrlw $15, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpsrlw $15, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    retq
   %mask = icmp eq <32 x i16> %x, %y
   %1 = zext <32 x i1> %mask to <32 x i16>
   ret <32 x i16> %1
@@ -1736,6 +2137,18 @@ define <32 x i8> @zext_32xi1_to_32xi8(<3
 ; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
 ; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
+;
+; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8:
+; AVX512DQNOBW:       # %bb.0:
+; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512DQNOBW-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; AVX512DQNOBW-NEXT:    vpmovsxwd %ymm1, %zmm1
+; AVX512DQNOBW-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512DQNOBW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512DQNOBW-NEXT:    retq
   %mask = icmp eq <32 x i16> %x, %y
   %1 = zext <32 x i1> %mask to <32 x i8>
   ret <32 x i8> %1




More information about the llvm-commits mailing list