[llvm] r325534 - [X86] Use vpmovq2m/vpmovd2m for truncate to vXi1 when possible.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 19 14:07:31 PST 2018


Author: ctopper
Date: Mon Feb 19 14:07:31 2018
New Revision: 325534

URL: http://llvm.org/viewvc/llvm-project?rev=325534&view=rev
Log:
[X86] Use vpmovq2m/vpmovd2m for truncate to vXi1 when possible.

Previously we used vptestmd, but the SKX scheduling data says vpmovq2m/vpmovd2m has lower latency. We already use vpmovb2m/vpmovw2m for byte/word truncates, so this is also more consistent.
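
For reference, a minimal illustration of the affected pattern (the function name
and exact assembly below are illustrative; the updated CHECK lines in the tests
are the authoritative output):

define <8 x i64> @trunc_v8i32_to_mask(<8 x i32> %x, <8 x i64> %passthru) {
  %m = trunc <8 x i32> %x to <8 x i1>
  %sel = select <8 x i1> %m, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %sel
}
; Expected on an AVX512VL+DQ target such as SKX (illustrative):
;   vpslld    $31, %ymm0, %ymm0        # move bit 0 of each element into the sign bit
;   vpmovd2m  %ymm0, %k1               # sign bits -> mask register (previously vptestmd)
;   vmovdqa64 %zmm1, %zmm0 {%k1} {z}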

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
    llvm/trunk/test/CodeGen/X86/avx512-ext.ll
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
    llvm/trunk/test/CodeGen/X86/avx512-vselect.ll
    llvm/trunk/test/CodeGen/X86/compress_expand.ll
    llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
    llvm/trunk/test/CodeGen/X86/pr33349.ll
    llvm/trunk/test/CodeGen/X86/required-vector-width.ll
    llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Feb 19 14:07:31 2018
@@ -16762,6 +16762,10 @@ static SDValue LowerTruncateVecI1(SDValu
     In = DAG.getNode(ISD::SHL, DL, InVT, In,
                      DAG.getConstant(ShiftInx, DL, InVT));
   }
+  // If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
+  if (Subtarget.hasDQI())
+    return DAG.getNode(X86ISD::CMPM, DL, VT, DAG.getConstant(0, DL, InVT),
+                       In, DAG.getConstant(6, DL, MVT::i8));
   return DAG.getNode(X86ISD::CMPM, DL, VT, In,
                      getZeroVector(InVT, Subtarget, DAG, DL),
                      DAG.getConstant(4, DL, MVT::i8));
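
A brief note on the two CMPM immediates above: 6 is the signed "greater than"
(NLE) predicate and 4 is "not equal". Written as IR purely for readability (the
lowering itself builds X86ISD::CMPM nodes at the SelectionDAG level), the two
paths are equivalent to the following sketch:

define <8 x i1> @dqi_pattern(<8 x i32> %shifted) {
  ; 0 > x (signed) is true exactly when the sign bit of x is set, which is
  ; what vpmovd2m/vpmovq2m extract; isel matches this form when DQI is available.
  %m = icmp sgt <8 x i32> zeroinitializer, %shifted
  ret <8 x i1> %m
}
define <8 x i1> @non_dqi_pattern(<8 x i32> %shifted) {
  ; x != 0 after the shift (only the sign bit can be set); selected as vptestmd.
  %m = icmp ne <8 x i32> %shifted, zeroinitializer
  ret <8 x i1> %m
}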

Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Mon Feb 19 14:07:31 2018
@@ -715,23 +715,41 @@ define <4 x float> @f64to4f32(<4 x doubl
 }
 
 define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
-; NOVL-LABEL: f64to4f32_mask:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    vpslld $31, %xmm1, %xmm1
-; NOVL-NEXT:    vptestmd %zmm1, %zmm1, %k1
-; NOVL-NEXT:    vcvtpd2ps %ymm0, %xmm0
-; NOVL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: f64to4f32_mask:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    vpslld $31, %xmm1, %xmm1
+; NOVLDQ-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; NOVLDQ-NEXT:    vcvtpd2ps %ymm0, %xmm0
+; NOVLDQ-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: f64to4f32_mask:
-; VL:       # %bb.0:
-; VL-NEXT:    vpslld $31, %xmm1, %xmm1
-; VL-NEXT:    vptestmd %xmm1, %xmm1, %k1
-; VL-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
-; VL-NEXT:    vzeroupper
-; VL-NEXT:    retq
+; VLDQ-LABEL: f64to4f32_mask:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vpslld $31, %xmm1, %xmm1
+; VLDQ-NEXT:    vpmovd2m %xmm1, %k1
+; VLDQ-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vzeroupper
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: f64to4f32_mask:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vpslld $31, %xmm1, %xmm1
+; VLNODQ-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; VLNODQ-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vzeroupper
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: f64to4f32_mask:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    vpslld $31, %xmm1, %xmm1
+; DQNOVL-NEXT:    vpmovd2m %zmm1, %k1
+; DQNOVL-NEXT:    vcvtpd2ps %ymm0, %xmm0
+; DQNOVL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
   %a = fptrunc <4 x double> %b to <4 x float>
   %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
   ret <4 x float> %c
@@ -2041,288 +2059,521 @@ define <2 x double> @ubto2f64(<2 x i32>
 }
 
 define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
-; NOVL-LABEL: test_2f64toub:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NOVL-NEXT:    vcvttpd2udq %zmm0, %ymm0
-; NOVL-NEXT:    vpslld $31, %ymm0, %ymm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_2f64toub:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; NOVLDQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
+; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_2f64toub:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
-; VL-NEXT:    vpslld $31, %xmm0, %xmm0
-; VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; VL-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_2f64toub:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
+; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_2f64toub:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
+; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_2f64toub:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; DQNOVL-NEXT:    vcvttpd2udq %zmm0, %ymm0
+; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
   %mask = fptoui <2 x double> %a to <2 x i1>
   %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
   ret <2 x i64> %select
 }
 
 define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) {
-; NOVL-LABEL: test_4f64toub:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
-; NOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
-; NOVL-NEXT:    vpslld $31, %xmm0, %xmm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_4f64toub:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_4f64toub:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttpd2dq %ymm0, %xmm0
-; VL-NEXT:    vpslld $31, %xmm0, %xmm0
-; VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; VL-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_4f64toub:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_4f64toub:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_4f64toub:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; DQNOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; DQNOVL-NEXT:    retq
   %mask = fptoui <4 x double> %a to <4 x i1>
   %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
   ret <4 x i64> %select
 }
 
 define <8 x i64> @test_8f64toub(<8 x double> %a, <8 x i64> %passthru) {
-; NOVL-LABEL: test_8f64toub:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
-; NOVL-NEXT:    vpslld $31, %ymm0, %ymm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_8f64toub:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_8f64toub:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
-; VL-NEXT:    vpslld $31, %ymm0, %ymm0
-; VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
-; VL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_8f64toub:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; VLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_8f64toub:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; VLNODQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_8f64toub:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    retq
   %mask = fptoui <8 x double> %a to <8 x i1>
   %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
   ret <8 x i64> %select
 }
 
 define <2 x i64> @test_2f32toub(<2 x float> %a, <2 x i64> %passthru) {
-; NOVL-LABEL: test_2f32toub:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
-; NOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; NOVL-NEXT:    vpslld $31, %xmm0, %xmm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_2f32toub:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_2f32toub:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; VL-NEXT:    vpslld $31, %xmm0, %xmm0
-; VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; VL-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_2f32toub:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_2f32toub:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_2f32toub:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
   %mask = fptoui <2 x float> %a to <2 x i1>
   %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
   ret <2 x i64> %select
 }
 
 define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) {
-; NOVL-LABEL: test_4f32toub:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
-; NOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; NOVL-NEXT:    vpslld $31, %xmm0, %xmm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_4f32toub:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_4f32toub:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; VL-NEXT:    vpslld $31, %xmm0, %xmm0
-; VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; VL-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_4f32toub:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_4f32toub:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_4f32toub:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; DQNOVL-NEXT:    retq
   %mask = fptoui <4 x float> %a to <4 x i1>
   %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
   ret <4 x i64> %select
 }
 
 define <8 x i64> @test_8f32toub(<8 x float> %a, <8 x i64> %passthru) {
-; NOVL-LABEL: test_8f32toub:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
-; NOVL-NEXT:    vpslld $31, %ymm0, %ymm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_8f32toub:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
+; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_8f32toub:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttps2dq %ymm0, %ymm0
-; VL-NEXT:    vpslld $31, %ymm0, %ymm0
-; VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
-; VL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_8f32toub:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
+; VLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_8f32toub:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttps2dq %ymm0, %ymm0
+; VLNODQ-NEXT:    vpslld $31, %ymm0, %ymm0
+; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_8f32toub:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
+; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    retq
   %mask = fptoui <8 x float> %a to <8 x i1>
   %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
   ret <8 x i64> %select
 }
 
 define <16 x i32> @test_16f32toub(<16 x float> %a, <16 x i32> %passthru) {
-; ALL-LABEL: test_16f32toub:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
-; ALL-NEXT:    vpslld $31, %zmm0, %zmm0
-; ALL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; ALL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
-; ALL-NEXT:    retq
+; NODQ-LABEL: test_16f32toub:
+; NODQ:       # %bb.0:
+; NODQ-NEXT:    vcvttps2dq %zmm0, %zmm0
+; NODQ-NEXT:    vpslld $31, %zmm0, %zmm0
+; NODQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NODQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; NODQ-NEXT:    retq
+;
+; VLDQ-LABEL: test_16f32toub:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttps2dq %zmm0, %zmm0
+; VLDQ-NEXT:    vpslld $31, %zmm0, %zmm0
+; VLDQ-NEXT:    vpmovd2m %zmm0, %k1
+; VLDQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_16f32toub:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    vcvttps2dq %zmm0, %zmm0
+; DQNOVL-NEXT:    vpslld $31, %zmm0, %zmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    retq
   %mask = fptoui <16 x float> %a to <16 x i1>
   %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
   ret <16 x i32> %select
 }
 
 define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) {
-; NOVL-LABEL: test_2f64tosb:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
-; NOVL-NEXT:    vcvttpd2dq %xmm0, %xmm0
-; NOVL-NEXT:    vpslld $31, %xmm0, %xmm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_2f64tosb:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; NOVLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_2f64tosb:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
-; VL-NEXT:    vpslld $31, %xmm0, %xmm0
-; VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; VL-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_2f64tosb:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_2f64tosb:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
+; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_2f64tosb:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; DQNOVL-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
   %mask = fptosi <2 x double> %a to <2 x i1>
   %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
   ret <2 x i64> %select
 }
 
 define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) {
-; NOVL-LABEL: test_4f64tosb:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
-; NOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_4f64tosb:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_4f64tosb:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttpd2dq %ymm0, %xmm0
-; VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; VL-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_4f64tosb:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_4f64tosb:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_4f64tosb:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; DQNOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; DQNOVL-NEXT:    retq
   %mask = fptosi <4 x double> %a to <4 x i1>
   %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
   ret <4 x i64> %select
 }
 
 define <8 x i64> @test_8f64tosb(<8 x double> %a, <8 x i64> %passthru) {
-; NOVL-LABEL: test_8f64tosb:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_8f64tosb:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_8f64tosb:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
-; VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
-; VL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_8f64tosb:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_8f64tosb:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_8f64tosb:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    retq
   %mask = fptosi <8 x double> %a to <8 x i1>
   %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
   ret <8 x i64> %select
 }
 
 define <2 x i64> @test_2f32tosb(<2 x float> %a, <2 x i64> %passthru) {
-; NOVL-LABEL: test_2f32tosb:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
-; NOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT:    vzeroupper
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_2f32tosb:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_2f32tosb:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; VL-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_2f32tosb:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_2f32tosb:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_2f32tosb:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
   %mask = fptosi <2 x float> %a to <2 x i1>
   %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
   ret <2 x i64> %select
 }
 
 define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) {
-; NOVL-LABEL: test_4f32tosb:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
-; NOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_4f32tosb:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_4f32tosb:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; VL-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_4f32tosb:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_4f32tosb:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_4f32tosb:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; DQNOVL-NEXT:    retq
   %mask = fptosi <4 x float> %a to <4 x i1>
   %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
   ret <4 x i64> %select
 }
 
 define <8 x i64> @test_8f32tosb(<8 x float> %a, <8 x i64> %passthru) {
-; NOVL-LABEL: test_8f32tosb:
-; NOVL:       # %bb.0:
-; NOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
-; NOVL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; NOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; NOVL-NEXT:    retq
+; NOVLDQ-LABEL: test_8f32tosb:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
+; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    retq
 ;
-; VL-LABEL: test_8f32tosb:
-; VL:       # %bb.0:
-; VL-NEXT:    vcvttps2dq %ymm0, %ymm0
-; VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
-; VL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; VL-NEXT:    retq
+; VLDQ-LABEL: test_8f32tosb:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
+; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
+; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_8f32tosb:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vcvttps2dq %ymm0, %ymm0
+; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
+; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_8f32tosb:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    retq
   %mask = fptosi <8 x float> %a to <8 x i1>
   %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
   ret <8 x i64> %select
 }
 
 define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) {
-; ALL-LABEL: test_16f32tosb:
-; ALL:       # %bb.0:
-; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
-; ALL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; ALL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
-; ALL-NEXT:    retq
+; NODQ-LABEL: test_16f32tosb:
+; NODQ:       # %bb.0:
+; NODQ-NEXT:    vcvttps2dq %zmm0, %zmm0
+; NODQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; NODQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; NODQ-NEXT:    retq
+;
+; VLDQ-LABEL: test_16f32tosb:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vcvttps2dq %zmm0, %zmm0
+; VLDQ-NEXT:    vpmovd2m %zmm0, %k1
+; VLDQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_16f32tosb:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    vcvttps2dq %zmm0, %zmm0
+; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    retq
   %mask = fptosi <16 x float> %a to <16 x i1>
   %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
   ret <16 x i32> %select

Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Mon Feb 19 14:07:31 2018
@@ -308,7 +308,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4
 ; SKX-LABEL: zext_4x8mem_to_4x32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; SKX-NEXT:    retq
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
@@ -330,7 +330,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4
 ; SKX-LABEL: sext_4x8mem_to_4x32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
@@ -498,7 +498,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2
 ; SKX-LABEL: zext_2x8mem_to_2x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovq2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
 ; SKX-NEXT:    retq
   %a   = load <2 x i8>,<2 x i8> *%i,align 1
@@ -519,7 +519,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mas
 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovq2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
   %a   = load <2 x i8>,<2 x i8> *%i,align 1
@@ -550,7 +550,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4
 ; SKX-LABEL: zext_4x8mem_to_4x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
 ; SKX-NEXT:    retq
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
@@ -572,7 +572,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mas
 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
@@ -656,7 +656,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<
 ; SKX-LABEL: zext_4x16mem_to_4x32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; SKX-NEXT:    retq
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
@@ -678,7 +678,7 @@ define <4 x i32> @sext_4x16mem_to_4x32ma
 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
@@ -878,7 +878,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<
 ; SKX-LABEL: zext_2x16mem_to_2x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovq2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
 ; SKX-NEXT:    retq
   %a   = load <2 x i16>,<2 x i16> *%i,align 1
@@ -900,7 +900,7 @@ define <2 x i64> @sext_2x16mem_to_2x64ma
 ; SKX-LABEL: sext_2x16mem_to_2x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovq2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
   %a   = load <2 x i16>,<2 x i16> *%i,align 1
@@ -932,7 +932,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<
 ; SKX-LABEL: zext_4x16mem_to_4x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; SKX-NEXT:    retq
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
@@ -954,7 +954,7 @@ define <4 x i64> @sext_4x16mem_to_4x64ma
 ; SKX-LABEL: sext_4x16mem_to_4x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
@@ -1067,7 +1067,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<
 ; SKX-LABEL: zext_2x32mem_to_2x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovq2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
 ; SKX-NEXT:    retq
   %a   = load <2 x i32>,<2 x i32> *%i,align 1
@@ -1089,7 +1089,7 @@ define <2 x i64> @sext_2x32mem_to_2x64ma
 ; SKX-LABEL: sext_2x32mem_to_2x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovq2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
 ; SKX-NEXT:    retq
   %a   = load <2 x i32>,<2 x i32> *%i,align 1
@@ -1121,7 +1121,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<
 ; SKX-LABEL: zext_4x32mem_to_4x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; SKX-NEXT:    retq
   %a   = load <4 x i32>,<4 x i32> *%i,align 1
@@ -1143,7 +1143,7 @@ define <4 x i64> @sext_4x32mem_to_4x64ma
 ; SKX-LABEL: sext_4x32mem_to_4x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovd2m %xmm0, %k1
 ; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
 ; SKX-NEXT:    retq
   %a   = load <4 x i32>,<4 x i32> *%i,align 1
@@ -1184,7 +1184,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(
 ; SKX-LABEL: zext_4x32_to_4x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; SKX-NEXT:    retq
   %x   = zext <4 x i32> %a to <4 x i64>
@@ -1362,7 +1362,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
 ; SKX-LABEL: trunc_16i32_to_16i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %zmm0, %zmm0
-; SKX-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; SKX-NEXT:    vpmovd2m %zmm0, %k0
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Mon Feb 19 14:07:31 2018
@@ -539,7 +539,7 @@ define void @test7(<8 x i1> %mask)  {
 ; AVX512DQ:       ## %bb.0: ## %allocas
 ; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
 ; AVX512DQ-NEXT:    kmovw %k0, %eax
 ; AVX512DQ-NEXT:    orb $85, %al
 ; AVX512DQ-NEXT:    vzeroupper
@@ -688,7 +688,7 @@ define <16 x i1> @test9(<16 x i1>%a, <16
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512DQ-NEXT:  LBB18_3:
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512DQ-NEXT:    vzeroupper
@@ -729,7 +729,7 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x
 ; SKX-NEXT:  LBB20_1:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
 ; SKX-NEXT:  LBB20_3:
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; SKX-NEXT:    vpmovd2m %xmm0, %k0
 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
 ; SKX-NEXT:    retq
 ;
@@ -759,7 +759,7 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x
 ; AVX512DQ-NEXT:  LBB20_1:
 ; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
 ; AVX512DQ-NEXT:  LBB20_3:
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512DQ-NEXT:    vzeroupper
@@ -1203,7 +1203,7 @@ define void @test22(<4 x i1> %a, <4 x i1
 ; SKX-LABEL: test22:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; SKX-NEXT:    vpmovd2m %xmm0, %k0
 ; SKX-NEXT:    kmovb %k0, (%rdi)
 ; SKX-NEXT:    retq
 ;
@@ -1219,7 +1219,7 @@ define void @test22(<4 x i1> %a, <4 x i1
 ; AVX512DQ-LABEL: test22:
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
@@ -1240,7 +1240,7 @@ define void @test23(<2 x i1> %a, <2 x i1
 ; SKX-LABEL: test23:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; SKX-NEXT:    vpmovq2m %xmm0, %k0
 ; SKX-NEXT:    kmovb %k0, (%rdi)
 ; SKX-NEXT:    retq
 ;
@@ -1256,7 +1256,7 @@ define void @test23(<2 x i1> %a, <2 x i1
 ; AVX512DQ-LABEL: test23:
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
@@ -1317,7 +1317,7 @@ define void @store_v2i1(<2 x i1> %c , <2
 ; SKX-LABEL: store_v2i1:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; SKX-NEXT:    vpmovq2m %xmm0, %k0
 ; SKX-NEXT:    knotw %k0, %k0
 ; SKX-NEXT:    kmovb %k0, (%rdi)
 ; SKX-NEXT:    retq
@@ -1335,7 +1335,7 @@ define void @store_v2i1(<2 x i1> %c , <2
 ; AVX512DQ-LABEL: store_v2i1:
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
 ; AVX512DQ-NEXT:    knotw %k0, %k0
 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
@@ -1359,7 +1359,7 @@ define void @store_v4i1(<4 x i1> %c , <4
 ; SKX-LABEL: store_v4i1:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; SKX-NEXT:    vpmovd2m %xmm0, %k0
 ; SKX-NEXT:    knotw %k0, %k0
 ; SKX-NEXT:    kmovb %k0, (%rdi)
 ; SKX-NEXT:    retq
@@ -1377,7 +1377,7 @@ define void @store_v4i1(<4 x i1> %c , <4
 ; AVX512DQ-LABEL: store_v4i1:
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
 ; AVX512DQ-NEXT:    knotw %k0, %k0
 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
@@ -1421,7 +1421,7 @@ define void @store_v8i1(<8 x i1> %c , <8
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
 ; AVX512DQ-NEXT:    knotb %k0, %k0
 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
@@ -1463,7 +1463,7 @@ define void @store_v16i1(<16 x i1> %c ,
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
 ; AVX512DQ-NEXT:    knotw %k0, %k0
 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
@@ -2068,7 +2068,7 @@ define void @store_8i1(<8 x i1>* %a, <8
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
@@ -2107,7 +2107,7 @@ define void @store_8i1_1(<8 x i1>* %a, <
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
@@ -2145,7 +2145,7 @@ define void @store_16i1(<16 x i1>* %a, <
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
@@ -2188,11 +2188,11 @@ define void @store_32i1(<32 x i1>* %a, <
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
 ; AVX512DQ-NEXT:    vpslld $31, %zmm1, %zmm1
-; AVX512DQ-NEXT:    vptestmd %zmm1, %zmm1, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k0
 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
 ; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
@@ -2235,10 +2235,10 @@ define void @store_32i1_1(<32 x i1>* %a,
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
 ; AVX512DQ-NEXT:    vpmovsxwd %ymm1, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
 ; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
@@ -2292,16 +2292,16 @@ define void @store_64i1(<64 x i1>* %a, <
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm1, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm2, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k2
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k2
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm3, %zmm0
 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k3
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k3
 ; AVX512DQ-NEXT:    kmovw %k3, 6(%rdi)
 ; AVX512DQ-NEXT:    kmovw %k2, 4(%rdi)
 ; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Mon Feb 19 14:07:31 2018
@@ -1789,7 +1789,7 @@ define <4 x float> @f64to4f32_mask(<4 x
 ; GENERIC-LABEL: f64to4f32_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1797,7 +1797,7 @@ define <4 x float> @f64to4f32_mask(<4 x
 ; SKX-LABEL: f64to4f32_mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
@@ -3211,14 +3211,14 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4
 ; GENERIC-LABEL: zext_4x8mem_to_4x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x8mem_to_4x32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
@@ -3231,14 +3231,14 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4
 ; GENERIC-LABEL: sext_4x8mem_to_4x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x8mem_to_4x32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
@@ -3397,14 +3397,14 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2
 ; GENERIC-LABEL: zext_2x8mem_to_2x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_2x8mem_to_2x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <2 x i8>,<2 x i8> *%i,align 1
@@ -3416,14 +3416,14 @@ define <2 x i64> @sext_2x8mem_to_2x64mas
 ; GENERIC-LABEL: sext_2x8mem_to_2x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <2 x i8>,<2 x i8> *%i,align 1
@@ -3450,14 +3450,14 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4
 ; GENERIC-LABEL: zext_4x8mem_to_4x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x8mem_to_4x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
@@ -3470,14 +3470,14 @@ define <4 x i64> @sext_4x8mem_to_4x64mas
 ; GENERIC-LABEL: sext_4x8mem_to_4x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i8>,<4 x i8> *%i,align 1
@@ -3560,14 +3560,14 @@ define <4 x i32> @zext_4x16mem_to_4x32(<
 ; GENERIC-LABEL: zext_4x16mem_to_4x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x16mem_to_4x32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
@@ -3580,14 +3580,14 @@ define <4 x i32> @sext_4x16mem_to_4x32ma
 ; GENERIC-LABEL: sext_4x16mem_to_4x32mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
@@ -3791,14 +3791,14 @@ define <2 x i64> @zext_2x16mem_to_2x64(<
 ; GENERIC-LABEL: zext_2x16mem_to_2x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_2x16mem_to_2x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <2 x i16>,<2 x i16> *%i,align 1
@@ -3811,14 +3811,14 @@ define <2 x i64> @sext_2x16mem_to_2x64ma
 ; GENERIC-LABEL: sext_2x16mem_to_2x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_2x16mem_to_2x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <2 x i16>,<2 x i16> *%i,align 1
@@ -3846,14 +3846,14 @@ define <4 x i64> @zext_4x16mem_to_4x64(<
 ; GENERIC-LABEL: zext_4x16mem_to_4x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x16mem_to_4x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
@@ -3866,14 +3866,14 @@ define <4 x i64> @sext_4x16mem_to_4x64ma
 ; GENERIC-LABEL: sext_4x16mem_to_4x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x16mem_to_4x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i16>,<4 x i16> *%i,align 1
@@ -3989,14 +3989,14 @@ define <2 x i64> @zext_2x32mem_to_2x64(<
 ; GENERIC-LABEL: zext_2x32mem_to_2x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_2x32mem_to_2x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <2 x i32>,<2 x i32> *%i,align 1
@@ -4009,14 +4009,14 @@ define <2 x i64> @sext_2x32mem_to_2x64ma
 ; GENERIC-LABEL: sext_2x32mem_to_2x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_2x32mem_to_2x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <2 x i32>,<2 x i32> *%i,align 1
@@ -4044,14 +4044,14 @@ define <4 x i64> @zext_4x32mem_to_4x64(<
 ; GENERIC-LABEL: zext_4x32mem_to_4x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x32mem_to_4x64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i32>,<4 x i32> *%i,align 1
@@ -4064,14 +4064,14 @@ define <4 x i64> @sext_4x32mem_to_4x64ma
 ; GENERIC-LABEL: sext_4x32mem_to_4x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_4x32mem_to_4x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a   = load <4 x i32>,<4 x i32> *%i,align 1
@@ -4113,14 +4113,14 @@ define <4 x i64> @zext_4x32_to_4x64mask(
 ; GENERIC-LABEL: zext_4x32_to_4x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: zext_4x32_to_4x64mask:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x   = zext <4 x i32> %a to <4 x i64>
@@ -4306,7 +4306,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
 ; GENERIC-LABEL: trunc_16i32_to_16i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
@@ -4315,7 +4315,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
 ; SKX-LABEL: trunc_16i32_to_16i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
@@ -7138,7 +7138,7 @@ define <4 x i1> @vmov_test11(<4 x i1>%a,
 ; GENERIC-NEXT:  .LBB389_1:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB389_3:
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7152,7 +7152,7 @@ define <4 x i1> @vmov_test11(<4 x i1>%a,
 ; SKX-NEXT:  .LBB389_1:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:  .LBB389_3:
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %mask = icmp sgt i32 %a1, %b1
@@ -7361,14 +7361,14 @@ define void @vmov_test22(<4 x i1> %a, <4
 ; GENERIC-LABEL: vmov_test22:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vmov_test22:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   store <4 x i1> %a, <4 x i1>* %addr
@@ -7379,14 +7379,14 @@ define void @vmov_test23(<2 x i1> %a, <2
 ; GENERIC-LABEL: vmov_test23:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vmov_test23:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   store <2 x i1> %a, <2 x i1>* %addr
@@ -7418,7 +7418,7 @@ define void @store_v2i1(<2 x i1> %c , <2
 ; GENERIC-LABEL: store_v2i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7426,7 +7426,7 @@ define void @store_v2i1(<2 x i1> %c , <2
 ; SKX-LABEL: store_v2i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
@@ -7439,7 +7439,7 @@ define void @store_v4i1(<4 x i1> %c , <4
 ; GENERIC-LABEL: store_v4i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7447,7 +7447,7 @@ define void @store_v4i1(<4 x i1> %c , <4
 ; SKX-LABEL: store_v4i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]

Modified: llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll Mon Feb 19 14:07:31 2018
@@ -5,7 +5,7 @@ define <8 x i1> @test(<2 x i1> %a) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; CHECK-NEXT:    vpmovq2m %xmm0, %k0
 ; CHECK-NEXT:    kshiftlb $2, %k0, %k0
 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
 ; CHECK-NEXT:    retq
@@ -17,7 +17,7 @@ define <8 x i1> @test1(<2 x i1> %a) {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; CHECK-NEXT:    vpmovq2m %xmm0, %k0
 ; CHECK-NEXT:    kshiftlb $4, %k0, %k0
 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
 ; CHECK-NEXT:    retq
@@ -29,7 +29,7 @@ define <8 x i1> @test2(<2 x i1> %a) {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; CHECK-NEXT:    vpmovq2m %xmm0, %k0
 ; CHECK-NEXT:    vpmovm2d %k0, %ymm0
 ; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
 ; CHECK-NEXT:    vpmovd2m %ymm0, %k0
@@ -44,7 +44,7 @@ define <8 x i1> @test3(<4 x i1> %a) {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; CHECK-NEXT:    vpmovd2m %xmm0, %k0
 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
 ; CHECK-NEXT:    retq
 
@@ -56,9 +56,9 @@ define <8 x i1> @test4(<4 x i1> %a, <4 x
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
-; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k0
+; CHECK-NEXT:    vpmovd2m %xmm1, %k0
 ; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; CHECK-NEXT:    vpmovd2m %xmm0, %k1
 ; CHECK-NEXT:    kshiftlb $4, %k0, %k0
 ; CHECK-NEXT:    korb %k0, %k1, %k0
 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
@@ -72,9 +72,9 @@ define <4 x i1> @test5(<2 x i1> %a, <2 x
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm1
-; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k0
+; CHECK-NEXT:    vpmovq2m %xmm1, %k0
 ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; CHECK-NEXT:    vpmovq2m %xmm0, %k1
 ; CHECK-NEXT:    kshiftlb $2, %k0, %k0
 ; CHECK-NEXT:    korb %k0, %k1, %k0
 ; CHECK-NEXT:    vpmovm2d %k0, %xmm0
@@ -88,9 +88,9 @@ define <16 x i1> @test6(<2 x i1> %a, <2
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm1
-; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k0
+; CHECK-NEXT:    vpmovq2m %xmm1, %k0
 ; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; CHECK-NEXT:    vpmovq2m %xmm0, %k1
 ; CHECK-NEXT:    kshiftlb $2, %k0, %k0
 ; CHECK-NEXT:    korb %k0, %k1, %k0
 ; CHECK-NEXT:    vpmovm2b %k0, %xmm0
@@ -104,9 +104,9 @@ define <32 x i1> @test7(<4 x i1> %a, <4
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
-; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k0
+; CHECK-NEXT:    vpmovd2m %xmm1, %k0
 ; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1
+; CHECK-NEXT:    vpmovd2m %xmm0, %k1
 ; CHECK-NEXT:    kshiftlb $4, %k0, %k0
 ; CHECK-NEXT:    korb %k0, %k1, %k0
 ; CHECK-NEXT:    vpmovm2b %k0, %ymm0
@@ -147,7 +147,7 @@ define <2 x i1> @test10(<4 x i1> %a, <4
 ; CHECK-LABEL: test10:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; CHECK-NEXT:    vpmovd2m %xmm0, %k0
 ; CHECK-NEXT:    kshiftrb $2, %k0, %k0
 ; CHECK-NEXT:    vpmovm2q %k0, %xmm0
 ; CHECK-NEXT:    retq
@@ -159,7 +159,7 @@ define <8 x i1> @test11(<4 x i1> %a, <4
 ; CHECK-LABEL: test11:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; CHECK-NEXT:    vpmovd2m %xmm0, %k0
 ; CHECK-NEXT:    kshiftlb $4, %k0, %k0
 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
 ; CHECK-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/avx512-vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vselect.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vselect.ll Mon Feb 19 14:07:31 2018
@@ -5,12 +5,19 @@
 target triple = "x86_64-unknown-unknown"
 
 define <8 x i64> @test1(<8 x i64> %m, <8 x i64> %a, <8 x i64> %b) {
-; CHECK-LABEL: test1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpsllq $63, %zmm0, %zmm0
-; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; CHECK-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: test1:
+; CHECK-SKX:       # %bb.0: # %entry
+; CHECK-SKX-NEXT:    vpsllq $63, %zmm0, %zmm0
+; CHECK-SKX-NEXT:    vpmovq2m %zmm0, %k1
+; CHECK-SKX-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-KNL-LABEL: test1:
+; CHECK-KNL:       # %bb.0: # %entry
+; CHECK-KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; CHECK-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; CHECK-KNL-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-KNL-NEXT:    retq
 entry:
   %m.trunc = trunc <8 x i64> %m to <8 x i1>
   %ret = select <8 x i1> %m.trunc, <8 x i64> %a, <8 x i64> %b

Modified: llvm/trunk/test/CodeGen/X86/compress_expand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/compress_expand.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/compress_expand.ll (original)
+++ llvm/trunk/test/CodeGen/X86/compress_expand.ll Mon Feb 19 14:07:31 2018
@@ -191,7 +191,7 @@ define void @test10(i64* %base, <4 x i64
 ; SKX-LABEL: test10:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX-NEXT:    vpcompressq %ymm0, (%rdi) {%k1}
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -213,7 +213,7 @@ define void @test11(i64* %base, <2 x i64
 ; SKX-LABEL: test11:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX-NEXT:    vpcompressq %xmm0, (%rdi) {%k1}
 ; SKX-NEXT:    retq
 ;
@@ -234,7 +234,7 @@ define void @test12(float* %base, <4 x f
 ; SKX-LABEL: test12:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX-NEXT:    vcompressps %xmm0, (%rdi) {%k1}
 ; SKX-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Mon Feb 19 14:07:31 2018
@@ -841,7 +841,7 @@ define <4 x float> @test15(float* %base,
 ; SKX-LABEL: test15:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX-NEXT:    vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
 ; SKX-NEXT:    vmovaps %xmm1, %xmm0
 ; SKX-NEXT:    retq
@@ -849,7 +849,7 @@ define <4 x float> @test15(float* %base,
 ; SKX_32-LABEL: test15:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
 ; SKX_32-NEXT:    vmovaps %xmm1, %xmm0
@@ -891,7 +891,7 @@ define <4 x double> @test16(double* %bas
 ; SKX-LABEL: test16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX-NEXT:    vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1}
 ; SKX-NEXT:    vmovapd %ymm2, %ymm0
 ; SKX-NEXT:    retq
@@ -899,7 +899,7 @@ define <4 x double> @test16(double* %bas
 ; SKX_32-LABEL: test16:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1}
 ; SKX_32-NEXT:    vmovapd %ymm2, %ymm0
@@ -946,7 +946,7 @@ define <2 x double> @test17(double* %bas
 ; SKX-NEXT:    vpsllq $32, %xmm0, %xmm0
 ; SKX-NEXT:    vpsraq $32, %xmm0, %xmm0
 ; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX-NEXT:    vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1}
 ; SKX-NEXT:    vmovapd %xmm2, %xmm0
 ; SKX-NEXT:    retq
@@ -956,7 +956,7 @@ define <2 x double> @test17(double* %bas
 ; SKX_32-NEXT:    vpsllq $32, %xmm0, %xmm0
 ; SKX_32-NEXT:    vpsraq $32, %xmm0, %xmm0
 ; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1}
 ; SKX_32-NEXT:    vmovapd %xmm2, %xmm0
@@ -1002,7 +1002,7 @@ define void @test18(<4 x i32>%a1, <4 x i
 ; SKX-LABEL: test18:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm2, %xmm2
-; SKX-NEXT:    vptestmd %xmm2, %xmm2, %k1
+; SKX-NEXT:    vpmovd2m %xmm2, %k1
 ; SKX-NEXT:    vpscatterqd %xmm0, (,%ymm1) {%k1}
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -1010,7 +1010,7 @@ define void @test18(<4 x i32>%a1, <4 x i
 ; SKX_32-LABEL: test18:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpslld $31, %xmm2, %xmm2
-; SKX_32-NEXT:    vptestmd %xmm2, %xmm2, %k1
+; SKX_32-NEXT:    vpmovd2m %xmm2, %k1
 ; SKX_32-NEXT:    vpscatterdd %xmm0, (,%xmm1) {%k1}
 ; SKX_32-NEXT:    retl
   call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
@@ -1046,7 +1046,7 @@ define void @test19(<4 x double>%a1, dou
 ; SKX-LABEL: test19:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX-NEXT:    vscatterqpd %ymm0, (%rdi,%ymm2,8) {%k1}
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -1054,7 +1054,7 @@ define void @test19(<4 x double>%a1, dou
 ; SKX_32-LABEL: test19:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vscatterqpd %ymm0, (%eax,%ymm2,8) {%k1}
 ; SKX_32-NEXT:    vzeroupper
@@ -1093,7 +1093,7 @@ define void @test20(<2 x float>%a1, <2 x
 ; SKX-LABEL: test20:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm2, %xmm2
-; SKX-NEXT:    vptestmq %xmm2, %xmm2, %k1
+; SKX-NEXT:    vpmovq2m %xmm2, %k1
 ; SKX-NEXT:    vscatterqps %xmm0, (,%xmm1) {%k1}
 ; SKX-NEXT:    retq
 ;
@@ -1101,7 +1101,7 @@ define void @test20(<2 x float>%a1, <2 x
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; SKX_32-NEXT:    vpsllq $63, %xmm2, %xmm2
-; SKX_32-NEXT:    vptestmq %xmm2, %xmm2, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm2, %k1
 ; SKX_32-NEXT:    vscatterdps %xmm0, (,%xmm1) {%k1}
 ; SKX_32-NEXT:    retl
   call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
@@ -1137,7 +1137,7 @@ define void @test21(<2 x i32>%a1, <2 x i
 ; SKX-LABEL: test21:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm2, %xmm2
-; SKX-NEXT:    vptestmq %xmm2, %xmm2, %k1
+; SKX-NEXT:    vpmovq2m %xmm2, %k1
 ; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; SKX-NEXT:    vpscatterqd %xmm0, (,%xmm1) {%k1}
 ; SKX-NEXT:    retq
@@ -1145,7 +1145,7 @@ define void @test21(<2 x i32>%a1, <2 x i
 ; SKX_32-LABEL: test21:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpsllq $63, %xmm2, %xmm2
-; SKX_32-NEXT:    vptestmq %xmm2, %xmm2, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm2, %k1
 ; SKX_32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; SKX_32-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; SKX_32-NEXT:    vpscatterdd %xmm0, (,%xmm1) {%k1}
@@ -1189,7 +1189,7 @@ define <2 x float> @test22(float* %base,
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX-NEXT:    vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1}
 ; SKX-NEXT:    vmovaps %xmm2, %xmm0
 ; SKX-NEXT:    retq
@@ -1198,7 +1198,7 @@ define <2 x float> @test22(float* %base,
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vgatherdps (%eax,%xmm0,4), %xmm2 {%k1}
 ; SKX_32-NEXT:    vmovaps %xmm2, %xmm0
@@ -1240,7 +1240,7 @@ define <2 x float> @test22a(float* %base
 ; SKX-LABEL: test22a:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX-NEXT:    vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1}
 ; SKX-NEXT:    vmovaps %xmm2, %xmm0
 ; SKX-NEXT:    retq
@@ -1248,7 +1248,7 @@ define <2 x float> @test22a(float* %base
 ; SKX_32-LABEL: test22a:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vgatherqps (%eax,%xmm0,4), %xmm2 {%k1}
 ; SKX_32-NEXT:    vmovaps %xmm2, %xmm0
@@ -1292,7 +1292,7 @@ define <2 x i32> @test23(i32* %base, <2
 ; SKX-LABEL: test23:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; SKX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; SKX-NEXT:    vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
@@ -1302,7 +1302,7 @@ define <2 x i32> @test23(i32* %base, <2
 ; SKX_32-LABEL: test23:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; SKX_32-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
@@ -1346,7 +1346,7 @@ define <2 x i32> @test23b(i32* %base, <2
 ; SKX-LABEL: test23b:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; SKX-NEXT:    vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
@@ -1355,7 +1355,7 @@ define <2 x i32> @test23b(i32* %base, <2
 ; SKX_32-LABEL: test23b:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; SKX_32-NEXT:    vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
@@ -1447,7 +1447,7 @@ define <2 x i64> @test25(i64* %base, <2
 ; SKX-NEXT:    vpsllq $32, %xmm0, %xmm0
 ; SKX-NEXT:    vpsraq $32, %xmm0, %xmm0
 ; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX-NEXT:    vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
 ; SKX-NEXT:    vmovdqa %xmm2, %xmm0
 ; SKX-NEXT:    retq
@@ -1457,7 +1457,7 @@ define <2 x i64> @test25(i64* %base, <2
 ; SKX_32-NEXT:    vpsllq $32, %xmm0, %xmm0
 ; SKX_32-NEXT:    vpsraq $32, %xmm0, %xmm0
 ; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmq %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
 ; SKX_32-NEXT:    vmovdqa %xmm2, %xmm0
@@ -1739,7 +1739,7 @@ define <3 x i32> @test30(<3 x i32*> %bas
 ; SKX-LABEL: test30:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm2, %xmm2
-; SKX-NEXT:    vptestmd %xmm2, %xmm2, %k1
+; SKX-NEXT:    vpmovd2m %xmm2, %k1
 ; SKX-NEXT:    kmovw %k1, %eax
 ; SKX-NEXT:    vpmovsxdq %xmm1, %ymm1
 ; SKX-NEXT:    vpsllq $2, %ymm1, %ymm1
@@ -1778,7 +1778,7 @@ define <3 x i32> @test30(<3 x i32*> %bas
 ; SKX_32-NEXT:    subl $12, %esp
 ; SKX_32-NEXT:    .cfi_def_cfa_offset 16
 ; SKX_32-NEXT:    vpslld $31, %xmm2, %xmm2
-; SKX_32-NEXT:    vptestmd %xmm2, %xmm2, %k1
+; SKX_32-NEXT:    vpmovd2m %xmm2, %k1
 ; SKX_32-NEXT:    kmovw %k1, %eax
 ; SKX_32-NEXT:    vpslld $2, %xmm1, %xmm1
 ; SKX_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
@@ -1882,7 +1882,7 @@ define <16 x i32> @test_gather_16i32(<16
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
-; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vpmovd2m %zmm2, %k1
 ; SKX-NEXT:    vextracti64x4 $1, %zmm3, %ymm2
 ; SKX-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
@@ -1894,7 +1894,7 @@ define <16 x i32> @test_gather_16i32(<16
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
-; SKX_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 ; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm2 {%k1}
 ; SKX_32-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; SKX_32-NEXT:    retl
@@ -1940,7 +1940,7 @@ define <16 x i64> @test_gather_16i64(<16
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
-; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vpmovd2m %zmm2, %k1
 ; SKX-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX-NEXT:    vpgatherqq (,%zmm0), %zmm3 {%k1}
 ; SKX-NEXT:    vpgatherqq (,%zmm1), %zmm4 {%k2}
@@ -1959,7 +1959,7 @@ define <16 x i64> @test_gather_16i64(<16
 ; SKX_32-NEXT:    subl $64, %esp
 ; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
-; SKX_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 ; SKX_32-NEXT:    vmovdqa64 8(%ebp), %zmm1
 ; SKX_32-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX_32-NEXT:    vpgatherdq (,%ymm0), %zmm2 {%k1}
@@ -1999,7 +1999,7 @@ define <16 x float> @test_gather_16f32(<
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
-; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vpmovd2m %zmm2, %k1
 ; SKX-NEXT:    vextractf64x4 $1, %zmm3, %ymm2
 ; SKX-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX-NEXT:    vgatherqps (,%zmm1), %ymm2 {%k2}
@@ -2011,7 +2011,7 @@ define <16 x float> @test_gather_16f32(<
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
-; SKX_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 ; SKX_32-NEXT:    vgatherdps (,%zmm0), %zmm2 {%k1}
 ; SKX_32-NEXT:    vmovaps %zmm2, %zmm0
 ; SKX_32-NEXT:    retl
@@ -2057,7 +2057,7 @@ define <16 x double> @test_gather_16f64(
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
-; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vpmovd2m %zmm2, %k1
 ; SKX-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX-NEXT:    vgatherqpd (,%zmm0), %zmm3 {%k1}
 ; SKX-NEXT:    vgatherqpd (,%zmm1), %zmm4 {%k2}
@@ -2076,7 +2076,7 @@ define <16 x double> @test_gather_16f64(
 ; SKX_32-NEXT:    subl $64, %esp
 ; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
-; SKX_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 ; SKX_32-NEXT:    vmovapd 8(%ebp), %zmm1
 ; SKX_32-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX_32-NEXT:    vgatherdpd (,%ymm0), %zmm2 {%k1}
@@ -2116,7 +2116,7 @@ define void @test_scatter_16i32(<16 x i3
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
-; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vpmovd2m %zmm2, %k1
 ; SKX-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX-NEXT:    vpscatterqd %ymm3, (,%zmm0) {%k1}
 ; SKX-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
@@ -2128,7 +2128,7 @@ define void @test_scatter_16i32(<16 x i3
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
-; SKX_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 ; SKX_32-NEXT:    vpscatterdd %zmm2, (,%zmm0) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
@@ -2173,7 +2173,7 @@ define void @test_scatter_16i64(<16 x i6
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
-; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vpmovd2m %zmm2, %k1
 ; SKX-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX-NEXT:    vpscatterqq %zmm3, (,%zmm0) {%k1}
 ; SKX-NEXT:    vpscatterqq %zmm4, (,%zmm1) {%k2}
@@ -2191,7 +2191,7 @@ define void @test_scatter_16i64(<16 x i6
 ; SKX_32-NEXT:    subl $64, %esp
 ; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
-; SKX_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 ; SKX_32-NEXT:    vmovdqa64 8(%ebp), %zmm1
 ; SKX_32-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX_32-NEXT:    vpscatterdq %zmm2, (,%ymm0) {%k1}
@@ -2231,7 +2231,7 @@ define void @test_scatter_16f32(<16 x fl
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
-; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vpmovd2m %zmm2, %k1
 ; SKX-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX-NEXT:    vscatterqps %ymm3, (,%zmm0) {%k1}
 ; SKX-NEXT:    vextractf64x4 $1, %zmm3, %ymm0
@@ -2243,7 +2243,7 @@ define void @test_scatter_16f32(<16 x fl
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
-; SKX_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 ; SKX_32-NEXT:    vscatterdps %zmm2, (,%zmm0) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
@@ -2289,7 +2289,7 @@ define void @test_scatter_16f64(<16 x do
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 ; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
-; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vpmovd2m %zmm2, %k1
 ; SKX-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX-NEXT:    vscatterqpd %zmm3, (,%zmm0) {%k1}
 ; SKX-NEXT:    vscatterqpd %zmm4, (,%zmm1) {%k2}
@@ -2307,7 +2307,7 @@ define void @test_scatter_16f64(<16 x do
 ; SKX_32-NEXT:    subl $64, %esp
 ; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 ; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
-; SKX_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 ; SKX_32-NEXT:    vmovapd 8(%ebp), %zmm1
 ; SKX_32-NEXT:    kshiftrw $8, %k1, %k2
 ; SKX_32-NEXT:    vscatterdpd %zmm2, (,%ymm0) {%k1}
@@ -2359,7 +2359,7 @@ define <4 x i64> @test_pr28312(<4 x i64*
 ; SKX-LABEL: test_pr28312:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX-NEXT:    vpgatherqq (,%ymm0), %ymm1 {%k1}
 ; SKX-NEXT:    vpaddq %ymm1, %ymm1, %ymm0
 ; SKX-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
@@ -2375,7 +2375,7 @@ define <4 x i64> @test_pr28312(<4 x i64*
 ; SKX_32-NEXT:    andl $-32, %esp
 ; SKX_32-NEXT:    subl $32, %esp
 ; SKX_32-NEXT:    vpslld $31, %xmm1, %xmm1
-; SKX_32-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX_32-NEXT:    vpmovd2m %xmm1, %k1
 ; SKX_32-NEXT:    vpgatherdq (,%xmm0), %ymm1 {%k1}
 ; SKX_32-NEXT:    vpaddq %ymm1, %ymm1, %ymm0
 ; SKX_32-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
@@ -2546,7 +2546,7 @@ define <2 x float> @large_index(float* %
 ; SKX-LABEL: large_index:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; SKX-NEXT:    vpmovq2m %xmm0, %k1
 ; SKX-NEXT:    vmovq %rcx, %xmm0
 ; SKX-NEXT:    vmovq %rsi, %xmm2
 ; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
@@ -2557,7 +2557,7 @@ define <2 x float> @large_index(float* %
 ; SKX_32-LABEL: large_index:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpsllq $63, %xmm0, %xmm0
-; SKX_32-NEXT:    vptestmq %xmm0, %xmm0, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm0, %k1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SKX_32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -2694,7 +2694,7 @@ define void @test_scatter_2i32_index(<2
 ; SKX-LABEL: test_scatter_2i32_index:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpsllq $63, %xmm2, %xmm2
-; SKX-NEXT:    vptestmq %xmm2, %xmm2, %k1
+; SKX-NEXT:    vpmovq2m %xmm2, %k1
 ; SKX-NEXT:    vpsllq $32, %xmm1, %xmm1
 ; SKX-NEXT:    vpsraq $32, %xmm1, %xmm1
 ; SKX-NEXT:    vscatterqpd %xmm0, (%rdi,%xmm1,8) {%k1}
@@ -2703,7 +2703,7 @@ define void @test_scatter_2i32_index(<2
 ; SKX_32-LABEL: test_scatter_2i32_index:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    vpsllq $63, %xmm2, %xmm2
-; SKX_32-NEXT:    vptestmq %xmm2, %xmm2, %k1
+; SKX_32-NEXT:    vpmovq2m %xmm2, %k1
 ; SKX_32-NEXT:    vpsllq $32, %xmm1, %xmm1
 ; SKX_32-NEXT:    vpsraq $32, %xmm1, %xmm1
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax

Modified: llvm/trunk/test/CodeGen/X86/pr33349.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr33349.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr33349.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr33349.ll Mon Feb 19 14:07:31 2018
@@ -45,7 +45,7 @@ target triple = "x86_64-unknown-linux-gn
 ; SKX-LABEL: test:
 ; SKX:       # %bb.0: # %bb
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
-; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; SKX-NEXT:    vpmovd2m %xmm0, %k0
 ; SKX-NEXT:    kshiftrb $2, %k0, %k1
 ; SKX-NEXT:    kshiftrw $1, %k1, %k2
 ; SKX-NEXT:    kmovd %k2, %eax

Modified: llvm/trunk/test/CodeGen/X86/required-vector-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/required-vector-width.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/required-vector-width.ll (original)
+++ llvm/trunk/test/CodeGen/X86/required-vector-width.ll Mon Feb 19 14:07:31 2018
@@ -614,7 +614,7 @@ define <16 x i16> @test_16f32toub_512(<1
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vcvttps2dq (%rdi), %zmm1
 ; CHECK-NEXT:    vpslld $31, %zmm1, %zmm1
-; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; CHECK-NEXT:    vpmovd2m %zmm1, %k1
 ; CHECK-NEXT:    vmovdqu16 %ymm0, %ymm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %a = load <16 x float>, <16 x float>* %ptr
@@ -645,7 +645,7 @@ define <16 x i16> @test_16f32tosb_512(<1
 ; CHECK-LABEL: test_16f32tosb_512:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vcvttps2dq (%rdi), %zmm1
-; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; CHECK-NEXT:    vpmovd2m %zmm1, %k1
 ; CHECK-NEXT:    vmovdqu16 %ymm0, %ymm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %a = load <16 x float>, <16 x float>* %ptr

Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Mon Feb 19 14:07:31 2018
@@ -6411,28 +6411,28 @@ define <128 x i1> @test_cmp_v128i8(<128
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpcmpgtb %ymm4, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm4
-; AVX512DQ-NEXT:    vptestmd %zmm4, %zmm4, %k0
+; AVX512DQ-NEXT:    vpmovd2m %zmm4, %k0
 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
 ; AVX512DQ-NEXT:    vpcmpgtb %ymm5, %ymm1, %ymm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
-; AVX512DQ-NEXT:    vptestmd %zmm1, %zmm1, %k2
+; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k2
 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k3
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k3
 ; AVX512DQ-NEXT:    vpcmpgtb %ymm6, %ymm2, %ymm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
-; AVX512DQ-NEXT:    vptestmd %zmm1, %zmm1, %k4
+; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k4
 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k5
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k5
 ; AVX512DQ-NEXT:    vpcmpgtb %ymm7, %ymm3, %ymm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
-; AVX512DQ-NEXT:    vptestmd %zmm1, %zmm1, %k6
+; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k6
 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT:    vptestmd %zmm0, %zmm0, %k7
+; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k7
 ; AVX512DQ-NEXT:    kmovw %k7, 14(%rdi)
 ; AVX512DQ-NEXT:    kmovw %k6, 12(%rdi)
 ; AVX512DQ-NEXT:    kmovw %k5, 10(%rdi)

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Mon Feb 19 14:07:31 2018
@@ -567,7 +567,7 @@ define <16 x float> @test_vshuff32x4_512
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
 ; AVX512F-NEXT:    vpslld $31, %zmm3, %zmm3
-; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
+; AVX512F-NEXT:    vpmovd2m %zmm3, %k1
 ; AVX512F-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
 ; AVX512F-NEXT:    vmovaps %zmm2, %zmm0
 ; AVX512F-NEXT:    retq
@@ -589,7 +589,7 @@ define <16 x i32> @test_vshufi32x4_512_m
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
 ; AVX512F-NEXT:    vpslld $31, %zmm3, %zmm3
-; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
+; AVX512F-NEXT:    vpmovd2m %zmm3, %k1
 ; AVX512F-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
 ; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; AVX512F-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=325534&r1=325533&r2=325534&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Mon Feb 19 14:07:31 2018
@@ -30,7 +30,7 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a
 ; VL_BW_DQ-LABEL: shuf2i1_1_0:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
-; VL_BW_DQ-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; VL_BW_DQ-NEXT:    vpmovq2m %xmm0, %k0
 ; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm0
 ; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; VL_BW_DQ-NEXT:    vpmovq2m %xmm0, %k0
@@ -71,7 +71,7 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a
 ; VL_BW_DQ-LABEL: shuf2i1_1_2:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
-; VL_BW_DQ-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; VL_BW_DQ-NEXT:    vpmovq2m %xmm0, %k0
 ; VL_BW_DQ-NEXT:    movq $-1, %rax
 ; VL_BW_DQ-NEXT:    vmovq %rax, %xmm0
 ; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm1
@@ -111,7 +111,7 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1>
 ; VL_BW_DQ-LABEL: shuf4i1_3_2_10:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    vpslld $31, %xmm0, %xmm0
-; VL_BW_DQ-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; VL_BW_DQ-NEXT:    vpmovd2m %xmm0, %k0
 ; VL_BW_DQ-NEXT:    vpmovm2d %k0, %xmm0
 ; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
 ; VL_BW_DQ-NEXT:    vpmovd2m %xmm0, %k0



