[llvm] c2e8a42 - [X86] Don't widen 128/256-bit strict compares with vXi1 result to 512-bits on KNL.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 13:27:10 PST 2020


Author: Craig Topper
Date: 2020-02-13T13:26:40-08:00
New Revision: c2e8a421ac52c5a17962a99db472be1d0bfdc296

URL: https://github.com/llvm/llvm-project/commit/c2e8a421ac52c5a17962a99db472be1d0bfdc296
DIFF: https://github.com/llvm/llvm-project/commit/c2e8a421ac52c5a17962a99db472be1d0bfdc296.diff

LOG: [X86] Don't widen 128/256-bit strict compares with vXi1 result to 512-bits on KNL.

If we widen the compare to 512 bits, we might trigger a spurious
floating-point exception from the garbage data in the upper elements.

We have two choices here: explicitly force the upper bits to zero,
or use a legacy VEX vcmpps/pd instruction and convert the XMM/YMM
result to a mask register.

I've chosen to go with the second option, though I'm not sure which
is really best. With the first option, in some cases we could get rid
of the explicit zeroing, since the producing instruction probably
already zeroed the upper bits, but we would lose the ability to fold
a load. So which is best depends on the surrounding code.

Differential Revision: https://reviews.llvm.org/D74522

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
    llvm/test/CodeGen/X86/vec-strict-cmp-256.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1b00d961d206..5992436e1635 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21645,8 +21645,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
     SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
 
+    // If we have a strict compare with a vXi1 result and the input is 128/256
+    // bits we can't use a masked compare unless we have VLX. If we use a wider
+    // compare like we do for non-strict, we might trigger spurious exceptions
+    // from the upper elements. Instead emit a AVX compare and convert to mask.
     unsigned Opc;
-    if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
+    if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&
+        (!IsStrict || Subtarget.hasVLX() ||
+         Op0.getSimpleValueType().is512BitVector())) {
       assert(VT.getVectorNumElements() <= 16);
       Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
     } else {
@@ -21742,10 +21748,19 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
             Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
     }
 
-    // If this is SSE/AVX CMPP, bitcast the result back to integer to match the
-    // result type of SETCC. The bitcast is expected to be optimized away
-    // during combining/isel.
-    Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
+    if (VT.getSizeInBits() > Op.getSimpleValueType().getSizeInBits()) {
+      // We emitted a compare with an XMM/YMM result. Finish converting to a
+      // mask register using a vptestm.
+      EVT CastVT = EVT(VT).changeVectorElementTypeToInteger();
+      Cmp = DAG.getBitcast(CastVT, Cmp);
+      Cmp = DAG.getSetCC(dl, Op.getSimpleValueType(), Cmp,
+                         DAG.getConstant(0, dl, CastVT), ISD::SETNE);
+    } else {
+      // If this is SSE/AVX CMPP, bitcast the result back to integer to match
+      // the result type of SETCC. The bitcast is expected to be optimized
+      // away during combining/isel.
+      Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
+    }
 
     if (IsStrict)
       return DAG.getMergeValues({Cmp, Chain}, dl);

diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 53a9294c9fef..1cc7f645ef5f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3232,8 +3232,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
-                                   (Narrow.VT Narrow.RC:$src2), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
           (COPY_TO_REGCLASS
            (!cast<Instruction>(InstStr#"Zrri")
             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3250,8 +3250,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
            timm:$cc), Narrow.KRC)>;
 
 // Broadcast load.
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
-                                   (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
           (COPY_TO_REGCLASS
            (!cast<Instruction>(InstStr#"Zrmbi")
             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3266,8 +3266,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
            addr:$src2, timm:$cc), Narrow.KRC)>;
 
 // Commuted with broadcast load.
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
-                                   (Narrow.VT Narrow.RC:$src1), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
+                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
           (COPY_TO_REGCLASS
            (!cast<Instruction>(InstStr#"Zrmbi")
             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),

diff  --git a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
index 7e11f2625603..72d9035e6738 100644
--- a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
@@ -73,11 +73,10 @@ define <4 x i32> @test_v4f32_oeq_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeqps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -87,11 +86,10 @@ define <4 x i32> @test_v4f32_oeq_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_oeq_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeqps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -228,11 +226,11 @@ define <4 x i32> @test_v4f32_ogt_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmplt_oqps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmplt_oqps %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -242,11 +240,10 @@ define <4 x i32> @test_v4f32_ogt_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ogt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplt_oqps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmplt_oqps %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -383,11 +380,11 @@ define <4 x i32> @test_v4f32_oge_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmple_oqps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmple_oqps %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -397,11 +394,10 @@ define <4 x i32> @test_v4f32_oge_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_oge_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmple_oqps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmple_oqps %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -537,11 +533,10 @@ define <4 x i32> @test_v4f32_olt_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmplt_oqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmplt_oqps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -551,11 +546,10 @@ define <4 x i32> @test_v4f32_olt_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_olt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplt_oqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmplt_oqps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -691,11 +685,10 @@ define <4 x i32> @test_v4f32_ole_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmple_oqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmple_oqps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -705,11 +698,10 @@ define <4 x i32> @test_v4f32_ole_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ole_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmple_oqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmple_oqps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -793,11 +785,10 @@ define <4 x i32> @test_v4f32_one_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpneq_oqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_oqps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -807,11 +798,10 @@ define <4 x i32> @test_v4f32_one_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_one_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_oqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_oqps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -888,11 +878,10 @@ define <4 x i32> @test_v4f32_ord_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpordps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpordps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -902,11 +891,10 @@ define <4 x i32> @test_v4f32_ord_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ord_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpordps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpordps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -990,11 +978,10 @@ define <4 x i32> @test_v4f32_ueq_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpeq_uqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_uqps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1004,11 +991,10 @@ define <4 x i32> @test_v4f32_ueq_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ueq_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_uqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_uqps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -1144,11 +1130,10 @@ define <4 x i32> @test_v4f32_ugt_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnle_uqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnle_uqps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1158,11 +1143,10 @@ define <4 x i32> @test_v4f32_ugt_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ugt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnle_uqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnle_uqps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -1298,11 +1282,10 @@ define <4 x i32> @test_v4f32_uge_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnlt_uqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnlt_uqps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1312,11 +1295,10 @@ define <4 x i32> @test_v4f32_uge_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_uge_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlt_uqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnlt_uqps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -1453,11 +1435,11 @@ define <4 x i32> @test_v4f32_ult_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnle_uqps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnle_uqps %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1467,11 +1449,10 @@ define <4 x i32> @test_v4f32_ult_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ult_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnle_uqps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnle_uqps %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -1608,11 +1589,11 @@ define <4 x i32> @test_v4f32_ule_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnlt_uqps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnlt_uqps %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1622,11 +1603,10 @@ define <4 x i32> @test_v4f32_ule_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ule_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlt_uqps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnlt_uqps %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -1703,11 +1683,10 @@ define <4 x i32> @test_v4f32_une_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpneqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneqps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1717,11 +1696,10 @@ define <4 x i32> @test_v4f32_une_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_une_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneqps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -1798,11 +1776,10 @@ define <4 x i32> @test_v4f32_uno_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpunordps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpunordps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1812,11 +1789,10 @@ define <4 x i32> @test_v4f32_uno_q(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_uno_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpunordps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpunordps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -1893,11 +1869,10 @@ define <2 x i64> @test_v2f64_oeq_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeqpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1907,11 +1882,10 @@ define <2 x i64> @test_v2f64_oeq_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_oeq_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeqpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2014,11 +1988,11 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmplt_oqpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmplt_oqpd %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2028,11 +2002,10 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ogt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplt_oqpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmplt_oqpd %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2135,11 +2108,11 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmple_oqpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmple_oqpd %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2149,11 +2122,10 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_oge_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmple_oqpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmple_oqpd %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2255,11 +2227,10 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmplt_oqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmplt_oqpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2269,11 +2240,10 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_olt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplt_oqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmplt_oqpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2375,11 +2345,10 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmple_oqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmple_oqpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2389,11 +2358,10 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ole_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmple_oqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmple_oqpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2477,11 +2445,10 @@ define <2 x i64> @test_v2f64_one_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpneq_oqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_oqpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2491,11 +2458,10 @@ define <2 x i64> @test_v2f64_one_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_one_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_oqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_oqpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2572,11 +2538,10 @@ define <2 x i64> @test_v2f64_ord_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpordpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpordpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2586,11 +2551,10 @@ define <2 x i64> @test_v2f64_ord_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ord_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpordpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpordpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2674,11 +2638,10 @@ define <2 x i64> @test_v2f64_ueq_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpeq_uqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_uqpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2688,11 +2651,10 @@ define <2 x i64> @test_v2f64_ueq_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ueq_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_uqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_uqpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2794,11 +2756,10 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnle_uqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnle_uqpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2808,11 +2769,10 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ugt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnle_uqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnle_uqpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -2914,11 +2874,10 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnlt_uqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnlt_uqpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2928,11 +2887,10 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_uge_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlt_uqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnlt_uqpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3035,11 +2993,11 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnle_uqpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnle_uqpd %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3049,11 +3007,10 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ult_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnle_uqpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnle_uqpd %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3156,11 +3113,11 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnlt_uqpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnlt_uqpd %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3170,11 +3127,10 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ule_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlt_uqpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnlt_uqpd %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3251,11 +3207,10 @@ define <2 x i64> @test_v2f64_une_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpneqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneqpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3265,11 +3220,10 @@ define <2 x i64> @test_v2f64_une_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_une_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneqpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3346,11 +3300,10 @@ define <2 x i64> @test_v2f64_uno_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpunordpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpunordpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3360,11 +3313,10 @@ define <2 x i64> @test_v2f64_uno_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_uno_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpunordpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpunordpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3446,11 +3398,10 @@ define <4 x i32> @test_v4f32_oeq_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpeq_osps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_osps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3460,11 +3411,10 @@ define <4 x i32> @test_v4f32_oeq_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_oeq_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_osps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_osps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3543,11 +3493,11 @@ define <4 x i32> @test_v4f32_ogt_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpltps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpltps %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3557,11 +3507,10 @@ define <4 x i32> @test_v4f32_ogt_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ogt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpltps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpltps %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3640,11 +3589,11 @@ define <4 x i32> @test_v4f32_oge_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpleps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpleps %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3654,11 +3603,10 @@ define <4 x i32> @test_v4f32_oge_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_oge_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpleps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpleps %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3735,11 +3683,10 @@ define <4 x i32> @test_v4f32_olt_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpltps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpltps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3749,11 +3696,10 @@ define <4 x i32> @test_v4f32_olt_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_olt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpltps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpltps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3830,11 +3776,10 @@ define <4 x i32> @test_v4f32_ole_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpleps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpleps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3844,11 +3789,10 @@ define <4 x i32> @test_v4f32_ole_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ole_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpleps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpleps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -3936,11 +3880,10 @@ define <4 x i32> @test_v4f32_one_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpneq_osps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_osps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3950,11 +3893,10 @@ define <4 x i32> @test_v4f32_one_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_one_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_osps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_osps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4036,11 +3978,10 @@ define <4 x i32> @test_v4f32_ord_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpord_sps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpord_sps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4050,11 +3991,10 @@ define <4 x i32> @test_v4f32_ord_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ord_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpord_sps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpord_sps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4142,11 +4082,10 @@ define <4 x i32> @test_v4f32_ueq_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpeq_usps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_usps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4156,11 +4095,10 @@ define <4 x i32> @test_v4f32_ueq_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ueq_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_usps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_usps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4237,11 +4175,10 @@ define <4 x i32> @test_v4f32_ugt_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnleps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnleps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4251,11 +4188,10 @@ define <4 x i32> @test_v4f32_ugt_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ugt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnleps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnleps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4332,11 +4268,10 @@ define <4 x i32> @test_v4f32_uge_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnltps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnltps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4346,11 +4281,10 @@ define <4 x i32> @test_v4f32_uge_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_uge_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnltps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnltps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4429,11 +4363,11 @@ define <4 x i32> @test_v4f32_ult_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnleps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnleps %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4443,11 +4377,10 @@ define <4 x i32> @test_v4f32_ult_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ult_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnleps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnleps %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4526,11 +4459,11 @@ define <4 x i32> @test_v4f32_ule_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnltps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnltps %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4540,11 +4473,10 @@ define <4 x i32> @test_v4f32_ule_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_ule_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnltps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnltps %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4626,11 +4558,10 @@ define <4 x i32> @test_v4f32_une_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpneq_usps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_usps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4640,11 +4571,10 @@ define <4 x i32> @test_v4f32_une_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_une_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_usps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_usps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4726,11 +4656,10 @@ define <4 x i32> @test_v4f32_uno_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpunord_sps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpunord_sps 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4740,11 +4669,10 @@ define <4 x i32> @test_v4f32_uno_s(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f32_uno_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpunord_sps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpunord_sps %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4826,11 +4754,10 @@ define <2 x i64> @test_v2f64_oeq_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpeq_ospd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_ospd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4840,11 +4767,10 @@ define <2 x i64> @test_v2f64_oeq_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_oeq_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_ospd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_ospd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -4923,11 +4849,11 @@ define <2 x i64> @test_v2f64_ogt_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpltpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpltpd %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -4937,11 +4863,10 @@ define <2 x i64> @test_v2f64_ogt_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ogt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpltpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpltpd %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5020,11 +4945,11 @@ define <2 x i64> @test_v2f64_oge_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmplepd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmplepd %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5034,11 +4959,10 @@ define <2 x i64> @test_v2f64_oge_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_oge_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplepd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmplepd %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5115,11 +5039,10 @@ define <2 x i64> @test_v2f64_olt_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpltpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpltpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5129,11 +5052,10 @@ define <2 x i64> @test_v2f64_olt_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_olt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpltpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpltpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5210,11 +5132,10 @@ define <2 x i64> @test_v2f64_ole_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmplepd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmplepd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5224,11 +5145,10 @@ define <2 x i64> @test_v2f64_ole_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ole_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplepd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmplepd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5316,11 +5236,10 @@ define <2 x i64> @test_v2f64_one_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpneq_ospd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_ospd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5330,11 +5249,10 @@ define <2 x i64> @test_v2f64_one_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_one_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_ospd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_ospd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5416,11 +5334,10 @@ define <2 x i64> @test_v2f64_ord_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpord_spd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpord_spd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5430,11 +5347,10 @@ define <2 x i64> @test_v2f64_ord_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ord_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpord_spd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpord_spd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5522,11 +5438,10 @@ define <2 x i64> @test_v2f64_ueq_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpeq_uspd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_uspd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5536,11 +5451,10 @@ define <2 x i64> @test_v2f64_ueq_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ueq_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_uspd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_uspd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5617,11 +5531,10 @@ define <2 x i64> @test_v2f64_ugt_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnlepd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnlepd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5631,11 +5544,10 @@ define <2 x i64> @test_v2f64_ugt_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ugt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlepd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnlepd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5712,11 +5624,10 @@ define <2 x i64> @test_v2f64_uge_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnltpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnltpd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5726,11 +5637,10 @@ define <2 x i64> @test_v2f64_uge_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_uge_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnltpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnltpd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5809,11 +5719,11 @@ define <2 x i64> @test_v2f64_ult_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnlepd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnlepd %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5823,11 +5733,10 @@ define <2 x i64> @test_v2f64_ult_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ult_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlepd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnlepd %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -5906,11 +5815,11 @@ define <2 x i64> @test_v2f64_ule_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpnltpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnltpd %xmm2, %xmm3, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -5920,11 +5829,10 @@ define <2 x i64> @test_v2f64_ule_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_ule_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnltpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnltpd %xmm2, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -6006,11 +5914,10 @@ define <2 x i64> @test_v2f64_une_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpneq_uspd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_uspd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -6020,11 +5927,10 @@ define <2 x i64> @test_v2f64_une_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_une_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_uspd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_uspd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper
@@ -6106,11 +6012,10 @@ define <2 x i64> @test_v2f64_uno_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-16, %esp
 ; AVX512F-32-NEXT:    subl $16, %esp
-; AVX512F-32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %xmm3
-; AVX512F-32-NEXT:    vcmpunord_spd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpunord_spd 8(%ebp), %xmm2, %xmm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -6120,11 +6025,10 @@ define <2 x i64> @test_v2f64_uno_s(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v2f64_uno_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpunord_spd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpunord_spd %xmm3, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper

diff  --git a/llvm/test/CodeGen/X86/vec-strict-cmp-256.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-256.ll
index 225aebfeb65a..dd0dd95daa33 100644
--- a/llvm/test/CodeGen/X86/vec-strict-cmp-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-cmp-256.ll
@@ -49,11 +49,10 @@ define <8 x i32> @test_v8f32_oeq_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeqps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -62,11 +61,10 @@ define <8 x i32> @test_v8f32_oeq_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_oeq_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeqps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -121,11 +119,11 @@ define <8 x i32> @test_v8f32_ogt_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmplt_oqps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmplt_oqps %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -134,11 +132,10 @@ define <8 x i32> @test_v8f32_ogt_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ogt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplt_oqps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmplt_oqps %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -193,11 +190,11 @@ define <8 x i32> @test_v8f32_oge_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmple_oqps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmple_oqps %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -206,11 +203,10 @@ define <8 x i32> @test_v8f32_oge_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_oge_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmple_oqps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmple_oqps %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -264,11 +260,10 @@ define <8 x i32> @test_v8f32_olt_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmplt_oqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmplt_oqps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -277,11 +272,10 @@ define <8 x i32> @test_v8f32_olt_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_olt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplt_oqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmplt_oqps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -335,11 +329,10 @@ define <8 x i32> @test_v8f32_ole_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmple_oqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmple_oqps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -348,11 +341,10 @@ define <8 x i32> @test_v8f32_ole_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ole_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmple_oqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmple_oqps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -406,11 +398,10 @@ define <8 x i32> @test_v8f32_one_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpneq_oqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_oqps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -419,11 +410,10 @@ define <8 x i32> @test_v8f32_one_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_one_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_oqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_oqps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -477,11 +467,10 @@ define <8 x i32> @test_v8f32_ord_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpordps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpordps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -490,11 +479,10 @@ define <8 x i32> @test_v8f32_ord_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ord_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpordps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpordps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -548,11 +536,10 @@ define <8 x i32> @test_v8f32_ueq_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpeq_uqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_uqps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -561,11 +548,10 @@ define <8 x i32> @test_v8f32_ueq_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ueq_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_uqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_uqps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -619,11 +605,10 @@ define <8 x i32> @test_v8f32_ugt_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnle_uqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnle_uqps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -632,11 +617,10 @@ define <8 x i32> @test_v8f32_ugt_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ugt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnle_uqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnle_uqps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -690,11 +674,10 @@ define <8 x i32> @test_v8f32_uge_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnlt_uqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnlt_uqps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -703,11 +686,10 @@ define <8 x i32> @test_v8f32_uge_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_uge_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlt_uqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnlt_uqps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -762,11 +744,11 @@ define <8 x i32> @test_v8f32_ult_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnle_uqps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnle_uqps %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -775,11 +757,10 @@ define <8 x i32> @test_v8f32_ult_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ult_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnle_uqps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnle_uqps %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -834,11 +815,11 @@ define <8 x i32> @test_v8f32_ule_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnlt_uqps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnlt_uqps %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -847,11 +828,10 @@ define <8 x i32> @test_v8f32_ule_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ule_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlt_uqps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnlt_uqps %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -905,11 +885,10 @@ define <8 x i32> @test_v8f32_une_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpneqps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneqps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -918,11 +897,10 @@ define <8 x i32> @test_v8f32_une_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_une_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneqps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneqps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -976,11 +954,10 @@ define <8 x i32> @test_v8f32_uno_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpunordps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpunordps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -989,11 +966,10 @@ define <8 x i32> @test_v8f32_uno_q(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_uno_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpunordps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpunordps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1047,11 +1023,10 @@ define <4 x i64> @test_v4f64_oeq_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeqpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1060,11 +1035,10 @@ define <4 x i64> @test_v4f64_oeq_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_oeq_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeqpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1119,11 +1093,11 @@ define <4 x i64> @test_v4f64_ogt_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmplt_oqpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmplt_oqpd %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1132,11 +1106,10 @@ define <4 x i64> @test_v4f64_ogt_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ogt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplt_oqpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmplt_oqpd %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1191,11 +1164,11 @@ define <4 x i64> @test_v4f64_oge_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmple_oqpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmple_oqpd %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1204,11 +1177,10 @@ define <4 x i64> @test_v4f64_oge_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_oge_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmple_oqpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmple_oqpd %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1262,11 +1234,10 @@ define <4 x i64> @test_v4f64_olt_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmplt_oqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmplt_oqpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1275,11 +1246,10 @@ define <4 x i64> @test_v4f64_olt_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_olt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplt_oqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmplt_oqpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1333,11 +1303,10 @@ define <4 x i64> @test_v4f64_ole_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmple_oqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmple_oqpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1346,11 +1315,10 @@ define <4 x i64> @test_v4f64_ole_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ole_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmple_oqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmple_oqpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1404,11 +1372,10 @@ define <4 x i64> @test_v4f64_one_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpneq_oqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_oqpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1417,11 +1384,10 @@ define <4 x i64> @test_v4f64_one_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_one_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_oqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_oqpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1475,11 +1441,10 @@ define <4 x i64> @test_v4f64_ord_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpordpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpordpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1488,11 +1453,10 @@ define <4 x i64> @test_v4f64_ord_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ord_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpordpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpordpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1546,11 +1510,10 @@ define <4 x i64> @test_v4f64_ueq_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpeq_uqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_uqpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1559,11 +1522,10 @@ define <4 x i64> @test_v4f64_ueq_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ueq_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_uqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_uqpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1617,11 +1579,10 @@ define <4 x i64> @test_v4f64_ugt_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnle_uqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnle_uqpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1630,11 +1591,10 @@ define <4 x i64> @test_v4f64_ugt_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ugt_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnle_uqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnle_uqpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1688,11 +1648,10 @@ define <4 x i64> @test_v4f64_uge_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnlt_uqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnlt_uqpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1701,11 +1660,10 @@ define <4 x i64> @test_v4f64_uge_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_uge_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlt_uqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnlt_uqpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1760,11 +1718,11 @@ define <4 x i64> @test_v4f64_ult_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnle_uqpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnle_uqpd %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1773,11 +1731,10 @@ define <4 x i64> @test_v4f64_ult_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ult_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnle_uqpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnle_uqpd %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1832,11 +1789,11 @@ define <4 x i64> @test_v4f64_ule_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnlt_uqpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnlt_uqpd %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1845,11 +1802,10 @@ define <4 x i64> @test_v4f64_ule_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ule_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlt_uqpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnlt_uqpd %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1903,11 +1859,10 @@ define <4 x i64> @test_v4f64_une_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpneqpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneqpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1916,11 +1871,10 @@ define <4 x i64> @test_v4f64_une_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_une_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneqpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneqpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -1974,11 +1928,10 @@ define <4 x i64> @test_v4f64_uno_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpunordpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpunordpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -1987,11 +1940,10 @@ define <4 x i64> @test_v4f64_uno_q(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_uno_q:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpunordpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpunordpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2045,11 +1997,10 @@ define <8 x i32> @test_v8f32_oeq_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpeq_osps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_osps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2058,11 +2009,10 @@ define <8 x i32> @test_v8f32_oeq_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_oeq_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_osps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_osps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2117,11 +2067,11 @@ define <8 x i32> @test_v8f32_ogt_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpltps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpltps %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2130,11 +2080,10 @@ define <8 x i32> @test_v8f32_ogt_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ogt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpltps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpltps %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2189,11 +2138,11 @@ define <8 x i32> @test_v8f32_oge_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpleps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpleps %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2202,11 +2151,10 @@ define <8 x i32> @test_v8f32_oge_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_oge_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpleps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpleps %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2260,11 +2208,10 @@ define <8 x i32> @test_v8f32_olt_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpltps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpltps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2273,11 +2220,10 @@ define <8 x i32> @test_v8f32_olt_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_olt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpltps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpltps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2331,11 +2277,10 @@ define <8 x i32> @test_v8f32_ole_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpleps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpleps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2344,11 +2289,10 @@ define <8 x i32> @test_v8f32_ole_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ole_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpleps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpleps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2402,11 +2346,10 @@ define <8 x i32> @test_v8f32_one_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpneq_osps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_osps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2415,11 +2358,10 @@ define <8 x i32> @test_v8f32_one_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_one_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_osps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_osps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2473,11 +2415,10 @@ define <8 x i32> @test_v8f32_ord_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpord_sps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpord_sps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2486,11 +2427,10 @@ define <8 x i32> @test_v8f32_ord_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ord_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpord_sps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpord_sps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2544,11 +2484,10 @@ define <8 x i32> @test_v8f32_ueq_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpeq_usps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_usps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2557,11 +2496,10 @@ define <8 x i32> @test_v8f32_ueq_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ueq_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_usps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_usps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2615,11 +2553,10 @@ define <8 x i32> @test_v8f32_ugt_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnleps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnleps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2628,11 +2565,10 @@ define <8 x i32> @test_v8f32_ugt_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ugt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnleps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnleps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2686,11 +2622,10 @@ define <8 x i32> @test_v8f32_uge_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnltps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnltps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2699,11 +2634,10 @@ define <8 x i32> @test_v8f32_uge_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_uge_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnltps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnltps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2758,11 +2692,11 @@ define <8 x i32> @test_v8f32_ult_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnleps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnleps %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2771,11 +2705,10 @@ define <8 x i32> @test_v8f32_ult_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ult_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnleps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnleps %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2830,11 +2763,11 @@ define <8 x i32> @test_v8f32_ule_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnltps %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnltps %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2843,11 +2776,10 @@ define <8 x i32> @test_v8f32_ule_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_ule_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnltps %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnltps %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2901,11 +2833,10 @@ define <8 x i32> @test_v8f32_une_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpneq_usps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_usps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2914,11 +2845,10 @@ define <8 x i32> @test_v8f32_une_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_une_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_usps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_usps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -2972,11 +2902,10 @@ define <8 x i32> @test_v8f32_uno_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovaps 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpunord_sps %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpunord_sps 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -2985,11 +2914,10 @@ define <8 x i32> @test_v8f32_uno_s(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1,
 ;
 ; AVX512F-64-LABEL: test_v8f32_uno_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpunord_sps %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpunord_sps %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3043,11 +2971,10 @@ define <4 x i64> @test_v4f64_oeq_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpeq_ospd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_ospd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3056,11 +2983,10 @@ define <4 x i64> @test_v4f64_oeq_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_oeq_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_ospd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_ospd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3115,11 +3041,11 @@ define <4 x i64> @test_v4f64_ogt_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpltpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3128,11 +3054,10 @@ define <4 x i64> @test_v4f64_ogt_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ogt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpltpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3187,11 +3112,11 @@ define <4 x i64> @test_v4f64_oge_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmplepd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmplepd %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3200,11 +3125,10 @@ define <4 x i64> @test_v4f64_oge_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_oge_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplepd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmplepd %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3258,11 +3182,10 @@ define <4 x i64> @test_v4f64_olt_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpltpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpltpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3271,11 +3194,10 @@ define <4 x i64> @test_v4f64_olt_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_olt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpltpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpltpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3329,11 +3251,10 @@ define <4 x i64> @test_v4f64_ole_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmplepd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmplepd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3342,11 +3263,10 @@ define <4 x i64> @test_v4f64_ole_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ole_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmplepd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmplepd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3400,11 +3320,10 @@ define <4 x i64> @test_v4f64_one_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpneq_ospd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_ospd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3413,11 +3332,10 @@ define <4 x i64> @test_v4f64_one_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_one_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_ospd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_ospd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3471,11 +3389,10 @@ define <4 x i64> @test_v4f64_ord_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpord_spd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpord_spd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3484,11 +3401,10 @@ define <4 x i64> @test_v4f64_ord_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ord_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpord_spd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpord_spd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3542,11 +3458,10 @@ define <4 x i64> @test_v4f64_ueq_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpeq_uspd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpeq_uspd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3555,11 +3470,10 @@ define <4 x i64> @test_v4f64_ueq_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ueq_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpeq_uspd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpeq_uspd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3613,11 +3527,10 @@ define <4 x i64> @test_v4f64_ugt_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnlepd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnlepd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3626,11 +3539,10 @@ define <4 x i64> @test_v4f64_ugt_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ugt_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlepd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnlepd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3684,11 +3596,10 @@ define <4 x i64> @test_v4f64_uge_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnltpd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpnltpd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3697,11 +3608,10 @@ define <4 x i64> @test_v4f64_uge_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_uge_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnltpd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpnltpd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3756,11 +3666,11 @@ define <4 x i64> @test_v4f64_ult_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnlepd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnlepd %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3769,11 +3679,10 @@ define <4 x i64> @test_v4f64_ult_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ult_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnlepd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnlepd %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3828,11 +3737,11 @@ define <4 x i64> @test_v4f64_ule_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpnltpd %zmm2, %zmm3, %k1
+; AVX512F-32-NEXT:    vcmpnltpd %ymm2, %ymm3, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3841,11 +3750,10 @@ define <4 x i64> @test_v4f64_ule_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_ule_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpnltpd %zmm2, %zmm3, %k1
+; AVX512F-64-NEXT:    vcmpnltpd %ymm2, %ymm3, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3899,11 +3807,10 @@ define <4 x i64> @test_v4f64_une_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpneq_uspd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpneq_uspd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3912,11 +3819,10 @@ define <4 x i64> @test_v4f64_une_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_une_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpneq_uspd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpneq_uspd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq
@@ -3970,11 +3876,10 @@ define <4 x i64> @test_v4f64_uno_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ; AVX512F-32-NEXT:    movl %esp, %ebp
 ; AVX512F-32-NEXT:    andl $-32, %esp
 ; AVX512F-32-NEXT:    subl $32, %esp
-; AVX512F-32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-32-NEXT:    vmovapd 8(%ebp), %ymm3
-; AVX512F-32-NEXT:    vcmpunord_spd %zmm3, %zmm2, %k1
+; AVX512F-32-NEXT:    vcmpunord_spd 8(%ebp), %ymm2, %ymm2
+; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-32-NEXT:    movl %ebp, %esp
@@ -3983,11 +3888,10 @@ define <4 x i64> @test_v4f64_uno_s(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1,
 ;
 ; AVX512F-64-LABEL: test_v4f64_uno_s:
 ; AVX512F-64:       # %bb.0:
-; AVX512F-64-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
-; AVX512F-64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512F-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-64-NEXT:    vcmpunord_spd %zmm3, %zmm2, %k1
+; AVX512F-64-NEXT:    vcmpunord_spd %ymm3, %ymm2, %ymm2
+; AVX512F-64-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512F-64-NEXT:    retq


        


More information about the llvm-commits mailing list