[llvm] r322101 - [X86] Add a DAG combine to combine (sext (setcc)) with VLX

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 9 10:14:22 PST 2018


Author: ctopper
Date: Tue Jan  9 10:14:22 2018
New Revision: 322101

URL: http://llvm.org/viewvc/llvm-project?rev=322101&view=rev
Log:
[X86] Add a DAG combine to combine (sext (setcc)) with VLX

Normally target independent DAG combine would do this combine based on getSetCCResultType, but with VLX getSetCCResultType returns a vXi1 type preventing the DAG combining from kicking in.

But doing this combine can allow us to remove the explicit sign extend that would otherwise be emitted.

This patch adds a target specific DAG combine to combine the sext+setcc when the result type is the same size as the input to the setcc. I've restricted this to FP compares and things that can be represented with PCMPEQ and PCMPGT since we don't have full integer compare support on the older ISAs.

Differential Revision: https://reviews.llvm.org/D41850

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
    llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll
    llvm/trunk/test/CodeGen/X86/commute-fcmp.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
    llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
    llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
    llvm/trunk/test/CodeGen/X86/vselect-packss.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan  9 10:14:22 2018
@@ -35996,6 +35996,45 @@ static SDValue combineToExtendVectorInRe
   return SDValue();
 }
 
+// Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm
+// result type.
+static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
+                               const X86Subtarget &Subtarget) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  SDLoc dl(N);
+
+  // Only do this combine with AVX512 for vector extends.
+  if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Only combine legal element types.
+  EVT SVT = VT.getVectorElementType();
+  if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
+      SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
+    return SDValue();
+
+  // We can only do this if the vector size in 256 bits or less.
+  unsigned Size = VT.getSizeInBits();
+  if (Size > 256)
+    return SDValue();
+
+  // Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since
+  // that's the only integer compares with we have.
+  ISD::CondCode CC = cast<CondCodeSDNode>(N0->getOperand(2))->get();
+  if (ISD::isUnsignedIntSetCC(CC) || CC == ISD::SETLE || CC == ISD::SETGE ||
+      CC == ISD::SETNE)
+    return SDValue();
+
+  // Only do this combine if the extension will be fully consumed by the setcc.
+  EVT N00VT = N0.getOperand(0).getValueType();
+  EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+  if (Size != MatchingVecType.getSizeInBits())
+    return SDValue();
+
+  return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+}
+
 static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
@@ -36013,6 +36052,9 @@ static SDValue combineSext(SDNode *N, Se
   if (!DCI.isBeforeLegalizeOps())
     return SDValue();
 
+  if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
+    return V;
+
   if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
       isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
     // Invert and sign-extend a boolean is the same as zero-extend and subtract

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Tue Jan  9 10:14:22 2018
@@ -947,11 +947,9 @@ define <4 x double> @test_cmppd(<4 x dou
 ;
 ; SKX-LABEL: test_cmppd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vcmpeqpd (%rdi), %ymm0, %k1 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpmovm2q %k1, %ymm1 # sched: [1:0.25]
-; SKX-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; SKX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
+; SKX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
+; SKX-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_cmppd:
@@ -1015,11 +1013,9 @@ define <8 x float> @test_cmpps(<8 x floa
 ;
 ; SKX-LABEL: test_cmpps:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vcmpeqps (%rdi), %ymm0, %k1 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpmovm2d %k1, %ymm1 # sched: [1:0.25]
-; SKX-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; SKX-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
+; SKX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
+; SKX-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_cmpps:

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Tue Jan  9 10:14:22 2018
@@ -2159,10 +2159,8 @@ define <32 x i8> @test_pcmpeqb(<32 x i8>
 ;
 ; SKX-LABEL: test_pcmpeqb:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpeqb (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqb:
@@ -2205,10 +2203,8 @@ define <8 x i32> @test_pcmpeqd(<8 x i32>
 ;
 ; SKX-LABEL: test_pcmpeqd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqd:
@@ -2251,10 +2247,8 @@ define <4 x i64> @test_pcmpeqq(<4 x i64>
 ;
 ; SKX-LABEL: test_pcmpeqq:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqq:
@@ -2297,10 +2291,8 @@ define <16 x i16> @test_pcmpeqw(<16 x i1
 ;
 ; SKX-LABEL: test_pcmpeqw:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpeqw (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqw:
@@ -2343,10 +2335,8 @@ define <32 x i8> @test_pcmpgtb(<32 x i8>
 ;
 ; SKX-LABEL: test_pcmpgtb:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpgtb (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtb:
@@ -2389,10 +2379,8 @@ define <8 x i32> @test_pcmpgtd(<8 x i32>
 ;
 ; SKX-LABEL: test_pcmpgtd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpgtd (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtd:
@@ -2435,10 +2423,8 @@ define <4 x i64> @test_pcmpgtq(<4 x i64>
 ;
 ; SKX-LABEL: test_pcmpgtq:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpgtq (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT:    vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtq:
@@ -2481,10 +2467,8 @@ define <16 x i16> @test_pcmpgtw(<16 x i1
 ;
 ; SKX-LABEL: test_pcmpgtw:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpgtw (%rdi), %ymm0, %k0 # sched: [10:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtw:

Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Tue Jan  9 10:14:22 2018
@@ -848,22 +848,13 @@ define <8 x double> @test43(<8 x double>
 }
 
 define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
-; KNL-LABEL: test44:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
-; KNL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; KNL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: test44:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
-; SKX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
-; SKX-NEXT:    vpmovm2d %k0, %xmm0
-; SKX-NEXT:    retq
+; CHECK-LABEL: test44:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
   %mask = icmp eq <4 x i16> %x, %y
   %1 = sext <4 x i1> %mask to <4 x i32>
   ret <4 x i32> %1

Modified: llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll Tue Jan  9 10:14:22 2018
@@ -9,14 +9,13 @@ define <3 x i8 > @foo(<3 x i8>%x, <3 x i
 ; CHECK-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
 ; CHECK-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
 ; CHECK-NEXT:    vpslld $24, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrad $24, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovd %ecx, %xmm1
 ; CHECK-NEXT:    vpinsrd $1, %r8d, %xmm1, %xmm1
+; CHECK-NEXT:    vpsrad $24, %xmm0, %xmm0
 ; CHECK-NEXT:    vpinsrd $2, %r9d, %xmm1, %xmm1
 ; CHECK-NEXT:    vpslld $24, %xmm1, %xmm1
 ; CHECK-NEXT:    vpsrad $24, %xmm1, %xmm1
-; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
-; CHECK-NEXT:    vpmovm2d %k0, %xmm0
+; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT:    vpextrb $0, %xmm0, %eax
 ; CHECK-NEXT:    vpextrb $4, %xmm0, %edx
 ; CHECK-NEXT:    vpextrb $8, %xmm0, %ecx

Modified: llvm/trunk/test/CodeGen/X86/commute-fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commute-fcmp.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commute-fcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commute-fcmp.ll Tue Jan  9 10:14:22 2018
@@ -21,9 +21,7 @@ define <4 x i32> @commute_cmpps_eq(<4 x
 ;
 ; AVX512-LABEL: commute_cmpps_eq:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpeqps (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp oeq <4 x float> %1, %a1
@@ -44,9 +42,7 @@ define <4 x i32> @commute_cmpps_ne(<4 x
 ;
 ; AVX512-LABEL: commute_cmpps_ne:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpneqps (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp une <4 x float> %1, %a1
@@ -67,9 +63,7 @@ define <4 x i32> @commute_cmpps_ord(<4 x
 ;
 ; AVX512-LABEL: commute_cmpps_ord:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpordps (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp ord <4 x float> %1, %a1
@@ -90,9 +84,7 @@ define <4 x i32> @commute_cmpps_uno(<4 x
 ;
 ; AVX512-LABEL: commute_cmpps_uno:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpunordps (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp uno <4 x float> %1, %a1
@@ -117,9 +109,7 @@ define <4 x i32> @commute_cmpps_ueq(<4 x
 ;
 ; AVX512-LABEL: commute_cmpps_ueq:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp ueq <4 x float> %1, %a1
@@ -144,9 +134,7 @@ define <4 x i32> @commute_cmpps_one(<4 x
 ;
 ; AVX512-LABEL: commute_cmpps_one:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp one <4 x float> %1, %a1
@@ -171,9 +159,7 @@ define <4 x i32> @commute_cmpps_lt(<4 x
 ; AVX512-LABEL: commute_cmpps_lt:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovaps (%rdi), %xmm1
-; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp olt <4 x float> %1, %a1
@@ -198,9 +184,7 @@ define <4 x i32> @commute_cmpps_le(<4 x
 ; AVX512-LABEL: commute_cmpps_le:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovaps (%rdi), %xmm1
-; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp ole <4 x float> %1, %a1
@@ -222,9 +206,7 @@ define <8 x i32> @commute_cmpps_eq_ymm(<
 ;
 ; AVX512-LABEL: commute_cmpps_eq_ymm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpeqps (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp oeq <8 x float> %1, %a1
@@ -246,9 +228,7 @@ define <8 x i32> @commute_cmpps_ne_ymm(<
 ;
 ; AVX512-LABEL: commute_cmpps_ne_ymm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpneqps (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp une <8 x float> %1, %a1
@@ -270,9 +250,7 @@ define <8 x i32> @commute_cmpps_ord_ymm(
 ;
 ; AVX512-LABEL: commute_cmpps_ord_ymm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpordps (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp ord <8 x float> %1, %a1
@@ -294,9 +272,7 @@ define <8 x i32> @commute_cmpps_uno_ymm(
 ;
 ; AVX512-LABEL: commute_cmpps_uno_ymm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpunordps (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp uno <8 x float> %1, %a1
@@ -326,9 +302,7 @@ define <8 x i32> @commute_cmpps_ueq_ymm(
 ;
 ; AVX512-LABEL: commute_cmpps_ueq_ymm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp ueq <8 x float> %1, %a1
@@ -358,9 +332,7 @@ define <8 x i32> @commute_cmpps_one_ymm(
 ;
 ; AVX512-LABEL: commute_cmpps_one_ymm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp one <8 x float> %1, %a1
@@ -388,9 +360,7 @@ define <8 x i32> @commute_cmpps_lt_ymm(<
 ; AVX512-LABEL: commute_cmpps_lt_ymm:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovaps (%rdi), %ymm1
-; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp olt <8 x float> %1, %a1
@@ -418,9 +388,7 @@ define <8 x i32> @commute_cmpps_le_ymm(<
 ; AVX512-LABEL: commute_cmpps_le_ymm:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovaps (%rdi), %ymm1
-; AVX512-NEXT:    vcmpleps %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp ole <8 x float> %1, %a1
@@ -446,9 +414,7 @@ define <2 x i64> @commute_cmppd_eq(<2 x
 ;
 ; AVX512-LABEL: commute_cmppd_eq:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpeqpd (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp oeq <2 x double> %1, %a1
@@ -469,9 +435,7 @@ define <2 x i64> @commute_cmppd_ne(<2 x
 ;
 ; AVX512-LABEL: commute_cmppd_ne:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpneqpd (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp une <2 x double> %1, %a1
@@ -492,9 +456,7 @@ define <2 x i64> @commute_cmppd_ord(<2 x
 ;
 ; AVX512-LABEL: commute_cmppd_ord:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpordpd (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp ord <2 x double> %1, %a1
@@ -519,9 +481,7 @@ define <2 x i64> @commute_cmppd_ueq(<2 x
 ;
 ; AVX512-LABEL: commute_cmppd_ueq:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp ueq <2 x double> %1, %a1
@@ -546,9 +506,7 @@ define <2 x i64> @commute_cmppd_one(<2 x
 ;
 ; AVX512-LABEL: commute_cmppd_one:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp one <2 x double> %1, %a1
@@ -569,9 +527,7 @@ define <2 x i64> @commute_cmppd_uno(<2 x
 ;
 ; AVX512-LABEL: commute_cmppd_uno:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpunordpd (%rdi), %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp uno <2 x double> %1, %a1
@@ -596,9 +552,7 @@ define <2 x i64> @commute_cmppd_lt(<2 x
 ; AVX512-LABEL: commute_cmppd_lt:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovapd (%rdi), %xmm1
-; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp olt <2 x double> %1, %a1
@@ -623,9 +577,7 @@ define <2 x i64> @commute_cmppd_le(<2 x
 ; AVX512-LABEL: commute_cmppd_le:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovapd (%rdi), %xmm1
-; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp ole <2 x double> %1, %a1
@@ -647,9 +599,7 @@ define <4 x i64> @commute_cmppd_eq_ymmm(
 ;
 ; AVX512-LABEL: commute_cmppd_eq_ymmm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpeqpd (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp oeq <4 x double> %1, %a1
@@ -671,9 +621,7 @@ define <4 x i64> @commute_cmppd_ne_ymmm(
 ;
 ; AVX512-LABEL: commute_cmppd_ne_ymmm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpneqpd (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp une <4 x double> %1, %a1
@@ -695,9 +643,7 @@ define <4 x i64> @commute_cmppd_ord_ymmm
 ;
 ; AVX512-LABEL: commute_cmppd_ord_ymmm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpordpd (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp ord <4 x double> %1, %a1
@@ -719,9 +665,7 @@ define <4 x i64> @commute_cmppd_uno_ymmm
 ;
 ; AVX512-LABEL: commute_cmppd_uno_ymmm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpunordpd (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp uno <4 x double> %1, %a1
@@ -751,9 +695,7 @@ define <4 x i64> @commute_cmppd_ueq_ymmm
 ;
 ; AVX512-LABEL: commute_cmppd_ueq_ymmm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp ueq <4 x double> %1, %a1
@@ -783,9 +725,7 @@ define <4 x i64> @commute_cmppd_one_ymmm
 ;
 ; AVX512-LABEL: commute_cmppd_one_ymmm:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp one <4 x double> %1, %a1
@@ -813,9 +753,7 @@ define <4 x i64> @commute_cmppd_lt_ymmm(
 ; AVX512-LABEL: commute_cmppd_lt_ymmm:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovapd (%rdi), %ymm1
-; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp olt <4 x double> %1, %a1
@@ -843,9 +781,7 @@ define <4 x i64> @commute_cmppd_le_ymmm(
 ; AVX512-LABEL: commute_cmppd_le_ymmm:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovapd (%rdi), %ymm1
-; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp ole <4 x double> %1, %a1

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Tue Jan  9 10:14:22 2018
@@ -860,10 +860,8 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
 ;
 ; SKX-LABEL: test_pcmpeqq:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0 # sched: [9:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpeqq:

Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Tue Jan  9 10:14:22 2018
@@ -771,10 +771,8 @@ define <2 x i64> @test_pcmpgtq(<2 x i64>
 ;
 ; SKX-LABEL: test_pcmpgtq:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0 # sched: [9:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpgtq:

Modified: llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll Tue Jan  9 10:14:22 2018
@@ -23,10 +23,8 @@ define i64 @test_v2f64_sext(<2 x double>
 ;
 ; AVX512-LABEL: test_v2f64_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    retq
@@ -62,10 +60,8 @@ define i64 @test_v4f64_sext(<4 x double>
 ;
 ; AVX512-LABEL: test_v4f64_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
@@ -157,10 +153,8 @@ define i32 @test_v4f32_sext(<4 x float>
 ;
 ; AVX512-LABEL: test_v4f32_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
@@ -202,10 +196,8 @@ define i32 @test_v8f32_sext(<8 x float>
 ;
 ; AVX512-LABEL: test_v8f32_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
@@ -298,9 +290,7 @@ define i64 @test_v2i64_sext(<2 x i64> %a
 ;
 ; AVX512-LABEL: test_v2i64_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovq %xmm0, %rax
@@ -352,9 +342,7 @@ define i64 @test_v4i64_sext(<4 x i64> %a
 ;
 ; AVX512-LABEL: test_v4i64_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -463,9 +451,7 @@ define i32 @test_v4i32_sext(<4 x i32> %a
 ;
 ; AVX512-LABEL: test_v4i32_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -523,9 +509,7 @@ define i32 @test_v8i32_sext(<8 x i32> %a
 ;
 ; AVX512-LABEL: test_v8i32_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -640,8 +624,7 @@ define i16 @test_v8i16_sext(<8 x i16> %a
 ;
 ; AVX512-LABEL: test_v8i16_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
-; AVX512-NEXT:    vpmovm2w %k0, %xmm0
+; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -709,8 +692,7 @@ define i16 @test_v16i16_sext(<16 x i16>
 ;
 ; AVX512-LABEL: test_v16i16_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
-; AVX512-NEXT:    vpmovm2w %k0, %ymm0
+; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -838,8 +820,7 @@ define i8 @test_v16i8_sext(<16 x i8> %a0
 ;
 ; AVX512-LABEL: test_v16i8_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -913,8 +894,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
 ;
 ; AVX512-LABEL: test_v32i8_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %ymm0
+; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]

Modified: llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll Tue Jan  9 10:14:22 2018
@@ -23,10 +23,8 @@ define i64 @test_v2f64_sext(<2 x double>
 ;
 ; AVX512-LABEL: test_v2f64_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    retq
@@ -60,10 +58,8 @@ define i64 @test_v4f64_sext(<4 x double>
 ;
 ; AVX512-LABEL: test_v4f64_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
@@ -147,10 +143,8 @@ define i32 @test_v4f32_sext(<4 x float>
 ;
 ; AVX512-LABEL: test_v4f32_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
@@ -188,10 +182,8 @@ define i32 @test_v8f32_sext(<8 x float>
 ;
 ; AVX512-LABEL: test_v8f32_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
@@ -280,9 +272,7 @@ define i64 @test_v2i64_sext(<2 x i64> %a
 ;
 ; AVX512-LABEL: test_v2i64_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovq %xmm0, %rax
@@ -330,9 +320,7 @@ define i64 @test_v4i64_sext(<4 x i64> %a
 ;
 ; AVX512-LABEL: test_v4i64_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -431,9 +419,7 @@ define i32 @test_v4i32_sext(<4 x i32> %a
 ;
 ; AVX512-LABEL: test_v4i32_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -485,9 +471,7 @@ define i32 @test_v8i32_sext(<8 x i32> %a
 ;
 ; AVX512-LABEL: test_v8i32_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
-; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -592,8 +576,7 @@ define i16 @test_v8i16_sext(<8 x i16> %a
 ;
 ; AVX512-LABEL: test_v8i16_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
-; AVX512-NEXT:    vpmovm2w %k0, %xmm0
+; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -658,8 +641,7 @@ define i16 @test_v16i16_sext(<16 x i16>
 ;
 ; AVX512-LABEL: test_v16i16_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
-; AVX512-NEXT:    vpmovm2w %k0, %ymm0
+; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -777,8 +759,7 @@ define i8 @test_v16i8_sext(<16 x i8> %a0
 ;
 ; AVX512-LABEL: test_v16i8_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -849,8 +830,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
 ;
 ; AVX512-LABEL: test_v32i8_sext:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %ymm0
+; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]

Modified: llvm/trunk/test/CodeGen/X86/vselect-packss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-packss.ll?rev=322101&r1=322100&r2=322101&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-packss.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-packss.ll Tue Jan  9 10:14:22 2018
@@ -377,33 +377,14 @@ define <16 x i8> @vselect_packss(<16 x i
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512NOBW-LABEL: vselect_packss:
-; AVX512NOBW:       # %bb.0:
-; AVX512NOBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX512NOBW-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512NOBW-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX512NOBW-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512NOBW-NEXT:    vzeroupper
-; AVX512NOBW-NEXT:    retq
-;
-; AVX512BWNOVL-LABEL: vselect_packss:
-; AVX512BWNOVL:       # %bb.0:
-; AVX512BWNOVL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX512BWNOVL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512BWNOVL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX512BWNOVL-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512BWNOVL-NEXT:    vzeroupper
-; AVX512BWNOVL-NEXT:    retq
-;
-; AVX512BWVL-LABEL: vselect_packss:
-; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
-; AVX512BWVL-NEXT:    vpmovm2w %k0, %ymm0
-; AVX512BWVL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
-; AVX512BWVL-NEXT:    vzeroupper
-; AVX512BWVL-NEXT:    retq
+; AVX512-LABEL: vselect_packss:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
   %1 = icmp eq <16 x i16> %a0, %a1
   %2 = sext <16 x i1> %1 to <16 x i16>
   %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>




More information about the llvm-commits mailing list