[llvm] r323604 - [X86] Remove X86ISD::PCMPGTM/PCMPEQM and instead just use X86ISD::PCMPM and pattern match the immediate value during isel.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 27 12:19:03 PST 2018


Author: ctopper
Date: Sat Jan 27 12:19:02 2018
New Revision: 323604

URL: http://llvm.org/viewvc/llvm-project?rev=323604&view=rev
Log:
[X86] Remove X86ISD::PCMPGTM/PCMPEQM and instead just use X86ISD::PCMPM and pattern match the immediate value during isel.

Legalization is still biased to turn LT compares into GT by swapping operands to avoid needing extra isel patterns to commute.

I'm hoping to remove TESTM/TESTNM next and this should simplify that by making EQ/NE more similar.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=323604&r1=323603&r2=323604&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Sat Jan 27 12:19:02 2018
@@ -451,8 +451,7 @@ namespace {
 // type.
 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
   unsigned Opcode = N->getOpcode();
-  if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
-      Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
+  if (Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
       Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU ||
       Opcode == X86ISD::CMPM_RND) {
     // We can get 256-bit 8 element types here without VLX being enabled. When

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=323604&r1=323603&r2=323604&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 27 12:19:02 2018
@@ -5045,8 +5045,6 @@ static bool isMaskedZeroUpperBitsvXi1(un
     return false;
   case X86ISD::TESTM:
   case X86ISD::TESTNM:
-  case X86ISD::PCMPEQM:
-  case X86ISD::PCMPGTM:
   case X86ISD::CMPM:
   case X86ISD::CMPMU:
   case X86ISD::CMPM_RND:
@@ -14408,8 +14406,8 @@ static SDValue lower1BitVectorShuffle(co
   int NumElems = VT.getVectorNumElements();
   if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
       (Subtarget.hasDQI() && (NumElems < 32)))
-    return DAG.getNode(X86ISD::PCMPGTM, DL, VT, DAG.getConstant(0, DL, ExtVT),
-                       Shuffle);
+    return DAG.getNode(X86ISD::CMPM, DL, VT, DAG.getConstant(0, DL, ExtVT),
+                       Shuffle, DAG.getConstant(6, DL, MVT::i8));
 
   return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
 }
@@ -16565,8 +16563,8 @@ static SDValue LowerTruncateVecI1(SDValu
                          DAG.getConstant(ShiftInx, DL, ExtVT));
         In = DAG.getBitcast(InVT, In);
       }
-      return DAG.getNode(X86ISD::PCMPGTM, DL, VT, DAG.getConstant(0, DL, InVT),
-                         In);
+      return DAG.getNode(X86ISD::CMPM, DL, VT, DAG.getConstant(0, DL, InVT),
+                         In, DAG.getConstant(6, DL, MVT::i8));
     }
     // Use TESTD/Q, extended vector to packed dword/qword.
     assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
@@ -17750,43 +17748,39 @@ static SDValue LowerIntVSETCC_AVX512(SDV
          "Cannot set masked compare for this operation");
 
   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
-  unsigned  Opc = 0;
-  bool Unsigned = false;
   bool Swap = false;
   unsigned SSECC;
   switch (SetCCOpcode) {
   default: llvm_unreachable("Unexpected SETCC condition");
   case ISD::SETNE:  SSECC = 4; break;
-  case ISD::SETEQ:  Opc = X86ISD::PCMPEQM; break;
-  case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
+  case ISD::SETEQ:  SSECC = 0; break;
+  case ISD::SETULT: SSECC = 1; break;
   case ISD::SETLT:  Swap = true; LLVM_FALLTHROUGH;
-  case ISD::SETGT:  Opc = X86ISD::PCMPGTM; break;
-  case ISD::SETULT: SSECC = 1; Unsigned = true; break;
-  case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT
-  case ISD::SETGE:  Swap = true; SSECC = 2; break; // LE + swap
-  case ISD::SETULE: Unsigned = true; LLVM_FALLTHROUGH;
+  case ISD::SETUGT:
+  case ISD::SETGT:  SSECC = 6; break;
+  case ISD::SETUGE: SSECC = 5; break;
+  case ISD::SETGE:  Swap = true; LLVM_FALLTHROUGH;
+  case ISD::SETULE:
   case ISD::SETLE:  SSECC = 2; break;
   }
-
   if (Swap)
     std::swap(Op0, Op1);
 
   //  See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM.
-  if ((!Opc && SSECC == 4) || Opc == X86ISD::PCMPEQM) {
+  if (SSECC == 4 || SSECC == 0) {
     SDValue A = peekThroughBitcasts(Op0);
     if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) &&
         ISD::isBuildVectorAllZeros(Op1.getNode())) {
       MVT VT0 = Op0.getSimpleValueType();
       SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0));
       SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1));
-      return DAG.getNode(Opc == X86ISD::PCMPEQM ? X86ISD::TESTNM : X86ISD::TESTM,
+      return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM,
                          dl, VT, RHS, LHS);
     }
   }
 
-  if (Opc)
-    return DAG.getNode(Opc, dl, VT, Op0, Op1);
-  Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
+  unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) ? X86ISD::CMPMU
+                                                      : X86ISD::CMPM;
   return DAG.getNode(Opc, dl, VT, Op0, Op1,
                      DAG.getConstant(SSECC, dl, MVT::i8));
 }
@@ -22767,7 +22761,8 @@ static SDValue LowerScalarImmediateShift
           SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
           if (VT.is512BitVector()) {
             assert(VT == MVT::v64i8 && "Unexpected element type!");
-            SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
+            SDValue CMP = DAG.getNode(X86ISD::CMPM, dl, MVT::v64i1, Zeros, R,
+                                      DAG.getConstant(6, dl, MVT::i8));
             return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
           }
           return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
@@ -23214,8 +23209,9 @@ static SDValue LowerShift(SDValue Op, co
         V0 = DAG.getBitcast(VT, V0);
         V1 = DAG.getBitcast(VT, V1);
         Sel = DAG.getBitcast(VT, Sel);
-        Sel = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT,
-                          DAG.getConstant(0, dl, VT), Sel);
+        Sel = DAG.getNode(X86ISD::CMPM, dl, MaskVT,
+                          DAG.getConstant(0, dl, VT), Sel,
+                          DAG.getConstant(6, dl, MVT::i8));
         return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
       } else if (Subtarget.hasSSE41()) {
         // On SSE41 targets we make use of the fact that VSELECT lowers
@@ -25342,8 +25338,6 @@ const char *X86TargetLowering::getTarget
   case X86ISD::CMPP:               return "X86ISD::CMPP";
   case X86ISD::PCMPEQ:             return "X86ISD::PCMPEQ";
   case X86ISD::PCMPGT:             return "X86ISD::PCMPGT";
-  case X86ISD::PCMPEQM:            return "X86ISD::PCMPEQM";
-  case X86ISD::PCMPGTM:            return "X86ISD::PCMPGTM";
   case X86ISD::PHMINPOS:           return "X86ISD::PHMINPOS";
   case X86ISD::ADD:                return "X86ISD::ADD";
   case X86ISD::SUB:                return "X86ISD::SUB";

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=323604&r1=323603&r2=323604&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Jan 27 12:19:02 2018
@@ -329,8 +329,6 @@ namespace llvm {
 
       // Vector integer comparisons.
       PCMPEQ, PCMPGT,
-      // Vector integer comparisons, the result is in a mask vector.
-      PCMPEQM, PCMPGTM,
 
       // v8i16 Horizontal minimum and position.
       PHMINPOS,

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=323604&r1=323603&r2=323604&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Jan 27 12:19:02 2018
@@ -1994,7 +1994,7 @@ let Predicates = [HasAVX512] in {
                                    SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W;
 }
 
-multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
               OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
   let isCommutable = IsCommutable in
   def rr : AVX512BI<opc, MRMSrcReg,
@@ -2027,7 +2027,7 @@ multiclass avx512_icmp_packed<bits<8> op
               itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
-multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
               OpndItins itins,  X86VectorVTInfo _, bit IsCommutable> :
            avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
   def rmb : AVX512BI<opc, MRMSrcMem,
@@ -2051,7 +2051,7 @@ multiclass avx512_icmp_packed_rmb<bits<8
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
-multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                                  Predicate prd, bit IsCommutable = 0> {
   let Predicates = [prd] in
@@ -2067,7 +2067,7 @@ multiclass avx512_icmp_packed_vl<bits<8>
 }
 
 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
-                                     SDNode OpNode, OpndItins itins,
+                                     PatFrag OpNode, OpndItins itins,
                                      AVX512VLVectorVTInfo VTInfo,
                                      Predicate prd, bit IsCommutable = 0> {
   let Predicates = [prd] in
@@ -2082,6 +2082,11 @@ multiclass avx512_icmp_packed_rmb_vl<bit
   }
 }
 
+def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
+                         (X86cmpm node:$src1, node:$src2, (i8 0))>;
+def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
+                         (X86cmpm node:$src1, node:$src2, (i8 6))>;
+
 // FIXME: Is there a better scheduler itinerary for VPCMP?
 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                       SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
@@ -2950,10 +2955,10 @@ multiclass avx512_mask_shiftop_w<bits<8>
 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
 
-multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr,
+multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                               X86VectorVTInfo Narrow,
                                               X86VectorVTInfo Wide> {
-def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
+def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2))),
           (COPY_TO_REGCLASS
            (!cast<Instruction>(InstStr##Zrr)
@@ -2962,7 +2967,7 @@ def : Pat<(Narrow.KVT (OpNode (Narrow.VT
            Narrow.KRC)>;
 
 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
-                           (OpNode (Narrow.VT Narrow.RC:$src1),
+                           (Frag (Narrow.VT Narrow.RC:$src1),
                                    (Narrow.VT Narrow.RC:$src2)))),
           (COPY_TO_REGCLASS
            (!cast<Instruction>(InstStr##Zrrk)

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=323604&r1=323603&r2=323604&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat Jan 27 12:19:02 2018
@@ -153,12 +153,6 @@ def X86cmpp    : SDNode<"X86ISD::CMPP",
 def X86pcmpeq  : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
 def X86pcmpgt  : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
 
-def X86IntCmpMask : SDTypeProfile<1, 2,
-    [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisSameAs<1, 2>, SDTCisInt<1>,
-     SDTCisSameNumEltsAs<0, 1>]>;
-def X86pcmpeqm  : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
-def X86pcmpgtm  : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
-
 def X86CmpMaskCC :
       SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
                        SDTCisVec<1>, SDTCisSameAs<2, 1>,

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=323604&r1=323603&r2=323604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Sat Jan 27 12:19:02 2018
@@ -502,8 +502,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ; KNL-NEXT:    ## kill: def %ymm2 killed %ymm2 def %zmm2
 ; KNL-NEXT:    ## kill: def %ymm1 killed %ymm1 def %zmm1
 ; KNL-NEXT:    ## kill: def %ymm0 killed %ymm0 def %zmm0
-; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
-; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1}
+; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
+; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
 ; KNL-NEXT:    vzeroupper
@@ -511,8 +511,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ;
 ; SKX-LABEL: test4:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1
-; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k0 {%k1}
+; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
+; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -523,8 +523,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ; AVX512BW-NEXT:    ## kill: def %ymm2 killed %ymm2 def %zmm2
 ; AVX512BW-NEXT:    ## kill: def %ymm1 killed %ymm1 def %zmm1
 ; AVX512BW-NEXT:    ## kill: def %ymm0 killed %ymm0 def %zmm0
-; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
-; AVX512BW-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1}
+; AVX512BW-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
 ; AVX512BW-NEXT:    vzeroupper
@@ -536,8 +536,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ; AVX512DQ-NEXT:    ## kill: def %ymm2 killed %ymm2 def %zmm2
 ; AVX512DQ-NEXT:    ## kill: def %ymm1 killed %ymm1 def %zmm1
 ; AVX512DQ-NEXT:    ## kill: def %ymm0 killed %ymm0 def %zmm0
-; AVX512DQ-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
-; AVX512DQ-NEXT:    vpcmpleq %zmm1, %zmm0, %k0 {%k1}
+; AVX512DQ-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
+; AVX512DQ-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
 ; AVX512DQ-NEXT:    vzeroupper
@@ -556,8 +556,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2
 ; KNL-NEXT:    ## kill: def %xmm2 killed %xmm2 def %zmm2
 ; KNL-NEXT:    ## kill: def %xmm1 killed %xmm1 def %zmm1
 ; KNL-NEXT:    ## kill: def %xmm0 killed %xmm0 def %zmm0
-; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
-; KNL-NEXT:    vpcmpleq %zmm3, %zmm2, %k1 {%k1}
+; KNL-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
+; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
 ; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
 ; KNL-NEXT:    vzeroupper
@@ -565,8 +565,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2
 ;
 ; SKX-LABEL: test5:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k0 {%k1}
+; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k1
+; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
 ; SKX-NEXT:    vpmovm2q %k0, %xmm0
 ; SKX-NEXT:    retq
 ;
@@ -576,8 +576,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2
 ; AVX512BW-NEXT:    ## kill: def %xmm2 killed %xmm2 def %zmm2
 ; AVX512BW-NEXT:    ## kill: def %xmm1 killed %xmm1 def %zmm1
 ; AVX512BW-NEXT:    ## kill: def %xmm0 killed %xmm0 def %zmm0
-; AVX512BW-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
-; AVX512BW-NEXT:    vpcmpleq %zmm3, %zmm2, %k1 {%k1}
+; AVX512BW-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
+; AVX512BW-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
 ; AVX512BW-NEXT:    vzeroupper
@@ -589,8 +589,8 @@ define <2 x i64> @test5(<2 x i64> %x, <2
 ; AVX512DQ-NEXT:    ## kill: def %xmm2 killed %xmm2 def %zmm2
 ; AVX512DQ-NEXT:    ## kill: def %xmm1 killed %xmm1 def %zmm1
 ; AVX512DQ-NEXT:    ## kill: def %xmm0 killed %xmm0 def %zmm0
-; AVX512DQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
-; AVX512DQ-NEXT:    vpcmpleq %zmm3, %zmm2, %k0 {%k1}
+; AVX512DQ-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
+; AVX512DQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
 ; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
 ; AVX512DQ-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
 ; AVX512DQ-NEXT:    vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=323604&r1=323603&r2=323604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Sat Jan 27 12:19:02 2018
@@ -7031,16 +7031,16 @@ entry:
 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
 ; GENERIC-LABEL: test4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpleq %ymm1, %ymm0, %k0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
+; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test4:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k0 {%k1} # sched: [3:1.00]
+; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
 ; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
@@ -7054,15 +7054,15 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
 ; GENERIC-LABEL: vcmp_test5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT:    vpcmpleq %xmm3, %xmm2, %k0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
+; GENERIC-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vcmp_test5:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k0 {%k1} # sched: [3:1.00]
+; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
 ; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x_gt_y = icmp slt <2 x i64> %x, %y

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll?rev=323604&r1=323603&r2=323604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll Sat Jan 27 12:19:02 2018
@@ -428,9 +428,9 @@ define <8 x i32> @test256_15(<8 x i32> %
 ; NoVLX-NEXT:    # kill: def %ymm2 killed %ymm2 def %zmm2
 ; NoVLX-NEXT:    # kill: def %ymm1 killed %ymm1 def %zmm1
 ; NoVLX-NEXT:    # kill: def %ymm0 killed %ymm0 def %zmm0
-; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm3
-; NoVLX-NEXT:    vpcmpgtd %zmm3, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpled %zmm1, %zmm2, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpled %zmm1, %zmm2, %k1
+; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm2
+; NoVLX-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def %ymm0 killed %ymm0 killed %zmm0
 ; NoVLX-NEXT:    retq
@@ -457,9 +457,9 @@ define <4 x i64> @test256_16(<4 x i64> %
 ; NoVLX-NEXT:    # kill: def %ymm2 killed %ymm2 def %zmm2
 ; NoVLX-NEXT:    # kill: def %ymm1 killed %ymm1 def %zmm1
 ; NoVLX-NEXT:    # kill: def %ymm0 killed %ymm0 def %zmm0
-; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm3
-; NoVLX-NEXT:    vpcmpgtq %zmm3, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm2, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm2, %k1
+; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm2
+; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def %ymm0 killed %ymm0 killed %zmm0
 ; NoVLX-NEXT:    retq
@@ -987,9 +987,9 @@ define <4 x i32> @test128_15(<4 x i32> %
 ; NoVLX-NEXT:    # kill: def %xmm2 killed %xmm2 def %zmm2
 ; NoVLX-NEXT:    # kill: def %xmm1 killed %xmm1 def %zmm1
 ; NoVLX-NEXT:    # kill: def %xmm0 killed %xmm0 def %zmm0
-; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm3
-; NoVLX-NEXT:    vpcmpgtd %zmm3, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpled %zmm1, %zmm2, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpled %zmm1, %zmm2, %k1
+; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm2
+; NoVLX-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
 ; NoVLX-NEXT:    retq
@@ -1016,9 +1016,9 @@ define <2 x i64> @test128_16(<2 x i64> %
 ; NoVLX-NEXT:    # kill: def %xmm2 killed %xmm2 def %zmm2
 ; NoVLX-NEXT:    # kill: def %xmm1 killed %xmm1 def %zmm1
 ; NoVLX-NEXT:    # kill: def %xmm0 killed %xmm0 def %zmm0
-; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm3
-; NoVLX-NEXT:    vpcmpgtq %zmm3, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm2, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm2, %k1
+; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm2
+; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
 ; NoVLX-NEXT:    retq




More information about the llvm-commits mailing list