[llvm] r269569 - Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512

Elena Demikhovsky via llvm-commits llvm-commits at lists.llvm.org
Sat May 14 08:06:10 PDT 2016


Author: delena
Date: Sat May 14 10:06:09 2016
New Revision: 269569

URL: http://llvm.org/viewvc/llvm-project?rev=269569&view=rev
Log:
Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512

Differential revision http://reviews.llvm.org/D19261


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 14 10:06:09 2016
@@ -17503,30 +17503,66 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
       ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
       SDValue LHS = Op.getOperand(1);
       SDValue RHS = Op.getOperand(2);
-      unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG);
-      assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
-      SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
-      SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-                                  DAG.getConstant(X86CC, dl, MVT::i8), Cond);
+      SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+      SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS);
+      SDValue SetCC;
+      switch (CC) {
+      case ISD::SETEQ: { // (ZF = 1 and PF = 0)
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_E, dl, MVT::i8), Comi);
+        SDValue SetNP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                                    DAG.getConstant(X86::COND_NP, dl, MVT::i8),
+                                    Comi);
+        SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
+        break;
+      }
+      case ISD::SETNE: { // (ZF = 0 or PF = 1)
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_NE, dl, MVT::i8), Comi);
+        SDValue SetP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                                   DAG.getConstant(X86::COND_P, dl, MVT::i8),
+                                   Comi);
+        SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
+        break;
+      }
+      case ISD::SETGT: // (CF = 0 and ZF = 0)
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_A, dl, MVT::i8), Comi);
+        break;
+      case ISD::SETLT: { // The condition is opposite to GT. Swap the operands.
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_A, dl, MVT::i8), InvComi);
+        break;
+      }
+      case ISD::SETGE: // CF = 0
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_AE, dl, MVT::i8), Comi);
+        break;
+      case ISD::SETLE: // The condition is opposite to GE. Swap the operands.
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_AE, dl, MVT::i8), InvComi);
+        break;
+      default:
+        llvm_unreachable("Unexpected illegal condition!");
+      }
       return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
     }
     case COMI_RM: { // Comparison intrinsics with Sae
       SDValue LHS = Op.getOperand(1);
       SDValue RHS = Op.getOperand(2);
-      SDValue CC = Op.getOperand(3);
+      unsigned CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
       SDValue Sae = Op.getOperand(4);
-      auto ComiType = TranslateX86ConstCondToX86CC(CC);
-      // choose between ordered and unordered (comi/ucomi)
-      unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 : IntrData->Opc1;
-      SDValue Cond;
-      if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
-                                           X86::STATIC_ROUNDING::CUR_DIRECTION)
-        Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
+
+      SDValue FCmp;
+      if (cast<ConstantSDNode>(Sae)->getZExtValue() ==
+          X86::STATIC_ROUNDING::CUR_DIRECTION)
+        FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+                                  DAG.getConstant(CondVal, dl, MVT::i8));
       else
-        Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
-      SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-        DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond);
-      return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+        FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+                                  DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+      // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
+      return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
     }
     case VSHIFT:
       return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 14 10:06:09 2016
@@ -2331,96 +2331,6 @@ static void verifyIntrinsicTables() {
           std::end(IntrinsicsWithChain)) &&
          "Intrinsic data tables should have unique entries");
 }
-
-// X86 specific compare constants.
-// They must be kept in synch with avxintrin.h
-#define _X86_CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
-#define _X86_CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
-#define _X86_CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling)  */
-#define _X86_CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
-#define _X86_CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
-#define _X86_CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
-#define _X86_CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling)  */
-#define _X86_CMP_ORD_Q    0x07 /* Ordered (nonsignaling)   */
-#define _X86_CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling)  */
-#define _X86_CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unord, signaling)  */
-#define _X86_CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling)  */
-#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling)  */
-#define _X86_CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling)  */
-#define _X86_CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered, signaling)  */
-#define _X86_CMP_GT_OS    0x0e /* Greater-than (ordered, signaling)  */
-#define _X86_CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling)  */
-#define _X86_CMP_EQ_OS    0x10 /* Equal (ordered, signaling)  */
-#define _X86_CMP_LT_OQ    0x11 /* Less-than (ordered, non-signaling)  */
-#define _X86_CMP_LE_OQ    0x12 /* Less-than-or-equal (ordered, non-signaling)  */
-#define _X86_CMP_UNORD_S  0x13 /* Unordered (signaling)  */
-#define _X86_CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling)  */
-#define _X86_CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling)  */
-#define _X86_CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unord, non-signaling)  */
-#define _X86_CMP_ORD_S    0x17 /* Ordered (signaling)  */
-#define _X86_CMP_EQ_US    0x18 /* Equal (unordered, signaling)  */
-#define _X86_CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unord, non-sign)  */
-#define _X86_CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling)  */
-#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling)  */
-#define _X86_CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling)  */
-#define _X86_CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered, non-signaling)  */
-#define _X86_CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)  */
-#define _X86_CMP_TRUE_US  0x1f /* True (unordered, signaling)  */
-
-/*
-* Get comparison modifier from _mm_comi_round_sd/ss intrinsic
-* Return tuple <isOrdered, X86 condcode>
-*/
-static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue &imm) {
-  ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(imm);
-  unsigned IntImm = CImm->getZExtValue();
-  // On a floating point condition, the flags are set as follows:
-  // ZF  PF  CF   op
-  //  0 | 0 | 0 | X > Y
-  //  0 | 0 | 1 | X < Y
-  //  1 | 0 | 0 | X == Y
-  //  1 | 1 | 1 | unordered
-  switch (IntImm) {
-  default: llvm_unreachable("Invalid floating point compare value for Comi!");
-  case _X86_CMP_EQ_OQ:      // 0x00 - Equal (ordered, nonsignaling)
-  case _X86_CMP_EQ_OS:      // 0x10 - Equal (ordered, signaling)
-    return std::make_tuple(true, X86::COND_E);
-  case _X86_CMP_EQ_UQ:      // 0x08 - Equal (unordered, non-signaling)
-  case _X86_CMP_EQ_US:      // 0x18 - Equal (unordered, signaling)
-    return std::make_tuple(false , X86::COND_E);
-  case _X86_CMP_LT_OS:      // 0x01 - Less-than (ordered, signaling)
-  case _X86_CMP_LT_OQ:      // 0x11 - Less-than (ordered, nonsignaling)
-    return std::make_tuple(true, X86::COND_B);
-  case _X86_CMP_NGE_US:     // 0x09 - Not-greater-than-or-equal (unordered, signaling)
-  case _X86_CMP_NGE_UQ:     // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling)
-    return std::make_tuple(false , X86::COND_B);
-  case _X86_CMP_LE_OS:      // 0x02 - Less-than-or-equal (ordered, signaling)
-  case _X86_CMP_LE_OQ:      // 0x12 - Less-than-or-equal (ordered, nonsignaling)
-    return std::make_tuple(true, X86::COND_BE);
-  case _X86_CMP_NGT_US:     // 0x0A - Not-greater-than (unordered, signaling)
-  case _X86_CMP_NGT_UQ:     // 0x1A - Not-greater-than (unordered, nonsignaling)
-    return std::make_tuple(false, X86::COND_BE);
-  case _X86_CMP_GT_OS:      // 0x0E - Greater-than (ordered, signaling)
-  case _X86_CMP_GT_OQ:      // 0x1E - Greater-than (ordered, nonsignaling)
-    return std::make_tuple(true, X86::COND_A);
-  case _X86_CMP_NLE_US:     // 0x06 - Not-less-than-or-equal (unordered,signaling)
-  case _X86_CMP_NLE_UQ:     // 0x16 - Not-less-than-or-equal (unordered, nonsignaling)
-    return std::make_tuple(false, X86::COND_A);
-  case _X86_CMP_GE_OS:      // 0x0D - Greater-than-or-equal (ordered, signaling)
-  case _X86_CMP_GE_OQ:      // 0x1D - Greater-than-or-equal (ordered, nonsignaling)
-    return std::make_tuple(true, X86::COND_AE);
-  case _X86_CMP_NLT_US:     // 0x05 - Not-less-than (unordered, signaling)
-  case _X86_CMP_NLT_UQ:     // 0x15 - Not-less-than (unordered, nonsignaling)
-    return std::make_tuple(false, X86::COND_AE);
-  case _X86_CMP_NEQ_OQ:     // 0x0C - Not-equal (ordered, non-signaling)
-  case _X86_CMP_NEQ_OS:     // 0x1C - Not-equal (ordered, signaling)
-    return std::make_tuple(true, X86::COND_NE);
-  case _X86_CMP_NEQ_UQ:     // 0x04 - Not-equal (unordered, nonsignaling)
-  case _X86_CMP_NEQ_US:     // 0x14 - Not-equal (unordered, signaling)
-    return std::make_tuple(false, X86::COND_NE);
-  }
-}
-
 } // End llvm namespace
 
 #endif

Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -104,8 +104,10 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
 ; CHECK-LABEL: test_x86_sse2_comieq_sd:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -142,8 +144,8 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_sse2_comile_sd:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    vcomisd %xmm0, %xmm1
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -155,9 +157,9 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_sse2_comilt_sd:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcomisd %xmm0, %xmm1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -169,8 +171,10 @@ define i32 @test_x86_sse2_comineq_sd(<2
 ; CHECK-LABEL: test_x86_sse2_comineq_sd:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -912,8 +916,10 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
 ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -950,8 +956,8 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    vucomisd %xmm0, %xmm1
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -963,9 +969,9 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vucomisd %xmm0, %xmm1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -977,8 +983,10 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
 ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1699,8 +1707,10 @@ define i32 @test_x86_sse_comieq_ss(<4 x
 ; CHECK-LABEL: test_x86_sse_comieq_ss:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1737,8 +1747,8 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_sse_comile_ss:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomiss %xmm1, %xmm0
-; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    vcomiss %xmm0, %xmm1
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -1750,9 +1760,9 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_sse_comilt_ss:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomiss %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcomiss %xmm0, %xmm1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1764,8 +1774,10 @@ define i32 @test_x86_sse_comineq_ss(<4 x
 ; CHECK-LABEL: test_x86_sse_comineq_ss:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -2003,8 +2015,10 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
 ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -2041,8 +2055,8 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_sse_ucomile_ss:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    vucomiss %xmm0, %xmm1
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -2054,9 +2068,9 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vucomiss %xmm0, %xmm1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -2068,8 +2082,10 @@ define i32 @test_x86_sse_ucomineq_ss(<4
 ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 14 10:06:09 2016
@@ -6307,9 +6307,8 @@ define <8 x double>@test_int_x86_avx512_
 define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vcmpeqsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
   ret i32 %res
@@ -6318,9 +6317,8 @@ define i32 @test_x86_avx512_comi_sd_eq_s
 define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
   ret i32 %res
@@ -6329,9 +6327,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq_
 define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
   ret i32 %res
@@ -6340,9 +6337,8 @@ define i32 @test_x86_avx512_comi_sd_eq(<
 define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vcmpeq_uqsd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
   ret i32 %res
@@ -6351,9 +6347,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq(
 define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpltsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
   ret i32 %res
@@ -6362,9 +6357,8 @@ define i32 @test_x86_avx512_comi_sd_lt_s
 define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpngesd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
   ret i32 %res
@@ -6373,9 +6367,8 @@ define i32 @test_x86_avx512_ucomi_sd_lt_
 define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpltsd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
   ret i32 %res
@@ -6384,9 +6377,8 @@ define i32 @test_x86_avx512_comi_sd_lt(<
 define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpngesd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
   ret i32 %res
@@ -6397,9 +6389,8 @@ declare i32 @llvm.x86.avx512.vcomi.sd(<2
 define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpngess %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
   ret i32 %res

Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -54,15 +54,19 @@ define i32 @test_x86_sse_comieq_ss(<4 x
 ; SSE-LABEL: test_x86_sse_comieq_ss:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    sete %cl
+; SSE-NEXT:    andb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_comieq_ss:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vcomiss %xmm1, %xmm0
-; KNL-NEXT:    sete %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setnp %al
+; KNL-NEXT:    sete %cl
+; KNL-NEXT:    andb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_x86_sse_comile_ss:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    setbe %al
+; SSE-NEXT:    comiss %xmm0, %xmm1
+; SSE-NEXT:    setae %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_comile_ss:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vcomiss %xmm1, %xmm0
-; KNL-NEXT:    setbe %al
+; KNL-NEXT:    vcomiss %xmm0, %xmm1
+; KNL-NEXT:    setae %al
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_x86_sse_comilt_ss:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    sbbl %eax, %eax
-; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    comiss %xmm0, %xmm1
+; SSE-NEXT:    seta %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_comilt_ss:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vcomiss %xmm1, %xmm0
-; KNL-NEXT:    sbbl %eax, %eax
-; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vcomiss %xmm0, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -154,15 +158,19 @@ define i32 @test_x86_sse_comineq_ss(<4 x
 ; SSE-LABEL: test_x86_sse_comineq_ss:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setp %al
+; SSE-NEXT:    setne %cl
+; SSE-NEXT:    orb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_comineq_ss:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vcomiss %xmm1, %xmm0
-; KNL-NEXT:    setne %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setp %al
+; KNL-NEXT:    setne %cl
+; KNL-NEXT:    orb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -504,15 +512,19 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
 ; SSE-LABEL: test_x86_sse_ucomieq_ss:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    sete %cl
+; SSE-NEXT:    andb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_ucomieq_ss:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vucomiss %xmm1, %xmm0
-; KNL-NEXT:    sete %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setnp %al
+; KNL-NEXT:    sete %cl
+; KNL-NEXT:    andb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -563,15 +575,15 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_x86_sse_ucomile_ss:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    setbe %al
+; SSE-NEXT:    ucomiss %xmm0, %xmm1
+; SSE-NEXT:    setae %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_ucomile_ss:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vucomiss %xmm1, %xmm0
-; KNL-NEXT:    setbe %al
+; KNL-NEXT:    vucomiss %xmm0, %xmm1
+; KNL-NEXT:    setae %al
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -583,16 +595,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_x86_sse_ucomilt_ss:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    sbbl %eax, %eax
-; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    ucomiss %xmm0, %xmm1
+; SSE-NEXT:    seta %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_ucomilt_ss:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vucomiss %xmm1, %xmm0
-; KNL-NEXT:    sbbl %eax, %eax
-; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vucomiss %xmm0, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -604,15 +616,19 @@ define i32 @test_x86_sse_ucomineq_ss(<4
 ; SSE-LABEL: test_x86_sse_ucomineq_ss:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setp %al
+; SSE-NEXT:    setne %cl
+; SSE-NEXT:    orb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_ucomineq_ss:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vucomiss %xmm1, %xmm0
-; KNL-NEXT:    setne %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setp %al
+; KNL-NEXT:    setne %cl
+; KNL-NEXT:    orb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res

Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -54,15 +54,19 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
 ; SSE-LABEL: test_x86_sse2_comieq_sd:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    sete %cl
+; SSE-NEXT:    andb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_comieq_sd:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vcomisd %xmm1, %xmm0
-; KNL-NEXT:    sete %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setnp %al
+; KNL-NEXT:    sete %cl
+; KNL-NEXT:    andb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_x86_sse2_comile_sd:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    setbe %al
+; SSE-NEXT:    comisd %xmm0, %xmm1
+; SSE-NEXT:    setae %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_comile_sd:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vcomisd %xmm1, %xmm0
-; KNL-NEXT:    setbe %al
+; KNL-NEXT:    vcomisd %xmm0, %xmm1
+; KNL-NEXT:    setae %al
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_x86_sse2_comilt_sd:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    sbbl %eax, %eax
-; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    comisd %xmm0, %xmm1
+; SSE-NEXT:    seta %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_comilt_sd:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vcomisd %xmm1, %xmm0
-; KNL-NEXT:    sbbl %eax, %eax
-; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vcomisd %xmm0, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -154,15 +158,19 @@ define i32 @test_x86_sse2_comineq_sd(<2
 ; SSE-LABEL: test_x86_sse2_comineq_sd:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setp %al
+; SSE-NEXT:    setne %cl
+; SSE-NEXT:    orb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_comineq_sd:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vcomisd %xmm1, %xmm0
-; KNL-NEXT:    setne %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setp %al
+; KNL-NEXT:    setne %cl
+; KNL-NEXT:    orb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1237,15 +1245,19 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
 ; SSE-LABEL: test_x86_sse2_ucomieq_sd:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    sete %cl
+; SSE-NEXT:    andb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_ucomieq_sd:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vucomisd %xmm1, %xmm0
-; KNL-NEXT:    sete %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setnp %al
+; KNL-NEXT:    sete %cl
+; KNL-NEXT:    andb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1296,15 +1308,15 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_x86_sse2_ucomile_sd:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    setbe %al
+; SSE-NEXT:    ucomisd %xmm0, %xmm1
+; SSE-NEXT:    setae %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_ucomile_sd:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vucomisd %xmm1, %xmm0
-; KNL-NEXT:    setbe %al
+; KNL-NEXT:    vucomisd %xmm0, %xmm1
+; KNL-NEXT:    setae %al
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -1316,16 +1328,16 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_x86_sse2_ucomilt_sd:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    sbbl %eax, %eax
-; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    ucomisd %xmm0, %xmm1
+; SSE-NEXT:    seta %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_ucomilt_sd:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vucomisd %xmm1, %xmm0
-; KNL-NEXT:    sbbl %eax, %eax
-; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vucomisd %xmm0, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1337,15 +1349,19 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
 ; SSE-LABEL: test_x86_sse2_ucomineq_sd:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setp %al
+; SSE-NEXT:    setne %cl
+; SSE-NEXT:    orb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_ucomineq_sd:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vucomisd %xmm1, %xmm0
-; KNL-NEXT:    setne %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setp %al
+; KNL-NEXT:    setne %cl
+; KNL-NEXT:    orb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res




More information about the llvm-commits mailing list