[llvm] r269569 - Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512

Demikhovsky, Elena via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 20 05:22:15 PDT 2016


Hi Andrea,

I'd like to understand what you mean by “switch”. Do you mean an undocumented switch at the clang level? Does clang already have such flags for backward compatibility?

Or do you mean an #ifdef around the “comi” intrinsics in the header file, where the user defines a macro before including intrin.h?

Or do you mean an #ifdef inside codegen, so that you would compile clang itself with this specific flag?

-           Elena

From: Andrea Di Biagio [mailto:andrea.dibiagio at gmail.com]
Sent: Wednesday, July 20, 2016 15:05
To: Demikhovsky, Elena <elena.demikhovsky at intel.com>
Cc: llvm-commits <llvm-commits at lists.llvm.org>
Subject: Re: [llvm] r269569 - Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512

Hi Elena,
I agree that this patch is the correct thing to do.
However, we at Sony are a bit concerned by this patch because it breaks some very important customer codebases which heavily rely on the old lowering behavior.

What if we add a switch to enable the old lowering (as suggested in PR28510)? I think it would be very useful in the short term. Users that are stuck with old codebases would be able to pass that switch; other users will have a bit of time to upgrade their codebases.
What do you think?
Cheers,
Andrea

On Sat, May 14, 2016 at 4:06 PM, Elena Demikhovsky via llvm-commits <llvm-commits at lists.llvm.org> wrote:
Author: delena
Date: Sat May 14 10:06:09 2016
New Revision: 269569

URL: http://llvm.org/viewvc/llvm-project?rev=269569&view=rev
Log:
Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512

Differential revision http://reviews.llvm.org/D19261


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 14 10:06:09 2016
@@ -17503,30 +17503,66 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
       ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
       SDValue LHS = Op.getOperand(1);
       SDValue RHS = Op.getOperand(2);
-      unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG);
-      assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
-      SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
-      SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-                                  DAG.getConstant(X86CC, dl, MVT::i8), Cond);
+      SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+      SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS);
+      SDValue SetCC;
+      switch (CC) {
+      case ISD::SETEQ: { // (ZF = 0 and PF = 0)
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_E, dl, MVT::i8), Comi);
+        SDValue SetNP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                                    DAG.getConstant(X86::COND_NP, dl, MVT::i8),
+                                    Comi);
+        SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
+        break;
+      }
+      case ISD::SETNE: { // (ZF = 1 or PF = 1)
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_NE, dl, MVT::i8), Comi);
+        SDValue SetP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                                   DAG.getConstant(X86::COND_P, dl, MVT::i8),
+                                   Comi);
+        SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
+        break;
+      }
+      case ISD::SETGT: // (CF = 0 and ZF = 0)
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_A, dl, MVT::i8), Comi);
+        break;
+      case ISD::SETLT: { // The condition is opposite to GT. Swap the operands.
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_A, dl, MVT::i8), InvComi);
+        break;
+      }
+      case ISD::SETGE: // CF = 0
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_AE, dl, MVT::i8), Comi);
+        break;
+      case ISD::SETLE: // The condition is opposite to GE. Swap the operands.
+        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                            DAG.getConstant(X86::COND_AE, dl, MVT::i8), InvComi);
+        break;
+      default:
+        llvm_unreachable("Unexpected illegal condition!");
+      }
       return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
     }
     case COMI_RM: { // Comparison intrinsics with Sae
       SDValue LHS = Op.getOperand(1);
       SDValue RHS = Op.getOperand(2);
-      SDValue CC = Op.getOperand(3);
+      unsigned CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
       SDValue Sae = Op.getOperand(4);
-      auto ComiType = TranslateX86ConstCondToX86CC(CC);
-      // choose between ordered and unordered (comi/ucomi)
-      unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 : IntrData->Opc1;
-      SDValue Cond;
-      if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
-                                           X86::STATIC_ROUNDING::CUR_DIRECTION)
-        Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
+
+      SDValue FCmp;
+      if (cast<ConstantSDNode>(Sae)->getZExtValue() ==
+          X86::STATIC_ROUNDING::CUR_DIRECTION)
+        FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+                                  DAG.getConstant(CondVal, dl, MVT::i8));
       else
-        Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
-      SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-        DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond);
-      return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+        FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+                                  DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+      // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
+      return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
     }
     case VSHIFT:
       return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 14 10:06:09 2016
@@ -2331,96 +2331,6 @@ static void verifyIntrinsicTables() {
           std::end(IntrinsicsWithChain)) &&
          "Intrinsic data tables should have unique entries");
 }
-
-// X86 specific compare constants.
-// They must be kept in synch with avxintrin.h
-#define _X86_CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
-#define _X86_CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
-#define _X86_CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling)  */
-#define _X86_CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
-#define _X86_CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
-#define _X86_CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
-#define _X86_CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling)  */
-#define _X86_CMP_ORD_Q    0x07 /* Ordered (nonsignaling)   */
-#define _X86_CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling)  */
-#define _X86_CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unord, signaling)  */
-#define _X86_CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling)  */
-#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling)  */
-#define _X86_CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling)  */
-#define _X86_CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered, signaling)  */
-#define _X86_CMP_GT_OS    0x0e /* Greater-than (ordered, signaling)  */
-#define _X86_CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling)  */
-#define _X86_CMP_EQ_OS    0x10 /* Equal (ordered, signaling)  */
-#define _X86_CMP_LT_OQ    0x11 /* Less-than (ordered, non-signaling)  */
-#define _X86_CMP_LE_OQ    0x12 /* Less-than-or-equal (ordered, non-signaling)  */
-#define _X86_CMP_UNORD_S  0x13 /* Unordered (signaling)  */
-#define _X86_CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling)  */
-#define _X86_CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling)  */
-#define _X86_CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unord, non-signaling)  */
-#define _X86_CMP_ORD_S    0x17 /* Ordered (signaling)  */
-#define _X86_CMP_EQ_US    0x18 /* Equal (unordered, signaling)  */
-#define _X86_CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unord, non-sign)  */
-#define _X86_CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling)  */
-#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling)  */
-#define _X86_CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling)  */
-#define _X86_CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered, non-signaling)  */
-#define _X86_CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)  */
-#define _X86_CMP_TRUE_US  0x1f /* True (unordered, signaling)  */
-
-/*
-* Get comparison modifier from _mm_comi_round_sd/ss intrinsic
-* Return tuple <isOrdered, X86 condcode>
-*/
-static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue &imm) {
-  ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(imm);
-  unsigned IntImm = CImm->getZExtValue();
-  // On a floating point condition, the flags are set as follows:
-  // ZF  PF  CF   op
-  //  0 | 0 | 0 | X > Y
-  //  0 | 0 | 1 | X < Y
-  //  1 | 0 | 0 | X == Y
-  //  1 | 1 | 1 | unordered
-  switch (IntImm) {
-  default: llvm_unreachable("Invalid floating point compare value for Comi!");
-  case _X86_CMP_EQ_OQ:      // 0x00 - Equal (ordered, nonsignaling)
-  case _X86_CMP_EQ_OS:      // 0x10 - Equal (ordered, signaling)
-    return std::make_tuple(true, X86::COND_E);
-  case _X86_CMP_EQ_UQ:      // 0x08 - Equal (unordered, non-signaling)
-  case _X86_CMP_EQ_US:      // 0x18 - Equal (unordered, signaling)
-    return std::make_tuple(false , X86::COND_E);
-  case _X86_CMP_LT_OS:      // 0x01 - Less-than (ordered, signaling)
-  case _X86_CMP_LT_OQ:      // 0x11 - Less-than (ordered, nonsignaling)
-    return std::make_tuple(true, X86::COND_B);
-  case _X86_CMP_NGE_US:     // 0x09 - Not-greater-than-or-equal (unordered, signaling)
-  case _X86_CMP_NGE_UQ:     // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling)
-    return std::make_tuple(false , X86::COND_B);
-  case _X86_CMP_LE_OS:      // 0x02 - Less-than-or-equal (ordered, signaling)
-  case _X86_CMP_LE_OQ:      // 0x12 - Less-than-or-equal (ordered, nonsignaling)
-    return std::make_tuple(true, X86::COND_BE);
-  case _X86_CMP_NGT_US:     // 0x0A - Not-greater-than (unordered, signaling)
-  case _X86_CMP_NGT_UQ:     // 0x1A - Not-greater-than (unordered, nonsignaling)
-    return std::make_tuple(false, X86::COND_BE);
-  case _X86_CMP_GT_OS:      // 0x0E - Greater-than (ordered, signaling)
-  case _X86_CMP_GT_OQ:      // 0x1E - Greater-than (ordered, nonsignaling)
-    return std::make_tuple(true, X86::COND_A);
-  case _X86_CMP_NLE_US:     // 0x06 - Not-less-than-or-equal (unordered,signaling)
-  case _X86_CMP_NLE_UQ:     // 0x16 - Not-less-than-or-equal (unordered, nonsignaling)
-    return std::make_tuple(false, X86::COND_A);
-  case _X86_CMP_GE_OS:      // 0x0D - Greater-than-or-equal (ordered, signaling)
-  case _X86_CMP_GE_OQ:      // 0x1D - Greater-than-or-equal (ordered, nonsignaling)
-    return std::make_tuple(true, X86::COND_AE);
-  case _X86_CMP_NLT_US:     // 0x05 - Not-less-than (unordered, signaling)
-  case _X86_CMP_NLT_UQ:     // 0x15 - Not-less-than (unordered, nonsignaling)
-    return std::make_tuple(false, X86::COND_AE);
-  case _X86_CMP_NEQ_OQ:     // 0x0C - Not-equal (ordered, non-signaling)
-  case _X86_CMP_NEQ_OS:     // 0x1C - Not-equal (ordered, signaling)
-    return std::make_tuple(true, X86::COND_NE);
-  case _X86_CMP_NEQ_UQ:     // 0x04 - Not-equal (unordered, nonsignaling)
-  case _X86_CMP_NEQ_US:     // 0x14 - Not-equal (unordered, signaling)
-    return std::make_tuple(false, X86::COND_NE);
-  }
-}
-
 } // End llvm namespace

 #endif

Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -104,8 +104,10 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
 ; CHECK-LABEL: test_x86_sse2_comieq_sd:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comieq.sd<http://llvm.x86.sse2.comieq.sd>(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -142,8 +144,8 @@ declare i32 @llvm.x86.sse2.comigt.sd<http://llvm.x86.sse2.comigt.sd>(<2
 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_sse2_comile_sd:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    vcomisd %xmm0, %xmm1
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comile.sd<http://llvm.x86.sse2.comile.sd>(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -155,9 +157,9 @@ declare i32 @llvm.x86.sse2.comile.sd<http://llvm.x86.sse2.comile.sd>(<2
 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_sse2_comilt_sd:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcomisd %xmm0, %xmm1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comilt.sd<http://llvm.x86.sse2.comilt.sd>(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -169,8 +171,10 @@ define i32 @test_x86_sse2_comineq_sd(<2
 ; CHECK-LABEL: test_x86_sse2_comineq_sd:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comineq.sd<http://llvm.x86.sse2.comineq.sd>(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -912,8 +916,10 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
 ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomieq.sd<http://llvm.x86.sse2.ucomieq.sd>(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -950,8 +956,8 @@ declare i32 @llvm.x86.sse2.ucomigt.sd<http://llvm.x86.sse2.ucomigt.sd>(<2
 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    vucomisd %xmm0, %xmm1
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomile.sd<http://llvm.x86.sse2.ucomile.sd>(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -963,9 +969,9 @@ declare i32 @llvm.x86.sse2.ucomile.sd<http://llvm.x86.sse2.ucomile.sd>(<2
 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vucomisd %xmm0, %xmm1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomilt.sd<http://llvm.x86.sse2.ucomilt.sd>(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -977,8 +983,10 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
 ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomineq.sd<http://llvm.x86.sse2.ucomineq.sd>(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1699,8 +1707,10 @@ define i32 @test_x86_sse_comieq_ss(<4 x
 ; CHECK-LABEL: test_x86_sse_comieq_ss:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1737,8 +1747,8 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_sse_comile_ss:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomiss %xmm1, %xmm0
-; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    vcomiss %xmm0, %xmm1
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -1750,9 +1760,9 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_sse_comilt_ss:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomiss %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcomiss %xmm0, %xmm1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1764,8 +1774,10 @@ define i32 @test_x86_sse_comineq_ss(<4 x
 ; CHECK-LABEL: test_x86_sse_comineq_ss:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -2003,8 +2015,10 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
 ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -2041,8 +2055,8 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_sse_ucomile_ss:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    vucomiss %xmm0, %xmm1
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -2054,9 +2068,9 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vucomiss %xmm0, %xmm1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -2068,8 +2082,10 @@ define i32 @test_x86_sse_ucomineq_ss(<4
 ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 14 10:06:09 2016
@@ -6307,9 +6307,8 @@ define <8 x double>@test_int_x86_avx512_
 define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vcmpeqsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
   ret i32 %res
@@ -6318,9 +6317,8 @@ define i32 @test_x86_avx512_comi_sd_eq_s
 define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
   ret i32 %res
@@ -6329,9 +6327,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq_
 define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
   ret i32 %res
@@ -6340,9 +6337,8 @@ define i32 @test_x86_avx512_comi_sd_eq(<
 define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vcmpeq_uqsd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
   ret i32 %res
@@ -6351,9 +6347,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq(
 define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpltsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
   ret i32 %res
@@ -6362,9 +6357,8 @@ define i32 @test_x86_avx512_comi_sd_lt_s
 define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpngesd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
   ret i32 %res
@@ -6373,9 +6367,8 @@ define i32 @test_x86_avx512_ucomi_sd_lt_
 define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcomisd %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpltsd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
   ret i32 %res
@@ -6384,9 +6377,8 @@ define i32 @test_x86_avx512_comi_sd_lt(<
 define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomisd %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpngesd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
   ret i32 %res
@@ -6397,9 +6389,8 @@ declare i32 @llvm.x86.avx512.vcomi.sd<http://llvm.x86.avx512.vcomi.sd>(<2
 define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vucomiss %xmm1, %xmm0
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    vcmpngess %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
   ret i32 %res

Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -54,15 +54,19 @@ define i32 @test_x86_sse_comieq_ss(<4 x
 ; SSE-LABEL: test_x86_sse_comieq_ss:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    sete %cl
+; SSE-NEXT:    andb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_comieq_ss:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vcomiss %xmm1, %xmm0
-; KNL-NEXT:    sete %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setnp %al
+; KNL-NEXT:    sete %cl
+; KNL-NEXT:    andb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_x86_sse_comile_ss:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    setbe %al
+; SSE-NEXT:    comiss %xmm0, %xmm1
+; SSE-NEXT:    setae %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_comile_ss:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vcomiss %xmm1, %xmm0
-; KNL-NEXT:    setbe %al
+; KNL-NEXT:    vcomiss %xmm0, %xmm1
+; KNL-NEXT:    setae %al
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_x86_sse_comilt_ss:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    sbbl %eax, %eax
-; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    comiss %xmm0, %xmm1
+; SSE-NEXT:    seta %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_comilt_ss:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vcomiss %xmm1, %xmm0
-; KNL-NEXT:    sbbl %eax, %eax
-; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vcomiss %xmm0, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -154,15 +158,19 @@ define i32 @test_x86_sse_comineq_ss(<4 x
 ; SSE-LABEL: test_x86_sse_comineq_ss:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setp %al
+; SSE-NEXT:    setne %cl
+; SSE-NEXT:    orb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_comineq_ss:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vcomiss %xmm1, %xmm0
-; KNL-NEXT:    setne %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setp %al
+; KNL-NEXT:    setne %cl
+; KNL-NEXT:    orb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -504,15 +512,19 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
 ; SSE-LABEL: test_x86_sse_ucomieq_ss:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    sete %cl
+; SSE-NEXT:    andb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_ucomieq_ss:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vucomiss %xmm1, %xmm0
-; KNL-NEXT:    sete %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setnp %al
+; KNL-NEXT:    sete %cl
+; KNL-NEXT:    andb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -563,15 +575,15 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_x86_sse_ucomile_ss:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    setbe %al
+; SSE-NEXT:    ucomiss %xmm0, %xmm1
+; SSE-NEXT:    setae %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_ucomile_ss:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vucomiss %xmm1, %xmm0
-; KNL-NEXT:    setbe %al
+; KNL-NEXT:    vucomiss %xmm0, %xmm1
+; KNL-NEXT:    setae %al
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -583,16 +595,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_x86_sse_ucomilt_ss:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    sbbl %eax, %eax
-; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    ucomiss %xmm0, %xmm1
+; SSE-NEXT:    seta %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_ucomilt_ss:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vucomiss %xmm1, %xmm0
-; KNL-NEXT:    sbbl %eax, %eax
-; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vucomiss %xmm0, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -604,15 +616,19 @@ define i32 @test_x86_sse_ucomineq_ss(<4
 ; SSE-LABEL: test_x86_sse_ucomineq_ss:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setp %al
+; SSE-NEXT:    setne %cl
+; SSE-NEXT:    orb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse_ucomineq_ss:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vucomiss %xmm1, %xmm0
-; KNL-NEXT:    setne %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setp %al
+; KNL-NEXT:    setne %cl
+; KNL-NEXT:    orb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res

Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -54,15 +54,19 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
 ; SSE-LABEL: test_x86_sse2_comieq_sd:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    sete %cl
+; SSE-NEXT:    andb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_comieq_sd:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vcomisd %xmm1, %xmm0
-; KNL-NEXT:    sete %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setnp %al
+; KNL-NEXT:    sete %cl
+; KNL-NEXT:    andb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_x86_sse2_comile_sd:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    setbe %al
+; SSE-NEXT:    comisd %xmm0, %xmm1
+; SSE-NEXT:    setae %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_comile_sd:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vcomisd %xmm1, %xmm0
-; KNL-NEXT:    setbe %al
+; KNL-NEXT:    vcomisd %xmm0, %xmm1
+; KNL-NEXT:    setae %al
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_x86_sse2_comilt_sd:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    sbbl %eax, %eax
-; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    comisd %xmm0, %xmm1
+; SSE-NEXT:    seta %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_comilt_sd:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vcomisd %xmm1, %xmm0
-; KNL-NEXT:    sbbl %eax, %eax
-; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vcomisd %xmm0, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -154,15 +158,19 @@ define i32 @test_x86_sse2_comineq_sd(<2
 ; SSE-LABEL: test_x86_sse2_comineq_sd:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setp %al
+; SSE-NEXT:    setne %cl
+; SSE-NEXT:    orb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_comineq_sd:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vcomisd %xmm1, %xmm0
-; KNL-NEXT:    setne %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setp %al
+; KNL-NEXT:    setne %cl
+; KNL-NEXT:    orb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1237,15 +1245,19 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
 ; SSE-LABEL: test_x86_sse2_ucomieq_sd:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    sete %cl
+; SSE-NEXT:    andb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_ucomieq_sd:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vucomisd %xmm1, %xmm0
-; KNL-NEXT:    sete %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setnp %al
+; KNL-NEXT:    sete %cl
+; KNL-NEXT:    andb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1296,15 +1308,15 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_x86_sse2_ucomile_sd:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    setbe %al
+; SSE-NEXT:    ucomisd %xmm0, %xmm1
+; SSE-NEXT:    setae %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_ucomile_sd:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vucomisd %xmm1, %xmm0
-; KNL-NEXT:    setbe %al
+; KNL-NEXT:    vucomisd %xmm0, %xmm1
+; KNL-NEXT:    setae %al
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -1316,16 +1328,16 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_x86_sse2_ucomilt_sd:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    sbbl %eax, %eax
-; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    ucomisd %xmm0, %xmm1
+; SSE-NEXT:    seta %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_ucomilt_sd:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vucomisd %xmm1, %xmm0
-; KNL-NEXT:    sbbl %eax, %eax
-; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vucomisd %xmm0, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
@@ -1337,15 +1349,19 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
 ; SSE-LABEL: test_x86_sse2_ucomineq_sd:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    setne %al
-; SSE-NEXT:    movzbl %al, %eax
+; SSE-NEXT:    setp %al
+; SSE-NEXT:    setne %cl
+; SSE-NEXT:    orb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
 ; SSE-NEXT:    retl
 ;
 ; KNL-LABEL: test_x86_sse2_ucomineq_sd:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vucomisd %xmm1, %xmm0
-; KNL-NEXT:    setne %al
-; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    setp %al
+; KNL-NEXT:    setne %cl
+; KNL-NEXT:    orb %al, %cl
+; KNL-NEXT:    movzbl %cl, %eax
 ; KNL-NEXT:    retl
   %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res


_______________________________________________
llvm-commits mailing list
llvm-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits

---------------------------------------------------------------------
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160720/ff57b52a/attachment-0001.html>


More information about the llvm-commits mailing list