[llvm] r269569 - Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 20 05:05:29 PDT 2016


Hi Elena,

I agree that this patch is the correct thing to do.
However, we at Sony are a bit concerned by this patch because it breaks
some very important customer codebases which heavily rely on the old
lowering behavior.

What if we add a switch to enable the old lowering (as suggested in
PR28510)? I think it would be very useful in the short term. Users that are
stuck with old codebases would be able to pass that switch; other users
will have a bit of time to upgrade their codebases.
What do you think?

Cheers,
Andrea

On Sat, May 14, 2016 at 4:06 PM, Elena Demikhovsky via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: delena
> Date: Sat May 14 10:06:09 2016
> New Revision: 269569
>
> URL: http://llvm.org/viewvc/llvm-project?rev=269569&view=rev
> Log:
> Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512
>
> Differential revision http://reviews.llvm.org/D19261
>
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>     llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
>     llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
>     llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
>     llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 14 10:06:09 2016
> @@ -17503,30 +17503,66 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>        ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
>        SDValue LHS = Op.getOperand(1);
>        SDValue RHS = Op.getOperand(2);
> -      unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG);
> -      assert(X86CC != X86::COND_INVALID && "Unexpected illegal
> condition!");
> -      SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
> -      SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> -                                  DAG.getConstant(X86CC, dl, MVT::i8),
> Cond);
> +      SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
> +      SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS,
> LHS);
> +      SDValue SetCC;
> +      switch (CC) {
> +      case ISD::SETEQ: { // (ZF = 1 and PF = 0)
> +        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> +                            DAG.getConstant(X86::COND_E, dl, MVT::i8),
> Comi);
> +        SDValue SetNP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> +                                    DAG.getConstant(X86::COND_NP, dl,
> MVT::i8),
> +                                    Comi);
> +        SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
> +        break;
> +      }
> +      case ISD::SETNE: { // (ZF = 0 or PF = 1)
> +        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> +                            DAG.getConstant(X86::COND_NE, dl, MVT::i8),
> Comi);
> +        SDValue SetP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> +                                   DAG.getConstant(X86::COND_P, dl,
> MVT::i8),
> +                                   Comi);
> +        SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
> +        break;
> +      }
> +      case ISD::SETGT: // (CF = 0 and ZF = 0)
> +        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> +                            DAG.getConstant(X86::COND_A, dl, MVT::i8),
> Comi);
> +        break;
> +      case ISD::SETLT: { // LT is GT with the operands swapped. Swap the
> operands.
> +        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> +                            DAG.getConstant(X86::COND_A, dl, MVT::i8),
> InvComi);
> +        break;
> +      }
> +      case ISD::SETGE: // CF = 0
> +        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> +                            DAG.getConstant(X86::COND_AE, dl, MVT::i8),
> Comi);
> +        break;
> +      case ISD::SETLE: // LE is GE with the operands swapped. Swap the
> operands.
> +        SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> +                            DAG.getConstant(X86::COND_AE, dl, MVT::i8),
> InvComi);
> +        break;
> +      default:
> +        llvm_unreachable("Unexpected illegal condition!");
> +      }
>        return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
>      }
>      case COMI_RM: { // Comparison intrinsics with Sae
>        SDValue LHS = Op.getOperand(1);
>        SDValue RHS = Op.getOperand(2);
> -      SDValue CC = Op.getOperand(3);
> +      unsigned CondVal =
> cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
>        SDValue Sae = Op.getOperand(4);
> -      auto ComiType = TranslateX86ConstCondToX86CC(CC);
> -      // choose between ordered and unordered (comi/ucomi)
> -      unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 :
> IntrData->Opc1;
> -      SDValue Cond;
> -      if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
> -
>  X86::STATIC_ROUNDING::CUR_DIRECTION)
> -        Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
> +
> +      SDValue FCmp;
> +      if (cast<ConstantSDNode>(Sae)->getZExtValue() ==
> +          X86::STATIC_ROUNDING::CUR_DIRECTION)
> +        FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
> +                                  DAG.getConstant(CondVal, dl, MVT::i8));
>        else
> -        Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
> -      SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> -        DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond);
> -      return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
> +        FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
> +                                  DAG.getConstant(CondVal, dl, MVT::i8),
> Sae);
> +      // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
> +      return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
>      }
>      case VSHIFT:
>        return getTargetVShiftNode(IntrData->Opc0, dl,
> Op.getSimpleValueType(),
>
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 14 10:06:09 2016
> @@ -2331,96 +2331,6 @@ static void verifyIntrinsicTables() {
>            std::end(IntrinsicsWithChain)) &&
>           "Intrinsic data tables should have unique entries");
>  }
> -
> -// X86 specific compare constants.
> -// They must be kept in synch with avxintrin.h
> -#define _X86_CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
> -#define _X86_CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
> -#define _X86_CMP_LE_OS    0x02 /* Less-than-or-equal (ordered,
> signaling)  */
> -#define _X86_CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
> -#define _X86_CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
> -#define _X86_CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
> -#define _X86_CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered,
> signaling)  */
> -#define _X86_CMP_ORD_Q    0x07 /* Ordered (nonsignaling)   */
> -#define _X86_CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling)  */
> -#define _X86_CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unord,
> signaling)  */
> -#define _X86_CMP_NGT_US   0x0a /* Not-greater-than (unordered,
> signaling)  */
> -#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling)  */
> -#define _X86_CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling)  */
> -#define _X86_CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered,
> signaling)  */
> -#define _X86_CMP_GT_OS    0x0e /* Greater-than (ordered, signaling)  */
> -#define _X86_CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling)  */
> -#define _X86_CMP_EQ_OS    0x10 /* Equal (ordered, signaling)  */
> -#define _X86_CMP_LT_OQ    0x11 /* Less-than (ordered, non-signaling)  */
> -#define _X86_CMP_LE_OQ    0x12 /* Less-than-or-equal (ordered,
> non-signaling)  */
> -#define _X86_CMP_UNORD_S  0x13 /* Unordered (signaling)  */
> -#define _X86_CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling)  */
> -#define _X86_CMP_NLT_UQ   0x15 /* Not-less-than (unordered,
> non-signaling)  */
> -#define _X86_CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unord,
> non-signaling)  */
> -#define _X86_CMP_ORD_S    0x17 /* Ordered (signaling)  */
> -#define _X86_CMP_EQ_US    0x18 /* Equal (unordered, signaling)  */
> -#define _X86_CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unord,
> non-sign)  */
> -#define _X86_CMP_NGT_UQ   0x1a /* Not-greater-than (unordered,
> non-signaling)  */
> -#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling)  */
> -#define _X86_CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling)  */
> -#define _X86_CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered,
> non-signaling)  */
> -#define _X86_CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)
> */
> -#define _X86_CMP_TRUE_US  0x1f /* True (unordered, signaling)  */
> -
> -/*
> -* Get comparison modifier from _mm_comi_round_sd/ss intrinsic
> -* Return tuple <isOrdered, X86 condcode>
> -*/
> -static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue
> &imm) {
> -  ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(imm);
> -  unsigned IntImm = CImm->getZExtValue();
> -  // On a floating point condition, the flags are set as follows:
> -  // ZF  PF  CF   op
> -  //  0 | 0 | 0 | X > Y
> -  //  0 | 0 | 1 | X < Y
> -  //  1 | 0 | 0 | X == Y
> -  //  1 | 1 | 1 | unordered
> -  switch (IntImm) {
> -  default: llvm_unreachable("Invalid floating point compare value for
> Comi!");
> -  case _X86_CMP_EQ_OQ:      // 0x00 - Equal (ordered, nonsignaling)
> -  case _X86_CMP_EQ_OS:      // 0x10 - Equal (ordered, signaling)
> -    return std::make_tuple(true, X86::COND_E);
> -  case _X86_CMP_EQ_UQ:      // 0x08 - Equal (unordered, non-signaling)
> -  case _X86_CMP_EQ_US:      // 0x18 - Equal (unordered, signaling)
> -    return std::make_tuple(false , X86::COND_E);
> -  case _X86_CMP_LT_OS:      // 0x01 - Less-than (ordered, signaling)
> -  case _X86_CMP_LT_OQ:      // 0x11 - Less-than (ordered, nonsignaling)
> -    return std::make_tuple(true, X86::COND_B);
> -  case _X86_CMP_NGE_US:     // 0x09 - Not-greater-than-or-equal
> (unordered, signaling)
> -  case _X86_CMP_NGE_UQ:     // 0x19 - Not-greater-than-or-equal
> (unordered, nonsignaling)
> -    return std::make_tuple(false , X86::COND_B);
> -  case _X86_CMP_LE_OS:      // 0x02 - Less-than-or-equal (ordered,
> signaling)
> -  case _X86_CMP_LE_OQ:      // 0x12 - Less-than-or-equal (ordered,
> nonsignaling)
> -    return std::make_tuple(true, X86::COND_BE);
> -  case _X86_CMP_NGT_US:     // 0x0A - Not-greater-than (unordered,
> signaling)
> -  case _X86_CMP_NGT_UQ:     // 0x1A - Not-greater-than (unordered,
> nonsignaling)
> -    return std::make_tuple(false, X86::COND_BE);
> -  case _X86_CMP_GT_OS:      // 0x0E - Greater-than (ordered, signaling)
> -  case _X86_CMP_GT_OQ:      // 0x1E - Greater-than (ordered, nonsignaling)
> -    return std::make_tuple(true, X86::COND_A);
> -  case _X86_CMP_NLE_US:     // 0x06 - Not-less-than-or-equal
> (unordered,signaling)
> -  case _X86_CMP_NLE_UQ:     // 0x16 - Not-less-than-or-equal (unordered,
> nonsignaling)
> -    return std::make_tuple(false, X86::COND_A);
> -  case _X86_CMP_GE_OS:      // 0x0D - Greater-than-or-equal (ordered,
> signaling)
> -  case _X86_CMP_GE_OQ:      // 0x1D - Greater-than-or-equal (ordered,
> nonsignaling)
> -    return std::make_tuple(true, X86::COND_AE);
> -  case _X86_CMP_NLT_US:     // 0x05 - Not-less-than (unordered, signaling)
> -  case _X86_CMP_NLT_UQ:     // 0x15 - Not-less-than (unordered,
> nonsignaling)
> -    return std::make_tuple(false, X86::COND_AE);
> -  case _X86_CMP_NEQ_OQ:     // 0x0C - Not-equal (ordered, non-signaling)
> -  case _X86_CMP_NEQ_OS:     // 0x1C - Not-equal (ordered, signaling)
> -    return std::make_tuple(true, X86::COND_NE);
> -  case _X86_CMP_NEQ_UQ:     // 0x04 - Not-equal (unordered, nonsignaling)
> -  case _X86_CMP_NEQ_US:     // 0x14 - Not-equal (unordered, signaling)
> -    return std::make_tuple(false, X86::COND_NE);
> -  }
> -}
> -
>  } // End llvm namespace
>
>  #endif
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Sat May 14 10:06:09
> 2016
> @@ -104,8 +104,10 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
>  ; CHECK-LABEL: test_x86_sse2_comieq_sd:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
> -; CHECK-NEXT:    sete %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    setnp %al
> +; CHECK-NEXT:    sete %cl
> +; CHECK-NEXT:    andb %al, %cl
> +; CHECK-NEXT:    movzbl %cl, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -142,8 +144,8 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
>  define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
>  ; CHECK-LABEL: test_x86_sse2_comile_sd:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vcomisd %xmm1, %xmm0
> -; CHECK-NEXT:    setbe %al
> +; CHECK-NEXT:    vcomisd %xmm0, %xmm1
> +; CHECK-NEXT:    setae %al
>  ; CHECK-NEXT:    movzbl %al, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> @@ -155,9 +157,9 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
>  define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
>  ; CHECK-LABEL: test_x86_sse2_comilt_sd:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vcomisd %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vcomisd %xmm0, %xmm1
> +; CHECK-NEXT:    seta %al
> +; CHECK-NEXT:    movzbl %al, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -169,8 +171,10 @@ define i32 @test_x86_sse2_comineq_sd(<2
>  ; CHECK-LABEL: test_x86_sse2_comineq_sd:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
> -; CHECK-NEXT:    setne %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    setp %al
> +; CHECK-NEXT:    setne %cl
> +; CHECK-NEXT:    orb %al, %cl
> +; CHECK-NEXT:    movzbl %cl, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -912,8 +916,10 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
>  ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
> -; CHECK-NEXT:    sete %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    setnp %al
> +; CHECK-NEXT:    sete %cl
> +; CHECK-NEXT:    andb %al, %cl
> +; CHECK-NEXT:    movzbl %cl, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -950,8 +956,8 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
>  define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
>  ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomisd %xmm1, %xmm0
> -; CHECK-NEXT:    setbe %al
> +; CHECK-NEXT:    vucomisd %xmm0, %xmm1
> +; CHECK-NEXT:    setae %al
>  ; CHECK-NEXT:    movzbl %al, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> @@ -963,9 +969,9 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
>  define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
>  ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomisd %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vucomisd %xmm0, %xmm1
> +; CHECK-NEXT:    seta %al
> +; CHECK-NEXT:    movzbl %al, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -977,8 +983,10 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
>  ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
> -; CHECK-NEXT:    setne %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    setp %al
> +; CHECK-NEXT:    setne %cl
> +; CHECK-NEXT:    orb %al, %cl
> +; CHECK-NEXT:    movzbl %cl, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -1699,8 +1707,10 @@ define i32 @test_x86_sse_comieq_ss(<4 x
>  ; CHECK-LABEL: test_x86_sse_comieq_ss:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
> -; CHECK-NEXT:    sete %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    setnp %al
> +; CHECK-NEXT:    sete %cl
> +; CHECK-NEXT:    andb %al, %cl
> +; CHECK-NEXT:    movzbl %cl, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -1737,8 +1747,8 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
>  define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
>  ; CHECK-LABEL: test_x86_sse_comile_ss:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vcomiss %xmm1, %xmm0
> -; CHECK-NEXT:    setbe %al
> +; CHECK-NEXT:    vcomiss %xmm0, %xmm1
> +; CHECK-NEXT:    setae %al
>  ; CHECK-NEXT:    movzbl %al, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> @@ -1750,9 +1760,9 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
>  define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
>  ; CHECK-LABEL: test_x86_sse_comilt_ss:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vcomiss %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vcomiss %xmm0, %xmm1
> +; CHECK-NEXT:    seta %al
> +; CHECK-NEXT:    movzbl %al, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -1764,8 +1774,10 @@ define i32 @test_x86_sse_comineq_ss(<4 x
>  ; CHECK-LABEL: test_x86_sse_comineq_ss:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
> -; CHECK-NEXT:    setne %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    setp %al
> +; CHECK-NEXT:    setne %cl
> +; CHECK-NEXT:    orb %al, %cl
> +; CHECK-NEXT:    movzbl %cl, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -2003,8 +2015,10 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
>  ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
> -; CHECK-NEXT:    sete %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    setnp %al
> +; CHECK-NEXT:    sete %cl
> +; CHECK-NEXT:    andb %al, %cl
> +; CHECK-NEXT:    movzbl %cl, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -2041,8 +2055,8 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
>  define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
>  ; CHECK-LABEL: test_x86_sse_ucomile_ss:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomiss %xmm1, %xmm0
> -; CHECK-NEXT:    setbe %al
> +; CHECK-NEXT:    vucomiss %xmm0, %xmm1
> +; CHECK-NEXT:    setae %al
>  ; CHECK-NEXT:    movzbl %al, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> @@ -2054,9 +2068,9 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
>  define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
>  ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomiss %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vucomiss %xmm0, %xmm1
> +; CHECK-NEXT:    seta %al
> +; CHECK-NEXT:    movzbl %al, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -2068,8 +2082,10 @@ define i32 @test_x86_sse_ucomineq_ss(<4
>  ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
> -; CHECK-NEXT:    setne %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    setp %al
> +; CHECK-NEXT:    setne %cl
> +; CHECK-NEXT:    orb %al, %cl
> +; CHECK-NEXT:    movzbl %cl, %eax
>  ; CHECK-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 14 10:06:09
> 2016
> @@ -6307,9 +6307,8 @@ define <8 x double>@test_int_x86_avx512_
>  define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double>
> %a1) {
>  ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vcomisd {sae}, %xmm1, %xmm0
> -; CHECK-NEXT:    sete %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    vcmpeqsd {sae}, %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 0, i32 8)
>    ret i32 %res
> @@ -6318,9 +6317,8 @@ define i32 @test_x86_avx512_comi_sd_eq_s
>  define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x
> double> %a1) {
>  ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomisd {sae}, %xmm1, %xmm0
> -; CHECK-NEXT:    sete %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 8, i32 8)
>    ret i32 %res
> @@ -6329,9 +6327,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq_
>  define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double>
> %a1) {
>  ; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vcomisd %xmm1, %xmm0
> -; CHECK-NEXT:    sete %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 0, i32 4)
>    ret i32 %res
> @@ -6340,9 +6337,8 @@ define i32 @test_x86_avx512_comi_sd_eq(<
>  define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double>
> %a1) {
>  ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomisd %xmm1, %xmm0
> -; CHECK-NEXT:    sete %al
> -; CHECK-NEXT:    movzbl %al, %eax
> +; CHECK-NEXT:    vcmpeq_uqsd %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 8, i32 4)
>    ret i32 %res
> @@ -6351,9 +6347,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq(
>  define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double>
> %a1) {
>  ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vcomisd {sae}, %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vcmpltsd {sae}, %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 1, i32 8)
>    ret i32 %res
> @@ -6362,9 +6357,8 @@ define i32 @test_x86_avx512_comi_sd_lt_s
>  define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x
> double> %a1) {
>  ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomisd {sae}, %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vcmpngesd {sae}, %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 9, i32 8)
>    ret i32 %res
> @@ -6373,9 +6367,8 @@ define i32 @test_x86_avx512_ucomi_sd_lt_
>  define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double>
> %a1) {
>  ; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vcomisd %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vcmpltsd %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 1, i32 4)
>    ret i32 %res
> @@ -6384,9 +6377,8 @@ define i32 @test_x86_avx512_comi_sd_lt(<
>  define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double>
> %a1) {
>  ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomisd %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vcmpngesd %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 9, i32 4)
>    ret i32 %res
> @@ -6397,9 +6389,8 @@ declare i32 @llvm.x86.avx512.vcomi.sd(<2
>  define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1)
> {
>  ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
>  ; CHECK:       ## BB#0:
> -; CHECK-NEXT:    vucomiss %xmm1, %xmm0
> -; CHECK-NEXT:    sbbl %eax, %eax
> -; CHECK-NEXT:    andl $1, %eax
> +; CHECK-NEXT:    vcmpngess %xmm1, %xmm0, %k0
> +; CHECK-NEXT:    kmovw %k0, %eax
>  ; CHECK-NEXT:    retq
>    %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float>
> %a1, i32 9, i32 4)
>    ret i32 %res
>
> Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Sat May 14 10:06:09
> 2016
> @@ -54,15 +54,19 @@ define i32 @test_x86_sse_comieq_ss(<4 x
>  ; SSE-LABEL: test_x86_sse_comieq_ss:
>  ; SSE:       ## BB#0:
>  ; SSE-NEXT:    comiss %xmm1, %xmm0
> -; SSE-NEXT:    sete %al
> -; SSE-NEXT:    movzbl %al, %eax
> +; SSE-NEXT:    setnp %al
> +; SSE-NEXT:    sete %cl
> +; SSE-NEXT:    andb %al, %cl
> +; SSE-NEXT:    movzbl %cl, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse_comieq_ss:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vcomiss %xmm1, %xmm0
> -; KNL-NEXT:    sete %al
> -; KNL-NEXT:    movzbl %al, %eax
> +; KNL-NEXT:    setnp %al
> +; KNL-NEXT:    sete %cl
> +; KNL-NEXT:    andb %al, %cl
> +; KNL-NEXT:    movzbl %cl, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
>  define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
>  ; SSE-LABEL: test_x86_sse_comile_ss:
>  ; SSE:       ## BB#0:
> -; SSE-NEXT:    comiss %xmm1, %xmm0
> -; SSE-NEXT:    setbe %al
> +; SSE-NEXT:    comiss %xmm0, %xmm1
> +; SSE-NEXT:    setae %al
>  ; SSE-NEXT:    movzbl %al, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse_comile_ss:
>  ; KNL:       ## BB#0:
> -; KNL-NEXT:    vcomiss %xmm1, %xmm0
> -; KNL-NEXT:    setbe %al
> +; KNL-NEXT:    vcomiss %xmm0, %xmm1
> +; KNL-NEXT:    setae %al
>  ; KNL-NEXT:    movzbl %al, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> @@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
>  define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
>  ; SSE-LABEL: test_x86_sse_comilt_ss:
>  ; SSE:       ## BB#0:
> -; SSE-NEXT:    comiss %xmm1, %xmm0
> -; SSE-NEXT:    sbbl %eax, %eax
> -; SSE-NEXT:    andl $1, %eax
> +; SSE-NEXT:    comiss %xmm0, %xmm1
> +; SSE-NEXT:    seta %al
> +; SSE-NEXT:    movzbl %al, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse_comilt_ss:
>  ; KNL:       ## BB#0:
> -; KNL-NEXT:    vcomiss %xmm1, %xmm0
> -; KNL-NEXT:    sbbl %eax, %eax
> -; KNL-NEXT:    andl $1, %eax
> +; KNL-NEXT:    vcomiss %xmm0, %xmm1
> +; KNL-NEXT:    seta %al
> +; KNL-NEXT:    movzbl %al, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -154,15 +158,19 @@ define i32 @test_x86_sse_comineq_ss(<4 x
>  ; SSE-LABEL: test_x86_sse_comineq_ss:
>  ; SSE:       ## BB#0:
>  ; SSE-NEXT:    comiss %xmm1, %xmm0
> -; SSE-NEXT:    setne %al
> -; SSE-NEXT:    movzbl %al, %eax
> +; SSE-NEXT:    setp %al
> +; SSE-NEXT:    setne %cl
> +; SSE-NEXT:    orb %al, %cl
> +; SSE-NEXT:    movzbl %cl, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse_comineq_ss:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vcomiss %xmm1, %xmm0
> -; KNL-NEXT:    setne %al
> -; KNL-NEXT:    movzbl %al, %eax
> +; KNL-NEXT:    setp %al
> +; KNL-NEXT:    setne %cl
> +; KNL-NEXT:    orb %al, %cl
> +; KNL-NEXT:    movzbl %cl, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -504,15 +512,19 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
>  ; SSE-LABEL: test_x86_sse_ucomieq_ss:
>  ; SSE:       ## BB#0:
>  ; SSE-NEXT:    ucomiss %xmm1, %xmm0
> -; SSE-NEXT:    sete %al
> -; SSE-NEXT:    movzbl %al, %eax
> +; SSE-NEXT:    setnp %al
> +; SSE-NEXT:    sete %cl
> +; SSE-NEXT:    andb %al, %cl
> +; SSE-NEXT:    movzbl %cl, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse_ucomieq_ss:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vucomiss %xmm1, %xmm0
> -; KNL-NEXT:    sete %al
> -; KNL-NEXT:    movzbl %al, %eax
> +; KNL-NEXT:    setnp %al
> +; KNL-NEXT:    sete %cl
> +; KNL-NEXT:    andb %al, %cl
> +; KNL-NEXT:    movzbl %cl, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -563,15 +575,15 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
>  define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
>  ; SSE-LABEL: test_x86_sse_ucomile_ss:
>  ; SSE:       ## BB#0:
> -; SSE-NEXT:    ucomiss %xmm1, %xmm0
> -; SSE-NEXT:    setbe %al
> +; SSE-NEXT:    ucomiss %xmm0, %xmm1
> +; SSE-NEXT:    setae %al
>  ; SSE-NEXT:    movzbl %al, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse_ucomile_ss:
>  ; KNL:       ## BB#0:
> -; KNL-NEXT:    vucomiss %xmm1, %xmm0
> -; KNL-NEXT:    setbe %al
> +; KNL-NEXT:    vucomiss %xmm0, %xmm1
> +; KNL-NEXT:    setae %al
>  ; KNL-NEXT:    movzbl %al, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
> @@ -583,16 +595,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
>  define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
>  ; SSE-LABEL: test_x86_sse_ucomilt_ss:
>  ; SSE:       ## BB#0:
> -; SSE-NEXT:    ucomiss %xmm1, %xmm0
> -; SSE-NEXT:    sbbl %eax, %eax
> -; SSE-NEXT:    andl $1, %eax
> +; SSE-NEXT:    ucomiss %xmm0, %xmm1
> +; SSE-NEXT:    seta %al
> +; SSE-NEXT:    movzbl %al, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse_ucomilt_ss:
>  ; KNL:       ## BB#0:
> -; KNL-NEXT:    vucomiss %xmm1, %xmm0
> -; KNL-NEXT:    sbbl %eax, %eax
> -; KNL-NEXT:    andl $1, %eax
> +; KNL-NEXT:    vucomiss %xmm0, %xmm1
> +; KNL-NEXT:    seta %al
> +; KNL-NEXT:    movzbl %al, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -604,15 +616,19 @@ define i32 @test_x86_sse_ucomineq_ss(<4
>  ; SSE-LABEL: test_x86_sse_ucomineq_ss:
>  ; SSE:       ## BB#0:
>  ; SSE-NEXT:    ucomiss %xmm1, %xmm0
> -; SSE-NEXT:    setne %al
> -; SSE-NEXT:    movzbl %al, %eax
> +; SSE-NEXT:    setp %al
> +; SSE-NEXT:    setne %cl
> +; SSE-NEXT:    orb %al, %cl
> +; SSE-NEXT:    movzbl %cl, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse_ucomineq_ss:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vucomiss %xmm1, %xmm0
> -; KNL-NEXT:    setne %al
> -; KNL-NEXT:    movzbl %al, %eax
> +; KNL-NEXT:    setp %al
> +; KNL-NEXT:    setne %cl
> +; KNL-NEXT:    orb %al, %cl
> +; KNL-NEXT:    movzbl %cl, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
>    ret i32 %res
>
> Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sat May 14 10:06:09 2016
> @@ -54,15 +54,19 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
>  ; SSE-LABEL: test_x86_sse2_comieq_sd:
>  ; SSE:       ## BB#0:
>  ; SSE-NEXT:    comisd %xmm1, %xmm0
> -; SSE-NEXT:    sete %al
> -; SSE-NEXT:    movzbl %al, %eax
> +; SSE-NEXT:    setnp %al
> +; SSE-NEXT:    sete %cl
> +; SSE-NEXT:    andb %al, %cl
> +; SSE-NEXT:    movzbl %cl, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse2_comieq_sd:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vcomisd %xmm1, %xmm0
> -; KNL-NEXT:    sete %al
> -; KNL-NEXT:    movzbl %al, %eax
> +; KNL-NEXT:    setnp %al
> +; KNL-NEXT:    sete %cl
> +; KNL-NEXT:    andb %al, %cl
> +; KNL-NEXT:    movzbl %cl, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
>  define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
>  ; SSE-LABEL: test_x86_sse2_comile_sd:
>  ; SSE:       ## BB#0:
> -; SSE-NEXT:    comisd %xmm1, %xmm0
> -; SSE-NEXT:    setbe %al
> +; SSE-NEXT:    comisd %xmm0, %xmm1
> +; SSE-NEXT:    setae %al
>  ; SSE-NEXT:    movzbl %al, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse2_comile_sd:
>  ; KNL:       ## BB#0:
> -; KNL-NEXT:    vcomisd %xmm1, %xmm0
> -; KNL-NEXT:    setbe %al
> +; KNL-NEXT:    vcomisd %xmm0, %xmm1
> +; KNL-NEXT:    setae %al
>  ; KNL-NEXT:    movzbl %al, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> @@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
>  define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
>  ; SSE-LABEL: test_x86_sse2_comilt_sd:
>  ; SSE:       ## BB#0:
> -; SSE-NEXT:    comisd %xmm1, %xmm0
> -; SSE-NEXT:    sbbl %eax, %eax
> -; SSE-NEXT:    andl $1, %eax
> +; SSE-NEXT:    comisd %xmm0, %xmm1
> +; SSE-NEXT:    seta %al
> +; SSE-NEXT:    movzbl %al, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse2_comilt_sd:
>  ; KNL:       ## BB#0:
> -; KNL-NEXT:    vcomisd %xmm1, %xmm0
> -; KNL-NEXT:    sbbl %eax, %eax
> -; KNL-NEXT:    andl $1, %eax
> +; KNL-NEXT:    vcomisd %xmm0, %xmm1
> +; KNL-NEXT:    seta %al
> +; KNL-NEXT:    movzbl %al, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -154,15 +158,19 @@ define i32 @test_x86_sse2_comineq_sd(<2
>  ; SSE-LABEL: test_x86_sse2_comineq_sd:
>  ; SSE:       ## BB#0:
>  ; SSE-NEXT:    comisd %xmm1, %xmm0
> -; SSE-NEXT:    setne %al
> -; SSE-NEXT:    movzbl %al, %eax
> +; SSE-NEXT:    setp %al
> +; SSE-NEXT:    setne %cl
> +; SSE-NEXT:    orb %al, %cl
> +; SSE-NEXT:    movzbl %cl, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse2_comineq_sd:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vcomisd %xmm1, %xmm0
> -; KNL-NEXT:    setne %al
> -; KNL-NEXT:    movzbl %al, %eax
> +; KNL-NEXT:    setp %al
> +; KNL-NEXT:    setne %cl
> +; KNL-NEXT:    orb %al, %cl
> +; KNL-NEXT:    movzbl %cl, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -1237,15 +1245,19 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
>  ; SSE-LABEL: test_x86_sse2_ucomieq_sd:
>  ; SSE:       ## BB#0:
>  ; SSE-NEXT:    ucomisd %xmm1, %xmm0
> -; SSE-NEXT:    sete %al
> -; SSE-NEXT:    movzbl %al, %eax
> +; SSE-NEXT:    setnp %al
> +; SSE-NEXT:    sete %cl
> +; SSE-NEXT:    andb %al, %cl
> +; SSE-NEXT:    movzbl %cl, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse2_ucomieq_sd:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vucomisd %xmm1, %xmm0
> -; KNL-NEXT:    sete %al
> -; KNL-NEXT:    movzbl %al, %eax
> +; KNL-NEXT:    setnp %al
> +; KNL-NEXT:    sete %cl
> +; KNL-NEXT:    andb %al, %cl
> +; KNL-NEXT:    movzbl %cl, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -1296,15 +1308,15 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
>  define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
>  ; SSE-LABEL: test_x86_sse2_ucomile_sd:
>  ; SSE:       ## BB#0:
> -; SSE-NEXT:    ucomisd %xmm1, %xmm0
> -; SSE-NEXT:    setbe %al
> +; SSE-NEXT:    ucomisd %xmm0, %xmm1
> +; SSE-NEXT:    setae %al
>  ; SSE-NEXT:    movzbl %al, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse2_ucomile_sd:
>  ; KNL:       ## BB#0:
> -; KNL-NEXT:    vucomisd %xmm1, %xmm0
> -; KNL-NEXT:    setbe %al
> +; KNL-NEXT:    vucomisd %xmm0, %xmm1
> +; KNL-NEXT:    setae %al
>  ; KNL-NEXT:    movzbl %al, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> @@ -1316,16 +1328,16 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
>  define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
>  ; SSE-LABEL: test_x86_sse2_ucomilt_sd:
>  ; SSE:       ## BB#0:
> -; SSE-NEXT:    ucomisd %xmm1, %xmm0
> -; SSE-NEXT:    sbbl %eax, %eax
> -; SSE-NEXT:    andl $1, %eax
> +; SSE-NEXT:    ucomisd %xmm0, %xmm1
> +; SSE-NEXT:    seta %al
> +; SSE-NEXT:    movzbl %al, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse2_ucomilt_sd:
>  ; KNL:       ## BB#0:
> -; KNL-NEXT:    vucomisd %xmm1, %xmm0
> -; KNL-NEXT:    sbbl %eax, %eax
> -; KNL-NEXT:    andl $1, %eax
> +; KNL-NEXT:    vucomisd %xmm0, %xmm1
> +; KNL-NEXT:    seta %al
> +; KNL-NEXT:    movzbl %al, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
> @@ -1337,15 +1349,19 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
>  ; SSE-LABEL: test_x86_sse2_ucomineq_sd:
>  ; SSE:       ## BB#0:
>  ; SSE-NEXT:    ucomisd %xmm1, %xmm0
> -; SSE-NEXT:    setne %al
> -; SSE-NEXT:    movzbl %al, %eax
> +; SSE-NEXT:    setp %al
> +; SSE-NEXT:    setne %cl
> +; SSE-NEXT:    orb %al, %cl
> +; SSE-NEXT:    movzbl %cl, %eax
>  ; SSE-NEXT:    retl
>  ;
>  ; KNL-LABEL: test_x86_sse2_ucomineq_sd:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vucomisd %xmm1, %xmm0
> -; KNL-NEXT:    setne %al
> -; KNL-NEXT:    movzbl %al, %eax
> +; KNL-NEXT:    setp %al
> +; KNL-NEXT:    setne %cl
> +; KNL-NEXT:    orb %al, %cl
> +; KNL-NEXT:    movzbl %cl, %eax
>  ; KNL-NEXT:    retl
>    %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
>    ret i32 %res
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160720/cb29f294/attachment-0001.html>


More information about the llvm-commits mailing list