[llvm] r269569 - Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 20 05:05:29 PDT 2016
Hi Elena,
I agree that this patch is the correct thing to do.
However, we at Sony are a bit concerned by this patch because it breaks
some very important customer codebases which heavily rely on the old
lowering behavior.
What if we add a switch to enable the old lowering (as suggested in
PR28510)? I think it would be very useful in the short term. Users that are
stuck with old codebases would be able to pass that switch; other users
will have a bit of time to upgrade their codebases.
What do you think?
Cheers,
Andrea
On Sat, May 14, 2016 at 4:06 PM, Elena Demikhovsky via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: delena
> Date: Sat May 14 10:06:09 2016
> New Revision: 269569
>
> URL: http://llvm.org/viewvc/llvm-project?rev=269569&view=rev
> Log:
> Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512
>
> Differential revision http://reviews.llvm.org/D19261
>
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
> llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
> llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 14 10:06:09 2016
> @@ -17503,30 +17503,66 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
> ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
> SDValue LHS = Op.getOperand(1);
> SDValue RHS = Op.getOperand(2);
> - unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG);
> - assert(X86CC != X86::COND_INVALID && "Unexpected illegal
> condition!");
> - SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
> - SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> - DAG.getConstant(X86CC, dl, MVT::i8),
> Cond);
> + SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
> + SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS,
> LHS);
> + SDValue SetCC;
> + switch (CC) {
> + case ISD::SETEQ: { // (ZF = 1 and PF = 0)
> + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> + DAG.getConstant(X86::COND_E, dl, MVT::i8),
> Comi);
> + SDValue SetNP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> + DAG.getConstant(X86::COND_NP, dl,
> MVT::i8),
> + Comi);
> + SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
> + break;
> + }
> + case ISD::SETNE: { // (ZF = 0 or PF = 1)
> + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> + DAG.getConstant(X86::COND_NE, dl, MVT::i8),
> Comi);
> + SDValue SetP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> + DAG.getConstant(X86::COND_P, dl,
> MVT::i8),
> + Comi);
> + SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
> + break;
> + }
> + case ISD::SETGT: // (CF = 0 and ZF = 0)
> + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> + DAG.getConstant(X86::COND_A, dl, MVT::i8),
> Comi);
> + break;
> + case ISD::SETLT: { // The condition is opposite to GT. Swap the
> operands.
> + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> + DAG.getConstant(X86::COND_A, dl, MVT::i8),
> InvComi);
> + break;
> + }
> + case ISD::SETGE: // CF = 0
> + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> + DAG.getConstant(X86::COND_AE, dl, MVT::i8),
> Comi);
> + break;
> + case ISD::SETLE: // The condition is opposite to GE. Swap the
> operands.
> + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> + DAG.getConstant(X86::COND_AE, dl, MVT::i8),
> InvComi);
> + break;
> + default:
> + llvm_unreachable("Unexpected illegal condition!");
> + }
> return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
> }
> case COMI_RM: { // Comparison intrinsics with Sae
> SDValue LHS = Op.getOperand(1);
> SDValue RHS = Op.getOperand(2);
> - SDValue CC = Op.getOperand(3);
> + unsigned CondVal =
> cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
> SDValue Sae = Op.getOperand(4);
> - auto ComiType = TranslateX86ConstCondToX86CC(CC);
> - // choose between ordered and unordered (comi/ucomi)
> - unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 :
> IntrData->Opc1;
> - SDValue Cond;
> - if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
> -
> X86::STATIC_ROUNDING::CUR_DIRECTION)
> - Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
> +
> + SDValue FCmp;
> + if (cast<ConstantSDNode>(Sae)->getZExtValue() ==
> + X86::STATIC_ROUNDING::CUR_DIRECTION)
> + FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
> + DAG.getConstant(CondVal, dl, MVT::i8));
> else
> - Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
> - SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
> - DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond);
> - return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
> + FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
> + DAG.getConstant(CondVal, dl, MVT::i8),
> Sae);
> + // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
> + return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
> }
> case VSHIFT:
> return getTargetVShiftNode(IntrData->Opc0, dl,
> Op.getSimpleValueType(),
>
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 14 10:06:09 2016
> @@ -2331,96 +2331,6 @@ static void verifyIntrinsicTables() {
> std::end(IntrinsicsWithChain)) &&
> "Intrinsic data tables should have unique entries");
> }
> -
> -// X86 specific compare constants.
> -// They must be kept in synch with avxintrin.h
> -#define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
> -#define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
> -#define _X86_CMP_LE_OS 0x02 /* Less-than-or-equal (ordered,
> signaling) */
> -#define _X86_CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
> -#define _X86_CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
> -#define _X86_CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
> -#define _X86_CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered,
> signaling) */
> -#define _X86_CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */
> -#define _X86_CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
> -#define _X86_CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord,
> signaling) */
> -#define _X86_CMP_NGT_US 0x0a /* Not-greater-than (unordered,
> signaling) */
> -#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
> -#define _X86_CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
> -#define _X86_CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered,
> signaling) */
> -#define _X86_CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
> -#define _X86_CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
> -#define _X86_CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
> -#define _X86_CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
> -#define _X86_CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered,
> non-signaling) */
> -#define _X86_CMP_UNORD_S 0x13 /* Unordered (signaling) */
> -#define _X86_CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
> -#define _X86_CMP_NLT_UQ 0x15 /* Not-less-than (unordered,
> non-signaling) */
> -#define _X86_CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord,
> non-signaling) */
> -#define _X86_CMP_ORD_S 0x17 /* Ordered (signaling) */
> -#define _X86_CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
> -#define _X86_CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord,
> non-sign) */
> -#define _X86_CMP_NGT_UQ 0x1a /* Not-greater-than (unordered,
> non-signaling) */
> -#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
> -#define _X86_CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
> -#define _X86_CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered,
> non-signaling) */
> -#define _X86_CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling)
> */
> -#define _X86_CMP_TRUE_US 0x1f /* True (unordered, signaling) */
> -
> -/*
> -* Get comparison modifier from _mm_comi_round_sd/ss intrinsic
> -* Return tuple <isOrdered, X86 condcode>
> -*/
> -static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue
> &imm) {
> - ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(imm);
> - unsigned IntImm = CImm->getZExtValue();
> - // On a floating point condition, the flags are set as follows:
> - // ZF PF CF op
> - // 0 | 0 | 0 | X > Y
> - // 0 | 0 | 1 | X < Y
> - // 1 | 0 | 0 | X == Y
> - // 1 | 1 | 1 | unordered
> - switch (IntImm) {
> - default: llvm_unreachable("Invalid floating point compare value for
> Comi!");
> - case _X86_CMP_EQ_OQ: // 0x00 - Equal (ordered, nonsignaling)
> - case _X86_CMP_EQ_OS: // 0x10 - Equal (ordered, signaling)
> - return std::make_tuple(true, X86::COND_E);
> - case _X86_CMP_EQ_UQ: // 0x08 - Equal (unordered, non-signaling)
> - case _X86_CMP_EQ_US: // 0x18 - Equal (unordered, signaling)
> - return std::make_tuple(false , X86::COND_E);
> - case _X86_CMP_LT_OS: // 0x01 - Less-than (ordered, signaling)
> - case _X86_CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling)
> - return std::make_tuple(true, X86::COND_B);
> - case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal
> (unordered, signaling)
> - case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal
> (unordered, nonsignaling)
> - return std::make_tuple(false , X86::COND_B);
> - case _X86_CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered,
> signaling)
> - case _X86_CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered,
> nonsignaling)
> - return std::make_tuple(true, X86::COND_BE);
> - case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered,
> signaling)
> - case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered,
> nonsignaling)
> - return std::make_tuple(false, X86::COND_BE);
> - case _X86_CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling)
> - case _X86_CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling)
> - return std::make_tuple(true, X86::COND_A);
> - case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal
> (unordered,signaling)
> - case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered,
> nonsignaling)
> - return std::make_tuple(false, X86::COND_A);
> - case _X86_CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered,
> signaling)
> - case _X86_CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered,
> nonsignaling)
> - return std::make_tuple(true, X86::COND_AE);
> - case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling)
> - case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered,
> nonsignaling)
> - return std::make_tuple(false, X86::COND_AE);
> - case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling)
> - case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling)
> - return std::make_tuple(true, X86::COND_NE);
> - case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling)
> - case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling)
> - return std::make_tuple(false, X86::COND_NE);
> - }
> -}
> -
> } // End llvm namespace
>
> #endif
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Sat May 14 10:06:09
> 2016
> @@ -104,8 +104,10 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
> ; CHECK-LABEL: test_x86_sse2_comieq_sd:
> ; CHECK: ## BB#0:
> ; CHECK-NEXT: vcomisd %xmm1, %xmm0
> -; CHECK-NEXT: sete %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: setnp %al
> +; CHECK-NEXT: sete %cl
> +; CHECK-NEXT: andb %al, %cl
> +; CHECK-NEXT: movzbl %cl, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -142,8 +144,8 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
> define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
> ; CHECK-LABEL: test_x86_sse2_comile_sd:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vcomisd %xmm1, %xmm0
> -; CHECK-NEXT: setbe %al
> +; CHECK-NEXT: vcomisd %xmm0, %xmm1
> +; CHECK-NEXT: setae %al
> ; CHECK-NEXT: movzbl %al, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> @@ -155,9 +157,9 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
> define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
> ; CHECK-LABEL: test_x86_sse2_comilt_sd:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vcomisd %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vcomisd %xmm0, %xmm1
> +; CHECK-NEXT: seta %al
> +; CHECK-NEXT: movzbl %al, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -169,8 +171,10 @@ define i32 @test_x86_sse2_comineq_sd(<2
> ; CHECK-LABEL: test_x86_sse2_comineq_sd:
> ; CHECK: ## BB#0:
> ; CHECK-NEXT: vcomisd %xmm1, %xmm0
> -; CHECK-NEXT: setne %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: setp %al
> +; CHECK-NEXT: setne %cl
> +; CHECK-NEXT: orb %al, %cl
> +; CHECK-NEXT: movzbl %cl, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -912,8 +916,10 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
> ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
> ; CHECK: ## BB#0:
> ; CHECK-NEXT: vucomisd %xmm1, %xmm0
> -; CHECK-NEXT: sete %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: setnp %al
> +; CHECK-NEXT: sete %cl
> +; CHECK-NEXT: andb %al, %cl
> +; CHECK-NEXT: movzbl %cl, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -950,8 +956,8 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
> define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
> ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomisd %xmm1, %xmm0
> -; CHECK-NEXT: setbe %al
> +; CHECK-NEXT: vucomisd %xmm0, %xmm1
> +; CHECK-NEXT: setae %al
> ; CHECK-NEXT: movzbl %al, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> @@ -963,9 +969,9 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
> define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
> ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomisd %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vucomisd %xmm0, %xmm1
> +; CHECK-NEXT: seta %al
> +; CHECK-NEXT: movzbl %al, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -977,8 +983,10 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
> ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
> ; CHECK: ## BB#0:
> ; CHECK-NEXT: vucomisd %xmm1, %xmm0
> -; CHECK-NEXT: setne %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: setp %al
> +; CHECK-NEXT: setne %cl
> +; CHECK-NEXT: orb %al, %cl
> +; CHECK-NEXT: movzbl %cl, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x
> double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -1699,8 +1707,10 @@ define i32 @test_x86_sse_comieq_ss(<4 x
> ; CHECK-LABEL: test_x86_sse_comieq_ss:
> ; CHECK: ## BB#0:
> ; CHECK-NEXT: vcomiss %xmm1, %xmm0
> -; CHECK-NEXT: sete %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: setnp %al
> +; CHECK-NEXT: sete %cl
> +; CHECK-NEXT: andb %al, %cl
> +; CHECK-NEXT: movzbl %cl, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -1737,8 +1747,8 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
> define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
> ; CHECK-LABEL: test_x86_sse_comile_ss:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vcomiss %xmm1, %xmm0
> -; CHECK-NEXT: setbe %al
> +; CHECK-NEXT: vcomiss %xmm0, %xmm1
> +; CHECK-NEXT: setae %al
> ; CHECK-NEXT: movzbl %al, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> @@ -1750,9 +1760,9 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
> define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
> ; CHECK-LABEL: test_x86_sse_comilt_ss:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vcomiss %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vcomiss %xmm0, %xmm1
> +; CHECK-NEXT: seta %al
> +; CHECK-NEXT: movzbl %al, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -1764,8 +1774,10 @@ define i32 @test_x86_sse_comineq_ss(<4 x
> ; CHECK-LABEL: test_x86_sse_comineq_ss:
> ; CHECK: ## BB#0:
> ; CHECK-NEXT: vcomiss %xmm1, %xmm0
> -; CHECK-NEXT: setne %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: setp %al
> +; CHECK-NEXT: setne %cl
> +; CHECK-NEXT: orb %al, %cl
> +; CHECK-NEXT: movzbl %cl, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -2003,8 +2015,10 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
> ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
> ; CHECK: ## BB#0:
> ; CHECK-NEXT: vucomiss %xmm1, %xmm0
> -; CHECK-NEXT: sete %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: setnp %al
> +; CHECK-NEXT: sete %cl
> +; CHECK-NEXT: andb %al, %cl
> +; CHECK-NEXT: movzbl %cl, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -2041,8 +2055,8 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
> define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
> ; CHECK-LABEL: test_x86_sse_ucomile_ss:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomiss %xmm1, %xmm0
> -; CHECK-NEXT: setbe %al
> +; CHECK-NEXT: vucomiss %xmm0, %xmm1
> +; CHECK-NEXT: setae %al
> ; CHECK-NEXT: movzbl %al, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> @@ -2054,9 +2068,9 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
> define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
> ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomiss %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vucomiss %xmm0, %xmm1
> +; CHECK-NEXT: seta %al
> +; CHECK-NEXT: movzbl %al, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -2068,8 +2082,10 @@ define i32 @test_x86_sse_ucomineq_ss(<4
> ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
> ; CHECK: ## BB#0:
> ; CHECK-NEXT: vucomiss %xmm1, %xmm0
> -; CHECK-NEXT: setne %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: setp %al
> +; CHECK-NEXT: setne %cl
> +; CHECK-NEXT: orb %al, %cl
> +; CHECK-NEXT: movzbl %cl, %eax
> ; CHECK-NEXT: retl
> %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 14 10:06:09
> 2016
> @@ -6307,9 +6307,8 @@ define <8 x double>@test_int_x86_avx512_
> define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double>
> %a1) {
> ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
> -; CHECK-NEXT: sete %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: vcmpeqsd {sae}, %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 0, i32 8)
> ret i32 %res
> @@ -6318,9 +6317,8 @@ define i32 @test_x86_avx512_comi_sd_eq_s
> define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x
> double> %a1) {
> ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
> -; CHECK-NEXT: sete %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 8, i32 8)
> ret i32 %res
> @@ -6329,9 +6327,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq_
> define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double>
> %a1) {
> ; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vcomisd %xmm1, %xmm0
> -; CHECK-NEXT: sete %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 0, i32 4)
> ret i32 %res
> @@ -6340,9 +6337,8 @@ define i32 @test_x86_avx512_comi_sd_eq(<
> define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double>
> %a1) {
> ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomisd %xmm1, %xmm0
> -; CHECK-NEXT: sete %al
> -; CHECK-NEXT: movzbl %al, %eax
> +; CHECK-NEXT: vcmpeq_uqsd %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 8, i32 4)
> ret i32 %res
> @@ -6351,9 +6347,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq(
> define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double>
> %a1) {
> ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vcmpltsd {sae}, %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 1, i32 8)
> ret i32 %res
> @@ -6362,9 +6357,8 @@ define i32 @test_x86_avx512_comi_sd_lt_s
> define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x
> double> %a1) {
> ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vcmpngesd {sae}, %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 9, i32 8)
> ret i32 %res
> @@ -6373,9 +6367,8 @@ define i32 @test_x86_avx512_ucomi_sd_lt_
> define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double>
> %a1) {
> ; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vcomisd %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vcmpltsd %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 1, i32 4)
> ret i32 %res
> @@ -6384,9 +6377,8 @@ define i32 @test_x86_avx512_comi_sd_lt(<
> define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double>
> %a1) {
> ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomisd %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vcmpngesd %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x
> double> %a1, i32 9, i32 4)
> ret i32 %res
> @@ -6397,9 +6389,8 @@ declare i32 @llvm.x86.avx512.vcomi.sd(<2
> define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1)
> {
> ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
> ; CHECK: ## BB#0:
> -; CHECK-NEXT: vucomiss %xmm1, %xmm0
> -; CHECK-NEXT: sbbl %eax, %eax
> -; CHECK-NEXT: andl $1, %eax
> +; CHECK-NEXT: vcmpngess %xmm1, %xmm0, %k0
> +; CHECK-NEXT: kmovw %k0, %eax
> ; CHECK-NEXT: retq
> %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float>
> %a1, i32 9, i32 4)
> ret i32 %res
>
> Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Sat May 14 10:06:09
> 2016
> @@ -54,15 +54,19 @@ define i32 @test_x86_sse_comieq_ss(<4 x
> ; SSE-LABEL: test_x86_sse_comieq_ss:
> ; SSE: ## BB#0:
> ; SSE-NEXT: comiss %xmm1, %xmm0
> -; SSE-NEXT: sete %al
> -; SSE-NEXT: movzbl %al, %eax
> +; SSE-NEXT: setnp %al
> +; SSE-NEXT: sete %cl
> +; SSE-NEXT: andb %al, %cl
> +; SSE-NEXT: movzbl %cl, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse_comieq_ss:
> ; KNL: ## BB#0:
> ; KNL-NEXT: vcomiss %xmm1, %xmm0
> -; KNL-NEXT: sete %al
> -; KNL-NEXT: movzbl %al, %eax
> +; KNL-NEXT: setnp %al
> +; KNL-NEXT: sete %cl
> +; KNL-NEXT: andb %al, %cl
> +; KNL-NEXT: movzbl %cl, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
> define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
> ; SSE-LABEL: test_x86_sse_comile_ss:
> ; SSE: ## BB#0:
> -; SSE-NEXT: comiss %xmm1, %xmm0
> -; SSE-NEXT: setbe %al
> +; SSE-NEXT: comiss %xmm0, %xmm1
> +; SSE-NEXT: setae %al
> ; SSE-NEXT: movzbl %al, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse_comile_ss:
> ; KNL: ## BB#0:
> -; KNL-NEXT: vcomiss %xmm1, %xmm0
> -; KNL-NEXT: setbe %al
> +; KNL-NEXT: vcomiss %xmm0, %xmm1
> +; KNL-NEXT: setae %al
> ; KNL-NEXT: movzbl %al, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> @@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
> define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
> ; SSE-LABEL: test_x86_sse_comilt_ss:
> ; SSE: ## BB#0:
> -; SSE-NEXT: comiss %xmm1, %xmm0
> -; SSE-NEXT: sbbl %eax, %eax
> -; SSE-NEXT: andl $1, %eax
> +; SSE-NEXT: comiss %xmm0, %xmm1
> +; SSE-NEXT: seta %al
> +; SSE-NEXT: movzbl %al, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse_comilt_ss:
> ; KNL: ## BB#0:
> -; KNL-NEXT: vcomiss %xmm1, %xmm0
> -; KNL-NEXT: sbbl %eax, %eax
> -; KNL-NEXT: andl $1, %eax
> +; KNL-NEXT: vcomiss %xmm0, %xmm1
> +; KNL-NEXT: seta %al
> +; KNL-NEXT: movzbl %al, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -154,15 +158,19 @@ define i32 @test_x86_sse_comineq_ss(<4 x
> ; SSE-LABEL: test_x86_sse_comineq_ss:
> ; SSE: ## BB#0:
> ; SSE-NEXT: comiss %xmm1, %xmm0
> -; SSE-NEXT: setne %al
> -; SSE-NEXT: movzbl %al, %eax
> +; SSE-NEXT: setp %al
> +; SSE-NEXT: setne %cl
> +; SSE-NEXT: orb %al, %cl
> +; SSE-NEXT: movzbl %cl, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse_comineq_ss:
> ; KNL: ## BB#0:
> ; KNL-NEXT: vcomiss %xmm1, %xmm0
> -; KNL-NEXT: setne %al
> -; KNL-NEXT: movzbl %al, %eax
> +; KNL-NEXT: setp %al
> +; KNL-NEXT: setne %cl
> +; KNL-NEXT: orb %al, %cl
> +; KNL-NEXT: movzbl %cl, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -504,15 +512,19 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
> ; SSE-LABEL: test_x86_sse_ucomieq_ss:
> ; SSE: ## BB#0:
> ; SSE-NEXT: ucomiss %xmm1, %xmm0
> -; SSE-NEXT: sete %al
> -; SSE-NEXT: movzbl %al, %eax
> +; SSE-NEXT: setnp %al
> +; SSE-NEXT: sete %cl
> +; SSE-NEXT: andb %al, %cl
> +; SSE-NEXT: movzbl %cl, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse_ucomieq_ss:
> ; KNL: ## BB#0:
> ; KNL-NEXT: vucomiss %xmm1, %xmm0
> -; KNL-NEXT: sete %al
> -; KNL-NEXT: movzbl %al, %eax
> +; KNL-NEXT: setnp %al
> +; KNL-NEXT: sete %cl
> +; KNL-NEXT: andb %al, %cl
> +; KNL-NEXT: movzbl %cl, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -563,15 +575,15 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
> define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
> ; SSE-LABEL: test_x86_sse_ucomile_ss:
> ; SSE: ## BB#0:
> -; SSE-NEXT: ucomiss %xmm1, %xmm0
> -; SSE-NEXT: setbe %al
> +; SSE-NEXT: ucomiss %xmm0, %xmm1
> +; SSE-NEXT: setae %al
> ; SSE-NEXT: movzbl %al, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse_ucomile_ss:
> ; KNL: ## BB#0:
> -; KNL-NEXT: vucomiss %xmm1, %xmm0
> -; KNL-NEXT: setbe %al
> +; KNL-NEXT: vucomiss %xmm0, %xmm1
> +; KNL-NEXT: setae %al
> ; KNL-NEXT: movzbl %al, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> @@ -583,16 +595,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
> define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
> ; SSE-LABEL: test_x86_sse_ucomilt_ss:
> ; SSE: ## BB#0:
> -; SSE-NEXT: ucomiss %xmm1, %xmm0
> -; SSE-NEXT: sbbl %eax, %eax
> -; SSE-NEXT: andl $1, %eax
> +; SSE-NEXT: ucomiss %xmm0, %xmm1
> +; SSE-NEXT: seta %al
> +; SSE-NEXT: movzbl %al, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse_ucomilt_ss:
> ; KNL: ## BB#0:
> -; KNL-NEXT: vucomiss %xmm1, %xmm0
> -; KNL-NEXT: sbbl %eax, %eax
> -; KNL-NEXT: andl $1, %eax
> +; KNL-NEXT: vucomiss %xmm0, %xmm1
> +; KNL-NEXT: seta %al
> +; KNL-NEXT: movzbl %al, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -604,15 +616,19 @@ define i32 @test_x86_sse_ucomineq_ss(<4
> ; SSE-LABEL: test_x86_sse_ucomineq_ss:
> ; SSE: ## BB#0:
> ; SSE-NEXT: ucomiss %xmm1, %xmm0
> -; SSE-NEXT: setne %al
> -; SSE-NEXT: movzbl %al, %eax
> +; SSE-NEXT: setp %al
> +; SSE-NEXT: setne %cl
> +; SSE-NEXT: orb %al, %cl
> +; SSE-NEXT: movzbl %cl, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse_ucomineq_ss:
> ; KNL: ## BB#0:
> ; KNL-NEXT: vucomiss %xmm1, %xmm0
> -; KNL-NEXT: setne %al
> -; KNL-NEXT: movzbl %al, %eax
> +; KNL-NEXT: setp %al
> +; KNL-NEXT: setne %cl
> +; KNL-NEXT: orb %al, %cl
> +; KNL-NEXT: movzbl %cl, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float>
> %a1) ; <i32> [#uses=1]
> ret i32 %res
>
> Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sat May 14 10:06:09
> 2016
> @@ -54,15 +54,19 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
> ; SSE-LABEL: test_x86_sse2_comieq_sd:
> ; SSE: ## BB#0:
> ; SSE-NEXT: comisd %xmm1, %xmm0
> -; SSE-NEXT: sete %al
> -; SSE-NEXT: movzbl %al, %eax
> +; SSE-NEXT: setnp %al
> +; SSE-NEXT: sete %cl
> +; SSE-NEXT: andb %al, %cl
> +; SSE-NEXT: movzbl %cl, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse2_comieq_sd:
> ; KNL: ## BB#0:
> ; KNL-NEXT: vcomisd %xmm1, %xmm0
> -; KNL-NEXT: sete %al
> -; KNL-NEXT: movzbl %al, %eax
> +; KNL-NEXT: setnp %al
> +; KNL-NEXT: sete %cl
> +; KNL-NEXT: andb %al, %cl
> +; KNL-NEXT: movzbl %cl, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
> define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
> ; SSE-LABEL: test_x86_sse2_comile_sd:
> ; SSE: ## BB#0:
> -; SSE-NEXT: comisd %xmm1, %xmm0
> -; SSE-NEXT: setbe %al
> +; SSE-NEXT: comisd %xmm0, %xmm1
> +; SSE-NEXT: setae %al
> ; SSE-NEXT: movzbl %al, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse2_comile_sd:
> ; KNL: ## BB#0:
> -; KNL-NEXT: vcomisd %xmm1, %xmm0
> -; KNL-NEXT: setbe %al
> +; KNL-NEXT: vcomisd %xmm0, %xmm1
> +; KNL-NEXT: setae %al
> ; KNL-NEXT: movzbl %al, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> @@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
> define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
> ; SSE-LABEL: test_x86_sse2_comilt_sd:
> ; SSE: ## BB#0:
> -; SSE-NEXT: comisd %xmm1, %xmm0
> -; SSE-NEXT: sbbl %eax, %eax
> -; SSE-NEXT: andl $1, %eax
> +; SSE-NEXT: comisd %xmm0, %xmm1
> +; SSE-NEXT: seta %al
> +; SSE-NEXT: movzbl %al, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse2_comilt_sd:
> ; KNL: ## BB#0:
> -; KNL-NEXT: vcomisd %xmm1, %xmm0
> -; KNL-NEXT: sbbl %eax, %eax
> -; KNL-NEXT: andl $1, %eax
> +; KNL-NEXT: vcomisd %xmm0, %xmm1
> +; KNL-NEXT: seta %al
> +; KNL-NEXT: movzbl %al, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -154,15 +158,19 @@ define i32 @test_x86_sse2_comineq_sd(<2
> ; SSE-LABEL: test_x86_sse2_comineq_sd:
> ; SSE: ## BB#0:
> ; SSE-NEXT: comisd %xmm1, %xmm0
> -; SSE-NEXT: setne %al
> -; SSE-NEXT: movzbl %al, %eax
> +; SSE-NEXT: setp %al
> +; SSE-NEXT: setne %cl
> +; SSE-NEXT: orb %al, %cl
> +; SSE-NEXT: movzbl %cl, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse2_comineq_sd:
> ; KNL: ## BB#0:
> ; KNL-NEXT: vcomisd %xmm1, %xmm0
> -; KNL-NEXT: setne %al
> -; KNL-NEXT: movzbl %al, %eax
> +; KNL-NEXT: setp %al
> +; KNL-NEXT: setne %cl
> +; KNL-NEXT: orb %al, %cl
> +; KNL-NEXT: movzbl %cl, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -1237,15 +1245,19 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
> ; SSE-LABEL: test_x86_sse2_ucomieq_sd:
> ; SSE: ## BB#0:
> ; SSE-NEXT: ucomisd %xmm1, %xmm0
> -; SSE-NEXT: sete %al
> -; SSE-NEXT: movzbl %al, %eax
> +; SSE-NEXT: setnp %al
> +; SSE-NEXT: sete %cl
> +; SSE-NEXT: andb %al, %cl
> +; SSE-NEXT: movzbl %cl, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse2_ucomieq_sd:
> ; KNL: ## BB#0:
> ; KNL-NEXT: vucomisd %xmm1, %xmm0
> -; KNL-NEXT: sete %al
> -; KNL-NEXT: movzbl %al, %eax
> +; KNL-NEXT: setnp %al
> +; KNL-NEXT: sete %cl
> +; KNL-NEXT: andb %al, %cl
> +; KNL-NEXT: movzbl %cl, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -1296,15 +1308,15 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
> define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
> ; SSE-LABEL: test_x86_sse2_ucomile_sd:
> ; SSE: ## BB#0:
> -; SSE-NEXT: ucomisd %xmm1, %xmm0
> -; SSE-NEXT: setbe %al
> +; SSE-NEXT: ucomisd %xmm0, %xmm1
> +; SSE-NEXT: setae %al
> ; SSE-NEXT: movzbl %al, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse2_ucomile_sd:
> ; KNL: ## BB#0:
> -; KNL-NEXT: vucomisd %xmm1, %xmm0
> -; KNL-NEXT: setbe %al
> +; KNL-NEXT: vucomisd %xmm0, %xmm1
> +; KNL-NEXT: setae %al
> ; KNL-NEXT: movzbl %al, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> @@ -1316,16 +1328,16 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
> define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
> ; SSE-LABEL: test_x86_sse2_ucomilt_sd:
> ; SSE: ## BB#0:
> -; SSE-NEXT: ucomisd %xmm1, %xmm0
> -; SSE-NEXT: sbbl %eax, %eax
> -; SSE-NEXT: andl $1, %eax
> +; SSE-NEXT: ucomisd %xmm0, %xmm1
> +; SSE-NEXT: seta %al
> +; SSE-NEXT: movzbl %al, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse2_ucomilt_sd:
> ; KNL: ## BB#0:
> -; KNL-NEXT: vucomisd %xmm1, %xmm0
> -; KNL-NEXT: sbbl %eax, %eax
> -; KNL-NEXT: andl $1, %eax
> +; KNL-NEXT: vucomisd %xmm0, %xmm1
> +; KNL-NEXT: seta %al
> +; KNL-NEXT: movzbl %al, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> ret i32 %res
> @@ -1337,15 +1349,19 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
> ; SSE-LABEL: test_x86_sse2_ucomineq_sd:
> ; SSE: ## BB#0:
> ; SSE-NEXT: ucomisd %xmm1, %xmm0
> -; SSE-NEXT: setne %al
> -; SSE-NEXT: movzbl %al, %eax
> +; SSE-NEXT: setp %al
> +; SSE-NEXT: setne %cl
> +; SSE-NEXT: orb %al, %cl
> +; SSE-NEXT: movzbl %cl, %eax
> ; SSE-NEXT: retl
> ;
> ; KNL-LABEL: test_x86_sse2_ucomineq_sd:
> ; KNL: ## BB#0:
> ; KNL-NEXT: vucomisd %xmm1, %xmm0
> -; KNL-NEXT: setne %al
> -; KNL-NEXT: movzbl %al, %eax
> +; KNL-NEXT: setp %al
> +; KNL-NEXT: setne %cl
> +; KNL-NEXT: orb %al, %cl
> +; KNL-NEXT: movzbl %cl, %eax
> ; KNL-NEXT: retl
> %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
> ret i32 %res
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160720/cb29f294/attachment-0001.html>
More information about the llvm-commits mailing list