[llvm] r269569 - Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Sat May 14 08:06:10 PDT 2016
Author: delena
Date: Sat May 14 10:06:09 2016
New Revision: 269569
URL: http://llvm.org/viewvc/llvm-project?rev=269569&view=rev
Log:
Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512
Differential Revision: http://reviews.llvm.org/D19261
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 14 10:06:09 2016
@@ -17503,30 +17503,66 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG);
- assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
- SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, dl, MVT::i8), Cond);
+ SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+ SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS);
+ SDValue SetCC;
+ switch (CC) {
+ case ISD::SETEQ: { // (ZF = 1 and PF = 0)
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_E, dl, MVT::i8), Comi);
+ SDValue SetNP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_NP, dl, MVT::i8),
+ Comi);
+ SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
+ break;
+ }
+ case ISD::SETNE: { // (ZF = 0 or PF = 1)
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_NE, dl, MVT::i8), Comi);
+ SDValue SetP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_P, dl, MVT::i8),
+ Comi);
+ SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
+ break;
+ }
+ case ISD::SETGT: // (CF = 0 and ZF = 0)
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_A, dl, MVT::i8), Comi);
+ break;
+ case ISD::SETLT: { // The condition is opposite to GT. Swap the operands.
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_A, dl, MVT::i8), InvComi);
+ break;
+ }
+ case ISD::SETGE: // CF = 0
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_AE, dl, MVT::i8), Comi);
+ break;
+ case ISD::SETLE: // The condition is opposite to GE. Swap the operands.
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_AE, dl, MVT::i8), InvComi);
+ break;
+ default:
+ llvm_unreachable("Unexpected illegal condition!");
+ }
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case COMI_RM: { // Comparison intrinsics with Sae
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDValue CC = Op.getOperand(3);
+ unsigned CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
SDValue Sae = Op.getOperand(4);
- auto ComiType = TranslateX86ConstCondToX86CC(CC);
- // choose between ordered and unordered (comi/ucomi)
- unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 : IntrData->Opc1;
- SDValue Cond;
- if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
- X86::STATIC_ROUNDING::CUR_DIRECTION)
- Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
+
+ SDValue FCmp;
+ if (cast<ConstantSDNode>(Sae)->getZExtValue() ==
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+ DAG.getConstant(CondVal, dl, MVT::i8));
else
- Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond);
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+ DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+ // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
+ return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 14 10:06:09 2016
@@ -2331,96 +2331,6 @@ static void verifyIntrinsicTables() {
std::end(IntrinsicsWithChain)) &&
"Intrinsic data tables should have unique entries");
}
-
-// X86 specific compare constants.
-// They must be kept in synch with avxintrin.h
-#define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
-#define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
-#define _X86_CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
-#define _X86_CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
-#define _X86_CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
-#define _X86_CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
-#define _X86_CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
-#define _X86_CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */
-#define _X86_CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
-#define _X86_CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */
-#define _X86_CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
-#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
-#define _X86_CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
-#define _X86_CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */
-#define _X86_CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
-#define _X86_CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
-#define _X86_CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
-#define _X86_CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
-#define _X86_CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */
-#define _X86_CMP_UNORD_S 0x13 /* Unordered (signaling) */
-#define _X86_CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
-#define _X86_CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */
-#define _X86_CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */
-#define _X86_CMP_ORD_S 0x17 /* Ordered (signaling) */
-#define _X86_CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
-#define _X86_CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */
-#define _X86_CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */
-#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
-#define _X86_CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
-#define _X86_CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */
-#define _X86_CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
-#define _X86_CMP_TRUE_US 0x1f /* True (unordered, signaling) */
-
-/*
-* Get comparison modifier from _mm_comi_round_sd/ss intrinsic
-* Return tuple <isOrdered, X86 condcode>
-*/
-static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue &imm) {
- ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(imm);
- unsigned IntImm = CImm->getZExtValue();
- // On a floating point condition, the flags are set as follows:
- // ZF PF CF op
- // 0 | 0 | 0 | X > Y
- // 0 | 0 | 1 | X < Y
- // 1 | 0 | 0 | X == Y
- // 1 | 1 | 1 | unordered
- switch (IntImm) {
- default: llvm_unreachable("Invalid floating point compare value for Comi!");
- case _X86_CMP_EQ_OQ: // 0x00 - Equal (ordered, nonsignaling)
- case _X86_CMP_EQ_OS: // 0x10 - Equal (ordered, signaling)
- return std::make_tuple(true, X86::COND_E);
- case _X86_CMP_EQ_UQ: // 0x08 - Equal (unordered, non-signaling)
- case _X86_CMP_EQ_US: // 0x18 - Equal (unordered, signaling)
- return std::make_tuple(false , X86::COND_E);
- case _X86_CMP_LT_OS: // 0x01 - Less-than (ordered, signaling)
- case _X86_CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_B);
- case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling)
- case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling)
- return std::make_tuple(false , X86::COND_B);
- case _X86_CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling)
- case _X86_CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_BE);
- case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling)
- case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_BE);
- case _X86_CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling)
- case _X86_CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_A);
- case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered,signaling)
- case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_A);
- case _X86_CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling)
- case _X86_CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_AE);
- case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling)
- case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_AE);
- case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling)
- case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling)
- return std::make_tuple(true, X86::COND_NE);
- case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling)
- case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling)
- return std::make_tuple(false, X86::COND_NE);
- }
-}
-
} // End llvm namespace
#endif
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -104,8 +104,10 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -142,8 +144,8 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: setbe %al
+; CHECK-NEXT: vcomisd %xmm0, %xmm1
+; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -155,9 +157,9 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcomisd %xmm0, %xmm1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -169,8 +171,10 @@ define i32 @test_x86_sse2_comineq_sd(<2
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -912,8 +916,10 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -950,8 +956,8 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: setbe %al
+; CHECK-NEXT: vucomisd %xmm0, %xmm1
+; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -963,9 +969,9 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vucomisd %xmm0, %xmm1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -977,8 +983,10 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1699,8 +1707,10 @@ define i32 @test_x86_sse_comieq_ss(<4 x
; CHECK-LABEL: test_x86_sse_comieq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1737,8 +1747,8 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comile_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomiss %xmm1, %xmm0
-; CHECK-NEXT: setbe %al
+; CHECK-NEXT: vcomiss %xmm0, %xmm1
+; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -1750,9 +1760,9 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comilt_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomiss %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcomiss %xmm0, %xmm1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1764,8 +1774,10 @@ define i32 @test_x86_sse_comineq_ss(<4 x
; CHECK-LABEL: test_x86_sse_comineq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -2003,8 +2015,10 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
; CHECK-LABEL: test_x86_sse_ucomieq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -2041,8 +2055,8 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomile_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: setbe %al
+; CHECK-NEXT: vucomiss %xmm0, %xmm1
+; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -2054,9 +2068,9 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomilt_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vucomiss %xmm0, %xmm1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -2068,8 +2082,10 @@ define i32 @test_x86_sse_ucomineq_ss(<4
; CHECK-LABEL: test_x86_sse_ucomineq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 14 10:06:09 2016
@@ -6307,9 +6307,8 @@ define <8 x double>@test_int_x86_avx512_
define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vcmpeqsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
ret i32 %res
@@ -6318,9 +6317,8 @@ define i32 @test_x86_avx512_comi_sd_eq_s
define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
ret i32 %res
@@ -6329,9 +6327,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq_
define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
ret i32 %res
@@ -6340,9 +6337,8 @@ define i32 @test_x86_avx512_comi_sd_eq(<
define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vcmpeq_uqsd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
ret i32 %res
@@ -6351,9 +6347,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq(
define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpltsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
ret i32 %res
@@ -6362,9 +6357,8 @@ define i32 @test_x86_avx512_comi_sd_lt_s
define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpngesd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
ret i32 %res
@@ -6373,9 +6367,8 @@ define i32 @test_x86_avx512_ucomi_sd_lt_
define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpltsd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
ret i32 %res
@@ -6384,9 +6377,8 @@ define i32 @test_x86_avx512_comi_sd_lt(<
define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpngesd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
ret i32 %res
@@ -6397,9 +6389,8 @@ declare i32 @llvm.x86.avx512.vcomi.sd(<2
define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpngess %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -54,15 +54,19 @@ define i32 @test_x86_sse_comieq_ss(<4 x
; SSE-LABEL: test_x86_sse_comieq_ss:
; SSE: ## BB#0:
; SSE-NEXT: comiss %xmm1, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setnp %al
+; SSE-NEXT: sete %cl
+; SSE-NEXT: andb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_comieq_ss:
; KNL: ## BB#0:
; KNL-NEXT: vcomiss %xmm1, %xmm0
-; KNL-NEXT: sete %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setnp %al
+; KNL-NEXT: sete %cl
+; KNL-NEXT: andb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_comile_ss:
; SSE: ## BB#0:
-; SSE-NEXT: comiss %xmm1, %xmm0
-; SSE-NEXT: setbe %al
+; SSE-NEXT: comiss %xmm0, %xmm1
+; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_comile_ss:
; KNL: ## BB#0:
-; KNL-NEXT: vcomiss %xmm1, %xmm0
-; KNL-NEXT: setbe %al
+; KNL-NEXT: vcomiss %xmm0, %xmm1
+; KNL-NEXT: setae %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_comilt_ss:
; SSE: ## BB#0:
-; SSE-NEXT: comiss %xmm1, %xmm0
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: comiss %xmm0, %xmm1
+; SSE-NEXT: seta %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_comilt_ss:
; KNL: ## BB#0:
-; KNL-NEXT: vcomiss %xmm1, %xmm0
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vcomiss %xmm0, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -154,15 +158,19 @@ define i32 @test_x86_sse_comineq_ss(<4 x
; SSE-LABEL: test_x86_sse_comineq_ss:
; SSE: ## BB#0:
; SSE-NEXT: comiss %xmm1, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setp %al
+; SSE-NEXT: setne %cl
+; SSE-NEXT: orb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_comineq_ss:
; KNL: ## BB#0:
; KNL-NEXT: vcomiss %xmm1, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setp %al
+; KNL-NEXT: setne %cl
+; KNL-NEXT: orb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -504,15 +512,19 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
; SSE-LABEL: test_x86_sse_ucomieq_ss:
; SSE: ## BB#0:
; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setnp %al
+; SSE-NEXT: sete %cl
+; SSE-NEXT: andb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_ucomieq_ss:
; KNL: ## BB#0:
; KNL-NEXT: vucomiss %xmm1, %xmm0
-; KNL-NEXT: sete %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setnp %al
+; KNL-NEXT: sete %cl
+; KNL-NEXT: andb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -563,15 +575,15 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_ucomile_ss:
; SSE: ## BB#0:
-; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: setbe %al
+; SSE-NEXT: ucomiss %xmm0, %xmm1
+; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_ucomile_ss:
; KNL: ## BB#0:
-; KNL-NEXT: vucomiss %xmm1, %xmm0
-; KNL-NEXT: setbe %al
+; KNL-NEXT: vucomiss %xmm0, %xmm1
+; KNL-NEXT: setae %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -583,16 +595,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_ucomilt_ss:
; SSE: ## BB#0:
-; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: ucomiss %xmm0, %xmm1
+; SSE-NEXT: seta %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_ucomilt_ss:
; KNL: ## BB#0:
-; KNL-NEXT: vucomiss %xmm1, %xmm0
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vucomiss %xmm0, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -604,15 +616,19 @@ define i32 @test_x86_sse_ucomineq_ss(<4
; SSE-LABEL: test_x86_sse_ucomineq_ss:
; SSE: ## BB#0:
; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setp %al
+; SSE-NEXT: setne %cl
+; SSE-NEXT: orb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_ucomineq_ss:
; KNL: ## BB#0:
; KNL-NEXT: vucomiss %xmm1, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setp %al
+; KNL-NEXT: setne %cl
+; KNL-NEXT: orb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -54,15 +54,19 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
; SSE-LABEL: test_x86_sse2_comieq_sd:
; SSE: ## BB#0:
; SSE-NEXT: comisd %xmm1, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setnp %al
+; SSE-NEXT: sete %cl
+; SSE-NEXT: andb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comieq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vcomisd %xmm1, %xmm0
-; KNL-NEXT: sete %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setnp %al
+; KNL-NEXT: sete %cl
+; KNL-NEXT: andb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comile_sd:
; SSE: ## BB#0:
-; SSE-NEXT: comisd %xmm1, %xmm0
-; SSE-NEXT: setbe %al
+; SSE-NEXT: comisd %xmm0, %xmm1
+; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comile_sd:
; KNL: ## BB#0:
-; KNL-NEXT: vcomisd %xmm1, %xmm0
-; KNL-NEXT: setbe %al
+; KNL-NEXT: vcomisd %xmm0, %xmm1
+; KNL-NEXT: setae %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comilt_sd:
; SSE: ## BB#0:
-; SSE-NEXT: comisd %xmm1, %xmm0
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: comisd %xmm0, %xmm1
+; SSE-NEXT: seta %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comilt_sd:
; KNL: ## BB#0:
-; KNL-NEXT: vcomisd %xmm1, %xmm0
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vcomisd %xmm0, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -154,15 +158,19 @@ define i32 @test_x86_sse2_comineq_sd(<2
; SSE-LABEL: test_x86_sse2_comineq_sd:
; SSE: ## BB#0:
; SSE-NEXT: comisd %xmm1, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setp %al
+; SSE-NEXT: setne %cl
+; SSE-NEXT: orb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comineq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vcomisd %xmm1, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setp %al
+; KNL-NEXT: setne %cl
+; KNL-NEXT: orb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1237,15 +1245,19 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
; SSE: ## BB#0:
; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setnp %al
+; SSE-NEXT: sete %cl
+; SSE-NEXT: andb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomieq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vucomisd %xmm1, %xmm0
-; KNL-NEXT: sete %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setnp %al
+; KNL-NEXT: sete %cl
+; KNL-NEXT: andb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1296,15 +1308,15 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomile_sd:
; SSE: ## BB#0:
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setbe %al
+; SSE-NEXT: ucomisd %xmm0, %xmm1
+; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomile_sd:
; KNL: ## BB#0:
-; KNL-NEXT: vucomisd %xmm1, %xmm0
-; KNL-NEXT: setbe %al
+; KNL-NEXT: vucomisd %xmm0, %xmm1
+; KNL-NEXT: setae %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -1316,16 +1328,16 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomilt_sd:
; SSE: ## BB#0:
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: ucomisd %xmm0, %xmm1
+; SSE-NEXT: seta %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomilt_sd:
; KNL: ## BB#0:
-; KNL-NEXT: vucomisd %xmm1, %xmm0
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vucomisd %xmm0, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1337,15 +1349,19 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
; SSE-LABEL: test_x86_sse2_ucomineq_sd:
; SSE: ## BB#0:
; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setp %al
+; SSE-NEXT: setne %cl
+; SSE-NEXT: orb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomineq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vucomisd %xmm1, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setp %al
+; KNL-NEXT: setne %cl
+; KNL-NEXT: orb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
More information about the llvm-commits
mailing list