[llvm] r269569 - Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512
Demikhovsky, Elena via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 20 05:22:15 PDT 2016
Hi Andrea,
I need to understand what you mean by “switch”. An undocumented switch at the clang level? Does clang already have such flags for backward compatibility?
Or do you mean an #ifdef around the “comi” intrinsics in the header file, where the user defines a VAR before including intrin.h?
Or an #ifdef inside codegen, and you’ll compile clang with this specific flag?
- Elena
From: Andrea Di Biagio [mailto:andrea.dibiagio at gmail.com]
Sent: Wednesday, July 20, 2016 15:05
To: Demikhovsky, Elena <elena.demikhovsky at intel.com>
Cc: llvm-commits <llvm-commits at lists.llvm.org>
Subject: Re: [llvm] r269569 - Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512
Hi Elena,
I agree that this patch is the correct thing to do.
However, we at Sony are a bit concerned by this patch because it breaks some very important customer codebases which heavily rely on the old lowering behavior.
What if we add a switch to enable the old lowering (as suggested in PR28510)? I think it would be very useful in the short term. Users that are stuck with old codebases would be able to pass that switch; other users will have a bit of time to upgrade their codebases.
What do you think?
Cheers,
Andrea
On Sat, May 14, 2016 at 4:06 PM, Elena Demikhovsky via llvm-commits <llvm-commits at lists.llvm.org> wrote:
Author: delena
Date: Sat May 14 10:06:09 2016
New Revision: 269569
URL: http://llvm.org/viewvc/llvm-project?rev=269569&view=rev
Log:
Fixed lowering of _comi_ intrinsics from all sets - SSE/SSE2/AVX/AVX-512
Differential revision http://reviews.llvm.org/D19261
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 14 10:06:09 2016
@@ -17503,30 +17503,66 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG);
- assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
- SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, dl, MVT::i8), Cond);
+ SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+ SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS);
+ SDValue SetCC;
+ switch (CC) {
+ case ISD::SETEQ: { // (ZF = 0 and PF = 0)
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_E, dl, MVT::i8), Comi);
+ SDValue SetNP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_NP, dl, MVT::i8),
+ Comi);
+ SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
+ break;
+ }
+ case ISD::SETNE: { // (ZF = 1 or PF = 1)
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_NE, dl, MVT::i8), Comi);
+ SDValue SetP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_P, dl, MVT::i8),
+ Comi);
+ SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
+ break;
+ }
+ case ISD::SETGT: // (CF = 0 and ZF = 0)
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_A, dl, MVT::i8), Comi);
+ break;
+ case ISD::SETLT: { // The condition is opposite to GT. Swap the operands.
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_A, dl, MVT::i8), InvComi);
+ break;
+ }
+ case ISD::SETGE: // CF = 0
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_AE, dl, MVT::i8), Comi);
+ break;
+ case ISD::SETLE: // The condition is opposite to GE. Swap the operands.
+ SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_AE, dl, MVT::i8), InvComi);
+ break;
+ default:
+ llvm_unreachable("Unexpected illegal condition!");
+ }
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case COMI_RM: { // Comparison intrinsics with Sae
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDValue CC = Op.getOperand(3);
+ unsigned CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
SDValue Sae = Op.getOperand(4);
- auto ComiType = TranslateX86ConstCondToX86CC(CC);
- // choose between ordered and unordered (comi/ucomi)
- unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 : IntrData->Opc1;
- SDValue Cond;
- if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
- X86::STATIC_ROUNDING::CUR_DIRECTION)
- Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
+
+ SDValue FCmp;
+ if (cast<ConstantSDNode>(Sae)->getZExtValue() ==
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+ DAG.getConstant(CondVal, dl, MVT::i8));
else
- Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond);
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+ DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+ // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
+ return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat May 14 10:06:09 2016
@@ -2331,96 +2331,6 @@ static void verifyIntrinsicTables() {
std::end(IntrinsicsWithChain)) &&
"Intrinsic data tables should have unique entries");
}
-
-// X86 specific compare constants.
-// They must be kept in synch with avxintrin.h
-#define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
-#define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
-#define _X86_CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
-#define _X86_CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
-#define _X86_CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
-#define _X86_CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
-#define _X86_CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
-#define _X86_CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */
-#define _X86_CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
-#define _X86_CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */
-#define _X86_CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
-#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
-#define _X86_CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
-#define _X86_CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */
-#define _X86_CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
-#define _X86_CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
-#define _X86_CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
-#define _X86_CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
-#define _X86_CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */
-#define _X86_CMP_UNORD_S 0x13 /* Unordered (signaling) */
-#define _X86_CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
-#define _X86_CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */
-#define _X86_CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */
-#define _X86_CMP_ORD_S 0x17 /* Ordered (signaling) */
-#define _X86_CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
-#define _X86_CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */
-#define _X86_CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */
-#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
-#define _X86_CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
-#define _X86_CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */
-#define _X86_CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
-#define _X86_CMP_TRUE_US 0x1f /* True (unordered, signaling) */
-
-/*
-* Get comparison modifier from _mm_comi_round_sd/ss intrinsic
-* Return tuple <isOrdered, X86 condcode>
-*/
-static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue &imm) {
- ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(imm);
- unsigned IntImm = CImm->getZExtValue();
- // On a floating point condition, the flags are set as follows:
- // ZF PF CF op
- // 0 | 0 | 0 | X > Y
- // 0 | 0 | 1 | X < Y
- // 1 | 0 | 0 | X == Y
- // 1 | 1 | 1 | unordered
- switch (IntImm) {
- default: llvm_unreachable("Invalid floating point compare value for Comi!");
- case _X86_CMP_EQ_OQ: // 0x00 - Equal (ordered, nonsignaling)
- case _X86_CMP_EQ_OS: // 0x10 - Equal (ordered, signaling)
- return std::make_tuple(true, X86::COND_E);
- case _X86_CMP_EQ_UQ: // 0x08 - Equal (unordered, non-signaling)
- case _X86_CMP_EQ_US: // 0x18 - Equal (unordered, signaling)
- return std::make_tuple(false , X86::COND_E);
- case _X86_CMP_LT_OS: // 0x01 - Less-than (ordered, signaling)
- case _X86_CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_B);
- case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling)
- case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling)
- return std::make_tuple(false , X86::COND_B);
- case _X86_CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling)
- case _X86_CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_BE);
- case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling)
- case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_BE);
- case _X86_CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling)
- case _X86_CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_A);
- case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered,signaling)
- case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_A);
- case _X86_CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling)
- case _X86_CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling)
- return std::make_tuple(true, X86::COND_AE);
- case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling)
- case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling)
- return std::make_tuple(false, X86::COND_AE);
- case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling)
- case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling)
- return std::make_tuple(true, X86::COND_NE);
- case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling)
- case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling)
- return std::make_tuple(false, X86::COND_NE);
- }
-}
-
} // End llvm namespace
#endif
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -104,8 +104,10 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -142,8 +144,8 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: setbe %al
+; CHECK-NEXT: vcomisd %xmm0, %xmm1
+; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -155,9 +157,9 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcomisd %xmm0, %xmm1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -169,8 +171,10 @@ define i32 @test_x86_sse2_comineq_sd(<2
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -912,8 +916,10 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -950,8 +956,8 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: setbe %al
+; CHECK-NEXT: vucomisd %xmm0, %xmm1
+; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -963,9 +969,9 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vucomisd %xmm0, %xmm1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -977,8 +983,10 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1699,8 +1707,10 @@ define i32 @test_x86_sse_comieq_ss(<4 x
; CHECK-LABEL: test_x86_sse_comieq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1737,8 +1747,8 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comile_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomiss %xmm1, %xmm0
-; CHECK-NEXT: setbe %al
+; CHECK-NEXT: vcomiss %xmm0, %xmm1
+; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -1750,9 +1760,9 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comilt_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomiss %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcomiss %xmm0, %xmm1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1764,8 +1774,10 @@ define i32 @test_x86_sse_comineq_ss(<4 x
; CHECK-LABEL: test_x86_sse_comineq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -2003,8 +2015,10 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
; CHECK-LABEL: test_x86_sse_ucomieq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setnp %al
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: andb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -2041,8 +2055,8 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomile_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: setbe %al
+; CHECK-NEXT: vucomiss %xmm0, %xmm1
+; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -2054,9 +2068,9 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomilt_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vucomiss %xmm0, %xmm1
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -2068,8 +2082,10 @@ define i32 @test_x86_sse_ucomineq_ss(<4
; CHECK-LABEL: test_x86_sse_ucomineq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sat May 14 10:06:09 2016
@@ -6307,9 +6307,8 @@ define <8 x double>@test_int_x86_avx512_
define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vcmpeqsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
ret i32 %res
@@ -6318,9 +6317,8 @@ define i32 @test_x86_avx512_comi_sd_eq_s
define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
ret i32 %res
@@ -6329,9 +6327,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq_
define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
ret i32 %res
@@ -6340,9 +6337,8 @@ define i32 @test_x86_avx512_comi_sd_eq(<
define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vcmpeq_uqsd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
ret i32 %res
@@ -6351,9 +6347,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq(
define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpltsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
ret i32 %res
@@ -6362,9 +6357,8 @@ define i32 @test_x86_avx512_comi_sd_lt_s
define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpngesd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
ret i32 %res
@@ -6373,9 +6367,8 @@ define i32 @test_x86_avx512_ucomi_sd_lt_
define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpltsd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
ret i32 %res
@@ -6384,9 +6377,8 @@ define i32 @test_x86_avx512_comi_sd_lt(<
define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpngesd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
ret i32 %res
@@ -6397,9 +6389,8 @@ declare i32 @llvm.x86.avx512.vcomi.sd(<2
define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
; CHECK: ## BB#0:
-; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vcmpngess %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -54,15 +54,19 @@ define i32 @test_x86_sse_comieq_ss(<4 x
; SSE-LABEL: test_x86_sse_comieq_ss:
; SSE: ## BB#0:
; SSE-NEXT: comiss %xmm1, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setnp %al
+; SSE-NEXT: sete %cl
+; SSE-NEXT: andb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_comieq_ss:
; KNL: ## BB#0:
; KNL-NEXT: vcomiss %xmm1, %xmm0
-; KNL-NEXT: sete %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setnp %al
+; KNL-NEXT: sete %cl
+; KNL-NEXT: andb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_comile_ss:
; SSE: ## BB#0:
-; SSE-NEXT: comiss %xmm1, %xmm0
-; SSE-NEXT: setbe %al
+; SSE-NEXT: comiss %xmm0, %xmm1
+; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_comile_ss:
; KNL: ## BB#0:
-; KNL-NEXT: vcomiss %xmm1, %xmm0
-; KNL-NEXT: setbe %al
+; KNL-NEXT: vcomiss %xmm0, %xmm1
+; KNL-NEXT: setae %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_comilt_ss:
; SSE: ## BB#0:
-; SSE-NEXT: comiss %xmm1, %xmm0
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: comiss %xmm0, %xmm1
+; SSE-NEXT: seta %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_comilt_ss:
; KNL: ## BB#0:
-; KNL-NEXT: vcomiss %xmm1, %xmm0
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vcomiss %xmm0, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -154,15 +158,19 @@ define i32 @test_x86_sse_comineq_ss(<4 x
; SSE-LABEL: test_x86_sse_comineq_ss:
; SSE: ## BB#0:
; SSE-NEXT: comiss %xmm1, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setp %al
+; SSE-NEXT: setne %cl
+; SSE-NEXT: orb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_comineq_ss:
; KNL: ## BB#0:
; KNL-NEXT: vcomiss %xmm1, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setp %al
+; KNL-NEXT: setne %cl
+; KNL-NEXT: orb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -504,15 +512,19 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
; SSE-LABEL: test_x86_sse_ucomieq_ss:
; SSE: ## BB#0:
; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setnp %al
+; SSE-NEXT: sete %cl
+; SSE-NEXT: andb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_ucomieq_ss:
; KNL: ## BB#0:
; KNL-NEXT: vucomiss %xmm1, %xmm0
-; KNL-NEXT: sete %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setnp %al
+; KNL-NEXT: sete %cl
+; KNL-NEXT: andb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -563,15 +575,15 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_ucomile_ss:
; SSE: ## BB#0:
-; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: setbe %al
+; SSE-NEXT: ucomiss %xmm0, %xmm1
+; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_ucomile_ss:
; KNL: ## BB#0:
-; KNL-NEXT: vucomiss %xmm1, %xmm0
-; KNL-NEXT: setbe %al
+; KNL-NEXT: vucomiss %xmm0, %xmm1
+; KNL-NEXT: setae %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -583,16 +595,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse_ucomilt_ss:
; SSE: ## BB#0:
-; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: ucomiss %xmm0, %xmm1
+; SSE-NEXT: seta %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_ucomilt_ss:
; KNL: ## BB#0:
-; KNL-NEXT: vucomiss %xmm1, %xmm0
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vucomiss %xmm0, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -604,15 +616,19 @@ define i32 @test_x86_sse_ucomineq_ss(<4
; SSE-LABEL: test_x86_sse_ucomineq_ss:
; SSE: ## BB#0:
; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setp %al
+; SSE-NEXT: setne %cl
+; SSE-NEXT: orb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse_ucomineq_ss:
; KNL: ## BB#0:
; KNL-NEXT: vucomiss %xmm1, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setp %al
+; KNL-NEXT: setne %cl
+; KNL-NEXT: orb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=269569&r1=269568&r2=269569&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sat May 14 10:06:09 2016
@@ -54,15 +54,19 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
; SSE-LABEL: test_x86_sse2_comieq_sd:
; SSE: ## BB#0:
; SSE-NEXT: comisd %xmm1, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setnp %al
+; SSE-NEXT: sete %cl
+; SSE-NEXT: andb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comieq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vcomisd %xmm1, %xmm0
-; KNL-NEXT: sete %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setnp %al
+; KNL-NEXT: sete %cl
+; KNL-NEXT: andb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -113,15 +117,15 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comile_sd:
; SSE: ## BB#0:
-; SSE-NEXT: comisd %xmm1, %xmm0
-; SSE-NEXT: setbe %al
+; SSE-NEXT: comisd %xmm0, %xmm1
+; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comile_sd:
; KNL: ## BB#0:
-; KNL-NEXT: vcomisd %xmm1, %xmm0
-; KNL-NEXT: setbe %al
+; KNL-NEXT: vcomisd %xmm0, %xmm1
+; KNL-NEXT: setae %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -133,16 +137,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comilt_sd:
; SSE: ## BB#0:
-; SSE-NEXT: comisd %xmm1, %xmm0
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: comisd %xmm0, %xmm1
+; SSE-NEXT: seta %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comilt_sd:
; KNL: ## BB#0:
-; KNL-NEXT: vcomisd %xmm1, %xmm0
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vcomisd %xmm0, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -154,15 +158,19 @@ define i32 @test_x86_sse2_comineq_sd(<2
; SSE-LABEL: test_x86_sse2_comineq_sd:
; SSE: ## BB#0:
; SSE-NEXT: comisd %xmm1, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setp %al
+; SSE-NEXT: setne %cl
+; SSE-NEXT: orb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comineq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vcomisd %xmm1, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setp %al
+; KNL-NEXT: setne %cl
+; KNL-NEXT: orb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1237,15 +1245,19 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
; SSE: ## BB#0:
; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setnp %al
+; SSE-NEXT: sete %cl
+; SSE-NEXT: andb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomieq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vucomisd %xmm1, %xmm0
-; KNL-NEXT: sete %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setnp %al
+; KNL-NEXT: sete %cl
+; KNL-NEXT: andb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1296,15 +1308,15 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomile_sd:
; SSE: ## BB#0:
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setbe %al
+; SSE-NEXT: ucomisd %xmm0, %xmm1
+; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomile_sd:
; KNL: ## BB#0:
-; KNL-NEXT: vucomisd %xmm1, %xmm0
-; KNL-NEXT: setbe %al
+; KNL-NEXT: vucomisd %xmm0, %xmm1
+; KNL-NEXT: setae %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -1316,16 +1328,16 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomilt_sd:
; SSE: ## BB#0:
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: ucomisd %xmm0, %xmm1
+; SSE-NEXT: seta %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomilt_sd:
; KNL: ## BB#0:
-; KNL-NEXT: vucomisd %xmm1, %xmm0
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vucomisd %xmm0, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -1337,15 +1349,19 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
; SSE-LABEL: test_x86_sse2_ucomineq_sd:
; SSE: ## BB#0:
; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: setp %al
+; SSE-NEXT: setne %cl
+; SSE-NEXT: orb %al, %cl
+; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomineq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vucomisd %xmm1, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: setp %al
+; KNL-NEXT: setne %cl
+; KNL-NEXT: orb %al, %cl
+; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
_______________________________________________
llvm-commits mailing list
llvm-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
---------------------------------------------------------------------
Intel Israel (74) Limited
This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160720/ff57b52a/attachment-0001.html>
More information about the llvm-commits
mailing list