[llvm] r211544 - [FastISel][X86] Add support for floating-point select.
Juergen Ributzka
juergen at apple.com
Mon Jun 23 14:55:40 PDT 2014
Author: ributzka
Date: Mon Jun 23 16:55:40 2014
New Revision: 211544
URL: http://llvm.org/viewvc/llvm-project?rev=211544&view=rev
Log:
[FastISel][X86] Add support for floating-point select.
This extends the select lowering to support floating-point selects. The
lowering depends on SSE instructions and that the condition comes from a
floating-point compare. Under these conditions it is possible to emit an
optimized instruction sequence that doesn't require any branches to
simulate the select.
Added:
llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll
Modified:
llvm/trunk/lib/Target/X86/X86FastISel.cpp
Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=211544&r1=211543&r2=211544&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Jun 23 16:55:40 2014
@@ -113,6 +113,8 @@ private:
bool X86FastEmitCMoveSelect(const Instruction *I);
+ bool X86FastEmitSSESelect(const Instruction *I);
+
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
@@ -235,6 +237,41 @@ getX86ConditonCode(CmpInst::Predicate Pr
return std::make_pair(CC, NeedSwap);
}
+/// \brief Map an IR floating-point compare predicate onto the immediate
+/// condition code used by the SSE CMPSS/CMPSD instructions.
+///
+/// Returns the condition code and a flag telling the caller to swap the
+/// compare operands first. Predicates with no single SSE encoding
+/// (FCMP_UEQ/FCMP_ONE) return the sentinel value 8; callers must reject it,
+/// since only codes 0-7 are valid SSE condition codes.
+static std::pair<unsigned, bool>
+getX86SSECondtionCode(CmpInst::Predicate Predicate) {
+ unsigned CC;
+ bool NeedSwap = false;
+
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
+ switch (Predicate) {
+ default: llvm_unreachable("Unexpected predicate");
+ case CmpInst::FCMP_OEQ: CC = 0; break;
+ // OGT/OGE/ULE/ULT are lowered by swapping the operands and using the
+ // mirrored condition code — note the deliberate fall-throughs below.
+ case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLT: CC = 1; break;
+ case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLE: CC = 2; break;
+ case CmpInst::FCMP_UNO: CC = 3; break;
+ case CmpInst::FCMP_UNE: CC = 4; break;
+ case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGE: CC = 5; break;
+ case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGT: CC = 6; break;
+ case CmpInst::FCMP_ORD: CC = 7; break;
+ // No single SSE condition code covers these two; return the sentinel 8 so
+ // the caller can bail out (it checks CC > 7).
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_ONE: CC = 8; break;
+ }
+
+ return std::make_pair(CC, NeedSwap);
+}
+
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
@@ -1728,6 +1765,93 @@ bool X86FastISel::X86FastEmitCMoveSelect
return true;
}
+/// \brief Emit SSE instructions to lower the select.
+///
+/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
+/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
+/// SSE instructions are available.
+bool X86FastISel::X86FastEmitSSESelect(const Instruction *I) {
+  MVT RetVT;
+  if (!isTypeLegal(I->getType(), RetVT))
+    return false;
+
+  // The condition must come directly from a floating-point compare.
+  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
+  if (!CI)
+    return false;
+
+  // Only f32 with SSE1 and f64 with SSE2 are supported, and the compare
+  // operands must have the same type as the select result.
+  if (I->getType() != CI->getOperand(0)->getType() ||
+      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
+        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
+    return false;
+
+  const Value *CmpLHS = CI->getOperand(0);
+  const Value *CmpRHS = CI->getOperand(1);
+  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+  // We don't have to materialize a zero constant for this case and can just use
+  // %x again on the RHS.
+  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+    if (CmpRHSC && CmpRHSC->isNullValue())
+      CmpRHS = CmpLHS;
+  }
+
+  unsigned CC;
+  bool NeedSwap;
+  std::tie(CC, NeedSwap) = getX86SSECondtionCode(Predicate);
+  // FCMP_UEQ/FCMP_ONE have no single SSE condition code (sentinel 8).
+  if (CC > 7)
+    return false;
+
+  if (NeedSwap)
+    std::swap(CmpLHS, CmpRHS);
+
+  // Opcode tables indexed by [f32/f64][HasAVX][CMP, AND, ANDN, OR].
+  static unsigned OpcTable[2][2][4] = {
+    { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+      { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
+    { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
+      { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+  };
+
+  bool HasAVX = Subtarget->hasAVX();
+  unsigned *Opc = nullptr;
+  switch (RetVT.SimpleTy) {
+  default: return false;
+  case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
+  case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+  }
+
+  const Value *LHS = I->getOperand(1);
+  const Value *RHS = I->getOperand(2);
+
+  unsigned LHSReg = getRegForValue(LHS);
+  bool LHSIsKill = hasTrivialKill(LHS);
+
+  unsigned RHSReg = getRegForValue(RHS);
+  bool RHSIsKill = hasTrivialKill(RHS);
+
+  unsigned CmpLHSReg = getRegForValue(CmpLHS);
+  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
+
+  unsigned CmpRHSReg = getRegForValue(CmpRHS);
+  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
+
+  // Bail out if any operand failed to materialize. Check the registers, not
+  // the Value pointers: CmpLHS/CmpRHS are always non-null here, so testing
+  // them would never catch a failed getRegForValue on the compare operands.
+  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
+    return false;
+
+  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+  unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+                                     CmpRHSReg, CmpRHSIsKill, CC);
+  unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+                                    LHSReg, LHSIsKill);
+  unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+                                     RHSReg, RHSIsKill);
+  unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+                                       AndReg, /*IsKill=*/true);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT RetVT;
if (!isTypeLegal(I->getType(), RetVT))
@@ -1762,6 +1886,10 @@ bool X86FastISel::X86SelectSelect(const
if (X86FastEmitCMoveSelect(I))
return true;
+ // Try to use a sequence of SSE instructions to simulate a conditional move.
+ if (X86FastEmitSSESelect(I))
+ return true;
+
return false;
}
Added: llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll?rev=211544&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-select-sse.ll Mon Jun 23 16:55:40 2014
@@ -0,0 +1,391 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
+
+; Test all cmp predicates that can be used with SSE.
+
+define float @select_fcmp_oeq_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_oeq_f32
+; CHECK: cmpeqss %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: orps %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_oeq_f32
+; AVX: vcmpeqss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp oeq float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_oeq_f64
+; CHECK: cmpeqsd %xmm1, %xmm0
+; CHECK-NEXT: andpd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm0
+; CHECK-NEXT: orpd %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_oeq_f64
+; AVX: vcmpeqsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp oeq double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ogt_f32
+; CHECK: cmpltss %xmm0, %xmm1
+; CHECK-NEXT: andps %xmm1, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm1
+; CHECK-NEXT: orps %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ogt_f32
+; AVX: vcmpltss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp ogt float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ogt_f64
+; CHECK: cmpltsd %xmm0, %xmm1
+; CHECK-NEXT: andpd %xmm1, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm1
+; CHECK-NEXT: orpd %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ogt_f64
+; AVX: vcmpltsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp ogt double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_oge_f32
+; CHECK: cmpless %xmm0, %xmm1
+; CHECK-NEXT: andps %xmm1, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm1
+; CHECK-NEXT: orps %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_oge_f32
+; AVX: vcmpless %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp oge float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_oge_f64
+; CHECK: cmplesd %xmm0, %xmm1
+; CHECK-NEXT: andpd %xmm1, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm1
+; CHECK-NEXT: orpd %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_oge_f64
+; AVX: vcmplesd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp oge double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_olt_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_olt_f32
+; CHECK: cmpltss %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: orps %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_olt_f32
+; AVX: vcmpltss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp olt float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_olt_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_olt_f64
+; CHECK: cmpltsd %xmm1, %xmm0
+; CHECK-NEXT: andpd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm0
+; CHECK-NEXT: orpd %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_olt_f64
+; AVX: vcmpltsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp olt double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_ole_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ole_f32
+; CHECK: cmpless %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: orps %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ole_f32
+; AVX: vcmpless %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp ole float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_ole_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ole_f64
+; CHECK: cmplesd %xmm1, %xmm0
+; CHECK-NEXT: andpd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm0
+; CHECK-NEXT: orpd %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ole_f64
+; AVX: vcmplesd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp ole double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_ord_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ord_f32
+; CHECK: cmpordss %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: orps %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ord_f32
+; AVX: vcmpordss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp ord float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_ord_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ord_f64
+; CHECK: cmpordsd %xmm1, %xmm0
+; CHECK-NEXT: andpd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm0
+; CHECK-NEXT: orpd %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ord_f64
+; AVX: vcmpordsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp ord double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_uno_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_uno_f32
+; CHECK: cmpunordss %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: orps %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_uno_f32
+; AVX: vcmpunordss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp uno float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_uno_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_uno_f64
+; CHECK: cmpunordsd %xmm1, %xmm0
+; CHECK-NEXT: andpd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm0
+; CHECK-NEXT: orpd %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_uno_f64
+; AVX: vcmpunordsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp uno double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_ugt_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ugt_f32
+; CHECK: cmpnless %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: orps %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ugt_f32
+; AVX: vcmpnless %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp ugt float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_ugt_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ugt_f64
+; CHECK: cmpnlesd %xmm1, %xmm0
+; CHECK-NEXT: andpd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm0
+; CHECK-NEXT: orpd %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_ugt_f64
+; AVX: vcmpnlesd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp ugt double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_uge_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_uge_f32
+; CHECK: cmpnltss %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: orps %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_uge_f32
+; AVX: vcmpnltss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp uge float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_uge_f64
+; CHECK: cmpnltsd %xmm1, %xmm0
+; CHECK-NEXT: andpd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm0
+; CHECK-NEXT: orpd %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_uge_f64
+; AVX: vcmpnltsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp uge double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ult_f32
+; CHECK: cmpnless %xmm0, %xmm1
+; CHECK-NEXT: andps %xmm1, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm1
+; CHECK-NEXT: orps %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ult_f32
+; AVX: vcmpnless %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp ult float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ult_f64
+; CHECK: cmpnlesd %xmm0, %xmm1
+; CHECK-NEXT: andpd %xmm1, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm1
+; CHECK-NEXT: orpd %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ult_f64
+; AVX: vcmpnlesd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp ult double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_ule_f32
+; CHECK: cmpnltss %xmm0, %xmm1
+; CHECK-NEXT: andps %xmm1, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm1
+; CHECK-NEXT: orps %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ule_f32
+; AVX: vcmpnltss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp ule float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_ule_f64
+; CHECK: cmpnltsd %xmm0, %xmm1
+; CHECK-NEXT: andpd %xmm1, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm1
+; CHECK-NEXT: orpd %xmm2, %xmm1
+; AVX-LABEL: select_fcmp_ule_f64
+; AVX: vcmpnltsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp ule double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
+define float @select_fcmp_une_f32(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: select_fcmp_une_f32
+; CHECK: cmpneqss %xmm1, %xmm0
+; CHECK-NEXT: andps %xmm0, %xmm2
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: orps %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_une_f32
+; AVX: vcmpneqss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+ %1 = fcmp une float %a, %b
+ %2 = select i1 %1, float %c, float %d
+ ret float %2
+}
+
+define double @select_fcmp_une_f64(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: select_fcmp_une_f64
+; CHECK: cmpneqsd %xmm1, %xmm0
+; CHECK-NEXT: andpd %xmm0, %xmm2
+; CHECK-NEXT: andnpd %xmm3, %xmm0
+; CHECK-NEXT: orpd %xmm2, %xmm0
+; AVX-LABEL: select_fcmp_une_f64
+; AVX: vcmpneqsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+ %1 = fcmp une double %a, %b
+ %2 = select i1 %1, double %c, double %d
+ ret double %2
+}
+
More information about the llvm-commits
mailing list