[llvm-branch-commits] [llvm-branch] r97839 - in /llvm/branches/Apple/Hermes: ./ include/llvm/CodeGen/SelectionDAG.h include/llvm/CodeGen/SelectionDAGNodes.h include/llvm/Constants.h lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h lib/Target/ARM/ARMInstrNEON.td lib/Target/X86/X86ISelLowering.cpp test/CodeGen/ARM/neon_minmax.ll test/CodeGen/X86/sse-minmax.ll
Bob Wilson
bob.wilson at apple.com
Fri Mar 5 14:06:17 PST 2010
Author: bwilson
Date: Fri Mar 5 16:06:17 2010
New Revision: 97839
URL: http://llvm.org/viewvc/llvm-project?rev=97839&view=rev
Log:
--- Merging r96521 into '.':
U lib/Target/ARM/ARMInstrNEON.td
--- Merging r96525 into '.':
G lib/Target/ARM/ARMInstrNEON.td
--- Merging r96572 into '.':
A test/CodeGen/ARM/neon_minmax.ll
G lib/Target/ARM/ARMInstrNEON.td
U lib/Target/ARM/ARMISelLowering.h
U lib/Target/ARM/ARMISelLowering.cpp
--- Merging r96775 into '.':
U test/CodeGen/X86/sse-minmax.ll
U lib/Target/X86/X86ISelLowering.cpp
--- Merging r97025 into '.':
G test/CodeGen/X86/sse-minmax.ll
U include/llvm/Constants.h
U include/llvm/CodeGen/SelectionDAGNodes.h
U include/llvm/CodeGen/SelectionDAG.h
U lib/CodeGen/SelectionDAG/SelectionDAG.cpp
G lib/Target/X86/X86ISelLowering.cpp
--- Merging r97065 into '.':
U test/CodeGen/ARM/neon_minmax.ll
G lib/Target/ARM/ARMISelLowering.cpp
Added:
llvm/branches/Apple/Hermes/test/CodeGen/ARM/neon_minmax.ll
- copied, changed from r96572, llvm/trunk/test/CodeGen/ARM/neon_minmax.ll
Modified:
llvm/branches/Apple/Hermes/ (props changed)
llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAG.h
llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/branches/Apple/Hermes/include/llvm/Constants.h
llvm/branches/Apple/Hermes/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.cpp
llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.h
llvm/branches/Apple/Hermes/lib/Target/ARM/ARMInstrNEON.td
llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/Apple/Hermes/test/CodeGen/X86/sse-minmax.ll
Propchange: llvm/branches/Apple/Hermes/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar 5 16:06:17 2010
@@ -1 +1 @@
-/llvm/trunk:96621,96825,96827,96990,97071,97538,97707,97757,97782,97797
+/llvm/trunk:96521,96525,96572,96621,96775,96825,96827,96990,97025,97065,97071,97538,97707,97757,97782,97797
Modified: llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAG.h?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAG.h Fri Mar 5 16:06:17 2010
@@ -897,6 +897,15 @@
/// isKnownNeverNan - Test whether the given SDValue is known to never be NaN.
bool isKnownNeverNaN(SDValue Op) const;
+ /// isKnownNeverZero - Test whether the given SDValue is known to never be
+ /// positive or negative Zero.
+ bool isKnownNeverZero(SDValue Op) const;
+
+ /// isEqualTo - Test whether two SDValues are known to compare equal. This
+ /// is true if they are the same value, or if one is negative zero and the
+ /// other positive zero.
+ bool isEqualTo(SDValue A, SDValue B) const;
+
/// isVerifiedDebugInfoDesc - Returns true if the specified SDValue has
/// been verified as a debug information descriptor.
bool isVerifiedDebugInfoDesc(SDValue Op) const;
Modified: llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAGNodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAGNodes.h?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAGNodes.h (original)
+++ llvm/branches/Apple/Hermes/include/llvm/CodeGen/SelectionDAGNodes.h Fri Mar 5 16:06:17 2010
@@ -1810,6 +1810,12 @@
const APFloat& getValueAPF() const { return Value->getValueAPF(); }
const ConstantFP *getConstantFPValue() const { return Value; }
+ /// isZero - Return true if the value is positive or negative zero.
+ bool isZero() const { return Value->isZero(); }
+
+ /// isNaN - Return true if the value is a NaN.
+ bool isNaN() const { return Value->isNaN(); }
+
/// isExactlyValue - We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
/// As such, this method can be used to do an exact bit-for-bit comparison of
Modified: llvm/branches/Apple/Hermes/include/llvm/Constants.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/include/llvm/Constants.h?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/include/llvm/Constants.h (original)
+++ llvm/branches/Apple/Hermes/include/llvm/Constants.h Fri Mar 5 16:06:17 2010
@@ -275,6 +275,12 @@
return Val.isZero() && Val.isNegative();
}
+ /// isZero - Return true if the value is positive or negative zero.
+ bool isZero() const { return Val.isZero(); }
+
+ /// isNaN - Return true if the value is a NaN.
+ bool isNaN() const { return Val.isNaN(); }
+
/// isExactlyValue - We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
/// As such, this method can be used to do an exact bit-for-bit comparison of
Modified: llvm/branches/Apple/Hermes/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/branches/Apple/Hermes/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Fri Mar 5 16:06:17 2010
@@ -2232,6 +2232,29 @@
return false;
}
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+ // For for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
if (!GA) return false;
Modified: llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.cpp?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.cpp Fri Mar 5 16:06:17 2010
@@ -294,6 +294,7 @@
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
}
computeRegisterProperties();
@@ -544,6 +545,8 @@
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
}
}
@@ -3831,23 +3834,106 @@
return SDValue();
}
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered;
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+ // will return -0, so vmin can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+ // will return +0, so vmax can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD: return PerformADDCombine(N, DCI);
- case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
- case ISD::INTRINSIC_WO_CHAIN:
- return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL:
- return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
}
return SDValue();
}
Modified: llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.h?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/branches/Apple/Hermes/lib/Target/ARM/ARMISelLowering.h Fri Mar 5 16:06:17 2010
@@ -131,7 +131,11 @@
VREV16, // reverse elements within 16-bit halfwords
VZIP, // zip (interleave)
VUZP, // unzip (deinterleave)
- VTRN // transpose
+ VTRN, // transpose
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN
};
}
Modified: llvm/branches/Apple/Hermes/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/lib/Target/ARM/ARMInstrNEON.td?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/branches/Apple/Hermes/lib/Target/ARM/ARMInstrNEON.td Fri Mar 5 16:06:17 2010
@@ -89,6 +89,11 @@
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//
@@ -566,34 +571,26 @@
// Instruction Classes
//===----------------------------------------------------------------------===//
-// Basic 2-register operations, both double- and quad-register.
+// Basic 2-register operations: single-, double- and quad-register.
+class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
+ IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
(ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
(ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
-// Basic 2-register operations, scalar single-precision.
-class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
- string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
- IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
-
-class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
- : NEONFPPat<(ResTy (OpNode SPR:$a)),
- (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
@@ -610,21 +607,6 @@
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
-// Basic 2-register intrinsics, scalar single-precision
-class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
- OpcodeStr, Dt, "$dst, $src", "", []>;
-
-class N2VDIntsPat<SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$a)),
- (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -655,7 +637,16 @@
(ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
"$src1 = $dst1, $src2 = $dst2", []>;
-// Basic 3-register operations, both double- and quad-register.
+// Basic 3-register operations: single-, double- and quad-register.
+class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
+ let isCommutable = Commutable;
+}
+
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
@@ -740,23 +731,6 @@
let isCommutable = 0;
}
-// Basic 3-register operations, scalar single-precision
-class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
- : N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
- OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
- let isCommutable = Commutable;
-}
-class N3VDsPat<SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
- (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -824,7 +798,15 @@
let isCommutable = 0;
}
-// Multiply-Add/Sub operations, both double- and quad-register.
+// Multiply-Add/Sub operations: single-, double- and quad-register.
+class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
+
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
@@ -892,25 +874,6 @@
(ResTy (NEONvduplane (OpTy DPR_8:$src3),
imm:$lane)))))))]>;
-// Multiply-Add/Sub operations, scalar single-precision
-class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode OpNode>
- : N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
-
-class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
- (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$acc, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -2391,7 +2354,7 @@
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
// VNEG : Vector Negate (floating-point)
-def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
(outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
"vneg", "f32", "$dst, $src", "",
[(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
@@ -2977,71 +2940,110 @@
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
+class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
+ : NEONFPPat<(ResTy (OpNode SPR:$a)),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$b, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$acc, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$b, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
// These need separate instructions because they must use DPR_VFP2 register
// class which have SPR sub-registers.
// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VDs<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
-def : N3VDsPat<fadd, VADDfd_sfp>;
+def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
+def : N3VSPat<fadd, VADDfd_sfp>;
// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VDs<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
-def : N3VDsPat<fsub, VSUBfd_sfp>;
+def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
+def : N3VSPat<fsub, VSUBfd_sfp>;
// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VDs<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
-def : N3VDsPat<fmul, VMULfd_sfp>;
+def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
+def : N3VSPat<fmul, VMULfd_sfp>;
// Vector Multiply-Accumulate/Subtract used for single-precision FP
// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
//let neverHasSideEffects = 1 in
-//def VMLAfd_sfp : N3VDMulOps<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
+//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
// v2f32, fmul, fadd>;
-//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
//let neverHasSideEffects = 1 in
-//def VMLSfd_sfp : N3VDMulOps<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
+//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
// v2f32, fmul, fsub>;
-//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
+//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
-def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAD,
- "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>;
-def : N2VDIntsPat<fabs, VABSfd_sfp>;
+def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+ "vabs", "f32", "$dst, $src", "", []>;
+def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
// Vector Negate used for single-precision FP
let neverHasSideEffects = 1 in
-def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
- "vneg", "f32", "$dst, $src", "", []>;
-def : N2VDIntsPat<fneg, VNEGf32d_sfp>;
+def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+ "vneg", "f32", "$dst, $src", "", []>;
+def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
+
+// Vector Maximum used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ "vmax", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmax, VMAXfd_sfp>;
+
+// Vector Minimum used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ "vmin", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmin, VMINfd_sfp>;
// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
-def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
- v2i32, v2f32, fp_to_sint>;
-def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
+def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
- v2i32, v2f32, fp_to_uint>;
-def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
+def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
let neverHasSideEffects = 1 in
-def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
- v2f32, v2i32, sint_to_fp>;
-def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
+def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
- v2f32, v2i32, uint_to_fp>;
-def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
+def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
Modified: llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp Fri Mar 5 16:06:17 2010
@@ -8793,10 +8793,9 @@
SDValue RHS = N->getOperand(2);
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
- // instructions have the peculiarity that if either operand is a NaN,
- // they chose what we call the RHS operand (and as such are not symmetric).
- // It happens that this matches the semantics of the common C idiom
- // x<y?x:y and related forms, so we can recognize these cases.
+ // instructions match the semantics of the common C idiom x<y?x:y but not
+ // x<=y?x:y, because of how they handle negative zero (which can be
+ // ignored in unsafe-math mode).
if (Subtarget->hasSSE2() &&
(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
Cond.getOpcode() == ISD::SETCC) {
@@ -8804,36 +8803,34 @@
unsigned Opcode = 0;
// Check for x CC y ? x : y.
- if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a min would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
- // This can be a min, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
@@ -8842,32 +8839,29 @@
break;
case ISD::SETOGE:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS))
+ break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a max would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
- // This can be a max, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
@@ -8876,36 +8870,33 @@
break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
- } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGT:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a min would handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
- // This can be a min, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
@@ -8914,32 +8905,28 @@
break;
case ISD::SETULT:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a max would handle NaNs incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETOLE:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
- // This can be a max, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
Copied: llvm/branches/Apple/Hermes/test/CodeGen/ARM/neon_minmax.ll (from r96572, llvm/trunk/test/CodeGen/ARM/neon_minmax.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/test/CodeGen/ARM/neon_minmax.ll?p2=llvm/branches/Apple/Hermes/test/CodeGen/ARM/neon_minmax.ll&p1=llvm/trunk/test/CodeGen/ARM/neon_minmax.ll&r1=96572&r2=97839&rev=97839&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/neon_minmax.ll (original)
+++ llvm/branches/Apple/Hermes/test/CodeGen/ARM/neon_minmax.ll Fri Mar 5 16:06:17 2010
@@ -8,6 +8,14 @@
ret float %min1
}
+define float @fmin_ole_zero(float %x) nounwind {
+;CHECK: fmin_ole_zero:
+;CHECK-NOT: vmin.f32
+ %cond = fcmp ole float 0.0, %x
+ %min1 = select i1 %cond, float 0.0, float %x
+ ret float %min1
+}
+
define float @fmin_ult(float %x) nounwind {
;CHECK: fmin_ult:
;CHECK: vmin.f32
@@ -32,6 +40,14 @@
ret float %max1
}
+define float @fmax_uge_zero(float %x) nounwind {
+;CHECK: fmax_uge_zero:
+;CHECK-NOT: vmax.f32
+ %cond = fcmp uge float %x, 0.0
+ %max1 = select i1 %cond, float %x, float 0.0
+ ret float %max1
+}
+
define float @fmax_olt_reverse(float %x) nounwind {
;CHECK: fmax_olt_reverse:
;CHECK: vmax.f32
Modified: llvm/branches/Apple/Hermes/test/CodeGen/X86/sse-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/test/CodeGen/X86/sse-minmax.ll?rev=97839&r1=97838&r2=97839&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/test/CodeGen/X86/sse-minmax.ll (original)
+++ llvm/branches/Apple/Hermes/test/CodeGen/X86/sse-minmax.ll Fri Mar 5 16:06:17 2010
@@ -1,17 +1,26 @@
; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math | FileCheck -check-prefix=uNSAFE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
; Some of these patterns can be matched as SSE min or max. Some of
; then can be matched provided that the operands are swapped.
; Some of them can't be matched at all and require a comparison
; and a conditional branch.
-; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse}
+; The naming convention is {,x_,y_}{o,u}{gt,lt,ge,le}{,_inverse}
; x_ : use 0.0 instead of %y
+; y_ : use -0.0 instead of %y
; _inverse : swap the arms of the select.
; CHECK: ogt:
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: ogt:
+; uNSAFE-NEXT: maxsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ogt:
+; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ogt(double %x, double %y) nounwind {
%c = fcmp ogt double %x, %y
%d = select i1 %c, double %x, double %y
@@ -21,6 +30,12 @@
; CHECK: olt:
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: olt:
+; uNSAFE-NEXT: minsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: olt:
+; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @olt(double %x, double %y) nounwind {
%c = fcmp olt double %x, %y
%d = select i1 %c, double %x, double %y
@@ -31,6 +46,14 @@
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: ogt_inverse:
+; uNSAFE-NEXT: minsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ogt_inverse:
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ogt_inverse(double %x, double %y) nounwind {
%c = fcmp ogt double %x, %y
%d = select i1 %c, double %y, double %x
@@ -41,6 +64,14 @@
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: olt_inverse:
+; uNSAFE-NEXT: maxsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: olt_inverse:
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @olt_inverse(double %x, double %y) nounwind {
%c = fcmp olt double %x, %y
%d = select i1 %c, double %y, double %x
@@ -49,6 +80,12 @@
; CHECK: oge:
; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; uNSAFE: oge:
+; uNSAFE-NEXT: maxsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: oge:
+; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @oge(double %x, double %y) nounwind {
%c = fcmp oge double %x, %y
%d = select i1 %c, double %x, double %y
@@ -57,6 +94,10 @@
; CHECK: ole:
; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; uNSAFE: ole:
+; uNSAFE-NEXT: minsd %xmm1, %xmm0
+; FINITE: ole:
+; FINITE-NEXT: minsd %xmm1, %xmm0
define double @ole(double %x, double %y) nounwind {
%c = fcmp ole double %x, %y
%d = select i1 %c, double %x, double %y
@@ -65,6 +106,14 @@
; CHECK: oge_inverse:
; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; uNSAFE: oge_inverse:
+; uNSAFE-NEXT: minsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: oge_inverse:
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @oge_inverse(double %x, double %y) nounwind {
%c = fcmp oge double %x, %y
%d = select i1 %c, double %y, double %x
@@ -73,6 +122,14 @@
; CHECK: ole_inverse:
; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; uNSAFE: ole_inverse:
+; uNSAFE-NEXT: maxsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ole_inverse:
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ole_inverse(double %x, double %y) nounwind {
%c = fcmp ole double %x, %y
%d = select i1 %c, double %y, double %x
@@ -83,6 +140,14 @@
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: x_ogt:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: maxsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ogt:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ogt(double %x) nounwind {
%c = fcmp ogt double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
@@ -93,6 +158,14 @@
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: x_olt:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: minsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_olt:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_olt(double %x) nounwind {
%c = fcmp olt double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
@@ -104,6 +177,16 @@
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: x_ogt_inverse:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: minsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ogt_inverse:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ogt_inverse(double %x) nounwind {
%c = fcmp ogt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -115,6 +198,16 @@
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: x_olt_inverse:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: maxsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_olt_inverse:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_olt_inverse(double %x) nounwind {
%c = fcmp olt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -122,9 +215,15 @@
}
; CHECK: x_oge:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: maxsd %xmm1, %xmm0
-; CHECK-NEXT: ret
+; CHECK: ucomisd %xmm1, %xmm0
+; uNSAFE: x_oge:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: maxsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_oge:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_oge(double %x) nounwind {
%c = fcmp oge double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
@@ -132,9 +231,15 @@
}
; CHECK: x_ole:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: minsd %xmm1, %xmm0
-; CHECK-NEXT: ret
+; CHECK: ucomisd %xmm0, %xmm1
+; uNSAFE: x_ole:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: minsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ole:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ole(double %x) nounwind {
%c = fcmp ole double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
@@ -142,10 +247,17 @@
}
; CHECK: x_oge_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: minsd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
-; CHECK-NEXT: ret
+; CHECK: ucomisd %xmm1, %xmm0
+; uNSAFE: x_oge_inverse:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: minsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_oge_inverse:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_oge_inverse(double %x) nounwind {
%c = fcmp oge double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -153,10 +265,17 @@
}
; CHECK: x_ole_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
-; CHECK-NEXT: ret
+; CHECK: ucomisd %xmm0, %xmm1
+; uNSAFE: x_ole_inverse:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: maxsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ole_inverse:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ole_inverse(double %x) nounwind {
%c = fcmp ole double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -164,7 +283,13 @@
}
; CHECK: ugt:
-; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; CHECK: ucomisd %xmm0, %xmm1
+; uNSAFE: ugt:
+; uNSAFE-NEXT: maxsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ugt:
+; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ugt(double %x, double %y) nounwind {
%c = fcmp ugt double %x, %y
%d = select i1 %c, double %x, double %y
@@ -172,7 +297,13 @@
}
; CHECK: ult:
-; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; CHECK: ucomisd %xmm1, %xmm0
+; uNSAFE: ult:
+; uNSAFE-NEXT: minsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ult:
+; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ult(double %x, double %y) nounwind {
%c = fcmp ult double %x, %y
%d = select i1 %c, double %x, double %y
@@ -180,7 +311,15 @@
}
; CHECK: ugt_inverse:
-; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; CHECK: ucomisd %xmm0, %xmm1
+; uNSAFE: ugt_inverse:
+; uNSAFE-NEXT: minsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ugt_inverse:
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ugt_inverse(double %x, double %y) nounwind {
%c = fcmp ugt double %x, %y
%d = select i1 %c, double %y, double %x
@@ -188,7 +327,15 @@
}
; CHECK: ult_inverse:
-; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; CHECK: ucomisd %xmm1, %xmm0
+; uNSAFE: ult_inverse:
+; uNSAFE-NEXT: maxsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ult_inverse:
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ult_inverse(double %x, double %y) nounwind {
%c = fcmp ult double %x, %y
%d = select i1 %c, double %y, double %x
@@ -196,9 +343,15 @@
}
; CHECK: uge:
-; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: uge:
+; uNSAFE-NEXT: maxsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: uge:
+; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @uge(double %x, double %y) nounwind {
%c = fcmp uge double %x, %y
%d = select i1 %c, double %x, double %y
@@ -209,6 +362,12 @@
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: ule:
+; uNSAFE-NEXT: minsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ule:
+; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ule(double %x, double %y) nounwind {
%c = fcmp ule double %x, %y
%d = select i1 %c, double %x, double %y
@@ -218,6 +377,14 @@
; CHECK: uge_inverse:
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: uge_inverse:
+; uNSAFE-NEXT: minsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: uge_inverse:
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @uge_inverse(double %x, double %y) nounwind {
%c = fcmp uge double %x, %y
%d = select i1 %c, double %y, double %x
@@ -227,6 +394,14 @@
; CHECK: ule_inverse:
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: ule_inverse:
+; uNSAFE-NEXT: maxsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: ule_inverse:
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @ule_inverse(double %x, double %y) nounwind {
%c = fcmp ule double %x, %y
%d = select i1 %c, double %y, double %x
@@ -234,10 +409,15 @@
}
; CHECK: x_ugt:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
-; CHECK-NEXT: ret
+; CHECK: ucomisd %xmm0, %xmm1
+; uNSAFE: x_ugt:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: maxsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ugt:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ugt(double %x) nounwind {
%c = fcmp ugt double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
@@ -245,10 +425,15 @@
}
; CHECK: x_ult:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: minsd %xmm0, %xmm1
-; CHECK-NEXT: movapd %xmm1, %xmm0
-; CHECK-NEXT: ret
+; CHECK: ucomisd %xmm1, %xmm0
+; uNSAFE: x_ult:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: minsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ult:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ult(double %x) nounwind {
%c = fcmp ult double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
@@ -256,9 +441,17 @@
}
; CHECK: x_ugt_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: minsd %xmm1, %xmm0
-; CHECK-NEXT: ret
+; CHECK: ucomisd %xmm0, %xmm1
+; uNSAFE: x_ugt_inverse:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: minsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ugt_inverse:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ugt_inverse(double %x) nounwind {
%c = fcmp ugt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -266,9 +459,17 @@
}
; CHECK: x_ult_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: maxsd %xmm1, %xmm0
-; CHECK-NEXT: ret
+; CHECK: ucomisd %xmm1, %xmm0
+; uNSAFE: x_ult_inverse:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: maxsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ult_inverse:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ult_inverse(double %x) nounwind {
%c = fcmp ult double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -280,6 +481,14 @@
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: x_uge:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: maxsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_uge:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_uge(double %x) nounwind {
%c = fcmp uge double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
@@ -291,6 +500,14 @@
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: x_ule:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: minsd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ule:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ule(double %x) nounwind {
%c = fcmp ule double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
@@ -301,6 +518,16 @@
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: x_uge_inverse:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: minsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_uge_inverse:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_uge_inverse(double %x) nounwind {
%c = fcmp uge double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
@@ -311,16 +538,301 @@
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
+; uNSAFE: x_ule_inverse:
+; uNSAFE-NEXT: pxor %xmm1, %xmm1
+; uNSAFE-NEXT: maxsd %xmm0, %xmm1
+; uNSAFE-NEXT: movapd %xmm1, %xmm0
+; uNSAFE-NEXT: ret
+; FINITE: x_ule_inverse:
+; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
define double @x_ule_inverse(double %x) nounwind {
%c = fcmp ule double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
+; CHECK: y_ogt:
+; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
+; CHECK-NEXT: ret
+; UNSAFE: y_ogt:
+; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ogt:
+; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
+; FINITE-NEXT: ret
+define double @y_ogt(double %x) nounwind {
+ %c = fcmp ogt double %x, -0.000000e+00
+ %d = select i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+; CHECK: y_olt:
+; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
+; CHECK-NEXT: ret
+; UNSAFE: y_olt:
+; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_olt:
+; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
+; FINITE-NEXT: ret
+define double @y_olt(double %x) nounwind {
+ %c = fcmp olt double %x, -0.000000e+00
+ %d = select i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+; CHECK: y_ogt_inverse:
+; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+; UNSAFE: y_ogt_inverse:
+; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
+; UNSAFE-NEXT: minsd %xmm0, %xmm1
+; UNSAFE-NEXT: movapd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ogt_inverse:
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
+define double @y_ogt_inverse(double %x) nounwind {
+ %c = fcmp ogt double %x, -0.000000e+00
+ %d = select i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: y_olt_inverse:
+; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+; UNSAFE: y_olt_inverse:
+; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
+; UNSAFE-NEXT: maxsd %xmm0, %xmm1
+; UNSAFE-NEXT: movapd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_olt_inverse:
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
+define double @y_olt_inverse(double %x) nounwind {
+ %c = fcmp olt double %x, -0.000000e+00
+ %d = select i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: y_oge:
+; CHECK: ucomisd %xmm1, %xmm0
+; UNSAFE: y_oge:
+; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_oge:
+; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
+; FINITE-NEXT: ret
+define double @y_oge(double %x) nounwind {
+ %c = fcmp oge double %x, -0.000000e+00
+ %d = select i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+; CHECK: y_ole:
+; CHECK: ucomisd %xmm0, %xmm1
+; UNSAFE: y_ole:
+; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ole:
+; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
+; FINITE-NEXT: ret
+define double @y_ole(double %x) nounwind {
+ %c = fcmp ole double %x, -0.000000e+00
+ %d = select i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+; CHECK: y_oge_inverse:
+; CHECK: ucomisd %xmm1, %xmm0
+; UNSAFE: y_oge_inverse:
+; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
+; UNSAFE-NEXT: minsd %xmm0, %xmm1
+; UNSAFE-NEXT: movapd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_oge_inverse:
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
+define double @y_oge_inverse(double %x) nounwind {
+ %c = fcmp oge double %x, -0.000000e+00
+ %d = select i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: y_ole_inverse:
+; CHECK: ucomisd %xmm0, %xmm1
+; UNSAFE: y_ole_inverse:
+; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
+; UNSAFE-NEXT: maxsd %xmm0, %xmm1
+; UNSAFE-NEXT: movapd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ole_inverse:
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
+define double @y_ole_inverse(double %x) nounwind {
+ %c = fcmp ole double %x, -0.000000e+00
+ %d = select i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: y_ugt:
+; CHECK: ucomisd %xmm0, %xmm1
+; UNSAFE: y_ugt:
+; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ugt:
+; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
+; FINITE-NEXT: ret
+define double @y_ugt(double %x) nounwind {
+ %c = fcmp ugt double %x, -0.000000e+00
+ %d = select i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+; CHECK: y_ult:
+; CHECK: ucomisd %xmm1, %xmm0
+; UNSAFE: y_ult:
+; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ult:
+; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
+; FINITE-NEXT: ret
+define double @y_ult(double %x) nounwind {
+ %c = fcmp ult double %x, -0.000000e+00
+ %d = select i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+; CHECK: y_ugt_inverse:
+; CHECK: ucomisd %xmm0, %xmm1
+; UNSAFE: y_ugt_inverse:
+; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
+; UNSAFE-NEXT: minsd %xmm0, %xmm1
+; UNSAFE-NEXT: movapd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ugt_inverse:
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
+define double @y_ugt_inverse(double %x) nounwind {
+ %c = fcmp ugt double %x, -0.000000e+00
+ %d = select i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: y_ult_inverse:
+; CHECK: ucomisd %xmm1, %xmm0
+; UNSAFE: y_ult_inverse:
+; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
+; UNSAFE-NEXT: maxsd %xmm0, %xmm1
+; UNSAFE-NEXT: movapd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ult_inverse:
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
+define double @y_ult_inverse(double %x) nounwind {
+ %c = fcmp ult double %x, -0.000000e+00
+ %d = select i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: y_uge:
+; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+; UNSAFE: y_uge:
+; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_uge:
+; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
+; FINITE-NEXT: ret
+define double @y_uge(double %x) nounwind {
+ %c = fcmp uge double %x, -0.000000e+00
+ %d = select i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+; CHECK: y_ule:
+; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+; UNSAFE: y_ule:
+; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ule:
+; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
+; FINITE-NEXT: ret
+define double @y_ule(double %x) nounwind {
+ %c = fcmp ule double %x, -0.000000e+00
+ %d = select i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+; CHECK: y_uge_inverse:
+; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
+; CHECK-NEXT: ret
+; UNSAFE: y_uge_inverse:
+; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
+; UNSAFE-NEXT: minsd %xmm0, %xmm1
+; UNSAFE-NEXT: movapd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_uge_inverse:
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
+define double @y_uge_inverse(double %x) nounwind {
+ %c = fcmp uge double %x, -0.000000e+00
+ %d = select i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: y_ule_inverse:
+; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
+; CHECK-NEXT: ret
+; UNSAFE: y_ule_inverse:
+; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
+; UNSAFE-NEXT: maxsd %xmm0, %xmm1
+; UNSAFE-NEXT: movapd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+; FINITE: y_ule_inverse:
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movapd %xmm1, %xmm0
+; FINITE-NEXT: ret
+define double @y_ule_inverse(double %x) nounwind {
+ %c = fcmp ule double %x, -0.000000e+00
+ %d = select i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
; Test a few more misc. cases.
; CHECK: clampTo3k_a:
; CHECK: minsd
+; uNSAFE: clampTo3k_a:
+; uNSAFE: minsd
+; FINITE: clampTo3k_a:
+; FINITE: minsd
define double @clampTo3k_a(double %x) nounwind readnone {
entry:
%0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]
@@ -330,6 +842,10 @@
; CHECK: clampTo3k_b:
; CHECK: minsd
+; uNSAFE: clampTo3k_b:
+; uNSAFE: minsd
+; FINITE: clampTo3k_b:
+; FINITE: minsd
define double @clampTo3k_b(double %x) nounwind readnone {
entry:
%0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]
@@ -339,6 +855,10 @@
; CHECK: clampTo3k_c:
; CHECK: maxsd
+; uNSAFE: clampTo3k_c:
+; uNSAFE: maxsd
+; FINITE: clampTo3k_c:
+; FINITE: maxsd
define double @clampTo3k_c(double %x) nounwind readnone {
entry:
%0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]
@@ -348,6 +868,10 @@
; CHECK: clampTo3k_d:
; CHECK: maxsd
+; uNSAFE: clampTo3k_d:
+; uNSAFE: maxsd
+; FINITE: clampTo3k_d:
+; FINITE: maxsd
define double @clampTo3k_d(double %x) nounwind readnone {
entry:
%0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]
@@ -357,6 +881,10 @@
; CHECK: clampTo3k_e:
; CHECK: maxsd
+; uNSAFE: clampTo3k_e:
+; uNSAFE: maxsd
+; FINITE: clampTo3k_e:
+; FINITE: maxsd
define double @clampTo3k_e(double %x) nounwind readnone {
entry:
%0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1]
@@ -366,6 +894,10 @@
; CHECK: clampTo3k_f:
; CHECK: maxsd
+; uNSAFE: clampTo3k_f:
+; uNSAFE: maxsd
+; FINITE: clampTo3k_f:
+; FINITE: maxsd
define double @clampTo3k_f(double %x) nounwind readnone {
entry:
%0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1]
@@ -375,6 +907,10 @@
; CHECK: clampTo3k_g:
; CHECK: minsd
+; uNSAFE: clampTo3k_g:
+; uNSAFE: minsd
+; FINITE: clampTo3k_g:
+; FINITE: minsd
define double @clampTo3k_g(double %x) nounwind readnone {
entry:
%0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1]
@@ -384,6 +920,10 @@
; CHECK: clampTo3k_h:
; CHECK: minsd
+; uNSAFE: clampTo3k_h:
+; uNSAFE: minsd
+; FINITE: clampTo3k_h:
+; FINITE: minsd
define double @clampTo3k_h(double %x) nounwind readnone {
entry:
%0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1]
More information about the llvm-branch-commits
mailing list