[llvm-commits] [llvm] r42246 - in /llvm/trunk: include/llvm/Target/TargetLowering.h lib/Target/Alpha/AlphaISelLowering.cpp lib/Target/IA64/IA64ISelLowering.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFPStack.td lib/Target/X86/X86InstrInfo.td lib/Target/X86/X86InstrSSE.td
Dale Johannesen
dalej at apple.com
Sun Sep 23 07:52:20 PDT 2007
Author: johannes
Date: Sun Sep 23 09:52:20 2007
New Revision: 42246
URL: http://llvm.org/viewvc/llvm-project?rev=42246&view=rev
Log:
Fix PR 1681. When the X86 target uses +sse -sse2,
keep f32 in SSE registers and f64 in x87. This
is effectively a new codegen mode.
Change addLegalFPImmediate to permit float and
double variants to do different things.
Adjust callers.
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp
llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrFPStack.td
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=42246&r1=42245&r2=42246&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Sun Sep 23 09:52:20 2007
@@ -783,17 +783,7 @@
/// addLegalFPImmediate - Indicate that this target can instruction select
/// the specified FP immediate natively.
void addLegalFPImmediate(const APFloat& Imm) {
- // Incoming constants are expected to be double. We also add
- // the float version. It is expected that all constants are exactly
- // representable as floats.
- assert(&Imm.getSemantics() == &APFloat::IEEEdouble);
- APFloat Immf = APFloat(Imm);
- // Rounding mode is not supposed to matter here...
- if (Immf.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven) !=
- APFloat::opOK)
- assert(0);
LegalFPImmediates.push_back(Imm);
- LegalFPImmediates.push_back(Immf);
}
/// setTargetDAGCombine - Targets should invoke this method for each target
Modified: llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp?rev=42246&r1=42245&r2=42246&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp Sun Sep 23 09:52:20 2007
@@ -140,7 +140,9 @@
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
addLegalFPImmediate(APFloat(+0.0)); //F31
+ addLegalFPImmediate(APFloat(+0.0f)); //F31
addLegalFPImmediate(APFloat(-0.0)); //-F31
+ addLegalFPImmediate(APFloat(-0.0f)); //-F31
setJumpBufSize(272);
setJumpBufAlignment(16);
Modified: llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp?rev=42246&r1=42245&r2=42246&view=diff
==============================================================================
--- llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp Sun Sep 23 09:52:20 2007
@@ -120,7 +120,9 @@
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
addLegalFPImmediate(APFloat(+0.0));
+ addLegalFPImmediate(APFloat(+0.0f));
addLegalFPImmediate(APFloat(+1.0));
+ addLegalFPImmediate(APFloat(+1.0f));
}
const char *IA64TargetLowering::getTargetNodeName(unsigned Opcode) const {
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=42246&r1=42245&r2=42246&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 23 09:52:20 2007
@@ -40,7 +40,8 @@
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- X86ScalarSSE = Subtarget->hasSSE2();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
@@ -87,7 +88,7 @@
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
} else {
- if (X86ScalarSSE)
+ if (X86ScalarSSEf64)
// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
else
@@ -99,7 +100,7 @@
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
// SSE has no i16 to fp conversion, only i32
- if (X86ScalarSSE) {
+ if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
@@ -118,7 +119,7 @@
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
- if (X86ScalarSSE) {
+ if (X86ScalarSSEf32) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
@@ -137,7 +138,7 @@
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
} else {
- if (X86ScalarSSE && !Subtarget->hasSSE3())
+ if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
@@ -148,7 +149,7 @@
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSE) {
+ if (!X86ScalarSSEf64) {
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
}
@@ -271,7 +272,8 @@
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- if (X86ScalarSSE) {
+ if (X86ScalarSSEf64) {
+ // f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
addRegisterClass(MVT::f64, X86::FR64RegisterClass);
@@ -300,7 +302,8 @@
// cases we handle.
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
- addLegalFPImmediate(APFloat(+0.0)); // xorps / xorpd
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
// Conversions to long double (in X87) go through memory.
setConvertAction(MVT::f32, MVT::f80, Expand);
@@ -309,7 +312,55 @@
// Conversions from long double (in X87) go through memory.
setConvertAction(MVT::f80, MVT::f32, Expand);
setConvertAction(MVT::f80, MVT::f64, Expand);
+ } else if (X86ScalarSSEf32) {
+ // Use SSE for f32, x87 for f64.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+
+ // Use ANDPS to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORPS to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+ // Use ANDPS and ORPS to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+ // SSE->x87 conversions go through memory.
+ setConvertAction(MVT::f32, MVT::f64, Expand);
+ setConvertAction(MVT::f32, MVT::f80, Expand);
+
+ // x87->SSE truncations need to go through memory.
+ setConvertAction(MVT::f80, MVT::f32, Expand);
+ setConvertAction(MVT::f64, MVT::f32, Expand);
+ // And x87->x87 truncations also.
+ setConvertAction(MVT::f80, MVT::f64, Expand);
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
} else {
+ // f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
@@ -335,6 +386,10 @@
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
}
// Long double always uses X87.
@@ -583,7 +638,8 @@
// If this is an FP return with ScalarSSE, we need to move the value from
// an XMM register onto the fp-stack.
- if (X86ScalarSSE) {
+ if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
+ (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
SDOperand MemLoc;
// If this is a load into a scalarsse value, don't store the loaded value
@@ -659,7 +715,8 @@
// If we are using ScalarSSE, store ST(0) to the stack and reload it into
// an XMM register.
- if (X86ScalarSSE) {
+ if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
+ (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
@@ -3334,7 +3391,9 @@
StackSlot, NULL, 0);
// These are really Legal; caller falls through into that case.
- if (SrcVT==MVT::i32 && Op.getValueType() != MVT::f80 && X86ScalarSSE)
+ if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
+ return Result;
+ if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
return Result;
if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 &&
Subtarget->is64Bit())
@@ -3342,7 +3401,8 @@
// Build the FILD
SDVTList Tys;
- bool useSSE = X86ScalarSSE && Op.getValueType() != MVT::f80;
+ bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
+ (X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
@@ -3390,8 +3450,11 @@
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
// These are really Legal.
- if (Op.getValueType() == MVT::i32 && X86ScalarSSE &&
- Op.getOperand(0).getValueType() != MVT::f80)
+ if (Op.getValueType() == MVT::i32 &&
+ X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
+ return Result;
+ if (Op.getValueType() == MVT::i32 &&
+ X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
return Result;
if (Subtarget->is64Bit() &&
Op.getValueType() == MVT::i64 &&
@@ -3408,7 +3471,8 @@
SDOperand Chain = DAG.getEntryNode();
SDOperand Value = Op.getOperand(0);
- if (X86ScalarSSE && Op.getOperand(0).getValueType() != MVT::f80) {
+ if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) ||
+ (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
@@ -3620,8 +3684,9 @@
// pressure reason)?
SDOperand Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- bool IllegalFPCMov = !X86ScalarSSE &&
- MVT::isFloatingPoint(Op.getValueType()) &&
+ bool IllegalFPCMov =
+ ! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) ||
+ (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) &&
!hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
!IllegalFPCMov) {
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=42246&r1=42245&r2=42246&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sun Sep 23 09:52:20 2007
@@ -373,8 +373,12 @@
/// X86StackPtr - X86 physical register used as stack ptr.
unsigned X86StackPtr;
- /// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
- bool X86ScalarSSE;
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf32;
+ bool X86ScalarSSEf64;
SDNode *LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode*TheCall,
unsigned CallingConv, SelectionDAG &DAG);
Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=42246&r1=42245&r2=42246&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Sun Sep 23 09:52:20 2007
@@ -152,30 +152,33 @@
[(X86fpset RFP80:$src)]>;// ST(0) = FPR
}
-// FpI - Floating Point Psuedo Instruction template. Predicated on FPStack.
-// Note that f80-only instructions are used even in SSE mode and use FpI_
-// not this predicate.
-class FpI<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
- FpI_<outs, ins, fp, pattern>, Requires<[FPStack]>;
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
// Register copies. Just copies, the shortening ones do not truncate.
-def MOV_Fp3232 : FpI<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
-def MOV_Fp3264 : FpI<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
-def MOV_Fp6432 : FpI<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
-def MOV_Fp6464 : FpI<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
-def MOV_Fp8032 : FpI<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
-def MOV_Fp3280 : FpI<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
-def MOV_Fp8064 : FpI<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
-def MOV_Fp6480 : FpI<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
+def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
+def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
+def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
+def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
+def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
+def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
+def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
+def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
def MOV_Fp8080 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
// Factoring for arithmetic.
multiclass FPBinary_rr<SDNode OpNode> {
// Register op register -> register
// These are separated out because they have no reversed form.
-def _Fp32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
[(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
-def _Fp64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
[(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
[(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
@@ -185,13 +188,13 @@
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
// ST(0) = ST(0) + [mem]
-def _Fp32m : FpI<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
[(set RFP32:$dst,
(OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
-def _Fp64m : FpI<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
[(set RFP64:$dst,
(OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
-def _Fp64m32: FpI<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
[(set RFP64:$dst,
(OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
@@ -205,16 +208,16 @@
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
!strconcat("f", !strconcat(asmstring, "{l}\t$src"))>;
// ST(0) = ST(0) + [memint]
-def _FpI16m32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src1,
(X86fild addr:$src2, i16)))]>;
-def _FpI32m32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src1,
(X86fild addr:$src2, i32)))]>;
-def _FpI16m64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src1,
(X86fild addr:$src2, i16)))]>;
-def _FpI32m64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src1,
(X86fild addr:$src2, i32)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), OneArgFPRW,
@@ -271,9 +274,9 @@
// Unary operations.
multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
-def _Fp32 : FpI<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src))]>;
-def _Fp64 : FpI<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src))]>;
@@ -286,9 +289,9 @@
defm SIN : FPUnary<fsin, 0xFE, "fsin">;
defm COS : FPUnary<fcos, 0xFF, "fcos">;
-def TST_Fp32 : FpI<(outs), (ins RFP32:$src), OneArgFP,
+def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP,
[]>;
-def TST_Fp64 : FpI<(outs), (ins RFP64:$src), OneArgFP,
+def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP,
[]>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP,
[]>;
@@ -296,10 +299,10 @@
// Floating point cmovs.
multiclass FPCMov<PatLeaf cc> {
- def _Fp32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), CondMovFP,
+ def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), CondMovFP,
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
cc))]>;
- def _Fp64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), CondMovFP,
+ def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), CondMovFP,
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
cc))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), CondMovFP,
@@ -337,30 +340,30 @@
// Floating point loads & stores.
let isLoad = 1 in {
-def LD_Fp32m : FpI<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
+def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (loadf32 addr:$src))]>;
-def LD_Fp64m : FpI<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
[(set RFP80:$dst, (loadf80 addr:$src))]>;
}
-def LD_Fp32m64 : FpI<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
+def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
-def ILD_Fp16m32: FpI<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
+def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i16))]>;
-def ILD_Fp32m32: FpI<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
+def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i32))]>;
-def ILD_Fp64m32: FpI<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
+def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i64))]>;
-def ILD_Fp16m64: FpI<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
+def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i16))]>;
-def ILD_Fp32m64: FpI<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
+def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i32))]>;
-def ILD_Fp64m64: FpI<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
+def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i16))]>;
@@ -369,11 +372,11 @@
def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i64))]>;
-def ST_Fp32m : FpI<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
[(store RFP32:$src, addr:$op)]>;
-def ST_Fp64m32 : FpI<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
[(truncstoref32 RFP64:$src, addr:$op)]>;
-def ST_Fp64m : FpI<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
+def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
[(store RFP64:$src, addr:$op)]>;
def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
[(truncstoref32 RFP80:$src, addr:$op)]>;
@@ -381,19 +384,19 @@
[(truncstoref64 RFP80:$src, addr:$op)]>;
// FST does not support 80-bit memory target; FSTP must be used.
-def ST_FpP32m : FpI<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
-def ST_FpP64m32 : FpI<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
-def ST_FpP64m : FpI<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
-def ST_FpP80m32 : FpI<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
-def ST_FpP80m64 : FpI<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
[(store RFP80:$src, addr:$op)]>;
-def IST_Fp16m32 : FpI<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp32m32 : FpI<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp64m32 : FpI<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp16m64 : FpI<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp32m64 : FpI<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp64m64 : FpI<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
@@ -456,13 +459,13 @@
// Floating point constant loads.
let isReMaterializable = 1 in {
-def LD_Fp032 : FpI<(outs RFP32:$dst), (ins), ZeroArgFP,
+def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm0)]>;
-def LD_Fp132 : FpI<(outs RFP32:$dst), (ins), ZeroArgFP,
+def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm1)]>;
-def LD_Fp064 : FpI<(outs RFP64:$dst), (ins), ZeroArgFP,
+def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm0)]>;
-def LD_Fp164 : FpI<(outs RFP64:$dst), (ins), ZeroArgFP,
+def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm1)]>;
def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm0)]>;
@@ -475,13 +478,13 @@
// Floating point compares.
-def UCOM_Fpr32 : FpI<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[]>; // FPSW = cmp ST(0) with ST(i)
-def UCOM_FpIr32: FpI<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(X86cmp RFP32:$lhs, RFP32:$rhs)]>; // CC = ST(0) cmp ST(i)
-def UCOM_Fpr64 : FpI<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[]>; // FPSW = cmp ST(0) with ST(i)
-def UCOM_FpIr64: FpI<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(X86cmp RFP64:$lhs, RFP64:$rhs)]>; // CC = ST(0) cmp ST(i)
def UCOM_Fpr80 : FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[]>; // FPSW = cmp ST(0) with ST(i)
@@ -535,16 +538,16 @@
def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op, RFP80:$src)>;
// Floating point constant -0.0 and -1.0
-def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStack]>;
-def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStack]>;
-def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStack]>;
-def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStack]>;
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
// Used to conv. i64 to f64 since there isn't a SSE version.
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
-def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStack]>;
-def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStack]>;
-def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStack]>;
+def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStackf64]>;
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=42246&r1=42245&r2=42246&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Sun Sep 23 09:52:20 2007
@@ -160,7 +160,8 @@
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
-def FPStack : Predicate<"!Subtarget->hasSSE2()">;
+def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">;
def HasLow4G : Predicate<"Subtarget->hasLow4GUserSpaceAddress()">;
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=42246&r1=42245&r2=42246&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Sep 23 09:52:20 2007
@@ -50,7 +50,7 @@
Requires<[HasSSE1]>;
def IMPLICIT_DEF_FR32 : I<0, Pseudo, (outs FR32:$dst), (ins),
"#IMPLICIT_DEF $dst",
- [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
+ [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
def IMPLICIT_DEF_FR64 : I<0, Pseudo, (outs FR64:$dst), (ins),
"#IMPLICIT_DEF $dst",
[(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
More information about the llvm-commits
mailing list