[llvm-commits] [llvm] r37847 - in /llvm/trunk/lib: CodeGen/SelectionDAG/ Target/X86/
Dale Johannesen
dalej at apple.com
Mon Jul 2 17:53:03 PDT 2007
Author: johannes
Date: Mon Jul 2 19:53:03 2007
New Revision: 37847
URL: http://llvm.org/viewvc/llvm-project?rev=37847&view=rev
Log:
Fix for PR 1505 (and 1489). Rewrite the X87 register
model to include f32 variants. Some factoring
improvements are forthcoming.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrFPStack.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
llvm/trunk/lib/Target/X86/X86RegisterInfo.td
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Mon Jul 2 19:53:03 2007
@@ -3016,11 +3016,33 @@
}
break;
+ case ISD::FP_ROUND:
+ if (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)) ==
+ TargetLowering::Expand) {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ MVT::ValueType VT = Op.getValueType(); // 32
+ const Type *Ty = MVT::getTypeForValueType(VT);
+ uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI =
+ MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0),
+ StackSlot, NULL, 0, VT);
+ Result = DAG.getLoad(VT, Result, StackSlot, NULL, 0, VT);
+ break;
+ }
+ // FALL THROUGH
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::FP_EXTEND:
- case ISD::FP_ROUND:
switch (getTypeAction(Node->getOperand(0).getValueType())) {
case Expand: assert(0 && "Shouldn't need to expand other operators here!");
case Legal:
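
The new FP_ROUND expansion above bounces the value through a stack temporary of the destination type. The point it relies on is that storing to single-precision memory is what actually discards the extra bits an x87 register would otherwise keep. A minimal standalone sketch of that effect in plain C++ (not the SelectionDAG API):

  #include <cstdio>

  int main() {
    volatile double d = 1.0 + 1e-9;      // distinct from 1.0 as a double
    volatile float slot = (float)d;      // the TRUNCSTORE: round to f32 in memory
    double reloaded = slot;              // the EXTLOAD: read it back in a wider type
    std::printf("%d\n", reloaded == d);  // prints 0: the extra precision is gone
    return 0;
  }
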
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Mon Jul 2 19:53:03 2007
@@ -3918,15 +3918,7 @@
Op = DAG.getNode(ExtOp, getTypeToTransformTo(VT), Op);
} else {
assert(MVT::isFloatingPoint(VT) && "Not int or FP?");
- // A true promotion would change the size of the argument.
- // Instead, pretend this is an int. If FP objects are not
- // passed the same as ints, the original type should be Legal
- // and we should not get here.
- Op = DAG.getNode(ISD::BIT_CONVERT,
- VT==MVT::f32 ? MVT::i32 :
- (VT==MVT::f64 ? MVT::i64 :
- MVT::Other),
- Op);
+ Op = DAG.getNode(ISD::FP_EXTEND, getTypeToTransformTo(VT), Op);
}
Ops.push_back(Op);
Ops.push_back(DAG.getConstant(Flags, MVT::i32));
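
For the argument-promotion change above: promoting an f32 with FP_EXTEND keeps the numeric value in a wider FP type, whereas the removed BIT_CONVERT path merely reinterpreted the 32-bit pattern as an integer. A small standalone illustration of that difference, independent of the SelectionDAG code:

  #include <cstdint>
  #include <cstring>
  #include <cstdio>

  int main() {
    float f = 3.5f;

    double extended = f;                  // FP_EXTEND: same value, wider FP type
    std::uint32_t bits;
    std::memcpy(&bits, &f, sizeof bits);  // BIT_CONVERT: same bits, integer type

    std::printf("value %g, widened %g, raw bits 0x%08x\n",
                (double)f, extended, (unsigned)bits);
    return 0;
  }
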
Modified: llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp Mon Jul 2 19:53:03 2007
@@ -324,61 +324,101 @@
// concrete X86 instruction which uses the register stack.
//
static const TableEntry OpcodeTable[] = {
- { X86::FpABS , X86::FABS },
+ { X86::FpABS32 , X86::FABS },
+ { X86::FpABS64 , X86::FABS },
{ X86::FpADD32m , X86::FADD32m },
{ X86::FpADD64m , X86::FADD64m },
- { X86::FpCHS , X86::FCHS },
- { X86::FpCMOVB , X86::FCMOVB },
- { X86::FpCMOVBE , X86::FCMOVBE },
- { X86::FpCMOVE , X86::FCMOVE },
- { X86::FpCMOVNB , X86::FCMOVNB },
- { X86::FpCMOVNBE , X86::FCMOVNBE },
- { X86::FpCMOVNE , X86::FCMOVNE },
- { X86::FpCMOVNP , X86::FCMOVNP },
- { X86::FpCMOVP , X86::FCMOVP },
- { X86::FpCOS , X86::FCOS },
+ { X86::FpCHS32 , X86::FCHS },
+ { X86::FpCHS64 , X86::FCHS },
+ { X86::FpCMOVB32 , X86::FCMOVB },
+ { X86::FpCMOVB64 , X86::FCMOVB },
+ { X86::FpCMOVBE32 , X86::FCMOVBE },
+ { X86::FpCMOVBE64 , X86::FCMOVBE },
+ { X86::FpCMOVE32 , X86::FCMOVE },
+ { X86::FpCMOVE64 , X86::FCMOVE },
+ { X86::FpCMOVNB32 , X86::FCMOVNB },
+ { X86::FpCMOVNB64 , X86::FCMOVNB },
+ { X86::FpCMOVNBE32 , X86::FCMOVNBE },
+ { X86::FpCMOVNBE64 , X86::FCMOVNBE },
+ { X86::FpCMOVNE32 , X86::FCMOVNE },
+ { X86::FpCMOVNE64 , X86::FCMOVNE },
+ { X86::FpCMOVNP32 , X86::FCMOVNP },
+ { X86::FpCMOVNP64 , X86::FCMOVNP },
+ { X86::FpCMOVP32 , X86::FCMOVP },
+ { X86::FpCMOVP64 , X86::FCMOVP },
+ { X86::FpCOS32 , X86::FCOS },
+ { X86::FpCOS64 , X86::FCOS },
{ X86::FpDIV32m , X86::FDIV32m },
{ X86::FpDIV64m , X86::FDIV64m },
{ X86::FpDIVR32m , X86::FDIVR32m },
{ X86::FpDIVR64m , X86::FDIVR64m },
- { X86::FpIADD16m , X86::FIADD16m },
- { X86::FpIADD32m , X86::FIADD32m },
- { X86::FpIDIV16m , X86::FIDIV16m },
- { X86::FpIDIV32m , X86::FIDIV32m },
- { X86::FpIDIVR16m, X86::FIDIVR16m},
- { X86::FpIDIVR32m, X86::FIDIVR32m},
- { X86::FpILD16m , X86::FILD16m },
- { X86::FpILD32m , X86::FILD32m },
- { X86::FpILD64m , X86::FILD64m },
- { X86::FpIMUL16m , X86::FIMUL16m },
- { X86::FpIMUL32m , X86::FIMUL32m },
- { X86::FpIST16m , X86::FIST16m },
- { X86::FpIST32m , X86::FIST32m },
- { X86::FpIST64m , X86::FISTP64m },
- { X86::FpISTT16m , X86::FISTTP16m},
- { X86::FpISTT32m , X86::FISTTP32m},
- { X86::FpISTT64m , X86::FISTTP64m},
- { X86::FpISUB16m , X86::FISUB16m },
- { X86::FpISUB32m , X86::FISUB32m },
- { X86::FpISUBR16m, X86::FISUBR16m},
- { X86::FpISUBR32m, X86::FISUBR32m},
- { X86::FpLD0 , X86::FLD0 },
- { X86::FpLD1 , X86::FLD1 },
+ { X86::FpIADD16m32 , X86::FIADD16m },
+ { X86::FpIADD16m64 , X86::FIADD16m },
+ { X86::FpIADD32m32 , X86::FIADD32m },
+ { X86::FpIADD32m64 , X86::FIADD32m },
+ { X86::FpIDIV16m32 , X86::FIDIV16m },
+ { X86::FpIDIV16m64 , X86::FIDIV16m },
+ { X86::FpIDIV32m32 , X86::FIDIV32m },
+ { X86::FpIDIV32m64 , X86::FIDIV32m },
+ { X86::FpIDIVR16m32, X86::FIDIVR16m},
+ { X86::FpIDIVR16m64, X86::FIDIVR16m},
+ { X86::FpIDIVR32m32, X86::FIDIVR32m},
+ { X86::FpIDIVR32m64, X86::FIDIVR32m},
+ { X86::FpILD16m32 , X86::FILD16m },
+ { X86::FpILD16m64 , X86::FILD16m },
+ { X86::FpILD32m32 , X86::FILD32m },
+ { X86::FpILD32m64 , X86::FILD32m },
+ { X86::FpILD64m32 , X86::FILD64m },
+ { X86::FpILD64m64 , X86::FILD64m },
+ { X86::FpIMUL16m32 , X86::FIMUL16m },
+ { X86::FpIMUL16m64 , X86::FIMUL16m },
+ { X86::FpIMUL32m32 , X86::FIMUL32m },
+ { X86::FpIMUL32m64 , X86::FIMUL32m },
+ { X86::FpIST16m32 , X86::FIST16m },
+ { X86::FpIST16m64 , X86::FIST16m },
+ { X86::FpIST32m32 , X86::FIST32m },
+ { X86::FpIST32m64 , X86::FIST32m },
+ { X86::FpIST64m32 , X86::FISTP64m },
+ { X86::FpIST64m64 , X86::FISTP64m },
+ { X86::FpISTT16m32 , X86::FISTTP16m},
+ { X86::FpISTT16m64 , X86::FISTTP16m},
+ { X86::FpISTT32m32 , X86::FISTTP32m},
+ { X86::FpISTT32m64 , X86::FISTTP32m},
+ { X86::FpISTT64m32 , X86::FISTTP64m},
+ { X86::FpISTT64m64 , X86::FISTTP64m},
+ { X86::FpISUB16m32 , X86::FISUB16m },
+ { X86::FpISUB16m64 , X86::FISUB16m },
+ { X86::FpISUB32m32 , X86::FISUB32m },
+ { X86::FpISUB32m64 , X86::FISUB32m },
+ { X86::FpISUBR16m32, X86::FISUBR16m},
+ { X86::FpISUBR16m64, X86::FISUBR16m},
+ { X86::FpISUBR32m32, X86::FISUBR32m},
+ { X86::FpISUBR32m64, X86::FISUBR32m},
+ { X86::FpLD032 , X86::FLD0 },
+ { X86::FpLD064 , X86::FLD0 },
+ { X86::FpLD132 , X86::FLD1 },
+ { X86::FpLD164 , X86::FLD1 },
{ X86::FpLD32m , X86::FLD32m },
{ X86::FpLD64m , X86::FLD64m },
{ X86::FpMUL32m , X86::FMUL32m },
{ X86::FpMUL64m , X86::FMUL64m },
- { X86::FpSIN , X86::FSIN },
- { X86::FpSQRT , X86::FSQRT },
+ { X86::FpSIN32 , X86::FSIN },
+ { X86::FpSIN64 , X86::FSIN },
+ { X86::FpSQRT32 , X86::FSQRT },
+ { X86::FpSQRT64 , X86::FSQRT },
{ X86::FpST32m , X86::FST32m },
{ X86::FpST64m , X86::FST64m },
+ { X86::FpST64m32 , X86::FST32m },
{ X86::FpSUB32m , X86::FSUB32m },
{ X86::FpSUB64m , X86::FSUB64m },
{ X86::FpSUBR32m , X86::FSUBR32m },
{ X86::FpSUBR64m , X86::FSUBR64m },
- { X86::FpTST , X86::FTST },
- { X86::FpUCOMIr , X86::FUCOMIr },
- { X86::FpUCOMr , X86::FUCOMr },
+ { X86::FpTST32 , X86::FTST },
+ { X86::FpTST64 , X86::FTST },
+ { X86::FpUCOMIr32 , X86::FUCOMIr },
+ { X86::FpUCOMIr64 , X86::FUCOMIr },
+ { X86::FpUCOMr32 , X86::FUCOMr },
+ { X86::FpUCOMr64 , X86::FUCOMr },
};
static unsigned getConcreteOpcode(unsigned Opcode) {
@@ -510,10 +550,14 @@
// Ditto FISTTP16m, FISTTP32m, FISTTP64m.
//
if (!KillsSrc &&
- (MI->getOpcode() == X86::FpIST64m ||
- MI->getOpcode() == X86::FpISTT16m ||
- MI->getOpcode() == X86::FpISTT32m ||
- MI->getOpcode() == X86::FpISTT64m)) {
+ (MI->getOpcode() == X86::FpIST64m32 ||
+ MI->getOpcode() == X86::FpISTT16m32 ||
+ MI->getOpcode() == X86::FpISTT32m32 ||
+ MI->getOpcode() == X86::FpISTT64m32 ||
+ MI->getOpcode() == X86::FpIST64m64 ||
+ MI->getOpcode() == X86::FpISTT16m64 ||
+ MI->getOpcode() == X86::FpISTT32m64 ||
+ MI->getOpcode() == X86::FpISTT64m64)) {
duplicateToTop(Reg, 7 /*temp register*/, I);
} else {
moveToTop(Reg, I); // Move to the top of the stack...
@@ -578,34 +622,50 @@
// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
static const TableEntry ForwardST0Table[] = {
- { X86::FpADD , X86::FADDST0r },
- { X86::FpDIV , X86::FDIVST0r },
- { X86::FpMUL , X86::FMULST0r },
- { X86::FpSUB , X86::FSUBST0r },
+ { X86::FpADD32 , X86::FADDST0r },
+ { X86::FpADD64 , X86::FADDST0r },
+ { X86::FpDIV32 , X86::FDIVST0r },
+ { X86::FpDIV64 , X86::FDIVST0r },
+ { X86::FpMUL32 , X86::FMULST0r },
+ { X86::FpMUL64 , X86::FMULST0r },
+ { X86::FpSUB32 , X86::FSUBST0r },
+ { X86::FpSUB64 , X86::FSUBST0r },
};
// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
static const TableEntry ReverseST0Table[] = {
- { X86::FpADD , X86::FADDST0r }, // commutative
- { X86::FpDIV , X86::FDIVRST0r },
- { X86::FpMUL , X86::FMULST0r }, // commutative
- { X86::FpSUB , X86::FSUBRST0r },
+ { X86::FpADD32 , X86::FADDST0r }, // commutative
+ { X86::FpADD64 , X86::FADDST0r }, // commutative
+ { X86::FpDIV32 , X86::FDIVRST0r },
+ { X86::FpDIV64 , X86::FDIVRST0r },
+ { X86::FpMUL32 , X86::FMULST0r }, // commutative
+ { X86::FpMUL64 , X86::FMULST0r }, // commutative
+ { X86::FpSUB32 , X86::FSUBRST0r },
+ { X86::FpSUB64 , X86::FSUBRST0r },
};
// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
static const TableEntry ForwardSTiTable[] = {
- { X86::FpADD , X86::FADDrST0 }, // commutative
- { X86::FpDIV , X86::FDIVRrST0 },
- { X86::FpMUL , X86::FMULrST0 }, // commutative
- { X86::FpSUB , X86::FSUBRrST0 },
+ { X86::FpADD32 , X86::FADDrST0 }, // commutative
+ { X86::FpADD64 , X86::FADDrST0 }, // commutative
+ { X86::FpDIV32 , X86::FDIVRrST0 },
+ { X86::FpDIV64 , X86::FDIVRrST0 },
+ { X86::FpMUL32 , X86::FMULrST0 }, // commutative
+ { X86::FpMUL64 , X86::FMULrST0 }, // commutative
+ { X86::FpSUB32 , X86::FSUBRrST0 },
+ { X86::FpSUB64 , X86::FSUBRrST0 },
};
// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
static const TableEntry ReverseSTiTable[] = {
- { X86::FpADD , X86::FADDrST0 },
- { X86::FpDIV , X86::FDIVrST0 },
- { X86::FpMUL , X86::FMULrST0 },
- { X86::FpSUB , X86::FSUBrST0 },
+ { X86::FpADD32 , X86::FADDrST0 },
+ { X86::FpADD64 , X86::FADDrST0 },
+ { X86::FpDIV32 , X86::FDIVrST0 },
+ { X86::FpDIV64 , X86::FDIVrST0 },
+ { X86::FpMUL32 , X86::FMULrST0 },
+ { X86::FpMUL64 , X86::FMULrST0 },
+ { X86::FpSUB32 , X86::FSUBrST0 },
+ { X86::FpSUB64 , X86::FSUBrST0 },
};
@@ -777,15 +837,20 @@
MachineInstr *MI = I;
switch (MI->getOpcode()) {
default: assert(0 && "Unknown SpecialFP instruction!");
- case X86::FpGETRESULT: // Appears immediately after a call returning FP type!
+ case X86::FpGETRESULT32: // Appears immediately after a call returning FP type!
+ case X86::FpGETRESULT64: // Appears immediately after a call returning FP type!
assert(StackTop == 0 && "Stack should be empty after a call!");
pushReg(getFPReg(MI->getOperand(0)));
break;
- case X86::FpSETRESULT:
+ case X86::FpSETRESULT32:
+ case X86::FpSETRESULT64:
assert(StackTop == 1 && "Stack should have one element on it to return!");
--StackTop; // "Forget" we have something on the top of stack!
break;
- case X86::FpMOV: {
+ case X86::FpMOV3232:
+ case X86::FpMOV3264:
+ case X86::FpMOV6432:
+ case X86::FpMOV6464: {
unsigned SrcReg = getFPReg(MI->getOperand(1));
unsigned DestReg = getFPReg(MI->getOperand(0));
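
The widened OpcodeTable above still maps every pseudo instruction to a single concrete x87 opcode; the two width variants of each pseudo simply share a target, because the physical stack registers are 80 bits wide regardless of the nominal f32/f64 type. A self-contained sketch of that lookup idea, under the assumption that the table stays sorted by pseudo opcode (the opcode numbers here are hypothetical stand-ins, not the real X86:: enum values):

  #include <algorithm>
  #include <cassert>
  #include <utility>

  typedef std::pair<unsigned, unsigned> TableEntry;  // { pseudo opcode, concrete opcode }

  // Hypothetical opcode numbers standing in for the X86::Fp* / X86::F* enums.
  enum { FpABS32 = 1, FpABS64 = 2, FpCHS32 = 3, FpCHS64 = 4, FABS = 100, FCHS = 101 };

  static const TableEntry OpcodeTable[] = {
    { FpABS32, FABS },
    { FpABS64, FABS },   // both widths lower to the same concrete instruction
    { FpCHS32, FCHS },
    { FpCHS64, FCHS },
  };

  static unsigned getConcreteOpcode(unsigned Pseudo) {
    const TableEntry *B = OpcodeTable;
    const TableEntry *E = OpcodeTable + sizeof(OpcodeTable) / sizeof(OpcodeTable[0]);
    const TableEntry *I = std::lower_bound(B, E, std::make_pair(Pseudo, 0u));
    assert(I != E && I->first == Pseudo && "unknown FP pseudo instruction");
    return I->second;
  }

  int main() {
    assert(getConcreteOpcode(FpABS32) == FABS);
    assert(getConcreteOpcode(FpABS64) == FABS);  // same answer for the f64 variant
    return 0;
  }
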
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Jul 2 19:53:03 2007
@@ -492,11 +492,13 @@
for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
!ContainsFPCode && I != E; ++I) {
if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
+ const TargetRegisterClass *clas;
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
- RegMap->getRegClass(I->getOperand(0).getReg()) ==
- X86::RFPRegisterClass) {
+ ((clas = RegMap->getRegClass(I->getOperand(0).getReg())) ==
+ X86::RFP32RegisterClass ||
+ clas == X86::RFP64RegisterClass)) {
ContainsFPCode = true;
break;
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jul 2 19:53:03 2007
@@ -289,11 +289,14 @@
addLegalFPImmediate(+0.0); // xorps / xorpd
} else {
// Set up the FP register classes.
- addRegisterClass(MVT::f64, X86::RFPRegisterClass);
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+ addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
@@ -301,6 +304,7 @@
}
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
addLegalFPImmediate(+0.0); // FLD0
addLegalFPImmediate(+1.0); // FLD1
addLegalFPImmediate(-0.0); // FLD0/FCHS
@@ -553,7 +557,7 @@
MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
}
- SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
+ SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
Chain = Value.getValue(1);
@@ -604,7 +608,7 @@
// before the fp stackifier runs.
// Copy ST0 into an RFP register with FP_GET_RESULT.
- SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
SDOperand GROps[] = { Chain, InFlag };
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
Chain = RetVal.getValue(1);
@@ -626,11 +630,6 @@
RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
Chain = RetVal.getValue(1);
}
-
- if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE)
- // FIXME: we would really like to remember that this FP_ROUND
- // operation is okay to eliminate if we allow excess FP precision.
- RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
ResultVals.push_back(RetVal);
}
@@ -3252,7 +3251,7 @@
if (X86ScalarSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
- Tys = DAG.getVTList(MVT::f64, MVT::Other);
+ Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
SmallVector<SDOperand, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(StackSlot);
@@ -3307,7 +3306,7 @@
if (X86ScalarSSE) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
- SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
+ SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDOperand Ops[] = {
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
};
@@ -4437,9 +4436,12 @@
return BB;
}
- case X86::FP_TO_INT16_IN_MEM:
- case X86::FP_TO_INT32_IN_MEM:
- case X86::FP_TO_INT64_IN_MEM: {
+ case X86::FP32_TO_INT16_IN_MEM:
+ case X86::FP32_TO_INT32_IN_MEM:
+ case X86::FP32_TO_INT64_IN_MEM:
+ case X86::FP64_TO_INT16_IN_MEM:
+ case X86::FP64_TO_INT32_IN_MEM:
+ case X86::FP64_TO_INT64_IN_MEM: {
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
@@ -4466,9 +4468,12 @@
unsigned Opc;
switch (MI->getOpcode()) {
default: assert(0 && "illegal opcode!");
- case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
- case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
- case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
+ case X86::FP32_TO_INT16_IN_MEM: Opc = X86::FpIST16m32; break;
+ case X86::FP32_TO_INT32_IN_MEM: Opc = X86::FpIST32m32; break;
+ case X86::FP32_TO_INT64_IN_MEM: Opc = X86::FpIST64m32; break;
+ case X86::FP64_TO_INT16_IN_MEM: Opc = X86::FpIST16m64; break;
+ case X86::FP64_TO_INT32_IN_MEM: Opc = X86::FpIST32m64; break;
+ case X86::FP64_TO_INT64_IN_MEM: Opc = X86::FpIST64m64; break;
}
X86AddressMode AM;
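
The FP*_TO_INT*_IN_MEM expansion above switches the control word because FIST honours the current x87 rounding mode, while a C-style FP-to-integer conversion must truncate. A small standalone demonstration of the same point through the C99 rounding-mode interface (a sketch of the semantics, not of the emitted code):

  #include <cfenv>
  #include <cmath>
  #include <cstdio>

  int main() {
    volatile double d = 2.7;

    std::fesetround(FE_TONEAREST);   // the processor's default rounding mode
    long nearest = std::lrint(d);    // 3: lrint follows the current mode

    std::fesetround(FE_TOWARDZERO);  // what the temporary control word selects
    long truncated = std::lrint(d);  // 2: now it matches (long)d

    std::fesetround(FE_TONEAREST);   // restore, much as the lowering reloads the old FPCW
    std::printf("%ld %ld %ld\n", nearest, truncated, (long)d);
    return 0;
  }
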
Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Mon Jul 2 19:53:03 2007
@@ -17,13 +17,13 @@
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>;
+def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
+def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
-def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisPtrTy<1>,
+def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86FpToIMem: SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
@@ -50,19 +50,19 @@
// FPStack pattern fragments
//===----------------------------------------------------------------------===//
-def fp64imm0 : PatLeaf<(f64 fpimm), [{
+def fpimm0 : PatLeaf<(fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
-def fp64immneg0 : PatLeaf<(f64 fpimm), [{
+def fpimmneg0 : PatLeaf<(fpimm), [{
return N->isExactlyValue(-0.0);
}]>;
-def fp64imm1 : PatLeaf<(f64 fpimm), [{
+def fpimm1 : PatLeaf<(fpimm), [{
return N->isExactlyValue(+1.0);
}]>;
-def fp64immneg1 : PatLeaf<(f64 fpimm), [{
+def fpimmneg1 : PatLeaf<(fpimm), [{
return N->isExactlyValue(-1.0);
}]>;
@@ -70,18 +70,30 @@
// Some 'special' instructions
let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
- def FP_TO_INT16_IN_MEM : I<0, Pseudo,
- (ops i16mem:$dst, RFP:$src),
- "#FP_TO_INT16_IN_MEM PSEUDO!",
- [(X86fp_to_i16mem RFP:$src, addr:$dst)]>;
- def FP_TO_INT32_IN_MEM : I<0, Pseudo,
- (ops i32mem:$dst, RFP:$src),
- "#FP_TO_INT32_IN_MEM PSEUDO!",
- [(X86fp_to_i32mem RFP:$src, addr:$dst)]>;
- def FP_TO_INT64_IN_MEM : I<0, Pseudo,
- (ops i64mem:$dst, RFP:$src),
- "#FP_TO_INT64_IN_MEM PSEUDO!",
- [(X86fp_to_i64mem RFP:$src, addr:$dst)]>;
+ def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
+ (ops i16mem:$dst, RFP32:$src),
+ "#FP32_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
+ (ops i32mem:$dst, RFP32:$src),
+ "#FP32_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
+ (ops i64mem:$dst, RFP32:$src),
+ "#FP32_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
+ def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
+ (ops i16mem:$dst, RFP64:$src),
+ "#FP64_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
+ (ops i32mem:$dst, RFP64:$src),
+ "#FP64_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
+ (ops i64mem:$dst, RFP64:$src),
+ "#FP64_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
}
let isTerminator = 1 in
@@ -111,30 +123,47 @@
}
// Random Pseudo Instructions.
-def FpGETRESULT : FpI_<(ops RFP:$dst), SpecialFP,
- [(set RFP:$dst, X86fpget)]>; // FPR = ST(0)
+def FpGETRESULT32 : FpI_<(ops RFP32:$dst), SpecialFP,
+ [(set RFP32:$dst, X86fpget)]>; // FPR = ST(0)
-let noResults = 1 in
- def FpSETRESULT : FpI_<(ops RFP:$src), SpecialFP,
- [(X86fpset RFP:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR
+def FpGETRESULT64 : FpI_<(ops RFP64:$dst), SpecialFP,
+ [(set RFP64:$dst, X86fpget)]>; // FPR = ST(0)
+let noResults = 1 in {
+ def FpSETRESULT32 : FpI_<(ops RFP32:$src), SpecialFP,
+ [(X86fpset RFP32:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR
+
+ def FpSETRESULT64 : FpI_<(ops RFP64:$src), SpecialFP,
+ [(X86fpset RFP64:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR
+}
// FpI - Floating Point Psuedo Instruction template. Predicated on FPStack.
class FpI<dag ops, FPFormat fp, list<dag> pattern> :
FpI_<ops, fp, pattern>, Requires<[FPStack]>;
-
-def FpMOV : FpI<(ops RFP:$dst, RFP:$src), SpecialFP, []>; // f1 = fmov f2
+// Register copies. Just copies, the 64->32 version does not truncate.
+def FpMOV3232 : FpI<(ops RFP32:$dst, RFP32:$src), SpecialFP, []>; // f1 = fmov f2
+def FpMOV3264 : FpI<(ops RFP64:$dst, RFP32:$src), SpecialFP, []>; // f1 = fmov f2
+def FpMOV6432 : FpI<(ops RFP32:$dst, RFP64:$src), SpecialFP, []>; // f1 = fmov f2
+def FpMOV6464 : FpI<(ops RFP64:$dst, RFP64:$src), SpecialFP, []>; // f1 = fmov f2
// Arithmetic
// Add, Sub, Mul, Div.
-def FpADD : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
- [(set RFP:$dst, (fadd RFP:$src1, RFP:$src2))]>;
-def FpSUB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
- [(set RFP:$dst, (fsub RFP:$src1, RFP:$src2))]>;
-def FpMUL : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
- [(set RFP:$dst, (fmul RFP:$src1, RFP:$src2))]>;
-def FpDIV : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
- [(set RFP:$dst, (fdiv RFP:$src1, RFP:$src2))]>;
+def FpADD32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (fadd RFP32:$src1, RFP32:$src2))]>;
+def FpSUB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (fsub RFP32:$src1, RFP32:$src2))]>;
+def FpMUL32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (fmul RFP32:$src1, RFP32:$src2))]>;
+def FpDIV32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (fdiv RFP32:$src1, RFP32:$src2))]>;
+def FpADD64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (fadd RFP64:$src1, RFP64:$src2))]>;
+def FpSUB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (fsub RFP64:$src1, RFP64:$src2))]>;
+def FpMUL64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (fmul RFP64:$src1, RFP64:$src2))]>;
+def FpDIV64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (fdiv RFP64:$src1, RFP64:$src2))]>;
class FPST0rInst<bits<8> o, string asm>
: FPI<o, AddRegFrm, (ops RST:$op), asm>, D8;
@@ -144,47 +173,41 @@
: FPI<o, AddRegFrm, (ops RST:$op), asm>, DE;
// Binary Ops with a memory source.
-def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fadd RFP:$src1,
- (extloadf64f32 addr:$src2)))]>;
+def FpADD32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fadd RFP32:$src1, (loadf32 addr:$src2)))]>;
// ST(0) = ST(0) + [mem32]
-def FpADD64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fadd RFP:$src1, (loadf64 addr:$src2)))]>;
+def FpADD64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fadd RFP64:$src1, (loadf64 addr:$src2)))]>;
// ST(0) = ST(0) + [mem64]
-def FpMUL32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fmul RFP:$src1,
- (extloadf64f32 addr:$src2)))]>;
+def FpMUL32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fmul RFP32:$src1, (loadf32 addr:$src2)))]>;
// ST(0) = ST(0) * [mem32]
-def FpMUL64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fmul RFP:$src1, (loadf64 addr:$src2)))]>;
+def FpMUL64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fmul RFP64:$src1, (loadf64 addr:$src2)))]>;
// ST(0) = ST(0) * [mem64]
-def FpSUB32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fsub RFP:$src1,
- (extloadf64f32 addr:$src2)))]>;
+def FpSUB32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fsub RFP32:$src1, (loadf32 addr:$src2)))]>;
// ST(0) = ST(0) - [mem32]
-def FpSUB64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fsub RFP:$src1, (loadf64 addr:$src2)))]>;
+def FpSUB64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fsub RFP64:$src1, (loadf64 addr:$src2)))]>;
// ST(0) = ST(0) - [mem64]
-def FpSUBR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fsub (extloadf64f32 addr:$src2),
- RFP:$src1))]>;
+def FpSUBR32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fsub (loadf32 addr:$src2), RFP32:$src1))]>;
// ST(0) = [mem32] - ST(0)
-def FpSUBR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fsub (loadf64 addr:$src2), RFP:$src1))]>;
+def FpSUBR64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fsub (loadf64 addr:$src2), RFP64:$src1))]>;
// ST(0) = [mem64] - ST(0)
-def FpDIV32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fdiv RFP:$src1,
- (extloadf64f32 addr:$src2)))]>;
+def FpDIV32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fdiv RFP32:$src1, (loadf32 addr:$src2)))]>;
// ST(0) = ST(0) / [mem32]
-def FpDIV64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fdiv RFP:$src1, (loadf64 addr:$src2)))]>;
+def FpDIV64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fdiv RFP64:$src1, (loadf64 addr:$src2)))]>;
// ST(0) = ST(0) / [mem64]
-def FpDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fdiv (extloadf64f32 addr:$src2),
- RFP:$src1))]>;
+def FpDIVR32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fdiv (loadf32 addr:$src2), RFP32:$src1))]>;
// ST(0) = [mem32] / ST(0)
-def FpDIVR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fdiv (loadf64 addr:$src2), RFP:$src1))]>;
+def FpDIVR64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fdiv (loadf64 addr:$src2), RFP64:$src1))]>;
// ST(0) = [mem64] / ST(0)
@@ -201,53 +224,102 @@
def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">;
def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">;
-def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fadd RFP:$src1,
+def FpIADD16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fadd RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+ // ST(0) = ST(0) + [mem16int]
+def FpIADD32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fadd RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+ // ST(0) = ST(0) + [mem32int]
+def FpIMUL16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fmul RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+ // ST(0) = ST(0) * [mem16int]
+def FpIMUL32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fmul RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+ // ST(0) = ST(0) * [mem32int]
+def FpISUB16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fsub RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+ // ST(0) = ST(0) - [mem16int]
+def FpISUB32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fsub RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+ // ST(0) = ST(0) - [mem32int]
+def FpISUBR16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fsub (X86fild addr:$src2, i16),
+ RFP32:$src1))]>;
+ // ST(0) = [mem16int] - ST(0)
+def FpISUBR32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fsub (X86fild addr:$src2, i32),
+ RFP32:$src1))]>;
+ // ST(0) = [mem32int] - ST(0)
+def FpIDIV16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fdiv RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+ // ST(0) = ST(0) / [mem16int]
+def FpIDIV32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fdiv RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+ // ST(0) = ST(0) / [mem32int]
+def FpIDIVR16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fdiv (X86fild addr:$src2, i16),
+ RFP32:$src1))]>;
+ // ST(0) = [mem16int] / ST(0)
+def FpIDIVR32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (fdiv (X86fild addr:$src2, i32),
+ RFP32:$src1))]>;
+ // ST(0) = [mem32int] / ST(0)
+
+def FpIADD16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fadd RFP64:$src1,
(X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) + [mem16int]
-def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fadd RFP:$src1,
+def FpIADD32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fadd RFP64:$src1,
(X86fild addr:$src2, i32)))]>;
// ST(0) = ST(0) + [mem32int]
-def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fmul RFP:$src1,
+def FpIMUL16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fmul RFP64:$src1,
(X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) * [mem16int]
-def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fmul RFP:$src1,
+def FpIMUL32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fmul RFP64:$src1,
(X86fild addr:$src2, i32)))]>;
// ST(0) = ST(0) * [mem32int]
-def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fsub RFP:$src1,
+def FpISUB16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fsub RFP64:$src1,
(X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) - [mem16int]
-def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fsub RFP:$src1,
+def FpISUB32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fsub RFP64:$src1,
(X86fild addr:$src2, i32)))]>;
// ST(0) = ST(0) - [mem32int]
-def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fsub (X86fild addr:$src2, i16),
- RFP:$src1))]>;
+def FpISUBR16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fsub (X86fild addr:$src2, i16),
+ RFP64:$src1))]>;
// ST(0) = [mem16int] - ST(0)
-def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fsub (X86fild addr:$src2, i32),
- RFP:$src1))]>;
+def FpISUBR32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fsub (X86fild addr:$src2, i32),
+ RFP64:$src1))]>;
// ST(0) = [mem32int] - ST(0)
-def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fdiv RFP:$src1,
+def FpIDIV16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fdiv RFP64:$src1,
(X86fild addr:$src2, i16)))]>;
// ST(0) = ST(0) / [mem16int]
-def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fdiv RFP:$src1,
+def FpIDIV32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fdiv RFP64:$src1,
(X86fild addr:$src2, i32)))]>;
// ST(0) = ST(0) / [mem32int]
-def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16),
- RFP:$src1))]>;
+def FpIDIVR16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fdiv (X86fild addr:$src2, i16),
+ RFP64:$src1))]>;
// ST(0) = [mem16int] / ST(0)
-def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
- [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32),
- RFP:$src1))]>;
+def FpIDIVR32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (fdiv (X86fild addr:$src2, i32),
+ RFP64:$src1))]>;
// ST(0) = [mem32int] / ST(0)
def FIADD16m : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">;
@@ -285,19 +357,31 @@
def FDIVRrST0 : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">;
def FDIVRPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">;
-
// Unary operations.
-def FpCHS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
- [(set RFP:$dst, (fneg RFP:$src))]>;
-def FpABS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
- [(set RFP:$dst, (fabs RFP:$src))]>;
-def FpSQRT : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
- [(set RFP:$dst, (fsqrt RFP:$src))]>;
-def FpSIN : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
- [(set RFP:$dst, (fsin RFP:$src))]>;
-def FpCOS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
- [(set RFP:$dst, (fcos RFP:$src))]>;
-def FpTST : FpI<(ops RFP:$src), OneArgFP,
+def FpCHS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (fneg RFP32:$src))]>;
+def FpABS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (fabs RFP32:$src))]>;
+def FpSQRT32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (fsqrt RFP32:$src))]>;
+def FpSIN32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (fsin RFP32:$src))]>;
+def FpCOS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (fcos RFP32:$src))]>;
+def FpTST32 : FpI<(ops RFP32:$src), OneArgFP,
+ []>;
+
+def FpCHS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (fneg RFP64:$src))]>;
+def FpABS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (fabs RFP64:$src))]>;
+def FpSQRT64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (fsqrt RFP64:$src))]>;
+def FpSIN64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (fsin RFP64:$src))]>;
+def FpCOS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (fcos RFP64:$src))]>;
+def FpTST64 : FpI<(ops RFP64:$src), OneArgFP,
[]>;
def FCHS : FPI<0xE0, RawFrm, (ops), "fchs">, D9;
@@ -310,29 +394,54 @@
// Floating point cmovs.
let isTwoAddress = 1 in {
- def FpCMOVB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
- [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+ def FpCMOVB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
X86_COND_B))]>;
- def FpCMOVBE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
- [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+ def FpCMOVBE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
X86_COND_BE))]>;
- def FpCMOVE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
- [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+ def FpCMOVE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
X86_COND_E))]>;
- def FpCMOVP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
- [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+ def FpCMOVP32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
X86_COND_P))]>;
- def FpCMOVNB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
- [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+ def FpCMOVNB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
X86_COND_AE))]>;
- def FpCMOVNBE: FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
- [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+ def FpCMOVNBE32: FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
X86_COND_A))]>;
- def FpCMOVNE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
- [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+ def FpCMOVNE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
X86_COND_NE))]>;
- def FpCMOVNP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
- [(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
+ def FpCMOVNP32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+ X86_COND_NP))]>;
+
+ def FpCMOVB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ X86_COND_B))]>;
+ def FpCMOVBE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ X86_COND_BE))]>;
+ def FpCMOVE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ X86_COND_E))]>;
+ def FpCMOVP64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ X86_COND_P))]>;
+ def FpCMOVNB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ X86_COND_AE))]>;
+ def FpCMOVNBE64: FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ X86_COND_A))]>;
+ def FpCMOVNE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ X86_COND_NE))]>;
+ def FpCMOVNP64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
X86_COND_NP))]>;
}
@@ -354,27 +463,39 @@
"fcmovnu {$op, %st(0)|%ST(0), $op}">, DB;
// Floating point loads & stores.
-def FpLD32m : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP,
- [(set RFP:$dst, (extloadf64f32 addr:$src))]>;
-def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP,
- [(set RFP:$dst, (loadf64 addr:$src))]>;
-def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP,
- [(set RFP:$dst, (X86fild addr:$src, i16))]>;
-def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP,
- [(set RFP:$dst, (X86fild addr:$src, i32))]>;
-def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP,
- [(set RFP:$dst, (X86fild addr:$src, i64))]>;
-
-def FpST32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP,
- [(truncstoref32 RFP:$src, addr:$op)]>;
-def FpST64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP,
- [(store RFP:$src, addr:$op)]>;
-
-def FpSTP32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, []>;
-def FpSTP64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, []>;
-def FpIST16m : FpI<(ops i16mem:$op, RFP:$src), OneArgFP, []>;
-def FpIST32m : FpI<(ops i32mem:$op, RFP:$src), OneArgFP, []>;
-def FpIST64m : FpI<(ops i64mem:$op, RFP:$src), OneArgFP, []>;
+def FpLD32m : FpI<(ops RFP32:$dst, f32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (loadf32 addr:$src))]>;
+def FpLD64m : FpI<(ops RFP64:$dst, f64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (loadf64 addr:$src))]>;
+def FpILD16m32 : FpI<(ops RFP32:$dst, i16mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+def FpILD32m32 : FpI<(ops RFP32:$dst, i32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+def FpILD64m32 : FpI<(ops RFP32:$dst, i64mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+def FpILD16m64 : FpI<(ops RFP64:$dst, i16mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+def FpILD32m64 : FpI<(ops RFP64:$dst, i32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+def FpILD64m64 : FpI<(ops RFP64:$dst, i64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+
+def FpST32m : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP,
+ [(store RFP32:$src, addr:$op)]>;
+def FpST64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP,
+ [(truncstoref32 RFP64:$src, addr:$op)]>;
+def FpST64m : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP,
+ [(store RFP64:$src, addr:$op)]>;
+
+def FpSTP32m : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP, []>;
+def FpSTP64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP, []>;
+def FpSTP64m : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP, []>;
+def FpIST16m32 : FpI<(ops i16mem:$op, RFP32:$src), OneArgFP, []>;
+def FpIST32m32 : FpI<(ops i32mem:$op, RFP32:$src), OneArgFP, []>;
+def FpIST64m32 : FpI<(ops i64mem:$op, RFP32:$src), OneArgFP, []>;
+def FpIST16m64 : FpI<(ops i16mem:$op, RFP64:$src), OneArgFP, []>;
+def FpIST32m64 : FpI<(ops i32mem:$op, RFP64:$src), OneArgFP, []>;
+def FpIST64m64 : FpI<(ops i64mem:$op, RFP64:$src), OneArgFP, []>;
def FLD32m : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">;
def FLD64m : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">;
@@ -392,14 +513,23 @@
def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">;
// FISTTP requires SSE3 even though it's a FPStack op.
-def FpISTT16m : FpI_<(ops i16mem:$op, RFP:$src), OneArgFP,
- [(X86fp_to_i16mem RFP:$src, addr:$op)]>,
+def FpISTT16m32 : FpI_<(ops i16mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def FpISTT32m32 : FpI_<(ops i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
Requires<[HasSSE3]>;
-def FpISTT32m : FpI_<(ops i32mem:$op, RFP:$src), OneArgFP,
- [(X86fp_to_i32mem RFP:$src, addr:$op)]>,
+def FpISTT64m32 : FpI_<(ops i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
Requires<[HasSSE3]>;
-def FpISTT64m : FpI_<(ops i64mem:$op, RFP:$src), OneArgFP,
- [(X86fp_to_i64mem RFP:$src, addr:$op)]>,
+def FpISTT16m64 : FpI_<(ops i16mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def FpISTT32m64 : FpI_<(ops i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def FpISTT64m64 : FpI_<(ops i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
Requires<[HasSSE3]>;
def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">;
@@ -414,10 +544,14 @@
// Floating point constant loads.
let isReMaterializable = 1 in {
-def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP,
- [(set RFP:$dst, fp64imm0)]>;
-def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP,
- [(set RFP:$dst, fp64imm1)]>;
+def FpLD032 : FpI<(ops RFP32:$dst), ZeroArgFP,
+ [(set RFP32:$dst, fpimm0)]>;
+def FpLD132 : FpI<(ops RFP32:$dst), ZeroArgFP,
+ [(set RFP32:$dst, fpimm1)]>;
+def FpLD064 : FpI<(ops RFP64:$dst), ZeroArgFP,
+ [(set RFP64:$dst, fpimm0)]>;
+def FpLD164 : FpI<(ops RFP64:$dst), ZeroArgFP,
+ [(set RFP64:$dst, fpimm1)]>;
}
def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9;
@@ -425,10 +559,14 @@
// Floating point compares.
-def FpUCOMr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
+def FpUCOMr32 : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def FpUCOMIr32 : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(X86cmp RFP32:$lhs, RFP32:$rhs)]>; // CC = cmp ST(0) with ST(i)
+def FpUCOMr64 : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP,
[]>; // FPSW = cmp ST(0) with ST(i)
-def FpUCOMIr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
- [(X86cmp RFP:$lhs, RFP:$rhs)]>; // CC = cmp ST(0) with ST(i)
+def FpUCOMIr64 : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(X86cmp RFP64:$lhs, RFP64:$rhs)]>; // CC = cmp ST(0) with ST(i)
def FUCOMr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
(ops RST:$reg),
@@ -447,7 +585,6 @@
(ops RST:$reg),
"fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;
-
// Floating point flag ops.
def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
(ops), "fnstsw", []>, DF, Imp<[],[AX]>;
@@ -466,12 +603,18 @@
def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
// Required for CALL which return f32 / f64 values.
-def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
-def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (FpST32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (FpST64m32 addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (FpST64m addr:$op, RFP64:$src)>;
// Floating point constant -0.0 and -1.0
-def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
-def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
+def : Pat<(f32 fpimmneg0), (FpCHS32 (FpLD032))>, Requires<[FPStack]>;
+def : Pat<(f32 fpimmneg1), (FpCHS32 (FpLD132))>, Requires<[FPStack]>;
+def : Pat<(f64 fpimmneg0), (FpCHS64 (FpLD064))>, Requires<[FPStack]>;
+def : Pat<(f64 fpimmneg1), (FpCHS64 (FpLD164))>, Requires<[FPStack]>;
// Used to conv. i64 to f64 since there isn't a SSE version.
-def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
+def : Pat<(X86fildflag addr:$src, i64), (FpILD64m64 addr:$src)>;
+
+def : Pat<(extloadf32 addr:$src), (FpMOV3264 (FpLD32m addr:$src))>, Requires<[FPStack]>;
+def : Pat<(fextend RFP32:$src), (FpMOV3264 RFP32:$src)>, Requires<[FPStack]>;
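
The last two patterns map extloadf32 and fextend onto a plain FpMOV3264 register copy: on the FP stack the value already sits in an 80-bit register, and widening f32 to f64 can never change a value, so no conversion instruction is required. A tiny standalone check of that fact (every float is exactly representable as a double):

  #include <cassert>

  int main() {
    float f = 0.1f;         // some float value (not exactly 0.1, which is fine here)
    double d = f;           // the fextend: exact, no rounding can occur
    assert((float)d == f);  // narrowing back recovers the identical float
    assert((double)f == d);
    return 0;
  }
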
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Jul 2 19:53:03 2007
@@ -33,7 +33,8 @@
if (oc == X86::MOV8rr || oc == X86::MOV16rr ||
oc == X86::MOV32rr || oc == X86::MOV64rr ||
oc == X86::MOV16to16_ || oc == X86::MOV32to32_ ||
- oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
+ oc == X86::FpMOV3232 || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
+ oc == X86::FpMOV3264 || oc == X86::FpMOV6432 || oc == X86::FpMOV6464 ||
oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
oc == X86::MOVAPSrr || oc == X86::MOVAPDrr ||
oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr ||
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Jul 2 19:53:03 2007
@@ -964,7 +964,7 @@
// Alias instructions that map fld0 to pxor for sse.
def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
- "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
+ "pxor $dst, $dst", [(set FR64:$dst, fpimm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Mon Jul 2 19:53:03 2007
@@ -82,8 +82,10 @@
Opc = X86::MOV32_mr;
} else if (RC == &X86::GR16_RegClass) {
Opc = X86::MOV16_mr;
- } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
+ } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
Opc = X86::FpST64m;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::FpST32m;
} else if (RC == &X86::FR32RegClass) {
Opc = X86::MOVSSmr;
} else if (RC == &X86::FR64RegClass) {
@@ -117,8 +119,10 @@
Opc = X86::MOV32_rm;
} else if (RC == &X86::GR16_RegClass) {
Opc = X86::MOV16_rm;
- } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
+ } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
Opc = X86::FpLD64m;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::FpLD32m;
} else if (RC == &X86::FR32RegClass) {
Opc = X86::MOVSSrm;
} else if (RC == &X86::FR64RegClass) {
@@ -151,8 +155,10 @@
Opc = X86::MOV32_rr;
} else if (RC == &X86::GR16_RegClass) {
Opc = X86::MOV16_rr;
- } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
- Opc = X86::FpMOV;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::FpMOV3232;
+ } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
+ Opc = X86::FpMOV6464;
} else if (RC == &X86::FR32RegClass) {
Opc = X86::FsMOVAPSrr;
} else if (RC == &X86::FR64RegClass) {
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=37847&r1=37846&r2=37847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Mon Jul 2 19:53:03 2007
@@ -425,7 +425,8 @@
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
-def RFP : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP32 : RegisterClass<"X86", [f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP64 : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible