[llvm-commits] CVS: llvm/lib/Target/X86/X86ATTAsmPrinter.h X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86ISelLowering.h X86InstrInfo.td X86IntelAsmPrinter.h
Evan Cheng
evan.cheng at apple.com
Tue Jan 31 14:28:42 PST 2006
Changes in directory llvm/lib/Target/X86:
X86ATTAsmPrinter.h updated: 1.4 -> 1.5
X86ISelDAGToDAG.cpp updated: 1.37 -> 1.38
X86ISelLowering.cpp updated: 1.71 -> 1.72
X86ISelLowering.h updated: 1.23 -> 1.24
X86InstrInfo.td updated: 1.225 -> 1.226
X86IntelAsmPrinter.h updated: 1.5 -> 1.6
---
Log message:
- Allow XMM load (for scalar use) to be folded into ANDP* and XORP*.
- Use XORP* to implement fneg.
---
Diffs of the changes: (+127 -26)
X86ATTAsmPrinter.h | 2 -
X86ISelDAGToDAG.cpp | 3 +
X86ISelLowering.cpp | 54 +++++++++++++++++++++++++++++----
X86ISelLowering.h | 8 ++++
X86InstrInfo.td | 82 +++++++++++++++++++++++++++++++++++++++++----------
X86IntelAsmPrinter.h | 4 +-
6 files changed, 127 insertions(+), 26 deletions(-)
Index: llvm/lib/Target/X86/X86ATTAsmPrinter.h
diff -u llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.4 llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.5
--- llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.4 Fri Dec 16 19:03:57 2005
+++ llvm/lib/Target/X86/X86ATTAsmPrinter.h Tue Jan 31 16:28:30 2006
@@ -59,7 +59,7 @@
void printf64mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
- void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.37 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.38
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.37 Fri Jan 27 02:10:46 2006
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Tue Jan 31 16:28:30 2006
@@ -253,7 +253,8 @@
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N)) {
AM.BaseType = X86ISelAddressMode::ConstantPoolBase;
- AM.Base.Reg = CurDAG->getTargetConstantPool(CP->get(), MVT::i32);
+ AM.Base.Reg = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
+ CP->getAlignment());
return false;
}
}
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.71 llvm/lib/Target/X86/X86ISelLowering.cpp:1.72
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.71 Tue Jan 31 13:43:35 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Tue Jan 31 16:28:30 2006
@@ -17,6 +17,7 @@
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -208,16 +209,20 @@
setOperationAction(ISD::EXTLOAD, MVT::f32, Expand);
setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);
+ // Use ANDP* (ANDPS / ANDPD) to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f64, Custom);
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP* (XORPS / XORPD) to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f64, Custom);
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
// We don't support sin/cos/sqrt/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FABS , MVT::f64, Custom);
- setOperationAction(ISD::FNEG , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FABS , MVT::f32, Custom);
- setOperationAction(ISD::FNEG , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
@@ -1567,11 +1572,44 @@
}
case ISD::FABS: {
MVT::ValueType VT = Op.getValueType();
- SDOperand Mask = (VT == MVT::f64)
- ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), MVT::f64)
- : DAG.getConstantFP(BitsToFloat (~(1U << 31)), MVT::f32);
+ const Type *OpNTy = MVT::getTypeForValueType(VT);
+ std::vector<Constant*> CV;
+ if (VT == MVT::f64) {
+ CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
+ CV.push_back(ConstantFP::get(OpNTy, 0.0));
+ } else {
+ CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
+ CV.push_back(ConstantFP::get(OpNTy, 0.0));
+ CV.push_back(ConstantFP::get(OpNTy, 0.0));
+ CV.push_back(ConstantFP::get(OpNTy, 0.0));
+ }
+ Constant *CS = ConstantStruct::get(CV);
+ SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+ SDOperand Mask
+ = DAG.getNode(X86ISD::LOAD_PACK,
+ VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}
+ case ISD::FNEG: {
+ MVT::ValueType VT = Op.getValueType();
+ const Type *OpNTy = MVT::getTypeForValueType(VT);
+ std::vector<Constant*> CV;
+ if (VT == MVT::f64) {
+ CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
+ CV.push_back(ConstantFP::get(OpNTy, 0.0));
+ } else {
+ CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
+ CV.push_back(ConstantFP::get(OpNTy, 0.0));
+ CV.push_back(ConstantFP::get(OpNTy, 0.0));
+ CV.push_back(ConstantFP::get(OpNTy, 0.0));
+ }
+ Constant *CS = ConstantStruct::get(CV);
+ SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+ SDOperand Mask
+ = DAG.getNode(X86ISD::LOAD_PACK,
+ VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
+ return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
+ }
case ISD::SETCC: {
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDOperand Cond;
@@ -1923,6 +1961,7 @@
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
@@ -1942,6 +1981,7 @@
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
case X86ISD::REP_STOS: return "X86ISD::RET_STOS";
case X86ISD::REP_MOVS: return "X86ISD::RET_MOVS";
+ case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
}
}
Index: llvm/lib/Target/X86/X86ISelLowering.h
diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.23 llvm/lib/Target/X86/X86ISelLowering.h:1.24
--- llvm/lib/Target/X86/X86ISelLowering.h:1.23 Tue Jan 31 13:43:35 2006
+++ llvm/lib/Target/X86/X86ISelLowering.h Tue Jan 31 16:28:30 2006
@@ -45,6 +45,10 @@
/// to X86::ANDPS or X86::ANDPD.
FAND,
+ /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
/// FILD - This instruction implements SINT_TO_FP with the integer source
/// in memory and FP reg result. This corresponds to the X86::FILD*m
/// instructions. It has three inputs (token chain, address, and source
@@ -137,6 +141,10 @@
/// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
REP_MOVS,
+
+ /// LOAD_PACK - Load a 128-bit packed float / double value. It has the same
+ /// operands as a normal load.
+ LOAD_PACK,
};
// X86 specific condition code. These correspond to X86_*_COND in
Index: llvm/lib/Target/X86/X86InstrInfo.td
diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.225 llvm/lib/Target/X86/X86InstrInfo.td:1.226
--- llvm/lib/Target/X86/X86InstrInfo.td:1.225 Tue Jan 31 13:43:35 2006
+++ llvm/lib/Target/X86/X86InstrInfo.td Tue Jan 31 16:28:30 2006
@@ -70,6 +70,8 @@
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest,
[SDNPOutFlag]>;
@@ -122,6 +124,9 @@
def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc,
[SDNPHasChain, SDNPOutFlag]>;
+def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
+ [SDNPHasChain]>;
+
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
@@ -140,7 +145,7 @@
def i64mem : X86MemOperand<"printi64mem">;
def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
-def f80mem : X86MemOperand<"printf80mem">;
+def f128mem : X86MemOperand<"printf128mem">;
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
@@ -357,6 +362,9 @@
def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extload node:$ptr, i1))>;
def extloadf64f32 : PatFrag<(ops node:$ptr), (f64 (extload node:$ptr, f32))>;
+def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
+def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
+
//===----------------------------------------------------------------------===//
// Instruction templates...
@@ -2566,43 +2574,51 @@
"orpd {$src2, $dst|$dst, $src2}", []>,
Requires<[HasSSE2]>, TB, OpSize;
def XORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
- "xorps {$src2, $dst|$dst, $src2}", []>,
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
Requires<[HasSSE1]>, TB;
def XORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
- "xorpd {$src2, $dst|$dst, $src2}", []>,
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
Requires<[HasSSE2]>, TB, OpSize;
}
-def ANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+def ANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
"andps {$src2, $dst|$dst, $src2}",
- []>,
+ [(set FR32:$dst, (X86fand FR32:$src1,
+ (X86loadpf32 addr:$src2)))]>,
Requires<[HasSSE1]>, TB;
-def ANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+def ANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
"andpd {$src2, $dst|$dst, $src2}",
- []>,
+ [(set FR64:$dst, (X86fand FR64:$src1,
+ (X86loadpf64 addr:$src2)))]>,
Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+def ORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
"orps {$src2, $dst|$dst, $src2}", []>,
Requires<[HasSSE1]>, TB;
-def ORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+def ORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
"orpd {$src2, $dst|$dst, $src2}", []>,
Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
- "xorps {$src2, $dst|$dst, $src2}", []>,
+def XORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1,
+ (X86loadpf32 addr:$src2)))]>,
Requires<[HasSSE1]>, TB;
-def XORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
- "xorpd {$src2, $dst|$dst, $src2}", []>,
+def XORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1,
+ (X86loadpf64 addr:$src2)))]>,
Requires<[HasSSE2]>, TB, OpSize;
def ANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
"andnps {$src2, $dst|$dst, $src2}", []>,
Requires<[HasSSE1]>, TB;
-def ANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+def ANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
"andnps {$src2, $dst|$dst, $src2}", []>,
Requires<[HasSSE1]>, TB;
def ANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
"andnpd {$src2, $dst|$dst, $src2}", []>,
Requires<[HasSSE2]>, TB, OpSize;
-def ANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+def ANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
"andnpd {$src2, $dst|$dst, $src2}", []>,
Requires<[HasSSE2]>, TB, OpSize;
@@ -2982,6 +2998,42 @@
//===----------------------------------------------------------------------===//
+// XMM Packed Floating point support (requires SSE / SSE2)
+//===----------------------------------------------------------------------===//
+
+def MOVAPSrr : I<0x28, MRMSrcMem, (ops V4F4:$dst, V4F4:$src),
+ "movaps {$src, $dst|$dst, $src}", []>,
+ Requires<[HasSSE1]>, XS;
+def MOVAPDrr : I<0x28, MRMSrcMem, (ops V2F8:$dst, V2F8:$src),
+ "movapd {$src, $dst|$dst, $src}", []>,
+ Requires<[HasSSE2]>, XD;
+
+def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F4:$dst, f128mem:$src),
+ "movaps {$src, $dst|$dst, $src}", []>,
+ Requires<[HasSSE1]>, XS;
+def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F4:$src),
+ "movaps {$src, $dst|$dst, $src}",[]>,
+ Requires<[HasSSE1]>, XD;
+def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F8:$dst, f128mem:$src),
+ "movapd {$src, $dst|$dst, $src}", []>,
+ Requires<[HasSSE1]>, XD;
+def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F8:$src),
+ "movapd {$src, $dst|$dst, $src}",[]>,
+ Requires<[HasSSE2]>, XD;
+
+// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd.
+// Upper bits are disregarded.
+def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
+ "movaps {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
+ Requires<[HasSSE1]>, XS;
+def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
+ "movapd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
+ Requires<[HasSSE1]>, XD;
+
+
+//===----------------------------------------------------------------------===//
// Miscellaneous Instructions
//===----------------------------------------------------------------------===//
Index: llvm/lib/Target/X86/X86IntelAsmPrinter.h
diff -u llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.5 llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.6
--- llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.5 Fri Dec 16 19:03:57 2005
+++ llvm/lib/Target/X86/X86IntelAsmPrinter.h Tue Jan 31 16:28:30 2006
@@ -76,8 +76,8 @@
O << "QWORD PTR ";
printMemReference(MI, OpNo);
}
- void printf80mem(const MachineInstr *MI, unsigned OpNo) {
- O << "XWORD PTR ";
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
printMemReference(MI, OpNo);
}
More information about the llvm-commits
mailing list