[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86ISelLowering.h X86InstrSSE.td
Evan Cheng
evan.cheng at apple.com
Fri Jul 7 01:34:06 PDT 2006
Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.232 -> 1.233
X86ISelLowering.h updated: 1.67 -> 1.68
X86InstrSSE.td updated: 1.128 -> 1.129
---
Log message:
X86 target-specific DAG combine: turn build_vector (load x), (load x+4),
(load x+8), (load x+12), <0, 1, 2, 3> into a single 128-bit load (aligned or
unaligned).
e.g.
__m128 test(float a, float b, float c, float d) {
return _mm_set_ps(d, c, b, a);
}
_test:
movups 4(%esp), %xmm0
ret
---
Diffs of the changes: (+164 -0)
X86ISelLowering.cpp | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++
X86ISelLowering.h | 6 ++
X86InstrSSE.td | 6 ++
3 files changed, 164 insertions(+)
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.232 llvm/lib/Target/X86/X86ISelLowering.cpp:1.233
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.232 Wed Jul 5 17:17:51 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Fri Jul 7 03:33:52 2006
@@ -349,6 +349,9 @@
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+
computeRegisterProperties();
// FIXME: These should be based on subtarget info. Plus, the values should
@@ -3751,6 +3754,7 @@
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
+ case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::S2VEC: return "X86ISD::S2VEC";
@@ -3972,6 +3976,154 @@
}
}
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
+ MVT::ValueType VT = N->getValueType(0);
+ SDOperand PermMask = N->getOperand(2);
+ unsigned NumElems = PermMask.getNumOperands();
+ SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ i %= NumElems;
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return (i == 0)
+ ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
+ } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ SDOperand Idx = PermMask.getOperand(i);
+ if (Idx.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
+ return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
+ }
+ return SDOperand();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + an offset.
+static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
+ GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ return true;
+ }
+ } else if (N->getOpcode() == ISD::ADD) {
+ SDOperand N1 = N->getOperand(0);
+ SDOperand N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.Val, GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSignExtended();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSignExtended();
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// isConsecutiveLoad - Returns true if N is loading from an address of Base
+/// + Dist * Size.
+static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
+ MachineFrameInfo *MFI) {
+ if (N->getOperand(0).Val != Base->getOperand(0).Val)
+ return false;
+
+ SDOperand Loc = N->getOperand(1);
+ SDOperand BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ int FI = dyn_cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = dyn_cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != Size) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
+ } else {
+ GlobalValue *GV1 = NULL;
+ GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
+ bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Size);
+ }
+
+ return false;
+}
+
+bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI) {
+ GlobalValue *GV;
+ int64_t Offset;
+ if (isGAPlusOffset(Base, GV, Offset))
+ return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
+ else {
+ assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
+ int BFI = dyn_cast<FrameIndexSDNode>(Base)->getIndex();
+ return MFI->getObjectAlignment(BFI) >= 16;
+ }
+ return false;
+}
+
+
+/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
+/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
+/// if the load addresses are consecutive, non-overlapping, and in the right
+/// order.
+static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MVT::ValueType VT = N->getValueType(0);
+ MVT::ValueType EVT = MVT::getVectorBaseType(VT);
+ SDOperand PermMask = N->getOperand(2);
+ int NumElems = (int)PermMask.getNumOperands();
+ SDNode *Base = NULL;
+ for (int i = 0; i < NumElems; ++i) {
+ SDOperand Idx = PermMask.getOperand(i);
+ if (Idx.getOpcode() == ISD::UNDEF) {
+ if (!Base) return SDOperand();
+ } else {
+ SDOperand Arg =
+ getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
+ if (!Arg.Val || Arg.getOpcode() != ISD::LOAD)
+ return SDOperand();
+ if (!Base)
+ Base = Arg.Val;
+ else if (!isConsecutiveLoad(Arg.Val, Base,
+ i, MVT::getSizeInBits(EVT)/8,MFI))
+ return SDOperand();
+ }
+ }
+
+ bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI);
+ if (isAlign16)
+ return DAG.getLoad(VT, Base->getOperand(0), Base->getOperand(1),
+ Base->getOperand(2));
+ else
+ // Just use movups, it's shorter.
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(X86ISD::LOAD_UA, MVT::v4f32,
+ Base->getOperand(0), Base->getOperand(1),
+ Base->getOperand(2)));
+}
+
+SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::VECTOR_SHUFFLE:
+ return PerformShuffleCombine(N, DAG);
+ }
+
+ return SDOperand();
+}
+
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
Index: llvm/lib/Target/X86/X86ISelLowering.h
diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.67 llvm/lib/Target/X86/X86ISelLowering.h:1.68
--- llvm/lib/Target/X86/X86ISelLowering.h:1.67 Sat Jun 24 03:36:10 2006
+++ llvm/lib/Target/X86/X86ISelLowering.h Fri Jul 7 03:33:52 2006
@@ -138,6 +138,10 @@
/// operands as a normal load.
LOAD_PACK,
+ /// LOAD_UA - Load an unaligned 128-bit value. It has the same operands as
+ /// a normal load.
+ LOAD_UA,
+
/// GlobalBaseReg - On Darwin, this node represents the result of the popl
/// at function entry, used for PIC code.
GlobalBaseReg,
@@ -286,6 +290,8 @@
LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth,
SelectionDAG &DAG);
+ virtual SDOperand PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
MachineBasicBlock *MBB);
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.128 llvm/lib/Target/X86/X86InstrSSE.td:1.129
--- llvm/lib/Target/X86/X86InstrSSE.td:1.128 Thu Jun 29 13:04:54 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Fri Jul 7 03:33:52 2006
@@ -19,6 +19,8 @@
def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
[SDNPHasChain]>;
+def X86loadu : SDNode<"X86ISD::LOAD_UA", SDTLoad,
+ [SDNPHasChain]>;
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
@@ -2563,3 +2565,7 @@
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
(load addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+
+// Unaligned load
+def : Pat<(v4f32 (X86loadu addr:$src)), (MOVUPSrm addr:$src)>,
+ Requires<[HasSSE1]>;
More information about the llvm-commits
mailing list