[llvm-branch-commits] [llvm-branch] r90985 - in /llvm/branches/Apple/Zoidberg: include/llvm/CodeGen/SelectionDAG.h include/llvm/Target/TargetLowering.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/TargetLowering.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/splat-scalar-load.ll
Evan Cheng
evan.cheng at apple.com
Wed Dec 9 13:12:35 PST 2009
Author: evancheng
Date: Wed Dec 9 15:12:35 2009
New Revision: 90985
URL: http://llvm.org/viewvc/llvm-project?rev=90985&view=rev
Log:
Merge 90917 90918 90919 90922 90925 90984.
Added:
llvm/branches/Apple/Zoidberg/test/CodeGen/X86/splat-scalar-load.ll
Modified:
llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAG.h
llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h
llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.h
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td
Modified: llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAG.h?rev=90985&r1=90984&r2=90985&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAG.h Wed Dec 9 15:12:35 2009
@@ -890,6 +890,16 @@
/// vector op and fill the end of the resulting vector with UNDEFS.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0);
+ /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+ /// location that is 'Dist' units away from the location that the 'Base' load
+ /// is loading from.
+ bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist) const;
+
+ /// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+ /// it cannot be inferred.
+ unsigned InferPtrAlignment(SDValue Ptr) const;
+
private:
bool RemoveNodeFromCSEMaps(SDNode *N);
void AddModifiedNodeToCSEMaps(SDNode *N, DAGUpdateListener *UpdateListener);
Modified: llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h?rev=90985&r1=90984&r2=90985&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h (original)
+++ llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h Wed Dec 9 15:12:35 2009
@@ -857,12 +857,6 @@
virtual bool
isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
- /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
- /// location that is 'Dist' units away from the location that the 'Base' load
- /// is loading from.
- bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes,
- int Dist, const MachineFrameInfo *MFI) const;
-
/// PerformDAGCombine - This method will be invoked for all target nodes and
/// for any target-independent nodes that the target has registered with
/// invoke it for.
Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=90985&r1=90984&r2=90985&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Dec 9 15:12:35 2009
@@ -3688,7 +3688,6 @@
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
- const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (ISD::isNON_EXTLoad(LD2) &&
LD2->hasOneUse() &&
@@ -3696,7 +3695,7 @@
// If one is volatile it might be ok, but play conservative and bail out.
!LD1->isVolatile() &&
!LD2->isVolatile() &&
- TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
unsigned Align = LD1->getAlignment();
unsigned NewAlign = TLI.getTargetData()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=90985&r1=90984&r2=90985&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Dec 9 15:12:35 2009
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -5867,6 +5868,99 @@
&Scalars[0], Scalars.size());
}
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist) const {
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+ if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+ if (V && (V->getSExtValue() == Dist*Bytes))
+ return true;
+ }
+
+ GlobalValue *GV1 = NULL;
+ GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
+ return MinAlign(GV->getAlignment(), GVOffset);
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ // FI+CST is handled as well: FrameOffset carries the constant part.
+ const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ if (MFI.isFixedObjectIndex(FrameIdx)) {
+ int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
+
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = MinAlign(ObjectOffset, StackAlign);
+
+ // Finally, the frame object itself may have a known alignment. Factor
+ // the alignment + offset into a new alignment. For example, if we know
+ // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
+ // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
+ // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
+ return std::max(Align, FIInfoAlign);
+ }
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
+
void SelectionDAG::dump() const {
errs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=90985&r1=90984&r2=90985&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp Wed Dec 9 15:12:35 2009
@@ -2184,48 +2184,6 @@
}
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
-/// location that is 'Dist' units away from the location that the 'Base' load
-/// is loading from.
-bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
- unsigned Bytes, int Dist,
- const MachineFrameInfo *MFI) const {
- if (LD->getChain() != Base->getChain())
- return false;
- EVT VT = LD->getValueType(0);
- if (VT.getSizeInBits() / 8 != Bytes)
- return false;
-
- SDValue Loc = LD->getOperand(1);
- SDValue BaseLoc = Base->getOperand(1);
- if (Loc.getOpcode() == ISD::FrameIndex) {
- if (BaseLoc.getOpcode() != ISD::FrameIndex)
- return false;
- int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
- int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI->getObjectSize(FI);
- int BFS = MFI->getObjectSize(BFI);
- if (FS != BFS || FS != (int)Bytes) return false;
- return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
- }
- if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
- if (V && (V->getSExtValue() == Dist*Bytes))
- return true;
- }
-
- GlobalValue *GV1 = NULL;
- GlobalValue *GV2 = NULL;
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
- bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1);
- bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
- if (isGA1 && isGA2 && GV1 == GV2)
- return Offset1 == (Offset2 + Dist*Bytes);
- return false;
-}
-
-
SDValue TargetLowering::
PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
// Default implementation: no optimization.
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp?rev=90985&r1=90984&r2=90985&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp Wed Dec 9 15:12:35 2009
@@ -3344,6 +3344,82 @@
}
SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) {
+
+ // Check if the scalar load can be widened into a vector load. And if
+ // the address is "base + cst" see if the cst can be "absorbed" into
+ // the shuffle mask.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. Reference stack + offset explicitly
+ // if stack pointer is at least 16-byte aligned.
+ unsigned StackAlign = Subtarget->getStackAlignment();
+ if (StackAlign < 16)
+ return SDValue();
+ Offset = MFI->getObjectOffset(FI) + Offset;
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ DAG.getConstant(Offset & ~15, getPointerTy()));
+ Offset %= 16;
+ } else {
+ MFI->setObjectAlignment(FI, 16);
+ }
+ }
+
+ // (Offset % 16) must be a multiple of 4. The address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % 16) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~15;
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+ int EltNo = (Offset - StartOffset) >> 2;
+ int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+ EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0);
+ // Canonicalize it to a v4i32 shuffle.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+ DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+ }
+
+ return SDValue();
+}
+
+SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3486,8 +3562,19 @@
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
- if (Values.size() == 1)
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+ // shuffle (vload ptr), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
return SDValue();
+ }
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
@@ -4278,7 +4365,7 @@
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = getSubtarget()->hasSSE2() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -8327,16 +8414,6 @@
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
- const TargetLowering &TLI) {
- GlobalValue *GV;
- int64_t Offset = 0;
- if (TLI.isGAPlusOffset(Base, GV, Offset))
- return (GV->getAlignment() >= N && (Offset % N) == 0);
- // DAG combine handles the stack object case.
- return false;
-}
-
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
EVT EltVT, LoadSDNode *&LDBase,
unsigned &LastLoadedElt,
@@ -8366,7 +8443,7 @@
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
return false;
LastLoadedElt = i;
}
@@ -8399,7 +8476,7 @@
return SDValue();
if (LastLoadedElt == NumElems - 1) {
- if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
+ if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
LD->isVolatile());
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.h?rev=90985&r1=90984&r2=90985&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.h Wed Dec 9 15:12:35 2009
@@ -626,7 +626,9 @@
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool isSigned);
-
+
+ SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG);
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td?rev=90985&r1=90984&r2=90985&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td Wed Dec 9 15:12:35 2009
@@ -2083,7 +2083,7 @@
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32(memopv2i64 addr:$src1)),
+ (bc_v4i32 (memopv2i64 addr:$src1)),
(undef))))]>;
}
Added: llvm/branches/Apple/Zoidberg/test/CodeGen/X86/splat-scalar-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/test/CodeGen/X86/splat-scalar-load.ll?rev=90985&view=auto
==============================================================================
--- llvm/branches/Apple/Zoidberg/test/CodeGen/X86/splat-scalar-load.ll (added)
+++ llvm/branches/Apple/Zoidberg/test/CodeGen/X86/splat-scalar-load.ll Wed Dec 9 15:12:35 2009
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; rdar://7434544
+
+define <2 x i64> @t1() nounwind ssp {
+entry:
+; CHECK: t1:
+; CHECK: pshufd $0, (%esp), %xmm0
+ %array = alloca [8 x float], align 16
+ %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0
+ %tmp2 = load float* %arrayidx
+ %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+ %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+ %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+ %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+ %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: pshufd $85, (%esp), %xmm0
+ %array = alloca [8 x float], align 4
+ %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1
+ %tmp2 = load float* %arrayidx
+ %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+ %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+ %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+ %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+ %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+ ret <2 x i64> %0
+}
+
+define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp {
+entry:
+; CHECK: t3:
+; CHECK: pshufd $-86, (%esp), %xmm0
+ %0 = insertelement <4 x float> undef, float %tmp3, i32 0
+ %1 = insertelement <4 x float> %0, float %tmp3, i32 1
+ %2 = insertelement <4 x float> %1, float %tmp3, i32 2
+ %3 = insertelement <4 x float> %2, float %tmp3, i32 3
+ ret <4 x float> %3
+}
More information about the llvm-branch-commits
mailing list