[llvm-commits] [llvm] r94338 - in /llvm/trunk: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/CodeGen/SelectionDAG/LegalizeTypes.h lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrMMX.td test/CodeGen/X86/widen_cast-2.ll test/CodeGen/X86/widen_load-1.ll test/CodeGen/X86/widen_load-2.ll
Mon P Wang
wangmp at apple.com
Sat Jan 23 16:05:03 PST 2010
Author: wangmp
Date: Sat Jan 23 18:05:03 2010
New Revision: 94338
URL: http://llvm.org/viewvc/llvm-project?rev=94338&view=rev
Log:
Improved widening loads by adding support for wider loads if
the alignment allows. Fixed a bug where we didn't use a
vector load/store for PR5626.
Added:
llvm/trunk/test/CodeGen/X86/widen_load-2.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrMMX.td
llvm/trunk/test/CodeGen/X86/widen_cast-2.ll
llvm/trunk/test/CodeGen/X86/widen_load-1.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=94338&r1=94337&r2=94338&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Sat Jan 23 18:05:03 2010
@@ -1533,10 +1533,10 @@
Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
// If EltVT smaller than OpVT, only store the bits necessary.
- if (EltVT.bitsLT(OpVT))
+ if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) {
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx, SV, Offset, EltVT));
- else
+ } else
Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx, SV, Offset));
}
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=94338&r1=94337&r2=94338&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Sat Jan 23 18:05:03 2010
@@ -633,43 +633,33 @@
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
- /// Helper genWidenVectorLoads - Helper function to generate a set of
+ /// Helper GenWidenVectorLoads - Helper function to generate a set of
/// loads to load a vector with a resulting wider type. It takes
- /// ExtType: Extension type
- /// LdChain: list of chains for the load we have generated.
- /// Chain: incoming chain for the ld vector.
- /// BasePtr: base pointer to load from.
- /// SV: memory disambiguation source value.
- /// SVOffset: memory disambiugation offset.
- /// Alignment: alignment of the memory.
- /// isVolatile: volatile load.
- /// LdWidth: width of memory that we want to load.
- /// ResType: the wider result result type for the resulting vector.
- /// dl: DebugLoc to be applied to new nodes
- SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
- SDValue BasePtr, const Value *SV,
- int SVOffset, unsigned Alignment,
- bool isVolatile, unsigned LdWidth,
- EVT ResType, DebugLoc dl);
+ /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD);
+
+ /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+ /// loads to load a ector with a resulting wider type. It takes
+ /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen
+ /// ExtType: extension element type
+ SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
/// Helper genWidenVectorStores - Helper function to generate a set of
/// stores to store a widen vector into non widen memory
- /// It takes
/// StChain: list of chains for the stores we have generated
- /// Chain: incoming chain for the ld vector
- /// BasePtr: base pointer to load from
- /// SV: memory disambiguation source value
- /// SVOffset: memory disambiugation offset
- /// Alignment: alignment of the memory
- /// isVolatile: volatile lod
- /// ValOp: value to store
- /// StWidth: width of memory that we want to store
- /// dl: DebugLoc to be applied to new nodes
- void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
- SDValue BasePtr, const Value *SV,
- int SVOffset, unsigned Alignment,
- bool isVolatile, SDValue ValOp,
- unsigned StWidth, DebugLoc dl);
+ /// ST: store of a widen value
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+ /// Helper genWidenVectorTruncStores - Helper function to generate a set of
+ /// stores to store a truncate widen vector into non widen memory
+ /// StChain: list of chains for the stores we have generated
+ /// ST: store of a widen value
+ void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST);
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=94338&r1=94337&r2=94338&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Sat Jan 23 18:05:03 2010
@@ -1655,68 +1655,24 @@
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
- EVT LdVT = LD->getMemoryVT();
- DebugLoc dl = N->getDebugLoc();
- assert(LdVT.isVector() && WidenVT.isVector());
-
- // Load information
- SDValue Chain = LD->getChain();
- SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
- unsigned Align = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- const Value *SV = LD->getSrcValue();
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Result;
SmallVector<SDValue, 16> LdChain; // Chain for the series of load
- if (ExtType != ISD::NON_EXTLOAD) {
- // For extension loads, we can not play the tricks of chopping legal
- // vector types and bit cast it to the right type. Instead, we unroll
- // the load and build a vector.
- EVT EltVT = WidenVT.getVectorElementType();
- EVT LdEltVT = LdVT.getVectorElementType();
- unsigned NumElts = LdVT.getVectorNumElements();
-
- // Load each element and widen
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
- SmallVector<SDValue, 16> Ops(WidenNumElts);
- unsigned Increment = LdEltVT.getSizeInBits() / 8;
- Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
- LdEltVT, isVolatile, Align);
- LdChain.push_back(Ops[0].getValue(1));
- unsigned i = 0, Offset = Increment;
- for (i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr, DAG.getIntPtrConstant(Offset));
- Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
- SVOffset + Offset, LdEltVT, isVolatile, Align);
- LdChain.push_back(Ops[i].getValue(1));
- }
-
- // Fill the rest with undefs
- SDValue UndefVal = DAG.getUNDEF(EltVT);
- for (; i != WidenNumElts; ++i)
- Ops[i] = UndefVal;
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
- Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
- } else {
- assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
- unsigned int LdWidth = LdVT.getSizeInBits();
- Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset,
- Align, isVolatile, LdWidth, WidenVT, dl);
- }
-
- // If we generate a single load, we can use that for the chain. Otherwise,
- // build a factor node to remember the multiple loads are independent and
- // chain to that.
- SDValue NewChain;
- if (LdChain.size() == 1)
- NewChain = LdChain[0];
- else
- NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0],
- LdChain.size());
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+ &LdChain[0], LdChain.size());
// Modified the chain - switch anything that used the old chain to use
// the new one.
@@ -1954,57 +1910,17 @@
// We have to widen the value but we want only to store the original
// vector type.
StoreSDNode *ST = cast<StoreSDNode>(N);
- SDValue Chain = ST->getChain();
- SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
- unsigned Align = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- SDValue ValOp = GetWidenedVector(ST->getValue());
- DebugLoc dl = N->getDebugLoc();
-
- EVT StVT = ST->getMemoryVT();
- EVT ValVT = ValOp.getValueType();
- // It must be true that we the widen vector type is bigger than where
- // we need to store.
- assert(StVT.isVector() && ValOp.getValueType().isVector());
- assert(StVT.bitsLT(ValOp.getValueType()));
SmallVector<SDValue, 16> StChain;
- if (ST->isTruncatingStore()) {
- // For truncating stores, we can not play the tricks of chopping legal
- // vector types and bit cast it to the right type. Instead, we unroll
- // the store.
- EVT StEltVT = StVT.getVectorElementType();
- EVT ValEltVT = ValVT.getVectorElementType();
- unsigned Increment = ValEltVT.getSizeInBits() / 8;
- unsigned NumElts = StVT.getVectorNumElements();
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset, StEltVT,
- isVolatile, Align));
- unsigned Offset = Increment;
- for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr, DAG.getIntPtrConstant(Offset));
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
- SVOffset + Offset, StEltVT,
- isVolatile, MinAlign(Align, Offset)));
- }
- }
- else {
- assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
- // Store value
- GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
- Align, isVolatile, ValOp, StVT.getSizeInBits(), dl);
- }
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
if (StChain.size() == 1)
return StChain[0];
else
- return DAG.getNode(ISD::TokenFactor, dl,
+ return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
MVT::Other,&StChain[0],StChain.size());
}
@@ -2012,179 +1928,383 @@
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
+// Utility function to find the type to chop up a widen vector for load/store
+// TLI: Target lowering used to determine legal types.
+// Width: Width left need to load/store.
+// WidenVT: The widen vector type to load to/store from
+// Align: If 0, don't allow use of a wider type
+// WidenEx: If Align is not 0, the amount additional we can load/store from.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is larger legal integer than the element type to load/store
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger vector type to load/store that has the same vector
+ // element type and is evenly divisible with the WidenVT.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
-// Utility function to find a vector type and its associated element
-// type from a preferred width and whose vector type must be the same size
-// as the VecVT.
-// TLI: Target lowering used to determine legal types.
-// Width: Preferred width to store.
-// VecVT: Vector value type whose size we must match.
-// Returns NewVecVT and NewEltVT - the vector type and its associated
-// element type.
-static void FindAssocWidenVecType(SelectionDAG& DAG,
- const TargetLowering &TLI, unsigned Width,
- EVT VecVT,
- EVT& NewEltVT, EVT& NewVecVT) {
- unsigned EltWidth = Width + 1;
- if (TLI.isTypeLegal(VecVT)) {
- // We start with the preferred with, making it a power of 2 and find a
- // legal vector type of that width. If not, we reduce it by another of 2.
- // For incoming type is legal, this process will end as a vector of the
- // smallest loadable type should always be legal.
- do {
- assert(EltWidth > 0);
- EltWidth = 1 << Log2_32(EltWidth - 1);
- NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
- unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
- NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts);
- } while (!TLI.isTypeLegal(NewVecVT) ||
- VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
- } else {
- // The incoming vector type is illegal and is the result of widening
- // a vector to a power of 2. In this case, we will use the preferred
- // with as long as it is a multiple of the incoming vector length.
- // The legalization process will eventually make this into a legal type
- // and remove the illegal bit converts (which would turn to stack converts
- // if they are allow to exist).
- do {
- assert(EltWidth > 0);
- EltWidth = 1 << Log2_32(EltWidth - 1);
- NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
- unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
- NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts);
- } while (!TLI.isTypeLegal(NewEltVT) ||
- VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+// Builds a vector type from scalar loads
+// VecTy: Resulting Vector type
+// LDOps: Load operators to build a vector type
+// [Start,End) the list of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVector<SDValue, 16>& LdOps,
+ unsigned Start, unsigned End) {
+ DebugLoc dl = LdOps[Start].getDebugLoc();
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ // Readjust position and vector position based on new load type
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getIntPtrConstant(Idx++));
}
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
}
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
- SDValue Chain,
- SDValue BasePtr,
- const Value *SV,
- int SVOffset,
- unsigned Alignment,
- bool isVolatile,
- unsigned LdWidth,
- EVT ResType,
- DebugLoc dl) {
+ LoadSDNode * LD) {
// The strategy assumes that we can efficiently load powers of two widths.
- // The routines chops the vector into the largest power of 2 load and
- // can be inserted into a legal vector and then cast the result into the
- // vector type we want. This avoids unnecessary stack converts.
+ // The routines chops the vector into the largest vector loads with the same
+ // element type or scalar loads and then recombines it to the widen vector
+ // type.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ const Value *SV = LD->getSrcValue();
- // TODO: If the Ldwidth is legal, alignment is the same as the LdWidth, and
- // the load is nonvolatile, we an use a wider load for the value.
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth; // Difference
+ unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads
// Find the vector type that can load from.
- EVT NewEltVT, NewVecVT;
- unsigned NewEltVTWidth;
- FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT);
- NewEltVTWidth = NewEltVT.getSizeInBits();
-
- SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
- isVolatile, Alignment);
- SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+ isVolatile, Align);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
- if (LdWidth == NewEltVTWidth) {
- return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+ if (LdWidth <= NewVTWidth) {
+ if (NewVT.isVector()) {
+ if (NewVT != WidenVT) {
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
+ } else
+ return LdOp;
+ } else {
+ unsigned NumElts = WidenWidth / LdWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+ }
}
- unsigned Idx = 1;
- LdWidth -= NewEltVTWidth;
+ // Load vector by using multiple loads from largest vector to scalar
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
unsigned Offset = 0;
while (LdWidth > 0) {
- unsigned Increment = NewEltVTWidth / 8;
+ unsigned Increment = NewVTWidth / 8;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
- if (LdWidth < NewEltVTWidth) {
- // Our current type we are using is too large, use a smaller size by
- // using a smaller power of 2
- unsigned oNewEltVTWidth = NewEltVTWidth;
- FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT);
- NewEltVTWidth = NewEltVT.getSizeInBits();
- // Readjust position and vector position based on new load type
- Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ if (LdWidth < NewVTWidth) {
+ // Our current type we are using is too large, find a better size
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
}
- SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV,
- SVOffset+Offset, isVolatile,
- MinAlign(Alignment, Offset));
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
+ SVOffset+Offset, isVolatile,
+ MinAlign(Align, Increment));
LdChain.push_back(LdOp.getValue(1));
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp,
- DAG.getIntPtrConstant(Idx++));
+ LdOps.push_back(LdOp);
- LdWidth -= NewEltVTWidth;
+ LdWidth -= NewVTWidth;
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
-}
+ // Build the vector from the loads operations
+ unsigned End = LdOps.size();
+ if (LdOps[0].getValueType().isVector()) {
+ // If the load contains vectors, build the vector using concat vector.
+ // All of the vectors used to loads are power of 2 and the scalars load
+ // can be combined to make a power of 2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads to a vector
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
-void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
- SDValue Chain,
- SDValue BasePtr,
- const Value *SV,
- int SVOffset,
- unsigned Alignment,
- bool isVolatile,
- SDValue ValOp,
- unsigned StWidth,
- DebugLoc dl) {
- // Breaks the stores into a series of power of 2 width stores. For any
- // width, we convert the vector to the vector of element size that we
- // want to store. This avoids requiring a stack convert.
-
- // Find a width of the element type we can store with
- EVT WidenVT = ValOp.getValueType();
- EVT NewEltVT, NewVecVT;
+ if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+ } else
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+ } else // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode * LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extended it. Instead, we unroll the load and build a new vector.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ const Value *SV = LD->getSrcValue();
- FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
- unsigned NewEltVTWidth = NewEltVT.getSizeInBits();
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
- SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
- DAG.getIntPtrConstant(0));
- SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset,
- isVolatile, Alignment);
- StChain.push_back(StOp);
-
- // Check if we are done
- if (StWidth == NewEltVTWidth) {
- return;
+ // Load each element and widen
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+ LdEltVT, isVolatile, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+ SVOffset + Offset, LdEltVT, isVolatile, Align);
+ LdChain.push_back(Ops[i].getValue(1));
}
- unsigned Idx = 1;
- StWidth -= NewEltVTWidth;
- unsigned Offset = 0;
+ // Fill the rest with undefs
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
- while (StWidth > 0) {
- unsigned Increment = NewEltVTWidth / 8;
- Offset += Increment;
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getIntPtrConstant(Increment));
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
- if (StWidth < NewEltVTWidth) {
- // Our current type we are using is too large, use a smaller size by
- // using a smaller power of 2
- unsigned oNewEltVTWidth = NewEltVTWidth;
- FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
- NewEltVTWidth = NewEltVT.getSizeInBits();
- // Readjust position and vector position based on new load type
- Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
- }
- EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store powers of two widths.
+ // The routines chops the vector into the largest vector stores with the same
+ // element type or scalar stores.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ const Value *SV = ST->getSrcValue();
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getIntPtrConstant(Idx));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset + Offset, isVolatile,
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getIntPtrConstant(Idx++));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
SVOffset + Offset, isVolatile,
- MinAlign(Alignment, Offset)));
- StWidth -= NewEltVTWidth;
+ MinAlign(Align, Offset)));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore index back to be relative to the original widen element type
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST) {
+ // For extension loads, it may not be more efficient to truncate the vector
+ // and then store it. Instead, we extract each element and then store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ const Value *SV = ST->getSrcValue();
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = ST->getDebugLoc();
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be true that we the widen vector type is bigger than where
+ // we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ // For truncating stores, we can not play the tricks of chopping legal
+ // vector types and bit cast it to the right type. Instead, we unroll
+ // the store.
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset, StEltVT,
+ isVolatile, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+ SVOffset + Offset, StEltVT,
+ isVolatile, MinAlign(Align, Offset)));
}
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=94338&r1=94337&r2=94338&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 23 18:05:03 2010
@@ -747,6 +747,12 @@
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
EVT VT = (MVT::SimpleValueType)i;
@@ -3686,6 +3692,33 @@
return SDValue();
}
+SDValue
+X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ // We support concatenate two MMX registers and place them in a MMX
+ // register. This is better than doing a stack convert.
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ResVT = Op.getValueType();
+ assert(Op.getNumOperands() == 2);
+ assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
+ ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
+ int Mask[2];
+ SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0));
+ SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+ InVec = Op.getOperand(1);
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ unsigned NumElts = ResVT.getVectorNumElements();
+ VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
+ InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
+ } else {
+ InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec);
+ SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+ Mask[0] = 0; Mask[1] = 2;
+ VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+}
+
// v8i16 shuffles - Prefer shuffles in the following order:
// 1. [all] pshuflw, pshufhw, optional move
// 2. [ssse3] 1 x pshufb
@@ -7238,6 +7271,7 @@
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=94338&r1=94337&r2=94338&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Jan 23 18:05:03 2010
@@ -156,6 +156,11 @@
/// relative displacements.
WrapperRIP,
+ /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector.
+ /// Can be used to move a vector value from a MMX register to a XMM
+ /// register.
+ MOVQ2DQ,
+
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
@@ -634,6 +639,7 @@
SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
SelectionDAG &DAG);
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG);
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=94338&r1=94337&r2=94338&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Sat Jan 23 18:05:03 2010
@@ -501,6 +501,20 @@
(iPTR imm:$src3))))]>;
}
+// MMX to XMM for vector types
+def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
+ (v2i64 (MOVQI2PQIrm addr:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert
+ (v2i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (v2i64 (MOVDI2PDIrm addr:$src))>;
+
// Mask creation
def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
Modified: llvm/trunk/test/CodeGen/X86/widen_cast-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_cast-2.ll?rev=94338&r1=94337&r2=94338&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_cast-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_cast-2.ll Sat Jan 23 18:05:03 2010
@@ -2,10 +2,8 @@
; CHECK: pextrd
; CHECK: pextrd
; CHECK: movd
-; CHECK: pextrd
-; CHECK: pextrd
-; CHECK: pextrd
-; CHECK: movd
+; CHECK: movaps
+
; bitcast v14i16 to v7i32
Modified: llvm/trunk/test/CodeGen/X86/widen_load-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_load-1.ll?rev=94338&r1=94337&r2=94338&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_load-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_load-1.ll Sat Jan 23 18:05:03 2010
@@ -3,7 +3,7 @@
; This load should be before the call, not after.
-; CHECK: movq compl+128(%rip), %xmm0
+; CHECK: movaps compl+128(%rip), %xmm0
; CHECK: movaps %xmm0, (%rsp)
; CHECK: callq killcommon
Added: llvm/trunk/test/CodeGen/X86/widen_load-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_load-2.ll?rev=94338&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_load-2.ll (added)
+++ llvm/trunk/test/CodeGen/X86/widen_load-2.ll Sat Jan 23 18:05:03 2010
@@ -0,0 +1,155 @@
+; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+
+; Test based on pr5626 to load/store
+;
+
+%i32vec3 = type <3 x i32>
+define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+ %a = load %i32vec3* %ap, align 16
+ %b = load %i32vec3* %bp, align 16
+ %x = add %i32vec3 %a, %b
+ store %i32vec3 %x, %i32vec3* %ret, align 16
+ ret void
+}
+
+define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
+; CHECK: movq
+; CHECK: pinsrd
+; CHECK: movq
+; CHECK: pinsrd
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+ %a = load %i32vec3* %ap
+ %b = load %i32vec3* %bp
+ %x = add %i32vec3 %a, %b
+ store %i32vec3 %x, %i32vec3* %ret
+ ret void
+}
+
+%i32vec7 = type <7 x i32>
+define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+; CHECK: movaps
+ %a = load %i32vec7* %ap, align 16
+ %b = load %i32vec7* %bp, align 16
+ %x = add %i32vec7 %a, %b
+ store %i32vec7 %x, %i32vec7* %ret, align 16
+ ret void
+}
+
+%i32vec12 = type <12 x i32>
+define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+ %a = load %i32vec12* %ap, align 16
+ %b = load %i32vec12* %bp, align 16
+ %x = add %i32vec12 %a, %b
+ store %i32vec12 %x, %i32vec12* %ret, align 16
+ ret void
+}
+
+
+%i16vec3 = type <3 x i16>
+define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: movd
+; CHECK: pextrw
+ %a = load %i16vec3* %ap, align 16
+ %b = load %i16vec3* %bp, align 16
+ %x = add %i16vec3 %a, %b
+ store %i16vec3 %x, %i16vec3* %ret, align 16
+ ret void
+}
+
+%i16vec4 = type <4 x i16>
+define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: movq
+ %a = load %i16vec4* %ap, align 16
+ %b = load %i16vec4* %bp, align 16
+ %x = add %i16vec4 %a, %b
+ store %i16vec4 %x, %i16vec4* %ret, align 16
+ ret void
+}
+
+%i16vec12 = type <12 x i16>
+define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: movq
+; CHECK: movaps
+ %a = load %i16vec12* %ap, align 16
+ %b = load %i16vec12* %bp, align 16
+ %x = add %i16vec12 %a, %b
+ store %i16vec12 %x, %i16vec12* %ret, align 16
+ ret void
+}
+
+%i16vec18 = type <18 x i16>
+define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: movd
+; CHECK: movaps
+; CHECK: movaps
+ %a = load %i16vec18* %ap, align 16
+ %b = load %i16vec18* %bp, align 16
+ %x = add %i16vec18 %a, %b
+ store %i16vec18 %x, %i16vec18* %ret, align 16
+ ret void
+}
+
+
+%i8vec3 = type <3 x i8>
+define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddb
+; CHECK: pextrb
+; CHECK: movb
+ %a = load %i8vec3* %ap, align 16
+ %b = load %i8vec3* %bp, align 16
+ %x = add %i8vec3 %a, %b
+ store %i8vec3 %x, %i8vec3* %ret, align 16
+ ret void
+}
+
+%i8vec31 = type <31 x i8>
+define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddb
+; CHECK: paddb
+; CHECK: movq
+; CHECK: pextrb
+; CHECK: pextrw
+ %a = load %i8vec31* %ap, align 16
+ %b = load %i8vec31* %bp, align 16
+ %x = add %i8vec31 %a, %b
+ store %i8vec31 %x, %i8vec31* %ret, align 16
+ ret void
+}
\ No newline at end of file
More information about the llvm-commits
mailing list