[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86ISelLowering.h X86InstrSSE.td
Evan Cheng
evan.cheng at apple.com
Thu Apr 20 18:05:22 PDT 2006
Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.178 -> 1.179
X86ISelLowering.h updated: 1.57 -> 1.58
X86InstrSSE.td updated: 1.109 -> 1.110
---
Log message:
Now generating perfect (I think) code for "vector set" with a single non-zero
scalar value.
e.g.
_mm_set_epi32(0, a, 0, 0);
==>
movd 4(%esp), %xmm0
pshufd $69, %xmm0, %xmm0
_mm_set_epi8(0, 0, 0, 0, 0, a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
==>
movzbw 4(%esp), %ax
movzwl %ax, %eax
pxor %xmm0, %xmm0
pinsrw $5, %eax, %xmm0
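(If you want to check the encodings by hand: 69 is 0x45, i.e. pshufd
selector fields 1, 1, 0, 1 from low element to high, so the result is
{src[1], src[1], src[0], src[1]} = {0, 0, a, 0}, which puts a in element 2,
exactly where _mm_set_epi32(0, a, 0, 0) wants it. In the second example the
non-zero byte is element 10 of the v16i8; pinsrw inserts 16-bit words, and
byte 10 is the low byte of word 10 / 2 = 5, hence the zero extension of the
byte followed by pinsrw $5 into a pxor'ed register.)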
---
Diffs of the changes: (+175 -105)
X86ISelLowering.cpp | 198 +++++++++++++++++++++++++++++++++-------------------
X86ISelLowering.h | 11 +-
X86InstrSSE.td | 71 +++++++++++-------
3 files changed, 175 insertions(+), 105 deletions(-)
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.178 llvm/lib/Target/X86/X86ISelLowering.cpp:1.179
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.178 Thu Apr 20 03:58:49 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Thu Apr 20 20:05:10 2006
@@ -1687,11 +1687,12 @@
return true;
}
-/// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVS{S|D}.
-static bool isMOVSMask(std::vector<SDOperand> &N) {
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
+static bool isMOVLMask(std::vector<SDOperand> &N) {
unsigned NumElems = N.size();
- if (NumElems != 2 && NumElems != 4)
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
if (!isUndefOrEqual(N[0], NumElems))
@@ -1706,18 +1707,18 @@
return true;
}
-bool X86::isMOVSMask(SDNode *N) {
+bool X86::isMOVLMask(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR);
std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
- return ::isMOVSMask(Ops);
+ return ::isMOVLMask(Ops);
}
-/// isCommutedMOVS - Returns true if the shuffle mask is except the reverse
-/// of what x86 movs want. X86 movs requires the lowest element to be lowest
+/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
+/// x86 MOVL wants. X86 MOVL requires the lowest element to be the lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
-static bool isCommutedMOVS(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
+static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
unsigned NumElems = Ops.size();
- if (NumElems != 2 && NumElems != 4)
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
if (!isUndefOrEqual(Ops[0], 0))
@@ -1737,10 +1738,10 @@
return true;
}
-static bool isCommutedMOVS(SDNode *N, bool V2IsSplat = false) {
+static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
assert(N->getOpcode() == ISD::BUILD_VECTOR);
std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
- return isCommutedMOVS(Ops);
+ return isCommutedMOVL(Ops, V2IsSplat);
}
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
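For reference, the mask shape the renamed predicate accepts takes the low
element from V2 and the remaining elements from V1 in order. A minimal
standalone sketch (plain indices instead of SDOperands, and ignoring the
undef cases the real predicate also accepts):

  #include <vector>

  // Mask[i] in [0, N) selects V1[i]; Mask[i] in [N, 2N) selects V2[i - N].
  static bool isMOVLMaskSketch(const std::vector<unsigned> &Mask) {
    unsigned N = Mask.size();
    if (N != 2 && N != 4 && N != 8 && N != 16)
      return false;
    if (Mask[0] != N)            // Low element must come from V2[0].
      return false;
    for (unsigned i = 1; i != N; ++i)
      if (Mask[i] != i)          // Remaining elements from V1, in order.
        return false;
    return true;
  }

e.g. for v4f32 the only fully-specified mask accepted is <4, 1, 2, 3>, which
is exactly what register-register movss performs.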
@@ -2055,9 +2056,9 @@
return Mask;
}
-/// getMOVSMask - Returns a vector_shuffle mask for an movs{s|d} operation
-/// of specified width.
-static SDOperand getMOVSMask(unsigned NumElems, SelectionDAG &DAG) {
+/// getMOVLMask - Returns a vector_shuffle mask for a MOVS{S|D} or MOVD
+/// operation of the specified width.
+static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
@@ -2095,30 +2096,63 @@
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
}
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
+ assert(MVT::isVector(VT) && "Expected a vector type");
+ unsigned NumElems = getVectorNumElements(VT);
+ MVT::ValueType EVT = MVT::getVectorBaseType(VT);
+ bool isFP = MVT::isFloatingPoint(EVT);
+ SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
+ std::vector<SDOperand> ZeroVec(NumElems, Zero);
+ return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec);
+}
+
/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
- SDOperand PermMask = Op.getOperand(2);
+ SDOperand Mask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
- unsigned NumElems = PermMask.getNumOperands();
- PermMask = getUnpacklMask(NumElems, DAG);
+ unsigned NumElems = Mask.getNumOperands();
+ Mask = getUnpacklMask(NumElems, DAG);
while (NumElems != 4) {
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, PermMask);
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
NumElems >>= 1;
}
V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- SDOperand Zero = DAG.getConstant(0, MVT::getVectorBaseType(MaskVT));
- std::vector<SDOperand> ZeroVec(4, Zero);
- SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, ZeroVec);
+ Mask = getZeroVector(MaskVT, DAG);
SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
- DAG.getNode(ISD::UNDEF, MVT::v4i32),
- SplatMask);
+ DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+static inline bool isZeroNode(SDOperand Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->getValue() == 0) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
+}
+
+/// getShuffleVectorAgainstZero - Return a vector_shuffle of a zero vector and
+/// the specified vector.
+static SDOperand getShuffleVectorAgainstZero(SDOperand Vec, MVT::ValueType VT,
+ unsigned NumElems, unsigned Idx,
+ SelectionDAG &DAG) {
+ SDOperand ZeroV = getZeroVector(VT, DAG);
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
+ SDOperand Zero = DAG.getConstant(0, EVT);
+ std::vector<SDOperand> MaskVec(NumElems, Zero);
+ MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, ZeroV, Vec, Mask);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
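The new getShuffleVectorAgainstZero helper is easier to read with a concrete
width. For NumElems = 4 and Idx = 0 it builds the mask <4, 0, 0, 0> over the
operand pair (ZeroV, Vec): shuffle index 4 selects Vec[0] and index 0 selects
ZeroV[0], so the result keeps Vec's low element and zeroes everything else.
A hypothetical call site (names made up for illustration):

  // Zero-extend the low i32 element of Vec into a full v4i32.
  SDOperand Zext = getShuffleVectorAgainstZero(Vec, MVT::v4i32, 4, 0, DAG);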
@@ -2924,7 +2958,6 @@
SDOperand PermMask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
- bool V2IsSplat = isSplatVector(V2.Val);
if (isSplatMask(PermMask.Val)) {
if (NumElems <= 4) return Op;
@@ -2932,7 +2965,7 @@
return PromoteSplat(Op, DAG);
}
- if (X86::isMOVSMask(PermMask.Val) ||
+ if (X86::isMOVLMask(PermMask.Val) ||
X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
X86::isMOVHLPSMask(PermMask.Val) ||
@@ -2944,15 +2977,30 @@
ShouldXformToMOVLP(V1.Val, PermMask.Val))
return CommuteVectorShuffle(Op, DAG);
- if (isCommutedMOVS(PermMask.Val, V2IsSplat)) {
+ bool V1IsSplat = isSplatVector(V1.Val);
+ bool V2IsSplat = isSplatVector(V2.Val);
+ if (V1IsSplat && !V2IsSplat) {
+ Op = CommuteVectorShuffle(Op, DAG);
+ V1 = Op.getOperand(0);
+ V2 = Op.getOperand(1);
+ PermMask = Op.getOperand(2);
+ V2IsSplat = true;
+ }
+
+ if (isCommutedMOVL(PermMask.Val, V2IsSplat)) {
+ Op = CommuteVectorShuffle(Op, DAG);
+ V1 = Op.getOperand(0);
+ V2 = Op.getOperand(1);
+ PermMask = Op.getOperand(2);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selector won't like this. Get
// a corrected mask and form a proper MOVL.
- SDOperand NewMask = getMOVSMask(NumElems, DAG);
- Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
+ SDOperand NewMask = getMOVLMask(NumElems, DAG);
+ if (NewMask.Val != PermMask.Val)
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
- return CommuteVectorShuffle(Op, DAG);
+ return Op;
}
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
@@ -3088,48 +3136,60 @@
if (ISD::isBuildVectorAllOnes(Op.Val))
return Op;
- std::set<SDOperand> Values;
- SDOperand Elt0 = Op.getOperand(0);
- Values.insert(Elt0);
- bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
- cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
- (isa<ConstantFPSDNode>(Elt0) &&
- cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0));
- bool RestAreZero = true;
unsigned NumElems = Op.getNumOperands();
- for (unsigned i = 1; i < NumElems; ++i) {
- SDOperand Elt = Op.getOperand(i);
- if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
- if (!FPC->isExactlyValue(+0.0))
- RestAreZero = false;
- } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
- if (!C->isNullValue())
- RestAreZero = false;
- } else
- RestAreZero = false;
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType EVT = MVT::getVectorBaseType(VT);
+ std::vector<unsigned> NonZeros;
+ std::set<SDOperand> Values;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ unsigned Idx = NumElems - i - 1;
+ SDOperand Elt = Op.getOperand(Idx);
Values.insert(Elt);
+ if (!isZeroNode(Elt))
+ NonZeros.push_back(Idx);
}
- if (RestAreZero) {
- if (Elt0IsZero) return Op;
+ if (NonZeros.size() == 0)
+ return Op;
- // Zero extend a scalar to a vector.
- if (Elt0.getValueType() != MVT::i64)
- return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
-
- // See if we can turn it into a f64 op.
- bool IsLegal = false;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt0)) {
- Elt0 = DAG.getConstantFP(BitsToDouble(C->getValue()), MVT::f64);
- IsLegal = true;
- } else if (Elt0.getOpcode() == ISD::LOAD) {
- Elt0 = DAG.getLoad(MVT::f64, Elt0.getOperand(0), Elt0.getOperand(1),
- Elt0.getOperand(2));
- IsLegal = true;
- }
- if (IsLegal)
- return DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64,
- DAG.getNode(X86ISD::ZEXT_S2VEC, MVT::v2f64, Elt0));
+ if (NonZeros.size() == 1) {
+ unsigned Idx = NonZeros[0];
+ SDOperand Item = Op.getOperand(Idx);
+ if (Idx == 0 || MVT::getSizeInBits(EVT) >= 32)
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
+ if (Idx == 0)
+ return getShuffleVectorAgainstZero(Item, VT, NumElems, Idx, DAG);
+
+ // If the element VT is < 32 bits, turn it into an insert into a zero vector.
+ if (MVT::getSizeInBits(EVT) <= 16) {
+ SDOperand ZeroV;
+ if (EVT == MVT::i8) {
+ Item = DAG.getNode(ISD::ANY_EXTEND, MVT::i16, Item);
+ if ((Idx % 2) != 0)
+ Item = DAG.getNode(ISD::SHL, MVT::i16,
+ Item, DAG.getConstant(8, MVT::i8));
+ Idx /= 2;
+ ZeroV = getZeroVector(MVT::v8i16, DAG);
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, ZeroV, Item,
+ DAG.getConstant(Idx, MVT::i32)));
+ } else {
+ ZeroV = getZeroVector(VT, DAG);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, VT, ZeroV, Item,
+ DAG.getConstant(Idx, MVT::i32));
+ }
+ }
+
+ // Turn it into a shuffle of the zero vector and a zero-extended scalar_to_vector.
+ Item = getShuffleVectorAgainstZero(Item, VT, NumElems, 0, DAG);
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
+ std::vector<SDOperand> MaskVec;
+ for (unsigned i = 0; i < NumElems; i++)
+ MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
+ DAG.getNode(ISD::UNDEF, VT), Mask);
}
if (Values.size() > 2) {
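Walking the _mm_set_epi8 example from the log message through this code: the
only non-zero byte is element 10 of the v16i8, so EVT is i8 and we take the
pinsrw path. The byte is any-extended to i16; Idx = 10 is even, so no shift
by 8 is needed; Idx becomes 10 / 2 = 5, and the INSERT_VECTOR_ELT of the
extended byte into word 5 of a zero v8i16 selects to the pxor + pinsrw $5
sequence shown above. Had the byte sat at an odd index, say 11, it would
first be shifted left by 8 so that it lands in the high byte of word 5.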
@@ -3138,7 +3198,6 @@
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
- MVT::ValueType VT = Op.getValueType();
SDOperand PermMask = getUnpacklMask(NumElems, DAG);
std::vector<SDOperand> V(NumElems);
for (unsigned i = 0; i < NumElems; ++i)
@@ -3406,7 +3465,6 @@
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::S2VEC: return "X86ISD::S2VEC";
- case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC";
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
}
@@ -3514,7 +3572,7 @@
if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
if (NumElts == 2) return true;
if (NumElts == 4) {
- return (isMOVSMask(BVOps) || isCommutedMOVS(BVOps, true) ||
+ return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) ||
isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
}
return false;
Index: llvm/lib/Target/X86/X86ISelLowering.h
diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.57 llvm/lib/Target/X86/X86ISelLowering.h:1.58
--- llvm/lib/Target/X86/X86ISelLowering.h:1.57 Thu Apr 20 03:58:49 2006
+++ llvm/lib/Target/X86/X86ISelLowering.h Thu Apr 20 20:05:10 2006
@@ -150,10 +150,6 @@
/// have to match the operand type.
S2VEC,
- /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base
- /// does not have to match the operand type.
- ZEXT_S2VEC,
-
/// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRW.
PEXTRW,
@@ -230,9 +226,10 @@
/// <0, 0, 1, 1>
bool isUNPCKL_v_undef_Mask(SDNode *N);
- /// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVS{S|D}.
- bool isMOVSMask(SDNode *N);
+ /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSS,
+ /// MOVSD, and MOVD, i.e. setting the lowest element.
+ bool isMOVLMask(SDNode *N);
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.109 llvm/lib/Target/X86/X86InstrSSE.td:1.110
--- llvm/lib/Target/X86/X86InstrSSE.td:1.109 Wed Apr 19 16:15:24 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Thu Apr 20 20:05:10 2006
@@ -29,8 +29,6 @@
[SDNPOutFlag]>;
def X86s2vec : SDNode<"X86ISD::S2VEC",
SDTypeProfile<1, 1, []>, []>;
-def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
- SDTypeProfile<1, 1, []>, []>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
SDTypeProfile<1, 2, []>, []>;
def X86pinsrw : SDNode<"X86ISD::PINSRW",
@@ -104,8 +102,8 @@
return X86::isMOVLPMask(N);
}]>;
-def MOVS_shuffle_mask : PatLeaf<(build_vector), [{
- return X86::isMOVSMask(N);
+def MOVL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVLMask(N);
}]>;
def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{
@@ -2194,20 +2192,18 @@
"movss {$src2, $dst|$dst, $src2}", []>;
def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
"movsd {$src2, $dst|$dst, $src2}", []>;
-def MOVLDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
- "movd {$src2, $dst|$dst, $src2}", []>;
let AddedComplexity = 20 in {
def MOVLPSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"movss {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
- MOVS_shuffle_mask)))]>;
+ MOVL_shuffle_mask)))]>;
def MOVLPDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"movsd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
- MOVS_shuffle_mask)))]>;
+ MOVL_shuffle_mask)))]>;
}
}
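Concretely, for v4f32 the MOVL_shuffle_mask here is <4, 1, 2, 3>, so
MOVLPSrr emits movss: the low scalar of $src2 replaces the low element of
$src1 and the upper three elements of $src1 are kept.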
@@ -2223,23 +2219,36 @@
// Move to the lower bits of a VR128, zeroing the upper bits.
// A load from memory automatically zeroes the upper bits.
+let AddedComplexity = 20 in {
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
"movss {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ MOVL_shuffle_mask)))]>;
def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
"movsd {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
+ [(set VR128:$dst, (v2f64 (vector_shuffle immAllZerosV,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ MOVL_shuffle_mask)))]>;
+// movd / movq to an XMM register zero-extends the upper bits
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (vector_shuffle immAllZerosV,
+ (v4i32 (scalar_to_vector R32:$src)),
+ MOVL_shuffle_mask)))]>;
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
"movd {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
+ [(set VR128:$dst, (v4i32 (vector_shuffle immAllZerosV,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))),
+ MOVL_shuffle_mask)))]>;
+def MOVZQI2PQIrr : PDI<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+ "movq {$src, $dst|$dst, $src}", []>;
def MOVZQI2PQIrm : PDI<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
"movq {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (bc_v2i64 (v2f64 (X86zexts2vec
- (loadf64 addr:$src)))))]>;
+ [(set VR128:$dst, (bc_v2i64 (vector_shuffle immAllZerosV,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ MOVL_shuffle_mask)))]>;
+}
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
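These pattern updates are the td-side of removing X86ISD::ZEXT_S2VEC: the
lowering code now emits a plain vector_shuffle of immAllZerosV against a
scalar_to_vector, and the MOVZ* instructions match that shuffle form via
MOVL_shuffle_mask instead of matching a custom node. The instruction
semantics are unchanged; for example, movss from memory still loads 4 bytes
and zeroes the upper 96 bits of the destination register.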
@@ -2341,17 +2350,23 @@
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>,
Requires<[HasSSE2]>;
-// Zeroing a VR128 then do a MOVS* to the lower bits.
-def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
+// Move a scalar to an XMM register with the upper bits zeroed:
+// movd to an XMM register zero-extends
+let AddedComplexity = 20 in {
+def : Pat<(v8i16 (vector_shuffle immAllZerosV,
+ (v8i16 (X86s2vec R32:$src)), MOVL_shuffle_mask)),
+ (MOVZDI2PDIrr R32:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v16i8 (vector_shuffle immAllZerosV,
+ (v16i8 (X86s2vec R32:$src)), MOVL_shuffle_mask)),
+ (MOVZDI2PDIrr R32:$src)>, Requires<[HasSSE2]>;
+// Zero a VR128, then do a MOVS{S|D} into the lower bits.
+def : Pat<(v2f64 (vector_shuffle immAllZerosV,
+ (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
(MOVLSD2PDrr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
+def : Pat<(v4f32 (vector_shuffle immAllZerosV,
+ (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
(MOVLSS2PSrr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (X86zexts2vec R32:$src)),
- (MOVLDI2PDIrr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 (X86zexts2vec R16:$src)),
- (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
-def : Pat<(v16i8 (X86zexts2vec R8:$src)),
- (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
+}
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
@@ -2448,13 +2463,13 @@
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
- MOVS_shuffle_mask)),
+ MOVL_shuffle_mask)),
(MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)),
MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
- MOVS_shuffle_mask)),
+ MOVL_shuffle_mask)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (loadv2i64 addr:$src2),
MOVHP_shuffle_mask)),