[llvm-commits] [llvm] r50838 - in /llvm/trunk: lib/Target/X86/X86ISelDAGToDAG.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrMMX.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_set-5.ll test/CodeGen/X86/vec_set-6.ll test/CodeGen/X86/vec_set-C.ll test/CodeGen/X86/vec_set-D.ll
Evan Cheng
evan.cheng at apple.com
Thu May 8 15:35:18 PDT 2008
Fixed.
Evan
On May 8, 2008, at 2:45 PM, Dan Gohman wrote:
> Hi Evan,
>
> This is causing regressions on x86 in UnitTests/Vector/build
> and UnitTests/Vector/simple.
>
> llc is aborting with this message:
>
> $ llc -f Output/build.llvm.bc -o Output/build.llc.s
> Cannot yet select: 0x14081f0: v2f64 = X86ISD::ZEXT_VMOVL 0x1407fd0
>
> Can you investigate?
>
> Thanks,
>
> Dan
>
> On May 7, 2008, at 5:57 PM, Evan Cheng wrote:
>
>> Author: evancheng
>> Date: Wed May 7 19:57:18 2008
>> New Revision: 50838
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=50838&view=rev
>> Log:
>> Handle vector move / load which zero the destination register top
>> bits (i.e. movd, movq, movss (addr), movsd (addr)) with X86 specific
>> dag combine.
>>
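As a concrete picture of what the log message describes (a sketch, not part of the commit): a scalar load inserted into a zero vector should now select to a single zero-extending move such as movss. The function name below is made up, in the style of the vec_set tests added by this change:

  ; A scalar load inserted into a zero vector should select to one
  ; movss load, which zeros the upper three elements of the register.
  define <4 x float> @zload(float* %p) nounwind {
    %f = load float* %p
    %v = insertelement <4 x float> zeroinitializer, float %f, i32 0
    ret <4 x float> %v
  }
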
>> Added:
>> llvm/trunk/test/CodeGen/X86/vec_set-C.ll
>> llvm/trunk/test/CodeGen/X86/vec_set-D.ll
>> Modified:
>> llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
>> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> llvm/trunk/lib/Target/X86/X86InstrMMX.td
>> llvm/trunk/lib/Target/X86/X86InstrSSE.td
>> llvm/trunk/test/CodeGen/X86/vec_set-5.ll
>> llvm/trunk/test/CodeGen/X86/vec_set-6.ll
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=50838&r1=50837&r2=50838&view=diff
>>
>> ======================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed May 7 19:57:18 2008
>> @@ -975,38 +975,19 @@
>>
>> // Also handle the case where we explicitly require zeros in the top
>> // elements. This is a vector shuffle from the zero vector.
>> -  if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
>> +  if (N.getOpcode() == X86ISD::ZEXT_VMOVL && N.Val->hasOneUse() &&
>>       // Check to see if the top elements are all zeros (or bitcast of zeros).
>> -      ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
>> -      N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
>> -      N.getOperand(1).Val->hasOneUse() &&
>> -      ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
>> -      N.getOperand(1).getOperand(0).hasOneUse()) {
>> -    // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
>> -    // from the LHS.
>> -    unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
>> -    SDOperand ShufMask = N.getOperand(2);
>> -    assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
>> -    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
>> -      if (C->getValue() == VecWidth) {
>> -        for (unsigned i = 1; i != VecWidth; ++i) {
>> -          if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
>> -            // ok.
>> -          } else {
>> -            ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
>> -            if (C->getValue() >= VecWidth) return false;
>> -          }
>> -        }
>> -      }
>> -
>> -      // Okay, this is a zero extending load. Fold it.
>> -      LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
>> -      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
>> -        return false;
>> -      OutChain = LD->getChain();
>> -      InChain = SDOperand(LD, 1);
>> -      return true;
>> -    }
>> +      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
>> +      N.getOperand(0).Val->hasOneUse() &&
>> +      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).Val) &&
>> +      N.getOperand(0).getOperand(0).hasOneUse()) {
>> +    // Okay, this is a zero extending load. Fold it.
>> +    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
>> +    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
>> +      return false;
>> +    OutChain = LD->getChain();
>> +    InChain = SDOperand(LD, 1);
>> +    return true;
>>    }
>>    return false;
>>  }
>> return false;
>> }
>>
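The rewritten check above now keys on X86ISD::ZEXT_VMOVL feeding a SCALAR_TO_VECTOR of a single-use, non-extending load, and folds the load's address directly into the zero-extending move. A hedged sketch of IR that should take this path (hypothetical function name; f64 flavor):

  ; With a single-use load the address folds away, so this should become
  ; a single "movsd (mem), %xmm0" instead of a load plus a shuffle.
  define <2 x double> @zload64(double* %p) nounwind {
    %d = load double* %p
    %v = insertelement <2 x double> zeroinitializer, double %d, i32 0
    ret <2 x double> %v
  }
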
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=50838&r1=50837&r2=50838&view=diff
>>
>> ======================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 7 19:57:18 2008
>> @@ -2605,11 +2605,16 @@
>> }
>>
>>  /// isScalarLoadToVector - Returns true if the node is a scalar load that
>> -/// is promoted to a vector.
>> -static inline bool isScalarLoadToVector(SDNode *N) {
>> +/// is promoted to a vector. It also returns the LoadSDNode by reference if
>> +/// required.
>> +static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
>>    if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
>>      N = N->getOperand(0).Val;
>> -    return ISD::isNON_EXTLoad(N);
>> +    if (ISD::isNON_EXTLoad(N)) {
>> +      if (LD)
>> +        *LD = cast<LoadSDNode>(N);
>> +      return true;
>> +    }
>>    }
>>    return false;
>>  }
>> @@ -3082,8 +3087,16 @@
>> return SDOperand();
>>
>> // Let legalizer expand 2-wide build_vectors.
>> - if (EVTBits == 64)
>> + if (EVTBits == 64) {
>> + if (NumNonZero == 1) {
>> + // One half is zero or undef.
>> + unsigned Idx = CountTrailingZeros_32(NonZeros);
>> + SDOperand V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
>> + Op.getOperand(Idx));
>> + return getShuffleVectorZeroOrUndef(V2, Idx, true, DAG);
>> + }
>> return SDOperand();
>> + }
>>
>>   // If element VT is < 32 bits, convert it to inserts into a zero vector.
>> if (EVTBits == 8 && NumElems == 16) {
>> @@ -3131,13 +3144,6 @@
>> }
>> }
>>
>> -  // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
>> -  // clears the upper bits.
>> -  // FIXME: we can do the same for v4f32 case when we know both parts of
>> -  // the lower half come from scalar_to_vector (loadf32). We should do
>> -  // that in post legalizer dag combiner with target specific hooks.
>> -  if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
>> -    return V[0];
>>    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
>> MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
>> SmallVector<SDOperand, 8> MaskVec;
>> @@ -3475,6 +3481,38 @@
>> &MaskVec[0], MaskVec.size()));
>> }
>>
>> +/// getZextVMoveL - Return a zero-extending vector move low node.
>> +///
>> +static SDOperand getZextVMoveL(MVT::ValueType VT, MVT::ValueType OpVT,
>> +                               SDOperand SrcOp, SelectionDAG &DAG,
>> +                               const X86Subtarget *Subtarget) {
>> +  if (VT == MVT::v2f64 || VT == MVT::v4f32) {
>> +    LoadSDNode *LD = NULL;
>> +    if (!isScalarLoadToVector(SrcOp.Val, &LD))
>> +      LD = dyn_cast<LoadSDNode>(SrcOp);
>> +    if (!LD) {
>> +      // movssrr and movsdrr do not clear top bits. Try to use movd, movq
>> +      // instead.
>> +      MVT::ValueType EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
>> +      if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
>> +          SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
>> +          SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
>> +          SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
>> +        // PR2108
>> +        OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
>> +        return DAG.getNode(ISD::BIT_CONVERT, VT,
>> +                           DAG.getNode(X86ISD::ZEXT_VMOVL, OpVT,
>> +                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
>> +                                                   SrcOp.getOperand(0).getOperand(0))));
>> +      }
>> +    }
>> +  }
>> +
>> +  return DAG.getNode(ISD::BIT_CONVERT, VT,
>> +                     DAG.getNode(X86ISD::ZEXT_VMOVL, OpVT,
>> +                                 DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
>> +}
>> +
>>  SDOperand
>>  X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
>> SDOperand V1 = Op.getOperand(0);
>> @@ -3515,27 +3553,33 @@
>> // FIXME: Figure out a cleaner way to do this.
>> // Try to make use of movq to zero out the top part.
>> if (ISD::isBuildVectorAllZeros(V2.Val)) {
>> -      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
>> +      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
>> +                                                 DAG, *this);
>>        if (NewOp.Val) {
>>          SDOperand NewV1 = NewOp.getOperand(0);
>>          SDOperand NewV2 = NewOp.getOperand(1);
>>          SDOperand NewMask = NewOp.getOperand(2);
>>          if (isCommutedMOVL(NewMask.Val, true, false)) {
>>            NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
>> -          NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
>> -                              NewV1, NewV2, getMOVLMask(2, DAG));
>> -          return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
>> +          return getZextVMoveL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
>>          }
>>        }
>>      } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
>> -      SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
>> +      SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
>> +                                                DAG, *this);
>>        if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
>> -        return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
>> +        return getZextVMoveL(VT, NewOp.getValueType(), NewOp.getOperand(1),
>> +                             DAG, Subtarget);
>> }
>> }
>>
>> - if (X86::isMOVLMask(PermMask.Val))
>> - return (V1IsUndef) ? V2 : Op;
>> + if (X86::isMOVLMask(PermMask.Val)) {
>> + if (V1IsUndef)
>> + return V2;
>> + if (ISD::isBuildVectorAllZeros(V1.Val))
>> + return getZextVMoveL(VT, VT, V2, DAG, Subtarget);
>> + return Op;
>> + }
>>
>> if (X86::isMOVSHDUPMask(PermMask.Val) ||
>> X86::isMOVSLDUPMask(PermMask.Val) ||
>> @@ -5629,8 +5673,9 @@
>> case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
>> case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
>> case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
>> - case X86ISD::LCMPXCHG_DAG: return "x86ISD::LCMPXCHG_DAG";
>> - case X86ISD::LCMPXCHG8_DAG: return "x86ISD::LCMPXCHG8_DAG";
>> + case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
>> + case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
>> + case X86ISD::ZEXT_VMOVL: return "X86ISD::ZEXT_VMOVL";
>> }
>> }
>>
>> @@ -6192,16 +6237,46 @@
>> return false;
>> }
>>
>> -static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
>> - const X86Subtarget *Subtarget) {
>> +static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, MachineFrameInfo *MFI,
>> +                               const X86Subtarget *Subtarget) {
>>    GlobalValue *GV;
>>    int64_t Offset = 0;
>>    if (isGAPlusOffset(Base, GV, Offset))
>> -    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
>> +    return (GV->getAlignment() >= N && (Offset % N) == 0);
>>    // DAG combine handles the stack object case.
>>    return false;
>>  }
>>
>> +static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask,
>> +                                     unsigned NumElems, MVT::ValueType EVT,
>> +                                     MachineFrameInfo *MFI,
>> +                                     SelectionDAG &DAG, SDNode *&Base) {
>> +  Base = NULL;
>> +  for (unsigned i = 0; i < NumElems; ++i) {
>> +    SDOperand Idx = PermMask.getOperand(i);
>> +    if (Idx.getOpcode() == ISD::UNDEF) {
>> +      if (!Base)
>> +        return false;
>> +      continue;
>> +    }
>> +
>> +    unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
>> +    SDOperand Elt = getShuffleScalarElt(N, Index, DAG);
>> +    if (!Elt.Val ||
>> +        (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
>> +      return false;
>> +    if (!Base) {
>> +      Base = Elt.Val;
>> +      continue;
>> +    }
>> +    if (Elt.getOpcode() == ISD::UNDEF)
>> +      continue;
>> +
>> +    if (!isConsecutiveLoad(Elt.Val, Base, i, MVT::getSizeInBits(EVT)/8,MFI))
>> +      return false;
>> +  }
>> +  return true;
>> +}
>>
>> /// PerformShuffleCombine - Combine a vector_shuffle that is equal to
>> /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
>> @@ -6209,36 +6284,17 @@
>> /// order.
>> static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
>>                                         const X86Subtarget *Subtarget) {
>> - MachineFunction &MF = DAG.getMachineFunction();
>> - MachineFrameInfo *MFI = MF.getFrameInfo();
>> + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
>> MVT::ValueType VT = N->getValueType(0);
>> MVT::ValueType EVT = MVT::getVectorElementType(VT);
>> SDOperand PermMask = N->getOperand(2);
>> unsigned NumElems = PermMask.getNumOperands();
>> SDNode *Base = NULL;
>> -  for (unsigned i = 0; i < NumElems; ++i) {
>> -    SDOperand Elt = PermMask.getOperand(i);
>> -    if (Elt.getOpcode() == ISD::UNDEF) {
>> -      if (!Base)
>> -        return SDOperand();
>> -      continue;
>> -    }
>> -
>> -    unsigned Idx = cast<ConstantSDNode>(Elt)->getValue();
>> -    SDOperand Arg = getShuffleScalarElt(N, Idx, DAG);
>> -    if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
>> -      return SDOperand();
>> -    if (!Base) {
>> -      Base = Arg.Val;
>> -      continue;
>> -    }
>> -
>> -    if (!isConsecutiveLoad(Arg.Val, Base, i, MVT::getSizeInBits(EVT)/8,MFI))
>> -      return SDOperand();
>> -  }
>> +  if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, MFI, DAG, Base))
>> +    return SDOperand();
>>
>> LoadSDNode *LD = cast<LoadSDNode>(Base);
>> - if (isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget))
>> +  if (isBaseAlignmentOfN(16, Base->getOperand(1).Val, MFI, Subtarget))
>>      return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
>>                         LD->getSrcValueOffset(), LD->isVolatile());
>>    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
>> @@ -6319,12 +6375,13 @@
>> }
>>
>>  /// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
>> -static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
>> +static SDOperand PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
>>                                       const X86Subtarget *Subtarget) {
>>    // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
>>    // the FP state in cases where an emms may be missing.
>>    // A preferable solution to the general problem is to figure out the right
>>    // places to insert EMMS. This qualifies as a quick hack.
>> + StoreSDNode *St = cast<StoreSDNode>(N);
>> if (MVT::isVector(St->getValue().getValueType()) &&
>> MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
>> isa<LoadSDNode>(St->getValue()) &&
>> @@ -6442,8 +6499,7 @@
>> default: break;
>>    case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
>>    case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
>> -  case ISD::STORE:
>> -    return PerformSTORECombine(cast<StoreSDNode>(N), DAG, Subtarget);
>> +  case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
>> case X86ISD::FXOR:
>> case X86ISD::FOR: return PerformFORCombine(N, DAG);
>> case X86ISD::FAND: return PerformFANDCombine(N, DAG);
>>
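The subtle piece above is getZextVMoveL: register-to-register movss/movsd do not clear the upper elements, so for a register source of the PR2108 shape the node is retargeted to an integer type where movd/movq apply. Roughly the shape involved, as a sketch (hypothetical name, not a test from the commit):

  ; i64 bitcast to double, inserted into a zero vector: rewritten to an
  ; integer ZEXT_VMOVL so it can select to movq on x86-64.
  define <2 x double> @pr2108(i64 %x) nounwind {
    %d = bitcast i64 %x to double
    %v = insertelement <2 x double> zeroinitializer, double %d, i32 0
    ret <2 x double> %v
  }
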
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=50838&r1=50837&r2=50838&view=diff
>>
>> ======================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed May 7 19:57:18 2008
>> @@ -181,10 +181,10 @@
>> /// in order to obtain suitable precision.
>> FRSQRT, FRCP,
>>
>> - // Thread Local Storage
>> +    // TLSADDR, THREAD_POINTER - Thread Local Storage.
>> TLSADDR, THREAD_POINTER,
>>
>> - // Exception Handling helpers
>> + // EH_RETURN - Exception Handling helpers.
>> EH_RETURN,
>>
>> /// TC_RETURN - Tail call return.
>> @@ -194,12 +194,15 @@
>> /// operand #3 optional in flag
>> TC_RETURN,
>>
>> - // compare and swap
>> + // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
>> LCMPXCHG_DAG,
>> LCMPXCHG8_DAG,
>>
>> - // Store FP control world into i16 memory
>> - FNSTCW16m
>> + // FNSTCW16m - Store FP control world into i16 memory.
>> + FNSTCW16m,
>> +
>> + // ZEXT_VMOVL - Vector move low and zero extend.
>> + ZEXT_VMOVL
>> };
>> }
>>
>>
>> Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=50838&r1=50837&r2=50838&view=diff
>>
>> ======================================================================
>> --- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Wed May 7 19:57:18 2008
>> @@ -200,18 +200,14 @@
>> // movd to MMX register zero-extends
>>  def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
>>                              "movd\t{$src, $dst|$dst, $src}",
>> -                            [(set VR64:$dst,
>> -                              (v2i32 (vector_shuffle immAllZerosV,
>> -                                      (v2i32 (scalar_to_vector GR32:$src)),
>> -                                      MMX_MOVL_shuffle_mask)))]>;
>> +                            [(set VR64:$dst,
>> +                              (v2i32 (X86zvmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
>>  let AddedComplexity = 20 in
>>  def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
>>                              "movd\t{$src, $dst|$dst, $src}",
>> -                            [(set VR64:$dst,
>> -                              (v2i32 (vector_shuffle immAllZerosV,
>> -                                      (v2i32 (scalar_to_vector
>> -                                              (loadi32 addr:$src))),
>> -                                      MMX_MOVL_shuffle_mask)))]>;
>> +                            [(set VR64:$dst,
>> +                              (v2i32 (X86zvmovl (v2i32
>> +                                                 (scalar_to_vector (loadi32 addr:$src))))))]>;
>>
>> // Arithmetic Instructions
>>
>> @@ -564,14 +560,10 @@
>> // Move scalar to XMM zero-extended
>> // movd to XMM register zero-extends
>> let AddedComplexity = 15 in {
>> - def : Pat<(v8i8 (vector_shuffle immAllZerosV_bc,
>> - (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
>> - MMX_MOVL_shuffle_mask)),
>> - (MMX_MOVZDI2PDIrr GR32:$src)>;
>> - def : Pat<(v4i16 (vector_shuffle immAllZerosV_bc,
>> - (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
>> - MMX_MOVL_shuffle_mask)),
>> - (MMX_MOVZDI2PDIrr GR32:$src)>;
>> +  def : Pat<(v8i8  (X86zvmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
>> +            (MMX_MOVZDI2PDIrr GR32:$src)>;
>> +  def : Pat<(v4i16 (X86zvmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
>> +            (MMX_MOVZDI2PDIrr GR32:$src)>;
>> }
>>
>>  // Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
>>
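The MMX patterns mirror the SSE ones: movd into an MMX register zero-extends to 64 bits. Assuming <2 x i32> lowers to VR64 as usual, IR of this shape should now match MMX_MOVZDI2PDIrr (a sketch, not a test from the commit):

  ; Scalar i32 into a zero MMX vector: one movd suffices because the
  ; instruction clears the upper 32 bits of the destination.
  define <2 x i32> @mmx_zext(i32 %x) nounwind {
    %v = insertelement <2 x i32> zeroinitializer, i32 %x, i32 0
    ret <2 x i32> %v
  }
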
>> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=50838&r1=50837&r2=50838&view=diff
>>
>> ======================================================================
>> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed May 7 19:57:18 2008
>> @@ -47,6 +47,7 @@
>>  def X86insrtps : SDNode<"X86ISD::INSERTPS",
>>                          SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
>>                                               SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
>> +def X86zvmovl : SDNode<"X86ISD::ZEXT_VMOVL", SDTUnaryOp>;
>>
>> //===----------------------------------------------------------------------===//
>> // SSE Complex Patterns
>> @@ -1007,10 +1008,11 @@
>> let AddedComplexity = 20 in
>>  def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
>>                        "movss\t{$src, $dst|$dst, $src}",
>> -                      [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV_bc,
>> -                                 (v4f32 (scalar_to_vector (loadf32 addr:$src))),
>> -                                 MOVL_shuffle_mask)))]>;
>> +                      [(set VR128:$dst, (v4f32 (X86zvmovl (v4f32 (scalar_to_vector
>> +                                                           (loadf32 addr:$src))))))]>;
>>
>> +def : Pat<(v4f32 (X86zvmovl (memopv4f32 addr:$src))),
>> + (MOVZSS2PSrm addr:$src)>;
>>
>> //===----------------------------------------------------------------------===//
>> // SSE2 Instructions
>> @@ -2264,51 +2266,36 @@
>>  def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
>>                        "movsd\t{$src, $dst|$dst, $src}",
>>                        [(set VR128:$dst,
>> -                        (v2f64 (vector_shuffle immAllZerosV_bc,
>> -                                (v2f64 (scalar_to_vector
>> -                                        (loadf64 addr:$src))),
>> -                                MOVL_shuffle_mask)))]>;
>> +                        (v2f64 (X86zvmovl (v2f64 (scalar_to_vector
>> +                                                  (loadf64 addr:$src))))))]>;
>> +
>> +def : Pat<(v2f64 (X86zvmovl (memopv2f64 addr:$src))),
>> +          (MOVZSD2PDrm addr:$src)>;
>>
>>  // movd / movq to XMM register zero-extends
>>  let AddedComplexity = 15 in {
>>  def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
>>                         "movd\t{$src, $dst|$dst, $src}",
>> -                       [(set VR128:$dst,
>> -                         (v4i32 (vector_shuffle immAllZerosV,
>> -                                 (v4i32 (scalar_to_vector GR32:$src)),
>> -                                 MOVL_shuffle_mask)))]>;
>> +                       [(set VR128:$dst, (v4i32 (X86zvmovl
>> +                                          (v4i32 (scalar_to_vector GR32:$src)))))]>;
>>  // This is X86-64 only.
>>  def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
>>                          "mov{d|q}\t{$src, $dst|$dst, $src}",
>> -                        [(set VR128:$dst,
>> -                          (v2i64 (vector_shuffle immAllZerosV_bc,
>> -                                  (v2i64 (scalar_to_vector GR64:$src)),
>> -                                  MOVL_shuffle_mask)))]>;
>> +                        [(set VR128:$dst, (v2i64 (X86zvmovl
>> +                                           (v2i64 (scalar_to_vector GR64:$src)))))]>;
>>  }
>>
>> -// Handle the v2f64 form of 'MOVZQI2PQIrr' for PR2108. FIXME: this would be
>> -// better written as a dag combine xform.
>> -let AddedComplexity = 15 in
>> -def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
>> -                  (v2f64 (scalar_to_vector
>> -                          (f64 (bitconvert GR64:$src)))),
>> -                  MOVL_shuffle_mask)),
>> -          (MOVZQI2PQIrr GR64:$src)>, Requires<[HasSSE2]>;
>> -
>> -
>>  let AddedComplexity = 20 in {
>>  def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
>>                         "movd\t{$src, $dst|$dst, $src}",
>>                         [(set VR128:$dst,
>> -                         (v4i32 (vector_shuffle immAllZerosV,
>> -                                 (v4i32 (scalar_to_vector (loadi32 addr:$src))),
>> -                                 MOVL_shuffle_mask)))]>;
>> +                         (v4i32 (X86zvmovl (v4i32 (scalar_to_vector
>> +                                                   (loadi32 addr:$src))))))]>;
>>  def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
>>                       "movq\t{$src, $dst|$dst, $src}",
>>                       [(set VR128:$dst,
>> -                       (v2i64 (vector_shuffle immAllZerosV_bc,
>> -                               (v2i64 (scalar_to_vector (loadi64 addr:$src))),
>> -                               MOVL_shuffle_mask)))]>, XS,
>> +                       (v2i64 (X86zvmovl (v2i64 (scalar_to_vector
>> +                                                 (loadi64 addr:$src))))))]>, XS,
>>                       Requires<[HasSSE2]>;
>>  }
>>
>> @@ -2317,17 +2304,14 @@
>> let AddedComplexity = 15 in
>>  def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
>>                          "movq\t{$src, $dst|$dst, $src}",
>> -                        [(set VR128:$dst, (v2i64 (vector_shuffle immAllZerosV_bc,
>> -                                                  VR128:$src,
>> -                                                  MOVL_shuffle_mask)))]>,
>> +                        [(set VR128:$dst, (v2i64 (X86zvmovl (v2i64 VR128:$src))))]>,
>>                          XS, Requires<[HasSSE2]>;
>>
>>  let AddedComplexity = 20 in
>>  def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
>>                          "movq\t{$src, $dst|$dst, $src}",
>> -                        [(set VR128:$dst, (v2i64 (vector_shuffle immAllZerosV_bc,
>> -                                                  (memopv2i64 addr:$src),
>> -                                                  MOVL_shuffle_mask)))]>,
>> +                        [(set VR128:$dst, (v2i64 (X86zvmovl
>> +                                                  (memopv2i64 addr:$src))))]>,
>>                          XS, Requires<[HasSSE2]>;
>>
>> //===----------------------------------------------------------------------===//
>> @@ -2774,11 +2758,9 @@
>> // movd to XMM register zero-extends
>> let AddedComplexity = 15 in {
>> // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
>> -def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
>> -                  (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
>> +def : Pat<(v2f64 (X86zvmovl (v2f64 (scalar_to_vector FR64:$src)))),
>>            (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
>> -def : Pat<(v4f32 (vector_shuffle immAllZerosV_bc,
>> -                  (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
>> +def : Pat<(v4f32 (X86zvmovl (v4f32 (scalar_to_vector FR32:$src)))),
>>            (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
>> }
>>
>>
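The new "X86zvmovl (memopv2i64 ...)" pattern above handles keeping the low half of a whole-vector load while zeroing the high half. A sketch of IR in that shape (hypothetical test; mask element 2 selects the zero vector's low element):

  ; Should select to a single movq load: low i64 kept, high i64 zeroed.
  define <2 x i64> @zlow(<2 x i64>* %p) nounwind {
    %t = load <2 x i64>* %p
    %s = shufflevector <2 x i64> %t, <2 x i64> zeroinitializer,
                       <2 x i32> <i32 0, i32 2>
    ret <2 x i64> %s
  }
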
>> Modified: llvm/trunk/test/CodeGen/X86/vec_set-5.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-5.ll?rev=50838&r1=50837&r2=50838&view=diff
>>
>> ======================================================================
>> --- llvm/trunk/test/CodeGen/X86/vec_set-5.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vec_set-5.ll Wed May 7 19:57:18 2008
>> @@ -1,8 +1,7 @@
>> ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
>> ; RUN: grep movlhps %t | count 1
>> -; RUN: grep unpcklps %t | count 1
>> -; RUN: grep punpckldq %t | count 1
>> ; RUN: grep movq %t | count 1
>> +; RUN: grep movsd %t | count 1
>>
>> define <4 x float> @test1(float %a, float %b) nounwind {
>>  %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 ; <<4 x float>> [#uses=1]
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vec_set-6.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-6.ll?rev=50838&r1=50837&r2=50838&view=diff
>>
>> ======================================================================
>> --- llvm/trunk/test/CodeGen/X86/vec_set-6.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vec_set-6.ll Wed May 7 19:57:18 2008
>> @@ -1,5 +1,6 @@
>> ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
>> -; RUN: grep unpcklps %t | count 1
>> +; RUN: grep movss %t | count 1
>> +; RUN: grep movups %t | count 1
>> ; RUN: grep shufps %t | count 1
>>
>> define <4 x float> @test(float %a, float %b, float %c) nounwind {
>>
>> Added: llvm/trunk/test/CodeGen/X86/vec_set-C.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-C.ll?rev=50838&view=auto
>>
>> ======================================================================
>> --- llvm/trunk/test/CodeGen/X86/vec_set-C.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/vec_set-C.ll Wed May 7 19:57:18 2008
>> @@ -0,0 +1,7 @@
>> +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
>> +; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd
>> +
>> +define <2 x i64> @t1(i64 %x) nounwind {
>> + %tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
>> + ret <2 x i64> %tmp8
>> +}
>>
>> Added: llvm/trunk/test/CodeGen/X86/vec_set-D.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-D.ll?rev=50838&view=auto
>>
>> ======================================================================
>> --- llvm/trunk/test/CodeGen/X86/vec_set-D.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/vec_set-D.ll Wed May 7 19:57:18 2008
>> @@ -0,0 +1,7 @@
>> +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
>> +
>> +define <4 x i32> @t(i32 %x, i32 %y) nounwind {
>> + %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
>> + %tmp2 = insertelement <4 x i32> %tmp1, i32 %y, i32 1
>> + ret <4 x i32> %tmp2
>> +}
>>
>>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list