[llvm-commits] [llvm] r50838 - in /llvm/trunk: lib/Target/X86/X86ISelDAGToDAG.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrMMX.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_set-5.ll test/CodeGen/X86/vec_set-6.ll test/CodeGen/X86/vec_set-C.ll test/CodeGen/X86/vec_set-D.ll
Dan Gohman
gohman at apple.com
Thu May 8 14:45:12 PDT 2008
Hi Evan,
This is causing regressions on x86 in UnitTests/Vector/build and
UnitTests/Vector/simple.
llc is aborting with this message:
$ llc -f Output/build.llvm.bc -o Output/build.llc.s
Cannot yet select: 0x14081f0: v2f64 = X86ISD::ZEXT_VMOVL 0x1407fd0
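I haven't reduced the failing module yet, but as a guess at the shape
involved (this is an assumption on my part, not the actual reduced test),
a shuffle that takes the low element from a plain v2f64 register value and
zeros the high element looks like it would go through getZextVMoveL and
produce a v2f64 ZEXT_VMOVL with a register operand, which none of the new
patterns cover:

; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
; Guessed reproducer (not the reduced UnitTests case); the function name
; is just illustrative.
define <2 x double> @zero_high(<2 x double> %a) nounwind {
        ; keep element 0 of %a, zero element 1
        %r = shufflevector <2 x double> zeroinitializer, <2 x double> %a,
                           <2 x i32> < i32 2, i32 1 >
        ret <2 x double> %r
}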
Can you investigate?
Thanks,
Dan
On May 7, 2008, at 5:57 PM, Evan Cheng wrote:
> Author: evancheng
> Date: Wed May 7 19:57:18 2008
> New Revision: 50838
>
> URL: http://llvm.org/viewvc/llvm-project?rev=50838&view=rev
> Log:
> Handle vector move / load which zero the destination register top
> bits (i.e. movd, movq, movss (addr), movsd (addr)) with X86 specific
> dag combine.
>
> Added:
> llvm/trunk/test/CodeGen/X86/vec_set-C.ll
> llvm/trunk/test/CodeGen/X86/vec_set-D.ll
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrMMX.td
> llvm/trunk/lib/Target/X86/X86InstrSSE.td
> llvm/trunk/test/CodeGen/X86/vec_set-5.ll
> llvm/trunk/test/CodeGen/X86/vec_set-6.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=50838&r1=50837&r2=50838&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed May 7
> 19:57:18 2008
> @@ -975,38 +975,19 @@
>
> // Also handle the case where we explicitly require zeros in the top
> // elements. This is a vector shuffle from the zero vector.
> - if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
> + if (N.getOpcode() == X86ISD::ZEXT_VMOVL && N.Val->hasOneUse() &&
> // Check to see if the top elements are all zeros (or bitcast
> of zeros).
> - ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
> - N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
> - N.getOperand(1).Val->hasOneUse() &&
> - ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
> - N.getOperand(1).getOperand(0).hasOneUse()) {
> - // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L
> is something
> - // from the LHS.
> - unsigned
> VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
> - SDOperand ShufMask = N.getOperand(2);
> - assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid
> shuf mask!");
> - if (ConstantSDNode *C =
> dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
> - if (C->getValue() == VecWidth) {
> - for (unsigned i = 1; i != VecWidth; ++i) {
> - if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
> - // ok.
> - } else {
> - ConstantSDNode *C =
> cast<ConstantSDNode>(ShufMask.getOperand(i));
> - if (C->getValue() >= VecWidth) return false;
> - }
> - }
> - }
> -
> - // Okay, this is a zero extending load. Fold it.
> - LoadSDNode *LD =
> cast<LoadSDNode>(N.getOperand(1).getOperand(0));
> - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index,
> Disp))
> - return false;
> - OutChain = LD->getChain();
> - InChain = SDOperand(LD, 1);
> - return true;
> - }
> + N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
> + N.getOperand(0).Val->hasOneUse() &&
> + ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).Val) &&
> + N.getOperand(0).getOperand(0).hasOneUse()) {
> + // Okay, this is a zero extending load. Fold it.
> + LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
> + if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
> + return false;
> + OutChain = LD->getChain();
> + InChain = SDOperand(LD, 1);
> + return true;
> }
> return false;
> }
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=50838&r1=50837&r2=50838&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 7
> 19:57:18 2008
> @@ -2605,11 +2605,16 @@
> }
>
> /// isScalarLoadToVector - Returns true if the node is a scalar load
> that
> -/// is promoted to a vector.
> -static inline bool isScalarLoadToVector(SDNode *N) {
> +/// is promoted to a vector. It also returns the LoadSDNode by
> reference if
> +/// required.
> +static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
> if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
> N = N->getOperand(0).Val;
> - return ISD::isNON_EXTLoad(N);
> + if (ISD::isNON_EXTLoad(N)) {
> + if (LD)
> + *LD = cast<LoadSDNode>(N);
> + return true;
> + }
> }
> return false;
> }
> @@ -3082,8 +3087,16 @@
> return SDOperand();
>
> // Let legalizer expand 2-wide build_vectors.
> - if (EVTBits == 64)
> + if (EVTBits == 64) {
> + if (NumNonZero == 1) {
> + // One half is zero or undef.
> + unsigned Idx = CountTrailingZeros_32(NonZeros);
> + SDOperand V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
> + Op.getOperand(Idx));
> + return getShuffleVectorZeroOrUndef(V2, Idx, true, DAG);
> + }
> return SDOperand();
> + }
>
> // If element VT is < 32 bits, convert it to inserts into a zero
> vector.
> if (EVTBits == 8 && NumElems == 16) {
> @@ -3131,13 +3144,6 @@
> }
> }
>
> - // Take advantage of the fact GR32 to VR128 scalar_to_vector
> (i.e. movd)
> - // clears the upper bits.
> - // FIXME: we can do the same for v4f32 case when we know both
> parts of
> - // the lower half come from scalar_to_vector (loadf32). We
> should do
> - // that in post legalizer dag combiner with target specific
> hooks.
> - if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
> - return V[0];
> MVT::ValueType MaskVT =
> MVT::getIntVectorWithNumElements(NumElems);
> MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
> SmallVector<SDOperand, 8> MaskVec;
> @@ -3475,6 +3481,38 @@
> &MaskVec[0], MaskVec.size()));
> }
>
> +/// getZextVMoveL - Return a zero-extending vector move low node.
> +///
> +static SDOperand getZextVMoveL(MVT::ValueType VT, MVT::ValueType
> OpVT,
> + SDOperand SrcOp, SelectionDAG &DAG,
> + const X86Subtarget *Subtarget) {
> + if (VT == MVT::v2f64 || VT == MVT::v4f32) {
> + LoadSDNode *LD = NULL;
> + if (!isScalarLoadToVector(SrcOp.Val, &LD))
> + LD = dyn_cast<LoadSDNode>(SrcOp);
> + if (!LD) {
> + // movssrr and movsdrr do not clear top bits. Try to use
> movd, movq
> + // instead.
> + MVT::ValueType EVT = (OpVT == MVT::v2f64) ? MVT::i64 :
> MVT::i32;
> + if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
> + SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
> + SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
> + SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
> + // PR2108
> + OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
> + return DAG.getNode(ISD::BIT_CONVERT, VT,
> + DAG.getNode(X86ISD::ZEXT_VMOVL, OpVT,
> +
> DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
> +
> SrcOp.getOperand(0).getOperand(0))));
> + }
> + }
> + }
> +
> + return DAG.getNode(ISD::BIT_CONVERT, VT,
> + DAG.getNode(X86ISD::ZEXT_VMOVL, OpVT,
> + DAG.getNode(ISD::BIT_CONVERT,
> OpVT, SrcOp)));
> +}
> +
> SDOperand
> X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG
> &DAG) {
> SDOperand V1 = Op.getOperand(0);
> @@ -3515,27 +3553,33 @@
> // FIXME: Figure out a cleaner way to do this.
> // Try to make use of movq to zero out the top part.
> if (ISD::isBuildVectorAllZeros(V2.Val)) {
> - SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT,
> PermMask, DAG, *this);
> + SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT,
> PermMask,
> + DAG, *this);
> if (NewOp.Val) {
> SDOperand NewV1 = NewOp.getOperand(0);
> SDOperand NewV2 = NewOp.getOperand(1);
> SDOperand NewMask = NewOp.getOperand(2);
> if (isCommutedMOVL(NewMask.Val, true, false)) {
> NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask,
> DAG);
> - NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE,
> NewOp.getValueType(),
> - NewV1, NewV2, getMOVLMask(2, DAG));
> - return DAG.getNode(ISD::BIT_CONVERT, VT,
> LowerVECTOR_SHUFFLE(NewOp, DAG));
> + return getZextVMoveL(VT, NewOp.getValueType(), NewV2,
> DAG, Subtarget);
> }
> }
> } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
> - SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT,
> PermMask, DAG, *this);
> + SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
> + DAG, *this);
> if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
> - return DAG.getNode(ISD::BIT_CONVERT, VT,
> LowerVECTOR_SHUFFLE(NewOp, DAG));
> + return getZextVMoveL(VT, NewOp.getValueType(),
> NewOp.getOperand(1),
> + DAG, Subtarget);
> }
> }
>
> - if (X86::isMOVLMask(PermMask.Val))
> - return (V1IsUndef) ? V2 : Op;
> + if (X86::isMOVLMask(PermMask.Val)) {
> + if (V1IsUndef)
> + return V2;
> + if (ISD::isBuildVectorAllZeros(V1.Val))
> + return getZextVMoveL(VT, VT, V2, DAG, Subtarget);
> + return Op;
> + }
>
> if (X86::isMOVSHDUPMask(PermMask.Val) ||
> X86::isMOVSLDUPMask(PermMask.Val) ||
> @@ -5629,8 +5673,9 @@
> case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
> case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
> case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
> - case X86ISD::LCMPXCHG_DAG: return "x86ISD::LCMPXCHG_DAG";
> - case X86ISD::LCMPXCHG8_DAG: return "x86ISD::LCMPXCHG8_DAG";
> + case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
> + case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
> + case X86ISD::ZEXT_VMOVL: return "X86ISD::ZEXT_VMOVL";
> }
> }
>
> @@ -6192,16 +6237,46 @@
> return false;
> }
>
> -static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
> - const X86Subtarget *Subtarget) {
> +static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
> MachineFrameInfo *MFI,
> + const X86Subtarget *Subtarget) {
> GlobalValue *GV;
> int64_t Offset = 0;
> if (isGAPlusOffset(Base, GV, Offset))
> - return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
> + return (GV->getAlignment() >= N && (Offset % N) == 0);
> // DAG combine handles the stack object case.
> return false;
> }
>
> +static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask,
> + unsigned NumElems,
> MVT::ValueType EVT,
> + MachineFrameInfo *MFI,
> + SelectionDAG &DAG, SDNode
> *&Base) {
> + Base = NULL;
> + for (unsigned i = 0; i < NumElems; ++i) {
> + SDOperand Idx = PermMask.getOperand(i);
> + if (Idx.getOpcode() == ISD::UNDEF) {
> + if (!Base)
> + return false;
> + continue;
> + }
> +
> + unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
> + SDOperand Elt = getShuffleScalarElt(N, Index, DAG);
> + if (!Elt.Val ||
> + (Elt.getOpcode() != ISD::UNDEF && !
> ISD::isNON_EXTLoad(Elt.Val)))
> + return false;
> + if (!Base) {
> + Base = Elt.Val;
> + continue;
> + }
> + if (Elt.getOpcode() == ISD::UNDEF)
> + continue;
> +
> + if (!isConsecutiveLoad(Elt.Val, Base, i,
> MVT::getSizeInBits(EVT)/8,MFI))
> + return false;
> + }
> + return true;
> +}
>
> /// PerformShuffleCombine - Combine a vector_shuffle that is equal to
> /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-
> bit load
> @@ -6209,36 +6284,17 @@
> /// order.
> static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
> const X86Subtarget
> *Subtarget) {
> - MachineFunction &MF = DAG.getMachineFunction();
> - MachineFrameInfo *MFI = MF.getFrameInfo();
> + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
> MVT::ValueType VT = N->getValueType(0);
> MVT::ValueType EVT = MVT::getVectorElementType(VT);
> SDOperand PermMask = N->getOperand(2);
> unsigned NumElems = PermMask.getNumOperands();
> SDNode *Base = NULL;
> - for (unsigned i = 0; i < NumElems; ++i) {
> - SDOperand Elt = PermMask.getOperand(i);
> - if (Elt.getOpcode() == ISD::UNDEF) {
> - if (!Base)
> - return SDOperand();
> - continue;
> - }
> -
> - unsigned Idx = cast<ConstantSDNode>(Elt)->getValue();
> - SDOperand Arg = getShuffleScalarElt(N, Idx, DAG);
> - if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
> - return SDOperand();
> - if (!Base) {
> - Base = Arg.Val;
> - continue;
> - }
> -
> - if (!isConsecutiveLoad(Arg.Val, Base, i,
> MVT::getSizeInBits(EVT)/8,MFI))
> - return SDOperand();
> - }
> + if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, MFI,
> DAG, Base))
> + return SDOperand();
>
> LoadSDNode *LD = cast<LoadSDNode>(Base);
> - if (isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget))
> + if (isBaseAlignmentOfN(16, Base->getOperand(1).Val, MFI,
> Subtarget))
> return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD-
> >getSrcValue(),
> LD->getSrcValueOffset(), LD->isVolatile());
> return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD-
> >getSrcValue(),
> @@ -6319,12 +6375,13 @@
> }
>
> /// PerformSTORECombine - Do target-specific dag combines on STORE
> nodes.
> -static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG
> &DAG,
> +static SDOperand PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
> const X86Subtarget *Subtarget) {
> // Turn load->store of MMX types into GPR load/stores. This
> avoids clobbering
> // the FP state in cases where an emms may be missing.
> // A preferable solution to the general problem is to figure out
> the right
> // places to insert EMMS. This qualifies as a quick hack.
> + StoreSDNode *St = cast<StoreSDNode>(N);
> if (MVT::isVector(St->getValue().getValueType()) &&
> MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
> isa<LoadSDNode>(St->getValue()) &&
> @@ -6442,8 +6499,7 @@
> default: break;
> case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG,
> Subtarget);
> case ISD::SELECT: return PerformSELECTCombine(N, DAG,
> Subtarget);
> - case ISD::STORE:
> - return PerformSTORECombine(cast<StoreSDNode>(N), DAG,
> Subtarget);
> + case ISD::STORE: return PerformSTORECombine(N, DAG,
> Subtarget);
> case X86ISD::FXOR:
> case X86ISD::FOR: return PerformFORCombine(N, DAG);
> case X86ISD::FAND: return PerformFANDCombine(N, DAG);
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=50838&r1=50837&r2=50838&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed May 7 19:57:18
> 2008
> @@ -181,10 +181,10 @@
> /// in order to obtain suitable precision.
> FRSQRT, FRCP,
>
> - // Thread Local Storage
> + // TLSADDR, THREAD_POINTER - Thread Local Storage.
> TLSADDR, THREAD_POINTER,
>
> - // Exception Handling helpers
> + // EH_RETURN - Exception Handling helpers.
> EH_RETURN,
>
> /// TC_RETURN - Tail call return.
> @@ -194,12 +194,15 @@
> /// operand #3 optional in flag
> TC_RETURN,
>
> - // compare and swap
> + // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
> LCMPXCHG_DAG,
> LCMPXCHG8_DAG,
>
> - // Store FP control world into i16 memory
> - FNSTCW16m
> + // FNSTCW16m - Store FP control world into i16 memory.
> + FNSTCW16m,
> +
> + // ZEXT_VMOVL - Vector move low and zero extend.
> + ZEXT_VMOVL
> };
> }
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=50838&r1=50837&r2=50838&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Wed May 7 19:57:18 2008
> @@ -200,18 +200,14 @@
> // movd to MMX register zero-extends
> def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins
> GR32:$src),
> "movd\t{$src, $dst|$dst, $src}",
> - [(set VR64:$dst,
> - (v2i32 (vector_shuffle immAllZerosV,
> - (v2i32 (scalar_to_vector
> GR32:$src)),
> - MMX_MOVL_shuffle_mask)))]>;
> + [(set VR64:$dst,
> + (v2i32 (X86zvmovl (v2i32 (scalar_to_vector
> GR32:$src)))))]>;
> let AddedComplexity = 20 in
> def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins
> i32mem:$src),
> "movd\t{$src, $dst|$dst, $src}",
> - [(set VR64:$dst,
> - (v2i32 (vector_shuffle immAllZerosV,
> - (v2i32 (scalar_to_vector
> - (loadi32 addr:$src))),
> - MMX_MOVL_shuffle_mask)))]>;
> + [(set VR64:$dst,
> + (v2i32 (X86zvmovl (v2i32
> + (scalar_to_vector (loadi32 addr:
> $src))))))]>;
>
> // Arithmetic Instructions
>
> @@ -564,14 +560,10 @@
> // Move scalar to XMM zero-extended
> // movd to XMM register zero-extends
> let AddedComplexity = 15 in {
> - def : Pat<(v8i8 (vector_shuffle immAllZerosV_bc,
> - (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
> - MMX_MOVL_shuffle_mask)),
> - (MMX_MOVZDI2PDIrr GR32:$src)>;
> - def : Pat<(v4i16 (vector_shuffle immAllZerosV_bc,
> - (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
> - MMX_MOVL_shuffle_mask)),
> - (MMX_MOVZDI2PDIrr GR32:$src)>;
> + def : Pat<(v8i8 (X86zvmovl (bc_v8i8 (v2i32 (scalar_to_vector
> GR32:$src))))),
> + (MMX_MOVZDI2PDIrr GR32:$src)>;
> + def : Pat<(v4i16 (X86zvmovl (bc_v8i8 (v2i32 (scalar_to_vector
> GR32:$src))))),
> + (MMX_MOVZDI2PDIrr GR32:$src)>;
> }
>
> // Scalar to v4i16 / v8i8. The source may be a GR32, but only the
> lower
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=50838&r1=50837&r2=50838&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed May 7 19:57:18 2008
> @@ -47,6 +47,7 @@
> def X86insrtps : SDNode<"X86ISD::INSERTPS",
> SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>,
> SDTCisSameAs<0,1>,
> SDTCisVT<2, f32>,
> SDTCisPtrTy<3>]>>;
> +def X86zvmovl : SDNode<"X86ISD::ZEXT_VMOVL", SDTUnaryOp>;
>
> //===----------------------------------------------------------------------===//
> // SSE Complex Patterns
> @@ -1007,10 +1008,11 @@
> let AddedComplexity = 20 in
> def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins
> f32mem:$src),
> "movss\t{$src, $dst|$dst, $src}",
> - [(set VR128:$dst, (v4f32 (vector_shuffle
> immAllZerosV_bc,
> - (v4f32 (scalar_to_vector (loadf32
> addr:$src))),
> -
> MOVL_shuffle_mask)))]>;
> + [(set VR128:$dst, (v4f32 (X86zvmovl (v4f32
> (scalar_to_vector
> + (loadf32 addr:
> $src))))))]>;
>
> +def : Pat<(v4f32 (X86zvmovl (memopv4f32 addr:$src))),
> + (MOVZSS2PSrm addr:$src)>;
>
> //===----------------------------------------------------------------------===//
> // SSE2 Instructions
> @@ -2264,51 +2266,36 @@
> def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins
> f64mem:$src),
> "movsd\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst,
> - (v2f64 (vector_shuffle immAllZerosV_bc,
> - (v2f64 (scalar_to_vector
> - (loadf64 addr:$src))),
> - MOVL_shuffle_mask)))]>;
> + (v2f64 (X86zvmovl (v2f64 (scalar_to_vector
> + (loadf64 addr:
> $src))))))]>;
> +
> +def : Pat<(v2f64 (X86zvmovl (memopv2f64 addr:$src))),
> + (MOVZSD2PDrm addr:$src)>;
>
> // movd / movq to XMM register zero-extends
> let AddedComplexity = 15 in {
> def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins
> GR32:$src),
> "movd\t{$src, $dst|$dst, $src}",
> - [(set VR128:$dst,
> - (v4i32 (vector_shuffle immAllZerosV,
> - (v4i32 (scalar_to_vector
> GR32:$src)),
> - MOVL_shuffle_mask)))]>;
> + [(set VR128:$dst, (v4i32 (X86zvmovl
> + (v4i32 (scalar_to_vector
> GR32:$src)))))]>;
> // This is X86-64 only.
> def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins
> GR64:$src),
> "mov{d|q}\t{$src, $dst|$dst, $src}",
> - [(set VR128:$dst,
> - (v2i64 (vector_shuffle immAllZerosV_bc,
> - (v2i64 (scalar_to_vector
> GR64:$src)),
> - MOVL_shuffle_mask)))]>;
> + [(set VR128:$dst, (v2i64 (X86zvmovl
> + (v2i64 (scalar_to_vector
> GR64:$src)))))]>;
> }
>
> -// Handle the v2f64 form of 'MOVZQI2PQIrr' for PR2108. FIXME: this
> would be
> -// better written as a dag combine xform.
> -let AddedComplexity = 15 in
> -def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
> - (v2f64 (scalar_to_vector
> - (f64 (bitconvert
> GR64:$src)))),
> - MOVL_shuffle_mask)),
> - (MOVZQI2PQIrr GR64:$src)>, Requires<[HasSSE2]>;
> -
> -
> let AddedComplexity = 20 in {
> def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins
> i32mem:$src),
> "movd\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst,
> - (v4i32 (vector_shuffle immAllZerosV,
> - (v4i32 (scalar_to_vector (loadi32
> addr:$src))),
> - MOVL_shuffle_mask)))]>;
> + (v4i32 (X86zvmovl (v4i32 (scalar_to_vector
> + (loadi32 addr:
> $src))))))]>;
> def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:
> $src),
> "movq\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst,
> - (v2i64 (vector_shuffle immAllZerosV_bc,
> - (v2i64 (scalar_to_vector (loadi64
> addr:$src))),
> - MOVL_shuffle_mask)))]>, XS,
> + (v2i64 (X86zvmovl (v2i64 (scalar_to_vector
> + (loadi64 addr:
> $src))))))]>, XS,
> Requires<[HasSSE2]>;
> }
>
> @@ -2317,17 +2304,14 @@
> let AddedComplexity = 15 in
> def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins
> VR128:$src),
> "movq\t{$src, $dst|$dst, $src}",
> - [(set VR128:$dst, (v2i64 (vector_shuffle
> immAllZerosV_bc,
> - VR128:$src,
> - MOVL_shuffle_mask)))]>,
> + [(set VR128:$dst, (v2i64 (X86zvmovl (v2i64
> VR128:$src))))]>,
> XS, Requires<[HasSSE2]>;
>
> let AddedComplexity = 20 in
> def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins
> i128mem:$src),
> "movq\t{$src, $dst|$dst, $src}",
> - [(set VR128:$dst, (v2i64 (vector_shuffle
> immAllZerosV_bc,
> - (memopv2i64 addr:$src),
> - MOVL_shuffle_mask)))]>,
> + [(set VR128:$dst, (v2i64 (X86zvmovl
> + (memopv2i64 addr:
> $src))))]>,
> XS, Requires<[HasSSE2]>;
>
> //===----------------------------------------------------------------------===//
> @@ -2774,11 +2758,9 @@
> // movd to XMM register zero-extends
> let AddedComplexity = 15 in {
> // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
> -def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
> - (v2f64 (scalar_to_vector FR64:$src)),
> MOVL_shuffle_mask)),
> +def : Pat<(v2f64 (X86zvmovl (v2f64 (scalar_to_vector FR64:$src)))),
> (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
> -def : Pat<(v4f32 (vector_shuffle immAllZerosV_bc,
> - (v4f32 (scalar_to_vector FR32:$src)),
> MOVL_shuffle_mask)),
> +def : Pat<(v4f32 (X86zvmovl (v4f32 (scalar_to_vector FR32:$src)))),
> (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
> }
>
>
> Modified: llvm/trunk/test/CodeGen/X86/vec_set-5.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-5.ll?rev=50838&r1=50837&r2=50838&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_set-5.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vec_set-5.ll Wed May 7 19:57:18 2008
> @@ -1,8 +1,7 @@
> ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
> ; RUN: grep movlhps %t | count 1
> -; RUN: grep unpcklps %t | count 1
> -; RUN: grep punpckldq %t | count 1
> ; RUN: grep movq %t | count 1
> +; RUN: grep movsd %t | count 1
>
> define <4 x float> @test1(float %a, float %b) nounwind {
> %tmp = insertelement <4 x float> zeroinitializer, float %a, i32
> 0 ; <<4 x float>> [#uses=1]
>
> Modified: llvm/trunk/test/CodeGen/X86/vec_set-6.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-6.ll?rev=50838&r1=50837&r2=50838&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_set-6.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vec_set-6.ll Wed May 7 19:57:18 2008
> @@ -1,5 +1,6 @@
> ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
> -; RUN: grep unpcklps %t | count 1
> +; RUN: grep movss %t | count 1
> +; RUN: grep movups %t | count 1
> ; RUN: grep shufps %t | count 1
>
> define <4 x float> @test(float %a, float %b, float %c) nounwind {
>
> Added: llvm/trunk/test/CodeGen/X86/vec_set-C.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-C.ll?rev=50838&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_set-C.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_set-C.ll Wed May 7 19:57:18 2008
> @@ -0,0 +1,7 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
> +; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd
> +
> +define <2 x i64> @t1(i64 %x) nounwind {
> + %tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
> + ret <2 x i64> %tmp8
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/vec_set-D.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-D.ll?rev=50838&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_set-D.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_set-D.ll Wed May 7 19:57:18 2008
> @@ -0,0 +1,7 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
> +
> +define <4 x i32> @t(i32 %x, i32 %y) nounwind {
> + %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
> + %tmp2 = insertelement <4 x i32> %tmp1, i32 %y, i32 1
> + ret <4 x i32> %tmp2
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits