[llvm-commits] [llvm] r166486 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/2012-01-18-vbitcast.ll test/CodeGen/X86/2012-03-
Michael Liao
michael.liao at intel.com
Mon Oct 29 09:15:07 PDT 2012
Please file a bug. I believe it's just a missing pattern issue.
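
For reference, the dump below shows a v8i32 X86ISD::VZEXT whose operand is a
256-bit v16i16 formed by inserting a v8i16 subvector into undef, and the
patterns this commit adds only cover a VZEXT taken directly from a 128-bit
register or from a scalar_to_vector load, so nothing matches. Purely as a
hypothetical, untested sketch (the real fix might instead look through the
insert_subvector during lowering or in performVZEXTCombine), the missing AVX2
pattern could be along these lines:

// Hypothetical sketch only -- the instruction name follows the existing
// VPMOVZX patterns, but the insert_subvector/iPTR pattern syntax has not
// been checked against this revision of the tree.
def : Pat<(v8i32 (X86vzext (v16i16 (insert_subvector undef,
                                      (v8i16 VR128:$src), (iPTR 0))))),
          (VPMOVZXWDYrr VR128:$src)>;

This would just match the same zero extension as the existing
(v8i32 (X86vzext (v8i16 VR128:$src))) -> VPMOVZXWDYrr pattern, only through
the widened v16i16 wrapper seen in the dump.
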
- Michael
On Mon, 2012-10-29 at 08:03 -0700, Kuperstein, Michael M wrote:
> Hi,
> It looks like this broke one of our AVX2 tests.
>
> Reduced version:
>
> define <8 x i32> @foo(<8 x i1> %bar) nounwind readnone {
> entry:
> %s = sext <8 x i1> %bar to <8 x i32>
> ret <8 x i32> %s
> }
>
> When compiling with -mattr=+avx2 I get:
>
> LLVM ERROR: Cannot select: 0xb2c250: v8i32 = X86ISD::VZEXT 0xb2cc50 [ID=12]
>   0xb2cc50: v16i16 = insert_subvector 0xb2ee50, 0xb2eb50, 0xb2f150 [ID=11]
>     0xb2ee50: v16i16 = undef [ID=6]
>     0xb2eb50: v8i16 = bitcast 0xb2cb50 [ID=10]
>       0xb2cb50: v2i64,ch = load 0xaf8de0, 0xb2bf50, 0xb2c050<LD16[<unknown>]> [ID=9]
>         0xb2bf50: i64,ch = CopyFromReg 0xaf8de0, 0xb2be50 [ORD=1] [ID=8]
>           0xb2be50: i64 = Register %vreg0 [ORD=1] [ID=1]
>         0xb2c050: i64 = undef [ORD=1] [ID=2]
>     0xb2f150: i64 = Constant<0> [ID=7]
> In function: foo
>
> I didn't investigate further.
> Michael, want me to open a bug on this?
>
> -----Original Message-----
> From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Michael Liao
> Sent: Tuesday, October 23, 2012 19:34
> To: llvm-commits at cs.uiuc.edu
> Subject: [llvm-commits] [llvm] r166486 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/2012-01-18-vbitcast.ll test/CodeGen/X86/2012-03-1...
>
> Author: hliao
> Date: Tue Oct 23 12:34:00 2012
> New Revision: 166486
>
> URL: http://llvm.org/viewvc/llvm-project?rev=166486&view=rev
> Log:
> Enable lowering ZERO_EXTEND/ANY_EXTEND to PMOVZX from SSE4.1
>
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> llvm/trunk/lib/Target/X86/X86InstrSSE.td
> llvm/trunk/test/CodeGen/X86/2012-01-18-vbitcast.ll
> llvm/trunk/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
> llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll
> llvm/trunk/test/CodeGen/X86/pointer-vector.ll
> llvm/trunk/test/CodeGen/X86/promote.ll
> llvm/trunk/test/CodeGen/X86/trunc-ext-ld-st.ll
> llvm/trunk/test/CodeGen/X86/vec_compare-2.ll
> llvm/trunk/test/CodeGen/X86/widen_load-2.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Oct 23 12:34:00 2012
> @@ -6562,6 +6562,78 @@
>                                getShuffleSHUFImmediate(SVOp), DAG);
>  }
>
> +// Reduce a vector shuffle to zext.
> +SDValue
> +X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
> +  // PMOVZX is only available from SSE41.
> +  if (!Subtarget->hasSSE41())
> +    return SDValue();
> +
> +  EVT VT = Op.getValueType();
> +
> +  // Only AVX2 support 256-bit vector integer extending.
> +  if (!Subtarget->hasAVX2() && VT.is256BitVector())
> +    return SDValue();
> +
> +  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
> +  DebugLoc DL = Op.getDebugLoc();
> +  SDValue V1 = Op.getOperand(0);
> +  SDValue V2 = Op.getOperand(1);
> +  unsigned NumElems = VT.getVectorNumElements();
> +
> +  // Extending is an unary operation and the element type of the source vector
> +  // won't be equal to or larger than i64.
> +  if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
> +      VT.getVectorElementType() == MVT::i64)
> +    return SDValue();
> +
> +  // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
> +  unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
> +  while ((1 << Shift) < NumElems) {
> +    if (SVOp->getMaskElt(1 << Shift) == 1)
> +      break;
> +    Shift += 1;
> +    // The maximal ratio is 8, i.e. from i8 to i64.
> +    if (Shift > 3)
> +      return SDValue();
> +  }
> +
> +  // Check the shuffle mask.
> +  unsigned Mask = (1U << Shift) - 1;
> +  for (unsigned i = 0; i != NumElems; ++i) {
> +    int EltIdx = SVOp->getMaskElt(i);
> +    if ((i & Mask) != 0 && EltIdx != -1)
> +      return SDValue();
> +    if ((i & Mask) == 0 && EltIdx != (i >> Shift))
> +      return SDValue();
> +  }
> +
> +  unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
> +  EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
> +  EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
> +
> +  if (!isTypeLegal(NVT))
> +    return SDValue();
> +
> +  // Simplify the operand as it's prepared to be fed into shuffle.
> +  unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
> +  if (V1.getOpcode() == ISD::BITCAST &&
> +      V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
> +      V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
> +      V1.getOperand(0)
> +        .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
> +    // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
> +    SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
> +    // If it's foldable, i.e. normal load with single use, we will let code
> +    // selection to fold it. Otherwise, we will short the conversion sequence.
> +    if (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())
> +      V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
> +  }
> +
> +  return DAG.getNode(ISD::BITCAST, DL, VT,
> +                     DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
> +}
> +
> SDValue
> X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
>    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
> @@ -6592,6 +6664,11 @@
> return PromoteSplat(SVOp, DAG);
> }
>
> + // Check integer expanding shuffles.
> +  SDValue NewOp = lowerVectorIntExtend(Op, DAG);
> +  if (NewOp.getNode())
> + return NewOp;
> +
> // If the shuffle can be profitably rewritten as a narrower shuffle, then
> // do it!
>    if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
> @@ -11825,6 +11902,8 @@
> case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
> case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
> case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
> + case X86ISD::VZEXT: return "X86ISD::VZEXT";
> + case X86ISD::VSEXT: return "X86ISD::VSEXT";
> case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
> case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
> case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
> @@ -16529,6 +16608,21 @@
>    return OptimizeConditionalInDecrement(N, DAG);
>  }
>
> +/// performVZEXTCombine - Performs build vector combines
> +static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
> +                                   TargetLowering::DAGCombinerInfo &DCI,
> +                                   const X86Subtarget *Subtarget)
> +{
> +  // (vzext (bitcast (vzext (x)) -> (vzext x)
> +  SDValue In = N->getOperand(0);
> +  while (In.getOpcode() == ISD::BITCAST)
> +    In = In.getOperand(0);
> +
> +  if (In.getOpcode() != X86ISD::VZEXT)
> +    return SDValue();
> +
> +  return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(),
> +                     N->getValueType(0), In.getOperand(0));
> +}
> +
> SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
> DAGCombinerInfo &DCI) const {
> SelectionDAG &DAG = DCI.DAG;
> @@ -16569,6 +16663,7 @@
> case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
> case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
> case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
> + case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
> case X86ISD::SHUFP: // Handle all target specific shuffles
> case X86ISD::PALIGN:
> case X86ISD::UNPCKH:
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Oct 23 12:34:00 2012
> @@ -236,6 +236,12 @@
> // VSEXT_MOVL - Vector move low and sign extend.
> VSEXT_MOVL,
>
> + // VZEXT - Vector integer zero-extend.
> + VZEXT,
> +
> + // VSEXT - Vector integer signed-extend.
> + VSEXT,
> +
> // VFPEXT - Vector FP extend.
> VFPEXT,
>
> @@ -832,6 +838,8 @@
>
> SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
>
> + SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
> +
> virtual SDValue
> LowerFormalArguments(SDValue Chain,
> CallingConv::ID CallConv, bool isVarArg,
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Oct 23 12:34:00 2012
> @@ -90,6 +90,14 @@
> def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
> [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
>
> +def X86vzext  : SDNode<"X86ISD::VZEXT",
> +                       SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
> +                                            SDTCisInt<0>, SDTCisInt<1>]>>;
> +
> +def X86vsext  : SDNode<"X86ISD::VSEXT",
> +                       SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
> +                                            SDTCisInt<0>, SDTCisInt<1>]>>;
> +
> def X86vfpext : SDNode<"X86ISD::VFPEXT",
> SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
> SDTCisFP<0>, SDTCisFP<1>]>>;
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Oct 23 12:34:00 2012
> @@ -5841,6 +5841,81 @@
> (PMOVZXBQrm addr:$src)>;
> }
>
> +let Predicates = [HasAVX2] in {
> +  def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>;
> +  def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>;
> +  def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>;
> +
> +  def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>;
> +  def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;
> +
> +  def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;
> +}
> +
> +let Predicates = [HasAVX] in {
> +  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>;
> +  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>;
> +  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>;
> +
> +  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>;
> +  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>;
> +
> +  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>;
> +
> + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
> + (VPMOVZXBWrm addr:$src)>;
> + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
> + (VPMOVZXBWrm addr:$src)>;
> + def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
> + (VPMOVZXBDrm addr:$src)>;
> + def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
> + (VPMOVZXBQrm addr:$src)>;
> +
> + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
> + (VPMOVZXWDrm addr:$src)>;
> + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
> + (VPMOVZXWDrm addr:$src)>;
> + def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
> + (VPMOVZXWQrm addr:$src)>;
> +
> + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
> + (VPMOVZXDQrm addr:$src)>;
> + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
> + (VPMOVZXDQrm addr:$src)>;
> +}
> +
> +let Predicates = [UseSSE41] in {
> +  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>;
> +  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>;
> +  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>;
> +
> +  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>;
> +  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>;
> +
> +  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>;
> +
> + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
> + (PMOVZXBWrm addr:$src)>;
> + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
> + (PMOVZXBWrm addr:$src)>;
> + def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
> + (PMOVZXBDrm addr:$src)>;
> + def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
> + (PMOVZXBQrm addr:$src)>;
> +
> + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
> + (PMOVZXWDrm addr:$src)>;
> + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
> + (PMOVZXWDrm addr:$src)>;
> + def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
> + (PMOVZXWQrm addr:$src)>;
> +
> + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
> + (PMOVZXDQrm addr:$src)>;
> + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
> + (PMOVZXDQrm addr:$src)>;
> +}
> +
> //===----------------------------------------------------------------------===//
> // SSE4.1 - Extract Instructions
> //===----------------------------------------------------------------------===//
>
> Modified: llvm/trunk/test/CodeGen/X86/2012-01-18-vbitcast.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-01-18-vbitcast.ll?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2012-01-18-vbitcast.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2012-01-18-vbitcast.ll Tue Oct 23 12:34:00 2012
> @@ -2,8 +2,8 @@
>
> ;CHECK: vcast
> define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
> -;CHECK: pshufd
> -;CHECK: pshufd
> +;CHECK: pmovzxdq
> +;CHECK: pmovzxdq
> %af = bitcast <2 x float> %a to <2 x i32>
> %bf = bitcast <2 x float> %b to <2 x i32>
> %x = sub <2 x i32> %af, %bf
>
> Modified: llvm/trunk/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-03-15-build_vector_wl.ll?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2012-03-15-build_vector_wl.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2012-03-15-build_vector_wl.ll Tue Oct 23 12:34:00 2012
> @@ -4,7 +4,7 @@
> define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone {
> entry:
>    %out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
> -; CHECK: shufb
> +; CHECK: pmovzxbd
> ret <4 x i8> %out
> ; CHECK: ret
> }
>
> Modified: llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll Tue Oct 23 12:34:00 2012
> @@ -3,7 +3,7 @@
> ; CHECK: load_store
> define void @load_store(<4 x i16>* %in) {
> entry:
> -; CHECK: movsd
> +; CHECK: pmovzxwd
> %A27 = load <4 x i16>* %in, align 4
> %A28 = add <4 x i16> %A27, %A27
> ; CHECK: movlpd
> @@ -27,6 +27,6 @@
> BB:
> %t = load <2 x i32>* %ptr
> ret <2 x i32> %t
> -;CHECK: movsd
> +;CHECK: pmovzxdq
> ;CHECK: ret
> }
>
> Modified: llvm/trunk/test/CodeGen/X86/pointer-vector.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pointer-vector.ll?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/pointer-vector.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/pointer-vector.ll Tue Oct 23 12:34:00 2012
> @@ -81,8 +81,7 @@
> entry:
> %G = load <4 x i8>* %p
> ;CHECK: movl
> -;CHECK: movd
> -;CHECK: pshufb
> +;CHECK: pmovzxbd
> ;CHECK: pand
> %K = inttoptr <4 x i8> %G to <4 x i32*>
> ;CHECK: ret
> @@ -105,7 +104,7 @@
> entry:
> %G = load <2 x i8*>* %p
> ;CHECK: movl
> -;CHECK: movsd
> +;CHECK: pmovzxdq
> %T = bitcast <2 x i8*> %G to <2 x i32*>
> ;CHECK: ret
> ret <2 x i32*> %T
>
> Modified: llvm/trunk/test/CodeGen/X86/promote.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/promote.ll?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/promote.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/promote.ll Tue Oct 23 12:34:00 2012
> @@ -20,7 +20,7 @@
> ; CHECK: shuff_f
> define i32 @shuff_f(<4 x i8>* %A) {
> entry:
> -; CHECK: pshufb
> +; CHECK: pmovzxbd
> ; CHECK: paddd
> ; CHECK: pshufb
> %0 = load <4 x i8>* %A, align 8
>
> Modified: llvm/trunk/test/CodeGen/X86/trunc-ext-ld-st.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/trunc-ext-ld-st.ll?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/trunc-ext-ld-st.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/trunc-ext-ld-st.ll Tue Oct 23 12:34:00 2012
> @@ -2,8 +2,7 @@
>
> ;CHECK: load_2_i8
> ; A single 16-bit load
> -;CHECK: movzwl
> -;CHECK: pshufb
> +;CHECK: pmovzxbq
> ;CHECK: paddq
> ;CHECK: pshufb
> ; A single 16-bit store
> @@ -19,8 +18,7 @@
>
> ;CHECK: load_2_i16
> ; Read 32-bits
> -;CHECK: movd
> -;CHECK: pshufb
> +;CHECK: pmovzxwq
> ;CHECK: paddq
> ;CHECK: pshufb
> ;CHECK: movd
> @@ -33,7 +31,7 @@
> }
>
> ;CHECK: load_2_i32
> -;CHECK: pshufd
> +;CHECK: pmovzxdq
> ;CHECK: paddq
> ;CHECK: pshufd
> ;CHECK: ret
> @@ -45,8 +43,7 @@
> }
>
> ;CHECK: load_4_i8
> -;CHECK: movd
> -;CHECK: pshufb
> +;CHECK: pmovzxbd
> ;CHECK: paddd
> ;CHECK: pshufb
> ;CHECK: ret
> @@ -58,7 +55,7 @@
> }
>
> ;CHECK: load_4_i16
> -;CHECK: punpcklwd
> +;CHECK: pmovzxwd
> ;CHECK: paddd
> ;CHECK: pshufb
> ;CHECK: ret
> @@ -70,7 +67,7 @@
> }
>
> ;CHECK: load_8_i8
> -;CHECK: punpcklbw
> +;CHECK: pmovzxbw
> ;CHECK: paddw
> ;CHECK: pshufb
> ;CHECK: ret
>
> Modified: llvm/trunk/test/CodeGen/X86/vec_compare-2.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_compare-2.ll?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_compare-2.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vec_compare-2.ll Tue Oct 23 12:34:00 2012
> @@ -10,8 +10,7 @@
> entry:
> ; CHECK: cfi_def_cfa_offset
> ; CHECK-NOT: set
> -; CHECK: punpcklwd
> -; CHECK: pshufd
> +; CHECK: pmovzxwq
> ; CHECK: pshufb
> %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
> %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
>
> Modified: llvm/trunk/test/CodeGen/X86/widen_load-2.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_load-2.ll?rev=166486&r1=166485&r2=166486&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/widen_load-2.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/widen_load-2.ll Tue Oct 23 12:34:00 2012
> @@ -170,7 +170,7 @@
> ; CHECK: rot
> %i8vec3pack = type { <3 x i8>, i8 }
> define %i8vec3pack @rot() nounwind {
> -; CHECK: movd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
> +; CHECK: pmovzxbd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
> entry:
> %X = alloca %i8vec3pack, align 4
> %rot = alloca %i8vec3pack, align 4
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits