[llvm-commits] [llvm] r59399 - in /llvm/trunk: lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp test/CodeGen/X86/vec_shuffle-25.ll test/CodeGen/X86/vec_shuffle-26.ll test/CodeGen/X86/vec_shuffle-27.ll
Evan Cheng
evan.cheng at apple.com
Tue Nov 18 10:00:13 PST 2008
Hi Mon Ping,
Thanks. Some nitpicks below.
Evan
On Nov 15, 2008, at 9:06 PM, Mon P Wang wrote:
> Author: wangmp
> Date: Sat Nov 15 23:06:27 2008
> New Revision: 59399
>
> URL: http://llvm.org/viewvc/llvm-project?rev=59399&view=rev
> Log:
> Improved shuffle normalization to avoid using extract/build when we
> can extract using different indexes for two vectors. Added a few tests
> for vector shuffles.
>
> Added:
> llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll
> llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll
> llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp?rev=59399&r1=59398&r2=59399&view=diff
>
> =
> =
> =
> =
> =
> =
> =
> =
> ======================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
> (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp Sat
> Nov 15 23:06:27 2008
> @@ -2292,8 +2292,8 @@
> // Utility for visitShuffleVector - Returns true if the mask is mask
> starting
> // from SIndx and increasing to the element length (undefs are
> allowed).
> static bool SequentialMask(SDValue Mask, unsigned SIndx) {
> - unsigned NumElems = Mask.getNumOperands();
> - for (unsigned i = 0; i != NumElems; ++i) {
> + unsigned MaskNumElts = Mask.getNumOperands();
> + for (unsigned i = 0; i != MaskNumElts; ++i) {
> if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) {
> unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))-
> >getZExtValue();
> if (Idx != i + SIndx)
> @@ -2304,161 +2304,187 @@
> }
>
> void SelectionDAGLowering::visitShuffleVector(User &I) {
> - SDValue V1 = getValue(I.getOperand(0));
> - SDValue V2 = getValue(I.getOperand(1));
> + SDValue Srcs[2];
> + Srcs[0] = getValue(I.getOperand(0));
> + Srcs[1] = getValue(I.getOperand(1));
A common idiom used is:
SDValue Srcs[] = { getValue(I.getOperand(0)),
getValue(I.getOperand(1)) };
Is an array preferable to V1 and V2?
>
> SDValue Mask = getValue(I.getOperand(2));
>
> MVT VT = TLI.getValueType(I.getType());
> - MVT VT1 = V1.getValueType();
> - unsigned MaskNumElts = Mask.getNumOperands();
> - unsigned Src1NumElts = VT1.getVectorNumElements();
> + MVT SrcVT = Srcs[0].getValueType();
> + int MaskNumElts = Mask.getNumOperands();
> + int SrcNumElts = SrcVT.getVectorNumElements();
Why int instead of unsigned?
>
>
> - if (Src1NumElts == MaskNumElts) {
> - setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
> + if (SrcNumElts == MaskNumElts) {
> + setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0],
> Srcs[1], Mask));
> return;
> }
>
> // Normalize the shuffle vector since mask and vector length don't
> match.
> - if (Src1NumElts < MaskNumElts && MaskNumElts % Src1NumElts == 0) {
> - // We can concat vectors to make the mask and input vector match.
> - if (Src1NumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
> - // The shuffle is concatenating two vectors.
> - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, V1, V2));
> + MVT MaskEltVT = Mask.getValueType().getVectorElementType();
> +
> + if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
> + // Mask is longer than the source vectors and is a multiple of
> the source
> + // vectors. We can use concatenate vector to make the mask and
> vectors
> + // length match.
lengths.
>
> + if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
> + // The shuffle is concatenating two vectors together.
> + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, Srcs[0],
> Srcs[1]));
> return;
> }
>
> - // Pad both vectors with undefs to the same size as the mask.
> - unsigned NumConcat = MaskNumElts / Src1NumElts;
> - std::vector<SDValue> UnOps(Src1NumElts,
> - DAG.getNode(ISD::UNDEF,
> -
> VT1.getVectorElementType()));
> - SDValue UndefVal = DAG.getNode(ISD::BUILD_VECTOR, VT1,
> - &UnOps[0], UnOps.size());
> + // Pad both vectors with undefs to make them the same length as
> the mask.
> + unsigned NumConcat = MaskNumElts / SrcNumElts;
> + SDValue UndefVal = DAG.getNode(ISD::UNDEF, SrcVT);
>
> SmallVector<SDValue, 8> MOps1, MOps2;
> - MOps1.push_back(V1);
> - MOps2.push_back(V2);
> + MOps1.push_back(Srcs[0]);
> + MOps2.push_back(Srcs[1]);
> for (unsigned i = 1; i != NumConcat; ++i) {
> MOps1.push_back(UndefVal);
> MOps2.push_back(UndefVal);
> }
It seems silly to use vectors instead of arrays here.
>
> - V1 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0],
> MOps1.size());
> - V2 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0],
> MOps2.size());
> + Srcs[0] = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0],
> MOps1.size());
> + Srcs[1] = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0],
> MOps2.size());
>
> // Readjust mask for new input vector length.
> SmallVector<SDValue, 8> MappedOps;
> - for (unsigned i = 0; i != MaskNumElts; ++i) {
> + for (int i = 0; i != MaskNumElts; ++i) {
> if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
> MappedOps.push_back(Mask.getOperand(i));
> } else {
> - unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))-
> >getZExtValue();
> - if (Idx < Src1NumElts) {
> - MappedOps.push_back(DAG.getConstant(Idx,
> -
> Mask.getOperand(i).getValueType()));
> - } else {
> - MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts -
> Src1NumElts,
> -
> Mask.getOperand(i).getValueType()));
> - }
> + int Idx = cast<ConstantSDNode>(Mask.getOperand(i))-
> >getZExtValue();
> + if (Idx < SrcNumElts)
> + MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
> + else
> + MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts -
> SrcNumElts,
> + MaskEltVT));
> }
> }
> Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
> &MappedOps[0], MappedOps.size());
>
> - setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
> + setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0],
> Srcs[1], Mask));
> return;
> }
>
> - if (Src1NumElts > MaskNumElts) {
> + if (SrcNumElts > MaskNumElts) {
> // Resulting vector is shorter than the incoming vector.
> - if (Src1NumElts == MaskNumElts && SequentialMask(Mask,0)) {
> + if (SrcNumElts == MaskNumElts && SequentialMask(Mask,0)) {
> // Shuffle extracts 1st vector.
> - setValue(&I, V1);
> + setValue(&I, Srcs[0]);
> return;
> }
>
> - if (Src1NumElts == MaskNumElts &&
> SequentialMask(Mask,MaskNumElts)) {
> + if (SrcNumElts == MaskNumElts &&
> SequentialMask(Mask,MaskNumElts)) {
> // Shuffle extracts 2nd vector.
> - setValue(&I, V2);
> + setValue(&I, Srcs[1]);
> return;
> }
>
> - // Analyze the access pattern of the vector to see if we can
> extract each
> - // subvector and then do the shuffle. The analysis is done by
> calculating
> - // the range of elements the mask access on both vectors. If it
> is useful,
> - // we could do better by considering separate what elements are
> accessed
> - // in each vector (i.e., have min/max for each vector).
> - int MinRange = Src1NumElts+1;
> - int MaxRange = -1;
> - for (unsigned i = 0; i != MaskNumElts; ++i) {
> + // Analyze the access pattern of the vector to see if we can
> extract
> + // two subvectors and do the shuffle. The analysis is done by
> calculating
> + // the range of elements the mask access on both vectors.
> + int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
> + int MaxRange[2] = {-1, -1};
> +
> + for (int i = 0; i != MaskNumElts; ++i) {
> SDValue Arg = Mask.getOperand(i);
> if (Arg.getOpcode() != ISD::UNDEF) {
> assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE
> mask!");
> - int Idx = cast<ConstantSDNode>(Mask.getOperand(i))-
> >getZExtValue();
> - if (Idx > (int) Src1NumElts)
> - Idx -= Src1NumElts;
> - if (Idx > MaxRange)
> - MaxRange = Idx;
> - if (Idx < MinRange)
> - MinRange = Idx;
> - }
> - }
> - // Adjust MinRange to start at an even boundary since this give
> us
> - // better quality splits later.
> - if ((unsigned) MinRange < Src1NumElts && MinRange%2 != 0)
> - MinRange = MinRange - 1;
> - if (MaxRange - MinRange < (int) MaskNumElts) {
> - // Extract subvector because the range is less than the new
> vector length
> - unsigned StartIdx = (MinRange/MaskNumElts)*MaskNumElts;
> - if (MaxRange - StartIdx < MaskNumElts) {
> - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V1,
> - DAG.getIntPtrConstant(MinRange));
> - V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V2,
> - DAG.getIntPtrConstant(MinRange));
> - // Readjust mask for new input vector length.
> - SmallVector<SDValue, 8> MappedOps;
> - for (unsigned i = 0; i != MaskNumElts; ++i) {
> - if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
> - MappedOps.push_back(Mask.getOperand(i));
> - } else {
> - unsigned Idx =
> - cast<ConstantSDNode>(Mask.getOperand(i))-
> >getZExtValue();
> - if (Idx < Src1NumElts) {
> - MappedOps.push_back(DAG.getConstant(Idx - StartIdx,
> -
> Mask.getOperand(i).getValueType()));
> - } else {
> - Idx = Idx - Src1NumElts - StartIdx + MaskNumElts;
> - MappedOps.push_back(DAG.getConstant(Idx,
> -
> Mask.getOperand(i).getValueType()));
> - }
> - }
> + int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> + int Input = 0;
> + if (Idx >= SrcNumElts) {
> + Input = 1;
> + Idx -= SrcNumElts;
> }
> - Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
> - &MappedOps[0], MappedOps.size());
> + if (Idx > MaxRange[Input])
> + MaxRange[Input] = Idx;
> + if (Idx < MinRange[Input])
> + MinRange[Input] = Idx;
> + }
> + }
>
> - setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
> Mask));
> - return;
> + // Check if the access is smaller than the vector size and can
> we find
> + // a reasonable extract index.
> + int RangeUse[2]; // 0 = Unused, 1 = Extract, 2 = Can not
> Extract.
Perhaps initialize RangeUse with 2's to eliminate some nesting below?
>
> + int StartIdx[2]; // StartIdx to extract from
> + for (int Input=0; Input < 2; ++Input) {
int -> unsigned?
>
> + if (MinRange[Input] == SrcNumElts+1 && MaxRange[Input] == -1) {
> + RangeUse[Input] = 0; // Unused
> + StartIdx[Input] = 0;
> + } else if (MaxRange[Input] - MinRange[Input] < MaskNumElts) {
> + // Fits within range but we should see if we can find a good
> + // start index that a multiple of the mask length.
> + if (MaxRange[Input] < MaskNumElts) {
> + RangeUse[Input] = 1; // Extract from beginning of the
> vector
> + StartIdx[Input] = 0;
> + } else {
> + StartIdx[Input] = (MinRange[Input]/
> MaskNumElts)*MaskNumElts;
> + if (MaxRange[Input] - StartIdx[Input] < MaskNumElts)
> + RangeUse[Input] = 1; // Extract from a multiple of the
> mask length.
> + else
> + RangeUse[Input] = 2; // Can not extract
> + }
> + } else
> + RangeUse[Input] = 2; // Access doesn't fit within range
> + }
> +
> + if (RangeUse[0] == 0 && RangeUse[0] == 0) {
> + setValue(&I, DAG.getNode(ISD::UNDEF, VT)); // Vectors are
> not used.
> + return;
> + }
> + else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
> + // Extract appropriate subvector and generate a vector shuffle
> + for (int Input=0; Input < 2; ++Input) {
> + if (RangeUse[Input] == 0) {
> + Srcs[Input] = DAG.getNode(ISD::UNDEF, VT);
> + } else {
> + Srcs[Input] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT,
> Srcs[Input],
> +
> DAG.getIntPtrConstant(StartIdx[Input]));
> + }
> + }
> + // Calculate new mask.
> + SmallVector<SDValue, 8> MappedOps;
> + for (int i = 0; i != MaskNumElts; ++i) {
int -> unsigned?
>
> + SDValue Arg = Mask.getOperand(i);
> + if (Arg.getOpcode() == ISD::UNDEF) {
> + MappedOps.push_back(Arg);
> + } else {
> + int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> + if (Idx < SrcNumElts)
> + MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0],
> MaskEltVT));
> + else {
> + Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts;
> + MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
> + }
> + }
> }
> + Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
> + &MappedOps[0], MappedOps.size());
> + setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0],
> Srcs[1], Mask));
> + return;
> }
> }
>
> - // We can't use either concat vectors or extract subvectors so we
> fall back
> - // to insert and extracts.
> + // We can't use either concat vectors or extract subvectors so
> fall back to
> + // replacing the shuffle with extract and build vector.
> + // to insert and build vector.
> MVT EltVT = VT.getVectorElementType();
> MVT PtrVT = TLI.getPointerTy();
> SmallVector<SDValue,8> Ops;
> - for (unsigned i = 0; i != MaskNumElts; ++i) {
> + for (int i = 0; i != MaskNumElts; ++i) {
> SDValue Arg = Mask.getOperand(i);
> if (Arg.getOpcode() == ISD::UNDEF) {
> Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> } else {
> assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE
> mask!");
> - unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> - if (Idx < Src1NumElts)
> - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V1,
> + int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> + if (Idx < SrcNumElts)
> + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
> Srcs[0],
> DAG.getConstant(Idx, PtrVT)));
> else
> - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V2,
> - DAG.getConstant(Idx -
> Src1NumElts, PtrVT)));
> + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
> Srcs[1],
> + DAG.getConstant(Idx - SrcNumElts,
> PtrVT)));
> }
> }
> setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0],
> Ops.size()));
>
> Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll?rev=59399&view=auto
>
> =
> =
> =
> =
> =
> =
> =
> =
> ======================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll Sat Nov 15
> 23:06:27 2008
> @@ -0,0 +1,34 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
> +; RUN: grep unpcklps %t | count 3
> +; RUN: grep unpckhps %t | count 1
> +
> +; Transpose example using the more generic vector shuffle. We return
> +; float8 instead of float16 since x86 can return that in register.
> +; ModuleID = 'transpose2_opt.bc'
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-
> f80:32:32"
> +target triple = "i386-apple-cl.1.0"
> + at r0 = common global <4 x float> zeroinitializer, align 16 ; <<4 x
> float>*> [#uses=1]
> + at r1 = common global <4 x float> zeroinitializer, align 16 ; <<4 x
> float>*> [#uses=1]
> + at r2 = common global <4 x float> zeroinitializer, align 16 ; <<4 x
> float>*> [#uses=1]
> + at r3 = common global <4 x float> zeroinitializer, align 16 ; <<4 x
> float>*> [#uses=1]
> +
> +define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1,
> <4 x float> %p2, <4 x float> %p3) nounwind {
> +entry:
> + %unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x
> i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
> + %unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x
> i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
> + %unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x
> i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
> + %unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x
> i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
> + %unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float>
> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x
> float>> [#uses=1]
> + %unpcklps14a = shufflevector <4 x float> %unpcklps14, <4 x float>
> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>;
> + %unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float>
> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x
> float>> [#uses=1]
> + %unpckhps17a = shufflevector <4 x float> %unpckhps17, <4 x float>
> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>;
> + %r1 = shufflevector <16 x float> %unpcklps14a, <16 x float>
> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32
> 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,
> i32 14, i32 15>;
> + %unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float>
> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x
> float>> [#uses=1]
> + %unpcklps20a = shufflevector <4 x float> %unpcklps20, <4 x float>
> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>;
> + %r2 = shufflevector <16 x float> %r1, <16 x float> %unpcklps20a,
> <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
> i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>;
> + %unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float>
> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x
> float>> [#uses=1]
> + %unpckhps23a = shufflevector <4 x float> %unpckhps23, <4 x float>
> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef>;
> + %r3 = shufflevector <16 x float> %r2, <16 x float> %unpckhps23a,
> <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
> i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>;
> + %r4 = shufflevector <16 x float> %r3, <16 x float> undef, <8 x
> i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>;
> + ret <8 x float> %r4;
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll?rev=59399&view=auto
>
> =
> =
> =
> =
> =
> =
> =
> =
> ======================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll Sat Nov 15
> 23:06:27 2008
> @@ -0,0 +1,29 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
> +; RUN: grep unpcklps %t | count 1
> +; RUN: grep unpckhps %t | count 3
> +
> +; Transpose example using the more generic vector shuffle. Return
> float8
> +; instead of float16
> +; ModuleID = 'transpose2_opt.bc'
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-
> f80:32:32"
> +target triple = "i386-apple-cl.1.0"
> + at r0 = common global <4 x float> zeroinitializer, align 16 ; <<4 x
> float>*> [#uses=1]
> + at r1 = common global <4 x float> zeroinitializer, align 16 ; <<4 x
> float>*> [#uses=1]
> + at r2 = common global <4 x float> zeroinitializer, align 16 ; <<4 x
> float>*> [#uses=1]
> + at r3 = common global <4 x float> zeroinitializer, align 16 ; <<4 x
> float>*> [#uses=1]
> +
> +define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1,
> <4 x float> %p2, <4 x float> %p3) nounwind {
> +entry:
> + %unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x
> i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
> + %unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x
> i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
> + %unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x
> i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
> + %unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x
> i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
> + %unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float>
> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x
> float>> [#uses=1]
> + %unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float>
> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x
> float>> [#uses=1]
> + %r1 = shufflevector <4 x float> %unpcklps14, <4 x float>
> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5,
> i32 6, i32 7 >;
> + %unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float>
> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x
> float>> [#uses=1]
> + %unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float>
> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x
> float>> [#uses=1]
> + %r2 = shufflevector <4 x float> %unpcklps20, <4 x float>
> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5,
> i32 6, i32 7 >;
> +; %r3 = shufflevector <8 x float> %r1, <8 x float> %r2, <16
> x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32
> 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >;
> + ret <8 x float> %r2;
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll?rev=59399&view=auto
>
> =
> =
> =
> =
> =
> =
> =
> =
> ======================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll Sat Nov 15
> 23:06:27 2008
> @@ -0,0 +1,17 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
> +; RUN: grep addps %t | count 2
> +; RUN: grep mulps %t | count 2
> +; RUN: grep subps %t | count 2
> +
> +; ModuleID = 'vec_shuffle-27.bc'
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-
> f80:32:32"
> +target triple = "i686-apple-cl.1.0"
> +
> +define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0,
> <8 x float> %T1) nounwind readnone {
> +entry:
> + %tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32>
> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x
> float>> [#uses=1]
> + %sub = sub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]
> + %mul = mul <8 x float> %sub, %tmp7 ; <<8 x float>> [#uses=1]
> + %add = add <8 x float> %mul, %T0 ; <<8 x float>> [#uses=1]
> + ret <8 x float> %add
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list