[llvm-commits] [llvm] r59399 - in /llvm/trunk: lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp test/CodeGen/X86/vec_shuffle-25.ll test/CodeGen/X86/vec_shuffle-26.ll test/CodeGen/X86/vec_shuffle-27.ll

Tue Nov 18 10:00:13 PST 2008

Hi Mon Ping,

Thanks. Some nitpicks below.

Evan

On Nov 15, 2008, at 9:06 PM, Mon P Wang wrote:

> Author: wangmp
> Date: Sat Nov 15 23:06:27 2008
> New Revision: 59399
>
> URL: http://llvm.org/viewvc/llvm-project?rev=59399&view=rev
> Log:
> Improved shuffle normalization to avoid using extract/build when we
> can extract using different indexes for two vectors. Added a few tests
> for vector shuffles.
>
> Added:
>    llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll
>    llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll
>    llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll
> Modified:
>    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp?rev=59399&r1=59398&r2=59399&view=diff
>
> =
> =
> =
> =
> =
> =
> =
> =
> ======================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp  
> (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp Sat  
> Nov 15 23:06:27 2008
> @@ -2292,8 +2292,8 @@
> // Utility for visitShuffleVector - Returns true if the mask is mask  
> starting
> // from SIndx and increasing to the element length (undefs are  
> allowed).
> static bool SequentialMask(SDValue Mask, unsigned SIndx) {
> -  unsigned NumElems = Mask.getNumOperands();
> -  for (unsigned i = 0; i != NumElems; ++i) {
> +  unsigned MaskNumElts = Mask.getNumOperands();
> +  for (unsigned i = 0; i != MaskNumElts; ++i) {
>     if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) {
>       unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))- 
> >getZExtValue();
>       if (Idx != i + SIndx)
> @@ -2304,161 +2304,187 @@
> }
>
> void SelectionDAGLowering::visitShuffleVector(User &I) {
> -  SDValue V1   = getValue(I.getOperand(0));
> -  SDValue V2   = getValue(I.getOperand(1));
> +  SDValue Srcs[2];
> +  Srcs[0] = getValue(I.getOperand(0));
> +  Srcs[1] = getValue(I.getOperand(1));

A common idiom used is:
SDValue Srcs[] = { getValue(I.getOperand(0)),
                   getValue(I.getOperand(1)) };

Is an array preferable to V1 and V2?

>
>   SDValue Mask = getValue(I.getOperand(2));
>
>   MVT VT = TLI.getValueType(I.getType());
> -  MVT VT1 = V1.getValueType();
> -  unsigned MaskNumElts = Mask.getNumOperands();
> -  unsigned Src1NumElts = VT1.getVectorNumElements();
> +  MVT SrcVT = Srcs[0].getValueType();
> +  int MaskNumElts = Mask.getNumOperands();
> +  int SrcNumElts = SrcVT.getVectorNumElements();

Why int instead of unsigned?

>
>
> -  if (Src1NumElts == MaskNumElts) {
> -    setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
> +  if (SrcNumElts == MaskNumElts) {
> +    setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0],  
> Srcs[1], Mask));
>     return;
>   }
>
>   // Normalize the shuffle vector since mask and vector length don't  
> match.
> -  if (Src1NumElts < MaskNumElts && MaskNumElts % Src1NumElts == 0) {
> -    // We can concat vectors to make the mask and input vector match.
> -    if (Src1NumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
> -      // The shuffle is concatenating two vectors.
> -      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, V1, V2));
> +  MVT MaskEltVT = Mask.getValueType().getVectorElementType();
> +
> +  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
> +    // Mask is longer than the source vectors and is a multiple of  
> the source
> +    // vectors.  We can use concatenate vector to make the mask and  
> vectors
> +    // length match.

lengths.

>
> +    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
> +      // The shuffle is concatenating two vectors together.
> +      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, Srcs[0],  
> Srcs[1]));
>       return;
>     }
>
> -    // Pad both vectors with undefs to the same size as the mask.
> -    unsigned NumConcat = MaskNumElts / Src1NumElts;
> -    std::vector<SDValue> UnOps(Src1NumElts,
> -                               DAG.getNode(ISD::UNDEF,
> -                                            
> VT1.getVectorElementType()));
> -    SDValue UndefVal = DAG.getNode(ISD::BUILD_VECTOR, VT1,
> -                                   &UnOps[0], UnOps.size());
> +    // Pad both vectors with undefs to make them the same length as  
> the mask.
> +    unsigned NumConcat = MaskNumElts / SrcNumElts;
> +    SDValue UndefVal = DAG.getNode(ISD::UNDEF, SrcVT);
>
>     SmallVector<SDValue, 8> MOps1, MOps2;
> -    MOps1.push_back(V1);
> -    MOps2.push_back(V2);
> +    MOps1.push_back(Srcs[0]);
> +    MOps2.push_back(Srcs[1]);
>     for (unsigned i = 1; i != NumConcat; ++i) {
>       MOps1.push_back(UndefVal);
>       MOps2.push_back(UndefVal);
>     }

It seems silly to use vectors instead of arrays here.

>
> -    V1 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0],  
> MOps1.size());
> -    V2 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0],  
> MOps2.size());
> +    Srcs[0] = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0],  
> MOps1.size());
> +    Srcs[1] = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0],  
> MOps2.size());
>
>     // Readjust mask for new input vector length.
>     SmallVector<SDValue, 8> MappedOps;
> -    for (unsigned i = 0; i != MaskNumElts; ++i) {
> +    for (int i = 0; i != MaskNumElts; ++i) {
>       if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
>         MappedOps.push_back(Mask.getOperand(i));
>       } else {
> -        unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))- 
> >getZExtValue();
> -        if (Idx < Src1NumElts) {
> -          MappedOps.push_back(DAG.getConstant(Idx,
> -                                            
> Mask.getOperand(i).getValueType()));
> -        } else {
> -          MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts -  
> Src1NumElts,
> -                                            
> Mask.getOperand(i).getValueType()));
> -        }
> +        int Idx = cast<ConstantSDNode>(Mask.getOperand(i))- 
> >getZExtValue();
> +        if (Idx < SrcNumElts)
> +          MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
> +        else
> +          MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts -  
> SrcNumElts,
> +                                              MaskEltVT));
>       }
>     }
>     Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
>                        &MappedOps[0], MappedOps.size());
>
> -    setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
> +    setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0],  
> Srcs[1], Mask));
>     return;
>   }
>
> -  if (Src1NumElts > MaskNumElts) {
> +  if (SrcNumElts > MaskNumElts) {
>     // Resulting vector is shorter than the incoming vector.
> -    if (Src1NumElts == MaskNumElts && SequentialMask(Mask,0)) {
> +    if (SrcNumElts == MaskNumElts && SequentialMask(Mask,0)) {
>       // Shuffle extracts 1st vector.
> -      setValue(&I, V1);
> +      setValue(&I, Srcs[0]);
>       return;
>     }
>
> -    if (Src1NumElts == MaskNumElts &&  
> SequentialMask(Mask,MaskNumElts)) {
> +    if (SrcNumElts == MaskNumElts &&  
> SequentialMask(Mask,MaskNumElts)) {
>       // Shuffle extracts 2nd vector.
> -      setValue(&I, V2);
> +      setValue(&I, Srcs[1]);
>       return;
>     }
>
> -    // Analyze the access pattern of the vector to see if we can  
> extract each
> -    // subvector and then do the shuffle. The analysis is done by  
> calculating
> -    // the range of elements the mask access on both vectors. If it  
> is useful,
> -    // we could do better by considering separate what elements are  
> accessed
> -    // in each vector (i.e., have min/max for each vector).
> -    int MinRange = Src1NumElts+1;
> -    int MaxRange = -1;
> -    for (unsigned i = 0; i != MaskNumElts; ++i) {
> +    // Analyze the access pattern of the vector to see if we can  
> extract
> +    // two subvectors and do the shuffle. The analysis is done by  
> calculating
> +    // the range of elements the mask access on both vectors.
> +    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
> +    int MaxRange[2] = {-1, -1};
> +
> +    for (int i = 0; i != MaskNumElts; ++i) {
>       SDValue Arg = Mask.getOperand(i);
>       if (Arg.getOpcode() != ISD::UNDEF) {
>         assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE  
> mask!");
> -        int Idx = cast<ConstantSDNode>(Mask.getOperand(i))- 
> >getZExtValue();
> -        if (Idx > (int) Src1NumElts)
> -          Idx -= Src1NumElts;
> -        if (Idx > MaxRange)
> -          MaxRange = Idx;
> -        if (Idx < MinRange)
> -          MinRange = Idx;
> -      }
> -    }
> -    // Adjust MinRange to start at an even boundary since this give  
> us
> -    // better quality splits later.
> -    if ((unsigned) MinRange < Src1NumElts && MinRange%2 != 0)
> -      MinRange = MinRange - 1;
> -    if (MaxRange - MinRange < (int) MaskNumElts) {
> -      // Extract subvector because the range is less than the new  
> vector length
> -      unsigned StartIdx = (MinRange/MaskNumElts)*MaskNumElts;
> -      if (MaxRange - StartIdx < MaskNumElts) {
> -        V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V1,
> -                         DAG.getIntPtrConstant(MinRange));
> -        V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V2,
> -                         DAG.getIntPtrConstant(MinRange));
> -        // Readjust mask for new input vector length.
> -        SmallVector<SDValue, 8> MappedOps;
> -        for (unsigned i = 0; i != MaskNumElts; ++i) {
> -          if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
> -            MappedOps.push_back(Mask.getOperand(i));
> -          } else {
> -            unsigned Idx =
> -              cast<ConstantSDNode>(Mask.getOperand(i))- 
> >getZExtValue();
> -            if (Idx < Src1NumElts) {
> -              MappedOps.push_back(DAG.getConstant(Idx - StartIdx,
> -                                          
> Mask.getOperand(i).getValueType()));
> -            } else {
> -              Idx = Idx - Src1NumElts - StartIdx + MaskNumElts;
> -              MappedOps.push_back(DAG.getConstant(Idx,
> -                                         
> Mask.getOperand(i).getValueType()));
> -            }
> -          }
> +        int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> +        int Input = 0;
> +        if (Idx >= SrcNumElts) {
> +          Input = 1;
> +          Idx -= SrcNumElts;
>         }
> -        Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
> -                           &MappedOps[0], MappedOps.size());
> +        if (Idx > MaxRange[Input])
> +          MaxRange[Input] = Idx;
> +        if (Idx < MinRange[Input])
> +          MinRange[Input] = Idx;
> +      }
> +    }
>
> -        setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,  
> Mask));
> -        return;
> +    // Check if the access is smaller than the vector size and can  
> we find
> +    // a reasonable extract index.
> +    int RangeUse[2];  // 0 = Unused, 1 = Extract, 2 = Can not  
> Extract.

Perhaps initialize RangeUse with 2's to eliminate some nesting below?

>
> +    int StartIdx[2];  // StartIdx to extract from
> +    for (int Input=0; Input < 2; ++Input) {

int -> unsigned?

>
> +      if (MinRange[Input] == SrcNumElts+1 && MaxRange[Input] == -1) {
> +        RangeUse[Input] = 0; // Unused
> +        StartIdx[Input] = 0;
> +      } else if (MaxRange[Input] - MinRange[Input] < MaskNumElts) {
> +        // Fits within range but we should see if we can find a good
> +        // start index that a multiple of the mask length.
> +        if (MaxRange[Input] < MaskNumElts) {
> +          RangeUse[Input] = 1; // Extract from beginning of the  
> vector
> +          StartIdx[Input] = 0;
> +        } else {
> +          StartIdx[Input] = (MinRange[Input]/ 
> MaskNumElts)*MaskNumElts;
> +          if (MaxRange[Input] - StartIdx[Input] < MaskNumElts)
> +            RangeUse[Input] = 1; // Extract from a multiple of the  
> mask length.
> +          else
> +            RangeUse[Input] = 2; // Can not extract
> +        }
> +      } else
> +        RangeUse[Input] = 2;  // Access doesn't fit within range
> +    }
> +
> +    if (RangeUse[0] == 0 && RangeUse[0] == 0) {
> +      setValue(&I, DAG.getNode(ISD::UNDEF, VT));  // Vectors are  
> not used.
> +      return;
> +    }
> +    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
> +      // Extract appropriate subvector and generate a vector shuffle
> +      for (int Input=0; Input < 2; ++Input) {
> +        if (RangeUse[Input] == 0) {
> +          Srcs[Input] = DAG.getNode(ISD::UNDEF, VT);
> +        } else {
> +          Srcs[Input] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT,  
> Srcs[Input],
> +                                     
> DAG.getIntPtrConstant(StartIdx[Input]));
> +        }
> +      }
> +      // Calculate new mask.
> +      SmallVector<SDValue, 8> MappedOps;
> +      for (int i = 0; i != MaskNumElts; ++i) {

int -> unsigned?

>
> +        SDValue Arg = Mask.getOperand(i);
> +        if (Arg.getOpcode() == ISD::UNDEF) {
> +          MappedOps.push_back(Arg);
> +        } else {
> +          int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> +          if (Idx < SrcNumElts)
> +            MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0],  
> MaskEltVT));
> +          else {
> +            Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts;
> +            MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
> +          }
> +        }
>       }
> +      Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
> +                         &MappedOps[0], MappedOps.size());
> +      setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0],  
> Srcs[1], Mask));
> +      return;
>     }
>   }
>
> -  // We can't use either concat vectors or extract subvectors so we  
> fall back
> -  // to insert and extracts.
> +  // We can't use either concat vectors or extract subvectors so  
> fall back to
> +  // replacing the shuffle with extract and build vector.
> +  // to insert and build vector.
>   MVT EltVT = VT.getVectorElementType();
>   MVT PtrVT = TLI.getPointerTy();
>   SmallVector<SDValue,8> Ops;
> -  for (unsigned i = 0; i != MaskNumElts; ++i) {
> +  for (int i = 0; i != MaskNumElts; ++i) {
>     SDValue Arg = Mask.getOperand(i);
>     if (Arg.getOpcode() == ISD::UNDEF) {
>       Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
>     } else {
>       assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE  
> mask!");
> -      unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> -      if (Idx < Src1NumElts)
> -        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V1,
> +      int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> +      if (Idx < SrcNumElts)
> +        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,  
> Srcs[0],
>                                   DAG.getConstant(Idx, PtrVT)));
>       else
> -        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V2,
> -                                  DAG.getConstant(Idx -  
> Src1NumElts, PtrVT)));
> +        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,  
> Srcs[1],
> +                                  DAG.getConstant(Idx - SrcNumElts,  
> PtrVT)));
>     }
>   }
>   setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0],  
> Ops.size()));
>
> Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll?rev=59399&view=auto
>
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> ======================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-25.ll Sat Nov 15  
> 23:06:27 2008
> @@ -0,0 +1,34 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
> +; RUN: grep unpcklps %t | count 3
> +; RUN: grep unpckhps %t | count 1
> +
> +; Transpose example using the more generic vector shuffle.  We return
> +; float8 instead of float16 since x86 can return that in register.
> +; ModuleID = 'transpose2_opt.bc'
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32- 
> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64- 
> f80:32:32"
> +target triple = "i386-apple-cl.1.0"
> + at r0 = common global <4 x float> zeroinitializer, align 16		; <<4 x  
> float>*> [#uses=1]
> + at r1 = common global <4 x float> zeroinitializer, align 16		; <<4 x  
> float>*> [#uses=1]
> + at r2 = common global <4 x float> zeroinitializer, align 16		; <<4 x  
> float>*> [#uses=1]
> + at r3 = common global <4 x float> zeroinitializer, align 16		; <<4 x  
> float>*> [#uses=1]
> +
> +define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1,  
> <4 x float> %p2, <4 x float> %p3) nounwind {
> +entry:
> +	%unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x  
> i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
> +	%unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x  
> i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
> +	%unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x  
> i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
> +	%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x  
> i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
> +	%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float>  
> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x  
> float>> [#uses=1]
> +	%unpcklps14a = shufflevector <4 x float> %unpcklps14,  <4 x float>  
> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32  
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32  
> undef, i32 undef, i32 undef, i32 undef, i32 undef>;
> +	%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float>  
> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x  
> float>> [#uses=1]
> +	%unpckhps17a = shufflevector <4 x float> %unpckhps17,  <4 x float>  
> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32  
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32  
> undef, i32 undef, i32 undef, i32 undef, i32 undef>; 	
> +	%r1 = shufflevector <16 x float> %unpcklps14a,  <16 x float>  
> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32  
> 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13,  
> i32 14, i32 15>;
> +	%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float>  
> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x  
> float>> [#uses=1]
> +	%unpcklps20a = shufflevector <4 x float> %unpcklps20,  <4 x float>  
> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32  
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32  
> undef, i32 undef, i32 undef, i32 undef, i32 undef>;
> +	%r2 = shufflevector <16 x float> %r1,  <16 x float> %unpcklps20a,  
> <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,  
> i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>;
> +	%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float>  
> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x  
> float>> [#uses=1]
> +	%unpckhps23a = shufflevector <4 x float> %unpckhps23,  <4 x float>  
> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32  
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32  
> undef, i32 undef, i32 undef, i32 undef, i32 undef>;
> +	%r3 = shufflevector <16 x float> %r2,  <16 x float> %unpckhps23a,  
> <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,  
> i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>;
> +	%r4 = shufflevector <16 x float> %r3,  <16 x float> undef, <8 x  
> i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>;
> +	ret <8 x float> %r4;
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll?rev=59399&view=auto
>
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> ======================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-26.ll Sat Nov 15  
> 23:06:27 2008
> @@ -0,0 +1,29 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
> +; RUN: grep unpcklps %t | count 1
> +; RUN: grep unpckhps %t | count 3
> +
> +; Transpose example using the more generic vector shuffle. Return  
> float8
> +; instead of float16
> +; ModuleID = 'transpose2_opt.bc'
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32- 
> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64- 
> f80:32:32"
> +target triple = "i386-apple-cl.1.0"
> + at r0 = common global <4 x float> zeroinitializer, align 16		; <<4 x  
> float>*> [#uses=1]
> + at r1 = common global <4 x float> zeroinitializer, align 16		; <<4 x  
> float>*> [#uses=1]
> + at r2 = common global <4 x float> zeroinitializer, align 16		; <<4 x  
> float>*> [#uses=1]
> + at r3 = common global <4 x float> zeroinitializer, align 16		; <<4 x  
> float>*> [#uses=1]
> +
> +define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1,  
> <4 x float> %p2, <4 x float> %p3) nounwind {
> +entry:
> +	%unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x  
> i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
> +	%unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x  
> i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
> +	%unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x  
> i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
> +	%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x  
> i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
> +	%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float>  
> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x  
> float>> [#uses=1]
> +	%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float>  
> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x  
> float>> [#uses=1]
> +        %r1 = shufflevector <4 x float> %unpcklps14,  <4 x float>  
> %unpckhps17,  <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5,  
> i32 6, i32 7 >;
> +	%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float>  
> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x  
> float>> [#uses=1]
> +	%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float>  
> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x  
> float>> [#uses=1]
> +        %r2 = shufflevector <4 x float> %unpcklps20,  <4 x float>  
> %unpckhps23,  <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5,  
> i32 6, i32 7 >;
> +;       %r3 = shufflevector <8 x float> %r1,  <8 x float> %r2,  <16  
> x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32  
> 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >;
> +	ret <8 x float> %r2;
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll?rev=59399&view=auto
>
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> ======================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-27.ll Sat Nov 15  
> 23:06:27 2008
> @@ -0,0 +1,17 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
> +; RUN: grep addps %t | count 2
> +; RUN: grep mulps %t | count 2
> +; RUN: grep subps %t | count 2
> +
> +; ModuleID = 'vec_shuffle-27.bc'
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32- 
> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64- 
> f80:32:32"
> +target triple = "i686-apple-cl.1.0"
> +
> +define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0,  
> <8 x float> %T1) nounwind readnone {
> +entry:
> +	%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32>  
> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 >		; <<8 x  
> float>> [#uses=1]
> +	%sub = sub <8 x float> %T1, %T0		; <<8 x float>> [#uses=1]
> +	%mul = mul <8 x float> %sub, %tmp7		; <<8 x float>> [#uses=1]
> +	%add = add <8 x float> %mul, %T0		; <<8 x float>> [#uses=1]
> +	ret <8 x float> %add
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits