[llvm-commits] [llvm] r58964 - in /llvm/trunk: docs/LangRef.html lib/Bitcode/Reader/BitcodeReader.cpp lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/CodeGen/SelectionDAG/LegalizeTypes.h lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp lib/Transforms/Scalar/InstructionCombining.cpp lib/VMCore/ConstantFold.cpp lib/VMCore/Instructions.cpp lib/VMCore/Verifier.cpp

Wed Nov 12 15:59:33 PST 2008

Hi Duncan,

I like this implementation in general, and you capture a case that I
missed (avoiding the use of a build vector) :-> .  In my thinking (which is
why I went more for a prescan methodology), the typical case is when
we can use split vectors for the new shuffle.  So pre-splitting the
vectors seems fine, as we will be using some of the results of the presplit
in the vector shuffle.  I don't particularly like prebuilding the
BuildOps, though, because we build a set of extract element nodes that
we will typically throw away.  It seems a little cleaner to me to
iterate through the mask again and build these nodes only when we need
them.  What do you think?

Cheers,
-- Mon Ping

On Nov 12, 2008, at 9:05 AM, Duncan Sands wrote:

> Hi Mon Ping, what do you think of this for splitting
> vector shuffle?  I didn't bother trying to handle
> non-power-of-two vector sizes since that is (a) awkward
> in this setup, and (b) they are going away soon anyway
> thanks to widening.
>
> Ciao,
>
> Duncan.
>
> PS: I've attached the final function, since that may
> be easier to read.
> PPS: I didn't test it much but it seems to work!
>
> Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
> ===================================================================
> --- llvm.orig/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp	 
> 2008-11-12 11:18:51.000000000 +0100
> +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp	2008-11-12  
> 17:56:08.000000000 +0100
> @@ -650,52 +650,110 @@
>
> void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue  
> &Lo,
>                                                   SDValue &Hi) {
> -  // Build the low part.
> +  // The low and high parts of the original input give four input  
> vectors.
> +  SDValue Inputs[4];
> +  GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
> +  GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
> +  MVT NewVT = Inputs[0].getValueType();
> +  unsigned NewElts = NewVT.getVectorNumElements();
> +  assert(NewVT == Inputs[1].getValueType() &&
> +         "Non power-of-two vectors not supported!");
> +
> +  // If Lo or Hi uses elements from at most two of the four input  
> vectors, then
> +  // express it as a vector shuffle of those two inputs.  Otherwise  
> extract the
> +  // input elements by hand and construct the Lo/Hi output using a  
> BUILD_VECTOR.
>   SDValue Mask = N->getOperand(2);
> -  SmallVector<SDValue, 16> Ops;
> -  MVT LoVT, HiVT;
> -  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
> -  MVT EltVT = LoVT.getVectorElementType();
> -  unsigned LoNumElts = LoVT.getVectorNumElements();
> -  unsigned NumElements = Mask.getNumOperands();
> -
> -  // Insert all of the elements from the input that are needed.  We  
> use
> -  // buildvector of extractelement here because the input vectors  
> will have
> -  // to be legalized, so this makes the code simpler.
> -  for (unsigned i = 0; i != LoNumElts; ++i) {
> -    SDValue Arg = Mask.getOperand(i);
> -    if (Arg.getOpcode() == ISD::UNDEF) {
> -      Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> -    } else {
> -      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
> -      SDValue InVec = N->getOperand(0);
> -      if (Idx >= NumElements) {
> -        InVec = N->getOperand(1);
> -        Idx -= NumElements;
> +  MVT IdxVT = Mask.getValueType().getVectorElementType();
> +  MVT EltVT = NewVT.getVectorElementType();
> +  SmallVector<SDValue, 16> BuildOps;
> +  SmallVector<SDValue, 16> MaskOps;
> +  for (unsigned High = 0; High < 2; ++High) {
> +    SDValue &Output = High ? Hi : Lo;
> +
> +    // Build a shuffle mask for the output, discovering on the fly  
> which
> +    // input vectors to use as shuffle operands (recorded in  
> InputUsed).
> +    // At the same time, accumulate the final vector elements in  
> BuildOps,
> +    // for use if building a suitable shuffle vector proves too hard.
> +    unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
> +    unsigned FirstMaskIdx = High * NewElts;
> +    bool useBuildVector = false;
> +    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
> +      SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
> +      if (Arg.getOpcode() == ISD::UNDEF) {
> +        MaskOps.push_back(Arg);
> +        BuildOps.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> +        continue;
>       }
> -      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,  
> InVec,
> -                                DAG.getIntPtrConstant(Idx)));
> +
> +      // The mask element.  This indexes into the input.
> +      unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
> +      // The input vector this mask element indexes into.
> +      unsigned Input = Idx / NewElts;
> +
> +      if (Input >= array_lengthof(Inputs)) {
> +        // The mask element indexes off the end of the input!  This  
> is illegal
> +        // but handle it anyway.
> +        MaskOps.push_back(DAG.getNode(ISD::UNDEF, IdxVT));
> +        BuildOps.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> +        continue;
> +      }
> +
> +      // Turn the index into an offset from the start of the input  
> vector.
> +      Idx -= Input * NewElts;
> +
> +      // Extract the vector element by hand, and record it for  
> later use.
> +      BuildOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
> +                                     Inputs[Input],
> +                                     DAG.getIntPtrConstant(Idx)));
> +
> +      if (useBuildVector)
> +        // The following logic is only needed when creating a new  
> shuffle, so
> +        // skip it for a small speedup if we won't be building a  
> vector shuffle.
> +        continue;
> +
> +      // Find or create a shuffle vector operand to hold this input.
> +      unsigned OpNo;
> +      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
> +        if (InputUsed[OpNo] == Input) {
> +          // This input vector is already an operand.
> +          break;
> +        } else if (InputUsed[OpNo] == -1U) {
> +          // Create a new operand for this input vector.
> +          InputUsed[OpNo] = Input;
> +          break;
> +        }
> +      }
> +
> +      if (OpNo < array_lengthof(InputUsed))
> +        // Add the mask index for the new shuffle vector.
> +        MaskOps.push_back(DAG.getConstant(Idx + OpNo * NewElts,  
> IdxVT));
> +      else
> +        // More than two input vectors used!  Give up on trying to  
> create a
> +        // shuffle vector.  Insert all elements into a BUILD_VECTOR  
> instead.
> +        useBuildVector = true;
>     }
> -  }
> -  Lo = DAG.getNode(ISD::BUILD_VECTOR, LoVT, &Ops[0], Ops.size());
> -  Ops.clear();
>
> -  for (unsigned i = LoNumElts; i != NumElements; ++i) {
> -    SDValue Arg = Mask.getOperand(i);
> -    if (Arg.getOpcode() == ISD::UNDEF) {
> -      Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> +    if (useBuildVector) {
> +      Output = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &BuildOps[0],
> +                           BuildOps.size());
> +    } else if (InputUsed[0] == -1U) {
> +      // No input vectors were used!  The result is undefined.
> +      Output = DAG.getNode(ISD::UNDEF, NewVT);
>     } else {
> -      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
> -      SDValue InVec = N->getOperand(0);
> -      if (Idx >= NumElements) {
> -        InVec = N->getOperand(1);
> -        Idx -= NumElements;
> -      }
> -      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,  
> InVec,
> -                                DAG.getIntPtrConstant(Idx)));
> +      // At least one input vector was used.  Create a new shuffle  
> vector.
> +      SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR,
> +                                    MVT::getVectorVT(IdxVT,  
> MaskOps.size()),
> +                                    &MaskOps[0], MaskOps.size());
> +      SDValue Op0 = Inputs[InputUsed[0]];
> +      // If only one input was used, use an undefined vector for  
> the other.
> +      SDValue Op1 = InputUsed[1] == -1U ?
> +        DAG.getNode(ISD::UNDEF, NewVT) : Inputs[InputUsed[1]];
> +      Output = DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, Op0, Op1,  
> NewMask);
>     }
> +
> +    BuildOps.clear();
> +    MaskOps.clear();
>   }
> -  Hi = DAG.getNode(ISD::BUILD_VECTOR, HiVT, &Ops[0], Ops.size());
> }
>
> void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
> <shuffle.txt>