[llvm-commits] [llvm] r58964 - in /llvm/trunk: docs/LangRef.html lib/Bitcode/Reader/BitcodeReader.cpp lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/CodeGen/SelectionDAG/LegalizeTypes.h lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp lib/Transforms/Scalar/InstructionCombining.cpp lib/VMCore/ConstantFold.cpp lib/VMCore/Instructions.cpp lib/VMCore/Verifier.cpp

Mon Ping Wang wangmp at apple.com
Fri Nov 14 10:54:01 PST 2008


Hi Duncan,

This looks good to me.  I'll be testing the vector code on our side
and I'll check in some shuffle tests in the test directory.

Thanks,
   -- Mon Ping


On Nov 14, 2008, at 8:13 AM, Duncan Sands wrote:

> Hi Mon Ping,
>
>> As an experiment, I ran the compiler on a ~32,000 line vector program
>> generating code for X86 SSE4 and dumped how often we split and when we
>> use split vectors instead of using the BuildOps.
>>
>> vec_length:16 total_splits:16200 use_split: 16200
>> vec_length:  8 total_splits:22687 use_split: 22687
>
> Those are convincing numbers!  How about this then (untested, but
> compiles)?
>
> Ciao,
>
> Duncan.
>
> Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp	(revision 59124)
> +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp	(working copy)
> @@ -650,52 +650,123 @@
>
> void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue  
> &Lo,
>                                                   SDValue &Hi) {
> -  // Build the low part.
> +  // The low and high parts of the original input give four input  
> vectors.
> +  SDValue Inputs[4];
> +  GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
> +  GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
> +  MVT NewVT = Inputs[0].getValueType();
> +  unsigned NewElts = NewVT.getVectorNumElements();
> +  assert(NewVT == Inputs[1].getValueType() &&
> +         "Non power-of-two vectors not supported!");
> +
> +  // If Lo or Hi uses elements from at most two of the four input  
> vectors, then
> +  // express it as a vector shuffle of those two inputs.  Otherwise  
> extract the
> +  // input elements by hand and construct the Lo/Hi output using a  
> BUILD_VECTOR.
>   SDValue Mask = N->getOperand(2);
> +  MVT IdxVT = Mask.getValueType().getVectorElementType();
>   SmallVector<SDValue, 16> Ops;
> -  MVT LoVT, HiVT;
> -  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
> -  MVT EltVT = LoVT.getVectorElementType();
> -  unsigned LoNumElts = LoVT.getVectorNumElements();
> -  unsigned NumElements = Mask.getNumOperands();
> -
> -  // Insert all of the elements from the input that are needed.  We  
> use
> -  // buildvector of extractelement here because the input vectors  
> will have
> -  // to be legalized, so this makes the code simpler.
> -  for (unsigned i = 0; i != LoNumElts; ++i) {
> -    SDValue Arg = Mask.getOperand(i);
> -    if (Arg.getOpcode() == ISD::UNDEF) {
> -      Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> -    } else {
> -      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
> -      SDValue InVec = N->getOperand(0);
> -      if (Idx >= NumElements) {
> -        InVec = N->getOperand(1);
> -        Idx -= NumElements;
> +  Ops.reserve(NewElts);
> +  for (unsigned High = 0; High < 2; ++High) {
> +    SDValue &Output = High ? Hi : Lo;
> +
> +    // Build a shuffle mask for the output, discovering on the fly  
> which
> +    // input vectors to use as shuffle operands (recorded in  
> InputUsed).
> +    // If building a suitable shuffle vector proves too hard, then  
> bail
> +    // out with useBuildVector set.
> +    unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
> +    unsigned FirstMaskIdx = High * NewElts;
> +    bool useBuildVector = false;
> +    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
> +      SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
> +
> +      // The mask element.  This indexes into the input.
> +      unsigned Idx = Arg.getOpcode() == ISD::UNDEF ?
> +        -1U : cast<ConstantSDNode>(Arg)->getZExtValue();
> +
> +      // The input vector this mask element indexes into.
> +      unsigned Input = Idx / NewElts;
> +
> +      if (Input >= array_lengthof(Inputs)) {
> +        // The mask element does not index into any input vector.
> +        Ops.push_back(DAG.getNode(ISD::UNDEF, IdxVT));
> +        continue;
> +      }
> +
> +      // Turn the index into an offset from the start of the input  
> vector.
> +      Idx -= Input * NewElts;
> +
> +      // Find or create a shuffle vector operand to hold this input.
> +      unsigned OpNo;
> +      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
> +        if (InputUsed[OpNo] == Input) {
> +          // This input vector is already an operand.
> +          break;
> +        } else if (InputUsed[OpNo] == -1U) {
> +          // Create a new operand for this input vector.
> +          InputUsed[OpNo] = Input;
> +          break;
> +        }
> +      }
> +
> +      if (OpNo >= array_lengthof(InputUsed)) {
> +        // More than two input vectors used!  Give up on trying to  
> create a
> +        // shuffle vector.  Insert all elements into a BUILD_VECTOR  
> instead.
> +        useBuildVector = true;
> +        break;
>       }
> -      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,  
> InVec,
> -                                DAG.getIntPtrConstant(Idx)));
> +
> +      // Add the mask index for the new shuffle vector.
> +      Ops.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT));
>     }
> -  }
> -  Lo = DAG.getNode(ISD::BUILD_VECTOR, LoVT, &Ops[0], Ops.size());
> -  Ops.clear();
>
> -  for (unsigned i = LoNumElts; i != NumElements; ++i) {
> -    SDValue Arg = Mask.getOperand(i);
> -    if (Arg.getOpcode() == ISD::UNDEF) {
> -      Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> -    } else {
> -      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
> -      SDValue InVec = N->getOperand(0);
> -      if (Idx >= NumElements) {
> -        InVec = N->getOperand(1);
> -        Idx -= NumElements;
> +    if (useBuildVector) {
> +      MVT EltVT = NewVT.getVectorElementType();
> +      Ops.clear();
> +
> +      // Extract the input elements by hand.
> +      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
> +        SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
> +
> +        // The mask element.  This indexes into the input.
> +        unsigned Idx = Arg.getOpcode() == ISD::UNDEF ?
> +          -1U : cast<ConstantSDNode>(Arg)->getZExtValue();
> +
> +        // The input vector this mask element indexes into.
> +        unsigned Input = Idx / NewElts;
> +
> +        if (Input >= array_lengthof(Inputs)) {
> +          // The mask element is "undef" or indexes off the end of  
> the input.
> +          Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> +          continue;
> +        }
> +
> +        // Turn the index into an offset from the start of the  
> input vector.
> +        Idx -= Input * NewElts;
> +
> +        // Extract the vector element by hand.
> +        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
> +                                  Inputs[Input],  
> DAG.getIntPtrConstant(Idx)));
>       }
> -      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,  
> InVec,
> -                                DAG.getIntPtrConstant(Idx)));
> +
> +      // Construct the Lo/Hi output using a BUILD_VECTOR.
> +      Output = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &Ops[0],  
> Ops.size());
> +    } else if (InputUsed[0] == -1U) {
> +      // No input vectors were used!  The result is undefined.
> +      Output = DAG.getNode(ISD::UNDEF, NewVT);
> +    } else {
> +      // At least one input vector was used.  Create a new shuffle  
> vector.
> +      SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR,
> +                                    MVT::getVectorVT(IdxVT, Ops.size()),
> +                                    &Ops[0], Ops.size());
> +      SDValue Op0 = Inputs[InputUsed[0]];
> +      // If only one input was used, use an undefined vector for  
> the other.
> +      SDValue Op1 = InputUsed[1] == -1U ?
> +        DAG.getNode(ISD::UNDEF, NewVT) : Inputs[InputUsed[1]];
> +      Output = DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, Op0, Op1,  
> NewMask);
>     }
> +
> +    Ops.clear();
>   }
> -  Hi = DAG.getNode(ISD::BUILD_VECTOR, HiVT, &Ops[0], Ops.size());
> }
>
> void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,




More information about the llvm-commits mailing list