[llvm-commits] [llvm] r58964 - in /llvm/trunk: docs/LangRef.html lib/Bitcode/Reader/BitcodeReader.cpp lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/CodeGen/SelectionDAG/LegalizeTypes.h lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp lib/Transforms/Scalar/InstructionCombining.cpp lib/VMCore/ConstantFold.cpp lib/VMCore/Instructions.cpp lib/VMCore/Verifier.cpp
Mon Ping Wang
wangmp at apple.com
Fri Nov 14 10:54:01 PST 2008
Hi Duncan,
This looks good to me. I'll be testing the vector code on our side,
and I'll check in some shuffle tests in the test directory.
Thanks,
-- Mon Ping
On Nov 14, 2008, at 8:13 AM, Duncan Sands wrote:
> Hi Mon Ping,
>
>> As an experiment, I ran the compiler on a ~32,000 line vector program
>> generating code for X86 SSE4 and dumped how often we split and when we
>> use split vectors instead of using the BuildOps.
>>
>> vec_length:16 total_splits:16200 use_split: 16200
>> vec_length: 8 total_splits:22687 use_split: 22687
>
> Those are convincing numbers! How about this then (untested, but
> compiles)?
>
> Ciao,
>
> Duncan.
>
> Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (revision 59124)
> +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (working copy)
> @@ -650,52 +650,123 @@
>
> void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue
> &Lo,
> SDValue &Hi) {
> - // Build the low part.
> + // The low and high parts of the original input give four input
> vectors.
> + SDValue Inputs[4];
> + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
> + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
> + MVT NewVT = Inputs[0].getValueType();
> + unsigned NewElts = NewVT.getVectorNumElements();
> + assert(NewVT == Inputs[1].getValueType() &&
> + "Non power-of-two vectors not supported!");
> +
> + // If Lo or Hi uses elements from at most two of the four input
> vectors, then
> + // express it as a vector shuffle of those two inputs. Otherwise
> extract the
> + // input elements by hand and construct the Lo/Hi output using a
> BUILD_VECTOR.
> SDValue Mask = N->getOperand(2);
> + MVT IdxVT = Mask.getValueType().getVectorElementType();
> SmallVector<SDValue, 16> Ops;
> - MVT LoVT, HiVT;
> - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
> - MVT EltVT = LoVT.getVectorElementType();
> - unsigned LoNumElts = LoVT.getVectorNumElements();
> - unsigned NumElements = Mask.getNumOperands();
> -
> - // Insert all of the elements from the input that are needed. We
> use
> - // buildvector of extractelement here because the input vectors
> will have
> - // to be legalized, so this makes the code simpler.
> - for (unsigned i = 0; i != LoNumElts; ++i) {
> - SDValue Arg = Mask.getOperand(i);
> - if (Arg.getOpcode() == ISD::UNDEF) {
> - Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> - } else {
> - unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))-
> >getZExtValue();
> - SDValue InVec = N->getOperand(0);
> - if (Idx >= NumElements) {
> - InVec = N->getOperand(1);
> - Idx -= NumElements;
> + Ops.reserve(NewElts);
> + for (unsigned High = 0; High < 2; ++High) {
> + SDValue &Output = High ? Hi : Lo;
> +
> + // Build a shuffle mask for the output, discovering on the fly
> which
> + // input vectors to use as shuffle operands (recorded in
> InputUsed).
> + // If building a suitable shuffle vector proves too hard, then
> bail
> + // out with useBuildVector set.
> + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
> + unsigned FirstMaskIdx = High * NewElts;
> + bool useBuildVector = false;
> + for (unsigned MaskOffset = 0; MaskOffset < NewElts; +
> +MaskOffset) {
> + SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
> +
> + // The mask element. This indexes into the input.
> + unsigned Idx = Arg.getOpcode() == ISD::UNDEF ?
> + -1U : cast<ConstantSDNode>(Arg)->getZExtValue();
> +
> + // The input vector this mask element indexes into.
> + unsigned Input = Idx / NewElts;
> +
> + if (Input >= array_lengthof(Inputs)) {
> + // The mask element does not index into any input vector.
> + Ops.push_back(DAG.getNode(ISD::UNDEF, IdxVT));
> + continue;
> + }
> +
> + // Turn the index into an offset from the start of the input
> vector.
> + Idx -= Input * NewElts;
> +
> + // Find or create a shuffle vector operand to hold this input.
> + unsigned OpNo;
> + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
> + if (InputUsed[OpNo] == Input) {
> + // This input vector is already an operand.
> + break;
> + } else if (InputUsed[OpNo] == -1U) {
> + // Create a new operand for this input vector.
> + InputUsed[OpNo] = Input;
> + break;
> + }
> + }
> +
> + if (OpNo >= array_lengthof(InputUsed)) {
> + // More than two input vectors used! Give up on trying to
> create a
> + // shuffle vector. Insert all elements into a BUILD_VECTOR
> instead.
> + useBuildVector = true;
> + break;
> }
> - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
> InVec,
> - DAG.getIntPtrConstant(Idx)));
> +
> + // Add the mask index for the new shuffle vector.
> + Ops.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT));
> }
> - }
> - Lo = DAG.getNode(ISD::BUILD_VECTOR, LoVT, &Ops[0], Ops.size());
> - Ops.clear();
>
> - for (unsigned i = LoNumElts; i != NumElements; ++i) {
> - SDValue Arg = Mask.getOperand(i);
> - if (Arg.getOpcode() == ISD::UNDEF) {
> - Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> - } else {
> - unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))-
> >getZExtValue();
> - SDValue InVec = N->getOperand(0);
> - if (Idx >= NumElements) {
> - InVec = N->getOperand(1);
> - Idx -= NumElements;
> + if (useBuildVector) {
> + MVT EltVT = NewVT.getVectorElementType();
> + Ops.clear();
> +
> + // Extract the input elements by hand.
> + for (unsigned MaskOffset = 0; MaskOffset < NewElts; +
> +MaskOffset) {
> + SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
> +
> + // The mask element. This indexes into the input.
> + unsigned Idx = Arg.getOpcode() == ISD::UNDEF ?
> + -1U : cast<ConstantSDNode>(Arg)->getZExtValue();
> +
> + // The input vector this mask element indexes into.
> + unsigned Input = Idx / NewElts;
> +
> + if (Input >= array_lengthof(Inputs)) {
> + // The mask element is "undef" or indexes off the end of
> the input.
> + Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
> + continue;
> + }
> +
> + // Turn the index into an offset from the start of the
> input vector.
> + Idx -= Input * NewElts;
> +
> + // Extract the vector element by hand.
> + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
> + Inputs[Input],
> DAG.getIntPtrConstant(Idx)));
> }
> - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
> InVec,
> - DAG.getIntPtrConstant(Idx)));
> +
> + // Construct the Lo/Hi output using a BUILD_VECTOR.
> + Output = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &Ops[0],
> Ops.size());
> + } else if (InputUsed[0] == -1U) {
> + // No input vectors were used! The result is undefined.
> + Output = DAG.getNode(ISD::UNDEF, NewVT);
> + } else {
> + // At least one input vector was used. Create a new shuffle
> vector.
> + SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR,
> + MVT::getVectorVT(IdxVT, Ops.size
> ()),
> + &Ops[0], Ops.size());
> + SDValue Op0 = Inputs[InputUsed[0]];
> + // If only one input was used, use an undefined vector for
> the other.
> + SDValue Op1 = InputUsed[1] == -1U ?
> + DAG.getNode(ISD::UNDEF, NewVT) : Inputs[InputUsed[1]];
> + Output = DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, Op0, Op1,
> NewMask);
> }
> +
> + Ops.clear();
> }
> - Hi = DAG.getNode(ISD::BUILD_VECTOR, HiVT, &Ops[0], Ops.size());
> }
>
> void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
More information about the llvm-commits
mailing list