[llvm-commits] [llvm] r148537 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Bruno Cardoso Lopes bruno.cardoso at gmail.com
Fri Jan 20 09:22:34 PST 2012


Hi Craig,

On Fri, Jan 20, 2012 at 7:29 AM, Craig Topper <craig.topper at gmail.com> wrote:
> Author: ctopper
> Date: Fri Jan 20 03:29:03 2012
> New Revision: 148537
>
> URL: http://llvm.org/viewvc/llvm-project?rev=148537&view=rev
> Log:
> Improve 256-bit shuffle splitting to allow 2 sources in each 128-bit lane. As long as only a single lane of the source is used in the lane in the destination. This makes the splitting match much closer to what happens with 256-bit shuffles when AVX is disabled and only 128-bit XMM is allowed.
>
> Modified:
>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148537&r1=148536&r2=148537&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jan 20 03:29:03 2012
> @@ -3162,17 +3162,6 @@
>   return (Val < 0) || (Val >= Low && Val < Hi);
>  }
>
> -/// isUndefOrInRange - Return true if every element in Mask, begining
> -/// from position Pos and ending in Pos+Size, falls within the specified
> -/// range (L, L+Pos]. or is undef.
> -static bool isUndefOrInRange(ArrayRef<int> Mask,
> -                             int Pos, int Size, int Low, int Hi) {
> -  for (int i = Pos, e = Pos+Size; i != e; ++i)
> -    if (!isUndefOrInRange(Mask[i], Low, Hi))
> -      return false;
> -  return true;
> -}
> -
>  /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
>  /// specified value.
>  static bool isUndefOrEqual(int Val, int CmpVal) {
> @@ -5948,95 +5937,106 @@
>                                              OpVT, SrcOp)));
>  }
>
> -/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
> -/// shuffle node referes to only one lane in the sources.
> -static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
> -  EVT VT = SVOp->getValueType(0);
> -  int NumElems = VT.getVectorNumElements();
> -  int HalfSize = NumElems/2;
> -  ArrayRef<int> M = SVOp->getMask();
> -  bool MatchA = false, MatchB = false;
> -
> -  for (int l = 0; l < NumElems*2; l += HalfSize) {
> -    if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
> -      MatchA = true;
> -      break;
> -    }
> -  }
> -
> -  for (int l = 0; l < NumElems*2; l += HalfSize) {
> -    if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
> -      MatchB = true;
> -      break;
> -    }
> -  }
> -
> -  return MatchA && MatchB;
> -}
> -
>  /// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles
>  /// which could not be matched by any known target speficic shuffle
>  static SDValue
>  LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
> -  if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
> -    // If each half of a vector shuffle node referes to only one lane in the
> -    // source vectors, extract each used 128-bit lane and shuffle them using
> -    // 128-bit shuffles. Then, concatenate the results. Otherwise leave
> -    // the work to the legalizer.
> -    DebugLoc dl = SVOp->getDebugLoc();
> -    EVT VT = SVOp->getValueType(0);
> -    int NumElems = VT.getVectorNumElements();
> -    int HalfSize = NumElems/2;
> +  EVT VT = SVOp->getValueType(0);
> +
> +  unsigned NumElems = VT.getVectorNumElements();
> +  unsigned NumLaneElems = NumElems / 2;
>
> -    // Extract the reference for each half
> -    int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
> -    int FstVecOpNum = 0, SndVecOpNum = 0;
> -    for (int i = 0; i < HalfSize; ++i) {
> -      int Elt = SVOp->getMaskElt(i);
> -      if (SVOp->getMaskElt(i) < 0)
> +  int MinRange[2][2] = { { static_cast<int>(NumElems),
> +                           static_cast<int>(NumElems) },
> +                         { static_cast<int>(NumElems),
> +                           static_cast<int>(NumElems) } };
> +  int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } };

The initial idea of having all those different functions was to make
the code more readable. Could you please add a bunch of comments
to make all this new code more understandable?

> +  // Collect used ranges for each source in each lane
> +  for (unsigned l = 0; l < 2; ++l) {
> +    unsigned LaneStart = l*NumLaneElems;
> +    for (unsigned i = 0; i != NumLaneElems; ++i) {
> +      int Idx = SVOp->getMaskElt(i+LaneStart);
> +      if (Idx < 0)
>         continue;
> -      FstVecOpNum = Elt/NumElems;
> -      FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
> -      break;
> +
> +      int Input = 0;
> +      if (Idx >= (int)NumElems) {
> +        Idx -= NumElems;
> +        Input = 1;
> +      }
> +
> +      if (Idx > MaxRange[l][Input])
> +        MaxRange[l][Input] = Idx;
> +      if (Idx < MinRange[l][Input])
> +        MinRange[l][Input] = Idx;
>     }
> -    for (int i = HalfSize; i < NumElems; ++i) {
> -      int Elt = SVOp->getMaskElt(i);
> -      if (SVOp->getMaskElt(i) < 0)
> +  }
> +
> +  // Make sure each range is 128-bits
> +  int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } };
> +  for (unsigned l = 0; l < 2; ++l) {
> +    for (unsigned Input = 0; Input < 2; ++Input) {
> +      if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0)
>         continue;
> -      SndVecOpNum = Elt/NumElems;
> -      SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
> -      break;
> +
> +      if (MinRange[l][Input] >= 0 && MinRange[l][Input] < (int)NumLaneElems)
> +        ExtractIdx[l][Input] = 0;
> +      else if (MinRange[l][Input] >= (int)NumLaneElems &&
> +               MinRange[l][Input] < (int)NumElems)
> +        ExtractIdx[l][Input] = NumLaneElems;
> +      else
> +        return SDValue();
>     }
> +  }
>
> -    // Extract the subvectors
> -    SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
> -                      DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
> -    SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
> -                      DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
> +  DebugLoc dl = SVOp->getDebugLoc();
> +  MVT EltVT = VT.getVectorElementType().getSimpleVT();
> +  EVT NVT = MVT::getVectorVT(EltVT, NumElems/2);
>
> -    // Generate 128-bit shuffles
> -    SmallVector<int, 16> MaskV1, MaskV2;
> -    for (int i = 0; i < HalfSize; ++i) {
> -      int Elt = SVOp->getMaskElt(i);
> -      MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
> -    }
> -    for (int i = HalfSize; i < NumElems; ++i) {
> -      int Elt = SVOp->getMaskElt(i);
> -      MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
> +  SDValue Ops[2][2];
> +  for (unsigned l = 0; l < 2; ++l) {
> +    for (unsigned Input = 0; Input < 2; ++Input) {
> +      if (ExtractIdx[l][Input] >= 0)
> +        Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input),
> +                                DAG.getConstant(ExtractIdx[l][Input], MVT::i32),
> +                                                DAG, dl);
> +      else
> +        Ops[l][Input] = DAG.getUNDEF(NVT);
>     }
> +  }
>
> -    EVT NVT = V1.getValueType();
> -    V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
> -    V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
> -
> -    // Concatenate the result back
> -    SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
> -                                   DAG.getConstant(0, MVT::i32), DAG, dl);
> -    return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
> -                              DAG, dl);
> +  // Generate 128-bit shuffles
> +  SmallVector<int, 16> Mask1, Mask2;
> +  for (unsigned i = 0; i != NumLaneElems; ++i) {
> +    int Elt = SVOp->getMaskElt(i);
> +    if (Elt >= (int)NumElems) {
> +      Elt %= NumLaneElems;
> +      Elt += NumLaneElems;
> +    } else if (Elt >= 0) {
> +      Elt %= NumLaneElems;
> +    }
> +    Mask1.push_back(Elt);
> +  }
> +  for (unsigned i = NumLaneElems; i != NumElems; ++i) {
> +    int Elt = SVOp->getMaskElt(i);
> +    if (Elt >= (int)NumElems) {
> +      Elt %= NumLaneElems;
> +      Elt += NumLaneElems;
> +    } else if (Elt >= 0) {
> +      Elt %= NumLaneElems;
> +    }
> +    Mask2.push_back(Elt);
>   }
>
> -  return SDValue();
> +  SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]);
> +  SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]);
> +
> +  // Concatenate the result back
> +  SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1,
> +                                 DAG.getConstant(0, MVT::i32), DAG, dl);
> +  return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32),
> +                            DAG, dl);
>  }
>
>  /// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



-- 
Bruno Cardoso Lopes
http://www.brunocardoso.cc




More information about the llvm-commits mailing list