[llvm] r244314 - [ARM] Update ReconstructShuffle to handle mismatched types

Fri Aug 7 05:02:45 PDT 2015

On Fri, Aug 7, 2015 at 1:40 PM, Silviu Baranga via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: sbaranga
> Date: Fri Aug  7 06:40:46 2015
> New Revision: 244314
>
> URL: http://llvm.org/viewvc/llvm-project?rev=244314&view=rev
> Log:
> [ARM] Update ReconstructShuffle to handle mismatched types
>
> Summary:
> Port the ReconstructShuffle function from AArch64 to ARM
> to handle mismatched incoming types in the BUILD_VECTOR
> node.
>
> This fixes an outstanding FIXME in the ReconstructShuffle
> code.
>
> Reviewers: t.p.northover, rengolin
>
> Subscribers: aemerson, llvm-commits, rengolin
>
> Differential Revision: http://reviews.llvm.org/D11720
>
> Modified:
>     llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>     llvm/trunk/test/CodeGen/ARM/vtrn.ll
>     llvm/trunk/test/CodeGen/ARM/vuzp.ll
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=244314&r1=244313&r2=244314&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Aug  7 06:40:46 2015
> @@ -5529,18 +5529,46 @@ SDValue ARMTargetLowering::LowerBUILD_VE
>    return SDValue();
>  }
>
> +/// getExtFactor - Determine the adjustment factor for the position when
> +/// generating an "extract from vector registers" instruction.
> +static unsigned getExtFactor(SDValue &V) {
> +  EVT EltType = V.getValueType().getVectorElementType();
> +  return EltType.getSizeInBits() / 8;
> +}
> +
>  // Gather data to see if the operation can be modelled as a
>  // shuffle in combination with VEXTs.
>  SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
>                                                SelectionDAG &DAG) const {
> +  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
>    SDLoc dl(Op);
>    EVT VT = Op.getValueType();
>    unsigned NumElts = VT.getVectorNumElements();
>
> -  SmallVector<SDValue, 2> SourceVecs;
> -  SmallVector<unsigned, 2> MinElts;
> -  SmallVector<unsigned, 2> MaxElts;
> -
> +  struct ShuffleSourceInfo {
> +    SDValue Vec;
> +    unsigned MinElt;
> +    unsigned MaxElt;
> +
> +    // We may insert some combination of BITCASTs and VEXT nodes to force
> Vec to
> +    // be compatible with the shuffle we intend to construct. As a result
> +    // ShuffleVec will be some sliding window into the original Vec.
> +    SDValue ShuffleVec;
> +
> +    // Code should guarantee that element i in Vec starts at element
> "WindowBase
> +    // + i * WindowScale in ShuffleVec".
> +    int WindowBase;
> +    int WindowScale;
> +
> +    bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
> +    ShuffleSourceInfo(SDValue Vec)
> +        : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec),
> WindowBase(0),
> +          WindowScale(1) {}
> +  };
> +
> +  // First gather all vectors used as an immediate source for this
> BUILD_VECTOR
> +  // node.
> +  SmallVector<ShuffleSourceInfo, 2> Sources;
>    for (unsigned i = 0; i < NumElts; ++i) {
>      SDValue V = Op.getOperand(i);
>      if (V.getOpcode() == ISD::UNDEF)
> @@ -5549,127 +5577,161 @@ SDValue ARMTargetLowering::ReconstructSh
>        // A shuffle can only come from building a vector from various
>        // elements of other vectors.
>        return SDValue();
> -    } else if (V.getOperand(0).getValueType().getVectorElementType() !=
> -               VT.getVectorElementType()) {
> -      // This code doesn't know how to handle shuffles where the vector
> -      // element types do not match (this happens because type
> legalization
> -      // promotes the return type of EXTRACT_VECTOR_ELT).
> -      // FIXME: It might be appropriate to extend this code to handle
> -      // mismatched types.
> -      return SDValue();
>      }
>
> -    // Record this extraction against the appropriate vector if
> possible...
> +    // Add this element source to the list if it's not already there.
>      SDValue SourceVec = V.getOperand(0);
> -    // If the element number isn't a constant, we can't effectively
> -    // analyze what's going on.
> -    if (!isa<ConstantSDNode>(V.getOperand(1)))
> -      return SDValue();
> -    unsigned EltNo =
> cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
> -    bool FoundSource = false;
> -    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
> -      if (SourceVecs[j] == SourceVec) {
> -        if (MinElts[j] > EltNo)
> -          MinElts[j] = EltNo;
> -        if (MaxElts[j] < EltNo)
> -          MaxElts[j] = EltNo;
> -        FoundSource = true;
> -        break;
> -      }
> -    }
> +    auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
> +    if (Source == Sources.end())
> +      Source = Sources.insert(Sources.end(),
> ShuffleSourceInfo(SourceVec));
>
> -    // Or record a new source if not...
> -    if (!FoundSource) {
> -      SourceVecs.push_back(SourceVec);
> -      MinElts.push_back(EltNo);
> -      MaxElts.push_back(EltNo);
> -    }
> +    // Update the minimum and maximum lane number seen.
> +    unsigned EltNo =
> cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
> +    Source->MinElt = std::min(Source->MinElt, EltNo);
> +    Source->MaxElt = std::max(Source->MaxElt, EltNo);
>    }
>
>    // Currently only do something sane when at most two source vectors
> -  // involved.
> -  if (SourceVecs.size() > 2)
> +  // are involved.
> +  if (Sources.size() > 2)
>      return SDValue();
>
> -  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
> -  int VEXTOffsets[2] = {0, 0};
> +  // Find out the smallest element size among result and two sources, and
> use
> +  // it as element size to build the shuffle_vector.
> +  EVT SmallestEltTy = VT.getVectorElementType();
> +  for (auto &Source : Sources) {
> +    EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
> +    if (SrcEltTy.bitsLT(SmallestEltTy))
> +      SmallestEltTy = SrcEltTy;
> +  }
> +  unsigned ResMultiplier =
> +      VT.getVectorElementType().getSizeInBits() /
> SmallestEltTy.getSizeInBits();
> +  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
> +  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy,
> NumElts);
> +
> +  // If the source vector is too wide or too narrow, we may nevertheless
> be able
> +  // to construct a compatible shuffle either by concatenating it with
> UNDEF or
> +  // extracting a suitable range of elements.
> +  for (auto &Src : Sources) {
> +    EVT SrcVT = Src.ShuffleVec.getValueType();
>
> -  // This loop extracts the usage patterns of the source vectors
> -  // and prepares appropriate SDValues for a shuffle if possible.
> -  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
> -    if (SourceVecs[i].getValueType() == VT) {
> -      // No VEXT necessary
> -      ShuffleSrcs[i] = SourceVecs[i];
> -      VEXTOffsets[i] = 0;
> +    if (SrcVT.getSizeInBits() == VT.getSizeInBits())
> +      continue;
> +
> +    // This stage of the search produces a source with the same element
> type as
> +    // the original, but with a total width matching the BUILD_VECTOR
> output.
> +    EVT EltVT = SrcVT.getVectorElementType();
> +    unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
> +    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
> +
> +    if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
> +      if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
> +        return SDValue();
> +      // We can pad out the smaller vector for free, so if it's part of a
> +      // shuffle...
> +      Src.ShuffleVec =
> +          DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
> +                      DAG.getUNDEF(Src.ShuffleVec.getValueType()));
>        continue;
> -    } else if (SourceVecs[i].getValueType().getVectorNumElements() <
> NumElts) {
> -      // It probably isn't worth padding out a smaller vector just to
> -      // break it down again in a shuffle.
> -      return SDValue();
>      }
>
> -    // Since only 64-bit and 128-bit vectors are legal on ARM and
> -    // we've eliminated the other cases...
> -    assert(SourceVecs[i].getValueType().getVectorNumElements() ==
> 2*NumElts &&
> -           "unexpected vector sizes in ReconstructShuffle");
> +    if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
> +      return SDValue();
>
> -    if (MaxElts[i] - MinElts[i] >= NumElts) {
> +    if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
>        // Span too large for a VEXT to cope
>        return SDValue();
>      }
>
> -    if (MinElts[i] >= NumElts) {
> +    if (Src.MinElt >= NumSrcElts) {
>        // The extraction can just take the second half
> -      VEXTOffsets[i] = NumElts;
> -      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> -                                   SourceVecs[i],
> -                                   DAG.getIntPtrConstant(NumElts, dl));
> -    } else if (MaxElts[i] < NumElts) {
> +      Src.ShuffleVec =
> +          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
> +                      DAG.getConstant(NumSrcElts, dl, MVT::i32));
> +      Src.WindowBase = -NumSrcElts;
> +    } else if (Src.MaxElt < NumSrcElts) {
>        // The extraction can just take the first half
> -      VEXTOffsets[i] = 0;
> -      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> -                                   SourceVecs[i],
> -                                   DAG.getIntPtrConstant(0, dl));
> +      Src.ShuffleVec =
> +          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
> +                      DAG.getConstant(0, dl, MVT::i32));
>      } else {
>        // An actual VEXT is needed
> -      VEXTOffsets[i] = MinElts[i];
> -      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> -                                     SourceVecs[i],
> -                                     DAG.getIntPtrConstant(0, dl));
> -      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> -                                     SourceVecs[i],
> -                                     DAG.getIntPtrConstant(NumElts, dl));
> -      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1,
> VEXTSrc2,
> -                                   DAG.getConstant(VEXTOffsets[i], dl,
> -                                                   MVT::i32));
> +      SDValue VEXTSrc1 =
> +          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
> +                      DAG.getConstant(0, dl, MVT::i32));
> +      SDValue VEXTSrc2 =
> +          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
> +                      DAG.getConstant(NumSrcElts, dl, MVT::i32));
> +      unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
> +
> +      Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
> +                                   VEXTSrc2,
> +                                   DAG.getConstant(Imm, dl, MVT::i32));
> +      Src.WindowBase = -Src.MinElt;
>      }
>    }
>
> -  SmallVector<int, 8> Mask;
> +  // Another possible incompatibility occurs from the vector element
> types. We
> +  // can fix this by bitcasting the source vectors to the same type we
> intend
> +  // for the shuffle.
> +  for (auto &Src : Sources) {
> +    EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
> +    if (SrcEltTy == SmallestEltTy)
> +      continue;
> +    assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
> +    Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT,
> Src.ShuffleVec);
> +    Src.WindowScale = SrcEltTy.getSizeInBits() /
> SmallestEltTy.getSizeInBits();
> +    Src.WindowBase *= Src.WindowScale;
> +  }
>
> -  for (unsigned i = 0; i < NumElts; ++i) {
> +  // Final sanity check before we try to actually produce a shuffle.
> +  for (auto Src : Sources)
>

On release builds this generates the unused variable warning for Src.
Please fix.

> +    assert(Src.ShuffleVec.getValueType() == ShuffleVT);
> +
> +  // The stars all align, our next step is to produce the mask for the
> shuffle.
> +  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
> +  int BitsPerShuffleLane =
> ShuffleVT.getVectorElementType().getSizeInBits();
> +  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
>      SDValue Entry = Op.getOperand(i);
> -    if (Entry.getOpcode() == ISD::UNDEF) {
> -      Mask.push_back(-1);
> +    if (Entry.getOpcode() == ISD::UNDEF)
>        continue;
> -    }
>
> -    SDValue ExtractVec = Entry.getOperand(0);
> -    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
> -                                          .getOperand(1))->getSExtValue();
> -    if (ExtractVec == SourceVecs[0]) {
> -      Mask.push_back(ExtractElt - VEXTOffsets[0]);
> -    } else {
> -      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
> -    }
> +    auto Src = std::find(Sources.begin(), Sources.end(),
> Entry.getOperand(0));
> +    int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
> +
> +    // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an
> implicit
> +    // trunc. So only std::min(SrcBits, DestBits) actually get defined in
> this
> +    // segment.
> +    EVT OrigEltTy =
> Entry.getOperand(0).getValueType().getVectorElementType();
> +    int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
> +                               VT.getVectorElementType().getSizeInBits());
> +    int LanesDefined = BitsDefined / BitsPerShuffleLane;
> +
> +    // This source is expected to fill ResMultiplier lanes of the final
> shuffle,
> +    // starting at the appropriate offset.
> +    int *LaneMask = &Mask[i * ResMultiplier];
> +
> +    int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
> +    ExtractBase += NumElts * (Src - Sources.begin());
> +    for (int j = 0; j < LanesDefined; ++j)
> +      LaneMask[j] = ExtractBase + j;
>    }
>
>    // Final check before we try to produce nonsense...
> -  if (isShuffleMaskLegal(Mask, VT))
> -    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
> -                                &Mask[0]);
> +  if (!isShuffleMaskLegal(Mask, ShuffleVT))
> +    return SDValue();
>
> -  return SDValue();
> +  // We can't handle more than two sources. This should have already
> +  // been checked before this point.
> +  assert(Sources.size() <= 2 && "Too many sources!");
> +
> +  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT),
> DAG.getUNDEF(ShuffleVT) };
> +  for (unsigned i = 0; i < Sources.size(); ++i)
> +    ShuffleOps[i] = Sources[i].ShuffleVec;
> +
> +  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
> +                                         ShuffleOps[1], &Mask[0]);
> +  return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
>  }
>
>  /// isShuffleMaskLegal - Targets can use this to indicate that they only
>
> Modified: llvm/trunk/test/CodeGen/ARM/vtrn.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtrn.ll?rev=244314&r1=244313&r2=244314&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/vtrn.ll (original)
> +++ llvm/trunk/test/CodeGen/ARM/vtrn.ll Fri Aug  7 06:40:46 2015
> @@ -335,3 +335,39 @@ entry:
>    %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32
> undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
>    ret <8 x i16> %0
>  }
> +
> +; Here we get a build_vector node, where all the incoming extract_element
> +; values do modify the type. However, we get different input types, as
> some of
> +; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and
> some of
> +; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
> +define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
> +                                             <4 x i32> %cmp0, <4 x i32>
> %cmp1,
> +                                             <4 x i16> %cmp2, <4 x i16>
> %cmp3) {
> +  ; CHECK-LABEL: vtrn_mismatched_builvector0
> +  ; CHECK: vmovn.i32
> +  ; CHECK: vtrn
> +  ; CHECK: vbsl
> +  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
> +  %c1 = icmp ult <4 x i16> %cmp2, %cmp3
> +  %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4,
> i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
> +  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
> +  ret <8 x i8> %rv
> +}
> +
> +; Here we get a build_vector node, where half the incoming extract_element
> +; values do not modify the type (the values form cmp2), but half of them
> do
> +; (from the icmp operation).
> +define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
> +                           <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8>
> *%cmp2_ptr) {
> +  ; CHECK-LABEL: vtrn_mismatched_builvector1
> +  ; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
> +  ; CHECK: vmovl
> +  ; CHECK: vtrn.8
> +  ; CHECK: vbsl
> +  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
> +  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
> +  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
> +  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32
> 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
> +  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
> +  ret <8 x i8> %rv
> +}
>
> Modified: llvm/trunk/test/CodeGen/ARM/vuzp.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vuzp.ll?rev=244314&r1=244313&r2=244314&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/vuzp.ll (original)
> +++ llvm/trunk/test/CodeGen/ARM/vuzp.ll Fri Aug  7 06:40:46 2015
> @@ -285,3 +285,76 @@ entry:
>    %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0,
> i32 0, i32 1, i32 3>
>    ret <4 x i32> %0
>  }
> +
> +define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32>
> %cmp0, <8 x i32> %cmp1) {
> +; In order to create the select we need to truncate the vcgt result from
> a vector of i32 to a vector of i8.
> +; This results in a build_vector with mismatched types. We will generate
> two vmovn.i32 instructions to
> +; truncate from i32 to i16 and one vuzp to perform the final truncation
> for i8.
> +; CHECK-LABEL: vuzp_trunc
> +; CHECK: vmovn.i32
> +; CHECK: vmovn.i32
> +; CHECK: vuzp
> +; CHECK: vbsl
> +  %c = icmp ult <8 x i32> %cmp0, %cmp1
> +  %res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
> +  ret <8 x i8> %res
> +}
> +
> +; Shuffle the result from the compare with a <4 x i8>.
> +; We need to extend the loaded <4 x i8> to <4 x i16>. Otherwise we
> wouldn't be able
> +; to perform the vuzp and get the vbsl mask.
> +define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
> +                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8>
> *%cmp2_ptr) {
> +; CHECK-LABEL: vuzp_trunc_and_shuffle
> +; CHECK: vmovl
> +; CHECK: vuzp
> +; CHECK: vbsl
> +  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
> +  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
> +  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
> +  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32
> 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
> +  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
> +  ret <8 x i8> %rv
> +}
> +
> +; Use an undef value for the <4 x i8> that is being shuffled with the
> compare result.
> +; This produces a build_vector with some of the operands undefs.
> +define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x
> i8> %tr1,
> +                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8>
> *%cmp2_ptr) {
> +; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right
> +; CHECK: vuzp
> +; CHECK: vbsl
> +  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
> +  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
> +  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
> +  %c = shufflevector <4 x i1> %c0, <4 x i1> undef, <8 x i32> <i32 0, i32
> 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
> +  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
> +  ret <8 x i8> %rv
> +}
> +
> +define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x
> i8> %tr1,
> +                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8>
> *%cmp2_ptr) {
> +; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left
> +; CHECK: vuzp
> +; CHECK: vbsl
> +  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
> +  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
> +  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
> +  %c = shufflevector <4 x i1> undef, <4 x i1> %c0, <8 x i32> <i32 0, i32
> 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
> +  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
> +  ret <8 x i8> %rv
> +}
> +
> +; We're using large data types here, and we have to fill with undef
> values until we
> +; get some vector size that we can represent.
> +define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
> +                            <5 x i32> %cmp0, <5 x i32> %cmp1, <5 x i8>
> *%cmp2_ptr) {
> +; CHECK-LABEL: vuzp_wide_type
> +; CHECK: vbsl
> +  %cmp2_load = load <5 x i8>, <5 x i8> * %cmp2_ptr, align 4
> +  %cmp2 = trunc <5 x i8> %cmp2_load to <5 x i1>
> +  %c0 = icmp ult <5 x i32> %cmp0, %cmp1
> +  %c = shufflevector <5 x i1> %c0, <5 x i1> %cmp2, <10 x i32> <i32 0, i32
> 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
> +  %rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1
> +  ret <10 x i8> %rv
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150807/c114e39d/attachment.html>