<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Aug 7, 2015 at 1:40 PM, Silviu Baranga via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: sbaranga<br>

Date: Fri Aug  7 06:40:46 2015<br>

New Revision: 244314<br>

<br>

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=244314&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=244314&view=rev</a><br>

Log:<br>

[ARM] Update ReconstructShuffle to handle mismatched types<br>

<br>

Summary:<br>

Port the ReconstructShuffle function from AArch64 to ARM<br>

to handle mismatched incoming types in the BUILD_VECTOR<br>

node.<br>

<br>

This fixes an outstanding FIXME in the ReconstructShuffle<br>

code.<br>

<br>

Reviewers: t.p.northover, rengolin<br>

<br>

Subscribers: aemerson, llvm-commits, rengolin<br>

<br>

Differential Revision: <a href="http://reviews.llvm.org/D11720" rel="noreferrer" target="_blank">http://reviews.llvm.org/D11720</a><br>

<br>

Modified:<br>

    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp<br>

    llvm/trunk/test/CodeGen/ARM/vtrn.ll<br>

    llvm/trunk/test/CodeGen/ARM/vuzp.ll<br>

<br>

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=244314&r1=244313&r2=244314&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=244314&r1=244313&r2=244314&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)<br>

+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Aug  7 06:40:46 2015<br>

@@ -5529,18 +5529,46 @@ SDValue ARMTargetLowering::LowerBUILD_VE<br>

   return SDValue();<br>

 }<br>

<br>

+/// getExtFactor - Determine the adjustment factor for the position when<br>

+/// generating an "extract from vector registers" instruction.<br>

+static unsigned getExtFactor(SDValue &V) {<br>

+  EVT EltType = V.getValueType().getVectorElementType();<br>

+  return EltType.getSizeInBits() / 8;<br>

+}<br>

+<br>

 // Gather data to see if the operation can be modelled as a<br>

 // shuffle in combination with VEXTs.<br>

 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,<br>

                                               SelectionDAG &DAG) const {<br>

+  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");<br>

   SDLoc dl(Op);<br>

   EVT VT = Op.getValueType();<br>

   unsigned NumElts = VT.getVectorNumElements();<br>

<br>

-  SmallVector<SDValue, 2> SourceVecs;<br>

-  SmallVector<unsigned, 2> MinElts;<br>

-  SmallVector<unsigned, 2> MaxElts;<br>

-<br>

+  struct ShuffleSourceInfo {<br>

+    SDValue Vec;<br>

+    unsigned MinElt;<br>

+    unsigned MaxElt;<br>

+<br>

+    // We may insert some combination of BITCASTs and VEXT nodes to force Vec to<br>

+    // be compatible with the shuffle we intend to construct. As a result<br>

+    // ShuffleVec will be some sliding window into the original Vec.<br>

+    SDValue ShuffleVec;<br>

+<br>

+    // Code should guarantee that element i in Vec starts at element "WindowBase<br>

+    // + i * WindowScale" in ShuffleVec.<br>

+    int WindowBase;<br>

+    int WindowScale;<br>

+<br>

+    bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }<br>

+    ShuffleSourceInfo(SDValue Vec)<br>

+        : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),<br>

+          WindowScale(1) {}<br>

+  };<br>

+<br>

+  // First gather all vectors used as an immediate source for this BUILD_VECTOR<br>

+  // node.<br>

+  SmallVector<ShuffleSourceInfo, 2> Sources;<br>

   for (unsigned i = 0; i < NumElts; ++i) {<br>

     SDValue V = Op.getOperand(i);<br>

     if (V.getOpcode() == ISD::UNDEF)<br>

@@ -5549,127 +5577,161 @@ SDValue ARMTargetLowering::ReconstructSh<br>

       // A shuffle can only come from building a vector from various<br>

       // elements of other vectors.<br>

       return SDValue();<br>

-    } else if (V.getOperand(0).getValueType().getVectorElementType() !=<br>

-               VT.getVectorElementType()) {<br>

-      // This code doesn't know how to handle shuffles where the vector<br>

-      // element types do not match (this happens because type legalization<br>

-      // promotes the return type of EXTRACT_VECTOR_ELT).<br>

-      // FIXME: It might be appropriate to extend this code to handle<br>

-      // mismatched types.<br>

-      return SDValue();<br>

     }<br>

<br>

-    // Record this extraction against the appropriate vector if possible...<br>

+    // Add this element source to the list if it's not already there.<br>

     SDValue SourceVec = V.getOperand(0);<br>

-    // If the element number isn't a constant, we can't effectively<br>

-    // analyze what's going on.<br>

-    if (!isa<ConstantSDNode>(V.getOperand(1)))<br>

-      return SDValue();<br>

-    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();<br>

-    bool FoundSource = false;<br>

-    for (unsigned j = 0; j < SourceVecs.size(); ++j) {<br>

-      if (SourceVecs[j] == SourceVec) {<br>

-        if (MinElts[j] > EltNo)<br>

-          MinElts[j] = EltNo;<br>

-        if (MaxElts[j] < EltNo)<br>

-          MaxElts[j] = EltNo;<br>

-        FoundSource = true;<br>

-        break;<br>

-      }<br>

-    }<br>

+    auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);<br>

+    if (Source == Sources.end())<br>

+      Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));<br>

<br>

-    // Or record a new source if not...<br>

-    if (!FoundSource) {<br>

-      SourceVecs.push_back(SourceVec);<br>

-      MinElts.push_back(EltNo);<br>

-      MaxElts.push_back(EltNo);<br>

-    }<br>

+    // Update the minimum and maximum lane number seen.<br>

+    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();<br>

+    Source->MinElt = std::min(Source->MinElt, EltNo);<br>

+    Source->MaxElt = std::max(Source->MaxElt, EltNo);<br>

   }<br>

<br>

   // Currently only do something sane when at most two source vectors<br>

-  // involved.<br>

-  if (SourceVecs.size() > 2)<br>

+  // are involved.<br>

+  if (Sources.size() > 2)<br>

     return SDValue();<br>

<br>

-  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };<br>

-  int VEXTOffsets[2] = {0, 0};<br>

+  // Find out the smallest element size among result and two sources, and use<br>

+  // it as element size to build the shuffle_vector.<br>

+  EVT SmallestEltTy = VT.getVectorElementType();<br>

+  for (auto &Source : Sources) {<br>

+    EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();<br>

+    if (SrcEltTy.bitsLT(SmallestEltTy))<br>

+      SmallestEltTy = SrcEltTy;<br>

+  }<br>

+  unsigned ResMultiplier =<br>

+      VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();<br>

+  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();<br>

+  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);<br>

+<br>

+  // If the source vector is too wide or too narrow, we may nevertheless be able<br>

+  // to construct a compatible shuffle either by concatenating it with UNDEF or<br>

+  // extracting a suitable range of elements.<br>

+  for (auto &Src : Sources) {<br>

+    EVT SrcVT = Src.ShuffleVec.getValueType();<br>

<br>

-  // This loop extracts the usage patterns of the source vectors<br>

-  // and prepares appropriate SDValues for a shuffle if possible.<br>

-  for (unsigned i = 0; i < SourceVecs.size(); ++i) {<br>

-    if (SourceVecs[i].getValueType() == VT) {<br>

-      // No VEXT necessary<br>

-      ShuffleSrcs[i] = SourceVecs[i];<br>

-      VEXTOffsets[i] = 0;<br>

+    if (SrcVT.getSizeInBits() == VT.getSizeInBits())<br>

+      continue;<br>

+<br>

+    // This stage of the search produces a source with the same element type as<br>

+    // the original, but with a total width matching the BUILD_VECTOR output.<br>

+    EVT EltVT = SrcVT.getVectorElementType();<br>

+    unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();<br>

+    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);<br>

+<br>

+    if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {<br>

+      if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())<br>

+        return SDValue();<br>

+      // We can pad out the smaller vector for free, so if it's part of a<br>

+      // shuffle...<br>

+      Src.ShuffleVec =<br>

+          DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,<br>

+                      DAG.getUNDEF(Src.ShuffleVec.getValueType()));<br>

       continue;<br>

-    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {<br>

-      // It probably isn't worth padding out a smaller vector just to<br>

-      // break it down again in a shuffle.<br>

-      return SDValue();<br>

     }<br>

<br>

-    // Since only 64-bit and 128-bit vectors are legal on ARM and<br>

-    // we've eliminated the other cases...<br>

-    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&<br>

-           "unexpected vector sizes in ReconstructShuffle");<br>

+    if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())<br>

+      return SDValue();<br>

<br>

-    if (MaxElts[i] - MinElts[i] >= NumElts) {<br>

+    if (Src.MaxElt - Src.MinElt >= NumSrcElts) {<br>

       // Span too large for a VEXT to cope<br>

       return SDValue();<br>

     }<br>

<br>

-    if (MinElts[i] >= NumElts) {<br>

+    if (Src.MinElt >= NumSrcElts) {<br>

       // The extraction can just take the second half<br>

-      VEXTOffsets[i] = NumElts;<br>

-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,<br>

-                                   SourceVecs[i],<br>

-                                   DAG.getIntPtrConstant(NumElts, dl));<br>

-    } else if (MaxElts[i] < NumElts) {<br>

+      Src.ShuffleVec =<br>

+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,<br>

+                      DAG.getConstant(NumSrcElts, dl, MVT::i32));<br>

+      Src.WindowBase = -NumSrcElts;<br>

+    } else if (Src.MaxElt < NumSrcElts) {<br>

       // The extraction can just take the first half<br>

-      VEXTOffsets[i] = 0;<br>

-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,<br>

-                                   SourceVecs[i],<br>

-                                   DAG.getIntPtrConstant(0, dl));<br>

+      Src.ShuffleVec =<br>

+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,<br>

+                      DAG.getConstant(0, dl, MVT::i32));<br>

     } else {<br>

       // An actual VEXT is needed<br>

-      VEXTOffsets[i] = MinElts[i];<br>

-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,<br>

-                                     SourceVecs[i],<br>

-                                     DAG.getIntPtrConstant(0, dl));<br>

-      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,<br>

-                                     SourceVecs[i],<br>

-                                     DAG.getIntPtrConstant(NumElts, dl));<br>

-      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,<br>

-                                   DAG.getConstant(VEXTOffsets[i], dl,<br>

-                                                   MVT::i32));<br>

+      SDValue VEXTSrc1 =<br>

+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,<br>

+                      DAG.getConstant(0, dl, MVT::i32));<br>

+      SDValue VEXTSrc2 =<br>

+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,<br>

+                      DAG.getConstant(NumSrcElts, dl, MVT::i32));<br>

+      unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);<br>

+<br>

+      Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,<br>

+                                   VEXTSrc2,<br>

+                                   DAG.getConstant(Imm, dl, MVT::i32));<br>

+      Src.WindowBase = -Src.MinElt;<br>

     }<br>

   }<br>

<br>

-  SmallVector<int, 8> Mask;<br>

+  // Another possible incompatibility occurs from the vector element types. We<br>

+  // can fix this by bitcasting the source vectors to the same type we intend<br>

+  // for the shuffle.<br>

+  for (auto &Src : Sources) {<br>

+    EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();<br>

+    if (SrcEltTy == SmallestEltTy)<br>

+      continue;<br>

+    assert(ShuffleVT.getVectorElementType() == SmallestEltTy);<br>

+    Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);<br>

+    Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();<br>

+    Src.WindowBase *= Src.WindowScale;<br>

+  }<br>

<br>

-  for (unsigned i = 0; i < NumElts; ++i) {<br>

+  // Final sanity check before we try to actually produce a shuffle.<br>

+  for (auto Src : Sources)<br></blockquote><div><br></div><div>In release builds, where the assert below is compiled out, this loop generates an unused-variable warning for Src. Please fix.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+    assert(Src.ShuffleVec.getValueType() == ShuffleVT);<br>

+<br>

+  // The stars all align, our next step is to produce the mask for the shuffle.<br>

+  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);<br>

+  int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();<br>

+  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {<br>

     SDValue Entry = Op.getOperand(i);<br>

-    if (Entry.getOpcode() == ISD::UNDEF) {<br>

-      Mask.push_back(-1);<br>

+    if (Entry.getOpcode() == ISD::UNDEF)<br>

       continue;<br>

-    }<br>

<br>

-    SDValue ExtractVec = Entry.getOperand(0);<br>

-    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)<br>

-                                          .getOperand(1))->getSExtValue();<br>

-    if (ExtractVec == SourceVecs[0]) {<br>

-      Mask.push_back(ExtractElt - VEXTOffsets[0]);<br>

-    } else {<br>

-      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);<br>

-    }<br>

+    auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));<br>

+    int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();<br>

+<br>

+    // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit<br>

+    // trunc. So only std::min(SrcBits, DestBits) actually get defined in this<br>

+    // segment.<br>

+    EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();<br>

+    int BitsDefined = std::min(OrigEltTy.getSizeInBits(),<br>

+                               VT.getVectorElementType().getSizeInBits());<br>

+    int LanesDefined = BitsDefined / BitsPerShuffleLane;<br>

+<br>

+    // This source is expected to fill ResMultiplier lanes of the final shuffle,<br>

+    // starting at the appropriate offset.<br>

+    int *LaneMask = &Mask[i * ResMultiplier];<br>

+<br>

+    int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;<br>

+    ExtractBase += NumElts * (Src - Sources.begin());<br>

+    for (int j = 0; j < LanesDefined; ++j)<br>

+      LaneMask[j] = ExtractBase + j;<br>

   }<br>

<br>

   // Final check before we try to produce nonsense...<br>

-  if (isShuffleMaskLegal(Mask, VT))<br>

-    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],<br>

-                                &Mask[0]);<br>

+  if (!isShuffleMaskLegal(Mask, ShuffleVT))<br>

+    return SDValue();<br>

<br>

-  return SDValue();<br>

+  // We can't handle more than two sources. This should have already<br>

+  // been checked before this point.<br>

+  assert(Sources.size() <= 2 && "Too many sources!");<br>

+<br>

+  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };<br>

+  for (unsigned i = 0; i < Sources.size(); ++i)<br>

+    ShuffleOps[i] = Sources[i].ShuffleVec;<br>

+<br>

+  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],<br>

+                                         ShuffleOps[1], &Mask[0]);<br>

+  return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);<br>

 }<br>

<br>

 /// isShuffleMaskLegal - Targets can use this to indicate that they only<br>

<br>

Modified: llvm/trunk/test/CodeGen/ARM/vtrn.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtrn.ll?rev=244314&r1=244313&r2=244314&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtrn.ll?rev=244314&r1=244313&r2=244314&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/ARM/vtrn.ll (original)<br>

+++ llvm/trunk/test/CodeGen/ARM/vtrn.ll Fri Aug  7 06:40:46 2015<br>

@@ -335,3 +335,39 @@ entry:<br>

   %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7><br>

   ret <8 x i16> %0<br>

 }<br>

+<br>

+; Here we get a build_vector node, where all the incoming extract_element<br>

+; values do modify the type. However, we get different input types, as some of<br>

+; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of<br>

+; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).<br>

+define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,<br>

+                                             <4 x i32> %cmp0, <4 x i32> %cmp1,<br>

+                                             <4 x i16> %cmp2, <4 x i16> %cmp3) {<br>

+  ; CHECK-LABEL: vtrn_mismatched_builvector0<br>

+  ; CHECK: vmovn.i32<br>

+  ; CHECK: vtrn<br>

+  ; CHECK: vbsl<br>

+  %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>

+  %c1 = icmp ult <4 x i16> %cmp2, %cmp3<br>

+  %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7><br>

+  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>

+  ret <8 x i8> %rv<br>

+}<br>

+<br>

+; Here we get a build_vector node, where half the incoming extract_element<br>

+; values do not modify the type (the values from cmp2), but half of them do<br>

+; (from the icmp operation).<br>

+define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,<br>

+                           <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {<br>

+  ; CHECK-LABEL: vtrn_mismatched_builvector1<br>

+  ; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn<br>

+  ; CHECK: vmovl<br>

+  ; CHECK: vtrn.8<br>

+  ; CHECK: vbsl<br>

+  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4<br>

+  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1><br>

+  %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>

+  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7><br>

+  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>

+  ret <8 x i8> %rv<br>

+}<br>

<br>

Modified: llvm/trunk/test/CodeGen/ARM/vuzp.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vuzp.ll?rev=244314&r1=244313&r2=244314&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vuzp.ll?rev=244314&r1=244313&r2=244314&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/ARM/vuzp.ll (original)<br>

+++ llvm/trunk/test/CodeGen/ARM/vuzp.ll Fri Aug  7 06:40:46 2015<br>

@@ -285,3 +285,76 @@ entry:<br>

   %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3><br>

   ret <4 x i32> %0<br>

 }<br>

+<br>

+define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {<br>

+; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.<br>

+; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to<br>

+; truncate from i32 to i16 and one vuzp to perform the final truncation for i8.<br>

+; CHECK-LABEL: vuzp_trunc<br>

+; CHECK: vmovn.i32<br>

+; CHECK: vmovn.i32<br>

+; CHECK: vuzp<br>

+; CHECK: vbsl<br>

+  %c = icmp ult <8 x i32> %cmp0, %cmp1<br>

+  %res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1<br>

+  ret <8 x i8> %res<br>

+}<br>

+<br>

+; Shuffle the result from the compare with a <4 x i8>.<br>

+; We need to extend the loaded <4 x i8> to <4 x i16>. Otherwise we wouldn't be able<br>

+; to perform the vuzp and get the vbsl mask.<br>

+define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,<br>

+                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {<br>

+; CHECK-LABEL: vuzp_trunc_and_shuffle<br>

+; CHECK: vmovl<br>

+; CHECK: vuzp<br>

+; CHECK: vbsl<br>

+  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4<br>

+  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1><br>

+  %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>

+  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7><br>

+  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>

+  ret <8 x i8> %rv<br>

+}<br>

+<br>

+; Use an undef value for the <4 x i8> that is being shuffled with the compare result.<br>

+; This produces a build_vector with some of the operands undefs.<br>

+define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,<br>

+                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {<br>

+; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right<br>

+; CHECK: vuzp<br>

+; CHECK: vbsl<br>

+  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4<br>

+  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1><br>

+  %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>

+  %c = shufflevector <4 x i1> %c0, <4 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7><br>

+  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>

+  ret <8 x i8> %rv<br>

+}<br>

+<br>

+define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,<br>

+                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {<br>

+; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left<br>

+; CHECK: vuzp<br>

+; CHECK: vbsl<br>

+  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4<br>

+  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1><br>

+  %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>

+  %c = shufflevector <4 x i1> undef, <4 x i1> %c0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7><br>

+  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>

+  ret <8 x i8> %rv<br>

+}<br>

+<br>

+; We're using large data types here, and we have to fill with undef values until we<br>

+; get some vector size that we can represent.<br>

+define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,<br>

+                            <5 x i32> %cmp0, <5 x i32> %cmp1, <5 x i8> *%cmp2_ptr) {<br>

+; CHECK-LABEL: vuzp_wide_type<br>

+; CHECK: vbsl<br>

+  %cmp2_load = load <5 x i8>, <5 x i8> * %cmp2_ptr, align 4<br>

+  %cmp2 = trunc <5 x i8> %cmp2_load to <5 x i1><br>

+  %c0 = icmp ult <5 x i32> %cmp0, %cmp1<br>

+  %c = shufflevector <5 x i1> %c0, <5 x i1> %cmp2, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9><br>

+  %rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1<br>

+  ret <10 x i8> %rv<br>

+}<br>

<br>

<br>

_______________________________________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>

<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>

</blockquote></div><br>

</div></div>