<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Aug 7, 2015 at 1:40 PM, Silviu Baranga via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: sbaranga<br>
Date: Fri Aug 7 06:40:46 2015<br>
New Revision: 244314<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=244314&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=244314&view=rev</a><br>
Log:<br>
[ARM] Update ReconstructShuffle to handle mismatched types<br>
<br>
Summary:<br>
Port the ReconstructShuffle function from AArch64 to ARM<br>
to handle mismatched incoming types in the BUILD_VECTOR<br>
node.<br>
<br>
This fixes an outstanding FIXME in the ReconstructShuffle<br>
code.<br>
<br>
Reviewers: t.p.northover, rengolin<br>
<br>
Subscribers: aemerson, llvm-commits, rengolin<br>
<br>
Differential Revision: <a href="http://reviews.llvm.org/D11720" rel="noreferrer" target="_blank">http://reviews.llvm.org/D11720</a><br>
<br>
Modified:<br>
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp<br>
llvm/trunk/test/CodeGen/ARM/vtrn.ll<br>
llvm/trunk/test/CodeGen/ARM/vuzp.ll<br>
<br>
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=244314&r1=244313&r2=244314&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=244314&r1=244313&r2=244314&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)<br>
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Aug 7 06:40:46 2015<br>
@@ -5529,18 +5529,46 @@ SDValue ARMTargetLowering::LowerBUILD_VE<br>
return SDValue();<br>
}<br>
<br>
+/// getExtFactor - Determine the adjustment factor for the position when<br>
+/// generating an "extract from vector registers" instruction.<br>
+static unsigned getExtFactor(SDValue &V) {<br>
+ EVT EltType = V.getValueType().getVectorElementType();<br>
+ return EltType.getSizeInBits() / 8;<br>
+}<br>
+<br>
// Gather data to see if the operation can be modelled as a<br>
// shuffle in combination with VEXTs.<br>
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,<br>
SelectionDAG &DAG) const {<br>
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");<br>
SDLoc dl(Op);<br>
EVT VT = Op.getValueType();<br>
unsigned NumElts = VT.getVectorNumElements();<br>
<br>
- SmallVector<SDValue, 2> SourceVecs;<br>
- SmallVector<unsigned, 2> MinElts;<br>
- SmallVector<unsigned, 2> MaxElts;<br>
-<br>
+ struct ShuffleSourceInfo {<br>
+ SDValue Vec;<br>
+ unsigned MinElt;<br>
+ unsigned MaxElt;<br>
+<br>
+ // We may insert some combination of BITCASTs and VEXT nodes to force Vec to<br>
+ // be compatible with the shuffle we intend to construct. As a result<br>
+ // ShuffleVec will be some sliding window into the original Vec.<br>
+ SDValue ShuffleVec;<br>
+<br>
+ // Code should guarantee that element i in Vec starts at element "WindowBase<br>
+ // + i * WindowScale in ShuffleVec".<br>
+ int WindowBase;<br>
+ int WindowScale;<br>
+<br>
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }<br>
+ ShuffleSourceInfo(SDValue Vec)<br>
+ : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),<br>
+ WindowScale(1) {}<br>
+ };<br>
+<br>
+ // First gather all vectors used as an immediate source for this BUILD_VECTOR<br>
+ // node.<br>
+ SmallVector<ShuffleSourceInfo, 2> Sources;<br>
for (unsigned i = 0; i < NumElts; ++i) {<br>
SDValue V = Op.getOperand(i);<br>
if (V.getOpcode() == ISD::UNDEF)<br>
@@ -5549,127 +5577,161 @@ SDValue ARMTargetLowering::ReconstructSh<br>
// A shuffle can only come from building a vector from various<br>
// elements of other vectors.<br>
return SDValue();<br>
- } else if (V.getOperand(0).getValueType().getVectorElementType() !=<br>
- VT.getVectorElementType()) {<br>
- // This code doesn't know how to handle shuffles where the vector<br>
- // element types do not match (this happens because type legalization<br>
- // promotes the return type of EXTRACT_VECTOR_ELT).<br>
- // FIXME: It might be appropriate to extend this code to handle<br>
- // mismatched types.<br>
- return SDValue();<br>
}<br>
<br>
- // Record this extraction against the appropriate vector if possible...<br>
+ // Add this element source to the list if it's not already there.<br>
SDValue SourceVec = V.getOperand(0);<br>
- // If the element number isn't a constant, we can't effectively<br>
- // analyze what's going on.<br>
- if (!isa<ConstantSDNode>(V.getOperand(1)))<br>
- return SDValue();<br>
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();<br>
- bool FoundSource = false;<br>
- for (unsigned j = 0; j < SourceVecs.size(); ++j) {<br>
- if (SourceVecs[j] == SourceVec) {<br>
- if (MinElts[j] > EltNo)<br>
- MinElts[j] = EltNo;<br>
- if (MaxElts[j] < EltNo)<br>
- MaxElts[j] = EltNo;<br>
- FoundSource = true;<br>
- break;<br>
- }<br>
- }<br>
+ auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);<br>
+ if (Source == Sources.end())<br>
+ Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));<br>
<br>
- // Or record a new source if not...<br>
- if (!FoundSource) {<br>
- SourceVecs.push_back(SourceVec);<br>
- MinElts.push_back(EltNo);<br>
- MaxElts.push_back(EltNo);<br>
- }<br>
+ // Update the minimum and maximum lane number seen.<br>
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();<br>
+ Source->MinElt = std::min(Source->MinElt, EltNo);<br>
+ Source->MaxElt = std::max(Source->MaxElt, EltNo);<br>
}<br>
<br>
// Currently only do something sane when at most two source vectors<br>
- // involved.<br>
- if (SourceVecs.size() > 2)<br>
+ // are involved.<br>
+ if (Sources.size() > 2)<br>
return SDValue();<br>
<br>
- SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };<br>
- int VEXTOffsets[2] = {0, 0};<br>
+ // Find out the smallest element size among result and two sources, and use<br>
+ // it as element size to build the shuffle_vector.<br>
+ EVT SmallestEltTy = VT.getVectorElementType();<br>
+ for (auto &Source : Sources) {<br>
+ EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();<br>
+ if (SrcEltTy.bitsLT(SmallestEltTy))<br>
+ SmallestEltTy = SrcEltTy;<br>
+ }<br>
+ unsigned ResMultiplier =<br>
+ VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();<br>
+ NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();<br>
+ EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);<br>
+<br>
+ // If the source vector is too wide or too narrow, we may nevertheless be able<br>
+ // to construct a compatible shuffle either by concatenating it with UNDEF or<br>
+ // extracting a suitable range of elements.<br>
+ for (auto &Src : Sources) {<br>
+ EVT SrcVT = Src.ShuffleVec.getValueType();<br>
<br>
- // This loop extracts the usage patterns of the source vectors<br>
- // and prepares appropriate SDValues for a shuffle if possible.<br>
- for (unsigned i = 0; i < SourceVecs.size(); ++i) {<br>
- if (SourceVecs[i].getValueType() == VT) {<br>
- // No VEXT necessary<br>
- ShuffleSrcs[i] = SourceVecs[i];<br>
- VEXTOffsets[i] = 0;<br>
+ if (SrcVT.getSizeInBits() == VT.getSizeInBits())<br>
+ continue;<br>
+<br>
+ // This stage of the search produces a source with the same element type as<br>
+ // the original, but with a total width matching the BUILD_VECTOR output.<br>
+ EVT EltVT = SrcVT.getVectorElementType();<br>
+ unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();<br>
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);<br>
+<br>
+ if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {<br>
+ if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())<br>
+ return SDValue();<br>
+ // We can pad out the smaller vector for free, so if it's part of a<br>
+ // shuffle...<br>
+ Src.ShuffleVec =<br>
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,<br>
+ DAG.getUNDEF(Src.ShuffleVec.getValueType()));<br>
continue;<br>
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {<br>
- // It probably isn't worth padding out a smaller vector just to<br>
- // break it down again in a shuffle.<br>
- return SDValue();<br>
}<br>
<br>
- // Since only 64-bit and 128-bit vectors are legal on ARM and<br>
- // we've eliminated the other cases...<br>
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&<br>
- "unexpected vector sizes in ReconstructShuffle");<br>
+ if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())<br>
+ return SDValue();<br>
<br>
- if (MaxElts[i] - MinElts[i] >= NumElts) {<br>
+ if (Src.MaxElt - Src.MinElt >= NumSrcElts) {<br>
// Span too large for a VEXT to cope<br>
return SDValue();<br>
}<br>
<br>
- if (MinElts[i] >= NumElts) {<br>
+ if (Src.MinElt >= NumSrcElts) {<br>
// The extraction can just take the second half<br>
- VEXTOffsets[i] = NumElts;<br>
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,<br>
- SourceVecs[i],<br>
- DAG.getIntPtrConstant(NumElts, dl));<br>
- } else if (MaxElts[i] < NumElts) {<br>
+ Src.ShuffleVec =<br>
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,<br>
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));<br>
+ Src.WindowBase = -NumSrcElts;<br>
+ } else if (Src.MaxElt < NumSrcElts) {<br>
// The extraction can just take the first half<br>
- VEXTOffsets[i] = 0;<br>
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,<br>
- SourceVecs[i],<br>
- DAG.getIntPtrConstant(0, dl));<br>
+ Src.ShuffleVec =<br>
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,<br>
+ DAG.getConstant(0, dl, MVT::i32));<br>
} else {<br>
// An actual VEXT is needed<br>
- VEXTOffsets[i] = MinElts[i];<br>
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,<br>
- SourceVecs[i],<br>
- DAG.getIntPtrConstant(0, dl));<br>
- SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,<br>
- SourceVecs[i],<br>
- DAG.getIntPtrConstant(NumElts, dl));<br>
- ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,<br>
- DAG.getConstant(VEXTOffsets[i], dl,<br>
- MVT::i32));<br>
+ SDValue VEXTSrc1 =<br>
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,<br>
+ DAG.getConstant(0, dl, MVT::i32));<br>
+ SDValue VEXTSrc2 =<br>
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,<br>
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));<br>
+ unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);<br>
+<br>
+ Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,<br>
+ VEXTSrc2,<br>
+ DAG.getConstant(Imm, dl, MVT::i32));<br>
+ Src.WindowBase = -Src.MinElt;<br>
}<br>
}<br>
<br>
- SmallVector<int, 8> Mask;<br>
+ // Another possible incompatibility occurs from the vector element types. We<br>
+ // can fix this by bitcasting the source vectors to the same type we intend<br>
+ // for the shuffle.<br>
+ for (auto &Src : Sources) {<br>
+ EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();<br>
+ if (SrcEltTy == SmallestEltTy)<br>
+ continue;<br>
+ assert(ShuffleVT.getVectorElementType() == SmallestEltTy);<br>
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);<br>
+ Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();<br>
+ Src.WindowBase *= Src.WindowScale;<br>
+ }<br>
<br>
- for (unsigned i = 0; i < NumElts; ++i) {<br>
+ // Final sanity check before we try to actually produce a shuffle.<br>
+ for (auto Src : Sources)<br></blockquote><div><br></div><div>In release builds, where assert() is compiled out, the loop variable Src is never used, so this generates an unused-variable warning for Src. Please fix.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ assert(Src.ShuffleVec.getValueType() == ShuffleVT);<br>
+<br>
+ // The stars all align, our next step is to produce the mask for the shuffle.<br>
+ SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);<br>
+ int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();<br>
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {<br>
SDValue Entry = Op.getOperand(i);<br>
- if (Entry.getOpcode() == ISD::UNDEF) {<br>
- Mask.push_back(-1);<br>
+ if (Entry.getOpcode() == ISD::UNDEF)<br>
continue;<br>
- }<br>
<br>
- SDValue ExtractVec = Entry.getOperand(0);<br>
- int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)<br>
- .getOperand(1))->getSExtValue();<br>
- if (ExtractVec == SourceVecs[0]) {<br>
- Mask.push_back(ExtractElt - VEXTOffsets[0]);<br>
- } else {<br>
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);<br>
- }<br>
+ auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));<br>
+ int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();<br>
+<br>
+ // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit<br>
+ // trunc. So only std::min(SrcBits, DestBits) actually get defined in this<br>
+ // segment.<br>
+ EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();<br>
+ int BitsDefined = std::min(OrigEltTy.getSizeInBits(),<br>
+ VT.getVectorElementType().getSizeInBits());<br>
+ int LanesDefined = BitsDefined / BitsPerShuffleLane;<br>
+<br>
+ // This source is expected to fill ResMultiplier lanes of the final shuffle,<br>
+ // starting at the appropriate offset.<br>
+ int *LaneMask = &Mask[i * ResMultiplier];<br>
+<br>
+ int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;<br>
+ ExtractBase += NumElts * (Src - Sources.begin());<br>
+ for (int j = 0; j < LanesDefined; ++j)<br>
+ LaneMask[j] = ExtractBase + j;<br>
}<br>
<br>
// Final check before we try to produce nonsense...<br>
- if (isShuffleMaskLegal(Mask, VT))<br>
- return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],<br>
- &Mask[0]);<br>
+ if (!isShuffleMaskLegal(Mask, ShuffleVT))<br>
+ return SDValue();<br>
<br>
- return SDValue();<br>
+ // We can't handle more than two sources. This should have already<br>
+ // been checked before this point.<br>
+ assert(Sources.size() <= 2 && "Too many sources!");<br>
+<br>
+ SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };<br>
+ for (unsigned i = 0; i < Sources.size(); ++i)<br>
+ ShuffleOps[i] = Sources[i].ShuffleVec;<br>
+<br>
+ SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],<br>
+ ShuffleOps[1], &Mask[0]);<br>
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);<br>
}<br>
<br>
/// isShuffleMaskLegal - Targets can use this to indicate that they only<br>
<br>
Modified: llvm/trunk/test/CodeGen/ARM/vtrn.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtrn.ll?rev=244314&r1=244313&r2=244314&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtrn.ll?rev=244314&r1=244313&r2=244314&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/ARM/vtrn.ll (original)<br>
+++ llvm/trunk/test/CodeGen/ARM/vtrn.ll Fri Aug 7 06:40:46 2015<br>
@@ -335,3 +335,39 @@ entry:<br>
%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7><br>
ret <8 x i16> %0<br>
}<br>
+<br>
+; Here we get a build_vector node, where all the incoming extract_element<br>
+; values do modify the type. However, we get different input types, as some of<br>
+; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of<br>
+; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).<br>
+define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,<br>
+ <4 x i32> %cmp0, <4 x i32> %cmp1,<br>
+ <4 x i16> %cmp2, <4 x i16> %cmp3) {<br>
+ ; CHECK-LABEL: vtrn_mismatched_builvector0<br>
+ ; CHECK: vmovn.i32<br>
+ ; CHECK: vtrn<br>
+ ; CHECK: vbsl<br>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>
+ %c1 = icmp ult <4 x i16> %cmp2, %cmp3<br>
+ %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7><br>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>
+ ret <8 x i8> %rv<br>
+}<br>
+<br>
+; Here we get a build_vector node, where half the incoming extract_element<br>
+; values do not modify the type (the values form cmp2), but half of them do<br>
+; (from the icmp operation).<br>
+define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,<br>
+ <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {<br>
+ ; CHECK-LABEL: vtrn_mismatched_builvector1<br>
+ ; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn<br>
+ ; CHECK: vmovl<br>
+ ; CHECK: vtrn.8<br>
+ ; CHECK: vbsl<br>
+ %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4<br>
+ %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1><br>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>
+ %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7><br>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>
+ ret <8 x i8> %rv<br>
+}<br>
<br>
Modified: llvm/trunk/test/CodeGen/ARM/vuzp.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vuzp.ll?rev=244314&r1=244313&r2=244314&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vuzp.ll?rev=244314&r1=244313&r2=244314&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/ARM/vuzp.ll (original)<br>
+++ llvm/trunk/test/CodeGen/ARM/vuzp.ll Fri Aug 7 06:40:46 2015<br>
@@ -285,3 +285,76 @@ entry:<br>
%0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3><br>
ret <4 x i32> %0<br>
}<br>
+<br>
+define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {<br>
+; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.<br>
+; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to<br>
+; truncate from i32 to i16 and one vuzp to perform the final truncation for i8.<br>
+; CHECK-LABEL: vuzp_trunc<br>
+; CHECK: vmovn.i32<br>
+; CHECK: vmovn.i32<br>
+; CHECK: vuzp<br>
+; CHECK: vbsl<br>
+ %c = icmp ult <8 x i32> %cmp0, %cmp1<br>
+ %res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1<br>
+ ret <8 x i8> %res<br>
+}<br>
+<br>
+; Shuffle the result from the compare with a <4 x i8>.<br>
+; We need to extend the loaded <4 x i8> to <4 x i16>. Otherwise we wouldn't be able<br>
+; to perform the vuzp and get the vbsl mask.<br>
+define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,<br>
+ <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {<br>
+; CHECK-LABEL: vuzp_trunc_and_shuffle<br>
+; CHECK: vmovl<br>
+; CHECK: vuzp<br>
+; CHECK: vbsl<br>
+ %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4<br>
+ %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1><br>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>
+ %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7><br>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>
+ ret <8 x i8> %rv<br>
+}<br>
+<br>
+; Use an undef value for the <4 x i8> that is being shuffled with the compare result.<br>
+; This produces a build_vector with some of the operands undefs.<br>
+define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,<br>
+ <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {<br>
+; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right<br>
+; CHECK: vuzp<br>
+; CHECK: vbsl<br>
+ %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4<br>
+ %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1><br>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>
+ %c = shufflevector <4 x i1> %c0, <4 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7><br>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>
+ ret <8 x i8> %rv<br>
+}<br>
+<br>
+define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,<br>
+ <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {<br>
+; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left<br>
+; CHECK: vuzp<br>
+; CHECK: vbsl<br>
+ %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4<br>
+ %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1><br>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1<br>
+ %c = shufflevector <4 x i1> undef, <4 x i1> %c0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7><br>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1<br>
+ ret <8 x i8> %rv<br>
+}<br>
+<br>
+; We're using large data types here, and we have to fill with undef values until we<br>
+; get some vector size that we can represent.<br>
+define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,<br>
+ <5 x i32> %cmp0, <5 x i32> %cmp1, <5 x i8> *%cmp2_ptr) {<br>
+; CHECK-LABEL: vuzp_wide_type<br>
+; CHECK: vbsl<br>
+ %cmp2_load = load <5 x i8>, <5 x i8> * %cmp2_ptr, align 4<br>
+ %cmp2 = trunc <5 x i8> %cmp2_load to <5 x i1><br>
+ %c0 = icmp ult <5 x i32> %cmp0, %cmp1<br>
+ %c = shufflevector <5 x i1> %c0, <5 x i1> %cmp2, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9><br>
+ %rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1<br>
+ ret <10 x i8> %rv<br>
+}<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br>
</div></div>