[llvm] r213830 - [AArch64] Fix a bug generating incorrect instruction when building small vector.
Kevin Qin
Kevin.Qin at arm.com
Wed Jul 23 19:05:43 PDT 2014
Author: kevinqin
Date: Wed Jul 23 21:05:42 2014
New Revision: 213830
URL: http://llvm.org/viewvc/llvm-project?rev=213830&view=rev
Log:
[AArch64] Fix a bug generating incorrect instruction when building small vector.
This bug was introduced by r211144. The element type of an operand may be
smaller than the element type of the result, but the previous commit could
only handle the opposite case. This commit handles this
scenario as well and generates optimized code such as ZIP1.
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/neon-perm.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=213830&r1=213829&r2=213830&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Wed Jul 23 21:05:42 2014
@@ -4180,9 +4180,22 @@ SDValue AArch64TargetLowering::Reconstru
if (SourceVecs.size() > 2)
return SDValue();
- SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+ // Find out the smallest element size among result and two sources, and use
+ // it as element size to build the shuffle_vector.
+ EVT SmallestEltTy = VT.getVectorElementType();
+ for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+ EVT SrcEltTy = SourceVecs[i].getValueType().getVectorElementType();
+ if (SrcEltTy.bitsLT(SmallestEltTy)) {
+ SmallestEltTy = SrcEltTy;
+ }
+ }
+ unsigned ResMultiplier =
+ VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
int VEXTOffsets[2] = { 0, 0 };
int OffsetMultipliers[2] = { 1, 1 };
+ NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
+ SDValue ShuffleSrcs[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
// This loop extracts the usage patterns of the source vectors
// and prepares appropriate SDValues for a shuffle if possible.
@@ -4190,15 +4203,15 @@ SDValue AArch64TargetLowering::Reconstru
unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
SDValue CurSource = SourceVecs[i];
if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType()) {
- // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
- // Then bitcast it to the vector which holds asserted element type,
- // and record the multiplier of element width between SourceVecs and
- // Build_vector which is needed to extract the correct lanes later.
- EVT CastVT =
- EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- SourceVecs[i].getValueSizeInBits() /
- VT.getVectorElementType().getSizeInBits());
+ ShuffleVT.getVectorElementType()) {
+ // As ShuffleVT holds smallest element size, it may hit here only if
+ // the element type of SourceVecs is bigger than that of ShuffleVT.
+ // Adjust the element size of SourceVecs to match ShuffleVT, and record
+ // the multipliers.
+ EVT CastVT = EVT::getVectorVT(
+ *DAG.getContext(), ShuffleVT.getVectorElementType(),
+ SourceVecs[i].getValueSizeInBits() /
+ ShuffleVT.getVectorElementType().getSizeInBits());
CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
@@ -4207,7 +4220,7 @@ SDValue AArch64TargetLowering::Reconstru
MinElts[i] *= OffsetMultipliers[i];
}
- if (CurSource.getValueType() == VT) {
+ if (CurSource.getValueType() == ShuffleVT) {
// No VEXT necessary
ShuffleSrcs[i] = CurSource;
VEXTOffsets[i] = 0;
@@ -4215,8 +4228,9 @@ SDValue AArch64TargetLowering::Reconstru
} else if (NumSrcElts < NumElts) {
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
- ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
- DAG.getUNDEF(CurSource.getValueType()));
+ ShuffleSrcs[i] =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, ShuffleVT, CurSource,
+ DAG.getUNDEF(CurSource.getValueType()));
continue;
}
@@ -4233,50 +4247,61 @@ SDValue AArch64TargetLowering::Reconstru
if (MinElts[i] >= NumElts) {
// The extraction can just take the second half
VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+ CurSource, DAG.getIntPtrConstant(NumElts));
} else if (MaxElts[i] < NumElts) {
// The extraction can just take the first half
VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(0));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+ CurSource, DAG.getIntPtrConstant(0));
} else {
// An actual VEXT is needed
VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(NumElts));
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+ CurSource, DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+ CurSource, DAG.getIntPtrConstant(NumElts));
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
- ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
- DAG.getConstant(Imm, MVT::i32));
+ ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, ShuffleVT, VEXTSrc1,
+ VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
}
}
SmallVector<int, 8> Mask;
+ unsigned VTEltSize = VT.getVectorElementType().getSizeInBits();
- for (unsigned i = 0; i < NumElts; ++i) {
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF) {
- Mask.push_back(-1);
- continue;
- }
-
- SDValue ExtractVec = Entry.getOperand(0);
- int ExtractElt =
- cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
- if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
- } else {
- Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
- VEXTOffsets[1]);
+ int SourceNum = 1;
+ unsigned LanePartNum = 0;
+ int ExtractElt;
+ if (Entry.getOpcode() != ISD::UNDEF) {
+ // Check how many parts of source lane should be inserted.
+ SDValue ExtractVec = Entry.getOperand(0);
+ if (ExtractVec == SourceVecs[0])
+ SourceNum = 0;
+ ExtractElt = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+ unsigned ExtEltSize =
+ ExtractVec.getValueType().getVectorElementType().getSizeInBits();
+ unsigned SmallerSize = ExtEltSize < VTEltSize ? ExtEltSize : VTEltSize;
+ LanePartNum = SmallerSize / SmallestEltTy.getSizeInBits();
+ }
+
+ for (unsigned j = 0; j != ResMultiplier; ++j) {
+ if (j < LanePartNum)
+ Mask.push_back(ExtractElt * OffsetMultipliers[SourceNum] +
+ NumElts * SourceNum - VEXTOffsets[SourceNum] + j);
+ else
+ Mask.push_back(-1);
}
}
// Final check before we try to produce nonsense...
- if (isShuffleMaskLegal(Mask, VT))
- return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
- &Mask[0]);
+ if (isShuffleMaskLegal(Mask, ShuffleVT)) {
+ SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleSrcs[0],
+ ShuffleSrcs[1], &Mask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ }
return SDValue();
}
Modified: llvm/trunk/test/CodeGen/AArch64/neon-perm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-perm.ll?rev=213830&r1=213829&r2=213830&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-perm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-perm.ll Wed Jul 23 21:05:42 2014
@@ -1387,6 +1387,13 @@ entry:
ret <8 x i16> %shuffle.i
}
+define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
+; CHECK-LABEL: test_vzip1_v4i8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i8> %lo
+}
+
define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) {
; CHECK-LABEL: test_same_vzip2_s8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
More information about the llvm-commits
mailing list