[llvm] r282237 - [InstCombine] Fix for PR29124: reduce insertelements to shufflevector
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 23 02:14:08 PDT 2016
Author: abataev
Date: Fri Sep 23 04:14:08 2016
New Revision: 282237
URL: http://llvm.org/viewvc/llvm-project?rev=282237&view=rev
Log:
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert a shuffle instruction to a single insertelement if one of the vectors is a constant vector and only a single element from this constant is used in the shuffle, e.g.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll
llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
llvm/trunk/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=282237&r1=282236&r2=282237&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Fri Sep 23 04:14:08 2016
@@ -1037,17 +1037,21 @@ Value *InstCombiner::SimplifyDemandedVec
}
}
- APInt UndefElts4(LHSVWidth, 0);
+ APInt LHSUndefElts(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts4, Depth + 1);
+ LHSUndefElts, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- APInt UndefElts3(LHSVWidth, 0);
+ APInt RHSUndefElts(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- UndefElts3, Depth + 1);
+ RHSUndefElts, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
bool NewUndefElts = false;
+ unsigned LHSIdx = -1u;
+ unsigned RHSIdx = -1u;
+ bool LHSUniform = true;
+ bool RHSUniform = true;
for (unsigned i = 0; i < VWidth; i++) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
@@ -1056,18 +1060,57 @@ Value *InstCombiner::SimplifyDemandedVec
NewUndefElts = true;
UndefElts.setBit(i);
} else if (MaskVal < LHSVWidth) {
- if (UndefElts4[MaskVal]) {
+ if (LHSUndefElts[MaskVal]) {
NewUndefElts = true;
UndefElts.setBit(i);
+ } else {
+ LHSIdx = LHSIdx == -1u ? MaskVal : LHSVWidth;
+ LHSUniform = LHSUniform && (MaskVal == i);
}
} else {
- if (UndefElts3[MaskVal - LHSVWidth]) {
+ if (RHSUndefElts[MaskVal - LHSVWidth]) {
NewUndefElts = true;
UndefElts.setBit(i);
+ } else {
+ RHSIdx = RHSIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
+ RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
}
}
}
+ // Try to transform shuffle with constant vector and single element from
+ // this constant vector to single insertelement instruction.
+ // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
+ // insertelement V, C[ci], ci-n
+ if (LHSVWidth == Shuffle->getType()->getNumElements()) {
+ Value *Op = nullptr;
+ Constant *Value = nullptr;
+ unsigned Idx = -1u;
+
+ // Find constant vector with the single element in shuffle (LHS or RHS).
+ if (LHSIdx < LHSVWidth && RHSUniform) {
+ if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
+ Op = Shuffle->getOperand(1);
+ Value = CV->getOperand(LHSIdx);
+ Idx = LHSIdx;
+ }
+ }
+ if (RHSIdx < LHSVWidth && LHSUniform) {
+ if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
+ Op = Shuffle->getOperand(0);
+ Value = CV->getOperand(RHSIdx);
+ Idx = RHSIdx;
+ }
+ }
+ // Found constant vector with single element - convert to insertelement.
+ if (Op && Value) {
+ Instruction *New = InsertElementInst::Create(
+ Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
+ Shuffle->getName());
+ InsertNewInstWith(New, *Shuffle);
+ return New;
+ }
+ }
if (NewUndefElts) {
// Add additional discovered undefs.
SmallVector<Constant*, 16> Elts;
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=282237&r1=282236&r2=282237&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Fri Sep 23 04:14:08 2016
@@ -585,58 +585,103 @@ static bool isShuffleEquivalentToSelect(
return true;
}
-/// insertelt (shufflevector X, CVec, Mask), C, CIndex -->
-/// shufflevector X, CVec', Mask'
+/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
+/// --> shufflevector X, CVec', Mask'
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
- // Bail out if the shuffle has more than one use. In that case, we'd be
+ auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
+ // Bail out if the parent has more than one use. In that case, we'd be
// replacing the insertelt with a shuffle, and that's not a clear win.
- auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
- if (!Shuf || !Shuf->hasOneUse())
- return nullptr;
-
- // The shuffle must have a constant vector operand. The insertelt must have a
- // constant scalar being inserted at a constant position in the vector.
- Constant *ShufConstVec, *InsEltScalar;
- uint64_t InsEltIndex;
- if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
- !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
- !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
- return nullptr;
-
- // Adding an element to an arbitrary shuffle could be expensive, but a shuffle
- // that selects elements from vectors without crossing lanes is assumed cheap.
- // If we're just adding a constant into that shuffle, it will still be cheap.
- if (!isShuffleEquivalentToSelect(*Shuf))
+ if (!Inst || !Inst->hasOneUse())
return nullptr;
+ if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
+ // The shuffle must have a constant vector operand. The insertelt must have
+ // a constant scalar being inserted at a constant position in the vector.
+ Constant *ShufConstVec, *InsEltScalar;
+ uint64_t InsEltIndex;
+ if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
+ !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
+ !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
+ return nullptr;
+
+ // Adding an element to an arbitrary shuffle could be expensive, but a
+ // shuffle that selects elements from vectors without crossing lanes is
+ // assumed cheap.
+ // If we're just adding a constant into that shuffle, it will still be
+ // cheap.
+ if (!isShuffleEquivalentToSelect(*Shuf))
+ return nullptr;
+
+ // From the above 'select' check, we know that the mask has the same number
+ // of elements as the vector input operands. We also know that each constant
+ // input element is used in its lane and can not be used more than once by
+ // the shuffle. Therefore, replace the constant in the shuffle's constant
+ // vector with the insertelt constant. Replace the constant in the shuffle's
+ // mask vector with the insertelt index plus the length of the vector
+ // (because the constant vector operand of a shuffle is always the 2nd
+ // operand).
+ Constant *Mask = Shuf->getMask();
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewShufElts(NumElts);
+ SmallVector<Constant *, 16> NewMaskElts(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I) {
+ if (I == InsEltIndex) {
+ NewShufElts[I] = InsEltScalar;
+ Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
+ NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
+ } else {
+ // Copy over the existing values.
+ NewShufElts[I] = ShufConstVec->getAggregateElement(I);
+ NewMaskElts[I] = Mask->getAggregateElement(I);
+ }
+ }
- // From the above 'select' check, we know that the mask has the same number of
- // elements as the vector input operands. We also know that each constant
- // input element is used in its lane and can not be used more than once by the
- // shuffle. Therefore, replace the constant in the shuffle's constant vector
- // with the insertelt constant. Replace the constant in the shuffle's mask
- // vector with the insertelt index plus the length of the vector (because the
- // constant vector operand of a shuffle is always the 2nd operand).
- Constant *Mask = Shuf->getMask();
- unsigned NumElts = Mask->getType()->getVectorNumElements();
- SmallVector<Constant*, 16> NewShufElts(NumElts);
- SmallVector<Constant*, 16> NewMaskElts(NumElts);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (i == InsEltIndex) {
- NewShufElts[i] = InsEltScalar;
- Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
- NewMaskElts[i] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
- } else {
- // Copy over the existing values.
- NewShufElts[i] = ShufConstVec->getAggregateElement(i);
- NewMaskElts[i] = Mask->getAggregateElement(i);
+ // Create new operands for a shuffle that includes the constant of the
+ // original insertelt. The old shuffle will be dead now.
+ return new ShuffleVectorInst(Shuf->getOperand(0),
+ ConstantVector::get(NewShufElts),
+ ConstantVector::get(NewMaskElts));
+ } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
+ // Transform sequences of insertelements ops with constant data/indexes into
+ // a single shuffle op.
+ unsigned NumElts = InsElt.getType()->getNumElements();
+
+ uint64_t InsertIdx[2];
+ Constant *Val[2];
+ if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
+ !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
+ !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
+ !match(IEI->getOperand(1), m_Constant(Val[1])))
+ return nullptr;
+ SmallVector<Constant *, 16> Values(NumElts);
+ SmallVector<Constant *, 16> Mask(NumElts);
+ auto ValI = std::begin(Val);
+ // Generate new constant vector and mask.
+ // We have 2 values/masks from the insertelements instructions. Insert them
+ // into new value/mask vectors.
+ for (uint64_t I : InsertIdx) {
+ if (!Values[I]) {
+ assert(!Mask[I]);
+ Values[I] = *ValI;
+ Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
+ NumElts + I);
+ }
+ ++ValI;
+ }
+ // Remaining values are filled with 'undef' values.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (!Values[I]) {
+ assert(!Mask[I]);
+ Values[I] = UndefValue::get(InsElt.getType()->getElementType());
+ Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
+ }
}
+ // Create new operands for a shuffle that includes the constant of the
+ // original insertelt.
+ return new ShuffleVectorInst(IEI->getOperand(0),
+ ConstantVector::get(Values),
+ ConstantVector::get(Mask));
}
-
- // Create new operands for a shuffle that includes the constant of the
- // original insertelt. The old shuffle will be dead now.
- Constant *NewShufVec = ConstantVector::get(NewShufElts);
- Constant *NewMask = ConstantVector::get(NewMaskElts);
- return new ShuffleVectorInst(Shuf->getOperand(0), NewShufVec, NewMask);
+ return nullptr;
}
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Modified: llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll?rev=282237&r1=282236&r2=282237&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll Fri Sep 23 04:14:08 2016
@@ -26,6 +26,15 @@ define <4 x float> @twoInserts(<4 x floa
ret <4 x float> %ins2
}
+define <4 x i32> @shuffleRetain(<4 x i32> %base) {
+; CHECK-LABEL: @shuffleRetain(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> %base, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 1>, <4 x i32> <i32 1, i32 2, i32 undef, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+;
+ %shuf = shufflevector <4 x i32> %base, <4 x i32> <i32 4, i32 3, i32 2, i32 1>, <4 x i32> <i32 1, i32 2, i32 undef, i32 7>
+ ret <4 x i32> %shuf
+}
+
; TODO: Transform an arbitrary shuffle with constant into a shuffle that is equivalant to a vector select.
define <4 x float> @disguisedSelect(<4 x float> %x) {
Modified: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll?rev=282237&r1=282236&r2=282237&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll Fri Sep 23 04:14:08 2016
@@ -236,8 +236,8 @@ define <4 x float> @test_select(float %f
define <2 x i64> @PR24922(<2 x i64> %v) {
; CHECK-LABEL: @PR24922(
-; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <2 x i64> %v, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT: ret <2 x i64> [[RESULT]]
+; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x i64> %v, i64 0, i32 0
+; CHECK-NEXT: ret <2 x i64> [[RESULT1]]
;
%result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
ret <2 x i64> %result
Modified: llvm/trunk/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vector_insertelt_shuffle.ll?rev=282237&r1=282236&r2=282237&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vector_insertelt_shuffle.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vector_insertelt_shuffle.ll Fri Sep 23 04:14:08 2016
@@ -7,10 +7,9 @@ define<4 x float> @foo(<4 x float> %x) {
ret<4 x float> %ins2
}
-; FIXME: insertelements should fold to shuffle
+; insertelements should fold to shuffle
; CHECK-LABEL: @foo
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
+; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> %
define<4 x float> @bar(<4 x float> %x, float %a) {
@@ -45,12 +44,11 @@ define<4 x float> @bazz(<4 x float> %x,
ret<4 x float> %ins6
}
-; FIXME: insertelements should fold to shuffle
+; insertelements should fold to shuffle
; CHECK-LABEL: @bazz
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 3
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 5.000000e+00, i32 %
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
+; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 7.000000e+00, i32 %
; CHECK-NEXT: ret <4 x float> %
@@ -75,3 +73,22 @@ define<4 x float> @bazzzz(<4 x float> %x
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
; CHECK-NEXT: ret <4 x float> %
+define<4 x float> @bazzzzz() {
+ %ins1 = insertelement <4 x float> insertelement (<4 x float> <float 1.0, float 2.0, float 3.0, float undef>, float 4.0, i32 3), float 5.0, i32 1
+ %ins2 = insertelement<4 x float> %ins1, float 10.0, i32 2
+ ret<4 x float> %ins2
+}
+
+; insertelements should fold to shuffle
+; CHECK-LABEL: @bazzzzz
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 5.000000e+00, float 1.000000e+01, float 4.000000e+00>
+
+define<4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
+ %ins1 = insertelement <4 x float> insertelement (<4 x float> shufflevector (<4 x float> undef, <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0> , <4 x i32> <i32 0, i32 5, i32 undef, i32 6> ), float 4.0, i32 3), float 5.0, i32 1
+ ret<4 x float> %ins1
+}
+
+; insertelements should fold to shuffle
+; CHECK-LABEL: @bazzzzzz
+; CHECK-NEXT: ret <4 x float> <float undef, float 5.000000e+00, float undef, float 4.000000e+00>
+
Modified: llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll?rev=282237&r1=282236&r2=282237&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll Fri Sep 23 04:14:08 2016
@@ -74,7 +74,7 @@ define <4 x float> @insertps_0x1a_single
define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc1(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
More information about the llvm-commits
mailing list