[llvm] dce6c43 - [SLP]Improve isFixedVectorShuffle and its use.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 30 10:25:48 PST 2021
Author: Alexey Bataev
Date: 2021-11-30T10:10:20-08:00
New Revision: dce6c434ead3ccbaa67b8db2301b2a9fb4319123
URL: https://github.com/llvm/llvm-project/commit/dce6c434ead3ccbaa67b8db2301b2a9fb4319123
DIFF: https://github.com/llvm/llvm-project/commit/dce6c434ead3ccbaa67b8db2301b2a9fb4319123.diff
LOG: [SLP]Improve isFixedVectorShuffle and its use.
Extended support for undefined source vector/extract indices/non-fixed
vector types, also no need to check for the parent of the extractelement
instructions with the constant indicies.
Differential Revision: https://reviews.llvm.org/D114121
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e3d3d8992c233..4db630fbd063f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -327,7 +327,11 @@ static bool isCommutative(Instruction *I) {
/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>
isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
- auto *EI0 = cast<ExtractElementInst>(VL[0]);
+ const auto *It =
+ find_if(VL, [](Value *V) { return isa<ExtractElementInst>(V); });
+ if (It == VL.end())
+ return None;
+ auto *EI0 = cast<ExtractElementInst>(*It);
if (isa<ScalableVectorType>(EI0->getVectorOperandType()))
return None;
unsigned Size =
@@ -336,33 +340,41 @@ isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
Value *Vec2 = nullptr;
enum ShuffleMode { Unknown, Select, Permute };
ShuffleMode CommonShuffleMode = Unknown;
+ Mask.assign(VL.size(), UndefMaskElem);
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
+ // Undef can be represented as an undef element in a vector.
+ if (isa<UndefValue>(VL[I]))
+ continue;
auto *EI = cast<ExtractElementInst>(VL[I]);
+ if (isa<ScalableVectorType>(EI->getVectorOperandType()))
+ return None;
auto *Vec = EI->getVectorOperand();
+ // We can extractelement from undef or poison vector.
+ if (isa<UndefValue>(Vec))
+ continue;
// All vector operands must have the same number of vector elements.
if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
return None;
+ if (isa<UndefValue>(EI->getIndexOperand()))
+ continue;
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
if (!Idx)
return None;
// Undefined behavior if Idx is negative or >= Size.
- if (Idx->getValue().uge(Size)) {
- Mask.push_back(UndefMaskElem);
+ if (Idx->getValue().uge(Size))
continue;
- }
unsigned IntIdx = Idx->getValue().getZExtValue();
- Mask.push_back(IntIdx);
- // We can extractelement from undef or poison vector.
- if (isa<UndefValue>(Vec))
- continue;
+ Mask[I] = IntIdx;
// For correct shuffling we have to have at most 2
diff erent vector operands
// in all extractelement instructions.
- if (!Vec1 || Vec1 == Vec)
+ if (!Vec1 || Vec1 == Vec) {
Vec1 = Vec;
- else if (!Vec2 || Vec2 == Vec)
+ } else if (!Vec2 || Vec2 == Vec) {
Vec2 = Vec;
- else
+ Mask[I] += Size;
+ } else {
return None;
+ }
if (CommonShuffleMode == Permute)
continue;
// If the extract index is not the same as the operation number, it is a
@@ -4414,15 +4426,19 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
bool IsGather) {
DenseMap<Value *, int> ExtractVectorsTys;
for (auto *V : VL) {
+ if (isa<UndefValue>(V))
+ continue;
// If all users of instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
// instruction as dead and remove its cost from the final cost of the
// vectorized tree.
- if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
- (IsGather && ScalarToTreeEntry.count(V)))
+ if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals))
continue;
auto *EE = cast<ExtractElementInst>(V);
- unsigned Idx = *getExtractIndex(EE);
+ Optional<unsigned> EEIdx = getExtractIndex(EE);
+ if (!EEIdx)
+ continue;
+ unsigned Idx = *EEIdx;
if (TTIRef.getNumberOfParts(VecTy) !=
TTIRef.getNumberOfParts(EE->getVectorOperandType())) {
auto It =
@@ -4454,6 +4470,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
for (const auto &Data : ExtractVectorsTys) {
auto *EEVTy = cast<FixedVectorType>(Data.first->getType());
unsigned NumElts = VecTy->getNumElements();
+ if (Data.second % NumElts == 0)
+ continue;
if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) {
unsigned Idx = (Data.second / NumElts) * NumElts;
unsigned EENumElts = EEVTy->getNumElements();
@@ -4516,10 +4534,12 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// broadcast.
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
}
- if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) &&
- allSameBlock(VL) &&
- !isa<ScalableVectorType>(
- cast<ExtractElementInst>(E->getMainOp())->getVectorOperandType())) {
+ if ((E->getOpcode() == Instruction::ExtractElement ||
+ all_of(E->Scalars,
+ [](Value *V) {
+ return isa<ExtractElementInst, UndefValue>(V);
+ })) &&
+ allSameType(VL)) {
// Check that gather of extractelements can be represented as just a
// shuffle of a single/two vectors the scalars are extracted from.
SmallVector<int> Mask;
@@ -5111,7 +5131,11 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
[this](Value *V) { return EphValues.contains(V); }) &&
(allConstant(TE->Scalars) || isSplat(TE->Scalars) ||
TE->Scalars.size() < Limit ||
- (TE->getOpcode() == Instruction::ExtractElement &&
+ ((TE->getOpcode() == Instruction::ExtractElement ||
+ all_of(TE->Scalars,
+ [](Value *V) {
+ return isa<ExtractElementInst, UndefValue>(V);
+ })) &&
isFixedVectorShuffle(TE->Scalars, Mask)) ||
(TE->State == TreeEntry::NeedToGather &&
TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()));
@@ -9183,8 +9207,9 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<int> Mask;
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
- (llvm::all_of(BuildVectorOpds,
- [](Value *V) { return isa<ExtractElementInst>(V); }) &&
+ (llvm::all_of(
+ BuildVectorOpds,
+ [](Value *V) { return isa<ExtractElementInst, UndefValue>(V); }) &&
isFixedVectorShuffle(BuildVectorOpds, Mask)))
return false;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
index 8ab137cc2d7d8..9c19a32b2f41f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
@@ -230,25 +230,21 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @ashr_lshr_shl_v8i32(
-; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6
-; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6
-; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <2 x i32> <i32 4, i32 5>
-; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
-; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
-; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; SSE-NEXT: [[R51:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
-; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R51]], i32 [[AB6]], i32 6
-; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
-; SSE-NEXT: ret <8 x i32> [[R7]]
+; SSE-NEXT: [[TMP8:%.*]] = shl <8 x i32> [[A]], [[B]]
+; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <2 x i32> <i32 6, i32 7>
+; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
+; SSE-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R52]], <8 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE-NEXT: ret <8 x i32> [[R71]]
;
; SLM-LABEL: @ashr_lshr_shl_v8i32(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
index 3af16bf404a3f..783b50ae4b178 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
@@ -230,25 +230,21 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @ashr_lshr_shl_v8i32(
-; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6
-; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
-; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6
-; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <2 x i32> <i32 4, i32 5>
-; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
-; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
-; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; SSE-NEXT: [[R51:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
-; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R51]], i32 [[AB6]], i32 6
-; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
-; SSE-NEXT: ret <8 x i32> [[R7]]
+; SSE-NEXT: [[TMP8:%.*]] = shl <8 x i32> [[A]], [[B]]
+; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <2 x i32> <i32 6, i32 7>
+; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
+; SSE-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R52]], <8 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE-NEXT: ret <8 x i32> [[R71]]
;
; SLM-LABEL: @ashr_lshr_shl_v8i32(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
More information about the llvm-commits
mailing list