[llvm] [SLP] Match poison as instruction with the same opcode (PR #115946)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 12 13:57:28 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Alexey Bataev (alexey-bataev)
Changes:
The patch allows vectorizing a scalar instruction + poison values as if the
poison values were instructions with the same opcode. This enables better
vectorization of repeated values, reduces the number of insertelement
instructions, and serves as groundwork for vectorization of copyable elements.
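As a minimal, hypothetical illustration (not taken from the patch's test
files; the function name is made up), consider a buildvector whose scalar
bundle is { fadd, poison }:

```llvm
; Hypothetical example. Lane 0 is an fadd, lane 1 is poison.
; getSameOpcode() previously required every lane to be an Instruction
; and gave up on this bundle; with this change the poison lane matches
; the fadd opcode, so the bundle can become a single <2 x float> fadd
; whose second lane simply reads poison operands, removing scalar
; insertelement traffic.
define <2 x float> @fadd_with_poison_lane(float %x, float %y) {
  %a = fadd float %x, %y
  %v = insertelement <2 x float> poison, float %a, i32 0
  ret <2 x float> %v
}
```

The poison lanes never need real operands: as the appendOperandsOfVL and
setOperandsInOrder hunks below show, their operands are materialized as
PoisonValue of the matching type.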
Observed changes with AVX512, -O3 + LTO:
JM/ldecod - better vector code
Applications/oggenc - better vectorization
CINT2017speed/625.x264_s
CINT2017rate/525.x264_r - better vector code
CFP2017rate/526.blender_r - better vector code
CFP2006/447.dealII - small variations
Benchmarks/Bullet - extra vector code
CFP2017rate/510.parest_r - better vectorization
CINT2017rate/502.gcc_r
CINT2017speed/602.gcc_s - extra vector code
Benchmarks/tramp3d-v4 - small variations
CFP2006/453.povray - extra vector code
JM/lencod - better vector code
CFP2017rate/511.povray_r - extra vector code
MemFunctions/MemFunctions - extra vector code
LoopVectorization/LoopVectorizationBenchmarks - extra vector code
XRay/FDRMode - extra vector code
XRay/ReturnReference - extra vector code
LCALS/SubsetCLambdaLoops - extra vector code
LCALS/SubsetCRawLoops - extra vector code
LCALS/SubsetARawLoops - extra vector code
LCALS/SubsetALambdaLoops - extra vector code
DOE-ProxyApps-C++/miniFE - extra vector code
LoopVectorization/LoopInterleavingBenchmarks - extra vector code
LCALS/SubsetBLambdaLoops - extra vector code
MicroBenchmarks/harris - extra vector code
ImageProcessing/Dither - extra vector code
MicroBenchmarks/SLPVectorization - extra vector code
ImageProcessing/Blur - extra vector code
ImageProcessing/Dilate - extra vector code
Builtins/Int128 - extra vector code
ImageProcessing/Interpolation - extra vector code
ImageProcessing/BilateralFiltering - extra vector code
ImageProcessing/AnisotropicDiffusion - extra vector code
MicroBenchmarks/LoopInterchange - extra code vectorized
LCALS/SubsetBRawLoops - extra code vectorized
CINT2006/464.h264ref - extra vectorization with wider vectors
CFP2017rate/508.namd_r - small variations, extra phis vectorized
CFP2006/444.namd - two 2 x phi replaced by a 4 x phi
DOE-ProxyApps-C/SimpleMOC - extra code vectorized
CINT2017rate/541.leela_r
CINT2017speed/641.leela_s - the function is better vectorized and inlined
Benchmarks/Misc/oourafft - two 4 x bit reductions replaced by 2 x vector code
FreeBench/fourinarow - better vectorization
---
Patch is 50.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115946.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+204-69)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll (+16-14)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll (+6-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll (+5-6)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1bf082d57b8bb0..bbc2560778541d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -456,15 +456,18 @@ static std::string shortBundleName(ArrayRef<Value *> VL, int Idx = -1) {
/// \returns true if all of the instructions in \p VL are in the same block or
/// false otherwise.
static bool allSameBlock(ArrayRef<Value *> VL) {
- Instruction *I0 = dyn_cast<Instruction>(VL[0]);
- if (!I0)
+ auto *It = find_if(VL, IsaPred<Instruction>);
+ if (It == VL.end())
return false;
+ Instruction *I0 = cast<Instruction>(*It);
if (all_of(VL, isVectorLikeInstWithConstOps))
return true;
BasicBlock *BB = I0->getParent();
- for (int I = 1, E = VL.size(); I < E; I++) {
- auto *II = dyn_cast<Instruction>(VL[I]);
+ for (Value *V : iterator_range(It, VL.end())) {
+ if (isa<PoisonValue>(V))
+ continue;
+ auto *II = dyn_cast<Instruction>(V);
if (!II)
return false;
@@ -893,10 +896,19 @@ static bool isCmpSameOrSwapped(const CmpInst *BaseCI, const CmpInst *CI,
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
const TargetLibraryInfo &TLI) {
// Make sure these are all Instructions.
- if (!all_of(VL, IsaPred<Instruction>))
+ if (!all_of(VL, IsaPred<Instruction, PoisonValue>))
+ return InstructionsState::invalid();
+
+ auto *It = find_if(VL, IsaPred<Instruction>);
+ if (It == VL.end())
+ return InstructionsState::invalid();
+
+ Value *V = *It;
+ unsigned InstCnt = std::count_if(It, VL.end(), IsaPred<Instruction>);
+ if ((VL.size() > 2 && !isa<PHINode>(V) && InstCnt < VL.size() / 2) ||
+ (VL.size() == 2 && InstCnt < 2))
return InstructionsState::invalid();
- Value *V = VL.front();
bool IsCastOp = isa<CastInst>(V);
bool IsBinOp = isa<BinaryOperator>(V);
bool IsCmpOp = isa<CmpInst>(V);
@@ -904,7 +916,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
IsCmpOp ? cast<CmpInst>(V)->getPredicate() : CmpInst::BAD_ICMP_PREDICATE;
unsigned Opcode = cast<Instruction>(V)->getOpcode();
unsigned AltOpcode = Opcode;
- unsigned AltIndex = 0;
+ unsigned AltIndex = std::distance(VL.begin(), It);
bool SwappedPredsCompatible = [&]() {
if (!IsCmpOp)
@@ -940,8 +952,17 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
return InstructionsState::invalid();
}
+ bool AnyPoison = InstCnt != VL.size();
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
- auto *I = cast<Instruction>(VL[Cnt]);
+ auto *I = dyn_cast<Instruction>(VL[Cnt]);
+ if (!I)
+ continue;
+
+ // Cannot combine poison and divisions.
+ if (AnyPoison && (I->isIntDivRem() || I->isArithmeticShift() ||
+ I->getOpcode() == Instruction::FDiv ||
+ I->getOpcode() == Instruction::FRem || isa<CallInst>(I)))
+ return InstructionsState::invalid();
unsigned InstOpcode = I->getOpcode();
if (IsBinOp && isa<BinaryOperator>(I)) {
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
@@ -1177,10 +1198,13 @@ static SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
Type *ScalarTy = VL[0]->getType();
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
SmallBitVector OpcodeMask(VL.size() * ScalarTyNumElements, false);
- for (unsigned Lane : seq<unsigned>(VL.size()))
+ for (unsigned Lane : seq<unsigned>(VL.size())) {
+ if (isa<PoisonValue>(VL[Lane]))
+ continue;
if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
OpcodeMask.set(Lane * ScalarTyNumElements,
Lane * ScalarTyNumElements + ScalarTyNumElements);
+ }
return OpcodeMask;
}
@@ -1781,13 +1805,17 @@ class BoUpSLP {
(S.MainOp->getNumOperands() <= 2 || !MainAltOps.empty() ||
!S.isAltShuffle()) &&
all_of(Ops, [&S](Value *V) {
- return cast<Instruction>(V)->getNumOperands() ==
- S.MainOp->getNumOperands();
+ return isa<PoisonValue>(V) ||
+ cast<Instruction>(V)->getNumOperands() ==
+ S.MainOp->getNumOperands();
}))
return S.isAltShuffle() ? LookAheadHeuristics::ScoreAltOpcodes
: LookAheadHeuristics::ScoreSameOpcode;
}
+ if (I1 && isa<PoisonValue>(V2))
+ return LookAheadHeuristics::ScoreSameOpcode;
+
if (isa<UndefValue>(V2))
return LookAheadHeuristics::ScoreUndef;
@@ -2336,17 +2364,17 @@ class BoUpSLP {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
- assert(isa<Instruction>(VL[0]) && "Expected instruction");
constexpr unsigned IntrinsicNumOperands = 2;
- unsigned NumOperands = isa<IntrinsicInst>(VL[0])
- ? IntrinsicNumOperands
- : cast<Instruction>(VL[0])->getNumOperands();
+ auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
+ unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
+ : VL0->getNumOperands();
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
OpsVec[OpIdx].resize(NumLanes);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
+ assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
+ "Expected instruction or poison value");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
@@ -2357,6 +2385,12 @@ class BoUpSLP {
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely
// tell the inverse operations by checking commutativity.
+ if (isa<PoisonValue>(VL[Lane])) {
+ OpsVec[OpIdx][Lane] = {
+ PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true,
+ false};
+ continue;
+ }
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
@@ -2451,7 +2485,7 @@ class BoUpSLP {
Value *OpILn = getValue(OpI, Ln);
return (L && L->isLoopInvariant(OpILn)) ||
(getSameOpcode({Op, OpILn}, TLI).getOpcode() &&
- Op->getParent() == cast<Instruction>(OpILn)->getParent());
+ allSameBlock({Op, OpILn}));
}))
return true;
}
@@ -2463,7 +2497,8 @@ class BoUpSLP {
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
L(R.LI->getLoopFor(
- (cast<Instruction>(RootVL.front())->getParent()))) {
+ (cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))
+ ->getParent()))) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL);
}
@@ -3265,13 +3300,18 @@ class BoUpSLP {
/// Set the operands of this bundle in their original order.
void setOperandsInOrder() {
assert(Operands.empty() && "Already initialized?");
- auto *I0 = cast<Instruction>(Scalars[0]);
+ auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
Operands.resize(I0->getNumOperands());
unsigned NumLanes = Scalars.size();
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
OpIdx != NumOperands; ++OpIdx) {
Operands[OpIdx].resize(NumLanes);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ if (isa<PoisonValue>(Scalars[Lane])) {
+ Operands[OpIdx][Lane] =
+ PoisonValue::get(I0->getOperand(OpIdx)->getType());
+ continue;
+ }
auto *I = cast<Instruction>(Scalars[Lane]);
assert(I->getNumOperands() == NumOperands &&
"Expected same number of operands");
@@ -4891,8 +4931,8 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
PointerOps.resize(Sz);
auto *POIter = PointerOps.begin();
for (Value *V : VL) {
- auto *L = cast<LoadInst>(V);
- if (!L->isSimple())
+ auto *L = dyn_cast<LoadInst>(V);
+ if (!L || !L->isSimple())
return LoadsState::Gather;
*POIter = L->getPointerOperand();
++POIter;
@@ -5470,6 +5510,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
TE.ReuseShuffleIndices.end());
if (TE.getOpcode() == Instruction::ExtractElement && !TE.isAltShuffle() &&
all_of(TE.Scalars, [Sz](Value *V) {
+ if (isa<PoisonValue>(V))
+ return true;
std::optional<unsigned> Idx = getExtractIndex(cast<Instruction>(V));
return Idx && *Idx < Sz;
})) {
@@ -5554,7 +5596,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
auto PHICompare = [&](unsigned I1, unsigned I2) {
Value *V1 = TE.Scalars[I1];
Value *V2 = TE.Scalars[I2];
- if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0))
+ if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0) ||
+ isa<PoisonValue>(V1) || isa<PoisonValue>(V2))
return false;
if (V1->getNumUses() < V2->getNumUses())
return true;
@@ -7319,8 +7362,14 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
for (unsigned I : seq<unsigned>(0, S.MainOp->getNumOperands())) {
Operands.emplace_back();
// Prepare the operand vector.
- for (Value *V : VL)
+ for (Value *V : VL) {
+ if (isa<PoisonValue>(V)) {
+ Operands.back().push_back(
+ PoisonValue::get(S.MainOp->getOperand(I)->getType()));
+ continue;
+ }
Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
+ }
}
if (Operands.size() == 2) {
// Try find best operands candidates.
@@ -7427,8 +7476,11 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
if (VL0->getNumOperands() > MaxPHINumOperands)
return TreeEntry::NeedToGather;
// Check for terminator values (e.g. invoke).
- for (Value *V : VL)
- for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
+ for (Value *V : VL) {
+ auto *PHI = dyn_cast<PHINode>(V);
+ if (!PHI)
+ continue;
+ for (Value *Incoming : PHI->incoming_values()) {
Instruction *Term = dyn_cast<Instruction>(Incoming);
if (Term && Term->isTerminator()) {
LLVM_DEBUG(dbgs()
@@ -7436,6 +7488,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
return TreeEntry::NeedToGather;
}
}
+ }
return TreeEntry::Vectorize;
}
@@ -7511,8 +7564,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy))
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
- else if (any_of(VL,
- [](Value *V) { return !cast<LoadInst>(V)->isSimple(); }))
+ else if (any_of(VL, [](Value *V) {
+ auto *LI = dyn_cast<LoadInst>(V);
+ return !LI || !LI->isSimple();
+ }))
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
@@ -7536,6 +7591,8 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
for (Value *V : VL) {
+ if (isa<PoisonValue>(V))
+ continue;
Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
LLVM_DEBUG(
@@ -7552,7 +7609,9 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
Type *ComparedTy = VL0->getOperand(0)->getType();
for (Value *V : VL) {
- CmpInst *Cmp = cast<CmpInst>(V);
+ if (isa<PoisonValue>(V))
+ continue;
+ auto *Cmp = cast<CmpInst>(V);
if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
Cmp->getOperand(0)->getType() != ComparedTy) {
LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
@@ -7795,7 +7854,13 @@ class PHIHandler {
}
// Prepare the operand vector.
for (auto [Idx, V] : enumerate(Phis)) {
- auto *P = cast<PHINode>(V);
+ auto *P = dyn_cast<PHINode>(V);
+ if (!P) {
+ assert(isa<PoisonValue>(V) &&
+ "Expected isa instruction or poison value.");
+ Operands[I][Idx] = V;
+ continue;
+ }
if (P->getIncomingBlock(I) == InBB)
Operands[I][Idx] = P->getIncomingValue(I);
else
@@ -7814,6 +7879,11 @@ class PHIHandler {
Blocks.try_emplace(InBB).first->second.push_back(I);
}
for (auto [Idx, V] : enumerate(Phis)) {
+ if (isa<PoisonValue>(V)) {
+ for (unsigned I : seq<unsigned>(Main->getNumIncomingValues()))
+ Operands[I][Idx] = V;
+ continue;
+ }
auto *P = cast<PHINode>(V);
for (unsigned I : seq<unsigned>(0, P->getNumIncomingValues())) {
BasicBlock *InBB = P->getIncomingBlock(I);
@@ -7863,7 +7933,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *V : VL) {
if (isConstant(V)) {
ReuseShuffleIndices.emplace_back(
- isa<UndefValue>(V) ? PoisonMaskElem : UniqueValues.size());
+ isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
UniqueValues.emplace_back(V);
continue;
}
@@ -7895,11 +7965,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}))) {
if (DoNotFail && UniquePositions.size() > 1 &&
NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
- all_of(UniqueValues, [=](Value *V) {
- return isa<ExtractElementInst>(V) ||
- areAllUsersVectorized(cast<Instruction>(V),
- UserIgnoreList);
- })) {
+ all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
// Find the number of elements, which forms full vectors.
unsigned PWSz = getFullVectorNumberOfElements(
*TTI, UniqueValues.front()->getType(), UniqueValues.size());
@@ -7907,8 +7973,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices.clear();
} else {
NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
- NonUniqueValueVL.append(PWSz - UniqueValues.size(),
- UniqueValues.back());
+ NonUniqueValueVL.append(
+ PWSz - UniqueValues.size(),
+ PoisonValue::get(UniqueValues.front()->getType()));
VL = NonUniqueValueVL;
}
return true;
@@ -8043,7 +8110,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return true;
// Check if all operands are extracts, part of vector node or can build a
// regular vectorize node.
- SmallVector<unsigned, 2> InstsCount(VL.size(), 0);
+ SmallVector<unsigned, 8> InstsCount;
for (Value *V : VL) {
auto *I = cast<Instruction>(V);
InstsCount.push_back(count_if(I->operand_values(), [](Value *Op) {
@@ -8437,6 +8504,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
+ if (isa<PoisonValue>(V)) {
+ Left.push_back(PoisonValue::get(VL0->getOperand(0)->getType()));
+ Right.push_back(PoisonValue::get(VL0->getOperand(1)->getType()));
+ continue;
+ }
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
@@ -8636,7 +8708,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (isa<BinaryOperator>(VL0) || CI) {
ValueList Left, Right;
if (!CI || all_of(VL, [](Value *V) {
- return cast<CmpInst>(V)->isCommutative();
+ return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
})) {
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
} else {
@@ -8649,6 +8721,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
for (Value *V : VL) {
+ if (isa<PoisonValue>(V)) {
+ Left.push_back(
+ PoisonValue::get(MainCI->getOperand(0)->getType()));
+ Right.push_back(
+ PoisonValue::get(MainCI->getOperand(1)->getType()));
+ continue;
+ }
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
@@ -8853,6 +8932,8 @@ void BoUpSLP::TreeEntry::buildAltOpShuffleMask(
unsigned Idx = I;
if (!ReorderIndices.empty())
Idx = OrderMask[I];
+ if (isa<PoisonValue>(Scalars[Idx]))
+ continue;
auto *OpInst = cast<Instruction>(Scalars[Idx]);
if (IsAltOp(OpInst)) {
Mask[I] = Sz + Idx;
@@ -9627,9 +9708,11 @@ void BoUpSLP::transformNodes() {
// Try to vectorize reduced values or if all users are vectorized.
// For expensive instructions extra extracts might be profitable.
if ((!UserIgnoreList || E.Idx != 0) &&
- TTI->getInstructionCost(cast<Instruction>(Slice.front()),
- CostKind) < TTI::TCC_Expensive &&
+ TTI->getInstructionCost(S.MainOp, CostKind) <
+ TTI::TCC_Expensive &&
!all_of(Slice, [&](Value *V) {
+ if (isa<PoisonValue>(V))
+ return true;
return areAllUsersVectorized(cast<Instruction>(V),
UserIgnoreList);
}))
@@ -9652,12 +9735,13 @@ void BoUpSLP::transformNodes() {
continue;
}
} else if (S.getOpcode() == Instruction::ExtractElement ||
- (TTI->getInstructionCost(
- cast<Instruction>(Slice.front()), CostKind) <
+ (TTI->getInstructionCost(S.MainOp, CostKind) <
TTI::TCC_Expensive &&
!CheckOperandsProfitability(
- cast<Instruction>(Slice.front()),
- cast<Instruction>(Slice.back()), S))) {
+ S.MainOp,
+ cast<Instruction>(*find_if(reverse(Slice),
+ IsaPred<Instruction>)),
+ S))) {
// Do not vectorize extractelements (handled effectively
// already). Do not vectorize non-profitable instructions (with
// low cost and non-vectorizable operands).
@@ -10887,7 +10971,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
const unsigned Sz = UniqueValues.size();
SmallBitVector UsedScalars(Sz, false);
for (unsigned ...
[truncated]
``````````
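One correctness detail in the getSameOpcode() hunk above deserves a note:
bundles that mix poison with divisions (and, more conservatively, arithmetic
shifts and calls) are rejected. A minimal, hypothetical sketch of the hazard
being avoided (again, a made-up function, not from the patch's tests):

```llvm
; Hypothetical example. The scalar bundle here is { %d, poison }.
; Unlike the fadd case, it must not be widened to a single <2 x i32>
; sdiv: lane 1 would execute a division with a poison divisor, which is
; immediate undefined behavior (poison may be 0), not merely a poison
; result. getSameOpcode() therefore returns invalid for div/rem,
; FDiv/FRem, arithmetic shifts, and calls whenever any lane is poison.
define <2 x i32> @sdiv_poison_lane(i32 %a, i32 %b) {
  %d = sdiv i32 %a, %b
  %v = insertelement <2 x i32> poison, i32 %d, i32 0
  ret <2 x i32> %v
}
```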
https://github.com/llvm/llvm-project/pull/115946