[llvm] 760f550 - [SLP] NFC. Replace MainOp and AltOp in TreeEntry with InstructionsState. (#120198)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 17:05:43 PST 2025
Author: Han-Kuan Chen
Date: 2025-01-10T09:05:39+08:00
New Revision: 760f550de25792db83cd39c88ef57ab6d80a41a0
URL: https://github.com/llvm/llvm-project/commit/760f550de25792db83cd39c88ef57ab6d80a41a0
DIFF: https://github.com/llvm/llvm-project/commit/760f550de25792db83cd39c88ef57ab6d80a41a0.diff
LOG: [SLP] NFC. Replace MainOp and AltOp in TreeEntry with InstructionsState. (#120198)
Add TreeEntry::hasState.
Add assert for getTreeEntry.
Remove the OpValue parameter from the canReuseExtract function.
Remove the Opcode parameter from the ComputeMaxBitWidth lambda function.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 48ed612c11b366..6360ddb57007d6 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2417,15 +2417,16 @@ class BoUpSLP {
}
/// Go through the instructions in VL and append their operands.
- void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
+ void appendOperandsOfVL(ArrayRef<Value *> VL, const InstructionsState &S) {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
// IntrinsicInst::isCommutative returns true if swapping the first "two"
// arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
- unsigned NumOperands = VL0->getNumOperands();
- ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
+ unsigned NumOperands = S.getMainOp()->getNumOperands();
+ ArgSize = isa<IntrinsicInst>(S.getMainOp()) ? IntrinsicNumOperands
+ : NumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2445,8 +2446,8 @@ class BoUpSLP {
// tell the inverse operations by checking commutativity.
if (isa<PoisonValue>(VL[Lane])) {
OpsVec[OpIdx][Lane] = {
- PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true,
- false};
+ PoisonValue::get(S.getMainOp()->getOperand(OpIdx)->getType()),
+ true, false};
continue;
}
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
@@ -2558,11 +2559,12 @@ class BoUpSLP {
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
- VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
+ VLOperands(ArrayRef<Value *> RootVL, const InstructionsState &S,
+ const BoUpSLP &R)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
- L(R.LI->getLoopFor((VL0->getParent()))) {
+ L(R.LI->getLoopFor(S.getMainOp()->getParent())) {
// Append all the operands of RootVL.
- appendOperandsOfVL(RootVL, VL0);
+ appendOperandsOfVL(RootVL, S);
}
/// \Returns a value vector with the operands across all lanes for the
@@ -3035,7 +3037,7 @@ class BoUpSLP {
/// non-identity permutation that allows to reuse extract instructions.
/// \param ResizeAllowed indicates whether it is allowed to handle subvector
/// extract order.
- bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
+ bool canReuseExtract(ArrayRef<Value *> VL,
SmallVectorImpl<unsigned> &CurrentOrder,
bool ResizeAllowed = false) const;
@@ -3262,7 +3264,7 @@ class BoUpSLP {
};
/// Checks if the current node is a gather node.
- bool isGather() const {return State == NeedToGather; }
+ bool isGather() const { return State == NeedToGather; }
/// A vector of scalars.
ValueList Scalars;
@@ -3326,9 +3328,9 @@ class BoUpSLP {
/// reordering of operands during buildTree_rec() and vectorizeTree().
SmallVector<ValueList, 2> Operands;
- /// The main/alternate instruction.
- Instruction *MainOp = nullptr;
- Instruction *AltOp = nullptr;
+ /// MainOp and AltOp are recorded inside. S should be obtained from
+ /// newTreeEntry.
+ InstructionsState S = InstructionsState::invalid();
/// Interleaving factor for interleaved loads Vectorize nodes.
unsigned InterleaveFactor = 0;
@@ -3352,10 +3354,10 @@ class BoUpSLP {
/// Set this bundle's operand from Scalars.
void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
- VLOperands Ops(Scalars, MainOp, R);
+ VLOperands Ops(Scalars, S, R);
if (RequireReorder)
Ops.reorder();
- for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
+ for (unsigned I : seq<unsigned>(S.getMainOp()->getNumOperands()))
setOperand(I, Ops.getVL(I));
}
@@ -3388,13 +3390,9 @@ class BoUpSLP {
}
/// Some of the instructions in the list have alternate opcodes.
- bool isAltShuffle() const { return MainOp != AltOp; }
+ bool isAltShuffle() const { return S.isAltShuffle(); }
- bool isOpcodeOrAlt(Instruction *I) const {
- unsigned CheckedOpcode = I->getOpcode();
- return (getOpcode() == CheckedOpcode ||
- getAltOpcode() == CheckedOpcode);
- }
+ bool isOpcodeOrAlt(Instruction *I) const { return S.isOpcodeOrAlt(I); }
/// Chooses the correct key for scheduling data. If \p Op has the same (or
/// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is
@@ -3403,31 +3401,24 @@ class BoUpSLP {
auto *I = dyn_cast<Instruction>(Op);
if (I && isOpcodeOrAlt(I))
return Op;
- return MainOp;
+ return S.getMainOp();
}
void setOperations(const InstructionsState &S) {
assert(S && "InstructionsState is invalid.");
- MainOp = S.getMainOp();
- AltOp = S.getAltOp();
+ this->S = S;
}
- Instruction *getMainOp() const {
- return MainOp;
- }
+ Instruction *getMainOp() const { return S.getMainOp(); }
- Instruction *getAltOp() const {
- return AltOp;
- }
+ Instruction *getAltOp() const { return S.getAltOp(); }
/// The main/alternate opcodes for the list of instructions.
- unsigned getOpcode() const {
- return MainOp ? MainOp->getOpcode() : 0;
- }
+ unsigned getOpcode() const { return S.getOpcode(); }
- unsigned getAltOpcode() const {
- return AltOp ? AltOp->getOpcode() : 0;
- }
+ unsigned getAltOpcode() const { return S.getAltOpcode(); }
+
+ bool hasState() const { return S.valid(); }
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
@@ -3523,16 +3514,13 @@ class BoUpSLP {
dbgs() << "CombinedVectorize\n";
break;
}
- dbgs() << "MainOp: ";
- if (MainOp)
- dbgs() << *MainOp << "\n";
- else
- dbgs() << "NULL\n";
- dbgs() << "AltOp: ";
- if (AltOp)
- dbgs() << *AltOp << "\n";
- else
- dbgs() << "NULL\n";
+ if (S) {
+ dbgs() << "MainOp: " << *S.getMainOp() << "\n";
+ dbgs() << "AltOp: " << *S.getAltOp() << "\n";
+ } else {
+ dbgs() << "MainOp: NULL\n";
+ dbgs() << "AltOp: NULL\n";
+ }
dbgs() << "VectorizedValue: ";
if (VectorizedValue)
dbgs() << *VectorizedValue << "\n";
@@ -3707,9 +3695,13 @@ class BoUpSLP {
}
#endif
- TreeEntry *getTreeEntry(Value *V) { return ScalarToTreeEntry.lookup(V); }
+ TreeEntry *getTreeEntry(Value *V) {
+ assert(V && "V cannot be nullptr.");
+ return ScalarToTreeEntry.lookup(V);
+ }
const TreeEntry *getTreeEntry(Value *V) const {
+ assert(V && "V cannot be nullptr.");
return ScalarToTreeEntry.lookup(V);
}
@@ -5590,7 +5582,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
// Try build correct order for extractelement instructions.
SmallVector<int> ReusedMask(TE.ReuseShuffleIndices.begin(),
TE.ReuseShuffleIndices.end());
- if (TE.getOpcode() == Instruction::ExtractElement &&
+ if (TE.hasState() && TE.getOpcode() == Instruction::ExtractElement &&
all_of(TE.Scalars, [Sz](Value *V) {
if (isa<PoisonValue>(V))
return true;
@@ -5752,10 +5744,11 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
return std::nullopt; // No need to reorder.
return std::move(Phis);
}
- if (TE.isGather() && !TE.isAltShuffle() && allSameType(TE.Scalars)) {
+ if (TE.isGather() && (!TE.hasState() || !TE.isAltShuffle()) &&
+ allSameType(TE.Scalars)) {
// TODO: add analysis of other gather nodes with extractelement
// instructions and other values/instructions, not only undefs.
- if ((TE.getOpcode() == Instruction::ExtractElement ||
+ if (((TE.hasState() && TE.getOpcode() == Instruction::ExtractElement) ||
(all_of(TE.Scalars, IsaPred<UndefValue, ExtractElementInst>) &&
any_of(TE.Scalars, IsaPred<ExtractElementInst>))) &&
all_of(TE.Scalars, [](Value *V) {
@@ -5765,8 +5758,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
// Check that gather of extractelements can be represented as
// just a shuffle of a single vector.
OrdersType CurrentOrder;
- bool Reuse = canReuseExtract(TE.Scalars, TE.getMainOp(), CurrentOrder,
- /*ResizeAllowed=*/true);
+ bool Reuse =
+ canReuseExtract(TE.Scalars, CurrentOrder, /*ResizeAllowed=*/true);
if (Reuse || !CurrentOrder.empty())
return std::move(CurrentOrder);
}
@@ -5815,7 +5808,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
return Order;
// Check if can include the order of vectorized loads. For masked gathers do
// extra analysis later, so include such nodes into a special list.
- if (TE.isGather() && TE.getOpcode() == Instruction::Load) {
+ if (TE.hasState() && TE.getOpcode() == Instruction::Load) {
SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
LoadsState Res = canVectorizeLoads(TE.Scalars, TE.Scalars.front(),
@@ -5930,7 +5923,7 @@ void BoUpSLP::reorderTopToBottom() {
// Patterns like [fadd,fsub] can be combined into a single instruction in
// x86. Reordering them into [fsub,fadd] blocks this pattern. So we need
// to take into account their order when looking for the most used order.
- if (TE->isAltShuffle()) {
+ if (TE->hasState() && TE->isAltShuffle()) {
VectorType *VecTy =
getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size());
unsigned Opcode0 = TE->getOpcode();
@@ -6009,7 +6002,7 @@ void BoUpSLP::reorderTopToBottom() {
if (It != GathersToOrders.end())
return It->second;
}
- if (OpTE->isAltShuffle()) {
+ if (OpTE->hasState() && OpTE->isAltShuffle()) {
auto It = AltShufflesToOrders.find(OpTE);
if (It != AltShufflesToOrders.end())
return It->second;
@@ -7610,7 +7603,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
- bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
+ bool Reuse = canReuseExtract(VL, CurrentOrder);
// FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
if (!has_single_bit(VL.size()))
return TreeEntry::NeedToGather;
@@ -8623,7 +8616,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TE->dump());
ValueList Left, Right;
- VLOperands Ops(VL, VL0, *this);
+ VLOperands Ops(VL, S, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
@@ -8891,7 +8884,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const {
return N;
}
-bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
+bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL,
SmallVectorImpl<unsigned> &CurrentOrder,
bool ResizeAllowed) const {
const auto *It = find_if(VL, IsaPred<ExtractElementInst, ExtractValueInst>);
@@ -9545,7 +9538,7 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) {
// Do not reorder nodes if it small (just 2 elements), all-constant or all
// instructions have same opcode already.
- if (TE.Scalars.size() == 2 || (TE.getOpcode() && !TE.isAltShuffle()) ||
+ if (TE.Scalars.size() == 2 || (TE.hasState() && !TE.isAltShuffle()) ||
all_of(TE.Scalars, isConstant))
return;
@@ -9764,7 +9757,7 @@ void BoUpSLP::transformNodes() {
// Do not try partial vectorization for small nodes (<= 2), nodes with the
// same opcode and same parent block or all constants.
if (VL.size() <= 2 || LoadEntriesToVectorize.contains(Idx) ||
- !(!E.getOpcode() || E.getOpcode() == Instruction::Load ||
+ !(!E.hasState() || E.getOpcode() == Instruction::Load ||
E.isAltShuffle() || !allSameBlock(VL)) ||
allConstant(VL) || isSplat(VL))
continue;
@@ -9907,6 +9900,8 @@ void BoUpSLP::transformNodes() {
E.ReorderIndices.clear();
}
}
+ if (!E.hasState())
+ continue;
switch (E.getOpcode()) {
case Instruction::Load: {
// No need to reorder masked gather loads, just reorder the scalar
@@ -10026,7 +10021,7 @@ void BoUpSLP::transformNodes() {
getCanonicalGraphSize() <= SmallTree &&
count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()),
[](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isGather() &&
+ return TE->isGather() && TE->hasState() &&
TE->getOpcode() == Instruction::Load &&
!allSameBlock(TE->Scalars);
}) == 1)
@@ -10042,13 +10037,13 @@ void BoUpSLP::transformNodes() {
for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
TreeEntry &E = *TE;
if (E.isGather() &&
- (E.getOpcode() == Instruction::Load ||
- (!E.getOpcode() && any_of(E.Scalars,
- [&](Value *V) {
- return isa<LoadInst>(V) &&
- !isVectorized(V) &&
- !isDeleted(cast<Instruction>(V));
- }))) &&
+ ((E.hasState() && E.getOpcode() == Instruction::Load) ||
+ (!E.hasState() && any_of(E.Scalars,
+ [&](Value *V) {
+ return isa<LoadInst>(V) &&
+ !isVectorized(V) &&
+ !isDeleted(cast<Instruction>(V));
+ }))) &&
!isSplat(E.Scalars)) {
for (Value *V : E.Scalars) {
auto *LI = dyn_cast<LoadInst>(V);
@@ -10642,7 +10637,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
bool PrevNodeFound = any_of(
ArrayRef(R.VectorizableTree).take_front(E->Idx),
[&](const std::unique_ptr<TreeEntry> &TE) {
- return ((!TE->isAltShuffle() &&
+ return ((TE->hasState() && !TE->isAltShuffle() &&
TE->getOpcode() == Instruction::ExtractElement) ||
TE->isGather()) &&
all_of(enumerate(TE->Scalars), [&](auto &&Data) {
@@ -11768,7 +11763,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
if (TE.get() == E)
break;
- if (TE->isAltShuffle() &&
+ if (TE->hasState() && TE->isAltShuffle() &&
((TE->getOpcode() == E->getOpcode() &&
TE->getAltOpcode() == E->getAltOpcode()) ||
(TE->getOpcode() == E->getAltOpcode() &&
@@ -11930,10 +11925,12 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
[this](Value *V) { return EphValues.contains(V); }) &&
(allConstant(TE->Scalars) || isSplat(TE->Scalars) ||
TE->Scalars.size() < Limit ||
- ((TE->getOpcode() == Instruction::ExtractElement ||
+ (((TE->hasState() &&
+ TE->getOpcode() == Instruction::ExtractElement) ||
all_of(TE->Scalars, IsaPred<ExtractElementInst, UndefValue>)) &&
isFixedVectorShuffle(TE->Scalars, Mask, AC)) ||
- (TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()) ||
+ ((TE->hasState() && TE->getOpcode() == Instruction::Load) &&
+ (!TE->hasState() || !TE->isAltShuffle())) ||
any_of(TE->Scalars, IsaPred<LoadInst>));
};
@@ -12062,9 +12059,10 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
!VectorizableTree.empty() &&
all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
return (TE->isGather() &&
- TE->getOpcode() != Instruction::ExtractElement &&
+ (!TE->hasState() ||
+ TE->getOpcode() != Instruction::ExtractElement) &&
count_if(TE->Scalars, IsaPred<ExtractElementInst>) <= Limit) ||
- TE->getOpcode() == Instruction::PHI;
+ (TE->hasState() && TE->getOpcode() == Instruction::PHI);
}))
return true;
@@ -12098,6 +12096,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
return false;
if (VectorizableTree.back()->isGather() &&
+ VectorizableTree.back()->hasState() &&
VectorizableTree.back()->isAltShuffle() &&
VectorizableTree.back()->getVectorFactor() > 2 &&
allSameBlock(VectorizableTree.back()->Scalars) &&
@@ -12122,7 +12121,7 @@ bool BoUpSLP::isTreeNotExtendable() const {
getCanonicalGraphSize() <= SmallTree &&
count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()),
[](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isGather() &&
+ return TE->isGather() && TE->hasState() &&
TE->getOpcode() == Instruction::Load &&
!allSameBlock(TE->Scalars);
}) == 1)
@@ -12134,7 +12133,7 @@ bool BoUpSLP::isTreeNotExtendable() const {
TreeEntry &E = *VectorizableTree[Idx];
if (!E.isGather())
continue;
- if (E.getOpcode() && E.getOpcode() != Instruction::Load)
+ if (E.hasState() && E.getOpcode() != Instruction::Load)
return false;
if (isSplat(E.Scalars) || allConstant(E.Scalars))
continue;
@@ -12444,7 +12443,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
TE.dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
continue;
}
- if (TE.isGather()) {
+ if (TE.isGather() && TE.hasState()) {
if (const TreeEntry *E = getTreeEntry(TE.getMainOp());
E && E->getVectorFactor() == TE.getVectorFactor() &&
E->isSame(TE.Scalars)) {
@@ -14875,14 +14874,15 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
}
}
// Gather extracts after we check for full matched gathers only.
- if (!ExtractShuffles.empty() || E->getOpcode() != Instruction::Load ||
- ((E->getOpcode() == Instruction::Load ||
+ if (!ExtractShuffles.empty() || !E->hasState() ||
+ E->getOpcode() != Instruction::Load ||
+ (((E->hasState() && E->getOpcode() == Instruction::Load) ||
any_of(E->Scalars, IsaPred<LoadInst>)) &&
any_of(E->Scalars,
[this](Value *V) {
return isa<LoadInst>(V) && getTreeEntry(V);
})) ||
- E->isAltShuffle() ||
+ (E->hasState() && E->isAltShuffle()) ||
all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) ||
isSplat(E->Scalars) ||
(E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) {
@@ -15262,7 +15262,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size());
if (E->isGather()) {
// Set insert point for non-reduction initial nodes.
- if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
+ if (E->hasState() && E->Idx == 0 && !UserIgnoreList)
setInsertPointAfterBundle(E);
Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs);
E->VectorizedValue = Vec;
@@ -18150,10 +18150,9 @@ void BoUpSLP::computeMinimumValueSizes() {
return;
SmallVector<unsigned> ToDemote;
- auto ComputeMaxBitWidth = [&](const TreeEntry &E, bool IsTopRoot,
- bool IsProfitableToDemoteRoot, unsigned Opcode,
- unsigned Limit, bool IsTruncRoot,
- bool IsSignedCmp) -> unsigned {
+ auto ComputeMaxBitWidth =
+ [&](const TreeEntry &E, bool IsTopRoot, bool IsProfitableToDemoteRoot,
+ unsigned Limit, bool IsTruncRoot, bool IsSignedCmp) -> unsigned {
ToDemote.clear();
// Check if the root is trunc and the next node is gather/buildvector, then
// keep trunc in scalars, which is free in most cases.
@@ -18194,11 +18193,14 @@ void BoUpSLP::computeMinimumValueSizes() {
return MaxBitWidth;
}
+ if (!E.hasState())
+ return 0u;
+
unsigned VF = E.getVectorFactor();
Type *ScalarTy = E.Scalars.front()->getType();
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
auto *TreeRootIT = dyn_cast<IntegerType>(ScalarTy->getScalarType());
- if (!TreeRootIT || !Opcode)
+ if (!TreeRootIT)
return 0u;
if (any_of(E.Scalars,
@@ -18270,6 +18272,7 @@ void BoUpSLP::computeMinimumValueSizes() {
IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF)))
return 0u;
+ unsigned Opcode = E.getOpcode();
bool IsProfitableToDemote = Opcode == Instruction::Trunc ||
Opcode == Instruction::SExt ||
Opcode == Instruction::ZExt || NumParts > 1;
@@ -18350,15 +18353,14 @@ void BoUpSLP::computeMinimumValueSizes() {
while (NodeIdx < VectorizableTree.size()) {
ArrayRef<Value *> TreeRoot = VectorizableTree[NodeIdx]->Scalars;
unsigned Limit = 2;
- unsigned Opcode = VectorizableTree[NodeIdx]->getOpcode();
if (IsTopRoot &&
ReductionBitWidth ==
DL->getTypeSizeInBits(
VectorizableTree.front()->Scalars.front()->getType()))
Limit = 3;
unsigned MaxBitWidth = ComputeMaxBitWidth(
- *VectorizableTree[NodeIdx], IsTopRoot, IsProfitableToDemoteRoot, Opcode,
- Limit, IsTruncRoot, IsSignedCmp);
+ *VectorizableTree[NodeIdx], IsTopRoot, IsProfitableToDemoteRoot, Limit,
+ IsTruncRoot, IsSignedCmp);
if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) {
if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth)
ReductionBitWidth = bit_ceil(MaxBitWidth);
@@ -18401,19 +18403,21 @@ void BoUpSLP::computeMinimumValueSizes() {
});
IsSignedCmp =
NodeIdx < VectorizableTree.size() &&
- any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
- [&](const EdgeInfo &EI) {
- return EI.UserTE->getOpcode() == Instruction::ICmp &&
- any_of(EI.UserTE->Scalars, [&](Value *V) {
- auto *IC = dyn_cast<ICmpInst>(V);
- return IC &&
- (IC->isSigned() ||
- !isKnownNonNegative(IC->getOperand(0),
- SimplifyQuery(*DL)) ||
- !isKnownNonNegative(IC->getOperand(1),
- SimplifyQuery(*DL)));
- });
- });
+ any_of(
+ VectorizableTree[NodeIdx]->UserTreeIndices,
+ [&](const EdgeInfo &EI) {
+ return (EI.UserTE->hasState() &&
+ EI.UserTE->getOpcode() == Instruction::ICmp) &&
+ any_of(EI.UserTE->Scalars, [&](Value *V) {
+ auto *IC = dyn_cast<ICmpInst>(V);
+ return IC &&
+ (IC->isSigned() ||
+ !isKnownNonNegative(IC->getOperand(0),
+ SimplifyQuery(*DL)) ||
+ !isKnownNonNegative(IC->getOperand(1),
+ SimplifyQuery(*DL)));
+ });
+ });
}
// If the maximum bit width we compute is less than the width of the roots'
More information about the llvm-commits mailing list