[llvm] [SLP]Improve minbitwidth analysis. (PR #78976)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 16 07:28:05 PST 2024
================
@@ -13270,171 +13305,253 @@ bool BoUpSLP::collectValuesToDemote(
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!collectValuesToDemote(IncValue, ToDemote, DemotedConsts, Roots,
- Visited))
+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
+ ToDemote, DemotedConsts, Visited,
+ MaxDepthLevel, IsProfitableToDemote))
return false;
break;
}
// Otherwise, conservatively give up.
default:
- return false;
+ if (!IsPotentiallyTruncated(I, BitWidth))
+ return false;
+ MaxDepthLevel = 0;
+ Start = End = 0;
+ break;
}
+ ++MaxDepthLevel;
// Gather demoted constant operands.
for (unsigned Idx : seq<unsigned>(Start, End))
if (isa<Constant>(I->getOperand(Idx)))
DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx);
// Record the value that we can demote.
ToDemote.push_back(V);
- return true;
+ return IsProfitableToDemote;
}

void BoUpSLP::computeMinimumValueSizes() {
// We only attempt to truncate integer expressions.
- auto &TreeRoot = VectorizableTree[0]->Scalars;
- auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
- if (!TreeRootIT || VectorizableTree.front()->State == TreeEntry::NeedToGather)
- return;
+ bool IsStoreOrInsertElt =
+ VectorizableTree.front()->getOpcode() == Instruction::Store ||
+ VectorizableTree.front()->getOpcode() == Instruction::InsertElement;
+ unsigned NodeIdx = 0;
+ if (IsStoreOrInsertElt &&
+ VectorizableTree.front()->State != TreeEntry::NeedToGather)
+ NodeIdx = 1;
// Ensure the roots of the vectorizable tree don't form a cycle.
- if (!VectorizableTree.front()->UserTreeIndices.empty())
+ if (VectorizableTree[NodeIdx]->State == TreeEntry::NeedToGather ||
+ (NodeIdx == 0 && !VectorizableTree[NodeIdx]->UserTreeIndices.empty()) ||
+ (NodeIdx != 0 && any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
+ [&](const EdgeInfo &EI) {
+ return EI.UserTE->Idx >
+ static_cast<int>(NodeIdx);
+ })))
return;
- // Conservatively determine if we can actually truncate the roots of the
- // expression. Collect the values that can be demoted in ToDemote and
- // additional roots that require investigating in Roots.
- SmallVector<Value *, 32> ToDemote;
- DenseMap<Instruction *, SmallVector<unsigned>> DemotedConsts;
- SmallVector<Value *, 4> Roots;
- for (auto *Root : TreeRoot) {
- DenseSet<Value *> Visited;
- if (!collectValuesToDemote(Root, ToDemote, DemotedConsts, Roots, Visited))
- return;
+ // If the first value node for store/insertelement is sext/zext/trunc, skip
+ // it and resize to the final type.
+ bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
+ if (NodeIdx != 0 &&
+ VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
+ (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
+ VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
+ VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
+ assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
+ ++NodeIdx;
+ IsProfitableToDemoteRoot = true;
}
- // The maximum bit width required to represent all the values that can be
- // demoted without loss of precision. It would be safe to truncate the roots
- // of the expression to this width.
- auto MaxBitWidth = 1u;
-
- // We first check if all the bits of the roots are demanded. If they're not,
- // we can truncate the roots to this narrower type.
- for (auto *Root : TreeRoot) {
- auto Mask = DB->getDemandedBits(cast<Instruction>(Root));
- MaxBitWidth = std::max<unsigned>(Mask.getBitWidth() - Mask.countl_zero(),
- MaxBitWidth);
- }
-
- // True if the roots can be zero-extended back to their original type, rather
- // than sign-extended. We know that if the leading bits are not demanded, we
- // can safely zero-extend. So we initialize IsKnownPositive to True.
- bool IsKnownPositive = true;
-
- // If all the bits of the roots are demanded, we can try a little harder to
- // compute a narrower type. This can happen, for example, if the roots are
- // getelementptr indices. InstCombine promotes these indices to the pointer
- // width. Thus, all their bits are technically demanded even though the
- // address computation might be vectorized in a smaller type.
- //
- // We start by looking at each entry that can be demoted. We compute the
- // maximum bit width required to store the scalar by using ValueTracking to
- // compute the number of high-order bits we can truncate.
- if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
- all_of(TreeRoot, [](Value *V) {
- return all_of(V->users(),
- [](User *U) { return isa<GetElementPtrInst>(U); });
- })) {
- MaxBitWidth = 8u;
-
+ SmallVector<Value *> ToDemote;
+ DenseMap<Instruction *, SmallVector<unsigned>> DemotedConsts;
+ auto ComputeMaxBitWidth = [&](ArrayRef<Value *> TreeRoot, unsigned VF,
+ bool IsTopRoot, bool IsProfitableToDemoteRoot,
+ unsigned Opcode, unsigned Limit) {
+ ToDemote.clear();
+ auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
+ if (!TreeRootIT || !Opcode)
+ return 0u;
+
+ unsigned NumParts = TTI->getNumberOfParts(
+ FixedVectorType::get(TreeRoot.front()->getType(), VF));
+
+ // The maximum bit width required to represent all the values that can be
+ // demoted without loss of precision. It would be safe to truncate the roots
+ // of the expression to this width.
+ auto MaxBitWidth = 1u;
----------------
RKSimon wrote:
Don't use auto
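
(Editorial illustration, not part of the patch: the LLVM coding standards
reserve auto for cases where the type is already evident from the
initializer, so the flagged declaration would presumably be spelled with
the explicit type instead.)

    // Explicit type instead of auto; value and behavior are unchanged.
    unsigned MaxBitWidth = 1u;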
https://github.com/llvm/llvm-project/pull/78976
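
(Editorial sketch for readers following the demanded-bits step quoted in
the removed lines above; the standalone helper below is hypothetical, as
the real logic lives inline in computeMinimumValueSizes. It shows how the
widest demanded bit over all root instructions bounds the width the
expression must keep, so leading undemanded bits can be truncated away.)

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/DemandedBits.h"
    #include "llvm/IR/Instruction.h"
    #include <algorithm>
    using namespace llvm;

    static unsigned computeMaxDemandedWidth(ArrayRef<Value *> TreeRoot,
                                            DemandedBits &DB) {
      unsigned MaxBitWidth = 1u;
      for (Value *Root : TreeRoot) {
        APInt Mask = DB.getDemandedBits(cast<Instruction>(Root));
        // Bits above the highest demanded bit are never read by any user,
        // so they do not constrain the truncated width.
        MaxBitWidth = std::max<unsigned>(
            Mask.getBitWidth() - Mask.countl_zero(), MaxBitWidth);
      }
      return MaxBitWidth;
    }

For example, if every user of the roots truncates the value to i16, only
the low 16 bits are demanded and the bound becomes 16.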