[llvm] [SLP]Improve minbitwidth analysis. (PR #78976)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 16 07:28:05 PST 2024
================
@@ -13270,171 +13305,253 @@ bool BoUpSLP::collectValuesToDemote(
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!collectValuesToDemote(IncValue, ToDemote, DemotedConsts, Roots,
- Visited))
+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
+ ToDemote, DemotedConsts, Visited,
+ MaxDepthLevel, IsProfitableToDemote))
return false;
break;
}
// Otherwise, conservatively give up.
default:
- return false;
+ if (!IsPotentiallyTruncated(I, BitWidth))
+ return false;
+ MaxDepthLevel = 0;
+ Start = End = 0;
+ break;
}
+ ++MaxDepthLevel;
// Gather demoted constant operands.
for (unsigned Idx : seq<unsigned>(Start, End))
if (isa<Constant>(I->getOperand(Idx)))
DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx);
// Record the value that we can demote.
ToDemote.push_back(V);
- return true;
+ return IsProfitableToDemote;
}
void BoUpSLP::computeMinimumValueSizes() {
// We only attempt to truncate integer expressions.
- auto &TreeRoot = VectorizableTree[0]->Scalars;
- auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
- if (!TreeRootIT || VectorizableTree.front()->State == TreeEntry::NeedToGather)
- return;
+ bool IsStoreOrInsertElt =
+ VectorizableTree.front()->getOpcode() == Instruction::Store ||
+ VectorizableTree.front()->getOpcode() == Instruction::InsertElement;
+ unsigned NodeIdx = 0;
+ if (IsStoreOrInsertElt &&
+ VectorizableTree.front()->State != TreeEntry::NeedToGather)
+ NodeIdx = 1;
// Ensure the roots of the vectorizable tree don't form a cycle.
- if (!VectorizableTree.front()->UserTreeIndices.empty())
+ if (VectorizableTree[NodeIdx]->State == TreeEntry::NeedToGather ||
+ (NodeIdx == 0 && !VectorizableTree[NodeIdx]->UserTreeIndices.empty()) ||
+ (NodeIdx != 0 && any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
+ [&](const EdgeInfo &EI) {
+ return EI.UserTE->Idx >
+ static_cast<int>(NodeIdx);
+ })))
return;
- // Conservatively determine if we can actually truncate the roots of the
- // expression. Collect the values that can be demoted in ToDemote and
- // additional roots that require investigating in Roots.
- SmallVector<Value *, 32> ToDemote;
- DenseMap<Instruction *, SmallVector<unsigned>> DemotedConsts;
- SmallVector<Value *, 4> Roots;
- for (auto *Root : TreeRoot) {
- DenseSet<Value *> Visited;
- if (!collectValuesToDemote(Root, ToDemote, DemotedConsts, Roots, Visited))
- return;
+  // If the first value node for store/insertelement is sext/zext/trunc, skip
+  // it and resize to the final type.
+ bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
+ if (NodeIdx != 0 &&
+ VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
+ (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
+ VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
+ VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
+ assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
+ ++NodeIdx;
+ IsProfitableToDemoteRoot = true;
}
- // The maximum bit width required to represent all the values that can be
- // demoted without loss of precision. It would be safe to truncate the roots
- // of the expression to this width.
- auto MaxBitWidth = 1u;
-
- // We first check if all the bits of the roots are demanded. If they're not,
- // we can truncate the roots to this narrower type.
- for (auto *Root : TreeRoot) {
- auto Mask = DB->getDemandedBits(cast<Instruction>(Root));
- MaxBitWidth = std::max<unsigned>(Mask.getBitWidth() - Mask.countl_zero(),
- MaxBitWidth);
- }
-
- // True if the roots can be zero-extended back to their original type, rather
- // than sign-extended. We know that if the leading bits are not demanded, we
- // can safely zero-extend. So we initialize IsKnownPositive to True.
- bool IsKnownPositive = true;
-
- // If all the bits of the roots are demanded, we can try a little harder to
- // compute a narrower type. This can happen, for example, if the roots are
- // getelementptr indices. InstCombine promotes these indices to the pointer
- // width. Thus, all their bits are technically demanded even though the
- // address computation might be vectorized in a smaller type.
- //
- // We start by looking at each entry that can be demoted. We compute the
- // maximum bit width required to store the scalar by using ValueTracking to
- // compute the number of high-order bits we can truncate.
- if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
- all_of(TreeRoot, [](Value *V) {
- return all_of(V->users(),
- [](User *U) { return isa<GetElementPtrInst>(U); });
- })) {
- MaxBitWidth = 8u;
-
+ SmallVector<Value *> ToDemote;
+ DenseMap<Instruction *, SmallVector<unsigned>> DemotedConsts;
+ auto ComputeMaxBitWidth = [&](ArrayRef<Value *> TreeRoot, unsigned VF,
+ bool IsTopRoot, bool IsProfitableToDemoteRoot,
+ unsigned Opcode, unsigned Limit) {
+ ToDemote.clear();
+ auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
+ if (!TreeRootIT || !Opcode)
+ return 0u;
+
+ unsigned NumParts = TTI->getNumberOfParts(
+ FixedVectorType::get(TreeRoot.front()->getType(), VF));
+
+ // The maximum bit width required to represent all the values that can be
+ // demoted without loss of precision. It would be safe to truncate the roots
+ // of the expression to this width.
+ auto MaxBitWidth = 1u;
+
+ // True if the roots can be zero-extended back to their original type,
+ // rather than sign-extended. We know that if the leading bits are not
+ // demanded, we can safely zero-extend. So we initialize IsKnownPositive to
+ // True.
// Determine if the sign bit of all the roots is known to be zero. If not,
// IsKnownPositive is set to False.
- IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) {
+ bool IsKnownPositive = all_of(TreeRoot, [&](Value *R) {
KnownBits Known = computeKnownBits(R, *DL);
return Known.isNonNegative();
});
- // Determine the maximum number of bits required to store the scalar
- // values.
- for (auto *Scalar : ToDemote) {
- auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT);
- auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
- MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
- }
-
- // If we can't prove that the sign bit is zero, we must add one to the
- // maximum bit width to account for the unknown sign bit. This preserves
- // the existing sign bit so we can safely sign-extend the root back to the
- // original type. Otherwise, if we know the sign bit is zero, we will
- // zero-extend the root instead.
- //
- // FIXME: This is somewhat suboptimal, as there will be cases where adding
- // one to the maximum bit width will yield a larger-than-necessary
- // type. In general, we need to add an extra bit only if we can't
- // prove that the upper bit of the original type is equal to the
- // upper bit of the proposed smaller type. If these two bits are the
- // same (either zero or one) we know that sign-extending from the
- // smaller type will result in the same value. Here, since we can't
- // yet prove this, we are just making the proposed smaller type
- // larger to ensure correctness.
- if (!IsKnownPositive)
- ++MaxBitWidth;
- }
-
- // Round MaxBitWidth up to the next power-of-two.
- MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
-
-  // If the maximum bit width we compute is less than the width of the roots'
- // type, we can proceed with the narrowing. Otherwise, do nothing.
- if (MaxBitWidth >= TreeRootIT->getBitWidth())
- return;
+ // We first check if all the bits of the roots are demanded. If they're not,
+ // we can truncate the roots to this narrower type.
+ for (auto *Root : TreeRoot) {
+ auto NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT);
+ auto NumTypeBits = DL->getTypeSizeInBits(Root->getType());
+ unsigned BitWidth1 = NumTypeBits - NumSignBits;
+ // If we can't prove that the sign bit is zero, we must add one to the
+ // maximum bit width to account for the unknown sign bit. This preserves
+ // the existing sign bit so we can safely sign-extend the root back to the
+ // original type. Otherwise, if we know the sign bit is zero, we will
+ // zero-extend the root instead.
+ //
+ // FIXME: This is somewhat suboptimal, as there will be cases where adding
+ // one to the maximum bit width will yield a larger-than-necessary
+ // type. In general, we need to add an extra bit only if we can't
+ // prove that the upper bit of the original type is equal to the
+ // upper bit of the proposed smaller type. If these two bits are
+ // the same (either zero or one) we know that sign-extending from
+ // the smaller type will result in the same value. Here, since we
+ // can't yet prove this, we are just making the proposed smaller
+ // type larger to ensure correctness.
+ if (!IsKnownPositive)
+ ++BitWidth1;
+
+ auto Mask = DB->getDemandedBits(cast<Instruction>(Root));
----------------
RKSimon wrote:
Don't use auto
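
A minimal sketch of what the requested change could look like, spelling
out the type instead of auto. This assumes DB is the DemandedBits
analysis already used in this function, whose getDemandedBits returns an
APInt; the CandidateWidth name is illustrative, not from the patch:

  APInt Mask = DB->getDemandedBits(cast<Instruction>(Root));
  // Leading zeros in the demanded-bits mask are bits no user of Root
  // ever reads, so they bound the required width from above, exactly
  // as the old Mask.getBitWidth() - Mask.countl_zero() computation did.
  unsigned CandidateWidth = Mask.getBitWidth() - Mask.countl_zero();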
https://github.com/llvm/llvm-project/pull/78976