[llvm] [LSV] Merge contiguous chains across scalar types (PR #154069)
Drew Kersnar via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 10:56:35 PST 2025
================
@@ -480,49 +582,120 @@ bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
});
bool Changed = false;
+ SmallVector<Chain> ContiguousSubChains;
+
for (const auto &[EqClassKey, EqClass] :
- collectEquivalenceClasses(Begin, End))
- Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
+ collectEquivalenceClasses(Begin, End)) {
- return Changed;
-}
+ LLVM_DEBUG({
+ dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
+ << " keyed on " << EqClassKey << ":\n";
+ for (Instruction *I : EqClass)
+ dbgs() << " " << *I << "\n";
+ });
-bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
- ArrayRef<Instruction *> EqClass) {
- bool Changed = false;
+ for (Chain &C : gatherChains(EqClass)) {
- LLVM_DEBUG({
- dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
- << " keyed on " << EqClassKey << ":\n";
- for (Instruction *I : EqClass)
- dbgs() << " " << *I << "\n";
- });
+ // Split up the chain into increasingly smaller chains, until we can
+ // finally vectorize the chains.
+ //
+ // (Don't be scared by the depth of the loop nest here. These operations
+ // are all at worst O(n lg n) in the number of instructions, and splitting
+ // chains doesn't change the number of instrs. So the whole loop nest is
+ // O(n lg n).)
+ for (auto &C : splitChainByMayAliasInstrs(C)) {
+ for (auto &C : splitChainByContiguity(C)) {
+ ContiguousSubChains.emplace_back(C);
+ }
+ }
+ }
+ }
- std::vector<Chain> Chains = gatherChains(EqClass);
- LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
- << " nontrivial chains.\n";);
- for (Chain &C : Chains)
- Changed |= runOnChain(C);
- return Changed;
-}
+ // Merge chains in reverse order, so that the first chain is the largest.
+ for (int I = ContiguousSubChains.size() - 1; I > 0; I--) {
+ Chain &C1 = ContiguousSubChains[I - 1];
+ Chain &C2 = ContiguousSubChains[I];
-bool Vectorizer::runOnChain(Chain &C) {
- LLVM_DEBUG({
- dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
- dumpChain(C);
- });
+ // If the scalar types of the chains are the same, no casts are needed to
+ // merge them, so skip this pair.
+ if (getLoadStoreType(C1[0].Inst)->getScalarType() ==
+ getLoadStoreType(C2[0].Inst)->getScalarType())
+ continue;
+
+ const Value *C1Ptr = getLoadStorePointerOperand(C1[0].Inst);
+ const Value *C2Ptr = getLoadStorePointerOperand(C2[0].Inst);
+ unsigned AS1 = C1Ptr->getType()->getPointerAddressSpace();
+ unsigned AS2 = C2Ptr->getType()->getPointerAddressSpace();
+ bool C1IsLoad = isa<LoadInst>(C1[0].Inst);
+ bool C2IsLoad = isa<LoadInst>(C2[0].Inst);
+
+ // If the chains mix loads and stores, are in different address spaces, or
+ // have distinct underlying pointer objects, skip.
+ if (C1IsLoad != C2IsLoad || AS1 != AS2 ||
+ ::getUnderlyingObject(C1Ptr) != ::getUnderlyingObject(C2Ptr))
+ continue;
+
+ // Compute constant offset between chain leaders; if unknown, skip.
+ std::optional<APInt> DeltaOpt = computeLeaderDelta(C1[0].Inst, C2[0].Inst);
+ if (!DeltaOpt)
+ continue;
+
+ // Check that rebasing C2 into C1's coordinate space will not overlap C1.
+ if (chainsOverlapAfterRebase(C1, C2, *DeltaOpt))
+ continue;
+
+ // Determine the common integer cast type for normalization and ensure total
+ // bitwidth matches across all elements of both chains.
+ Type *C1ElemTy = getLoadStoreType(C1[0].Inst);
+ unsigned TotalBits = DL.getTypeSizeInBits(C1ElemTy);
+ auto AllElemsMatchTotalBits = [&](const Chain &C) {
+ return llvm::all_of(C, [&](const ChainElem &E) {
+ return DL.getTypeSizeInBits(getLoadStoreType(E.Inst)) == TotalBits;
+ });
+ };
+ if (!AllElemsMatchTotalBits(C1) || !AllElemsMatchTotalBits(C2))
+ continue;
+
+ // Power-of-two span ensures we can form a legal, single vector access
+ // without padding or splitting. Many targets and cost models assume POT
+ // widths, and it guarantees an integral element count for the chosen
+ // VecElemTy.
+ APInt Sz = C2.front().OffsetFromLeader +
+ DL.getTypeStoreSize(getLoadStoreType(C2.front().Inst)) -
+ C1.back().OffsetFromLeader + *DeltaOpt;
+ if (!Sz.isPowerOf2())
+ continue;
+
----------------
dakersnar wrote:
Do you mind explaining this calculation? This doesn't seem right to me.
https://github.com/llvm/llvm-project/pull/154069
More information about the llvm-commits mailing list