[llvm] [LSV] Merge contiguous chains across scalar types (PR #154069)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 08:04:14 PDT 2025
================
@@ -479,49 +593,111 @@ bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
});
bool Changed = false;
+ SmallVector<Chain> ContiguousSubChains;
+
for (const auto &[EqClassKey, EqClass] :
- collectEquivalenceClasses(Begin, End))
- Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
+ collectEquivalenceClasses(Begin, End)) {
- return Changed;
-}
+ LLVM_DEBUG({
+ dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
+ << " keyed on " << EqClassKey << ":\n";
+ for (Instruction *I : EqClass)
+ dbgs() << " " << *I << "\n";
+ });
-bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
- ArrayRef<Instruction *> EqClass) {
- bool Changed = false;
+ for (Chain &C : gatherChains(EqClass)) {
- LLVM_DEBUG({
- dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
- << " keyed on " << EqClassKey << ":\n";
- for (Instruction *I : EqClass)
- dbgs() << " " << *I << "\n";
- });
+ // Split up the chain into increasingly smaller chains, until we can
+ // finally vectorize the chains.
+ //
+ // (Don't be scared by the depth of the loop nest here. These operations
+ // are all at worst O(n lg n) in the number of instructions, and splitting
+ // chains doesn't change the number of instrs. So the whole loop nest is
+ // O(n lg n).)
+ for (auto &C : splitChainByMayAliasInstrs(C)) {
+ for (auto &C : splitChainByContiguity(C)) {
+ ContiguousSubChains.emplace_back(C);
+ }
+ }
+ }
+ }
- std::vector<Chain> Chains = gatherChains(EqClass);
- LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
- << " nontrivial chains.\n";);
- for (Chain &C : Chains)
- Changed |= runOnChain(C);
- return Changed;
-}
+ // Merge chains in reverse order, so that the first chain is the largest.
+ for (int I = ContiguousSubChains.size() - 1; I > 0; I--) {
+ Chain &C1 = ContiguousSubChains[I - 1];
+ Chain &C2 = ContiguousSubChains[I];
-bool Vectorizer::runOnChain(Chain &C) {
- LLVM_DEBUG({
- dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
- dumpChain(C);
- });
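+ // If the scalar types of the chains are the same, skip this pair: the
+ // merge below only exists to combine chains whose scalar types differ,
+ // which it does by normalizing their element types with casts.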
+ if (getLoadStoreType(C1[0].Inst)->getScalarType() ==
+ getLoadStoreType(C2[0].Inst)->getScalarType())
+ continue;
+
+ const Value *C1Ptr = getLoadStorePointerOperand(C1[0].Inst);
+ const Value *C2Ptr = getLoadStorePointerOperand(C2[0].Inst);
+ unsigned AS1 = C1Ptr->getType()->getPointerAddressSpace();
+ unsigned AS2 = C2Ptr->getType()->getPointerAddressSpace();
+ bool C1IsLoad = isa<LoadInst>(C1[0].Inst);
+ bool C2IsLoad = isa<LoadInst>(C2[0].Inst);
+
+ // If the chains are in different address spaces, have distinct underlying
+ // pointer objects, or mix loads and stores, skip.
+ if (getUnderlyingPtrObject(C1Ptr) != getUnderlyingPtrObject(C2Ptr) ||
+ C1IsLoad != C2IsLoad || AS1 != AS2)
+ continue;
+
+ // Compute constant offset between chain leaders; if unknown, skip.
+ std::optional<APInt> DeltaOpt = computeLeaderDelta(C1[0].Inst, C2[0].Inst);
+ if (!DeltaOpt)
+ continue;
+
+ // Check that rebasing C2 into C1's coordinate space will not overlap C1.
+ if (chainsOverlapAfterRebase(C1, C2, *DeltaOpt))
+ continue;
+
+ // Determine the common integer cast type for normalization and ensure total
+ // bitwidth matches across all elements of both chains.
+ Type *C1ElemTy = getLoadStoreType(C1[0].Inst);
+ unsigned TotalBits = DL.getTypeSizeInBits(C1ElemTy);
+ auto AllElemsMatchTotalBits = [&](const Chain &C) {
+ return llvm::all_of(C, [&](const ChainElem &E) {
+ return DL.getTypeSizeInBits(getLoadStoreType(E.Inst)) == TotalBits;
+ });
+ };
+ if (!AllElemsMatchTotalBits(C1) || !AllElemsMatchTotalBits(C2))
+ continue;
+
+ // Rebase C2's offsets into C1's coordinate space prior to merging.
+ rebaseChain(C2, *DeltaOpt);
+
+ // Merge C2 into C1 by appending all elements of C2 to C1, then erase C2
+ // from ContiguousSubChains.
+ C1.insert(C1.end(), C2.begin(), C2.end());
+ ContiguousSubChains.erase(ContiguousSubChains.begin() + I);
+
+ // Normalize the value operand/result type of each instruction in C1 to
+ // C1CastTy.
+ Type *C1CastTy =
+ Type::getIntNTy(C1ElemTy->getContext(), DL.getTypeSizeInBits(C1ElemTy));
+ normalizeChainToType(C1, C1CastTy);
+ }
+
+ for (auto &C : ContiguousSubChains) {
+ if (C.size() <= 1)
+ continue;
+ for (auto &AlignedSubChain : splitChainByAlignment(C))
+ Changed |= vectorizeChain(AlignedSubChain);
+ }
+
+ // Erase all instructions scheduled for deletion in this pseudo-BB.
+ for (Instruction *I : ToErase) {
+ auto *PtrOperand = getLoadStorePointerOperand(I);
+ if (I->use_empty())
+ I->eraseFromParent();
+ RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
----------------
arsenm wrote:
Can't you just directly call RecursivelyDeleteTriviallyDeadInstructions on I?
https://github.com/llvm/llvm-project/pull/154069
More information about the llvm-commits
mailing list