[llvm] [LoadStoreVectorizer] Fill gaps in load/store chains to enable vectorization (PR #159388)
Drew Kersnar via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 06:51:10 PST 2025
================
@@ -1641,3 +1829,146 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
.sextOrTrunc(OrigBitWidth);
return std::nullopt;
}
+
+bool Vectorizer::accessIsAllowedAndFast(unsigned SizeBytes, unsigned AS,
+ Align Alignment,
+ unsigned VecElemBits) const { // Returns true if an access of SizeBytes bytes in address space AS at the given Alignment is acceptable to vectorize; VecElemBits is the bit width used for the per-element speed comparison.
+ if (Alignment.value() % SizeBytes == 0) // Alignment is a multiple of the access size: accept without querying TTI.
+ return true;
+ unsigned VectorizedSpeed = 0; // Out-parameter filled in by the TTI query for the full-width access.
+ bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
+ F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed); // Width is passed in bits, hence SizeBytes * 8.
+ if (!AllowsMisaligned) { // The target rejects this misaligned access outright.
+ LLVM_DEBUG(
+ dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace " << AS
+ << " with alignment " << Alignment.value()
+ << " is misaligned, and therefore can't be vectorized.\n");
+ return false;
+ }
+
+ unsigned ElementwiseSpeed = 0; // Out-parameter filled in by the TTI query for a single element.
+ (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
+ Alignment, &ElementwiseSpeed); // The boolean result is deliberately unused here; only the reported speed is compared below.
+ if (VectorizedSpeed < ElementwiseSpeed) { // Reject only when the wide access is strictly slower; equal speeds are accepted.
+ LLVM_DEBUG(dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace "
+ << AS << " with alignment " << Alignment.value()
+ << " has relative speed " << VectorizedSpeed
+ << ", which is lower than the elementwise speed of "
+ << ElementwiseSpeed
+ << ". Therefore this access won't be vectorized.\n");
+ return false;
+ }
+ return true; // Misaligned, but allowed and not slower than the elementwise access.
+}
+
+bool Vectorizer::shouldAttemptMaskedLoadStore(
+ const ArrayRef<ChainElem> C) const {
+ bool IsLoadChain = isa<LoadInst>(C[0].Inst);
+
+ unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
+ Type *ElementType = getLoadStoreType(C[0].Inst)->getScalarType();
+ unsigned VecRegBits = TTI.getLoadStoreVecRegBitWidth(AS);
+ // Assume max alignment, splitChainByAlignment will legalize it later if the
+ // necessary alignment is not reached.
+ Align OptimisticAlign = Align(VecRegBits / 8);
+ unsigned int MaxVectorNumElems =
+ VecRegBits / DL.getTypeSizeInBits(ElementType);
+
+ // Attempt to find the smallest power-of-two number of elements that, if
+ // well aligned, could be represented as a legal masked load/store.
+ // If one exists for a given element type and address space, it is worth
+ // attempting to fill gaps as we may be able to create a legal masked
+ // load/store. If we do not end up with a legal masked load/store, chains with
+ // extra elements will be discarded.
+ const unsigned MinMaskedStoreNumElems = 4;
+ for (unsigned NumElems = MinMaskedStoreNumElems;
----------------
dakersnar wrote:
Removed this helper since your review comment was made, in favor of a simpler check. Resolving.
https://github.com/llvm/llvm-project/pull/159388
More information about the llvm-commits
mailing list