[llvm] [LoadStoreVectorizer] Fill gaps in load/store chains to enable vectorization (PR #159388)

Drew Kersnar via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 5 06:51:10 PST 2025


================
@@ -1641,3 +1829,146 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
         .sextOrTrunc(OrigBitWidth);
   return std::nullopt;
 }
+
+/// Returns true if a vectorized access of \p SizeBytes bytes in address space
+/// \p AS with alignment \p Alignment is allowed by the target and is at least
+/// as fast as the equivalent sequence of \p VecElemBits-bit elementwise
+/// accesses would be.
+bool Vectorizer::accessIsAllowedAndFast(unsigned SizeBytes, unsigned AS,
+                                        Align Alignment,
+                                        unsigned VecElemBits) const {
+  // A naturally aligned access needs no further target queries.
+  if (Alignment.value() % SizeBytes == 0)
+    return true;
+  unsigned VectorizedSpeed = 0;
+  bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
+      F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
+  if (!AllowsMisaligned) {
+    LLVM_DEBUG(
+        dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace " << AS
+               << " with alignment " << Alignment.value()
+               << " is misaligned, and therefore can't be vectorized.\n");
+    return false;
+  }
+
+  // The misaligned vector access is legal; only prefer it when the target
+  // reports it to be at least as fast as the elementwise accesses it would
+  // replace. The return value of this query is irrelevant here — only the
+  // relative speed is compared.
+  unsigned ElementwiseSpeed = 0;
+  TTI.allowsMisalignedMemoryAccesses(F.getContext(), VecElemBits, AS,
+                                     Alignment, &ElementwiseSpeed);
+  if (VectorizedSpeed < ElementwiseSpeed) {
+    LLVM_DEBUG(dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace "
+                      << AS << " with alignment " << Alignment.value()
+                      << " has relative speed " << VectorizedSpeed
+                      << ", which is lower than the elementwise speed of "
+                      << ElementwiseSpeed
+                      << ".  Therefore this access won't be vectorized.\n");
+    return false;
+  }
+  return true;
+}
+
+bool Vectorizer::shouldAttemptMaskedLoadStore(
+    const ArrayRef<ChainElem> C) const {
+  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
+
+  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
+  Type *ElementType = getLoadStoreType(C[0].Inst)->getScalarType();
+  unsigned VecRegBits = TTI.getLoadStoreVecRegBitWidth(AS);
+  // Assume max alignment, splitChainByAlignment will legalize it later if the
+  // necessary alignment is not reached.
+  Align OptimisticAlign = Align(VecRegBits / 8);
+  unsigned int MaxVectorNumElems =
+      VecRegBits / DL.getTypeSizeInBits(ElementType);
+
+  // Attempt to find the smallest power-of-two number of elements that, if
+  // well aligned, could be represented as a legal masked load/store.
+  // If one exists for a given element type and address space, it is worth
+  // attempting to fill gaps as we may be able to create a legal masked
+  // load/store. If we do not end up with a legal masked load/store, chains with
+  // extra elements will be discarded.
+  const unsigned MinMaskedStoreNumElems = 4;
+  for (unsigned NumElems = MinMaskedStoreNumElems;
----------------
dakersnar wrote:

Removed this helper since your review comment was made, in favor of a simpler check. Resolving.

https://github.com/llvm/llvm-project/pull/159388


More information about the llvm-commits mailing list