[llvm] [LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop (PR #96752)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 6 01:46:09 PST 2024
================
@@ -276,84 +277,85 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
bool llvm::isDereferenceableAndAlignedInLoop(
LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
- auto &DL = LI->getDataLayout();
- Value *Ptr = LI->getPointerOperand();
-
- APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
- DL.getTypeStoreSize(LI->getType()).getFixedValue());
- const Align Alignment = LI->getAlign();
+ const SCEV *Ptr = SE.getSCEV(LI->getPointerOperand());
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ptr);
- Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
-
- // If given a uniform (i.e. non-varying) address, see if we can prove the
- // access is safe within the loop w/o needing predication.
- if (L->isLoopInvariant(Ptr))
- return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
- HeaderFirstNonPHI, AC, &DT);
-
- // Otherwise, check to see if we have a repeating access pattern where we can
- // prove that all accesses are well aligned and dereferenceable.
- auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+ // Check to see if we have a repeating access pattern and it's possible
+ // to prove all accesses are well aligned.
if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
return false;
+
auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
if (!Step)
return false;
- auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
- if (!TC)
+ // For the moment, restrict ourselves to the case where the access size is a
+ // multiple of the requested alignment and the base is aligned.
+  // TODO: generalize if a case is found which warrants it.
+ const Align Alignment = LI->getAlign();
+ auto &DL = LI->getDataLayout();
+ APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+ DL.getTypeStoreSize(LI->getType()).getFixedValue());
+ if (EltSize.urem(Alignment.value()) != 0)
return false;
// TODO: Handle overlapping accesses.
- // We should be computing AccessSize as (TC - 1) * Step + EltSize.
- if (EltSize.sgt(Step->getAPInt()))
+ if (EltSize.ugt(Step->getAPInt().abs()))
return false;
- // Compute the total access size for access patterns with unit stride and
- // patterns with gaps. For patterns with unit stride, Step and EltSize are the
- // same.
- // For patterns with gaps (i.e. non unit stride), we are
- // accessing EltSize bytes at every Step.
- APInt AccessSize = TC * Step->getAPInt();
+ const SCEV *MaxBECount =
+ SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates);
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
----------------
david-arm wrote:
Yeah sorry, you're right. It shouldn't be part of this change. I think I just did it whilst addressing one of your review comments about reusing `getStartAndEndForAccess`. It should be fine to pass in `SE.getSmallConstantMaxTripCount` for now.
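For reference, a minimal sketch of that interim approach (illustrative only, not the code from the patch; it assumes the variables already in scope in the hunk above, i.e. `LI`, `L`, `SE`, `DL` and `Predicates`):

```c++
// Illustrative sketch only: keep the existing constant max trip count
// query for now, as suggested above, and bail out if no bound is known.
auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
if (!TC)
  return false; // No usable constant bound on the trip count.

// Express TC - 1 (the backedge-taken count) as a SCEV in the pointer's
// index type so it can drive the last-access computation; the choice of
// index type here is an assumption made for the example.
Type *IdxTy = DL.getIndexType(LI->getPointerOperand()->getType());
const SCEV *MaxBECount = SE.getConstant(IdxTy, TC - 1);
```

Switching over to `getPredicatedSymbolicMaxBackedgeTakenCount`, as in the hunk above, can then happen in a separate change.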
https://github.com/llvm/llvm-project/pull/96752