[llvm] [LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop (PR #96752)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 6 02:35:44 PST 2024
================
@@ -276,84 +277,85 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
bool llvm::isDereferenceableAndAlignedInLoop(
LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
- auto &DL = LI->getDataLayout();
- Value *Ptr = LI->getPointerOperand();
-
- APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
- DL.getTypeStoreSize(LI->getType()).getFixedValue());
- const Align Alignment = LI->getAlign();
+ const SCEV *Ptr = SE.getSCEV(LI->getPointerOperand());
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ptr);
- Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
-
- // If given a uniform (i.e. non-varying) address, see if we can prove the
- // access is safe within the loop w/o needing predication.
- if (L->isLoopInvariant(Ptr))
- return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
- HeaderFirstNonPHI, AC, &DT);
-
- // Otherwise, check to see if we have a repeating access pattern where we can
- // prove that all accesses are well aligned and dereferenceable.
- auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+ // Check to see if we have a repeating access pattern and it's possible
+ // to prove all accesses are well aligned.
if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
return false;
+
auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
if (!Step)
return false;
- auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
- if (!TC)
+ // For the moment, restrict ourselves to the case where the access size is a
+ // multiple of the requested alignment and the base is aligned.
+ // TODO: generalize if a case found which warrants
+ const Align Alignment = LI->getAlign();
+ auto &DL = LI->getDataLayout();
+ APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+ DL.getTypeStoreSize(LI->getType()).getFixedValue());
+ if (EltSize.urem(Alignment.value()) != 0)
return false;
// TODO: Handle overlapping accesses.
- // We should be computing AccessSize as (TC - 1) * Step + EltSize.
- if (EltSize.sgt(Step->getAPInt()))
+ if (EltSize.ugt(Step->getAPInt().abs()))
return false;
- // Compute the total access size for access patterns with unit stride and
- // patterns with gaps. For patterns with unit stride, Step and EltSize are the
- // same.
- // For patterns with gaps (i.e. non unit stride), we are
- // accessing EltSize bytes at every Step.
- APInt AccessSize = TC * Step->getAPInt();
+ const SCEV *MaxBECount =
+ SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates);
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
----------------
david-arm wrote:
Actually, I can't pass in the result of `SE.getSmallConstantMaxTripCount` anymore because it returns an unsigned value. Instead I have changed it to call `SE.getPredicatedConstantMaxBackedgeTakenCount`, which is what `getSmallConstantMaxTripCount` does anyway.
https://github.com/llvm/llvm-project/pull/96752
More information about the llvm-commits mailing list