[llvm] [LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop (PR #96752)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 19 07:03:56 PDT 2024
================
@@ -293,26 +293,65 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
// TODO: Handle overlapping accesses.
// We should be computing AccessSize as (TC - 1) * Step + EltSize.
- if (EltSize.sgt(Step->getAPInt()))
+ bool StepIsNegative = Step->getAPInt().isNegative();
+ APInt AbsStep = Step->getAPInt().abs();
+ if (EltSize.ugt(AbsStep))
+ return false;
+
+ // For the moment, restrict ourselves to the case where the access size is a
+ // multiple of the requested alignment and the base is aligned.
+ // TODO: generalize if a case found which warrants
+ if (EltSize.urem(Alignment.value()) != 0)
return false;
// Compute the total access size for access patterns with unit stride and
// patterns with gaps. For patterns with unit stride, Step and EltSize are the
// same.
// For patterns with gaps (i.e. non unit stride), we are
// accessing EltSize bytes at every Step.
- APInt AccessSize = TC * Step->getAPInt();
+ APInt AccessSize = TC * AbsStep;
assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
"implied by addrec definition");
Value *Base = nullptr;
if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) {
+ if (StepIsNegative)
+ return false;
Base = StartS->getValue();
} else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) {
- // Handle (NewBase + offset) as start value.
- const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0));
- const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1));
- if (StartS->getNumOperands() == 2 && Offset && NewBase) {
+ const SCEV *End = AddRec->evaluateAtIteration(
+ SE.getConstant(StartS->getType(), TC - 1), SE);
+
+ // The step recurrence could be negative so it's necessary to find the min
+ // and max accessed addresses in the loop.
+ const SCEV *Min = SE.getUMinExpr(StartS, End);
+ const SCEV *Max = SE.getUMaxExpr(StartS, End);
+ if (isa<SCEVCouldNotCompute>(Min) || isa<SCEVCouldNotCompute>(Max))
+ return false;
+
+ // Now calculate the total access size, which is (max - min) + element_size.
+ const SCEV *Diff = SE.getMinusSCEV(Max, Min);
+ if (isa<SCEVCouldNotCompute>(Diff))
+ return false;
+
+ const SCEV *AS = SE.getAddExpr(
+ Diff, SE.getConstant(Diff->getType(), EltSize.getZExtValue()));
+ auto *ASC = dyn_cast<SCEVConstant>(AS);
+ if (!ASC)
+ return false;
+
+ if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(Min)) {
----------------
david-arm wrote:
We certainly could, but then we would effectively be duplicating some of the work that is already done in `isDereferenceableAndAlignedPointer`. It's almost as if we want to call `isDereferenceableAndAlignedPointer` on both the minimum and maximum pointers accessed in the loop.
https://github.com/llvm/llvm-project/pull/96752
More information about the llvm-commits
mailing list