[llvm] [Loads] Support dereference for non-constant offset (PR #149551)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 2 09:55:48 PDT 2025
================
@@ -368,32 +368,26 @@ bool llvm::isDereferenceableAndAlignedInLoop(
AccessSize = MaxPtrDiff;
AccessSizeSCEV = PtrDiff;
} else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
- if (MinAdd->getNumOperands() != 2)
+ const auto *NewBase = dyn_cast<SCEVUnknown>(SE.getPointerBase(MinAdd));
+ if (!NewBase)
return false;
- const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0));
- const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1));
- if (!Offset || !NewBase)
- return false;
-
- // The following code below assumes the offset is unsigned, but GEP
- // offsets are treated as signed so we can end up with a signed value
- // here too. For example, suppose the initial PHI value is (i8 255),
- // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
- if (Offset->getAPInt().isNegative())
+ auto *OffsetSCEV = SE.removePointerBase(MinAdd);
+ if (!SE.isKnownNonNegative(OffsetSCEV))
return false;
// For the moment, restrict ourselves to the case where the offset is a
// multiple of the requested alignment and the base is aligned.
// TODO: generalize if a case found which warrants
- if (Offset->getAPInt().urem(Alignment.value()) != 0)
+ if (SE.getMinTrailingZeros(OffsetSCEV) < Log2(Alignment))
return false;
bool Overflow = false;
- AccessSize = MaxPtrDiff.uadd_ov(Offset->getAPInt(), Overflow);
+ AccessSize =
+ MaxPtrDiff.uadd_ov(SE.getUnsignedRangeMax(OffsetSCEV), Overflow);
if (Overflow)
----------------
annamthomas wrote:
We still continue to predicate the loads for test `deref_assumption_loop_access_start_variable` because we fail on the overflow check here.
I dug into this code and I cannot see why MaxPtrDiff is a large unsigned value.
Florian's change 9a1e47839a331 applies LoopGuards to MaxPtrDiff, which is beneficial in my test case (because we know %iv.start < %N). I also changed the IR to the form shown below, in addition to the assume, but the uadd_ov is still reported as overflowing.
```
define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P, i64 range(i64 0, 2000) %N, ptr noalias %b, ptr noalias %c, i64 range(i64 0, 2000) %iv.start) nofree nosync {
entry:
%a = getelementptr i8, ptr %P, i64 16
%cmp = icmp ult i64 %iv.start, %N
call void @llvm.assume(i1 %cmp)
%mul = mul i64 %N, 4
%add = add i64 %mul, 16
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %P, i64 %add) ]
br i1 %cmp, label %loop, label %exit
loop: ; preds = %mainloop, %loop.latch
%iv = phi i64 [ %iv.next, %loop.latch ], [ %iv.start, %entry ]
%gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
%gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
%l.b = load i32, ptr %gep.b, align 1
%c.1 = icmp sge i32 %l.b, 0
br i1 %c.1, label %loop.latch, label %loop.then
loop.then: ; preds = %loop
%l.a = load i32, ptr %gep.a, align 1
br label %loop.latch
loop.latch: ; preds = %loop.then, %loop
%merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop ]
%gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
store i32 %merge, ptr %gep.c, align 1
%iv.next = add nuw nsw i64 %iv, 1
%term.cond = icmp slt i64 %iv.next, %N
br i1 %term.cond, label %loop, label %exit
exit:
ret void
}
```
Here are the values:
AccessStart: (16 + (4 * %iv.start)<nuw><nsw> + %P)
AccessEnd: (16 + (4 * %N)<nuw><nsw> + %P)
PtrDiff: ((4 * %N)<nuw><nsw> + (-4 * %iv.start)<nsw>)
SE.applyLoopGuards(PtrDiff, LoopGuards): ((4 * %N)<nuw><nsw> + (-4 * ((-1 + (1 umax %N))<nsw> umin %iv.start))<nsw>)
MaxPtrDiff (signed): -4
We know that %iv.start ult %N. So, why is MaxPtrDiff a large unsigned value? This is also why the uadd_ov overflows. N has a range of (0, 2000), so we should be able to prove that MaxPtrDiff is a positive value.
@nikic @fhahn Any ideas what is missing here?
https://github.com/llvm/llvm-project/pull/149551
More information about the llvm-commits
mailing list