[llvm] [Loads] Support dereference for non-constant offset (PR #149551)

Tue Sep 2 09:55:48 PDT 2025

================
@@ -368,32 +368,26 @@ bool llvm::isDereferenceableAndAlignedInLoop(
     AccessSize = MaxPtrDiff;
     AccessSizeSCEV = PtrDiff;
   } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
-    if (MinAdd->getNumOperands() != 2)
+    const auto *NewBase = dyn_cast<SCEVUnknown>(SE.getPointerBase(MinAdd));
+    if (!NewBase)
       return false;
 
-    const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0));
-    const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1));
-    if (!Offset || !NewBase)
-      return false;
-
-    // The following code below assumes the offset is unsigned, but GEP
-    // offsets are treated as signed so we can end up with a signed value
-    // here too. For example, suppose the initial PHI value is (i8 255),
-    // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
-    if (Offset->getAPInt().isNegative())
+    auto *OffsetSCEV = SE.removePointerBase(MinAdd);
+    if (!SE.isKnownNonNegative(OffsetSCEV))
       return false;
 
     // For the moment, restrict ourselves to the case where the offset is a
     // multiple of the requested alignment and the base is aligned.
     // TODO: generalize if a case found which warrants
-    if (Offset->getAPInt().urem(Alignment.value()) != 0)
+    if (SE.getMinTrailingZeros(OffsetSCEV) < Log2(Alignment))
       return false;
 
     bool Overflow = false;
-    AccessSize = MaxPtrDiff.uadd_ov(Offset->getAPInt(), Overflow);
+    AccessSize =
+        MaxPtrDiff.uadd_ov(SE.getUnsignedRangeMax(OffsetSCEV), Overflow);
     if (Overflow)
----------------
annamthomas wrote:

We still continue to predicate the loads for the test `deref_assumption_loop_access_start_variable` because we fail the overflow check here.

I dug into this code and I cannot see why MaxPtrDiff is a large unsigned value.
Florian's change 9a1e47839a331 applies LoopGuards to MaxPtrDiff, which is beneficial in my test case (because we know %iv.start < %N). I also changed the IR to have this form in addition to the assume, but we still say the uadd_ov overflows.
```
define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P, i64 range(i64 0, 2000) %N, ptr noalias %b, ptr noalias %c, i64 range(i64 0, 2000) %iv.start) nofree nosync {
entry:
  %a = getelementptr i8, ptr %P, i64 16
  %cmp = icmp ult i64 %iv.start, %N
  call void @llvm.assume(i1 %cmp)
  %mul = mul i64 %N, 4
  %add = add i64 %mul, 16
  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %P, i64 %add) ]
  br i1 %cmp, label %loop, label %exit

loop:                                             ; preds = %mainloop, %loop.latch
  %iv = phi i64 [ %iv.next, %loop.latch ], [ %iv.start, %entry ]
  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  %l.b = load i32, ptr %gep.b, align 1
  %c.1 = icmp sge i32 %l.b, 0
  br i1 %c.1, label %loop.latch, label %loop.then

loop.then:                                        ; preds = %loop
  %l.a = load i32, ptr %gep.a, align 1
  br label %loop.latch

loop.latch:                                       ; preds = %loop.then, %loop
  %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop ]
  %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
  store i32 %merge, ptr %gep.c, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %term.cond = icmp slt i64 %iv.next, %N
  br i1 %term.cond, label %loop, label %exit

exit:
  ret void
}
```

Here are the values:
AccessStart:  (16 + (4 * %iv.start)<nuw><nsw> + %P)  
AccessEnd:  (16 + (4 * %N)<nuw><nsw> + %P) 
PtrDiff: ((4 * %N)<nuw><nsw> + (-4 * %iv.start)<nsw>)
SE.applyLoopGuards(PtrDiff, LoopGuards):   ((4 * %N)<nuw><nsw> + (-4 * ((-1 + (1 umax %N))<nsw> umin %iv.start))<nsw>)

MaxPtrDiff (signed): -4

We know that %iv.start ult %N (that is the assumed `%cmp`). So, why is MaxPtrDiff a large unsigned value? This is also why the uadd_ov overflows. %N has a range of [0, 2000), so we should be able to prove that MaxPtrDiff is a positive value.
@nikic @fhahn Any ideas what is missing here?

https://github.com/llvm/llvm-project/pull/149551