[llvm] [LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop (PR #96752)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 11 12:49:25 PDT 2024


================
@@ -318,11 +321,24 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
       // TODO: generalize if a case found which warrants
       if (Offset->getAPInt().urem(Alignment.value()) != 0)
         return false;
+      if (StepIsNegative) {
+        // In the last iteration of the loop the address we access will be
+        // lower than the first by (TC - 1) * Step. So we need to make sure
+        // that there is enough room in Offset to accommodate this.
+        APInt SubOffset = (TC - 1) * AbsStep;
+        if (Offset->getAPInt().ult(SubOffset))
+          return false;
+        // We can safely use the new base because the decrementing pointer is
+        // always guaranteed to be >= new base. The total access size needs to
+        // take into account the start offset and the loaded element size.
+        AccessSize = Offset->getAPInt() + EltSize;
----------------
fhahn wrote:

Would something like the test below work? I might be missing something, but it looks like the overflow check should trigger there?


```

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"

declare void @init(ptr nocapture nofree)


; Reverse-loop reproducer: an i8 induction variable counts down from 255 to
; 200 (56 iterations). Each iteration loads a byte from %local_cmp; unless that
; byte equals 3, it loads a byte from %local_src, shifts it left by 2 and
; stores the result to %local_dest. After the loop, %local_dest is copied to
; %dest via memcpy.
;
; NOTE(review): the allocas are [128 x i8], while the GEPs use [1024 x i8] as
; the source element type and the memcpy operands are marked
; dereferenceable(1024) -- confirm this mismatch is intentional for the test.
define void @test(ptr nocapture noundef writeonly %dest) {

entry:
  %local_dest = alloca [128 x i8], align 4
  %local_src = alloca [128 x i8], align 4
  %local_cmp = alloca [128 x i8], align 4
  ; Only %local_src and %local_cmp are passed to @init; %local_dest is not.
  call void @init(ptr %local_src)
  call void @init(ptr %local_cmp)
  br label %for.body

for.body:
  ; %iv starts at 255 and is decremented by 1 in %for.inc.
  %iv = phi i8 [ 255, %entry ], [ %iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds [1024 x i8], ptr %local_cmp, i8 0, i8 %iv
  %0 = load i8, ptr %arrayidx, align 1
  ; Skip the conditional load/store when the compared byte is 3.
  %cmp3.not = icmp eq i8 %0, 3
  br i1 %cmp3.not, label %for.inc, label %if.then

if.then:
  ; Conditionally executed load from %local_src at the same index.
  %arrayidx5 = getelementptr inbounds [1024 x i8], ptr %local_src, i8 0, i8 %iv
  %1 = load i8, ptr %arrayidx5, align 1
  %mul = shl nsw i8 %1, 2
  %arrayidx7 = getelementptr inbounds [1024 x i8], ptr %local_dest, i8 0, i8 %iv
  store i8 %mul, ptr %arrayidx7, align 1
  br label %for.inc

for.inc:
  %iv.next = add nsw i8 %iv, -1
  ; The exit test uses the pre-decrement %iv, so the iteration with %iv == 200
  ; is the last one executed.
  %cmp2.not = icmp eq i8 %iv, 200
  br i1 %cmp2.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ; NOTE(review): the length operand is written as `i8 1024`, which is larger
  ; than any 8-bit value, and @llvm.memcpy.p0.p0.i8 is not declared in this
  ; snippet -- confirm the intended length type before using this as a test.
  call void @llvm.memcpy.p0.p0.i8(ptr noundef nonnull align 4 dereferenceable(1024) %dest, ptr noundef nonnull align 4 dereferenceable(1024) %local_dest, i8 1024, i1 false)
  ret void
}
```

https://github.com/llvm/llvm-project/pull/96752


More information about the llvm-commits mailing list