[llvm] [LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop (PR #96752)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 12:49:25 PDT 2024
================
@@ -318,11 +321,24 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
// TODO: generalize if a case found which warrants
if (Offset->getAPInt().urem(Alignment.value()) != 0)
return false;
+ if (StepIsNegative) {
+ // In the last iteration of the loop the address we access will be
+ // lower than the first by (TC - 1) * Step. So we need to make sure
+ // that there is enough room in Offset to accommodate this.
+ APInt SubOffset = (TC - 1) * AbsStep;
+ if (Offset->getAPInt().ult(SubOffset))
+ return false;
+ // We can safely use the new base because the decrementing pointer is
+ // always guaranteed to be >= new base. The total access size needs to
+ // take into account the start offset and the loaded element size.
+ AccessSize = Offset->getAPInt() + EltSize;
----------------
fhahn wrote:
Would something like the below work? I might be missing something, but it looks like the overflow check should trigger there?
```
; Reproducer sketch: a loop with an i8 induction variable that counts down
; from 255 and, on each iteration, conditionally loads from %local_src and
; stores to %local_dest, guarded by a byte loaded from %local_cmp.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
; Declared only (no body in this snippet); called below to fill %local_src and
; %local_cmp before the loop runs.
declare void @init(ptr nocapture nofree)
define void @test(ptr nocapture noundef writeonly %dest) {
entry:
; Three local [128 x i8] buffers, each 4-byte aligned.
%local_dest = alloca [128 x i8], align 4
%local_src = alloca [128 x i8], align 4
%local_cmp = alloca [128 x i8], align 4
call void @init(ptr %local_src)
call void @init(ptr %local_cmp)
br label %for.body
for.body:
; %iv starts at 255 (the same i8 bit pattern as -1) and is decremented by 1 in
; %for.inc, so the loop walks the index downwards.
%iv = phi i8 [ 255, %entry ], [ %iv.next, %for.inc ]
; NOTE(review): this and the other GEPs use a [1024 x i8] source element type
; while the allocas above are [128 x i8] -- confirm which size is intended.
%arrayidx = getelementptr inbounds [1024 x i8], ptr %local_cmp, i8 0, i8 %iv
%0 = load i8, ptr %arrayidx, align 1
; Skip the conditional load/store when the guard byte equals 3.
%cmp3.not = icmp eq i8 %0, 3
br i1 %cmp3.not, label %for.inc, label %if.then
if.then:
; Conditionally executed load from %local_src at the same index.
%arrayidx5 = getelementptr inbounds [1024 x i8], ptr %local_src, i8 0, i8 %iv
%1 = load i8, ptr %arrayidx5, align 1
; Shift the loaded byte left by 2 and store it to %local_dest[%iv].
%mul = shl nsw i8 %1, 2
%arrayidx7 = getelementptr inbounds [1024 x i8], ptr %local_dest, i8 0, i8 %iv
store i8 %mul, ptr %arrayidx7, align 1
br label %for.inc
for.inc:
%iv.next = add nsw i8 %iv, -1
; The exit test compares the current %iv (not %iv.next) against 200, so the
; body runs for %iv = 255 down to 200 inclusive.
%cmp2.not = icmp eq i8 %iv, 200
br i1 %cmp2.not, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
; Copy %local_dest out to the caller-provided %dest.
; NOTE(review): the length operand is written as i8 1024, which does not fit in
; 8 bits, and @llvm.memcpy.p0.p0.i8 is not declared in this snippet -- TODO
; confirm the intended length type and value.
call void @llvm.memcpy.p0.p0.i8(ptr noundef nonnull align 4 dereferenceable(1024) %dest, ptr noundef nonnull align 4 dereferenceable(1024) %local_dest, i8 1024, i1 false)
ret void
}
```
https://github.com/llvm/llvm-project/pull/96752
More information about the llvm-commits
mailing list