[llvm] c664769 - [AssumeBundles] offset should be added to correctly calculate align

Thu Apr 1 20:32:25 PDT 2021

Author: Juneyoung Lee
Date: 2021-04-02T12:32:05+09:00
New Revision: c6647693300be4b74575143db7429f284f3afeb1

URL: https://github.com/llvm/llvm-project/commit/c6647693300be4b74575143db7429f284f3afeb1
DIFF: https://github.com/llvm/llvm-project/commit/c6647693300be4b74575143db7429f284f3afeb1.diff

LOG: [AssumeBundles] offset should be added to correctly calculate align

This is a patch to fix the bug in alignment calculation (see https://reviews.llvm.org/D90529#2619492).

Consider this code:

```
call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 28)]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
; alignment of %arrayidx?
```

The llvm.assume guarantees that `%a - 28` is 32-byte aligned, meaning that `%a` is 32k + 28 for some integer k.
Therefore `%a - 4` (i.e. `%arrayidx`) is 32k + 24, which cannot be 32-byte aligned, but the existing code was calculating the pointer as 32-byte aligned.

The reason why this happened is as follows.
`DiffSCEV` stores `%arrayidx - %a`, which is -4.
`OffSCEV` stores the offset value of "align", which is 28.
`DiffSCEV` + `OffSCEV` = 24 should be used as the offset of `%a - 4` from 32k, but `DiffSCEV` - `OffSCEV` = -32 was being used instead. Since -32 is a multiple of 32, the pass wrongly concluded that the pointer is 32-byte aligned.

Reviewed By: Tyker

Differential Revision: https://reviews.llvm.org/D98759

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
    llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
    llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index bccf94fc217fe..45e3b7b05a074 100644

--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -141,7 +141,7 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
 
   // What we really want to know is the overall offset to the aligned
   // address. This address is displaced by the provided offset.
-  DiffSCEV = SE->getMinusSCEV(DiffSCEV, OffSCEV);
+  DiffSCEV = SE->getAddExpr(DiffSCEV, OffSCEV);
 
   LLVM_DEBUG(dbgs() << "AFI: alignment of " << *Ptr << " relative to "
                     << *AlignSCEV << " and offset " << *OffSCEV

diff  --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
index 610fd448c3b98..80761ad3be6b2 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -16,7 +16,7 @@ entry:
 define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
 entry:
   tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 24)]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 2
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 -2
   %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
@@ -28,7 +28,7 @@ entry:
 define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
 entry:
   tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 28)]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 1
   %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
@@ -37,6 +37,19 @@ entry:
 ; CHECK: ret i32
 }
 
+; TODO: this can be 8-bytes aligned
+define i32 @foo2b(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 28)]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
+  %0 = load i32, i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2b
+; CHECK: load i32, i32* {{[^,]+}}, align 4
+; CHECK: ret i32
+}
+
 define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
 entry:
   tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 0)]

diff  --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
index 453899c15c4fb..ddc0e5b641ae2 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
@@ -23,13 +23,13 @@ define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
 ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32, i64 24) ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 -2
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 16
 ; CHECK-NEXT:    ret i32 [[TMP0]]
 ;
 entry:
   call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i64 24)]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 2
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 -2
   %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
@@ -40,10 +40,28 @@ define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
 ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32, i64 28) ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 -1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 32
 ; CHECK-NEXT:    ret i32 [[TMP0]]
 ;
+entry:
+  call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i64 28)]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 1
+  %0 = load i32, i32* %arrayidx, align 4
+  ret i32 %0
+
+}
+
+; TODO: this can be 8-bytes aligned
+define i32 @foo2b(i32* nocapture %a) nounwind uwtable readonly {
+; CHECK-LABEL: define {{[^@]+}}@foo2b
+; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32, i64 28) ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 -1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
 entry:
   call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i64 28)]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1