[llvm] d8cd7fc - AlignmentFromAssumptions should only track pointer operand users (#73370)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 7 08:35:40 PST 2023


Author: alex-t
Date: 2023-12-07T17:35:35+01:00
New Revision: d8cd7fc1f404161f9ec378a1cf3c52f8b8e9beca

URL: https://github.com/llvm/llvm-project/commit/d8cd7fc1f404161f9ec378a1cf3c52f8b8e9beca
DIFF: https://github.com/llvm/llvm-project/commit/d8cd7fc1f404161f9ec378a1cf3c52f8b8e9beca.diff

LOG: AlignmentFromAssumptions should only track pointer operand users (#73370)

AlignmentFromAssumptions uses SCEV to update the load/store alignment.
It tracks down the use-def chains for the pointer which it takes from
the assumption cache until it reaches the load or store instruction. It
mistakenly adds to the worklist the users of the load result
irrespective of the fact that the load result has no connection with the
original pointer, moreover, it is not a pointer at all in most cases.
Thus the def-use chain contains irrelevant load users. When it is a
store instruction the algorithm attempts to adjust its alignment to the
alignment of the original pointer. The problem appears when the load and
store memory operand pointers belong to different address spaces and
possibly have different sizes.
The 4bf015c035e4e5b63c7222dfb15ff274a5ed905c was an attempt to address a
similar problem. The truncation or zero extension was added to make
pointers the same size. That looks strange to me because the zero
extension of the pointer is not legal. The test in the
4bf015c035e4e5b63c7222dfb15ff274a5ed905c does not work any longer as for
the explicit address spaces conversion the addrspacecast is generated.
Summarize:
1. For the alloca to global address spaces conversion addrspacecasts are
used, so the code added by the 4bf015c035e4e5b63c7222dfb15ff274a5ed905c
is no longer functional.
2. The AlignmentFromAssumptions algorithm should not add the load users
to the worklist as they have nothing to do with the original pointer.
3. Instead we only track users that are: GetelementPtrIns, PHINodes.

Added: 
    llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll

Modified: 
    llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 63b7903ef955d9..f3422a705dca7a 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -83,11 +83,7 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
                              const SCEV *OffSCEV, Value *Ptr,
                              ScalarEvolution *SE) {
   const SCEV *PtrSCEV = SE->getSCEV(Ptr);
-  // On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
-  // (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
-  // may disagree. Trunc/extend so they agree.
-  PtrSCEV = SE->getTruncateOrZeroExtend(
-      PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
+
   const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
   if (isa<SCEVCouldNotCompute>(DiffSCEV))
     return Align(1);
@@ -267,11 +263,17 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
     // Now that we've updated that use of the pointer, look for other uses of
     // the pointer to update.
     Visited.insert(J);
-    for (User *UJ : J->users()) {
-      Instruction *K = cast<Instruction>(UJ);
-      if (!Visited.count(K))
-        WorkList.push_back(K);
-    }
+    if (isa<GetElementPtrInst>(J) || isa<PHINode>(J))
+      for (auto &U : J->uses()) {
+        if (U->getType()->isPointerTy()) {
+          Instruction *K = cast<Instruction>(U.getUser());
+          StoreInst *SI = dyn_cast<StoreInst>(K);
+          if (SI && SI->getPointerOperandIndex() != U.getOperandNo())
+            continue;
+          if (!Visited.count(K))
+            WorkList.push_back(K);
+        }
+      }
   }
 
   return true;

diff  --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll
new file mode 100644
index 00000000000000..1c388528ae4b19
--- /dev/null
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
+
+define void @widget(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: define void @widget(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(3) nocapture [[ARG1:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[GETELEMENTPTR]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    [[GETELEMENTPTR2:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG1]], i32 1
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR2]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %getelementptr, i64 4) ]
+  %load = load i32, ptr addrspace(1) %getelementptr, align 2
+  %getelementptr2 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %load, ptr addrspace(3) %getelementptr2, align 4
+  ret void
+}
+
+define void @wibble(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define void @wibble(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT:    br i1 [[ICMP]], label [[BB4:%.*]], label [[BB5:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 6
+; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 7
+; CHECK-NEXT:    br label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], [[BB4]] ], [ [[GETELEMENTPTR6]], [[BB5]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PHI]], i64 4
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR8]], align 2
+; CHECK-NEXT:    [[GETELEMENTPTR9:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR9]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %icmp = icmp ugt i32 %arg2, 10
+  br i1 %icmp, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb
+  %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 6
+  br label %bb7
+
+bb5:                                              ; preds = %bb
+  %getelementptr6 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+  br label %bb7
+
+bb7:                                              ; preds = %bb5, %bb4
+  %phi = phi ptr addrspace(1) [ %getelementptr, %bb4 ], [ %getelementptr6, %bb5 ]
+  %getelementptr8 = getelementptr inbounds i32, ptr addrspace(1) %phi, i64 4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load i32, ptr addrspace(1) %getelementptr8, align 2
+  %getelementptr9 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+  store i32 %load, ptr addrspace(3) %getelementptr9, align 4
+  ret void
+}
+
+define void @ham(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define void @ham(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 0
+; CHECK-NEXT:    [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 10
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT:    br i1 [[ICMP]], label [[BB5:%.*]], label [[BB10:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], [[BB:%.*]] ], [ [[GETELEMENTPTR8:%.*]], [[BB5]] ]
+; CHECK-NEXT:    [[PHI6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[ADD:%.*]], [[BB5]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR7:%.*]] = getelementptr i32, ptr addrspace(1) [[PHI]], i32 4
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR7]], align 4
+; CHECK-NEXT:    [[ADD]] = add i32 [[PHI6]], [[LOAD]]
+; CHECK-NEXT:    [[GETELEMENTPTR8]] = getelementptr i32, ptr addrspace(1) [[PHI]], i32 [[ARG2]]
+; CHECK-NEXT:    [[ICMP9:%.*]] = icmp eq ptr addrspace(1) [[GETELEMENTPTR8]], [[GETELEMENTPTR4]]
+; CHECK-NEXT:    br i1 [[ICMP9]], label [[BB5]], label [[BB10]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[PHI11:%.*]] = phi i32 [ 0, [[BB]] ], [ [[ADD]], [[BB5]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR12:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT:    store i32 [[PHI11]], ptr addrspace(3) [[GETELEMENTPTR12]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
+  %getelementptr4 = getelementptr i32, ptr addrspace(1) %arg, i32 10
+  %icmp = icmp ugt i32 %arg2, 10
+  br i1 %icmp, label %bb5, label %bb10
+
+bb5:                                              ; preds = %bb5, %bb
+  %phi = phi ptr addrspace(1) [ %getelementptr, %bb ], [ %getelementptr8, %bb5 ]
+  %phi6 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+  %getelementptr7 = getelementptr i32, ptr addrspace(1) %phi, i32 4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load i32, ptr addrspace(1) %getelementptr7, align 2
+  %add = add i32 %phi6, %load
+  %getelementptr8 = getelementptr i32, ptr addrspace(1) %phi, i32 %arg2
+  %icmp9 = icmp eq ptr addrspace(1) %getelementptr8, %getelementptr4
+  br i1 %icmp9, label %bb5, label %bb10
+
+bb10:                                             ; preds = %bb5, %bb
+  %phi11 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+  %getelementptr12 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+  store i32 %phi11, ptr addrspace(3) %getelementptr12, align 4
+  ret void
+}
+
+define void @quux(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define void @quux(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 6
+; CHECK-NEXT:    [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 7
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[ICMP]], ptr addrspace(1) [[GETELEMENTPTR]], ptr addrspace(1) [[GETELEMENTPTR4]]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SELECT]], align 2
+; CHECK-NEXT:    [[GETELEMENTPTR5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR5]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %icmp = icmp ugt i32 %arg2, 10
+  %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 6
+  %getelementptr4 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+  %select = select i1 %icmp, ptr addrspace(1) %getelementptr, ptr addrspace(1) %getelementptr4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load i32, ptr addrspace(1) %select, align 2
+  %getelementptr5 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+  store i32 %load, ptr addrspace(3) %getelementptr5, align 4
+  ret void
+}
+
+define void @widget.1(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define void @widget.1(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ADDRSPACECAST:%.*]] = addrspacecast ptr addrspace(3) [[ARG3]] to ptr addrspace(1)
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ADDRSPACECAST]]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) [[ARG3]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR]], align 2
+; CHECK-NEXT:    [[GETELEMENTPTR4:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR4]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %addrspacecast = addrspacecast ptr addrspace(3) %arg3 to ptr addrspace(1)
+  %getelementptr = getelementptr i32, ptr addrspace(1) %addrspacecast
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg3, i64 4) ]
+  %load = load i32, ptr addrspace(1) %getelementptr, align 2
+  %getelementptr4 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+  store i32 %load, ptr addrspace(3) %getelementptr4, align 2
+  ret void
+}
+
+define void @baz(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: define void @baz(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(3) nocapture [[ARG1:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[ARG]], i64 16
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    [[GETELEMENTPTR2:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG1]], i32 1
+; CHECK-NEXT:    store ptr addrspace(1) [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR2]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load ptr addrspace(1), ptr addrspace(1) %getelementptr, align 2
+  %getelementptr2 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store ptr addrspace(1) %load, ptr addrspace(3) %getelementptr2, align 2
+  ret void
+}
+
+define void @foo(ptr addrspace(1) nocapture readonly %arg, i32 %arg1) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(3), ptr addrspace(1) [[ARG]], i64 16
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    store i32 [[ARG1]], ptr addrspace(3) [[LOAD]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load ptr addrspace(3), ptr addrspace(1) %getelementptr, align 2
+  store i32 %arg1, ptr addrspace(3) %load, align 2
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }


        


More information about the llvm-commits mailing list