[llvm] AlignmentFromAssumptions should only track pointer operand users (PR #73370)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 1 09:07:08 PST 2023


https://github.com/alex-t updated https://github.com/llvm/llvm-project/pull/73370

From 09bceafc5dbff302157c1affe945cf9c3325fee2 Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Fri, 24 Nov 2023 21:11:35 +0100
Subject: [PATCH 1/6] AlignmentFromAssumptions should not track users of the
 load result

---
 .../Scalar/AlignmentFromAssumptions.cpp       | 20 +++++++++----------
 .../alignment-from-assumptions_dont_crash.ll  | 16 +++++++++++++++
 2 files changed, 26 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 63b7903ef955d9b..905ff2e80cd111a 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -83,11 +83,7 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
                              const SCEV *OffSCEV, Value *Ptr,
                              ScalarEvolution *SE) {
   const SCEV *PtrSCEV = SE->getSCEV(Ptr);
-  // On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
-  // (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
-  // may disagree. Trunc/extend so they agree.
-  PtrSCEV = SE->getTruncateOrZeroExtend(
-      PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
+
   const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
   if (isa<SCEVCouldNotCompute>(DiffSCEV))
     return Align(1);
@@ -216,6 +212,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
   }
 
   while (!WorkList.empty()) {
+    bool AddUsers = true;
     Instruction *J = WorkList.pop_back_val();
     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
@@ -226,6 +223,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
         LI->setAlignment(NewAlignment);
         ++NumLoadAlignChanged;
       }
+      // The user of a load consumes the loaded data, not the pointer!
+      AddUsers = false;
     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
         continue;
@@ -267,11 +266,12 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
     // Now that we've updated that use of the pointer, look for other uses of
     // the pointer to update.
     Visited.insert(J);
-    for (User *UJ : J->users()) {
-      Instruction *K = cast<Instruction>(UJ);
-      if (!Visited.count(K))
-        WorkList.push_back(K);
-    }
+    if (AddUsers)
+      for (User *UJ : J->users()) {
+        Instruction *K = cast<Instruction>(UJ);
+        if (!Visited.count(K))
+          WorkList.push_back(K);
+      }
   }
 
   return true;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
new file mode 100644
index 000000000000000..107d677cdbc271c
--- /dev/null
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -0,0 +1,16 @@
+; Test that we don't crash.
+; RUN: opt < %s -passes=alignment-from-assumptions -S
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @vectorize_global_local(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+bb:
+  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 4
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+declare void @llvm.assume(i1 noundef)
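
Note: patch 1 stops the worklist walk at loads, because a load's result is
the loaded data, not the assumed-aligned pointer. It also drops the
trunc/zext workaround in getNewAlignment() that, per the deleted comment,
reconciled AASCEV and PtrSCEV widths on targets like AMDGPU where pointer
sizes differ by address space. A minimal sketch of what the deleted lines
computed, using the standard ScalarEvolution API (the helper name is
illustrative, not from the patch):

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// SCEV subtraction requires both operands to share an effective type; on
// AMDGPU a 32-bit and a 64-bit pointer SCEV must be reconciled first.
// This mirrors the code removed from getNewAlignment() above.
static const SCEV *ptrDiff(ScalarEvolution *SE, const SCEV *PtrSCEV,
                           const SCEV *AASCEV) {
  PtrSCEV = SE->getTruncateOrZeroExtend(
      PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
  return SE->getMinusSCEV(PtrSCEV, AASCEV);
}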

From 6e4189199b206e4423070830ba24811e7457d133 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Wed, 29 Nov 2023 21:36:24 +0100
Subject: [PATCH 2/6] AlignmentFromAssumptions should only track pointer
 operand users

---
 .../Scalar/AlignmentFromAssumptions.h         |  4 +-
 .../Scalar/AlignmentFromAssumptions.cpp       | 25 ++++---
 .../alignment-from-assumptions_dont_crash.ll  | 68 ++++++++++++++++++-
 3 files changed, 82 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index 10b6e1c6a21b639..83ee9c26fbd11c6 100644
--- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -25,6 +25,7 @@ class AssumptionCache;
 class DominatorTree;
 class ScalarEvolution;
 class SCEV;
+class AAResults;
 
 struct AlignmentFromAssumptionsPass
     : public PassInfoMixin<AlignmentFromAssumptionsPass> {
@@ -32,10 +33,11 @@ struct AlignmentFromAssumptionsPass
 
   // Glue for old PM.
   bool runImpl(Function &F, AssumptionCache &AC, ScalarEvolution *SE_,
-               DominatorTree *DT_);
+               DominatorTree *DT_, AAResults *AA_);
 
   ScalarEvolution *SE = nullptr;
   DominatorTree *DT = nullptr;
+  AAResults *AA = nullptr;
 
   bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr,
                             const SCEV *&AlignSCEV, const SCEV *&OffSCEV);
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 905ff2e80cd111a..13e939521cbeb38 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -212,7 +212,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
   }
 
   while (!WorkList.empty()) {
-    bool AddUsers = true;
     Instruction *J = WorkList.pop_back_val();
     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
@@ -223,8 +222,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
         LI->setAlignment(NewAlignment);
         ++NumLoadAlignChanged;
       }
-      // The user of a load consumes the loaded data, not the pointer!
-      AddUsers = false;
     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
         continue;
@@ -265,13 +262,17 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
 
     // Now that we've updated that use of the pointer, look for other uses of
     // the pointer to update.
-    Visited.insert(J);
-    if (AddUsers)
-      for (User *UJ : J->users()) {
-        Instruction *K = cast<Instruction>(UJ);
-        if (!Visited.count(K))
-          WorkList.push_back(K);
+    if (auto UJ = dyn_cast<User>(J))
+      for (auto &U : UJ->uses()) {
+        if (U->getType()->isPointerTy()) {
+          if (AA->alias(U, AAPtr)) {
+            Instruction *K = cast<Instruction>(U.getUser());
+            if (!Visited.count(K))
+              WorkList.push_back(K);
+          }
+        }
       }
+
   }
 
   return true;
@@ -279,9 +280,10 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
 
 bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
                                            ScalarEvolution *SE_,
-                                           DominatorTree *DT_) {
+                                           DominatorTree *DT_, AAResults *AA_) {
   SE = SE_;
   DT = DT_;
+  AA = AA_;
 
   bool Changed = false;
   for (auto &AssumeVH : AC.assumptions())
@@ -300,7 +302,8 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
   ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
-  if (!runImpl(F, AC, &SE, &DT))
+  AAResults &AA = AM.getResult<AAManager>(F);
+  if (!runImpl(F, AC, &SE, &DT, &AA))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index 107d677cdbc271c..e55200aad44ae91 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -1,16 +1,78 @@
 ; Test that we don't crash.
-; RUN: opt < %s -passes=alignment-from-assumptions -S
+; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
 target triple = "amdgcn-amd-amdhsa"
 
-define amdgpu_kernel void @vectorize_global_local(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+define amdgpu_kernel void @test_gep(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_gep
+; GEPs are supported so the alignment is changed from 2 to 4
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
 bb:
   %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
   call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
-  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 4
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
   %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
   store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
   ret void
 }
+
+define amdgpu_kernel void @test_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_phi
+; PHI is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+bb:
+  %cond = icmp ugt i32 %idx, 10
+  br i1 %cond, label %bb1, label %bb2
+  
+bb1:
+  %gep1 = getelementptr i32, ptr addrspace(1) %arg, i32 6
+  br label %bb3
+  
+bb2:
+  %gep2 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+  br label %bb3
+
+bb3:
+  %gep3 = phi ptr addrspace(1) [%gep1, %bb1], [%gep2, %bb2]
+  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %gep3, i64 4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+
+define amdgpu_kernel void @test_select(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_select
+; select is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+bb:
+  %cond = icmp ugt i32 %idx, 10
+  %off1_gep = getelementptr i32, ptr addrspace(1) %arg, i32 6
+  %off2_gep = getelementptr i32, ptr addrspace(1) %arg, i32 7
+  %tmp2 = select i1 %cond, ptr addrspace(1) %off1_gep, ptr addrspace(1) %off2_gep
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+
+define amdgpu_kernel void @test_cast(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+bb:
+; CHECK-LABEL: @test_cast
+; addrspacecast is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+; the store is a user of the GEP, so align 2 is changed to 4
+; CHECK: store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  %cast = addrspacecast ptr addrspace(3) %arg1 to ptr addrspace(1)
+  %tmp2 = getelementptr i32, ptr addrspace(1) %cast
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg1, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 2
+  ret void
+}
+
 declare void @llvm.assume(i1 noundef)
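
Note: patch 2 threads AAResults into runImpl() and, instead of pushing every
user onto the worklist, follows a use only when the used value is
pointer-typed and may alias the assumed pointer. A minimal sketch of that
alias query as a standalone predicate (the helper name is mine;
AAResults::alias() on two Values is the real API):

#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

// Conservative filter: skip a use only when AA proves the two pointers are
// distinct; any result other than NoAlias keeps the use on the worklist.
static bool mayBeAssumedPtr(AAResults &AA, const Value *V,
                            const Value *AAPtr) {
  return AA.alias(V, AAPtr) != AliasResult::NoAlias;
}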

From e7ac710739dcbe07485b14131d9ba84792c7c9b2 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Wed, 29 Nov 2023 21:42:06 +0100
Subject: [PATCH 3/6] Whitespace error corrected

---
 llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 13e939521cbeb38..27422e7d8bb3423 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -272,7 +272,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
           }
         }
       }
-
   }
 
   return true;

From e258b77caa7d847eda2f38b9f6c038d9904e1086 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Thu, 30 Nov 2023 17:08:50 +0100
Subject: [PATCH 4/6] AlignmentFromAssumptions: handle a pointer used as the
 value of a store

---
 .../Scalar/AlignmentFromAssumptions.cpp          |  4 ++++
 .../alignment-from-assumptions_dont_crash.ll     | 16 ++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 27422e7d8bb3423..dc24c0ffec6fb6f 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -265,6 +265,10 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
     if (auto UJ = dyn_cast<User>(J))
       for (auto &U : UJ->uses()) {
         if (U->getType()->isPointerTy()) {
+          if (StoreInst *SI = dyn_cast<StoreInst>(U.getUser())) {
+            if (SI->getPointerOperandIndex() != U.getOperandNo())
+              continue;
+          }
           if (AA->alias(U, AAPtr)) {
             Instruction *K = cast<Instruction>(U.getUser());
             if (!Visited.count(K))
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index e55200aad44ae91..2e8355a42b7ba1e 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -75,4 +75,20 @@ bb:
   ret void
 }
 
+define amdgpu_kernel void @test_store_ptr(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+bb:
+; CHECK-LABEL: @test_store_ptr
+; GEPs are supported so the alignment is changed from 2 to 4
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
+; This store uses the pointer not as an address but as a value to store!
+; CHECK: store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2 
+  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
+  ret void
+}
+
 declare void @llvm.assume(i1 noundef)
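
Note: patch 4 covers a store that uses the tracked pointer as the value
being stored rather than as the address: only the address operand should
keep the walk going. A Use records both its user and its operand slot, so
distinguishing the two cases is direct; a sketch under an assumed helper
name:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// True only when U occupies the store's pointer-operand slot (operand 1),
// i.e. the pointer is the address written to, not the data being written.
static bool isStoreAddressUse(const Use &U) {
  if (auto *SI = dyn_cast<StoreInst>(U.getUser()))
    return U.getOperandNo() == SI->getPointerOperandIndex();
  return false;
}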

From 413806c12d66016d9249e81d5295a9c2d73c2028 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Thu, 30 Nov 2023 21:04:07 +0100
Subject: [PATCH 5/6] AlignmentFromAssumptions: replace the AliasAnalysis
 query with an instruction list

---
 .../Scalar/AlignmentFromAssumptions.h         |  4 +--
 .../Scalar/AlignmentFromAssumptions.cpp       | 17 ++++++-----
 .../alignment-from-assumptions_dont_crash.ll  | 28 +++++++++++++++++++
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index 83ee9c26fbd11c6..10b6e1c6a21b639 100644
--- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -25,7 +25,6 @@ class AssumptionCache;
 class DominatorTree;
 class ScalarEvolution;
 class SCEV;
-class AAResults;
 
 struct AlignmentFromAssumptionsPass
     : public PassInfoMixin<AlignmentFromAssumptionsPass> {
@@ -33,11 +32,10 @@ struct AlignmentFromAssumptionsPass
 
   // Glue for old PM.
   bool runImpl(Function &F, AssumptionCache &AC, ScalarEvolution *SE_,
-               DominatorTree *DT_, AAResults *AA_);
+               DominatorTree *DT_);
 
   ScalarEvolution *SE = nullptr;
   DominatorTree *DT = nullptr;
-  AAResults *AA = nullptr;
 
   bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr,
                             const SCEV *&AlignSCEV, const SCEV *&OffSCEV);
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index dc24c0ffec6fb6f..c7e954d8856e3bd 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -262,14 +262,15 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
 
     // Now that we've updated that use of the pointer, look for other uses of
     // the pointer to update.
+    Visited.insert(J);
     if (auto UJ = dyn_cast<User>(J))
       for (auto &U : UJ->uses()) {
         if (U->getType()->isPointerTy()) {
-          if (StoreInst *SI = dyn_cast<StoreInst>(U.getUser())) {
-            if (SI->getPointerOperandIndex() != U.getOperandNo())
-              continue;
-          }
-          if (AA->alias(U, AAPtr)) {
+          if (isa<GetElementPtrInst>(U.getUser()) ||
+           isa<PHINode>(U.getUser()) ||
+           isa<LoadInst>(U.getUser()) ||
+           isa<StoreInst>(U.getUser()) ||
+           isa<MemIntrinsic>(U.getUser())) {
             Instruction *K = cast<Instruction>(U.getUser());
             if (!Visited.count(K))
               WorkList.push_back(K);
@@ -283,10 +284,9 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
 
 bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
                                            ScalarEvolution *SE_,
-                                           DominatorTree *DT_, AAResults *AA_) {
+                                           DominatorTree *DT_) {
   SE = SE_;
   DT = DT_;
-  AA = AA_;
 
   bool Changed = false;
   for (auto &AssumeVH : AC.assumptions())
@@ -305,8 +305,7 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
   ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
-  AAResults &AA = AM.getResult<AAManager>(F);
-  if (!runImpl(F, AC, &SE, &DT, &AA))
+  if (!runImpl(F, AC, &SE, &DT))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index 2e8355a42b7ba1e..a282584de9c035d 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -43,6 +43,34 @@ bb3:
   ret void
 }
 
+define amdgpu_kernel void @test_loop_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_loop_phi
+; PHI is supported - align 2 changed to 4
+; CHECK: load i32, ptr addrspace(1) %gep, align 4
+bb:
+  %ptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
+  %end = getelementptr i32, ptr addrspace(1) %arg, i32 10
+  %cond = icmp ugt i32 %idx, 10
+  br i1 %cond, label %bb1, label %bb2
+
+bb1:
+  %ptr1 = phi ptr addrspace(1) [%ptr, %bb], [%ptr2, %bb1]
+  %acc1 = phi i32 [0, %bb], [%acc2, %bb1]
+  %gep = getelementptr i32, ptr addrspace(1) %ptr1, i32 4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %val = load i32, ptr addrspace(1) %gep, align 2
+  %acc2 = add i32 %acc1, %val
+  %ptr2 = getelementptr i32, ptr addrspace(1) %ptr1, i32 %idx
+  %exit = icmp eq ptr addrspace(1) %ptr2, %end
+  br i1 %exit, label %bb1, label %bb2
+
+bb2:
+  %sum = phi i32 [0, %bb], [%acc2, %bb1]
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %sum, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+
 define amdgpu_kernel void @test_select(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
 ; CHECK-LABEL: @test_select
 ; select is not supported - align 2 not changed
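
Note: patch 5 drops the AliasAnalysis dependency and instead keeps a fixed
list of user kinds the pass understands: GEPs and PHIs forward the pointer,
while loads, stores, and memory intrinsics (memcpy/memmove/memset) consume
it as an address. select and addrspacecast are deliberately absent, which is
what the test_select and test_cast expectations rely on. The same predicate
factored out for readability (the patch inlines it):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Whitelist of users worth following from a pointer-typed use.
static bool isTrackedPointerUser(const User *U) {
  return isa<GetElementPtrInst>(U) || isa<PHINode>(U) || isa<LoadInst>(U) ||
         isa<StoreInst>(U) || isa<MemIntrinsic>(U);
}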

From 88ad7d9f0d21b5b84783224042f11f0c43241eab Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Fri, 1 Dec 2023 18:05:58 +0100
Subject: [PATCH 6/6] AlignmentFromAssumptions: check the store pointer
 operand index; add a matching test

---
 .../Scalar/AlignmentFromAssumptions.cpp       | 10 +++----
 .../alignment-from-assumptions_dont_crash.ll  | 30 +++++++++++++------
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index c7e954d8856e3bd..d3d71fe922156be 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -266,11 +266,11 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
     if (auto UJ = dyn_cast<User>(J))
       for (auto &U : UJ->uses()) {
         if (U->getType()->isPointerTy()) {
-          if (isa<GetElementPtrInst>(U.getUser()) ||
-           isa<PHINode>(U.getUser()) ||
-           isa<LoadInst>(U.getUser()) ||
-           isa<StoreInst>(U.getUser()) ||
-           isa<MemIntrinsic>(U.getUser())) {
+          StoreInst *SI = dyn_cast<StoreInst>(U.getUser());
+          if ((SI && SI->getPointerOperandIndex() == U.getOperandNo()) ||
+              isa<GetElementPtrInst>(U.getUser()) ||
+              isa<PHINode>(U.getUser()) || isa<LoadInst>(U.getUser()) ||
+              isa<MemIntrinsic>(U.getUser())) {
             Instruction *K = cast<Instruction>(U.getUser());
             if (!Visited.count(K))
               WorkList.push_back(K);
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index a282584de9c035d..f07e16d01ddf5b6 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -103,20 +103,32 @@ bb:
   ret void
 }
 
-define amdgpu_kernel void @test_store_ptr(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+define amdgpu_kernel void @test_load_store_ptr_as_val(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
 bb:
-; CHECK-LABEL: @test_store_ptr
-; GEPs are supported so the alignment is changed from 2 to 4
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
+; CHECK-LABEL: @test_load_store_ptr_as_val
 ; This store uses the pointer not as an address but as a value to store!
-; CHECK: store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2 
-  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+; CHECK: store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2 
+  %tmp2 = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
   call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
-  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp3 = load ptr addrspace(1), ptr addrspace(1) %tmp2, align 2
   %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
-  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
-  store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
+  store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2
   ret void
 }
 
+define amdgpu_kernel void @test_load_store_ptr_as_addr(ptr addrspace(1) nocapture readonly %arg, i32 %valToStore) {
+; CHECK-LABEL: @test_load_store_ptr_as_addr
+; CHECK: %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 4
+; The store uses %tmp3 as an address, but %arg and %tmp3 are in different
+; address spaces, so align 2 is not changed
+; CHECK: store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
+bb:
+  %tmp2 = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 2
+  store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
+  ret void
+}
+
+
 declare void @llvm.assume(i1 noundef)
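
Note: patch 6 folds the store-operand check from patch 4 back into the
whitelist, so the series lands on logic equivalent to the sketch below (the
helper name is mine; the body mirrors the final diff):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Follow a use of J only if J is pointer-typed there and the user either
// addresses memory through it or forwards the pointer. A store qualifies
// only when the pointer is its address operand, not its stored value.
static bool shouldFollowUse(const Use &U) {
  if (!U->getType()->isPointerTy())
    return false;
  const User *Usr = U.getUser();
  if (auto *SI = dyn_cast<StoreInst>(Usr))
    return SI->getPointerOperandIndex() == U.getOperandNo();
  return isa<GetElementPtrInst>(Usr) || isa<PHINode>(Usr) ||
         isa<LoadInst>(Usr) || isa<MemIntrinsic>(Usr);
}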


