[llvm] AlignmentFromAssumptions should only track pointer operand users (PR #73370)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 4 07:36:13 PST 2023


https://github.com/alex-t updated https://github.com/llvm/llvm-project/pull/73370

>From 09bceafc5dbff302157c1affe945cf9c3325fee2 Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Fri, 24 Nov 2023 21:11:35 +0100
Subject: [PATCH 1/7] AlignmentFromAssumptions should not track the load result
 users

---
 .../Scalar/AlignmentFromAssumptions.cpp       | 20 +++++++++----------
 .../alignment-from-assumptions_dont_crash.ll  | 16 +++++++++++++++
 2 files changed, 26 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 63b7903ef955d..905ff2e80cd11 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -83,11 +83,7 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
                              const SCEV *OffSCEV, Value *Ptr,
                              ScalarEvolution *SE) {
   const SCEV *PtrSCEV = SE->getSCEV(Ptr);
-  // On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
-  // (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
-  // may disagree. Trunc/extend so they agree.
-  PtrSCEV = SE->getTruncateOrZeroExtend(
-      PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
+
   const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
   if (isa<SCEVCouldNotCompute>(DiffSCEV))
     return Align(1);
@@ -216,6 +212,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
   }
 
   while (!WorkList.empty()) {
+    bool AddUsers = true;
     Instruction *J = WorkList.pop_back_val();
     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
@@ -226,6 +223,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
         LI->setAlignment(NewAlignment);
         ++NumLoadAlignChanged;
       }
+      // The user of a Load uses data - not a pointer!
+      AddUsers = false;
     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
         continue;
@@ -267,11 +266,12 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
     // Now that we've updated that use of the pointer, look for other uses of
     // the pointer to update.
     Visited.insert(J);
-    for (User *UJ : J->users()) {
-      Instruction *K = cast<Instruction>(UJ);
-      if (!Visited.count(K))
-        WorkList.push_back(K);
-    }
+    if (AddUsers)
+      for (User *UJ : J->users()) {
+        Instruction *K = cast<Instruction>(UJ);
+        if (!Visited.count(K))
+          WorkList.push_back(K);
+      }
   }
 
   return true;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
new file mode 100644
index 0000000000000..107d677cdbc27
--- /dev/null
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -0,0 +1,16 @@
+; Test that we don't crash.
+; RUN: opt < %s -passes=alignment-from-assumptions -S
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @vectorize_global_local(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+bb:
+  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 4
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+declare void @llvm.assume(i1 noundef)

>From 6e4189199b206e4423070830ba24811e7457d133 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Wed, 29 Nov 2023 21:36:24 +0100
Subject: [PATCH 2/7] AlignmentFromAssumptions should only track pointer
 operand users

---
 .../Scalar/AlignmentFromAssumptions.h         |  4 +-
 .../Scalar/AlignmentFromAssumptions.cpp       | 25 ++++---
 .../alignment-from-assumptions_dont_crash.ll  | 68 ++++++++++++++++++-
 3 files changed, 82 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index 10b6e1c6a21b6..83ee9c26fbd11 100644
--- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -25,6 +25,7 @@ class AssumptionCache;
 class DominatorTree;
 class ScalarEvolution;
 class SCEV;
+class AAResults;
 
 struct AlignmentFromAssumptionsPass
     : public PassInfoMixin<AlignmentFromAssumptionsPass> {
@@ -32,10 +33,11 @@ struct AlignmentFromAssumptionsPass
 
   // Glue for old PM.
   bool runImpl(Function &F, AssumptionCache &AC, ScalarEvolution *SE_,
-               DominatorTree *DT_);
+               DominatorTree *DT_, AAResults *AA_);
 
   ScalarEvolution *SE = nullptr;
   DominatorTree *DT = nullptr;
+  AAResults *AA = nullptr;
 
   bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr,
                             const SCEV *&AlignSCEV, const SCEV *&OffSCEV);
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 905ff2e80cd11..13e939521cbeb 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -212,7 +212,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
   }
 
   while (!WorkList.empty()) {
-    bool AddUsers = true;
     Instruction *J = WorkList.pop_back_val();
     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
@@ -223,8 +222,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
         LI->setAlignment(NewAlignment);
         ++NumLoadAlignChanged;
       }
-      // The user of a Load uses data - not a pointer!
-      AddUsers = false;
     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
       if (!isValidAssumeForContext(ACall, J, DT))
         continue;
@@ -265,13 +262,17 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
 
     // Now that we've updated that use of the pointer, look for other uses of
     // the pointer to update.
-    Visited.insert(J);
-    if (AddUsers)
-      for (User *UJ : J->users()) {
-        Instruction *K = cast<Instruction>(UJ);
-        if (!Visited.count(K))
-          WorkList.push_back(K);
+    if (auto UJ = dyn_cast<User>(J))
+      for (auto &U : UJ->uses()) {
+        if (U->getType()->isPointerTy()) {
+          if (AA->alias(U, AAPtr)) {
+            Instruction *K = cast<Instruction>(U.getUser());
+            if (!Visited.count(K))
+              WorkList.push_back(K);
+          }
+        }
       }
+
   }
 
   return true;
@@ -279,9 +280,10 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
 
 bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
                                            ScalarEvolution *SE_,
-                                           DominatorTree *DT_) {
+                                           DominatorTree *DT_, AAResults *AA_) {
   SE = SE_;
   DT = DT_;
+  AA = AA_;
 
   bool Changed = false;
   for (auto &AssumeVH : AC.assumptions())
@@ -300,7 +302,8 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
   ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
-  if (!runImpl(F, AC, &SE, &DT))
+  AAResults &AA = AM.getResult<AAManager>(F);
+  if (!runImpl(F, AC, &SE, &DT, &AA))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index 107d677cdbc27..e55200aad44ae 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -1,16 +1,78 @@
 ; Test that we don't crash.
-; RUN: opt < %s -passes=alignment-from-assumptions -S
+; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
 target triple = "amdgcn-amd-amdhsa"
 
-define amdgpu_kernel void @vectorize_global_local(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+define amdgpu_kernel void @test_gep(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_gep
+; GEPs are supported so the alignment is changed from 2 to 4
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
 bb:
   %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
   call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
-  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 4
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
   %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
   store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
   ret void
 }
+
+define amdgpu_kernel void @test_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_phi
+; PHI is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+bb:
+  %cond = icmp ugt i32 %idx, 10
+  br i1 %cond, label %bb1, label %bb2
+  
+bb1:
+  %gep1 = getelementptr i32, ptr addrspace(1) %arg, i32 6
+  br label %bb3
+  
+bb2:
+  %gep2 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+  br label %bb3
+
+bb3:
+  %gep3 = phi ptr addrspace(1) [%gep1, %bb1], [%gep2, %bb2]
+  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %gep3, i64 4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+
+define amdgpu_kernel void @test_select(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_select
+; select is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+bb:
+  %cond = icmp ugt i32 %idx, 10
+  %off1_gep = getelementptr i32, ptr addrspace(1) %arg, i32 6
+  %off2_gep = getelementptr i32, ptr addrspace(1) %arg, i32 7
+  %tmp2 = select i1 %cond, ptr addrspace(1) %off1_gep, ptr addrspace(1) %off2_gep
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+
+define amdgpu_kernel void @test_cast(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+bb:
+; CHECK-LABEL: @test_cast
+; addrspacecast is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+; store is a user of the GEP so align 2 is changed to 4
+; CHECK: store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  %cast = addrspacecast ptr addrspace(3) %arg1 to ptr addrspace(1)
+  %tmp2 = getelementptr i32, ptr addrspace(1) %cast, i64 0
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg1, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 2
+  ret void
+}
+
 declare void @llvm.assume(i1 noundef)

>From e7ac710739dcbe07485b14131d9ba84792c7c9b2 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Wed, 29 Nov 2023 21:42:06 +0100
Subject: [PATCH 3/7] Whitespace error corrected

---
 llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 13e939521cbeb..27422e7d8bb34 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -272,7 +272,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
           }
         }
       }
-
   }
 
   return true;

>From e258b77caa7d847eda2f38b9f6c038d9904e1086 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Thu, 30 Nov 2023 17:08:50 +0100
Subject: [PATCH 4/7] AlignmentFromAssumptions: pointer as a value to store
 case added

---
 .../Scalar/AlignmentFromAssumptions.cpp          |  4 ++++
 .../alignment-from-assumptions_dont_crash.ll     | 16 ++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 27422e7d8bb34..dc24c0ffec6fb 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -265,6 +265,10 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
     if (auto UJ = dyn_cast<User>(J))
       for (auto &U : UJ->uses()) {
         if (U->getType()->isPointerTy()) {
+          if (StoreInst *SI = dyn_cast<StoreInst>(U.getUser())) {
+            if (SI->getPointerOperandIndex() != U.getOperandNo())
+              continue;
+          }
           if (AA->alias(U, AAPtr)) {
             Instruction *K = cast<Instruction>(U.getUser());
             if (!Visited.count(K))
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index e55200aad44ae..2e8355a42b7ba 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -75,4 +75,20 @@ bb:
   ret void
 }
 
+define amdgpu_kernel void @test_store_ptr(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+bb:
+; CHECK-LABEL: @test_store_ptr
+; GEPs are supported so the alignment is changed from 2 to 4
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
+; This store uses a pointer not as address but as a value to store!
+; CHECK: store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
+  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+  store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
+  ret void
+}
+
 declare void @llvm.assume(i1 noundef)

>From 413806c12d66016d9249e81d5295a9c2d73c2028 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Thu, 30 Nov 2023 21:04:07 +0100
Subject: [PATCH 5/7] AlignmentFromAssumptions. Change AliasAnalysis to
 instruction list

---
 .../Scalar/AlignmentFromAssumptions.h         |  4 +--
 .../Scalar/AlignmentFromAssumptions.cpp       | 17 ++++++-----
 .../alignment-from-assumptions_dont_crash.ll  | 28 +++++++++++++++++++
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index 83ee9c26fbd11..10b6e1c6a21b6 100644
--- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -25,7 +25,6 @@ class AssumptionCache;
 class DominatorTree;
 class ScalarEvolution;
 class SCEV;
-class AAResults;
 
 struct AlignmentFromAssumptionsPass
     : public PassInfoMixin<AlignmentFromAssumptionsPass> {
@@ -33,11 +32,10 @@ struct AlignmentFromAssumptionsPass
 
   // Glue for old PM.
   bool runImpl(Function &F, AssumptionCache &AC, ScalarEvolution *SE_,
-               DominatorTree *DT_, AAResults *AA_);
+               DominatorTree *DT_);
 
   ScalarEvolution *SE = nullptr;
   DominatorTree *DT = nullptr;
-  AAResults *AA = nullptr;
 
   bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr,
                             const SCEV *&AlignSCEV, const SCEV *&OffSCEV);
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index dc24c0ffec6fb..c7e954d8856e3 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -262,14 +262,15 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
 
     // Now that we've updated that use of the pointer, look for other uses of
     // the pointer to update.
+    Visited.insert(J);
     if (auto UJ = dyn_cast<User>(J))
       for (auto &U : UJ->uses()) {
         if (U->getType()->isPointerTy()) {
-          if (StoreInst *SI = dyn_cast<StoreInst>(U.getUser())) {
-            if (SI->getPointerOperandIndex() != U.getOperandNo())
-              continue;
-          }
-          if (AA->alias(U, AAPtr)) {
+          if (isa<GetElementPtrInst>(U.getUser()) ||
+           isa<PHINode>(U.getUser()) ||
+           isa<LoadInst>(U.getUser()) ||
+           isa<StoreInst>(U.getUser()) ||
+           isa<MemIntrinsic>(U.getUser())) {
             Instruction *K = cast<Instruction>(U.getUser());
             if (!Visited.count(K))
               WorkList.push_back(K);
@@ -283,10 +284,9 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
 
 bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
                                            ScalarEvolution *SE_,
-                                           DominatorTree *DT_, AAResults *AA_) {
+                                           DominatorTree *DT_) {
   SE = SE_;
   DT = DT_;
-  AA = AA_;
 
   bool Changed = false;
   for (auto &AssumeVH : AC.assumptions())
@@ -305,8 +305,7 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
   ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
-  AAResults &AA = AM.getResult<AAManager>(F);
-  if (!runImpl(F, AC, &SE, &DT, &AA))
+  if (!runImpl(F, AC, &SE, &DT))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index 2e8355a42b7ba..a282584de9c03 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -43,6 +43,34 @@ bb3:
   ret void
 }
 
+define amdgpu_kernel void @test_loop_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_loop_phi
+; PHI is supported - align 2 changed to 4
+; CHECK: load i32, ptr addrspace(1) %gep, align 4
+bb:
+  %ptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
+  %end = getelementptr i32, ptr addrspace(1) %arg, i32 10
+  %cond = icmp ugt i32 %idx, 10
+  br i1 %cond, label %bb1, label %bb2
+
+bb1:
+  %ptr1 = phi ptr addrspace(1) [%ptr, %bb], [%ptr2, %bb1]
+  %acc1 = phi i32 [0, %bb], [%acc2, %bb1]
+  %gep = getelementptr i32, ptr addrspace(1) %ptr1, i32 4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %val = load i32, ptr addrspace(1) %gep, align 2
+  %acc2 = add i32 %acc1, %val
+  %ptr2 = getelementptr i32, ptr addrspace(1) %ptr1, i32 %idx
+  %exit = icmp eq ptr addrspace(1) %ptr2, %end
+  br i1 %exit, label %bb1, label %bb2
+
+bb2:
+  %sum = phi i32 [0, %bb], [%acc2, %bb1]
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %sum, ptr addrspace(3) %tmp4, align 4
+  ret void
+}
+
 define amdgpu_kernel void @test_select(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
 ; CHECK-LABEL: @test_select
 ; select is not supported - align 2 not changed

>From 88ad7d9f0d21b5b84783224042f11f0c43241eab Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Fri, 1 Dec 2023 18:05:58 +0100
Subject: [PATCH 6/7] AlignmentFromAssumptions. Store pointer operand index
 check added. Test added accordingly.

---
 .../Scalar/AlignmentFromAssumptions.cpp       | 10 +++----
 .../alignment-from-assumptions_dont_crash.ll  | 30 +++++++++++++------
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index c7e954d8856e3..d3d71fe922156 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -266,11 +266,11 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
     if (auto UJ = dyn_cast<User>(J))
       for (auto &U : UJ->uses()) {
         if (U->getType()->isPointerTy()) {
-          if (isa<GetElementPtrInst>(U.getUser()) ||
-           isa<PHINode>(U.getUser()) ||
-           isa<LoadInst>(U.getUser()) ||
-           isa<StoreInst>(U.getUser()) ||
-           isa<MemIntrinsic>(U.getUser())) {
+          StoreInst *SI = dyn_cast<StoreInst>(U.getUser());
+          if ((SI && SI->getPointerOperandIndex() == U.getOperandNo()) ||
+              isa<GetElementPtrInst>(U.getUser()) ||
+              isa<PHINode>(U.getUser()) || isa<LoadInst>(U.getUser()) ||
+              isa<MemIntrinsic>(U.getUser())) {
             Instruction *K = cast<Instruction>(U.getUser());
             if (!Visited.count(K))
               WorkList.push_back(K);
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index a282584de9c03..f07e16d01ddf5 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -103,20 +103,32 @@ bb:
   ret void
 }
 
-define amdgpu_kernel void @test_store_ptr(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+define amdgpu_kernel void @test_load_store_ptr_as_val(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
 bb:
-; CHECK-LABEL: @test_store_ptr
-; GEPs are supported so the alignment is changed from 2 to 4
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
+; CHECK-LABEL: @test_load_store_ptr_as_val
 ; This store uses a pointer not as address but as a value to store!
-; CHECK: store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2 
-  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+; CHECK: store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2
+  %tmp2 = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
   call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
-  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+  %tmp3 = load ptr addrspace(1), ptr addrspace(1) %tmp2, align 2
   %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
-  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
-  store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
+  store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2
   ret void
 }
 
+define amdgpu_kernel void @test_load_store_ptr_as_addr(ptr addrspace(1) nocapture readonly %arg, i32 %valToStore) {
+; CHECK-LABEL: @test_load_store_ptr_as_addr
+; CHECK: %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 4
+; store uses %tmp3 as an address BUT the %arg and %tmp3 have different address spaces
+; so, the align 2 is not changed
+; CHECK: store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
+bb:
+  %tmp2 = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 2
+  store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
+  ret void
+}
+
+
 declare void @llvm.assume(i1 noundef)

>From c1016ec0b951c2af5e88ee6963eddc705b41f57a Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Mon, 4 Dec 2023 16:34:58 +0100
Subject: [PATCH 7/7] AlignmentFromAssumptions. Test update

---
 .../alignment-from-assumptions-track-users.ll | 206 ++++++++++++++++++
 .../alignment-from-assumptions_dont_crash.ll  | 134 ------------
 2 files changed, 206 insertions(+), 134 deletions(-)
 create mode 100644 llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll
 delete mode 100644 llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll

diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll
new file mode 100644
index 0000000000000..3c825baab5433
--- /dev/null
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll
@@ -0,0 +1,206 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
+; ModuleID = 'alignment-from-assumptions_dont_crash.ll'
+source_filename = "alignment-from-assumptions_dont_crash.ll"
+
+define amdgpu_kernel void @widget(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: define amdgpu_kernel void @widget(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(3) nocapture [[ARG1:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[GETELEMENTPTR]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    [[GETELEMENTPTR2:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG1]], i32 1
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR2]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %getelementptr, i64 4) ]
+  %load = load i32, ptr addrspace(1) %getelementptr, align 2
+  %getelementptr2 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store i32 %load, ptr addrspace(3) %getelementptr2, align 4
+  ret void
+}
+
+define amdgpu_kernel void @wibble(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define amdgpu_kernel void @wibble(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT:    br i1 [[ICMP]], label [[BB4:%.*]], label [[BB5:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 6
+; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 7
+; CHECK-NEXT:    br label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], [[BB4]] ], [ [[GETELEMENTPTR6]], [[BB5]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PHI]], i64 4
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR8]], align 2
+; CHECK-NEXT:    [[GETELEMENTPTR9:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR9]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %icmp = icmp ugt i32 %arg2, 10
+  br i1 %icmp, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb
+  %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 6
+  br label %bb7
+
+bb5:                                              ; preds = %bb
+  %getelementptr6 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+  br label %bb7
+
+bb7:                                              ; preds = %bb5, %bb4
+  %phi = phi ptr addrspace(1) [ %getelementptr, %bb4 ], [ %getelementptr6, %bb5 ]
+  %getelementptr8 = getelementptr inbounds i32, ptr addrspace(1) %phi, i64 4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load i32, ptr addrspace(1) %getelementptr8, align 2
+  %getelementptr9 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+  store i32 %load, ptr addrspace(3) %getelementptr9, align 4
+  ret void
+}
+
+define amdgpu_kernel void @ham(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define amdgpu_kernel void @ham(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 0
+; CHECK-NEXT:    [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 10
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT:    br i1 [[ICMP]], label [[BB5:%.*]], label [[BB10:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], [[BB:%.*]] ], [ [[GETELEMENTPTR8:%.*]], [[BB5]] ]
+; CHECK-NEXT:    [[PHI6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[ADD:%.*]], [[BB5]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR7:%.*]] = getelementptr i32, ptr addrspace(1) [[PHI]], i32 4
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR7]], align 4
+; CHECK-NEXT:    [[ADD]] = add i32 [[PHI6]], [[LOAD]]
+; CHECK-NEXT:    [[GETELEMENTPTR8]] = getelementptr i32, ptr addrspace(1) [[PHI]], i32 [[ARG2]]
+; CHECK-NEXT:    [[ICMP9:%.*]] = icmp eq ptr addrspace(1) [[GETELEMENTPTR8]], [[GETELEMENTPTR4]]
+; CHECK-NEXT:    br i1 [[ICMP9]], label [[BB5]], label [[BB10]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[PHI11:%.*]] = phi i32 [ 0, [[BB]] ], [ [[ADD]], [[BB5]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR12:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT:    store i32 [[PHI11]], ptr addrspace(3) [[GETELEMENTPTR12]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
+  %getelementptr4 = getelementptr i32, ptr addrspace(1) %arg, i32 10
+  %icmp = icmp ugt i32 %arg2, 10
+  br i1 %icmp, label %bb5, label %bb10
+
+bb5:                                              ; preds = %bb5, %bb
+  %phi = phi ptr addrspace(1) [ %getelementptr, %bb ], [ %getelementptr8, %bb5 ]
+  %phi6 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+  %getelementptr7 = getelementptr i32, ptr addrspace(1) %phi, i32 4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load i32, ptr addrspace(1) %getelementptr7, align 2
+  %add = add i32 %phi6, %load
+  %getelementptr8 = getelementptr i32, ptr addrspace(1) %phi, i32 %arg2
+  %icmp9 = icmp eq ptr addrspace(1) %getelementptr8, %getelementptr4
+  br i1 %icmp9, label %bb5, label %bb10
+
+bb10:                                             ; preds = %bb5, %bb
+  %phi11 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+  %getelementptr12 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+  store i32 %phi11, ptr addrspace(3) %getelementptr12, align 4
+  ret void
+}
+
+define amdgpu_kernel void @quux(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define amdgpu_kernel void @quux(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 6
+; CHECK-NEXT:    [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 7
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[ICMP]], ptr addrspace(1) [[GETELEMENTPTR]], ptr addrspace(1) [[GETELEMENTPTR4]]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SELECT]], align 2
+; CHECK-NEXT:    [[GETELEMENTPTR5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR5]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %icmp = icmp ugt i32 %arg2, 10
+  %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 6
+  %getelementptr4 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+  %select = select i1 %icmp, ptr addrspace(1) %getelementptr, ptr addrspace(1) %getelementptr4
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load i32, ptr addrspace(1) %select, align 2
+  %getelementptr5 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+  store i32 %load, ptr addrspace(3) %getelementptr5, align 4
+  ret void
+}
+
+define amdgpu_kernel void @widget.1(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define amdgpu_kernel void @widget.1(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ADDRSPACECAST:%.*]] = addrspacecast ptr addrspace(3) [[ARG3]] to ptr addrspace(1)
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ADDRSPACECAST]]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) [[ARG3]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR]], align 2
+; CHECK-NEXT:    [[GETELEMENTPTR4:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR4]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %addrspacecast = addrspacecast ptr addrspace(3) %arg3 to ptr addrspace(1)
+  %getelementptr = getelementptr i32, ptr addrspace(1) %addrspacecast
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg3, i64 4) ]
+  %load = load i32, ptr addrspace(1) %getelementptr, align 2
+  %getelementptr4 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+  store i32 %load, ptr addrspace(3) %getelementptr4, align 2
+  ret void
+}
+
+define amdgpu_kernel void @baz(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: define amdgpu_kernel void @baz(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(3) nocapture [[ARG1:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[ARG]], i64 16
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    [[GETELEMENTPTR2:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG1]], i32 1
+; CHECK-NEXT:    store ptr addrspace(1) [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR2]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load ptr addrspace(1), ptr addrspace(1) %getelementptr, align 2
+  %getelementptr2 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+  store ptr addrspace(1) %load, ptr addrspace(3) %getelementptr2, align 2
+  ret void
+}
+
+define amdgpu_kernel void @foo(ptr addrspace(1) nocapture readonly %arg, i32 %arg1) {
+; CHECK-LABEL: define amdgpu_kernel void @foo(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(3), ptr addrspace(1) [[ARG]], i64 16
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    store i32 [[ARG1]], ptr addrspace(3) [[LOAD]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
+  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+  %load = load ptr addrspace(3), ptr addrspace(1) %getelementptr, align 2
+  store i32 %arg1, ptr addrspace(3) %load, align 2
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
deleted file mode 100644
index f07e16d01ddf5..0000000000000
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ /dev/null
@@ -1,134 +0,0 @@
-; Test that we don't crash.
-; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
-
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
-target triple = "amdgcn-amd-amdhsa"
-
-define amdgpu_kernel void @test_gep(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
-; CHECK-LABEL: @test_gep
-; GEPs are supported so the alignment is changed from 2 to 4
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
-bb:
-  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
-  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
-  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
-  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
-  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
-  ret void
-}
-
-define amdgpu_kernel void @test_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
-; CHECK-LABEL: @test_phi
-; PHI is not supported - align 2 not changed
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
-bb:
-  %cond = icmp ugt i32 %idx, 10
-  br i1 %cond, label %bb1, label %bb2
-  
-bb1:
-  %gep1 = getelementptr i32, ptr addrspace(1) %arg, i32 6
-  br label %bb3
-  
-bb2:
-  %gep2 = getelementptr i32, ptr addrspace(1) %arg, i32 7
-  br label %bb3
-
-bb3:
-  %gep3 = phi ptr addrspace(1) [%gep1, %bb1], [%gep2, %bb2]
-  %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %gep3, i64 4
-  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
-  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
-  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
-  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
-  ret void
-}
-
-define amdgpu_kernel void @test_loop_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
-; CHECK-LABEL: @test_loop_phi
-; PHI is supported - align 2 changed to 4
-; CHECK: load i32, ptr addrspace(1) %gep, align 4
-bb:
-  %ptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
-  %end = getelementptr i32, ptr addrspace(1) %arg, i32 10
-  %cond = icmp ugt i32 %idx, 10
-  br i1 %cond, label %bb1, label %bb2
-
-bb1:
-  %ptr1 = phi ptr addrspace(1) [%ptr, %bb], [%ptr2, %bb1]
-  %acc1 = phi i32 [0, %bb], [%acc2, %bb1]
-  %gep = getelementptr i32, ptr addrspace(1) %ptr1, i32 4
-  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
-  %val = load i32, ptr addrspace(1) %gep, align 2
-  %acc2 = add i32 %acc1, %val
-  %ptr2 = getelementptr i32, ptr addrspace(1) %ptr1, i32 %idx
-  %exit = icmp eq ptr addrspace(1) %ptr2, %end
-  br i1 %exit, label %bb1, label %bb2
-
-bb2:
-  %sum = phi i32 [0, %bb], [%acc2, %bb1]
-  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
-  store i32 %sum, ptr addrspace(3) %tmp4, align 4
-  ret void
-}
-
-define amdgpu_kernel void @test_select(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
-; CHECK-LABEL: @test_select
-; select is not supported - align 2 not changed
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
-bb:
-  %cond = icmp ugt i32 %idx, 10
-  %off1_gep = getelementptr i32, ptr addrspace(1) %arg, i32 6
-  %off2_gep = getelementptr i32, ptr addrspace(1) %arg, i32 7
-  %tmp2 = select i1 %cond, ptr addrspace(1) %off1_gep, ptr addrspace(1) %off2_gep
-  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
-  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
-  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
-  store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
-  ret void
-}
-
-define amdgpu_kernel void @test_cast(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
-bb:
-; CHECK-LABEL: @test_cast
-; addrspacecast is not supported - align 2 not changed
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
-; store is a user of the GEP so, align 2 is changed to 4
-; CHECK: store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
-  %cast = addrspacecast ptr addrspace(3) %arg1 to ptr addrspace(1)
-  %tmp2 = getelementptr i32, ptr addrspace(1) %cast
-  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg1, i64 4) ]
-  %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
-  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
-  store i32 %tmp3, ptr addrspace(3) %tmp4, align 2
-  ret void
-}
-
-define amdgpu_kernel void @test_load_store_ptr_as_val(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
-bb:
-; CHECK-LABEL: @test_load_store_ptr_as_val
-; This store uses a pointer not as adress but as a value to store!
-; CHECK: store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2 
-  %tmp2 = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
-  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
-  %tmp3 = load ptr addrspace(1), ptr addrspace(1) %tmp2, align 2
-  %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
-  store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2
-  ret void
-}
-
-define amdgpu_kernel void @test_load_store_ptr_as_addr(ptr addrspace(1) nocapture readonly %arg, i32 %valToStore) {
-; CHECK-LABEL: @test_load_store_ptr_as_addr
-; CHECK: %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 4
-; store uses %tmp3 as an address BUT the %arg and %tmp3 have different address spaces
-; so, the align 2 is not changed
-; CHECK: store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
-bb:
-  %tmp2 = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
-  call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
-  %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 2
-  store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
-  ret void
-}
-
-
-declare void @llvm.assume(i1 noundef)



More information about the llvm-commits mailing list