[llvm] AlignmentFromAssumptions should only track pointer operand users (PR #73370)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 07:36:13 PST 2023
https://github.com/alex-t updated https://github.com/llvm/llvm-project/pull/73370
>From 09bceafc5dbff302157c1affe945cf9c3325fee2 Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Fri, 24 Nov 2023 21:11:35 +0100
Subject: [PATCH 1/7] AlignmentFromAssumptions should not track the load result
users
---
.../Scalar/AlignmentFromAssumptions.cpp | 20 +++++++++----------
.../alignment-from-assumptions_dont_crash.ll | 16 +++++++++++++++
2 files changed, 26 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 63b7903ef955d..905ff2e80cd11 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -83,11 +83,7 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
const SCEV *OffSCEV, Value *Ptr,
ScalarEvolution *SE) {
const SCEV *PtrSCEV = SE->getSCEV(Ptr);
- // On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
- // (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
- // may disagree. Trunc/extend so they agree.
- PtrSCEV = SE->getTruncateOrZeroExtend(
- PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
+
const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
if (isa<SCEVCouldNotCompute>(DiffSCEV))
return Align(1);
@@ -216,6 +212,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
}
while (!WorkList.empty()) {
+ bool AddUsers = true;
Instruction *J = WorkList.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
@@ -226,6 +223,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
LI->setAlignment(NewAlignment);
++NumLoadAlignChanged;
}
+ // The user of a Load uses data - not a pointer!
+ AddUsers = false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
continue;
@@ -267,11 +266,12 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
// Now that we've updated that use of the pointer, look for other uses of
// the pointer to update.
Visited.insert(J);
- for (User *UJ : J->users()) {
- Instruction *K = cast<Instruction>(UJ);
- if (!Visited.count(K))
- WorkList.push_back(K);
- }
+ if (AddUsers)
+ for (User *UJ : J->users()) {
+ Instruction *K = cast<Instruction>(UJ);
+ if (!Visited.count(K))
+ WorkList.push_back(K);
+ }
}
return true;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
new file mode 100644
index 0000000000000..107d677cdbc27
--- /dev/null
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -0,0 +1,16 @@
+; Test that we don't crash.
+; RUN: opt < %s -passes=alignment-from-assumptions -S
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @vectorize_global_local(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+bb:
+ %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
+ %tmp3 = load i32, ptr addrspace(1) %tmp2, align 4
+ %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+ store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+ ret void
+}
+declare void @llvm.assume(i1 noundef)
>From 6e4189199b206e4423070830ba24811e7457d133 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Wed, 29 Nov 2023 21:36:24 +0100
Subject: [PATCH 2/7] AlignmentFromAssumptions should only track pointer
operand users
---
.../Scalar/AlignmentFromAssumptions.h | 4 +-
.../Scalar/AlignmentFromAssumptions.cpp | 25 ++++---
.../alignment-from-assumptions_dont_crash.ll | 68 ++++++++++++++++++-
3 files changed, 82 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index 10b6e1c6a21b6..83ee9c26fbd11 100644
--- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -25,6 +25,7 @@ class AssumptionCache;
class DominatorTree;
class ScalarEvolution;
class SCEV;
+class AAResults;
struct AlignmentFromAssumptionsPass
: public PassInfoMixin<AlignmentFromAssumptionsPass> {
@@ -32,10 +33,11 @@ struct AlignmentFromAssumptionsPass
// Glue for old PM.
bool runImpl(Function &F, AssumptionCache &AC, ScalarEvolution *SE_,
- DominatorTree *DT_);
+ DominatorTree *DT_, AAResults *AA_);
ScalarEvolution *SE = nullptr;
DominatorTree *DT = nullptr;
+ AAResults *AA = nullptr;
bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr,
const SCEV *&AlignSCEV, const SCEV *&OffSCEV);
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 905ff2e80cd11..13e939521cbeb 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -212,7 +212,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
}
while (!WorkList.empty()) {
- bool AddUsers = true;
Instruction *J = WorkList.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
@@ -223,8 +222,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
LI->setAlignment(NewAlignment);
++NumLoadAlignChanged;
}
- // The user of a Load uses data - not a pointer!
- AddUsers = false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
continue;
@@ -265,13 +262,17 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
// Now that we've updated that use of the pointer, look for other uses of
// the pointer to update.
- Visited.insert(J);
- if (AddUsers)
- for (User *UJ : J->users()) {
- Instruction *K = cast<Instruction>(UJ);
- if (!Visited.count(K))
- WorkList.push_back(K);
+ if (auto UJ = dyn_cast<User>(J))
+ for (auto &U : UJ->uses()) {
+ if (U->getType()->isPointerTy()) {
+ if (AA->alias(U, AAPtr)) {
+ Instruction *K = cast<Instruction>(U.getUser());
+ if (!Visited.count(K))
+ WorkList.push_back(K);
+ }
+ }
}
+
}
return true;
@@ -279,9 +280,10 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
ScalarEvolution *SE_,
- DominatorTree *DT_) {
+ DominatorTree *DT_, AAResults *AA_) {
SE = SE_;
DT = DT_;
+ AA = AA_;
bool Changed = false;
for (auto &AssumeVH : AC.assumptions())
@@ -300,7 +302,8 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
- if (!runImpl(F, AC, &SE, &DT))
+ AAResults &AA = AM.getResult<AAManager>(F);
+ if (!runImpl(F, AC, &SE, &DT, &AA))
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index 107d677cdbc27..e55200aad44ae 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -1,16 +1,78 @@
; Test that we don't crash.
-; RUN: opt < %s -passes=alignment-from-assumptions -S
+; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
target triple = "amdgcn-amd-amdhsa"
-define amdgpu_kernel void @vectorize_global_local(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+define amdgpu_kernel void @test_gep(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_gep
+; GEPs are supported so the alignment is changed from 2 to 4
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
bb:
%tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
- %tmp3 = load i32, ptr addrspace(1) %tmp2, align 4
+ %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
%tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
ret void
}
+
+define amdgpu_kernel void @test_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_phi
+; PHI is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+bb:
+ %cond = icmp ugt i32 %idx, 10
+ br i1 %cond, label %bb1, label %bb2
+
+bb1:
+ %gep1 = getelementptr i32, ptr addrspace(1) %arg, i32 6
+ br label %bb3
+
+bb2:
+ %gep2 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+ br label %bb3
+
+bb3:
+ %gep3 = phi ptr addrspace(1) [%gep1, %bb1], [%gep2, %bb2]
+ %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %gep3, i64 4
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+ %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+ store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+ ret void
+}
+
+define amdgpu_kernel void @test_select(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_select
+; select is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+bb:
+ %cond = icmp ugt i32 %idx, 10
+ %off1_gep = getelementptr i32, ptr addrspace(1) %arg, i32 6
+ %off2_gep = getelementptr i32, ptr addrspace(1) %arg, i32 7
+ %tmp2 = select i1 %cond, ptr addrspace(1) %off1_gep, ptr addrspace(1) %off2_gep
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+ %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+ store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+ ret void
+}
+
+define amdgpu_kernel void @test_cast(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+bb:
+; CHECK-LABEL: @test_cast
+; addrspacecast is not supported - align 2 not changed
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
+; store is a user of the GEP, so align 2 is changed to 4
+; CHECK: store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+ %cast = addrspacecast ptr addrspace(3) %arg1 to ptr addrspace(1)
+ %tmp2 = getelementptr i32, ptr addrspace(1) %cast
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg1, i64 4) ]
+ %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+ %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+ store i32 %tmp3, ptr addrspace(3) %tmp4, align 2
+ ret void
+}
+
declare void @llvm.assume(i1 noundef)
>From e7ac710739dcbe07485b14131d9ba84792c7c9b2 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Wed, 29 Nov 2023 21:42:06 +0100
Subject: [PATCH 3/7] Whitespace error corrected
---
llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 13e939521cbeb..27422e7d8bb34 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -272,7 +272,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
}
}
}
-
}
return true;
>From e258b77caa7d847eda2f38b9f6c038d9904e1086 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Thu, 30 Nov 2023 17:08:50 +0100
Subject: [PATCH 4/7] AlignmentFromAssumptions: pointer as a value to store
case added
---
.../Scalar/AlignmentFromAssumptions.cpp | 4 ++++
.../alignment-from-assumptions_dont_crash.ll | 16 ++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 27422e7d8bb34..dc24c0ffec6fb 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -265,6 +265,10 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
if (auto UJ = dyn_cast<User>(J))
for (auto &U : UJ->uses()) {
if (U->getType()->isPointerTy()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U.getUser())) {
+ if (SI->getPointerOperandIndex() != U.getOperandNo())
+ continue;
+ }
if (AA->alias(U, AAPtr)) {
Instruction *K = cast<Instruction>(U.getUser());
if (!Visited.count(K))
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index e55200aad44ae..2e8355a42b7ba 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -75,4 +75,20 @@ bb:
ret void
}
+define amdgpu_kernel void @test_store_ptr(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+bb:
+; CHECK-LABEL: @test_store_ptr
+; GEPs are supported so the alignment is changed from 2 to 4
+; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
+; This store uses a pointer not as an address but as a value to store!
+; CHECK: store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
+ %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+ %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+ store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
+ store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
+ ret void
+}
+
declare void @llvm.assume(i1 noundef)
>From 413806c12d66016d9249e81d5295a9c2d73c2028 Mon Sep 17 00:00:00 2001
From: Alexander <alexander.timofeev at amd.com>
Date: Thu, 30 Nov 2023 21:04:07 +0100
Subject: [PATCH 5/7] AlignmentFromAssumptions. Change AliasAnalysis to
instruction list
---
.../Scalar/AlignmentFromAssumptions.h | 4 +--
.../Scalar/AlignmentFromAssumptions.cpp | 17 ++++++-----
.../alignment-from-assumptions_dont_crash.ll | 28 +++++++++++++++++++
3 files changed, 37 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index 83ee9c26fbd11..10b6e1c6a21b6 100644
--- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -25,7 +25,6 @@ class AssumptionCache;
class DominatorTree;
class ScalarEvolution;
class SCEV;
-class AAResults;
struct AlignmentFromAssumptionsPass
: public PassInfoMixin<AlignmentFromAssumptionsPass> {
@@ -33,11 +32,10 @@ struct AlignmentFromAssumptionsPass
// Glue for old PM.
bool runImpl(Function &F, AssumptionCache &AC, ScalarEvolution *SE_,
- DominatorTree *DT_, AAResults *AA_);
+ DominatorTree *DT_);
ScalarEvolution *SE = nullptr;
DominatorTree *DT = nullptr;
- AAResults *AA = nullptr;
bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr,
const SCEV *&AlignSCEV, const SCEV *&OffSCEV);
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index dc24c0ffec6fb..c7e954d8856e3 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -262,14 +262,15 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
// Now that we've updated that use of the pointer, look for other uses of
// the pointer to update.
+ Visited.insert(J);
if (auto UJ = dyn_cast<User>(J))
for (auto &U : UJ->uses()) {
if (U->getType()->isPointerTy()) {
- if (StoreInst *SI = dyn_cast<StoreInst>(U.getUser())) {
- if (SI->getPointerOperandIndex() != U.getOperandNo())
- continue;
- }
- if (AA->alias(U, AAPtr)) {
+ if (isa<GetElementPtrInst>(U.getUser()) ||
+ isa<PHINode>(U.getUser()) ||
+ isa<LoadInst>(U.getUser()) ||
+ isa<StoreInst>(U.getUser()) ||
+ isa<MemIntrinsic>(U.getUser())) {
Instruction *K = cast<Instruction>(U.getUser());
if (!Visited.count(K))
WorkList.push_back(K);
@@ -283,10 +284,9 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
ScalarEvolution *SE_,
- DominatorTree *DT_, AAResults *AA_) {
+ DominatorTree *DT_) {
SE = SE_;
DT = DT_;
- AA = AA_;
bool Changed = false;
for (auto &AssumeVH : AC.assumptions())
@@ -305,8 +305,7 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
- AAResults &AA = AM.getResult<AAManager>(F);
- if (!runImpl(F, AC, &SE, &DT, &AA))
+ if (!runImpl(F, AC, &SE, &DT))
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index 2e8355a42b7ba..a282584de9c03 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -43,6 +43,34 @@ bb3:
ret void
}
+define amdgpu_kernel void @test_loop_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: @test_loop_phi
+; PHI is supported - align 2 changed to 4
+; CHECK: load i32, ptr addrspace(1) %gep, align 4
+bb:
+ %ptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
+ %end = getelementptr i32, ptr addrspace(1) %arg, i32 10
+ %cond = icmp ugt i32 %idx, 10
+ br i1 %cond, label %bb1, label %bb2
+
+bb1:
+ %ptr1 = phi ptr addrspace(1) [%ptr, %bb], [%ptr2, %bb1]
+ %acc1 = phi i32 [0, %bb], [%acc2, %bb1]
+ %gep = getelementptr i32, ptr addrspace(1) %ptr1, i32 4
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %val = load i32, ptr addrspace(1) %gep, align 2
+ %acc2 = add i32 %acc1, %val
+ %ptr2 = getelementptr i32, ptr addrspace(1) %ptr1, i32 %idx
+ %exit = icmp eq ptr addrspace(1) %ptr2, %end
+ br i1 %exit, label %bb1, label %bb2
+
+bb2:
+ %sum = phi i32 [0, %bb], [%acc2, %bb1]
+ %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+ store i32 %sum, ptr addrspace(3) %tmp4, align 4
+ ret void
+}
+
define amdgpu_kernel void @test_select(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
; CHECK-LABEL: @test_select
; select is not supported - align 2 not changed
>From 88ad7d9f0d21b5b84783224042f11f0c43241eab Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Fri, 1 Dec 2023 18:05:58 +0100
Subject: [PATCH 6/7] AlignmentFromAssumptions. Store pointer operand index
check added. Test added accordingly.
---
.../Scalar/AlignmentFromAssumptions.cpp | 10 +++----
.../alignment-from-assumptions_dont_crash.ll | 30 +++++++++++++------
2 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index c7e954d8856e3..d3d71fe922156 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -266,11 +266,11 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
if (auto UJ = dyn_cast<User>(J))
for (auto &U : UJ->uses()) {
if (U->getType()->isPointerTy()) {
- if (isa<GetElementPtrInst>(U.getUser()) ||
- isa<PHINode>(U.getUser()) ||
- isa<LoadInst>(U.getUser()) ||
- isa<StoreInst>(U.getUser()) ||
- isa<MemIntrinsic>(U.getUser())) {
+ StoreInst *SI = dyn_cast<StoreInst>(U.getUser());
+ if ((SI && SI->getPointerOperandIndex() == U.getOperandNo()) ||
+ isa<GetElementPtrInst>(U.getUser()) ||
+ isa<PHINode>(U.getUser()) || isa<LoadInst>(U.getUser()) ||
+ isa<MemIntrinsic>(U.getUser())) {
Instruction *K = cast<Instruction>(U.getUser());
if (!Visited.count(K))
WorkList.push_back(K);
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
index a282584de9c03..f07e16d01ddf5 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
@@ -103,20 +103,32 @@ bb:
ret void
}
-define amdgpu_kernel void @test_store_ptr(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+define amdgpu_kernel void @test_load_store_ptr_as_val(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
bb:
-; CHECK-LABEL: @test_store_ptr
-; GEPs are supported so the alignment is changed from 2 to 4
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
+; CHECK-LABEL: @test_load_store_ptr_as_val
; This store uses a pointer not as an address but as a value to store!
-; CHECK: store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
- %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+; CHECK: store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2
+ %tmp2 = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
- %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
+ %tmp3 = load ptr addrspace(1), ptr addrspace(1) %tmp2, align 2
%tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
- store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
- store ptr addrspace(1) %tmp2, ptr addrspace(3) %tmp4, align 2
+ store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2
ret void
}
+define amdgpu_kernel void @test_load_store_ptr_as_addr(ptr addrspace(1) nocapture readonly %arg, i32 %valToStore) {
+; CHECK-LABEL: @test_load_store_ptr_as_addr
+; CHECK: %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 4
+; store uses %tmp3 as an address BUT the %arg and %tmp3 have different address spaces
+; so the align 2 is not changed
+; CHECK: store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
+bb:
+ %tmp2 = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 2
+ store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
+ ret void
+}
+
+
declare void @llvm.assume(i1 noundef)
>From c1016ec0b951c2af5e88ee6963eddc705b41f57a Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <alexander.timofeev at amd.com>
Date: Mon, 4 Dec 2023 16:34:58 +0100
Subject: [PATCH 7/7] AlignmentFromAssumptions. Test update
---
.../alignment-from-assumptions-track-users.ll | 206 ++++++++++++++++++
.../alignment-from-assumptions_dont_crash.ll | 134 ------------
2 files changed, 206 insertions(+), 134 deletions(-)
create mode 100644 llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll
delete mode 100644 llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll
new file mode 100644
index 0000000000000..3c825baab5433
--- /dev/null
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions-track-users.ll
@@ -0,0 +1,206 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
+; ModuleID = 'alignment-from-assumptions_dont_crash.ll'
+source_filename = "alignment-from-assumptions_dont_crash.ll"
+
+define amdgpu_kernel void @widget(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: define amdgpu_kernel void @widget(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(3) nocapture [[ARG1:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[GETELEMENTPTR]], i64 4) ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR2:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG1]], i32 1
+; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR2]], align 4
+; CHECK-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %getelementptr, i64 4) ]
+ %load = load i32, ptr addrspace(1) %getelementptr, align 2
+ %getelementptr2 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+ store i32 %load, ptr addrspace(3) %getelementptr2, align 4
+ ret void
+}
+
+define amdgpu_kernel void @wibble(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define amdgpu_kernel void @wibble(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT: br i1 [[ICMP]], label [[BB4:%.*]], label [[BB5:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 6
+; CHECK-NEXT: br label [[BB7:%.*]]
+; CHECK: bb5:
+; CHECK-NEXT: [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 7
+; CHECK-NEXT: br label [[BB7]]
+; CHECK: bb7:
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], [[BB4]] ], [ [[GETELEMENTPTR6]], [[BB5]] ]
+; CHECK-NEXT: [[GETELEMENTPTR8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PHI]], i64 4
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR8]], align 2
+; CHECK-NEXT: [[GETELEMENTPTR9:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR9]], align 4
+; CHECK-NEXT: ret void
+;
+bb:
+ %icmp = icmp ugt i32 %arg2, 10
+ br i1 %icmp, label %bb4, label %bb5
+
+bb4: ; preds = %bb
+ %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 6
+ br label %bb7
+
+bb5: ; preds = %bb
+ %getelementptr6 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+ br label %bb7
+
+bb7: ; preds = %bb5, %bb4
+ %phi = phi ptr addrspace(1) [ %getelementptr, %bb4 ], [ %getelementptr6, %bb5 ]
+ %getelementptr8 = getelementptr inbounds i32, ptr addrspace(1) %phi, i64 4
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %load = load i32, ptr addrspace(1) %getelementptr8, align 2
+ %getelementptr9 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+ store i32 %load, ptr addrspace(3) %getelementptr9, align 4
+ ret void
+}
+
+define amdgpu_kernel void @ham(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define amdgpu_kernel void @ham(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 10
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT: br i1 [[ICMP]], label [[BB5:%.*]], label [[BB10:%.*]]
+; CHECK: bb5:
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(1) [ [[GETELEMENTPTR]], [[BB:%.*]] ], [ [[GETELEMENTPTR8:%.*]], [[BB5]] ]
+; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[ADD:%.*]], [[BB5]] ]
+; CHECK-NEXT: [[GETELEMENTPTR7:%.*]] = getelementptr i32, ptr addrspace(1) [[PHI]], i32 4
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR7]], align 4
+; CHECK-NEXT: [[ADD]] = add i32 [[PHI6]], [[LOAD]]
+; CHECK-NEXT: [[GETELEMENTPTR8]] = getelementptr i32, ptr addrspace(1) [[PHI]], i32 [[ARG2]]
+; CHECK-NEXT: [[ICMP9:%.*]] = icmp eq ptr addrspace(1) [[GETELEMENTPTR8]], [[GETELEMENTPTR4]]
+; CHECK-NEXT: br i1 [[ICMP9]], label [[BB5]], label [[BB10]]
+; CHECK: bb10:
+; CHECK-NEXT: [[PHI11:%.*]] = phi i32 [ 0, [[BB]] ], [ [[ADD]], [[BB5]] ]
+; CHECK-NEXT: [[GETELEMENTPTR12:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT: store i32 [[PHI11]], ptr addrspace(3) [[GETELEMENTPTR12]], align 4
+; CHECK-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
+ %getelementptr4 = getelementptr i32, ptr addrspace(1) %arg, i32 10
+ %icmp = icmp ugt i32 %arg2, 10
+ br i1 %icmp, label %bb5, label %bb10
+
+bb5: ; preds = %bb5, %bb
+ %phi = phi ptr addrspace(1) [ %getelementptr, %bb ], [ %getelementptr8, %bb5 ]
+ %phi6 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+ %getelementptr7 = getelementptr i32, ptr addrspace(1) %phi, i32 4
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %load = load i32, ptr addrspace(1) %getelementptr7, align 2
+ %add = add i32 %phi6, %load
+ %getelementptr8 = getelementptr i32, ptr addrspace(1) %phi, i32 %arg2
+ %icmp9 = icmp eq ptr addrspace(1) %getelementptr8, %getelementptr4
+ br i1 %icmp9, label %bb5, label %bb10
+
+bb10: ; preds = %bb5, %bb
+ %phi11 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+ %getelementptr12 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+ store i32 %phi11, ptr addrspace(3) %getelementptr12, align 4
+ ret void
+}
+
+define amdgpu_kernel void @quux(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define amdgpu_kernel void @quux(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i32 [[ARG2]], 10
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 6
+; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr addrspace(1) [[ARG]], i32 7
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], ptr addrspace(1) [[GETELEMENTPTR]], ptr addrspace(1) [[GETELEMENTPTR4]]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SELECT]], align 2
+; CHECK-NEXT: [[GETELEMENTPTR5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR5]], align 4
+; CHECK-NEXT: ret void
+;
+bb:
+ %icmp = icmp ugt i32 %arg2, 10
+ %getelementptr = getelementptr i32, ptr addrspace(1) %arg, i32 6
+ %getelementptr4 = getelementptr i32, ptr addrspace(1) %arg, i32 7
+ %select = select i1 %icmp, ptr addrspace(1) %getelementptr, ptr addrspace(1) %getelementptr4
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %load = load i32, ptr addrspace(1) %select, align 2
+ %getelementptr5 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+ store i32 %load, ptr addrspace(3) %getelementptr5, align 4
+ ret void
+}
+
+define amdgpu_kernel void @widget.1(ptr addrspace(1) nocapture readonly %arg, i32 %arg2, ptr addrspace(3) nocapture %arg3) {
+; CHECK-LABEL: define amdgpu_kernel void @widget.1(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG2:%.*]], ptr addrspace(3) nocapture [[ARG3:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[ADDRSPACECAST:%.*]] = addrspacecast ptr addrspace(3) [[ARG3]] to ptr addrspace(1)
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr addrspace(1) [[ADDRSPACECAST]]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) [[ARG3]], i64 4) ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GETELEMENTPTR]], align 2
+; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG3]], i32 1
+; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR4]], align 4
+; CHECK-NEXT: ret void
+;
+bb:
+ %addrspacecast = addrspacecast ptr addrspace(3) %arg3 to ptr addrspace(1)
+ %getelementptr = getelementptr i32, ptr addrspace(1) %addrspacecast
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg3, i64 4) ]
+ %load = load i32, ptr addrspace(1) %getelementptr, align 2
+ %getelementptr4 = getelementptr inbounds i32, ptr addrspace(3) %arg3, i32 1
+ store i32 %load, ptr addrspace(3) %getelementptr4, align 2
+ ret void
+}
+
+define amdgpu_kernel void @baz(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
+; CHECK-LABEL: define amdgpu_kernel void @baz(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(3) nocapture [[ARG1:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[ARG]], i64 16
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR2:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[ARG1]], i32 1
+; CHECK-NEXT: store ptr addrspace(1) [[LOAD]], ptr addrspace(3) [[GETELEMENTPTR2]], align 2
+; CHECK-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %load = load ptr addrspace(1), ptr addrspace(1) %getelementptr, align 2
+ %getelementptr2 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
+ store ptr addrspace(1) %load, ptr addrspace(3) %getelementptr2, align 2
+ ret void
+}
+
+define amdgpu_kernel void @foo(ptr addrspace(1) nocapture readonly %arg, i32 %arg1) {
+; CHECK-LABEL: define amdgpu_kernel void @foo(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(3), ptr addrspace(1) [[ARG]], i64 16
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) [[ARG]], i64 4) ]
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[GETELEMENTPTR]], align 4
+; CHECK-NEXT: store i32 [[ARG1]], ptr addrspace(3) [[LOAD]], align 2
+; CHECK-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
+ call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
+ %load = load ptr addrspace(3), ptr addrspace(1) %getelementptr, align 2
+ store i32 %arg1, ptr addrspace(3) %load, align 2
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
deleted file mode 100644
index f07e16d01ddf5..0000000000000
--- a/llvm/test/Transforms/AlignmentFromAssumptions/alignment-from-assumptions_dont_crash.ll
+++ /dev/null
@@ -1,134 +0,0 @@
-; Test that we don't crash.
-; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
-
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
-target triple = "amdgcn-amd-amdhsa"
-
-define amdgpu_kernel void @test_gep(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
-; CHECK-LABEL: @test_gep
-; GEPs are supported so the alignment is changed from 2 to 4
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 4
-bb:
- %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
- call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %tmp2, i64 4) ]
- %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
- %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
- store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
- ret void
-}
-
-define amdgpu_kernel void @test_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
-; CHECK-LABEL: @test_phi
-; PHI is not supported - align 2 not changed
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
-bb:
- %cond = icmp ugt i32 %idx, 10
- br i1 %cond, label %bb1, label %bb2
-
-bb1:
- %gep1 = getelementptr i32, ptr addrspace(1) %arg, i32 6
- br label %bb3
-
-bb2:
- %gep2 = getelementptr i32, ptr addrspace(1) %arg, i32 7
- br label %bb3
-
-bb3:
- %gep3 = phi ptr addrspace(1) [%gep1, %bb1], [%gep2, %bb2]
- %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %gep3, i64 4
- call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
- %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
- %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
- store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
- ret void
-}
-
-define amdgpu_kernel void @test_loop_phi(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
-; CHECK-LABEL: @test_loop_phi
-; PHI is supported - align 2 changed to 4
-; CHECK: load i32, ptr addrspace(1) %gep, align 4
-bb:
- %ptr = getelementptr i32, ptr addrspace(1) %arg, i32 0
- %end = getelementptr i32, ptr addrspace(1) %arg, i32 10
- %cond = icmp ugt i32 %idx, 10
- br i1 %cond, label %bb1, label %bb2
-
-bb1:
- %ptr1 = phi ptr addrspace(1) [%ptr, %bb], [%ptr2, %bb1]
- %acc1 = phi i32 [0, %bb], [%acc2, %bb1]
- %gep = getelementptr i32, ptr addrspace(1) %ptr1, i32 4
- call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
- %val = load i32, ptr addrspace(1) %gep, align 2
- %acc2 = add i32 %acc1, %val
- %ptr2 = getelementptr i32, ptr addrspace(1) %ptr1, i32 %idx
- %exit = icmp eq ptr addrspace(1) %ptr2, %end
- br i1 %exit, label %bb1, label %bb2
-
-bb2:
- %sum = phi i32 [0, %bb], [%acc2, %bb1]
- %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
- store i32 %sum, ptr addrspace(3) %tmp4, align 4
- ret void
-}
-
-define amdgpu_kernel void @test_select(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
-; CHECK-LABEL: @test_select
-; select is not supported - align 2 not changed
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
-bb:
- %cond = icmp ugt i32 %idx, 10
- %off1_gep = getelementptr i32, ptr addrspace(1) %arg, i32 6
- %off2_gep = getelementptr i32, ptr addrspace(1) %arg, i32 7
- %tmp2 = select i1 %cond, ptr addrspace(1) %off1_gep, ptr addrspace(1) %off2_gep
- call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
- %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
- %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
- store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
- ret void
-}
-
-define amdgpu_kernel void @test_cast(ptr addrspace(1) nocapture readonly %arg, i32 %idx, ptr addrspace(3) nocapture %arg1) {
-bb:
-; CHECK-LABEL: @test_cast
-; addrspacecast is not supported - align 2 not changed
-; CHECK: load i32, ptr addrspace(1) %tmp2, align 2
-; store is a user of the GEP so, align 2 is changed to 4
-; CHECK: store i32 %tmp3, ptr addrspace(3) %tmp4, align 4
- %cast = addrspacecast ptr addrspace(3) %arg1 to ptr addrspace(1)
- %tmp2 = getelementptr i32, ptr addrspace(1) %cast
- call void @llvm.assume(i1 true) [ "align"(ptr addrspace(3) %arg1, i64 4) ]
- %tmp3 = load i32, ptr addrspace(1) %tmp2, align 2
- %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
- store i32 %tmp3, ptr addrspace(3) %tmp4, align 2
- ret void
-}
-
-define amdgpu_kernel void @test_load_store_ptr_as_val(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) {
-bb:
-; CHECK-LABEL: @test_load_store_ptr_as_val
-; This store uses a pointer not as adress but as a value to store!
-; CHECK: store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2
- %tmp2 = getelementptr ptr addrspace(1), ptr addrspace(1) %arg, i64 16
- call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
- %tmp3 = load ptr addrspace(1), ptr addrspace(1) %tmp2, align 2
- %tmp4 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 1
- store ptr addrspace(1) %tmp3, ptr addrspace(3) %tmp4, align 2
- ret void
-}
-
-define amdgpu_kernel void @test_load_store_ptr_as_addr(ptr addrspace(1) nocapture readonly %arg, i32 %valToStore) {
-; CHECK-LABEL: @test_load_store_ptr_as_addr
-; CHECK: %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 4
-; store uses %tmp3 as an address BUT the %arg and %tmp3 have different address spaces
-; so, the align 2 is not changed
-; CHECK: store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
-bb:
- %tmp2 = getelementptr ptr addrspace(3), ptr addrspace(1) %arg, i64 16
- call void @llvm.assume(i1 true) [ "align"(ptr addrspace(1) %arg, i64 4) ]
- %tmp3 = load ptr addrspace(3), ptr addrspace(1) %tmp2, align 2
- store i32 %valToStore, ptr addrspace(3) %tmp3, align 2
- ret void
-}
-
-
-declare void @llvm.assume(i1 noundef)
More information about the llvm-commits
mailing list