[llvm] [AMDGPU] Skip lowerNonKernelLDSAccesses if function is declaration. (PR #106975)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 22:46:08 PDT 2024
https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/106975
>From 010b2787204f821f7273b9e4800ff887cdb51d9d Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 2 Sep 2024 17:30:49 +0530
Subject: [PATCH 1/5] [AMDGPU] Skip lowerNonKernelLDSAccesses if function is
declaration.
---
llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index b2ab7e9c03e528..ca796200ee6c6f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -1218,6 +1218,8 @@ bool AMDGPUSwLowerLDS::run() {
for (auto &K : FuncLDSAccessInfo.NonKernelToLDSAccessMap) {
Function *Func = K.first;
DenseSet<GlobalVariable *> &LDSGlobals = K.second;
+ if (Func->isDeclaration() || LDSGlobals.empty())
+ continue;
SetVector<GlobalVariable *> OrderedLDSGlobals = sortByName(
std::vector<GlobalVariable *>(LDSGlobals.begin(), LDSGlobals.end()));
lowerNonKernelLDSAccesses(Func, OrderedLDSGlobals, NKLDSParams);
@@ -1226,6 +1228,8 @@ bool AMDGPUSwLowerLDS::run() {
auto &K = FuncLDSAccessInfo.NonKernelToLDSAccessMap;
if (K.find(Func) != K.end())
continue;
+ if (Func->isDeclaration())
+ continue;
SetVector<llvm::GlobalVariable *> Vec;
lowerNonKernelLDSAccesses(Func, Vec, NKLDSParams);
}
>From e3f18649c6e8205632ffc2f16618418b226e37ce Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 4 Sep 2024 19:40:01 +0530
Subject: [PATCH 2/5] [AMDGPU] Skip adding function to kernelToLDS maps if it's
a declaration.
---
llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
mode change 100644 => 100755 llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
old mode 100644
new mode 100755
index ca796200ee6c6f..d311e129127167
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -274,6 +274,8 @@ void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) {
Function *CalledFunc = CallerCGN->getFunction();
if (!CalledFunc)
continue;
+ if (CalledFunc->isDeclaration())
+ continue;
if (AMDGPU::isKernelLDS(CalledFunc))
continue;
for (auto AI = CalledFunc->arg_begin(), E = CalledFunc->arg_end();
@@ -300,7 +302,8 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
for (User *V : GV->users()) {
if (auto *I = dyn_cast<Instruction>(V)) {
Function *F = I->getFunction();
- if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress))
+ if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
+ !(F->isDeclaration()))
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
}
}
@@ -1218,8 +1221,6 @@ bool AMDGPUSwLowerLDS::run() {
for (auto &K : FuncLDSAccessInfo.NonKernelToLDSAccessMap) {
Function *Func = K.first;
DenseSet<GlobalVariable *> &LDSGlobals = K.second;
- if (Func->isDeclaration() || LDSGlobals.empty())
- continue;
SetVector<GlobalVariable *> OrderedLDSGlobals = sortByName(
std::vector<GlobalVariable *>(LDSGlobals.begin(), LDSGlobals.end()));
lowerNonKernelLDSAccesses(Func, OrderedLDSGlobals, NKLDSParams);
@@ -1228,8 +1229,6 @@ bool AMDGPUSwLowerLDS::run() {
auto &K = FuncLDSAccessInfo.NonKernelToLDSAccessMap;
if (K.find(Func) != K.end())
continue;
- if (Func->isDeclaration())
- continue;
SetVector<llvm::GlobalVariable *> Vec;
lowerNonKernelLDSAccesses(Func, Vec, NKLDSParams);
}
>From 6666925300fae28d4cb51b3506019e2189c492ae Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 4 Sep 2024 20:02:51 +0530
Subject: [PATCH 3/5] [AMDGPU] Changes as per review comments:1
---
llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index d311e129127167..e75b70b00c3ef7 100755
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -272,9 +272,7 @@ void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) {
for (auto &I : *CGN) {
CallGraphNode *CallerCGN = I.second;
Function *CalledFunc = CallerCGN->getFunction();
- if (!CalledFunc)
- continue;
- if (CalledFunc->isDeclaration())
+ if (!CalledFunc || CalledFunc->isDeclaration())
continue;
if (AMDGPU::isKernelLDS(CalledFunc))
continue;
@@ -303,7 +301,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
if (auto *I = dyn_cast<Instruction>(V)) {
Function *F = I->getFunction();
if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
- !(F->isDeclaration()))
+ !F->isDeclaration())
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
}
}
>From 237a34d17992727b8dd9292094055fc152026b73 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 6 Sep 2024 11:03:54 +0530
Subject: [PATCH 4/5] [AMDGPU] Add test case for non-kernel and declaration.
---
...gpu-sw-lower-lds-non-kernel-declaration.ll | 97 +++++++++++++++++++
1 file changed, 97 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
new file mode 100644
index 00000000000000..bd878d509579b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+@lds = external addrspace(3) global [5 x i8], align 8
+declare void @kernel_declaration() sanitize_address
+
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k1.md.type { %llvm.amdgcn.sw.lds.k1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k1.md.item { i32 32, i32 5, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k1], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [1 x ptr addrspace(1)]] [[1 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0)]], no_sanitize_address
+;.
+define void @non_kernel_function() sanitize_address {
+; CHECK-LABEL: define void @non_kernel_function(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x [1 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
+; CHECK-NEXT: [[Y:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
+; CHECK-NEXT: store i8 5, ptr [[TMP9]], align 8
+; CHECK-NEXT: ret void
+;
+ %Y = addrspacecast ptr addrspace(3) @lds to ptr
+ store i8 5, ptr addrspacecast( ptr addrspace(3) @lds to ptr), align 8
+ ret void
+}
+
+define amdgpu_kernel void @k1() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k1(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 37
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 27)
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT: call void @non_kernel_function()
+; CHECK-NEXT: call void @kernel_declaration()
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+ call void @non_kernel_function()
+ call void @kernel_declaration()
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[META2]] = !{i32 0}
+;.
>From da60e9544c11a71f41249d64777f6058a1205678 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 6 Sep 2024 11:14:11 +0530
Subject: [PATCH 5/5] [AMDGPU] Update test name.
---
.../AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
index bd878d509579b0..ae2bcbbb81b5f1 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-sw-lower-lds < %s | FileCheck %s
@lds = external addrspace(3) global [5 x i8], align 8
-declare void @kernel_declaration() sanitize_address
+declare void @non_kernel_declaration() sanitize_address
;.
; CHECK: @llvm.amdgcn.sw.lds.k1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
@@ -63,7 +63,7 @@ define amdgpu_kernel void @k1() sanitize_address {
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
; CHECK-NEXT: call void @non_kernel_function()
-; CHECK-NEXT: call void @kernel_declaration()
+; CHECK-NEXT: call void @non_kernel_declaration()
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
@@ -78,7 +78,7 @@ define amdgpu_kernel void @k1() sanitize_address {
; CHECK-NEXT: ret void
;
call void @non_kernel_function()
- call void @kernel_declaration()
+ call void @non_kernel_declaration()
ret void
}
More information about the llvm-commits
mailing list