[llvm] [AMDGPU] Skip lowerNonKernelLDSAccesses if function is declaration. (PR #106975)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 5 22:46:08 PDT 2024


https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/106975

>From 010b2787204f821f7273b9e4800ff887cdb51d9d Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 2 Sep 2024 17:30:49 +0530
Subject: [PATCH 1/5] [AMDGPU] Skip lowerNonKernelLDSAccesses if function is
 declaration.

---
 llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index b2ab7e9c03e528..ca796200ee6c6f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -1218,6 +1218,8 @@ bool AMDGPUSwLowerLDS::run() {
     for (auto &K : FuncLDSAccessInfo.NonKernelToLDSAccessMap) {
       Function *Func = K.first;
       DenseSet<GlobalVariable *> &LDSGlobals = K.second;
+      if (Func->isDeclaration() || LDSGlobals.empty())
+        continue;
       SetVector<GlobalVariable *> OrderedLDSGlobals = sortByName(
           std::vector<GlobalVariable *>(LDSGlobals.begin(), LDSGlobals.end()));
       lowerNonKernelLDSAccesses(Func, OrderedLDSGlobals, NKLDSParams);
@@ -1226,6 +1228,8 @@ bool AMDGPUSwLowerLDS::run() {
       auto &K = FuncLDSAccessInfo.NonKernelToLDSAccessMap;
       if (K.find(Func) != K.end())
         continue;
+      if (Func->isDeclaration())
+        continue;
       SetVector<llvm::GlobalVariable *> Vec;
       lowerNonKernelLDSAccesses(Func, Vec, NKLDSParams);
     }

>From e3f18649c6e8205632ffc2f16618418b226e37ce Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 4 Sep 2024 19:40:01 +0530
Subject: [PATCH 2/5] [AMDGPU] Skip adding function to kernelToLDS maps if it's
 a declaration.

---
 llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
 mode change 100644 => 100755 llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
old mode 100644
new mode 100755
index ca796200ee6c6f..d311e129127167
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -274,6 +274,8 @@ void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) {
       Function *CalledFunc = CallerCGN->getFunction();
       if (!CalledFunc)
         continue;
+      if (CalledFunc->isDeclaration())
+        continue;
       if (AMDGPU::isKernelLDS(CalledFunc))
         continue;
       for (auto AI = CalledFunc->arg_begin(), E = CalledFunc->arg_end();
@@ -300,7 +302,8 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
     for (User *V : GV->users()) {
       if (auto *I = dyn_cast<Instruction>(V)) {
         Function *F = I->getFunction();
-        if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress))
+        if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
+            !(F->isDeclaration()))
           FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
       }
     }
@@ -1218,8 +1221,6 @@ bool AMDGPUSwLowerLDS::run() {
     for (auto &K : FuncLDSAccessInfo.NonKernelToLDSAccessMap) {
       Function *Func = K.first;
       DenseSet<GlobalVariable *> &LDSGlobals = K.second;
-      if (Func->isDeclaration() || LDSGlobals.empty())
-        continue;
       SetVector<GlobalVariable *> OrderedLDSGlobals = sortByName(
           std::vector<GlobalVariable *>(LDSGlobals.begin(), LDSGlobals.end()));
       lowerNonKernelLDSAccesses(Func, OrderedLDSGlobals, NKLDSParams);
@@ -1228,8 +1229,6 @@ bool AMDGPUSwLowerLDS::run() {
       auto &K = FuncLDSAccessInfo.NonKernelToLDSAccessMap;
       if (K.find(Func) != K.end())
         continue;
-      if (Func->isDeclaration())
-        continue;
       SetVector<llvm::GlobalVariable *> Vec;
       lowerNonKernelLDSAccesses(Func, Vec, NKLDSParams);
     }

>From 6666925300fae28d4cb51b3506019e2189c492ae Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 4 Sep 2024 20:02:51 +0530
Subject: [PATCH 3/5] [AMDGPU] Changes as per review comments:1

---
 llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index d311e129127167..e75b70b00c3ef7 100755
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -272,9 +272,7 @@ void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) {
     for (auto &I : *CGN) {
       CallGraphNode *CallerCGN = I.second;
       Function *CalledFunc = CallerCGN->getFunction();
-      if (!CalledFunc)
-        continue;
-      if (CalledFunc->isDeclaration())
+      if (!CalledFunc || CalledFunc->isDeclaration())
         continue;
       if (AMDGPU::isKernelLDS(CalledFunc))
         continue;
@@ -303,7 +301,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
       if (auto *I = dyn_cast<Instruction>(V)) {
         Function *F = I->getFunction();
         if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
-            !(F->isDeclaration()))
+            !F->isDeclaration())
           FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
       }
     }

>From 237a34d17992727b8dd9292094055fc152026b73 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 6 Sep 2024 11:03:54 +0530
Subject: [PATCH 4/5] [AMDGPU] Add test case for non-kernel and declaration.

---
 ...gpu-sw-lower-lds-non-kernel-declaration.ll | 97 +++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
new file mode 100644
index 00000000000000..bd878d509579b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+@lds = external addrspace(3) global [5 x i8], align 8
+declare void @kernel_declaration() sanitize_address
+
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k1.md.type { %llvm.amdgcn.sw.lds.k1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k1.md.item { i32 32, i32 5, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k1], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [1 x ptr addrspace(1)]] [[1 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0)]], no_sanitize_address
+;.
+define void @non_kernel_function() sanitize_address {
+; CHECK-LABEL: define void @non_kernel_function(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x [1 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
+; CHECK-NEXT:    [[Y:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
+; CHECK-NEXT:    [[TMP9:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
+; CHECK-NEXT:    store i8 5, ptr [[TMP9]], align 8
+; CHECK-NEXT:    ret void
+;
+  %Y = addrspacecast ptr addrspace(3) @lds to ptr
+  store i8 5, ptr addrspacecast( ptr addrspace(3) @lds to ptr), align 8
+  ret void
+}
+
+define amdgpu_kernel void @k1() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k1(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK:       [[MALLOC]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 37
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 27)
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT:    call void @non_kernel_function()
+; CHECK-NEXT:    call void @kernel_declaration()
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+  call void @non_kernel_function()
+  call void @kernel_declaration()
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[META2]] = !{i32 0}
+;.

>From da60e9544c11a71f41249d64777f6058a1205678 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 6 Sep 2024 11:14:11 +0530
Subject: [PATCH 5/5] [AMDGPU] Update test name.

---
 .../AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
index bd878d509579b0..ae2bcbbb81b5f1 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-sw-lower-lds < %s | FileCheck %s
 @lds = external addrspace(3) global [5 x i8], align 8
-declare void @kernel_declaration() sanitize_address
+declare void @non_kernel_declaration() sanitize_address
 
 ;.
 ; CHECK: @llvm.amdgcn.sw.lds.k1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
@@ -63,7 +63,7 @@ define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
 ; CHECK-NEXT:    call void @non_kernel_function()
-; CHECK-NEXT:    call void @kernel_declaration()
+; CHECK-NEXT:    call void @non_kernel_declaration()
 ; CHECK-NEXT:    br label %[[CONDFREE:.*]]
 ; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
@@ -78,7 +78,7 @@ define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-NEXT:    ret void
 ;
   call void @non_kernel_function()
-  call void @kernel_declaration()
+  call void @non_kernel_declaration()
   ret void
 }
 



More information about the llvm-commits mailing list