[PATCH] D151153: [AMDGPU] Add attribute to AMDGPU ctor / dtor to indicate single threadedness
Joseph Huber via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon May 22 13:59:20 PDT 2023
jhuber6 updated this revision to Diff 524481.
jhuber6 added a comment.
Adding check on the elf notes
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D151153/new/
https://reviews.llvm.org/D151153
Files:
llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
Index: llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
+++ llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -57,5 +57,5 @@
ret void
}
-; CHECK: attributes #0 = { "device-init" }
-; CHECK: attributes #1 = { "device-fini" }
+; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
+; CHECK: attributes #1 = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
Index: llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -5,6 +5,7 @@
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-ctor-dtor,amdgpu-lower-ctor-dtor < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=VISIBILITY
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -S - 2>&1 | FileCheck %s -check-prefix=SECTION
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - 2>&1 | FileCheck %s -check-prefix=METADATA
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
@@ -49,6 +50,15 @@
; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.fini.kd
; SECTION: .init_array.1 INIT_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
; SECTION: .fini_array.1 FINI_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
+; METADATA: amdhsa.kernels:
+; METADATA: .kind: init
+; METADATA: .max_flat_workgroup_size: 1
+; METADATA: .name: amdgcn.device.init
+; METADATA: .symbol: amdgcn.device.init.kd
+; METADATA: .kind: fini
+; METADATA: .max_flat_workgroup_size: 1
+; METADATA: .name: amdgcn.device.fini
+; METADATA: .symbol: amdgcn.device.fini.kd
define internal void @foo() {
ret void
@@ -58,5 +68,5 @@
ret void
}
-; CHECK: attributes #0 = { "device-init" }
-; CHECK: attributes #1 = { "device-fini" }
+; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
+; CHECK: attributes #1 = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
Index: llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -64,5 +64,5 @@
; CHECK: while.end:
; CHECK-NEXT: ret void
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { "device-init" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { "device-fini" }
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
Index: llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -38,6 +38,7 @@
FunctionType::get(Type::getVoidTy(M.getContext()), false),
GlobalValue::WeakODRLinkage, 0, InitOrFiniKernelName, &M);
InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);
+ InitOrFiniKernel->addFnAttr("amdgpu-flat-work-group-size", "1,1");
if (IsCtor)
InitOrFiniKernel->addFnAttr("device-init");
else
@@ -58,6 +59,7 @@
// void call_init_array_callbacks() {
// for (auto start = __init_array_start; start != __init_array_end; ++start)
// reinterpret_cast<InitCallback *>(*start)();
+// }
static void createInitOrFiniCalls(Function &F, bool IsCtor) {
Module &M = *F.getParent();
LLVMContext &C = M.getContext();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D151153.524481.patch
Type: text/x-patch
Size: 4089 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230522/8d3f96a7/attachment.bin>
More information about the llvm-commits
mailing list