[llvm] a1da746 - [AMDGPU] Place global constructors in .init_array and .fini_array
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 29 06:40:29 PDT 2023
Author: Joseph Huber
Date: 2023-04-29T08:40:19-05:00
New Revision: a1da7461571cf1763136e22a018a20a271bb70b9
URL: https://github.com/llvm/llvm-project/commit/a1da7461571cf1763136e22a018a20a271bb70b9
DIFF: https://github.com/llvm/llvm-project/commit/a1da7461571cf1763136e22a018a20a271bb70b9.diff
LOG: [AMDGPU] Place global constructors in .init_array and .fini_array
For the GPU, we emit external kernels that call the initializers and
constructors. However, if we had a persistent kernel, like the `_start`
kernel in the `libc` project, we could call the constructors in the
standard way. This patch adds new global variables containing
pointers to the constructors to be called. If these are placed in the
`.init_array` and `.fini_array` sections, then the backend will handle
them specially. The linker will then provide the `__init_array_start` /
`__init_array_end` and `__fini_array_start` / `__fini_array_end` symbols
needed to traverse them. An implementation would look like this:
```
extern uintptr_t __init_array_start[];
extern uintptr_t __init_array_end[];
extern uintptr_t __fini_array_start[];
extern uintptr_t __fini_array_end[];
using InitCallback = void(int, char **, char **);
using FiniCallback = void(void);
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
_start(int argc, char **argv, char **envp) {
uint64_t init_array_size = __init_array_end - __init_array_start;
for (uint64_t i = 0; i < init_array_size; ++i)
reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, envp);
uint64_t fini_array_size = __fini_array_end - __fini_array_start;
for (uint64_t i = 0; i < fini_array_size; ++i)
reinterpret_cast<FiniCallback *>(__fini_array_start[i])();
}
```
Reviewed By: yaxunl
Differential Revision: https://reviews.llvm.org/D149340
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index 39736aed9969f..ceb7dfcf12f67 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -31,6 +31,8 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
StringRef InitOrFiniKernelName = "amdgcn.device.init";
if (!IsCtor)
InitOrFiniKernelName = "amdgcn.device.fini";
+ if (Function *F = M.getFunction(InitOrFiniKernelName))
+ return F;
Function *InitOrFiniKernel = Function::createWithDefaultAttr(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
@@ -63,12 +65,18 @@ static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,
for (Value *V : GA->operands()) {
auto *CS = cast<ConstantStruct>(V);
- IRB.CreateCall(ConstructorTy, CS->getOperand(1));
+ bool AlreadyRegistered =
+ llvm::any_of(CS->getOperand(1)->uses(), [=](Use &U) {
+ if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+ if (CB->getCaller() == InitOrFiniKernel)
+ return true;
+ return false;
+ });
+ if (!AlreadyRegistered)
+ IRB.CreateCall(ConstructorTy, CS->getOperand(1));
}
appendToUsed(M, {InitOrFiniKernel});
-
- GV->eraseFromParent();
return true;
}
@@ -83,9 +91,7 @@ class AMDGPUCtorDtorLoweringLegacy final : public ModulePass {
public:
static char ID;
AMDGPUCtorDtorLoweringLegacy() : ModulePass(ID) {}
- bool runOnModule(Module &M) override {
- return lowerCtorsAndDtors(M);
- }
+ bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); }
};
} // End anonymous namespace
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
index bf3f98223be23..825ebf203a3ba 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -18,8 +18,6 @@
@foo.alias = hidden alias void (), ptr @foo
;.
-; CHECK-NOT: @llvm.global_ctors
-; CHECK-NOT: @llvm.global_dtors
; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
; CHECK: @foo.alias = hidden alias void (), ptr @foo
;.
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
index 5fa23e4ae8235..f0919b971b104 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
@@ -7,25 +7,17 @@
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
-; CHECK-NOT: @llvm.global_ctors
-; CHECK-NOT: @llvm.global_dtors
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
-; CHECK-NEXT: store
-; CHECK-NEXT: ret void
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
-; CHECK-NEXT: store
-; CHECK-NEXT: ret void
-
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init.1() #0
-; CHECK-NEXT: call void @foo
-; CHECK-NEXT: ret void
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini.2() #1
-; CHECK-NEXT: call void @bar
-; CHECK-NEXT: ret void
+; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0 {
+; CHECK-NEXT: store volatile i32 1, ptr addrspace(1) null
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define amdgpu_kernel void @amdgcn.device.fini() #1 {
+; CHECK-NEXT: store volatile i32 0, ptr addrspace(1) null
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
; CHECK-NOT: amdgcn.device.
@@ -34,12 +26,6 @@
; CHECK-VIS: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini{{$}}
; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd{{$}}
-; CHECK-VIS: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.init.1{{$}}
-; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.1.kd{{$}}
-; CHECK-VIS: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini.2{{$}}
-; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.2.kd{{$}}
-
-
define internal void @foo() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
index f230d8d5e2dda..a197727eaec37 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -4,12 +4,12 @@
; Make sure we get the same result if we run multiple times
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-ctor-dtor,amdgpu-lower-ctor-dtor < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=VISIBILITY
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -S - 2>&1 | FileCheck %s -check-prefix=SECTION
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
-; CHECK-NOT: @llvm.global_ctors
-; CHECK-NOT: @llvm.global_dtors
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
; CHECK-NEXT: call void @foo
@@ -25,6 +25,8 @@
; VISIBILITY: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
; VISIBILITY: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini
; VISIBILITY: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
+; SECTION: .init_array.1 INIT_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
+; SECTION: .fini_array.1 FINI_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
define internal void @foo() {
ret void
More information about the llvm-commits
mailing list