[llvm] a1da746 - [AMDGPU] Place global constructors in .init_array and .fini_array

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 29 06:40:29 PDT 2023


Author: Joseph Huber
Date: 2023-04-29T08:40:19-05:00
New Revision: a1da7461571cf1763136e22a018a20a271bb70b9

URL: https://github.com/llvm/llvm-project/commit/a1da7461571cf1763136e22a018a20a271bb70b9
DIFF: https://github.com/llvm/llvm-project/commit/a1da7461571cf1763136e22a018a20a271bb70b9.diff

LOG: [AMDGPU] Place global constructors in .init_array and .fini_array

For the GPU, we emit external kernels that call the initializers and
constructors. However, if we had a persistent kernel, like the `_start`
kernel in the `libc` project, we could use the standard way of calling
constructors. This patch adds new global variables containing pointers to
the constructors to be called. If these are placed in the `.init_array` and
`.fini_array` sections, then the backend will handle them specially. The
linker will then provide the `__init_array_start` / `__init_array_end` and
`__fini_array_start` / `__fini_array_end` symbols used to traverse them. An
implementation would look like this.

```
// Linker-provided symbols that delimit the arrays of constructor and
// destructor pointers. Each entry is stored as an integer-sized address.
extern uintptr_t __init_array_start[];
extern uintptr_t __init_array_end[];
extern uintptr_t __fini_array_start[];
extern uintptr_t __fini_array_end[];

// Signatures the array entries are cast to before being called.
using InitCallback = void(int, char **, char **);
using FiniCallback = void(void);

// Kernel entry point: calls every entry in
// [__init_array_start, __init_array_end) with the program arguments, then
// every entry in [__fini_array_start, __fini_array_end) with no arguments.
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
_start(int argc, char **argv, char **envp) {
  // Pointer subtraction yields the number of entries, not a byte count.
  uint64_t init_array_size = __init_array_end - __init_array_start;
  for (uint64_t i = 0; i < init_array_size; ++i)
    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, envp);
  uint64_t fini_array_size = __fini_array_end - __fini_array_start;
  for (uint64_t i = 0; i < fini_array_size; ++i)
    reinterpret_cast<FiniCallback *>(__fini_array_start[i])();
}
```

Reviewed By: yaxunl

Differential Revision: https://reviews.llvm.org/D149340

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
    llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
    llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
    llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index 39736aed9969f..ceb7dfcf12f67 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -31,6 +31,8 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
   StringRef InitOrFiniKernelName = "amdgcn.device.init";
   if (!IsCtor)
     InitOrFiniKernelName = "amdgcn.device.fini";
+  if (Function *F = M.getFunction(InitOrFiniKernelName))
+    return F;
 
   Function *InitOrFiniKernel = Function::createWithDefaultAttr(
       FunctionType::get(Type::getVoidTy(M.getContext()), false),
@@ -63,12 +65,18 @@ static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,
 
   for (Value *V : GA->operands()) {
     auto *CS = cast<ConstantStruct>(V);
-    IRB.CreateCall(ConstructorTy, CS->getOperand(1));
+    bool AlreadyRegistered =
+        llvm::any_of(CS->getOperand(1)->uses(), [=](Use &U) {
+          if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+            if (CB->getCaller() == InitOrFiniKernel)
+              return true;
+          return false;
+        });
+    if (!AlreadyRegistered)
+      IRB.CreateCall(ConstructorTy, CS->getOperand(1));
   }
 
   appendToUsed(M, {InitOrFiniKernel});
-
-  GV->eraseFromParent();
   return true;
 }
 
@@ -83,9 +91,7 @@ class AMDGPUCtorDtorLoweringLegacy final : public ModulePass {
 public:
   static char ID;
   AMDGPUCtorDtorLoweringLegacy() : ModulePass(ID) {}
-  bool runOnModule(Module &M) override {
-    return lowerCtorsAndDtors(M);
-  }
+  bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); }
 };
 
 } // End anonymous namespace

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
index bf3f98223be23..825ebf203a3ba 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -18,8 +18,6 @@
 @foo.alias = hidden alias void (), ptr @foo
 
 ;.
-; CHECK-NOT: @llvm.global_ctors
-; CHECK-NOT: @llvm.global_dtors
 ; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
 ; CHECK: @foo.alias = hidden alias void (), ptr @foo
 ;.

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
index 5fa23e4ae8235..f0919b971b104 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
@@ -7,25 +7,17 @@
 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
 
-; CHECK-NOT: @llvm.global_ctors
-; CHECK-NOT: @llvm.global_dtors
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
-; CHECK-NEXT: store
-; CHECK-NEXT: ret void
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
-; CHECK-NEXT: store
-; CHECK-NEXT: ret void
-
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init.1() #0
-; CHECK-NEXT: call void @foo
-; CHECK-NEXT: ret void
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini.2() #1
-; CHECK-NEXT: call void @bar
-; CHECK-NEXT: ret void
+; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0 {
+; CHECK-NEXT:   store volatile i32 1, ptr addrspace(1) null
+; CHECK-NEXT:   call void @foo()
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define amdgpu_kernel void @amdgcn.device.fini() #1 {
+; CHECK-NEXT:    store volatile i32 0, ptr addrspace(1) null
+; CHECK-NEXT:    call void @bar()
+; CHECK-NEXT:    ret void
+; CHECK-NEXT:  }
 
 ; CHECK-NOT: amdgcn.device.
 
@@ -34,12 +26,6 @@
 ; CHECK-VIS: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.fini{{$}}
 ; CHECK-VIS: OBJECT   GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd{{$}}
 
-; CHECK-VIS: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.init.1{{$}}
-; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.1.kd{{$}}
-; CHECK-VIS: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.fini.2{{$}}
-; CHECK-VIS: OBJECT   GLOBAL DEFAULT {{.*}} amdgcn.device.fini.2.kd{{$}}
-
-
 define internal void @foo() {
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
index f230d8d5e2dda..a197727eaec37 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -4,12 +4,12 @@
 ; Make sure we get the same result if we run multiple times
 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-ctor-dtor,amdgpu-lower-ctor-dtor < %s | FileCheck %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=VISIBILITY
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -S - 2>&1 | FileCheck %s -check-prefix=SECTION
 
 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
 
-; CHECK-NOT: @llvm.global_ctors
-; CHECK-NOT: @llvm.global_dtors
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
 
 ; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
 ; CHECK-NEXT: call void @foo
@@ -25,6 +25,8 @@
 ; VISIBILITY: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
 ; VISIBILITY: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.fini
 ; VISIBILITY: OBJECT   GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
+; SECTION: .init_array.1     INIT_ARRAY      {{.*}} {{.*}} 000008 00  WA  0   0  8
+; SECTION: .fini_array.1     FINI_ARRAY      {{.*}} {{.*}} 000008 00  WA  0   0  8
 
 define internal void @foo() {
   ret void


        


More information about the llvm-commits mailing list