[llvm] 8b13274 - [AMDGPU] Rewrite device ctor / dtor handling to use .init / .fini sections

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Fri May 19 14:22:10 PDT 2023


Author: Joseph Huber
Date: 2023-05-19T16:22:01-05:00
New Revision: 8b132747cd3bf351c34db0998b6908083bdf729a

URL: https://github.com/llvm/llvm-project/commit/8b132747cd3bf351c34db0998b6908083bdf729a
DIFF: https://github.com/llvm/llvm-project/commit/8b132747cd3bf351c34db0998b6908083bdf729a.diff

LOG: [AMDGPU] Rewrite device ctor / dtor handling to use .init / .fini sections

Currently, AMDGPU has special handling for constructors and destructors.
We manually emit a kernel that calls the functions listed in the global
constructor / destructor list. This currently has two main problems. The
first is that we do not respect the priority and simply call them in any
order. The second is that we redefine the symbol unconditionally which
could have a different definition, meaning we cannot merge any code
with a constructor post-codegen. This patch changes the handling to
instead use the standard support for traversing the `.init_array` and
`.fini_array` sections the compiler creates. This allows us to emit a
single kernel with `odr` semantics, so even if we emit this multiple
times they will be merged into a single kernel.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D150675

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
    llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
    llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
    llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
    llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index ceb7dfcf12f6..05786d64d5e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -31,16 +31,12 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
   StringRef InitOrFiniKernelName = "amdgcn.device.init";
   if (!IsCtor)
     InitOrFiniKernelName = "amdgcn.device.fini";
-  if (Function *F = M.getFunction(InitOrFiniKernelName))
-    return F;
+  if (M.getFunction(InitOrFiniKernelName))
+    return nullptr;
 
   Function *InitOrFiniKernel = Function::createWithDefaultAttr(
       FunctionType::get(Type::getVoidTy(M.getContext()), false),
-      GlobalValue::ExternalLinkage, 0, InitOrFiniKernelName, &M);
-  BasicBlock *InitOrFiniKernelBB =
-      BasicBlock::Create(M.getContext(), "", InitOrFiniKernel);
-  ReturnInst::Create(M.getContext(), InitOrFiniKernelBB);
-
+      GlobalValue::WeakODRLinkage, 0, InitOrFiniKernelName, &M);
   InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);
   if (IsCtor)
     InitOrFiniKernel->addFnAttr("device-init");
@@ -49,6 +45,70 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
   return InitOrFiniKernel;
 }
 
+// The linker will provide the associated symbols to allow us to traverse the
+// global constructors / destructors in priority order. We create the IR
+// required to call each callback in this section. This is equivalent to the
+// following code.
+//
+// extern "C" void * __init_array_start[];
+// extern "C" void * __init_array_end[];
+//
+// using InitCallback = void();
+//
+// void call_init_array_callbacks() {
+//   for (auto start = __init_array_start; start != __init_array_end; ++start)
+//     reinterpret_cast<InitCallback *>(*start)();
+static void createInitOrFiniCalls(Function &F, bool IsCtor) {
+  Module &M = *F.getParent();
+  LLVMContext &C = M.getContext();
+
+  IRBuilder<> IRB(BasicBlock::Create(C, "entry", &F));
+  auto *LoopBB = BasicBlock::Create(C, "while.entry", &F);
+  auto *ExitBB = BasicBlock::Create(C, "while.end", &F);
+  Type *PtrTy = IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS);
+
+  auto *Begin = M.getOrInsertGlobal(
+      IsCtor ? "__init_array_start" : "__fini_array_start",
+      ArrayType::get(PtrTy, 0), [&]() {
+        return new GlobalVariable(
+            M, ArrayType::get(PtrTy, 0),
+            /*isConstant=*/true, GlobalValue::ExternalLinkage,
+            /*Initializer=*/nullptr,
+            IsCtor ? "__init_array_start" : "__fini_array_start",
+            /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,
+            /*AddressSpace=*/1);
+      });
+  auto *End = M.getOrInsertGlobal(
+      IsCtor ? "__init_array_end" : "__fini_array_end",
+      ArrayType::get(PtrTy, 0), [&]() {
+        return new GlobalVariable(
+            M, ArrayType::get(PtrTy, 0),
+            /*isConstant=*/true, GlobalValue::ExternalLinkage,
+            /*Initializer=*/nullptr,
+            IsCtor ? "__init_array_end" : "__fini_array_end",
+            /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,
+            /*AddressSpace=*/1);
+      });
+
+  // The constructor type is suppoed to allow using the argument vectors, but
+  // for now we just call them with no arguments.
+  auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});
+
+  IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB);
+  IRB.SetInsertPoint(LoopBB);
+  auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
+  auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()),
+                                  CallBackPHI, "callback");
+  IRB.CreateCall(CallBackTy, CallBack);
+  auto *NewCallBack = IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, 1, "next");
+  auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end");
+  CallBackPHI->addIncoming(Begin, &F.getEntryBlock());
+  CallBackPHI->addIncoming(NewCallBack, LoopBB);
+  IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
+  IRB.SetInsertPoint(ExitBB);
+  IRB.CreateRetVoid();
+}
+
 static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,
                                    bool IsCtor) {
   GlobalVariable *GV = M.getGlobalVariable(GlobalName);
@@ -59,22 +119,10 @@ static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,
     return false;
 
   Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
-  IRBuilder<> IRB(InitOrFiniKernel->getEntryBlock().getTerminator());
-
-  FunctionType *ConstructorTy = InitOrFiniKernel->getFunctionType();
-
-  for (Value *V : GA->operands()) {
-    auto *CS = cast<ConstantStruct>(V);
-    bool AlreadyRegistered =
-        llvm::any_of(CS->getOperand(1)->uses(), [=](Use &U) {
-          if (auto *CB = dyn_cast<CallBase>(U.getUser()))
-            if (CB->getCaller() == InitOrFiniKernel)
-              return true;
-          return false;
-        });
-    if (!AlreadyRegistered)
-      IRB.CreateCall(ConstructorTy, CS->getOperand(1));
-  }
+  if (!InitOrFiniKernel)
+    return false;
+
+  createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);
 
   appendToUsed(M, {InitOrFiniKernel});
   return true;

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
index 825ebf203a3b..35849d318f05 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -1,5 +1,4 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-lower-ctor-dtor %s | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
 
 ; Make sure we emit code for constructor entries that aren't direct
 ; function calls.
@@ -18,6 +17,10 @@
 @foo.alias = hidden alias void (), ptr @foo
 
 ;.
+; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
 ; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
 ; CHECK: @foo.alias = hidden alias void (), ptr @foo
 ;.
@@ -35,46 +38,31 @@ define void @bar() addrspace(1) {
   ret void
 }
 
-; CHECK: define amdgpu_kernel void @amdgcn.device.init() #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: call void @foo.alias()
-; CHECK-NEXT: call void inttoptr (i64 4096 to ptr)()
-; CHECK-NEXT: ret void
-; CHECK-NEXT: }
-
-; CHECK: define amdgpu_kernel void @amdgcn.device.fini() #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: call void addrspacecast (ptr addrspace(1) @bar to ptr)()
-; CHECK-NEXT: ret void
-; CHECK-NEXT: }
-
-;.
-; CHECK: attributes #[[ATTR0]] = { "device-init" }
-; CHECK: attributes #[[ATTR1]] = { "device-fini" }
-
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.entry:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    call void [[CALLBACK]]()
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
+; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
 
-; GCN-LABEL: foo:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
-;
-; GCN-LABEL: bar:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
-;
-; GCN-LABEL: amdgcn.device.init:
-; GCN:         s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT:    s_add_u32 s[[PC_LO]], s[[PC_LO]], foo.alias at rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s[[PC_HI]], s[[PC_HI]], foo.alias at rel32@hi+12
-; GCN-NEXT:    s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.entry:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    call void [[CALLBACK]]()
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
 
-; GCN:         s_mov_b64 [[LIT_ADDR:s\[[0-9]+:[0-9]+\]]], 0x1000
-; GCN:         s_swappc_b64 s[30:31], [[LIT_ADDR]]
-; GCN-NEXT:    s_endpgm
-;
-; GCN-LABEL: amdgcn.device.fini:
-; GCN:         s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT:    s_add_u32 s[[PC_LO]], s[[PC_LO]], bar at gotpcrel32@lo+4
-; GCN-NEXT:    s_addc_u32 s[[PC_HI]], s[[PC_HI]], bar at gotpcrel32@hi+12
-; GCN-NEXT:    s_load_dwordx2 s{{\[}}[[GOT_LO:[0-9]+]]:[[GOT_HI:[0-9]+]]{{\]}}, s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}, 0x0
-; GCN:         s_swappc_b64 s[30:31], s{{\[}}[[GOT_LO]]:[[GOT_HI]]{{\]}}
-; GCN-NEXT:    s_endpgm
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { "device-init" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { "device-fini" }

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
index f0919b971b10..f399f9fface1 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-existing.ll
@@ -1,21 +1,19 @@
 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-ctor-dtor < %s | FileCheck %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS
 
-; Make sure there's no crash or error if amdgcn.device.init or
-; amdgcn.device.fini already exist.
+; Make sure that we don't modify the functions if amdgcn.device.init or
+; amdgcn.device.fini already exit.
 
 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
 
 ; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0 {
 ; CHECK-NEXT:   store volatile i32 1, ptr addrspace(1) null
-; CHECK-NEXT:   call void @foo()
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 
 ; CHECK-LABEL: define amdgpu_kernel void @amdgcn.device.fini() #1 {
 ; CHECK-NEXT:    store volatile i32 0, ptr addrspace(1) null
-; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    ret void
 ; CHECK-NEXT:  }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
index a197727eaec3..d090d200953c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -9,22 +9,44 @@
 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
 
+; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
 ; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
 
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
-; CHECK-NEXT: call void @foo
-; CHECK-NEXT: ret void
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
-; CHECK-NEXT: call void @bar
-; CHECK-NEXT: ret void
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init() #0
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.entry:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    call void [[CALLBACK]]()
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
+; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
+
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini() #1
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.entry:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    call void [[CALLBACK]]()
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
 
 ; CHECK-NOT: amdgcn.device.
 
-; VISIBILITY: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.init
-; VISIBILITY: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
-; VISIBILITY: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.fini
-; VISIBILITY: OBJECT   GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
+; VISIBILITY: FUNC   WEAK PROTECTED {{.*}} amdgcn.device.init
+; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
+; VISIBILITY: FUNC   WEAK PROTECTED {{.*}} amdgcn.device.fini
+; VISIBILITY: OBJECT   WEAK DEFAULT {{.*}} amdgcn.device.fini.kd
 ; SECTION: .init_array.1     INIT_ARRAY      {{.*}} {{.*}} 000008 00  WA  0   0  8
 ; SECTION: .fini_array.1     FINI_ARRAY      {{.*}} {{.*}} 000008 00  WA  0   0  8
 

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
index ae52854d2f85..a5286ec45dee 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -4,18 +4,42 @@
 @llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }]
 
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
-; CHECK-NEXT: call void @foo
-; CHECK-NEXT: call void @foo.5
-
-; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
-; CHECK-NEXT: call void @bar
-; CHECK-NEXT: call void @bar.5
-
-; CHECK-VIS: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.init
-; CHECK-VIS: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
-; CHECK-VIS: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.fini
-; CHECK-VIS: OBJECT   GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
+; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
+
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init() #0
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.entry:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    call void [[CALLBACK]]()
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
+; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
+
+; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini() #1
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.entry:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    call void [[CALLBACK]]()
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
+; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
+
+; CHECK-VIS: FUNC   WEAK PROTECTED {{.*}} amdgcn.device.init
+; CHECK-VIS: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
+; CHECK-VIS: FUNC   WEAK PROTECTED {{.*}} amdgcn.device.fini
+; CHECK-VIS: OBJECT   WEAK DEFAULT {{.*}} amdgcn.device.fini.kd
 
 define internal void @foo() {
   ret void
@@ -34,4 +58,4 @@ define internal void @bar.5() {
 }
 
 ; CHECK: attributes #0 = { "device-init" }
-; CHECK: attributes #1 = { "device-fini" }
\ No newline at end of file
+; CHECK: attributes #1 = { "device-fini" }


        


More information about the llvm-commits mailing list