[llvm] [AMDGPU] Call the `FINI_ARRAY` destructors in the correct order (PR #71815)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 9 07:15:07 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
The AMDGPU backend uses the linker-provided INIT_ARRAY and FINI_ARRAY
sections to call all the global constructors in a single kernel.
Previously this mistakenly used the same iteration logic for both
arrays. The destructors stored in FINI_ARRAY are actually stored in
reverse order, so we must start at the end of the array and decrement.
This patch makes the neccesarry changes to properly respect priority.


---
Full diff: https://github.com/llvm/llvm-project/pull/71815.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp (+37-4) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll (+4-6) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll (+7-16) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll (+5-22) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index a13447586bd4ba3..8814d2ca456d8f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -53,13 +53,22 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
 //
 // extern "C" void * __init_array_start[];
 // extern "C" void * __init_array_end[];
+// extern "C" void * __fini_array_start[];
+// extern "C" void * __fini_array_end[];
 //
 // using InitCallback = void();
+// using FiniCallback = void(void);
 //
 // void call_init_array_callbacks() {
 //   for (auto start = __init_array_start; start != __init_array_end; ++start)
 //     reinterpret_cast<InitCallback *>(*start)();
 // }
+//
+// void call_fini_array_callbacks() {
+//  size_t fini_array_size = __fini_array_end - __fini_array_start;
+//  for (size_t i = fini_array_size; i > 0; --i)
+//    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
+// }
 static void createInitOrFiniCalls(Function &F, bool IsCtor) {
   Module &M = *F.getParent();
   LLVMContext &C = M.getContext();
@@ -96,15 +105,39 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) {
   // for now we just call them with no arguments.
   auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});
 
-  IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB);
+  Constant *Start = Begin;
+  Constant *Stop = End;
+  // The destructor array must be called in reverse order. Get a constant
+  // expression to the end of the array and iterate backwards instead.
+  if (!IsCtor) {
+    Type *Int64Ty = IntegerType::getInt64Ty(C);
+    auto *Offset = ConstantExpr::getSub(
+        ConstantExpr::getAShr(
+            ConstantExpr::getSub(ConstantExpr::getPtrToInt(End, Int64Ty),
+                                 ConstantExpr::getPtrToInt(Begin, Int64Ty)),
+            ConstantInt::get(Int64Ty, 3)),
+        ConstantInt::get(Int64Ty, 1));
+    Start = ConstantExpr::getGetElementPtr(
+        ArrayType::get(IRB.getPtrTy(), 0), Begin,
+        ArrayRef<Constant *>({ConstantInt::get(Int64Ty, 0), Offset}),
+        /*InBounds=*/true);
+    Stop = Begin;
+  }
+
+  IRB.CreateCondBr(
+      IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,
+                    Stop),
+      LoopBB, ExitBB);
   IRB.SetInsertPoint(LoopBB);
   auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
   auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()),
                                   CallBackPHI, "callback");
   IRB.CreateCall(CallBackTy, CallBack);
-  auto *NewCallBack = IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, 1, "next");
-  auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end");
-  CallBackPHI->addIncoming(Begin, &F.getEntryBlock());
+  auto *NewCallBack =
+      IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next");
+  auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
+                               NewCallBack, Stop, "end");
+  CallBackPHI->addIncoming(Start, &F.getEntryBlock());
   CallBackPHI->addIncoming(NewCallBack, LoopBB);
   IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
   IRB.SetInsertPoint(ExitBB);
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
index a1929a2e8931c11..f9dfa8b4e106656 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -25,8 +25,6 @@ define void @bar() addrspace(1) {
   ret void
 }
 
-
-
 ;.
 ; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo.alias, ptr null }, { i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), ptr null }]
 ; CHECK: @[[LLVM_GLOBAL_DTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), ptr null }]
@@ -65,13 +63,13 @@ define void @bar() addrspace(1) {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK-NEXT:    br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
 ; CHECK:       while.entry:
-; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
 ; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
 ; CHECK-NEXT:    call void [[CALLBACK]]()
-; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
-; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
+; CHECK-NEXT:    [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
 ; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
index 968442182229723..4f228af90c65a00 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -12,20 +12,19 @@
 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
 
-
-
-
-
 ; VISIBILITY: FUNC   WEAK PROTECTED {{.*}} amdgcn.device.init
 ; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
 ; VISIBILITY: FUNC   WEAK PROTECTED {{.*}} amdgcn.device.fini
 ; VISIBILITY: OBJECT   WEAK DEFAULT {{.*}} amdgcn.device.fini.kd
+
 ; SECTION: .init_array.1     INIT_ARRAY      {{.*}} {{.*}} 000008 00  WA  0   0  8
 ; SECTION: .fini_array.1     FINI_ARRAY      {{.*}} {{.*}} 000008 00  WA  0   0  8
+
 ; DISABLED-NOT: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.init
 ; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
 ; DISABLED-NOT: FUNC   GLOBAL PROTECTED {{.*}} amdgcn.device.fini
 ; DISABLED-NOT: OBJECT   GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
+
 ; METADATA:  amdhsa.kernels:
 ; METADATA:    .kind:           init
 ; METADATA:    .max_flat_workgroup_size: 1
@@ -53,13 +52,6 @@ define internal void @bar() {
 ; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
 ; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
 ;.
-; CHECK-LABEL: define internal void @foo() {
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: define internal void @bar() {
-; CHECK-NEXT:    ret void
-;
 ;
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -79,13 +71,13 @@ define internal void @bar() {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK-NEXT:    br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
 ; CHECK:       while.entry:
-; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
 ; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
 ; CHECK-NEXT:    call void [[CALLBACK]]()
-; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
-; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
+; CHECK-NEXT:    [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
 ; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    ret void
@@ -93,4 +85,3 @@ define internal void @bar() {
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
 ; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
-;.
diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
index 83bb61d1a632351..75445b99719281c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -3,10 +3,10 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS
 
 
-; UTC_ARGS: --disable
 @llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }]
 
+; UTC_ARGS: --disable
 ; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
 ; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
 ; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
@@ -36,22 +36,6 @@ define internal void @bar.5() {
   ret void
 }
 
-; CHECK-LABEL: define internal void @foo() {
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: define internal void @bar() {
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: define internal void @foo.5() {
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: define internal void @bar.5() {
-; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
@@ -70,14 +54,13 @@ define internal void @bar.5() {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
+; CHECK-NEXT:    br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
 ; CHECK:       while.entry:
-; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
 ; CHECK-NEXT:    [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
 ; CHECK-NEXT:    call void [[CALLBACK]]()
-; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
-; CHECK-NEXT:    [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
+; CHECK-NEXT:    [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
+; CHECK-NEXT:    [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
 ; CHECK-NEXT:    br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    ret void
-;

``````````

</details>


https://github.com/llvm/llvm-project/pull/71815


More information about the llvm-commits mailing list