[Openmp-commits] [openmp] 9d3550c - [OpenMP] Add AMDGPU calling convention to ctor / dtor functions

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Fri Mar 25 19:44:37 PDT 2022


Author: Joseph Huber
Date: 2022-03-25T22:44:20-04:00
New Revision: 9d3550c5173373305c10a5a471a935be205191a2

URL: https://github.com/llvm/llvm-project/commit/9d3550c5173373305c10a5a471a935be205191a2
DIFF: https://github.com/llvm/llvm-project/commit/9d3550c5173373305c10a5a471a935be205191a2.diff

LOG: [OpenMP] Add AMDGPU calling convention to ctor / dtor functions

This patch adds the necessary AMDGPU calling convention to the ctor /
dtor kernels. These are fundamentally device kenels called by the host
on image load. Without this calling convention information the AMDGPU
plugin is unable to identify them.

Depends on D122504

Fixes #54091

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D122515

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/amdgcn_target_global_constructor.cpp
    openmp/libomptarget/test/offloading/global_constructor.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index eeefa608fb7ee..5cc1fdb56aa54 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1928,6 +1928,8 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
           FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
           llvm::GlobalValue::WeakODRLinkage);
+      if (CGM.getTriple().isAMDGCN())
+        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                             FunctionArgList(), Loc, Loc);
@@ -1972,6 +1974,8 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
           FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
           llvm::GlobalValue::WeakODRLinkage);
+      if (CGM.getTriple().isAMDGCN())
+        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                             FunctionArgList(), Loc, Loc);

diff  --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
index 35bebaafdf544..9ada77fe8a805 100644
--- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
+++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs --replace-value-regex "__omp_offloading__[0-9a-z]_[0-9a-z]_"
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
@@ -27,7 +27,7 @@ S A;
 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
 // CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
 //.
-// CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_613a0d56_A_l19_ctor
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading__[0-9a-z]+_[0-9a-z]+}}_A_l19_ctor
 // CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    call void @_ZN1SC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR3:[0-9]+]]
@@ -45,7 +45,7 @@ S A;
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_613a0d56_A_l19_dtor
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading__[0-9a-z]+_[0-9a-z]+}}_A_l19_dtor
 // CHECK-SAME: () #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR4:[0-9]+]]
@@ -92,11 +92,11 @@ S A;
 // CHECK: attributes #3 = { convergent }
 // CHECK: attributes #4 = { convergent nounwind }
 //.
-// CHECK: !0 = !{i32 0, i32 64770, i32 1631194454, !"__omp_offloading__fd02_613a0d56_A_l19_ctor", i32 19, i32 1}
-// CHECK: !1 = !{i32 0, i32 64770, i32 1631194454, !"__omp_offloading__fd02_613a0d56_A_l19_dtor", i32 19, i32 2}
+// CHECK: !0 = !{i32 0, i32 64770, i32 1680388141, !"{{__omp_offloading__[0-9a-z]+_[0-9a-z]+}}_A_l19_ctor", i32 19, i32 1}
+// CHECK: !1 = !{i32 0, i32 64770, i32 1680388141, !"{{__omp_offloading__[0-9a-z]+_[0-9a-z]+}}_A_l19_dtor", i32 19, i32 2}
 // CHECK: !2 = !{i32 1, !"A", i32 0, i32 0}
-// CHECK: !3 = !{void ()* @__omp_offloading__fd02_613a0d56_A_l19_ctor, !"kernel", i32 1}
-// CHECK: !4 = !{void ()* @__omp_offloading__fd02_613a0d56_A_l19_dtor, !"kernel", i32 1}
+// CHECK: !3 = !{void ()* @{{.*}}_A_l19_ctor, !"kernel", i32 1}
+// CHECK: !4 = !{void ()* @{{.*}}_A_l19_dtor, !"kernel", i32 1}
 // CHECK: !5 = !{i32 1, !"wchar_size", i32 4}
 // CHECK: !6 = !{i32 7, !"openmp", i32 50}
 // CHECK: !7 = !{i32 7, !"openmp-device", i32 50}

diff  --git a/openmp/libomptarget/test/offloading/global_constructor.cpp b/openmp/libomptarget/test/offloading/global_constructor.cpp
index 18dc4c978665d..eb68c5f78358b 100644
--- a/openmp/libomptarget/test/offloading/global_constructor.cpp
+++ b/openmp/libomptarget/test/offloading/global_constructor.cpp
@@ -1,23 +1,25 @@
 // RUN: %libomptarget-compilexx-generic && %libomptarget-run-generic | %fcheck-generic
 
-// Fails in DAGToDAG on an address space problem
-// UNSUPPORTED: amdgcn-amd-amdhsa
-// UNSUPPORTED: amdgcn-amd-amdhsa-newDriver
-
-#include <cmath>
 #include <cstdio>
 
-const double Host = log(2.0) / log(2.0);
-#pragma omp declare target
-const double Device = log(2.0) / log(2.0);
-#pragma omp end declare target
+int foo() { return 1; }
+
+class C {
+public:
+  C() : x(foo()) {}
+
+  int x;
+};
+
+C c;
+#pragma omp declare target(c)
 
 int main() {
-  double X;
-#pragma omp target map(from : X)
-  { X = Device; }
+  int x = 0;
+#pragma omp target map(from : x)
+  { x = c.x; }
 
   // CHECK: PASS
-  if (X == Host)
+  if (x == 1)
     printf("PASS\n");
 }


        


More information about the Openmp-commits mailing list