[clang] 9d3550c - [OpenMP] Add AMDGPU calling convention to ctor / dtor functions
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 25 19:44:35 PDT 2022
Author: Joseph Huber
Date: 2022-03-25T22:44:20-04:00
New Revision: 9d3550c5173373305c10a5a471a935be205191a2
URL: https://github.com/llvm/llvm-project/commit/9d3550c5173373305c10a5a471a935be205191a2
DIFF: https://github.com/llvm/llvm-project/commit/9d3550c5173373305c10a5a471a935be205191a2.diff
LOG: [OpenMP] Add AMDGPU calling convention to ctor / dtor functions
This patch adds the necessary AMDGPU calling convention to the ctor /
dtor kernels. These are fundamentally device kenels called by the host
on image load. Without this calling convention information the AMDGPU
plugin is unable to identify them.
Depends on D122504
Fixes #54091
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D122515
Added:
Modified:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/test/OpenMP/amdgcn_target_global_constructor.cpp
openmp/libomptarget/test/offloading/global_constructor.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index eeefa608fb7ee..5cc1fdb56aa54 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1928,6 +1928,8 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
llvm::GlobalValue::WeakODRLinkage);
+ if (CGM.getTriple().isAMDGCN())
+ Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
FunctionArgList(), Loc, Loc);
@@ -1972,6 +1974,8 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
llvm::GlobalValue::WeakODRLinkage);
+ if (CGM.getTriple().isAMDGCN())
+ Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
FunctionArgList(), Loc, Loc);
diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
index 35bebaafdf544..9ada77fe8a805 100644
--- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
+++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs --replace-value-regex "__omp_offloading__[0-9a-z]_[0-9a-z]_"
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
@@ -27,7 +27,7 @@ S A;
// CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
//.
-// CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_613a0d56_A_l19_ctor
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading__[0-9a-z]+_[0-9a-z]+}}_A_l19_ctor
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @_ZN1SC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR3:[0-9]+]]
@@ -45,7 +45,7 @@ S A;
// CHECK-NEXT: ret void
//
//
-// CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_613a0d56_A_l19_dtor
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading__[0-9a-z]+_[0-9a-z]+}}_A_l19_dtor
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR4:[0-9]+]]
@@ -92,11 +92,11 @@ S A;
// CHECK: attributes #3 = { convergent }
// CHECK: attributes #4 = { convergent nounwind }
//.
-// CHECK: !0 = !{i32 0, i32 64770, i32 1631194454, !"__omp_offloading__fd02_613a0d56_A_l19_ctor", i32 19, i32 1}
-// CHECK: !1 = !{i32 0, i32 64770, i32 1631194454, !"__omp_offloading__fd02_613a0d56_A_l19_dtor", i32 19, i32 2}
+// CHECK: !0 = !{i32 0, i32 64770, i32 1680388141, !"{{__omp_offloading__[0-9a-z]+_[0-9a-z]+}}_A_l19_ctor", i32 19, i32 1}
+// CHECK: !1 = !{i32 0, i32 64770, i32 1680388141, !"{{__omp_offloading__[0-9a-z]+_[0-9a-z]+}}_A_l19_dtor", i32 19, i32 2}
// CHECK: !2 = !{i32 1, !"A", i32 0, i32 0}
-// CHECK: !3 = !{void ()* @__omp_offloading__fd02_613a0d56_A_l19_ctor, !"kernel", i32 1}
-// CHECK: !4 = !{void ()* @__omp_offloading__fd02_613a0d56_A_l19_dtor, !"kernel", i32 1}
+// CHECK: !3 = !{void ()* @{{.*}}_A_l19_ctor, !"kernel", i32 1}
+// CHECK: !4 = !{void ()* @{{.*}}_A_l19_dtor, !"kernel", i32 1}
// CHECK: !5 = !{i32 1, !"wchar_size", i32 4}
// CHECK: !6 = !{i32 7, !"openmp", i32 50}
// CHECK: !7 = !{i32 7, !"openmp-device", i32 50}
diff --git a/openmp/libomptarget/test/offloading/global_constructor.cpp b/openmp/libomptarget/test/offloading/global_constructor.cpp
index 18dc4c978665d..eb68c5f78358b 100644
--- a/openmp/libomptarget/test/offloading/global_constructor.cpp
+++ b/openmp/libomptarget/test/offloading/global_constructor.cpp
@@ -1,23 +1,25 @@
// RUN: %libomptarget-compilexx-generic && %libomptarget-run-generic | %fcheck-generic
-// Fails in DAGToDAG on an address space problem
-// UNSUPPORTED: amdgcn-amd-amdhsa
-// UNSUPPORTED: amdgcn-amd-amdhsa-newDriver
-
-#include <cmath>
#include <cstdio>
-const double Host = log(2.0) / log(2.0);
-#pragma omp declare target
-const double Device = log(2.0) / log(2.0);
-#pragma omp end declare target
+int foo() { return 1; }
+
+class C {
+public:
+ C() : x(foo()) {}
+
+ int x;
+};
+
+C c;
+#pragma omp declare target(c)
int main() {
- double X;
-#pragma omp target map(from : X)
- { X = Device; }
+ int x = 0;
+#pragma omp target map(from : x)
+ { x = c.x; }
// CHECK: PASS
- if (X == Host)
+ if (x == 1)
printf("PASS\n");
}
More information about the cfe-commits
mailing list