[clang] 07b1766 - [OpenMP][FIX] Allow device constructors for AMD GPU
Johannes Doerfert via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 16 15:04:57 PDT 2022
Author: Johannes Doerfert
Date: 2022-03-16T17:04:28-05:00
New Revision: 07b176646134c3d88a4cecef5e0058e2de6b2409
URL: https://github.com/llvm/llvm-project/commit/07b176646134c3d88a4cecef5e0058e2de6b2409
DIFF: https://github.com/llvm/llvm-project/commit/07b176646134c3d88a4cecef5e0058e2de6b2409.diff
LOG: [OpenMP][FIX] Allow device constructors for AMD GPU
In AMD GPU device code, globals are placed in address space 1 (AS(1)).
Previously, clang crashed when such a global was a structure. Now we simply
cast the global's address to address space 0 before generating the code that
initializes (or destroys) the global.
Differential Revision: https://reviews.llvm.org/D121837
Added:
clang/test/OpenMP/amdgcn_target_global_constructor.cpp
Modified:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 3089cdc4e4a92..62a91af281b3d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
@@ -1940,8 +1941,12 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
FunctionArgList(), Loc, Loc);
auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
+ auto *AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
+ Addr, llvm::PointerType::getWithSamePointeeType(
+ cast<llvm::PointerType>(Addr->getType()), 0));
CtorCGF.EmitAnyExprToMem(
- Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)),
+ Init,
+ Address::deprecated(AddrInAS0, CGM.getContext().getDeclAlign(VD)),
Init->getType().getQualifiers(),
/*IsInitializer=*/true);
CtorCGF.FinishFunction();
@@ -1980,9 +1985,12 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
// Create a scope with an artificial location for the body of this
// function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
+ auto *AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
+ Addr, llvm::PointerType::getWithSamePointeeType(
+ cast<llvm::PointerType>(Addr->getType()), 0));
DtorCGF.emitDestroy(
- Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy,
- DtorCGF.getDestroyer(ASTTy.isDestructedType()),
+ Address::deprecated(AddrInAS0, CGM.getContext().getDeclAlign(VD)),
+ ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
DtorCGF.FinishFunction();
Dtor = Fn;
diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
new file mode 100644
index 0000000000000..35236d53c4d55
--- /dev/null
+++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
@@ -0,0 +1,99 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void foo(void);
+
+struct S {
+ int a;
+ S() : a(1) {}
+ ~S() { foo(); }
+};
+
+#pragma omp declare target
+S A;
+#pragma omp end declare target
+
+#endif
+//.
+// CHECK: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0
+// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
+// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
+// CHECK: @llvm.used = appending addrspace(1) global [2 x i8*] [i8* bitcast (void ()* @__omp_offloading__35_bedc08f_A_l37_ctor to i8*), i8* bitcast (void ()* @__omp_offloading__35_bedc08f_A_l37_dtor to i8*)], section "llvm.metadata"
+//.
+// CHECK-LABEL: @__omp_offloading__35_bedc08f_A_l37_ctor(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @_ZN1SC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: @_ZN1SC1Ev(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast %struct.S* addrspace(5)* [[THIS_ADDR]] to %struct.S**
+// CHECK-NEXT: store %struct.S* [[THIS:%.*]], %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_ZN1SC2Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: @__omp_offloading__35_bedc08f_A_l37_dtor(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR4:[0-9]+]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: @_ZN1SD1Ev(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast %struct.S* addrspace(5)* [[THIS_ADDR]] to %struct.S**
+// CHECK-NEXT: store %struct.S* [[THIS:%.*]], %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_ZN1SD2Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: @_ZN1SC2Ev(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast %struct.S* addrspace(5)* [[THIS_ADDR]] to %struct.S**
+// CHECK-NEXT: store %struct.S* [[THIS:%.*]], %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: store i32 1, i32* [[A]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: @_ZN1SD2Ev(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast %struct.S* addrspace(5)* [[THIS_ADDR]] to %struct.S**
+// CHECK-NEXT: store %struct.S* [[THIS:%.*]], %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_Z3foov() #[[ATTR3]]
+// CHECK-NEXT: ret void
+//
+//.
+// CHECK: attributes #0 = { convergent noinline nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #1 = { convergent noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #2 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #3 = { convergent }
+// CHECK: attributes #4 = { convergent nounwind }
+//.
+// CHECK: !0 = !{i32 0, i32 53, i32 200130703, !"__omp_offloading__35_bedc08f_A_l37_dtor", i32 37, i32 2}
+// CHECK: !1 = !{i32 0, i32 53, i32 200130703, !"__omp_offloading__35_bedc08f_A_l37_ctor", i32 37, i32 1}
+// CHECK: !2 = !{i32 1, !"A", i32 0, i32 0}
+// CHECK: !3 = !{void ()* @__omp_offloading__35_bedc08f_A_l37_ctor, !"kernel", i32 1}
+// CHECK: !4 = !{void ()* @__omp_offloading__35_bedc08f_A_l37_dtor, !"kernel", i32 1}
+// CHECK: !5 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !6 = !{i32 7, !"openmp", i32 50}
+// CHECK: !7 = !{i32 7, !"openmp-device", i32 50}
+// CHECK: !8 = !{!"clang version 15.0.0"}
+//.
More information about the cfe-commits
mailing list