[clang] 07b1766 - [OpenMP][FIX] Allow device constructors for AMD GPU

Johannes Doerfert via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 16 15:04:57 PDT 2022


Author: Johannes Doerfert
Date: 2022-03-16T17:04:28-05:00
New Revision: 07b176646134c3d88a4cecef5e0058e2de6b2409

URL: https://github.com/llvm/llvm-project/commit/07b176646134c3d88a4cecef5e0058e2de6b2409
DIFF: https://github.com/llvm/llvm-project/commit/07b176646134c3d88a4cecef5e0058e2de6b2409.diff

LOG: [OpenMP][FIX] Allow device constructors for AMD GPU

In AMD GPU device code, globals live in address space 1 (AS(1)). Previously,
we crashed if the global was a structure. Now we simply cast the global's
address to the default address space (AS(0)) before we generate the code
that initializes or destroys the global.

Differential Revision: https://reviews.llvm.org/D121837

Added: 
    clang/test/OpenMP/amdgcn_target_global_constructor.cpp

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 3089cdc4e4a92..62a91af281b3d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Format.h"
@@ -1940,8 +1941,12 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                             FunctionArgList(), Loc, Loc);
       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
+      auto *AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
+          Addr, llvm::PointerType::getWithSamePointeeType(
+                    cast<llvm::PointerType>(Addr->getType()), 0));
       CtorCGF.EmitAnyExprToMem(
-          Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)),
+          Init,
+          Address::deprecated(AddrInAS0, CGM.getContext().getDeclAlign(VD)),
           Init->getType().getQualifiers(),
           /*IsInitializer=*/true);
       CtorCGF.FinishFunction();
@@ -1980,9 +1985,12 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
       // Create a scope with an artificial location for the body of this
       // function.
       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
+      auto *AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
+          Addr, llvm::PointerType::getWithSamePointeeType(
+                    cast<llvm::PointerType>(Addr->getType()), 0));
       DtorCGF.emitDestroy(
-          Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy,
-          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
+          Address::deprecated(AddrInAS0, CGM.getContext().getDeclAlign(VD)),
+          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
       DtorCGF.FinishFunction();
       Dtor = Fn;

diff  --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
new file mode 100644
index 0000000000000..35236d53c4d55
--- /dev/null
+++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
@@ -0,0 +1,99 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void foo(void);
+
+struct S {
+  int a;
+  S() : a(1) {}
+  ~S() { foo(); }
+};
+
+#pragma omp declare target
+S A;
+#pragma omp end declare target
+
+#endif
+//.
+// CHECK: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0
+// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
+// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
+// CHECK: @llvm.used = appending addrspace(1) global [2 x i8*] [i8* bitcast (void ()* @__omp_offloading__35_bedc08f_A_l37_ctor to i8*), i8* bitcast (void ()* @__omp_offloading__35_bedc08f_A_l37_dtor to i8*)], section "llvm.metadata"
+//.
+// CHECK-LABEL: @__omp_offloading__35_bedc08f_A_l37_ctor(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @_ZN1SC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: @_ZN1SC1Ev(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8, addrspace(5)
+// CHECK-NEXT:    [[THIS_ADDR_ASCAST:%.*]] = addrspacecast %struct.S* addrspace(5)* [[THIS_ADDR]] to %struct.S**
+// CHECK-NEXT:    store %struct.S* [[THIS:%.*]], %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    call void @_ZN1SC2Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: @__omp_offloading__35_bedc08f_A_l37_dtor(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR4:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: @_ZN1SD1Ev(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8, addrspace(5)
+// CHECK-NEXT:    [[THIS_ADDR_ASCAST:%.*]] = addrspacecast %struct.S* addrspace(5)* [[THIS_ADDR]] to %struct.S**
+// CHECK-NEXT:    store %struct.S* [[THIS:%.*]], %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    call void @_ZN1SD2Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: @_ZN1SC2Ev(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8, addrspace(5)
+// CHECK-NEXT:    [[THIS_ADDR_ASCAST:%.*]] = addrspacecast %struct.S* addrspace(5)* [[THIS_ADDR]] to %struct.S**
+// CHECK-NEXT:    store %struct.S* [[THIS:%.*]], %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT:    store i32 1, i32* [[A]], align 4
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: @_ZN1SD2Ev(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8, addrspace(5)
+// CHECK-NEXT:    [[THIS_ADDR_ASCAST:%.*]] = addrspacecast %struct.S* addrspace(5)* [[THIS_ADDR]] to %struct.S**
+// CHECK-NEXT:    store %struct.S* [[THIS:%.*]], %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    call void @_Z3foov() #[[ATTR3]]
+// CHECK-NEXT:    ret void
+//
+//.
+// CHECK: attributes #0 = { convergent noinline nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #1 = { convergent noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #2 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #3 = { convergent }
+// CHECK: attributes #4 = { convergent nounwind }
+//.
+// CHECK: !0 = !{i32 0, i32 53, i32 200130703, !"__omp_offloading__35_bedc08f_A_l37_dtor", i32 37, i32 2}
+// CHECK: !1 = !{i32 0, i32 53, i32 200130703, !"__omp_offloading__35_bedc08f_A_l37_ctor", i32 37, i32 1}
+// CHECK: !2 = !{i32 1, !"A", i32 0, i32 0}
+// CHECK: !3 = !{void ()* @__omp_offloading__35_bedc08f_A_l37_ctor, !"kernel", i32 1}
+// CHECK: !4 = !{void ()* @__omp_offloading__35_bedc08f_A_l37_dtor, !"kernel", i32 1}
+// CHECK: !5 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !6 = !{i32 7, !"openmp", i32 50}
+// CHECK: !7 = !{i32 7, !"openmp-device", i32 50}
+// CHECK: !8 = !{!"clang version 15.0.0"}
+//.


        


More information about the cfe-commits mailing list