r363959 - [OpenMP] Add support for handling declare target to clause when unified memory is required
Gheorghe-Teodor Bercea via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 20 11:04:48 PDT 2019
Author: gbercea
Date: Thu Jun 20 11:04:47 2019
New Revision: 363959
URL: http://llvm.org/viewvc/llvm-project?rev=363959&view=rev
Log:
[OpenMP] Add support for handling declare target to clause when unified memory is required
Summary:
This patch adds support for the handling of the variables under the declare target to clause.
The variables in this case are handled like link variables are. A pointer is created on the host and then mapped to the device. The runtime will then copy the address of the host variable in the device pointer.
Reviewers: ABataev, AlexEichenberger, caomhin
Reviewed By: ABataev
Subscribers: guansong, jdoerfert, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D63108
Modified:
cfe/trunk/lib/CodeGen/CGDeclCXX.cpp
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/test/OpenMP/declare_target_codegen.cpp
cfe/trunk/test/OpenMP/declare_target_link_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp
Modified: cfe/trunk/lib/CodeGen/CGDeclCXX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDeclCXX.cpp?rev=363959&r1=363958&r2=363959&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGDeclCXX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDeclCXX.cpp Thu Jun 20 11:04:47 2019
@@ -74,7 +74,7 @@ static void EmitDeclDestroy(CodeGenFunct
// bails even if the attribute is not present.
if (D.isNoDestroy(CGF.getContext()))
return;
-
+
CodeGenModule &CGM = CGF.CGM;
// FIXME: __attribute__((cleanup)) ?
Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=363959&r1=363958&r2=363959&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Thu Jun 20 11:04:47 2019
@@ -2295,15 +2295,22 @@ static LValue EmitThreadPrivateVarDeclLV
return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
}
-static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF,
- const VarDecl *VD, QualType T) {
+static Address emitDeclTargetVarDeclLValue(CodeGenFunction &CGF,
+ const VarDecl *VD, QualType T) {
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
- if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_To)
+ // Return an invalid address if variable is MT_To and unified
+ // memory is not enabled. For all other cases: MT_Link and
+ // MT_To with unified memory, return a valid address.
+ if (!Res || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()))
return Address::invalid();
- assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && "Expected link clause");
+ assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
+ (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) &&
+ "Expected link clause OR to clause with unified memory enabled.");
QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
- Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>());
}
@@ -2359,7 +2366,7 @@ static LValue EmitGlobalVarDeclLValue(Co
// Check if the variable is marked as declare target with link clause in
// device codegen.
if (CGF.getLangOpts().OpenMPIsDevice) {
- Address Addr = emitDeclTargetLinkVarDeclLValue(CGF, VD, T);
+ Address Addr = emitDeclTargetVarDeclLValue(CGF, VD, T);
if (Addr.isValid())
return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
}
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=363959&r1=363958&r2=363959&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Thu Jun 20 11:04:47 2019
@@ -2552,16 +2552,18 @@ CGOpenMPRuntime::createDispatchNextFunct
return CGM.CreateRuntimeFunction(FnTy, Name);
}
-Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
+Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
if (CGM.getLangOpts().OpenMPSimd)
return Address::invalid();
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
- if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
+ if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
+ (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ HasRequiresUnifiedSharedMemory))) {
SmallString<64> PtrName;
{
llvm::raw_svector_ostream OS(PtrName);
- OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
+ OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
}
llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
if (!Ptr) {
@@ -2778,7 +2780,9 @@ bool CGOpenMPRuntime::emitDeclareTargetV
bool PerformInit) {
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
- if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
+ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
+ (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ HasRequiresUnifiedSharedMemory))
return CGM.getLangOpts().OpenMPIsDevice;
VD = VD->getDefinition(CGM.getContext());
if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
@@ -4194,6 +4198,9 @@ void CGOpenMPRuntime::createOffloadEntri
CE->getFlags());
switch (Flags) {
case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
+ if (CGM.getLangOpts().OpenMPIsDevice &&
+ CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
+ continue;
if (!CE->getAddress()) {
unsigned DiagID = CGM.getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
@@ -7452,7 +7459,10 @@ private:
// Track if the map information being generated is the first for a capture.
bool IsCaptureFirstInfo = IsFirstComponentList;
- bool IsLink = false; // Is this variable a "declare target link"?
+ // When the variable is on a declare target link or in a to clause with
+ // unified memory, a reference is needed to hold the host/device address
+ // of the variable.
+ bool RequiresReference = false;
// Scan the components from the base to the complete expression.
auto CI = Components.rbegin();
@@ -7482,11 +7492,14 @@ private:
if (const auto *VD =
dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
- if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
- IsLink = true;
- BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
+ if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
+ (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
+ RequiresReference = true;
+ BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
}
+ }
}
// If the variable is a pointer and is being dereferenced (i.e. is not
@@ -7652,7 +7665,8 @@ private:
// (there is a set of entries for each capture).
OpenMPOffloadMappingFlags Flags = getMapTypeBits(
MapType, MapModifiers, IsImplicit,
- !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
+ !IsExpressionFirstInfo || RequiresReference,
+ IsCaptureFirstInfo && !RequiresReference);
if (!IsExpressionFirstInfo) {
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
@@ -9124,7 +9138,9 @@ bool CGOpenMPRuntime::emitTargetGlobalVa
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
cast<VarDecl>(GD.getDecl()));
- if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
+ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
+ (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ HasRequiresUnifiedSharedMemory)) {
DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
return true;
}
@@ -9183,8 +9199,9 @@ void CGOpenMPRuntime::registerTargetGlob
StringRef VarName;
CharUnits VarSize;
llvm::GlobalValue::LinkageTypes Linkage;
- switch (*Res) {
- case OMPDeclareTargetDeclAttr::MT_To:
+
+ if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ !HasRequiresUnifiedSharedMemory) {
Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
VarName = CGM.getMangledName(VD);
if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
@@ -9207,20 +9224,27 @@ void CGOpenMPRuntime::registerTargetGlob
CGM.addCompilerUsedGlobal(GVAddrRef);
}
}
- break;
- case OMPDeclareTargetDeclAttr::MT_Link:
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
+ } else {
+ assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
+ (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ HasRequiresUnifiedSharedMemory)) &&
+ "Declare target attribute must link or to with unified memory.");
+ if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
+ else
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
+
if (CGM.getLangOpts().OpenMPIsDevice) {
VarName = Addr->getName();
Addr = nullptr;
} else {
- VarName = getAddrOfDeclareTargetLink(VD).getName();
- Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
+ VarName = getAddrOfDeclareTargetVar(VD).getName();
+ Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
}
VarSize = CGM.getPointerSize();
Linkage = llvm::GlobalValue::WeakAnyLinkage;
- break;
}
+
OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
VarName, Addr, VarSize, Flags, Linkage);
}
@@ -9239,12 +9263,15 @@ void CGOpenMPRuntime::emitDeferredTarget
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res)
continue;
- if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
+ if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ !HasRequiresUnifiedSharedMemory) {
CGM.EmitGlobal(VD);
} else {
- assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
- "Expected to or link clauses.");
- (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
+ (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ HasRequiresUnifiedSharedMemory)) &&
+ "Expected link clause or to clause with unified memory.");
+ (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
}
}
}
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=363959&r1=363958&r2=363959&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Thu Jun 20 11:04:47 2019
@@ -1121,8 +1121,8 @@ public:
SourceLocation Loc);
/// Returns the address of the variable marked as declare target with link
- /// clause.
- virtual Address getAddrOfDeclareTargetLink(const VarDecl *VD);
+ /// clause OR as declare target with to clause and unified memory.
+ virtual Address getAddrOfDeclareTargetVar(const VarDecl *VD);
/// Emit a code for initialization of threadprivate variable. It emits
/// a call to runtime library which adds initial value to the newly created
Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=363959&r1=363958&r2=363959&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Thu Jun 20 11:04:47 2019
@@ -2475,13 +2475,19 @@ void CodeGenModule::EmitGlobal(GlobalDec
// Emit declaration of the must-be-emitted declare target variable.
if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
- if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
+ bool UnifiedMemoryEnabled =
+ getOpenMPRuntime().hasRequiresUnifiedSharedMemory();
+ if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ !UnifiedMemoryEnabled) {
(void)GetAddrOfGlobalVar(VD);
} else {
- assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
- "link claue expected.");
- (void)getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
+ (*Res == OMPDeclareTargetDeclAttr::MT_To &&
+ UnifiedMemoryEnabled)) &&
+ "Link clause or to clause with unified memory expected.");
+ (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
}
+
return;
}
}
Modified: cfe/trunk/test/OpenMP/declare_target_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/declare_target_codegen.cpp?rev=363959&r1=363958&r2=363959&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/declare_target_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/declare_target_codegen.cpp Thu Jun 20 11:04:47 2019
@@ -20,10 +20,10 @@
// CHECK-DAG: weak constant %struct.__tgt_offload_entry { i8* bitcast (i32* @bbb to i8*),
// CHECK-DAG: @ccc = external global i32,
// CHECK-DAG: @ddd ={{ dso_local | }}global i32 0,
-// CHECK-DAG: @hhh_decl_tgt_link_ptr = common global i32* null
-// CHECK-DAG: @ggg_decl_tgt_link_ptr = common global i32* null
-// CHECK-DAG: @fff_decl_tgt_link_ptr = common global i32* null
-// CHECK-DAG: @eee_decl_tgt_link_ptr = common global i32* null
+// CHECK-DAG: @hhh_decl_tgt_ref_ptr = common global i32* null
+// CHECK-DAG: @ggg_decl_tgt_ref_ptr = common global i32* null
+// CHECK-DAG: @fff_decl_tgt_ref_ptr = common global i32* null
+// CHECK-DAG: @eee_decl_tgt_ref_ptr = common global i32* null
// CHECK-DAG: @{{.*}}maini1{{.*}}aaa = internal global i64 23,
// CHECK-DAG: @b ={{ dso_local | }}global i32 15,
// CHECK-DAG: @d ={{ dso_local | }}global i32 0,
Modified: cfe/trunk/test/OpenMP/declare_target_link_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/declare_target_link_codegen.cpp?rev=363959&r1=363958&r2=363959&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/declare_target_link_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/declare_target_link_codegen.cpp Thu Jun 20 11:04:47 2019
@@ -18,15 +18,15 @@
#define HEADER
// HOST-DAG: @c = external global i32,
-// HOST-DAG: @c_decl_tgt_link_ptr = global i32* @c
+// HOST-DAG: @c_decl_tgt_ref_ptr = global i32* @c
// DEVICE-NOT: @c =
-// DEVICE: @c_decl_tgt_link_ptr = common global i32* null
+// DEVICE: @c_decl_tgt_ref_ptr = common global i32* null
// HOST: [[SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 4]
// HOST: [[MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 35, i64 531]
-// HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_link_ptr\00"
-// HOST: @.omp_offloading.entry.c_decl_tgt_link_ptr = weak constant %struct.__tgt_offload_entry { i8* bitcast (i32** @c_decl_tgt_link_ptr to i8*), i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 8, i32 1, i32 0 }, section ".omp_offloading.entries", align 1
-// DEVICE-NOT: internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_link_ptr\00"
-// CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (i32** @c_decl_tgt_link_ptr to i8*)]
+// HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_ref_ptr\00"
+// HOST: @.omp_offloading.entry.c_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { i8* bitcast (i32** @c_decl_tgt_ref_ptr to i8*), i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 8, i32 1, i32 0 }, section ".omp_offloading.entries", align 1
+// DEVICE-NOT: internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_ref_ptr\00"
+// CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (i32** @c_decl_tgt_ref_ptr to i8*)]
extern int c;
#pragma omp declare target link(c)
@@ -44,7 +44,7 @@ int maini1() {
}
// DEVICE: define weak void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l[[@LINE-10]](i32* dereferenceable{{[^,]*}}
-// DEVICE: [[C_REF:%.+]] = load i32*, i32** @c_decl_tgt_link_ptr,
+// DEVICE: [[C_REF:%.+]] = load i32*, i32** @c_decl_tgt_ref_ptr,
// DEVICE: [[C:%.+]] = load i32, i32* [[C_REF]],
// DEVICE: store i32 [[C]], i32* %
@@ -55,7 +55,7 @@ int maini1() {
// HOST: getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// HOST: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASEPTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
// HOST: [[BP1_CAST:%.+]] = bitcast i8** [[BP1]] to i32***
-// HOST: store i32** @c_decl_tgt_link_ptr, i32*** [[BP1_CAST]],
+// HOST: store i32** @c_decl_tgt_ref_ptr, i32*** [[BP1_CAST]],
// HOST: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
// HOST: [[P1_CAST:%.+]] = bitcast i8** [[P1]] to i32**
// HOST: store i32* @c, i32** [[P1_CAST]],
@@ -69,5 +69,5 @@ int maini1() {
// HOST: [[C:%.*]] = load i32, i32* @c,
// HOST: store i32 [[C]], i32* %
-// CHECK: !{i32 1, !"c_decl_tgt_link_ptr", i32 1, i32 {{[0-9]+}}}
+// CHECK: !{i32 1, !"c_decl_tgt_ref_ptr", i32 1, i32 {{[0-9]+}}}
#endif // HEADER
Modified: cfe/trunk/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp?rev=363959&r1=363958&r2=363959&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp Thu Jun 20 11:04:47 2019
@@ -1,5 +1,10 @@
// Test declare target link under unified memory requirement.
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-HOST
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK-DEVICE
+
// expected-no-diagnostics
#ifndef HEADER
@@ -8,53 +13,74 @@
#define N 1000
double var = 10.0;
+double to_var = 20.0;
#pragma omp requires unified_shared_memory
#pragma omp declare target link(var)
+#pragma omp declare target to(to_var)
int bar(int n){
double sum = 0;
#pragma omp target
for(int i = 0; i < n; i++) {
- sum += var;
+ sum += var + to_var;
}
return sum;
}
-// CHECK: [[VAR:@.+]] = global double 1.000000e+01
-// CHECK: [[VAR_DECL_TGT_LINK_PTR:@.+]] = global double* [[VAR]]
+// CHECK-HOST: [[VAR:@.+]] = global double 1.000000e+01
+// CHECK-HOST: [[VAR_DECL_TGT_LINK_PTR:@.+]] = global double* [[VAR]]
+
+// CHECK-HOST: [[TO_VAR:@.+]] = global double 2.000000e+01
+// CHECK-HOST: [[VAR_DECL_TGT_TO_PTR:@.+]] = global double* [[TO_VAR]]
+
+// CHECK-HOST: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
+// CHECK-HOST: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800]
+
+// CHECK-HOST: [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR_NAME:@.+]] = internal unnamed_addr constant [21 x i8]
+// CHECK-HOST: [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR:@.+]] = weak constant %struct.__tgt_offload_entry { i8* bitcast (double** [[VAR_DECL_TGT_LINK_PTR]] to i8*), i8* getelementptr inbounds ([21 x i8], [21 x i8]* [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR_NAME]], i32 0, i32 0), i64 8, i32 1, i32 0 }, section ".omp_offloading.entries"
+
+// CHECK-HOST: [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR_NAME:@.+]] = internal unnamed_addr constant [24 x i8]
+// CHECK-HOST: [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR:@.+]] = weak constant %struct.__tgt_offload_entry { i8* bitcast (double** [[VAR_DECL_TGT_TO_PTR]] to i8*), i8* getelementptr inbounds ([24 x i8], [24 x i8]* [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR_NAME]], i32 0, i32 0), i64 8, i32 0, i32 0 }, section ".omp_offloading.entries"
+
+// CHECK-HOST: @llvm.used = appending global [2 x i8*] [i8* bitcast (double** [[VAR_DECL_TGT_LINK_PTR]] to i8*), i8* bitcast (double** [[VAR_DECL_TGT_TO_PTR]] to i8*)], section "llvm.metadata"
+
+// CHECK-HOST: [[N_CASTED:%.+]] = alloca i64
+// CHECK-HOST: [[SUM_CASTED:%.+]] = alloca i64
+
+// CHECK-HOST: [[OFFLOAD_BASEPTRS:%.+]] = alloca [2 x i8*]
+// CHECK-HOST: [[OFFLOAD_PTRS:%.+]] = alloca [2 x i8*]
+
+// CHECK-HOST: [[LOAD1:%.+]] = load i64, i64* [[N_CASTED]]
+// CHECK-HOST: [[LOAD2:%.+]] = load i64, i64* [[SUM_CASTED]]
+
+// CHECK-HOST: [[BPTR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-HOST: [[BCAST1:%.+]] = bitcast i8** [[BPTR1]] to i64*
+// CHECK-HOST: store i64 [[LOAD1]], i64* [[BCAST1]]
+// CHECK-HOST: [[BPTR2:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-HOST: [[BCAST2:%.+]] = bitcast i8** [[BPTR2]] to i64*
+// CHECK-HOST: store i64 [[LOAD1]], i64* [[BCAST2]]
+
+// CHECK-HOST: [[BPTR3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-HOST: [[BCAST3:%.+]] = bitcast i8** [[BPTR3]] to i64*
+// CHECK-HOST: store i64 [[LOAD2]], i64* [[BCAST3]]
+// CHECK-HOST: [[BPTR4:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-HOST: [[BCAST4:%.+]] = bitcast i8** [[BPTR4]] to i64*
+// CHECK-HOST: store i64 [[LOAD2]], i64* [[BCAST4]]
-// CHECK: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
-// CHECK: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800]
+// CHECK-HOST: [[BPTR7:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-HOST: [[BPTR8:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
-// CHECK: [[N_CASTED:%.+]] = alloca i64
-// CHECK: [[SUM_CASTED:%.+]] = alloca i64
+// CHECK-HOST: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 2, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0))
-// CHECK: [[OFFLOAD_BASEPTRS:%.+]] = alloca [2 x i8*]
-// CHECK: [[OFFLOAD_PTRS:%.+]] = alloca [2 x i8*]
-
-// CHECK: [[LOAD1:%.+]] = load i64, i64* [[N_CASTED]]
-// CHECK: [[LOAD2:%.+]] = load i64, i64* [[SUM_CASTED]]
-
-// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK: [[BCAST1:%.+]] = bitcast i8** [[BPTR1]] to i64*
-// CHECK: store i64 [[LOAD1]], i64* [[BCAST1]]
-// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
-// CHECK: [[BCAST2:%.+]] = bitcast i8** [[BPTR2]] to i64*
-// CHECK: store i64 [[LOAD1]], i64* [[BCAST2]]
-
-// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK: [[BCAST3:%.+]] = bitcast i8** [[BPTR3]] to i64*
-// CHECK: store i64 [[LOAD2]], i64* [[BCAST3]]
-// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1
-// CHECK: [[BCAST4:%.+]] = bitcast i8** [[BPTR4]] to i64*
-// CHECK: store i64 [[LOAD2]], i64* [[BCAST4]]
+// CHECK-DEVICE: [[VAR_LINK:@.+]] = common global double* null
+// CHECK-DEVICE: [[VAR_TO:@.+]] = common global double* null
-// CHECK: [[BPTR7:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK: [[BPTR8:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-DEVICE: @llvm.used = appending global [2 x i8*] [i8* bitcast (double** [[VAR_LINK]] to i8*), i8* bitcast (double** [[VAR_TO]] to i8*)], section "llvm.metadata"
-// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 2, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0))
+// CHECK-DEVICE: [[VAR_TO_PTR:%.+]] = load double*, double** [[VAR_TO]]
+// CHECK-DEVICE: load double, double* [[VAR_TO_PTR]]
#endif
More information about the cfe-commits
mailing list