r276977 - [OpenMP] Codegen for use_device_ptr clause.
Samuel Antao via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 28 07:23:26 PDT 2016
Author: sfantao
Date: Thu Jul 28 09:23:26 2016
New Revision: 276977
URL: http://llvm.org/viewvc/llvm-project?rev=276977&view=rev
Log:
[OpenMP] Codegen for use_device_ptr clause.
Summary: This patch adds support for the use_device_ptr clause. It includes changes in SEMA that could not be tested without codegen, namely, the use of the first private logic and mappable expressions support.
Reviewers: hfinkel, carlo.bertolli, arpith-jacob, kkwli0, ABataev
Subscribers: caomhin, cfe-commits
Differential Revision: https://reviews.llvm.org/D22691
Added:
cfe/trunk/test/OpenMP/target_data_use_device_ptr_codegen.cpp
Modified:
cfe/trunk/include/clang/AST/OpenMPClause.h
cfe/trunk/lib/AST/OpenMPClause.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/lib/Sema/SemaOpenMP.cpp
cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
cfe/trunk/lib/Serialization/ASTWriterStmt.cpp
Modified: cfe/trunk/include/clang/AST/OpenMPClause.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/OpenMPClause.h?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/OpenMPClause.h (original)
+++ cfe/trunk/include/clang/AST/OpenMPClause.h Thu Jul 28 09:23:26 2016
@@ -4228,50 +4228,153 @@ public:
/// 'use_device_ptr' with the variables 'a' and 'b'.
///
class OMPUseDevicePtrClause final
- : public OMPVarListClause<OMPUseDevicePtrClause>,
- private llvm::TrailingObjects<OMPUseDevicePtrClause, Expr *> {
+ : public OMPMappableExprListClause<OMPUseDevicePtrClause>,
+ private llvm::TrailingObjects<
+ OMPUseDevicePtrClause, Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent> {
friend TrailingObjects;
friend OMPVarListClause;
+ friend OMPMappableExprListClause;
friend class OMPClauseReader;
- /// Build clause with number of variables \a N.
+
+ /// Define the sizes of each trailing object array except the last one. This
+ /// is required for TrailingObjects to work properly.
+ size_t numTrailingObjects(OverloadToken<Expr *>) const {
+ return 3 * varlist_size();
+ }
+ size_t numTrailingObjects(OverloadToken<ValueDecl *>) const {
+ return getUniqueDeclarationsNum();
+ }
+ size_t numTrailingObjects(OverloadToken<unsigned>) const {
+ return getUniqueDeclarationsNum() + getTotalComponentListNum();
+ }
+
+ /// Build clause with number of variables \a NumVars.
///
/// \param StartLoc Starting location of the clause.
- /// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause.
- /// \param N Number of the variables in the clause.
- ///
- OMPUseDevicePtrClause(SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc, unsigned N)
- : OMPVarListClause<OMPUseDevicePtrClause>(OMPC_use_device_ptr, StartLoc,
- LParenLoc, EndLoc, N) {}
+ /// \param NumVars Number of expressions listed in this clause.
+ /// \param NumUniqueDeclarations Number of unique base declarations in this
+ /// clause.
+ /// \param NumComponentLists Number of component lists in this clause.
+ /// \param NumComponents Total number of expression components in the clause.
+ ///
+ explicit OMPUseDevicePtrClause(SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc, unsigned NumVars,
+ unsigned NumUniqueDeclarations,
+ unsigned NumComponentLists,
+ unsigned NumComponents)
+ : OMPMappableExprListClause(OMPC_use_device_ptr, StartLoc, LParenLoc,
+ EndLoc, NumVars, NumUniqueDeclarations,
+ NumComponentLists, NumComponents) {}
- /// \brief Build an empty clause.
- ///
- /// \param N Number of variables.
+ /// Build an empty clause.
///
- explicit OMPUseDevicePtrClause(unsigned N)
- : OMPVarListClause<OMPUseDevicePtrClause>(
- OMPC_use_device_ptr, SourceLocation(), SourceLocation(),
- SourceLocation(), N) {}
+ /// \param NumVars Number of expressions listed in this clause.
+ /// \param NumUniqueDeclarations Number of unique base declarations in this
+ /// clause.
+ /// \param NumComponentLists Number of component lists in this clause.
+ /// \param NumComponents Total number of expression components in the clause.
+ ///
+ explicit OMPUseDevicePtrClause(unsigned NumVars,
+ unsigned NumUniqueDeclarations,
+ unsigned NumComponentLists,
+ unsigned NumComponents)
+ : OMPMappableExprListClause(OMPC_use_device_ptr, SourceLocation(),
+ SourceLocation(), SourceLocation(), NumVars,
+ NumUniqueDeclarations, NumComponentLists,
+ NumComponents) {}
+
+ /// Sets the list of references to private copies with initializers for new
+ /// private variables.
+ /// \param VL List of references.
+ void setPrivateCopies(ArrayRef<Expr *> VL);
+
+ /// Gets the list of references to private copies with initializers for new
+ /// private variables.
+ MutableArrayRef<Expr *> getPrivateCopies() {
+ return MutableArrayRef<Expr *>(varlist_end(), varlist_size());
+ }
+ ArrayRef<const Expr *> getPrivateCopies() const {
+ return llvm::makeArrayRef(varlist_end(), varlist_size());
+ }
+
+ /// Sets the list of references to initializer variables for new private
+ /// variables.
+ /// \param VL List of references.
+ void setInits(ArrayRef<Expr *> VL);
+
+ /// Gets the list of references to initializer variables for new private
+ /// variables.
+ MutableArrayRef<Expr *> getInits() {
+ return MutableArrayRef<Expr *>(getPrivateCopies().end(), varlist_size());
+ }
+ ArrayRef<const Expr *> getInits() const {
+ return llvm::makeArrayRef(getPrivateCopies().end(), varlist_size());
+ }
public:
- /// Creates clause with a list of variables \a VL.
+ /// Creates clause with a list of variables \a Vars.
///
/// \param C AST context.
/// \param StartLoc Starting location of the clause.
- /// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause.
- /// \param VL List of references to the variables.
+ /// \param Vars The original expression used in the clause.
+ /// \param PrivateVars Expressions referring to private copies.
+ /// \param Inits Expressions referring to private copy initializers.
+ /// \param Declarations Declarations used in the clause.
+ /// \param ComponentLists Component lists used in the clause.
///
static OMPUseDevicePtrClause *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc, ArrayRef<Expr *> VL);
- /// Creates an empty clause with the place for \a N variables.
+ SourceLocation EndLoc, ArrayRef<Expr *> Vars,
+ ArrayRef<Expr *> PrivateVars, ArrayRef<Expr *> Inits,
+ ArrayRef<ValueDecl *> Declarations,
+ MappableExprComponentListsRef ComponentLists);
+
+ /// Creates an empty clause with the place for \a NumVars variables.
///
/// \param C AST context.
- /// \param N The number of variables.
- ///
- static OMPUseDevicePtrClause *CreateEmpty(const ASTContext &C, unsigned N);
+ /// \param NumVars Number of expressions listed in the clause.
+ /// \param NumUniqueDeclarations Number of unique base declarations in this
+ /// clause.
+ /// \param NumComponentLists Number of unique base declarations in this
+ /// clause.
+ /// \param NumComponents Total number of expression components in the clause.
+ ///
+ static OMPUseDevicePtrClause *CreateEmpty(const ASTContext &C,
+ unsigned NumVars,
+ unsigned NumUniqueDeclarations,
+ unsigned NumComponentLists,
+ unsigned NumComponents);
+
+ typedef MutableArrayRef<Expr *>::iterator private_copies_iterator;
+ typedef ArrayRef<const Expr *>::iterator private_copies_const_iterator;
+ typedef llvm::iterator_range<private_copies_iterator> private_copies_range;
+ typedef llvm::iterator_range<private_copies_const_iterator>
+ private_copies_const_range;
+
+ private_copies_range private_copies() {
+ return private_copies_range(getPrivateCopies().begin(),
+ getPrivateCopies().end());
+ }
+ private_copies_const_range private_copies() const {
+ return private_copies_const_range(getPrivateCopies().begin(),
+ getPrivateCopies().end());
+ }
+
+ typedef MutableArrayRef<Expr *>::iterator inits_iterator;
+ typedef ArrayRef<const Expr *>::iterator inits_const_iterator;
+ typedef llvm::iterator_range<inits_iterator> inits_range;
+ typedef llvm::iterator_range<inits_const_iterator> inits_const_range;
+
+ inits_range inits() {
+ return inits_range(getInits().begin(), getInits().end());
+ }
+ inits_const_range inits() const {
+ return inits_const_range(getInits().begin(), getInits().end());
+ }
child_range children() {
return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
Modified: cfe/trunk/lib/AST/OpenMPClause.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/OpenMPClause.cpp?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/lib/AST/OpenMPClause.cpp (original)
+++ cfe/trunk/lib/AST/OpenMPClause.cpp Thu Jul 28 09:23:26 2016
@@ -732,22 +732,66 @@ OMPFromClause *OMPFromClause::CreateEmpt
NumComponentLists, NumComponents);
}
-OMPUseDevicePtrClause *OMPUseDevicePtrClause::Create(const ASTContext &C,
- SourceLocation StartLoc,
- SourceLocation LParenLoc,
- SourceLocation EndLoc,
- ArrayRef<Expr *> VL) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
- OMPUseDevicePtrClause *Clause =
- new (Mem) OMPUseDevicePtrClause(StartLoc, LParenLoc, EndLoc, VL.size());
- Clause->setVarRefs(VL);
+void OMPUseDevicePtrClause::setPrivateCopies(ArrayRef<Expr *> VL) {
+ assert(VL.size() == varlist_size() &&
+ "Number of private copies is not the same as the preallocated buffer");
+ std::copy(VL.begin(), VL.end(), varlist_end());
+}
+
+void OMPUseDevicePtrClause::setInits(ArrayRef<Expr *> VL) {
+ assert(VL.size() == varlist_size() &&
+ "Number of inits is not the same as the preallocated buffer");
+ std::copy(VL.begin(), VL.end(), getPrivateCopies().end());
+}
+
+OMPUseDevicePtrClause *OMPUseDevicePtrClause::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+ SourceLocation EndLoc, ArrayRef<Expr *> Vars, ArrayRef<Expr *> PrivateVars,
+ ArrayRef<Expr *> Inits, ArrayRef<ValueDecl *> Declarations,
+ MappableExprComponentListsRef ComponentLists) {
+ unsigned NumVars = Vars.size();
+ unsigned NumUniqueDeclarations =
+ getUniqueDeclarationsTotalNumber(Declarations);
+ unsigned NumComponentLists = ComponentLists.size();
+ unsigned NumComponents = getComponentsTotalNumber(ComponentLists);
+
+ // We need to allocate:
+ // 3 x NumVars x Expr* - we have an original list expression for each clause
+ // list entry and an equal number of private copies and inits.
+ // NumUniqueDeclarations x ValueDecl* - unique base declarations associated
+ // with each component list.
+ // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the
+ // number of lists for each unique declaration and the size of each component
+ // list.
+ // NumComponents x MappableComponent - the total of all the components in all
+ // the lists.
+ void *Mem = C.Allocate(
+ totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent>(
+ 3 * NumVars, NumUniqueDeclarations,
+ NumUniqueDeclarations + NumComponentLists, NumComponents));
+
+ OMPUseDevicePtrClause *Clause = new (Mem) OMPUseDevicePtrClause(
+ StartLoc, LParenLoc, EndLoc, NumVars, NumUniqueDeclarations,
+ NumComponentLists, NumComponents);
+
+ Clause->setVarRefs(Vars);
+ Clause->setPrivateCopies(PrivateVars);
+ Clause->setInits(Inits);
+ Clause->setClauseInfo(Declarations, ComponentLists);
return Clause;
}
-OMPUseDevicePtrClause *OMPUseDevicePtrClause::CreateEmpty(const ASTContext &C,
- unsigned N) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N));
- return new (Mem) OMPUseDevicePtrClause(N);
+OMPUseDevicePtrClause *OMPUseDevicePtrClause::CreateEmpty(
+ const ASTContext &C, unsigned NumVars, unsigned NumUniqueDeclarations,
+ unsigned NumComponentLists, unsigned NumComponents) {
+ void *Mem = C.Allocate(
+ totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent>(
+ 3 * NumVars, NumUniqueDeclarations,
+ NumUniqueDeclarations + NumComponentLists, NumComponents));
+ return new (Mem) OMPUseDevicePtrClause(NumVars, NumUniqueDeclarations,
+ NumComponentLists, NumComponents);
}
OMPIsDevicePtrClause *OMPIsDevicePtrClause::Create(const ASTContext &C,
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Thu Jul 28 09:23:26 2016
@@ -4981,6 +4981,9 @@ public:
/// map/privatization results in multiple arguments passed to the runtime
/// library.
OMP_MAP_FIRST_REF = 0x20,
+ /// \brief Signal that the runtime library has to return the device pointer
+ /// in the current position for the data being mapped.
+ OMP_MAP_RETURN_PTR = 0x40,
/// \brief This flag signals that the reference being passed is a pointer to
/// private data.
OMP_MAP_PRIVATE_PTR = 0x80,
@@ -4988,6 +4991,24 @@ public:
OMP_MAP_PRIVATE_VAL = 0x100,
};
+ /// Class that associates information with a base pointer to be passed to the
+ /// runtime library.
+ class BasePointerInfo {
+ /// The base pointer.
+ llvm::Value *Ptr = nullptr;
+ /// The base declaration that refers to this device pointer, or null if
+ /// there is none.
+ const ValueDecl *DevPtrDecl = nullptr;
+
+ public:
+ BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
+ : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
+ llvm::Value *operator*() const { return Ptr; }
+ const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
+ void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
+ };
+
+ typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
@@ -5129,7 +5150,7 @@ private:
void generateInfoForComponentList(
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
- MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
+ MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
bool IsFirstComponentList) const {
@@ -5400,8 +5421,10 @@ public:
}
/// \brief Generate all the base pointers, section pointers, sizes and map
- /// types for the extracted mappable expressions.
- void generateAllInfo(MapValuesArrayTy &BasePointers,
+ /// types for the extracted mappable expressions. Also, for each item that
+ /// relates with a device pointer, a pair of the relevant declaration and
+ /// index where it occurs is appended to the device pointers info array.
+ void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const {
BasePointers.clear();
@@ -5410,9 +5433,28 @@ public:
Types.clear();
struct MapInfo {
+ /// Kind that defines how a device pointer has to be returned.
+ enum ReturnPointerKind {
+ // Don't have to return any pointer.
+ RPK_None,
+ // Pointer is the base of the declaration.
+ RPK_Base,
+ // Pointer is a member of the base declaration - 'this'
+ RPK_Member,
+ // Pointer is a reference and a member of the base declaration - 'this'
+ RPK_MemberReference,
+ };
OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
- OpenMPMapClauseKind MapType;
- OpenMPMapClauseKind MapTypeModifier;
+ OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
+ OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
+ ReturnPointerKind ReturnDevicePointer = RPK_None;
+ MapInfo(
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
+ OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
+ ReturnPointerKind ReturnDevicePointer)
+ : Components(Components), MapType(MapType),
+ MapTypeModifier(MapTypeModifier),
+ ReturnDevicePointer(ReturnDevicePointer) {}
};
// We have to process the component lists that relate with the same
@@ -5422,14 +5464,15 @@ public:
// Helper function to fill the information map for the different supported
// clauses.
- auto &&InfoGen =
- [&Info](const ValueDecl *D,
- OMPClauseMappableExprCommon::MappableExprComponentListRef L,
- OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) {
- const ValueDecl *VD =
- D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
- Info[VD].push_back({L, MapType, MapModifier});
- };
+ auto &&InfoGen = [&Info](
+ const ValueDecl *D,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+ OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
+ MapInfo::ReturnPointerKind ReturnDevicePointer = MapInfo::RPK_None) {
+ const ValueDecl *VD =
+ D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+ Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
+ };
for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
for (auto L : C->component_lists())
@@ -5441,6 +5484,51 @@ public:
for (auto L : C->component_lists())
InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown);
+ // Look at the use_device_ptr clause information and mark the existing map
+ // entries as such. If there is no map information for an entry in the
+ // use_device_ptr list, we create one with map type 'alloc' and zero size
+ // section. It is the user fault if that was not mapped before.
+ for (auto *C : Directive.getClausesOfKind<OMPUseDevicePtrClause>())
+ for (auto L : C->component_lists()) {
+ assert(!L.second.empty() && "Not expecting empty list of components!");
+ const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
+ VD = cast<ValueDecl>(VD->getCanonicalDecl());
+ auto *IE = L.second.back().getAssociatedExpression();
+ // If the first component is a member expression, we have to look into
+ // 'this', which maps to null in the map of map information. Otherwise
+ // look directly for the information.
+ auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
+
+ // We potentially have map information for this declaration already.
+ // Look for the first set of components that refer to it.
+ if (It != Info.end()) {
+ auto CI = std::find_if(
+ It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
+ return MI.Components.back().getAssociatedDeclaration() == VD;
+ });
+ // If we found a map entry, signal that the pointer has to be returned
+ // and move on to the next declaration.
+ if (CI != It->second.end()) {
+ CI->ReturnDevicePointer = isa<MemberExpr>(IE)
+ ? (VD->getType()->isReferenceType()
+ ? MapInfo::RPK_MemberReference
+ : MapInfo::RPK_Member)
+ : MapInfo::RPK_Base;
+ continue;
+ }
+ }
+
+ // We didn't find any match in our map information - generate a zero
+ // size array section.
+ llvm::Value *Ptr =
+ CGF.EmitLoadOfLValue(CGF.EmitLValue(IE), SourceLocation())
+ .getScalarVal();
+ BasePointers.push_back({Ptr, VD});
+ Pointers.push_back(Ptr);
+ Sizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
+ Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
+ }
+
for (auto &M : Info) {
// We need to know when we generate information for the first component
// associated with a capture, because the mapping flags depend on it.
@@ -5448,9 +5536,35 @@ public:
for (MapInfo &L : M.second) {
assert(!L.Components.empty() &&
"Not expecting declaration with no component lists.");
+
+ // Remember the current base pointer index.
+ unsigned CurrentBasePointersIdx = BasePointers.size();
generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components,
BasePointers, Pointers, Sizes, Types,
IsFirstComponentList);
+
+ // If this entry relates with a device pointer, set the relevant
+ // declaration and add the 'return pointer' flag.
+ if (IsFirstComponentList &&
+ L.ReturnDevicePointer != MapInfo::RPK_None) {
+ // If the pointer is not the base of the map, we need to skip the
+ // base. If it is a reference in a member field, we also need to skip
+ // the map of the reference.
+ if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
+ ++CurrentBasePointersIdx;
+ if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
+ ++CurrentBasePointersIdx;
+ }
+ assert(BasePointers.size() > CurrentBasePointersIdx &&
+ "Unexpected number of mapped base pointers.");
+
+ auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
+ assert(RelevantVD &&
+ "No relevant declaration related with device pointer??");
+
+ BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
+ Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
+ }
IsFirstComponentList = false;
}
}
@@ -5459,7 +5573,7 @@ public:
/// \brief Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
- MapValuesArrayTy &BasePointers,
+ MapBaseValuesArrayTy &BasePointers,
MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const {
@@ -5496,12 +5610,12 @@ public:
/// \brief Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
- void generateDefaultMapInfo(
- const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV,
- MappableExprsHandler::MapValuesArrayTy &CurBasePointers,
- MappableExprsHandler::MapValuesArrayTy &CurPointers,
- MappableExprsHandler::MapValuesArrayTy &CurSizes,
- MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) {
+ void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
+ const FieldDecl &RI, llvm::Value *CV,
+ MapBaseValuesArrayTy &CurBasePointers,
+ MapValuesArrayTy &CurPointers,
+ MapValuesArrayTy &CurSizes,
+ MapFlagsArrayTy &CurMapTypes) {
// Do the default mapping.
if (CI.capturesThis()) {
@@ -5510,15 +5624,14 @@ public:
const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
// Default map type.
- CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO |
- MappableExprsHandler::OMP_MAP_FROM);
+ CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
} else if (CI.capturesVariableByCopy()) {
CurBasePointers.push_back(CV);
CurPointers.push_back(CV);
if (!RI.getType()->isAnyPointerType()) {
// We have to signal to the runtime captures passed by value that are
// not pointers.
- CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL);
+ CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
CurSizes.push_back(CGF.getTypeSize(RI.getType()));
} else {
// Pointers are implicitly mapped with a zero size and no flags
@@ -5539,9 +5652,8 @@ public:
// default the value doesn't have to be retrieved. For an aggregate
// type, the default is 'tofrom'.
CurMapTypes.push_back(ElementType->isAggregateType()
- ? (MappableExprsHandler::OMP_MAP_TO |
- MappableExprsHandler::OMP_MAP_FROM)
- : MappableExprsHandler::OMP_MAP_TO);
+ ? (OMP_MAP_TO | OMP_MAP_FROM)
+ : OMP_MAP_TO);
// If we have a capture by reference we may need to add the private
// pointer flag if the base declaration shows in some first-private
@@ -5551,7 +5663,7 @@ public:
}
// Every default map produces a single argument, so, it is always the
// first one.
- CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF;
+ CurMapTypes.back() |= OMP_MAP_FIRST_REF;
}
};
@@ -5566,19 +5678,20 @@ enum OpenMPOffloadingReservedDeviceIDs {
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
-emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
- llvm::Value *&PointersArray, llvm::Value *&SizesArray,
- llvm::Value *&MapTypesArray,
- MappableExprsHandler::MapValuesArrayTy &BasePointers,
+emitOffloadingArrays(CodeGenFunction &CGF,
+ MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
MappableExprsHandler::MapValuesArrayTy &Pointers,
MappableExprsHandler::MapValuesArrayTy &Sizes,
- MappableExprsHandler::MapFlagsArrayTy &MapTypes) {
+ MappableExprsHandler::MapFlagsArrayTy &MapTypes,
+ CGOpenMPRuntime::TargetDataInfo &Info) {
auto &CGM = CGF.CGM;
auto &Ctx = CGF.getContext();
- BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr;
+ // Reset the array information.
+ Info.clearArrayInfo();
+ Info.NumberOfPtrs = BasePointers.size();
- if (unsigned PointerNumVal = BasePointers.size()) {
+ if (Info.NumberOfPtrs) {
// Detect if we have any capture size requiring runtime evaluation of the
// size so that a constant array could be eventually used.
bool hasRuntimeEvaluationCaptureSize = false;
@@ -5588,14 +5701,14 @@ emitOffloadingArrays(CodeGenFunction &CG
break;
}
- llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
+ llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
QualType PointerArrayType =
Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
/*IndexTypeQuals=*/0);
- BasePointersArray =
+ Info.BasePointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
- PointersArray =
+ Info.PointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
// If we don't have any VLA types or other types that require runtime
@@ -5605,7 +5718,7 @@ emitOffloadingArrays(CodeGenFunction &CG
QualType SizeArrayType = Ctx.getConstantArrayType(
Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
/*IndexTypeQuals=*/0);
- SizesArray =
+ Info.SizesArray =
CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
} else {
// We expect all the sizes to be constant, so we collect them to create
@@ -5621,7 +5734,7 @@ emitOffloadingArrays(CodeGenFunction &CG
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
SizesArrayInit, ".offload_sizes");
SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- SizesArray = SizesArrayGbl;
+ Info.SizesArray = SizesArrayGbl;
}
// The map types are always constant so we don't need to generate code to
@@ -5633,10 +5746,10 @@ emitOffloadingArrays(CodeGenFunction &CG
/*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
MapTypesArrayInit, ".offload_maptypes");
MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- MapTypesArray = MapTypesArrayGbl;
+ Info.MapTypesArray = MapTypesArrayGbl;
- for (unsigned i = 0; i < PointerNumVal; ++i) {
- llvm::Value *BPVal = BasePointers[i];
+ for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
+ llvm::Value *BPVal = *BasePointers[i];
if (BPVal->getType()->isPointerTy())
BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
else {
@@ -5645,11 +5758,15 @@ emitOffloadingArrays(CodeGenFunction &CG
BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
}
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
- 0, i);
+ llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
+ Info.BasePointersArray, 0, i);
Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(BPVal, BPAddr);
+ if (Info.requiresDevicePointerInfo())
+ if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
+ Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
+
llvm::Value *PVal = Pointers[i];
if (PVal->getType()->isPointerTy())
PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
@@ -5659,14 +5776,15 @@ emitOffloadingArrays(CodeGenFunction &CG
PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
}
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
- i);
+ llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
+ Info.PointersArray, 0, i);
Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(PVal, PAddr);
if (hasRuntimeEvaluationCaptureSize) {
llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
+ llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
+ Info.SizesArray,
/*Idx0=*/0,
/*Idx1=*/i);
Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
@@ -5682,23 +5800,24 @@ emitOffloadingArrays(CodeGenFunction &CG
static void emitOffloadingArraysArgument(
CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
- llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray,
- llvm::Value *PointersArray, llvm::Value *SizesArray,
- llvm::Value *MapTypesArray, unsigned NumElems) {
+ llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
auto &CGM = CGF.CGM;
- if (NumElems) {
+ if (Info.NumberOfPtrs) {
BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray,
+ llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
+ Info.BasePointersArray,
/*Idx0=*/0, /*Idx1=*/0);
PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray,
+ llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
+ Info.PointersArray,
/*Idx0=*/0,
/*Idx1=*/0);
SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray,
+ llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
/*Idx0=*/0, /*Idx1=*/0);
MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray,
+ llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
+ Info.MapTypesArray,
/*Idx0=*/0,
/*Idx1=*/0);
} else {
@@ -5725,12 +5844,12 @@ void CGOpenMPRuntime::emitTargetCall(Cod
// Fill up the arrays with all the captured variables.
MappableExprsHandler::MapValuesArrayTy KernelArgs;
- MappableExprsHandler::MapValuesArrayTy BasePointers;
+ MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
MappableExprsHandler::MapValuesArrayTy Sizes;
MappableExprsHandler::MapFlagsArrayTy MapTypes;
- MappableExprsHandler::MapValuesArrayTy CurBasePointers;
+ MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
MappableExprsHandler::MapValuesArrayTy CurPointers;
MappableExprsHandler::MapValuesArrayTy CurSizes;
MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
@@ -5779,7 +5898,7 @@ void CGOpenMPRuntime::emitTargetCall(Cod
// The kernel args are always the first elements of the base pointers
// associated with a capture.
- KernelArgs.push_back(CurBasePointers.front());
+ KernelArgs.push_back(*CurBasePointers.front());
// We need to append the results of this capture to what we already have.
BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
Pointers.append(CurPointers.begin(), CurPointers.end());
@@ -5802,17 +5921,11 @@ void CGOpenMPRuntime::emitTargetCall(Cod
&D](CodeGenFunction &CGF, PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
// Emit the offloading arrays.
- llvm::Value *BasePointersArray;
- llvm::Value *PointersArray;
- llvm::Value *SizesArray;
- llvm::Value *MapTypesArray;
- emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
- MapTypesArray, BasePointers, Pointers, Sizes,
- MapTypes);
- emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray,
- SizesArray, MapTypesArray, BasePointersArray,
- PointersArray, SizesArray, MapTypesArray,
- BasePointers.size());
+ TargetDataInfo Info;
+ emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
+ emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
+ Info.PointersArray, Info.SizesArray,
+ Info.MapTypesArray, Info);
// On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host
@@ -5853,15 +5966,19 @@ void CGOpenMPRuntime::emitTargetCall(Cod
assert(ThreadLimit && "Thread limit expression should be available along "
"with number of teams.");
llvm::Value *OffloadingArgs[] = {
- DeviceID, OutlinedFnID, PointerNum,
- BasePointersArray, PointersArray, SizesArray,
- MapTypesArray, NumTeams, ThreadLimit};
+ DeviceID, OutlinedFnID,
+ PointerNum, Info.BasePointersArray,
+ Info.PointersArray, Info.SizesArray,
+ Info.MapTypesArray, NumTeams,
+ ThreadLimit};
Return = CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
} else {
llvm::Value *OffloadingArgs[] = {
- DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
- PointersArray, SizesArray, MapTypesArray};
+ DeviceID, OutlinedFnID,
+ PointerNum, Info.BasePointersArray,
+ Info.PointersArray, Info.SizesArray,
+ Info.MapTypesArray};
Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
OffloadingArgs);
}
@@ -6073,29 +6190,23 @@ void CGOpenMPRuntime::emitNumTeamsClause
PushNumTeamsArgs);
}
-void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- const Expr *IfCond,
- const Expr *Device,
- const RegionCodeGenTy &CodeGen) {
-
+void CGOpenMPRuntime::emitTargetDataCalls(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
+ const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
if (!CGF.HaveInsertPoint())
return;
- llvm::Value *BasePointersArray = nullptr;
- llvm::Value *PointersArray = nullptr;
- llvm::Value *SizesArray = nullptr;
- llvm::Value *MapTypesArray = nullptr;
- unsigned NumOfPtrs = 0;
+ // Action used to replace the default codegen action and turn privatization
+ // off.
+ PrePostActionTy NoPrivAction;
// Generate the code for the opening of the data environment. Capture all the
// arguments of the runtime call by reference because they are used in the
// closing of the region.
- auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray,
- &SizesArray, &MapTypesArray, Device,
- &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
+ CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
- MappableExprsHandler::MapValuesArrayTy BasePointers;
+ MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
MappableExprsHandler::MapValuesArrayTy Sizes;
MappableExprsHandler::MapFlagsArrayTy MapTypes;
@@ -6103,21 +6214,16 @@ void CGOpenMPRuntime::emitTargetDataCall
// Get map clause information.
MappableExprsHandler MCHandler(D, CGF);
MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
- NumOfPtrs = BasePointers.size();
// Fill up the arrays and create the arguments.
- emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
- MapTypesArray, BasePointers, Pointers, Sizes,
- MapTypes);
+ emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
llvm::Value *BasePointersArrayArg = nullptr;
llvm::Value *PointersArrayArg = nullptr;
llvm::Value *SizesArrayArg = nullptr;
llvm::Value *MapTypesArrayArg = nullptr;
emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
- SizesArrayArg, MapTypesArrayArg,
- BasePointersArray, PointersArray, SizesArray,
- MapTypesArray, NumOfPtrs);
+ SizesArrayArg, MapTypesArrayArg, Info);
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
@@ -6128,7 +6234,7 @@ void CGOpenMPRuntime::emitTargetDataCall
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
// Emit the number of elements in the offloading arrays.
- auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
+ auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg,
@@ -6136,23 +6242,24 @@ void CGOpenMPRuntime::emitTargetDataCall
auto &RT = CGF.CGM.getOpenMPRuntime();
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
OffloadingArgs);
+
+ // If device pointer privatization is required, emit the body of the region
+ // here. It will have to be duplicated: with and without privatization.
+ if (!Info.CaptureDeviceAddrMap.empty())
+ CodeGen(CGF);
};
// Generate code for the closing of the data region.
- auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray,
- &MapTypesArray, Device,
- &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
- assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray &&
- NumOfPtrs && "Invalid data environment closing arguments.");
+ auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ assert(Info.isValid() && "Invalid data environment closing arguments.");
llvm::Value *BasePointersArrayArg = nullptr;
llvm::Value *PointersArrayArg = nullptr;
llvm::Value *SizesArrayArg = nullptr;
llvm::Value *MapTypesArrayArg = nullptr;
emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
- SizesArrayArg, MapTypesArrayArg,
- BasePointersArray, PointersArray, SizesArray,
- MapTypesArray, NumOfPtrs);
+ SizesArrayArg, MapTypesArrayArg, Info);
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
@@ -6163,7 +6270,7 @@ void CGOpenMPRuntime::emitTargetDataCall
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
// Emit the number of elements in the offloading arrays.
- auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
+ auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg,
@@ -6173,24 +6280,40 @@ void CGOpenMPRuntime::emitTargetDataCall
OffloadingArgs);
};
- // In the event we get an if clause, we don't have to take any action on the
- // else side.
- auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
+ // If we need device pointer privatization, we need to emit the body of the
+ // region with no privatization in the 'else' branch of the conditional.
+ // Otherwise, we don't have to do anything.
+ auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ if (!Info.CaptureDeviceAddrMap.empty()) {
+ CodeGen.setAction(NoPrivAction);
+ CodeGen(CGF);
+ }
+ };
+
+ // We don't have to do anything to close the region if the if clause evaluates
+ // to false.
+ auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen);
+ emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
} else {
- RegionCodeGenTy BeginThenRCG(BeginThenGen);
- BeginThenRCG(CGF);
+ RegionCodeGenTy RCG(BeginThenGen);
+ RCG(CGF);
}
- CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen);
+ // If we don't require privatization of device pointers, we emit the body in
+ // between the runtime calls. This avoids duplicating the body code.
+ if (Info.CaptureDeviceAddrMap.empty()) {
+ CodeGen.setAction(NoPrivAction);
+ CodeGen(CGF);
+ }
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen);
+ emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
} else {
- RegionCodeGenTy EndThenRCG(EndThenGen);
- EndThenRCG(CGF);
+ RegionCodeGenTy RCG(EndThenGen);
+ RCG(CGF);
}
}
@@ -6208,7 +6331,7 @@ void CGOpenMPRuntime::emitTargetDataStan
// Generate the code for the opening of the data environment.
auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
- MappableExprsHandler::MapValuesArrayTy BasePointers;
+ MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
MappableExprsHandler::MapValuesArrayTy Sizes;
MappableExprsHandler::MapFlagsArrayTy MapTypes;
@@ -6217,19 +6340,12 @@ void CGOpenMPRuntime::emitTargetDataStan
MappableExprsHandler MEHandler(D, CGF);
MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
- llvm::Value *BasePointersArrayArg = nullptr;
- llvm::Value *PointersArrayArg = nullptr;
- llvm::Value *SizesArrayArg = nullptr;
- llvm::Value *MapTypesArrayArg = nullptr;
-
// Fill up the arrays and create the arguments.
- emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg,
- SizesArrayArg, MapTypesArrayArg, BasePointers,
- Pointers, Sizes, MapTypes);
- emitOffloadingArraysArgument(
- CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
- MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
- MapTypesArrayArg, BasePointers.size());
+ TargetDataInfo Info;
+ emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
+ emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
+ Info.PointersArray, Info.SizesArray,
+ Info.MapTypesArray, Info);
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
@@ -6243,8 +6359,8 @@ void CGOpenMPRuntime::emitTargetDataStan
auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
llvm::Value *OffloadingArgs[] = {
- DeviceID, PointerNum, BasePointersArrayArg,
- PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
+ DeviceID, PointerNum, Info.BasePointersArray,
+ Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
auto &RT = CGF.CGM.getOpenMPRuntime();
// Select the right runtime function call for each expected standalone
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Thu Jul 28 09:23:26 2016
@@ -997,17 +997,59 @@ public:
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
const Expr *ThreadLimit, SourceLocation Loc);
+ /// Struct that keeps all the relevant information that should be kept
+ /// throughout a 'target data' region.
+ class TargetDataInfo {
+ /// Set to true if device pointer information have to be obtained.
+ bool RequiresDevicePointerInfo = false;
+
+ public:
+ /// The array of base pointer passed to the runtime library.
+ llvm::Value *BasePointersArray = nullptr;
+ /// The array of section pointers passed to the runtime library.
+ llvm::Value *PointersArray = nullptr;
+ /// The array of sizes passed to the runtime library.
+ llvm::Value *SizesArray = nullptr;
+ /// The array of map types passed to the runtime library.
+ llvm::Value *MapTypesArray = nullptr;
+ /// The total number of pointers passed to the runtime library.
+ unsigned NumberOfPtrs = 0u;
+ /// Map between the a declaration of a capture and the corresponding base
+ /// pointer address where the runtime returns the device pointers.
+ llvm::DenseMap<const ValueDecl *, Address> CaptureDeviceAddrMap;
+
+ explicit TargetDataInfo() {}
+ explicit TargetDataInfo(bool RequiresDevicePointerInfo)
+ : RequiresDevicePointerInfo(RequiresDevicePointerInfo) {}
+ /// Clear information about the data arrays.
+ void clearArrayInfo() {
+ BasePointersArray = nullptr;
+ PointersArray = nullptr;
+ SizesArray = nullptr;
+ MapTypesArray = nullptr;
+ NumberOfPtrs = 0u;
+ }
+ /// Return true if the current target data information has valid arrays.
+ bool isValid() {
+ return BasePointersArray && PointersArray && SizesArray &&
+ MapTypesArray && NumberOfPtrs;
+ }
+ bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
+ };
+
/// \brief Emit the target data mapping code associated with \a D.
/// \param D Directive to emit.
- /// \param IfCond Expression evaluated in if clause associated with the target
- /// directive, or null if no if clause is used.
+ /// \param IfCond Expression evaluated in if clause associated with the
+ /// target directive, or null if no device clause is used.
/// \param Device Expression evaluated in device clause associated with the
/// target directive, or null if no device clause is used.
- /// \param CodeGen Function that emits the enclosed region.
+ /// \param Info A record used to store information that needs to be preserved
+ /// until the region is closed.
virtual void emitTargetDataCalls(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
const Expr *IfCond, const Expr *Device,
- const RegionCodeGenTy &CodeGen);
+ const RegionCodeGenTy &CodeGen,
+ TargetDataInfo &Info);
/// \brief Emit the data mapping/movement code associated with the directive
/// \a D that should be of the form 'target [{enter|exit} data | update]'.
Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Thu Jul 28 09:23:26 2016
@@ -3400,22 +3400,137 @@ CodeGenFunction::getOMPCancelDestination
return BreakContinueStack.back().BreakBlock;
}
+void CodeGenFunction::EmitOMPUseDevicePtrClause(
+ const OMPClause &NC, OMPPrivateScope &PrivateScope,
+ const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
+ const auto &C = cast<OMPUseDevicePtrClause>(NC);
+ auto OrigVarIt = C.varlist_begin();
+ auto InitIt = C.inits().begin();
+ for (auto PvtVarIt : C.private_copies()) {
+ auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
+ auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
+ auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
+
+ // In order to identify the right initializer we need to match the
+ // declaration used by the mapping logic. In some cases we may get
+ // OMPCapturedExprDecl that refers to the original declaration.
+ const ValueDecl *MatchingVD = OrigVD;
+ if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
+ // OMPCapturedExprDecl are used to privative fields of the current
+ // structure.
+ auto *ME = cast<MemberExpr>(OED->getInit());
+ assert(isa<CXXThisExpr>(ME->getBase()) &&
+ "Base should be the current struct!");
+ MatchingVD = ME->getMemberDecl();
+ }
+
+ // If we don't have information about the current list item, move on to
+ // the next one.
+ auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
+ if (InitAddrIt == CaptureDeviceAddrMap.end())
+ continue;
+
+ bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
+ // Initialize the temporary initialization variable with the address we
+ // get from the runtime library. We have to cast the source address
+ // because it is always a void *. References are materialized in the
+ // privatization scope, so the initialization here disregards the fact
+ // the original variable is a reference.
+ QualType AddrQTy =
+ getContext().getPointerType(OrigVD->getType().getNonReferenceType());
+ llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
+ Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
+ setAddrOfLocalVar(InitVD, InitAddr);
+
+ // Emit private declaration, it will be initialized by the value we
+ // declaration we just added to the local declarations map.
+ EmitDecl(*PvtVD);
+
+ // The initialization variables reached its purpose in the emission
+ // ofthe previous declaration, so we don't need it anymore.
+ LocalDeclMap.erase(InitVD);
+
+ // Return the address of the private variable.
+ return GetAddrOfLocalVar(PvtVD);
+ });
+ assert(IsRegistered && "firstprivate var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+
+ ++OrigVarIt;
+ ++InitIt;
+ }
+}
+
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) {
- // The target data enclosed region is implemented just by emitting the
- // statement.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
+
+ // Create a pre/post action to signal the privatization of the device pointer.
+ // This action can be replaced by the OpenMP runtime code generation to
+ // deactivate privatization.
+ bool PrivatizeDevicePointers = false;
+ class DevicePointerPrivActionTy : public PrePostActionTy {
+ bool &PrivatizeDevicePointers;
+
+ public:
+ explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
+ : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
+ void Enter(CodeGenFunction &CGF) override {
+ PrivatizeDevicePointers = true;
+ }
};
+ DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
+
+ auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitStmt(
+ cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ };
+
+ // Codegen that selects wheather to generate the privatization code or not.
+ auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
+ &InnermostCodeGen](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ RegionCodeGenTy RCG(InnermostCodeGen);
+ PrivatizeDevicePointers = false;
+
+ // Call the pre-action to change the status of PrivatizeDevicePointers if
+ // needed.
+ Action.Enter(CGF);
+
+ if (PrivatizeDevicePointers) {
+ OMPPrivateScope PrivateScope(CGF);
+ // Emit all instances of the use_device_ptr clause.
+ for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
+ CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
+ Info.CaptureDeviceAddrMap);
+ (void)PrivateScope.Privatize();
+ RCG(CGF);
+ } else
+ RCG(CGF);
+ };
+
+ // Forward the provided action to the privatization codegen.
+ RegionCodeGenTy PrivRCG(PrivCodeGen);
+ PrivRCG.setAction(Action);
+
+ // Notwithstanding the body of the region is emitted as inlined directive,
+ // we don't use an inline scope as changes in the references inside the
+ // region are expected to be visible outside, so we do not privative them.
+ OMPLexicalScope Scope(CGF, S);
+ CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
+ PrivRCG);
+ };
+
+ RegionCodeGenTy RCG(CodeGen);
// If we don't have target devices, don't bother emitting the data mapping
// code.
if (CGM.getLangOpts().OMPTargetTriples.empty()) {
- OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
-
- CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data,
- CodeGen);
+ RCG(*this);
return;
}
@@ -3429,7 +3544,12 @@ void CodeGenFunction::EmitOMPTargetDataD
if (auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();
- CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen);
+ // Set the action to signal privatization of device pointers.
+ RCG.setAction(PrivAction);
+
+ // Emit region code.
+ CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
+ Info);
}
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Thu Jul 28 09:23:26 2016
@@ -2392,6 +2392,9 @@ public:
OMPPrivateScope &PrivateScope);
void EmitOMPPrivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
+ void EmitOMPUseDevicePtrClause(
+ const OMPClause &C, OMPPrivateScope &PrivateScope,
+ const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
/// \brief Emit code for copyin clause in \a D directive. The next code is
/// generated at the start of outlined functions for directives:
/// \code
Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Thu Jul 28 09:23:26 2016
@@ -11800,7 +11800,10 @@ OMPClause *Sema::ActOnOpenMPUseDevicePtr
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
- SmallVector<Expr *, 8> Vars;
+ MappableVarListInfo MVLI(VarList);
+ SmallVector<Expr *, 8> PrivateCopies;
+ SmallVector<Expr *, 8> Inits;
+
for (auto &RefExpr : VarList) {
assert(RefExpr && "NULL expr in OpenMP use_device_ptr clause.");
SourceLocation ELoc;
@@ -11809,27 +11812,73 @@ OMPClause *Sema::ActOnOpenMPUseDevicePtr
auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
if (Res.second) {
// It will be analyzed later.
- Vars.push_back(RefExpr);
+ MVLI.ProcessedVarList.push_back(RefExpr);
+ PrivateCopies.push_back(nullptr);
+ Inits.push_back(nullptr);
}
ValueDecl *D = Res.first;
if (!D)
continue;
QualType Type = D->getType();
- // item should be a pointer or reference to pointer
- if (!Type.getNonReferenceType()->isPointerType()) {
+ Type = Type.getNonReferenceType().getUnqualifiedType();
+
+ auto *VD = dyn_cast<VarDecl>(D);
+
+ // Item should be a pointer or reference to pointer.
+ if (!Type->isPointerType()) {
Diag(ELoc, diag::err_omp_usedeviceptr_not_a_pointer)
<< 0 << RefExpr->getSourceRange();
continue;
}
- Vars.push_back(RefExpr->IgnoreParens());
+
+ // Build the private variable and the expression that refers to it.
+ auto VDPrivate = buildVarDecl(*this, ELoc, Type, D->getName(),
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
+ if (VDPrivate->isInvalidDecl())
+ continue;
+
+ CurContext->addDecl(VDPrivate);
+ auto VDPrivateRefExpr = buildDeclRefExpr(
+ *this, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc);
+
+ // Add temporary variable to initialize the private copy of the pointer.
+ auto *VDInit =
+ buildVarDecl(*this, RefExpr->getExprLoc(), Type, ".devptr.temp");
+ auto *VDInitRefExpr = buildDeclRefExpr(*this, VDInit, RefExpr->getType(),
+ RefExpr->getExprLoc());
+ AddInitializerToDecl(VDPrivate,
+ DefaultLvalueConversion(VDInitRefExpr).get(),
+ /*DirectInit=*/false, /*TypeMayContainAuto=*/false);
+
+ // If required, build a capture to implement the privatization initialized
+ // with the current list item value.
+ DeclRefExpr *Ref = nullptr;
+ if (!VD)
+ Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true);
+ MVLI.ProcessedVarList.push_back(VD ? RefExpr->IgnoreParens() : Ref);
+ PrivateCopies.push_back(VDPrivateRefExpr);
+ Inits.push_back(VDInitRefExpr);
+
+ // We need to add a data sharing attribute for this variable to make sure it
+ // is correctly captured. A variable that shows up in a use_device_ptr has
+ // similar properties of a first private variable.
+ DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_firstprivate, Ref);
+
+ // Create a mappable component for the list item. List items in this clause
+ // only need a component.
+ MVLI.VarBaseDeclarations.push_back(D);
+ MVLI.VarComponents.resize(MVLI.VarComponents.size() + 1);
+ MVLI.VarComponents.back().push_back(
+ OMPClauseMappableExprCommon::MappableComponent(SimpleRefExpr, D));
}
- if (Vars.empty())
+ if (MVLI.ProcessedVarList.empty())
return nullptr;
- return OMPUseDevicePtrClause::Create(Context, StartLoc, LParenLoc, EndLoc,
- Vars);
+ return OMPUseDevicePtrClause::Create(
+ Context, StartLoc, LParenLoc, EndLoc, MVLI.ProcessedVarList,
+ PrivateCopies, Inits, MVLI.VarBaseDeclarations, MVLI.VarComponents);
}
OMPClause *Sema::ActOnOpenMPIsDevicePtrClause(ArrayRef<Expr *> VarList,
Modified: cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReaderStmt.cpp?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReaderStmt.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReaderStmt.cpp Thu Jul 28 09:23:26 2016
@@ -1932,9 +1932,15 @@ OMPClause *OMPClauseReader::readClause()
NumComponents);
break;
}
- case OMPC_use_device_ptr:
- C = OMPUseDevicePtrClause::CreateEmpty(Context, Record[Idx++]);
+ case OMPC_use_device_ptr: {
+ unsigned NumVars = Record[Idx++];
+ unsigned NumDeclarations = Record[Idx++];
+ unsigned NumLists = Record[Idx++];
+ unsigned NumComponents = Record[Idx++];
+ C = OMPUseDevicePtrClause::CreateEmpty(Context, NumVars, NumDeclarations,
+ NumLists, NumComponents);
break;
+ }
case OMPC_is_device_ptr:
C = OMPIsDevicePtrClause::CreateEmpty(Context, Record[Idx++]);
break;
@@ -2457,13 +2463,54 @@ void OMPClauseReader::VisitOMPFromClause
void OMPClauseReader::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) {
C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
- unsigned NumVars = C->varlist_size();
+ auto NumVars = C->varlist_size();
+ auto UniqueDecls = C->getUniqueDeclarationsNum();
+ auto TotalLists = C->getTotalComponentListNum();
+ auto TotalComponents = C->getTotalComponentsNum();
+
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Reader->Reader.ReadSubExpr());
C->setVarRefs(Vars);
Vars.clear();
+ for (unsigned i = 0; i != NumVars; ++i)
+ Vars.push_back(Reader->Reader.ReadSubExpr());
+ C->setPrivateCopies(Vars);
+ Vars.clear();
+ for (unsigned i = 0; i != NumVars; ++i)
+ Vars.push_back(Reader->Reader.ReadSubExpr());
+ C->setInits(Vars);
+
+ SmallVector<ValueDecl *, 16> Decls;
+ Decls.reserve(UniqueDecls);
+ for (unsigned i = 0; i < UniqueDecls; ++i)
+ Decls.push_back(
+ Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx));
+ C->setUniqueDecls(Decls);
+
+ SmallVector<unsigned, 16> ListsPerDecl;
+ ListsPerDecl.reserve(UniqueDecls);
+ for (unsigned i = 0; i < UniqueDecls; ++i)
+ ListsPerDecl.push_back(Record[Idx++]);
+ C->setDeclNumLists(ListsPerDecl);
+
+ SmallVector<unsigned, 32> ListSizes;
+ ListSizes.reserve(TotalLists);
+ for (unsigned i = 0; i < TotalLists; ++i)
+ ListSizes.push_back(Record[Idx++]);
+ C->setComponentListSizes(ListSizes);
+
+ SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
+ Components.reserve(TotalComponents);
+ for (unsigned i = 0; i < TotalComponents; ++i) {
+ Expr *AssociatedExpr = Reader->Reader.ReadSubExpr();
+ ValueDecl *AssociatedDecl =
+ Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx);
+ Components.push_back(OMPClauseMappableExprCommon::MappableComponent(
+ AssociatedExpr, AssociatedDecl));
+ }
+ C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {
Modified: cfe/trunk/lib/Serialization/ASTWriterStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriterStmt.cpp?rev=276977&r1=276976&r2=276977&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriterStmt.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriterStmt.cpp Thu Jul 28 09:23:26 2016
@@ -2151,9 +2151,25 @@ void OMPClauseWriter::VisitOMPFromClause
void OMPClauseWriter::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) {
Record.push_back(C->varlist_size());
+ Record.push_back(C->getUniqueDeclarationsNum());
+ Record.push_back(C->getTotalComponentListNum());
+ Record.push_back(C->getTotalComponentsNum());
Record.AddSourceLocation(C->getLParenLoc());
- for (auto *VE : C->varlists()) {
+ for (auto *E : C->varlists())
+ Record.AddStmt(E);
+ for (auto *VE : C->private_copies())
Record.AddStmt(VE);
+ for (auto *VE : C->inits())
+ Record.AddStmt(VE);
+ for (auto *D : C->all_decls())
+ Record.AddDeclRef(D);
+ for (auto N : C->all_num_lists())
+ Record.push_back(N);
+ for (auto N : C->all_lists_sizes())
+ Record.push_back(N);
+ for (auto &M : C->all_components()) {
+ Record.AddStmt(M.getAssociatedExpression());
+ Record.AddDeclRef(M.getAssociatedDeclaration());
}
}
Added: cfe/trunk/test/OpenMP/target_data_use_device_ptr_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_data_use_device_ptr_codegen.cpp?rev=276977&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_data_use_device_ptr_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_data_use_device_ptr_codegen.cpp Thu Jul 28 09:23:26 2016
@@ -0,0 +1,464 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+///==========================================================================///
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+double *g;
+
+// CK1: @g = global double*
+// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i32] [i32 99]
+// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i32] [i32 99]
+// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i32] [i32 99]
+// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i32] [i32 99]
+// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i32] [i32 99]
+// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i32] [i32 99]
+// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i32] [i32 99]
+// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i32] [{{i32 35, i32 99|i32 99, i32 35}}]
+// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i32] [i32 99, i32 99]
+// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i32] [i32 99, i32 99]
+// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
+// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
+
+// CK1-LABEL: @_Z3foo
+template<typename T>
+void foo(float *&lr, T *&tr) {
+ float *l;
+ T *t;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast double* [[T:%.+]] to i8*
+ // CK1-DAG: [[T]] = load double*, double** [[DECL:@g]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
+ // CK1: [[VAL:%.+]] = load double*, double** [[CBP]],
+ // CK1-NOT: store double* [[VAL]], double** [[DECL]],
+ // CK1: store double* [[VAL]], double** [[PVT:%.+]],
+ // CK1: [[TT:%.+]] = load double*, double** [[PVT]],
+ // CK1: getelementptr inbounds double, double* [[TT]], i32 1
+ #pragma omp target data map(g[:10]) use_device_ptr(g)
+ {
+ ++g;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE00]]
+ // CK1: [[TTT:%.+]] = load double*, double** [[DECL]],
+ // CK1: getelementptr inbounds double, double* [[TTT]], i32 1
+ ++g;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE01]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
+ // CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
+ // CK1-NOT: store float* [[VAL]], float** [[DECL]],
+ // CK1: store float* [[VAL]], float** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load float*, float** [[PVT]],
+ // CK1: getelementptr inbounds float, float* [[TT1]], i32 1
+ #pragma omp target data map(l[:10]) use_device_ptr(l)
+ {
+ ++l;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE01]]
+ // CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
+ // CK1: getelementptr inbounds float, float* [[TTT]], i32 1
+ ++l;
+
+ // CK1-NOT: call void @__tgt_target
+ // CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
+ // CK1: getelementptr inbounds float, float* [[TTT]], i32 1
+ #pragma omp target data map(l[:10]) use_device_ptr(l) if(0)
+ {
+ ++l;
+ }
+ // CK1-NOT: call void @__tgt_target
+ // CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
+ // CK1: getelementptr inbounds float, float* [[TTT]], i32 1
+ ++l;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE03]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
+ // CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
+ // CK1-NOT: store float* [[VAL]], float** [[DECL]],
+ // CK1: store float* [[VAL]], float** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load float*, float** [[PVT]],
+ // CK1: getelementptr inbounds float, float* [[TT1]], i32 1
+ #pragma omp target data map(l[:10]) use_device_ptr(l) if(1)
+ {
+ ++l;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE03]]
+ // CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
+ // CK1: getelementptr inbounds float, float* [[TTT]], i32 1
+ ++l;
+
+ // CK1: [[CMP:%.+]] = icmp ne float* %{{.+}}, null
+ // CK1: br i1 [[CMP]], label %[[BTHEN:.+]], label %[[BELSE:.+]]
+
+ // CK1: [[BTHEN]]:
+ // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE04]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
+ // CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
+ // CK1-NOT: store float* [[VAL]], float** [[DECL]],
+ // CK1: store float* [[VAL]], float** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load float*, float** [[PVT]],
+ // CK1: getelementptr inbounds float, float* [[TT1]], i32 1
+ // CK1: br label %[[BEND:.+]]
+
+ // CK1: [[BELSE]]:
+ // CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
+ // CK1: getelementptr inbounds float, float* [[TTT]], i32 1
+ // CK1: br label %[[BEND]]
+ #pragma omp target data map(l[:10]) use_device_ptr(l) if(lr != 0)
+ {
+ ++l;
+ }
+ // CK1: [[BEND]]:
+ // CK1: [[CMP:%.+]] = icmp ne float* %{{.+}}, null
+ // CK1: br i1 [[CMP]], label %[[BTHEN:.+]], label %[[BELSE:.+]]
+
+ // CK1: [[BTHEN]]:
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE04]]
+ // CK1: br label %[[BEND:.+]]
+
+ // CK1: [[BELSE]]:
+ // CK1: br label %[[BEND]]
+
+ // CK1: [[BEND]]:
+ // CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
+ // CK1: getelementptr inbounds float, float* [[TTT]], i32 1
+ ++l;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load float*, float** [[T2:%.+]],
+ // CK1-DAG: [[T2]] = load float**, float*** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE05]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
+ // CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
+ // CK1: store float* [[VAL]], float** [[PVTV:%.+]],
+ // CK1-NOT: store float** [[PVTV]], float*** [[DECL]],
+ // CK1: store float** [[PVTV]], float*** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load float**, float*** [[PVT]],
+ // CK1: [[TT2:%.+]] = load float*, float** [[TT1]],
+ // CK1: getelementptr inbounds float, float* [[TT2]], i32 1
+ #pragma omp target data map(lr[:10]) use_device_ptr(lr)
+ {
+ ++lr;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE05]]
+ // CK1: [[TTT:%.+]] = load float**, float*** [[DECL]],
+ // CK1: [[TTTT:%.+]] = load float*, float** [[TTT]],
+ // CK1: getelementptr inbounds float, float* [[TTTT]], i32 1
+ ++lr;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load i32*, i32** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE06]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
+ // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
+ // CK1-NOT: store i32* [[VAL]], i32** [[DECL]],
+ // CK1: store i32* [[VAL]], i32** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]],
+ // CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1
+ #pragma omp target data map(t[:10]) use_device_ptr(t)
+ {
+ ++t;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE06]]
+ // CK1: [[TTT:%.+]] = load i32*, i32** [[DECL]],
+ // CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1
+ ++t;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load i32*, i32** [[T2:%.+]],
+ // CK1-DAG: [[T2]] = load i32**, i32*** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE07]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
+ // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
+ // CK1: store i32* [[VAL]], i32** [[PVTV:%.+]],
+ // CK1-NOT: store i32** [[PVTV]], i32*** [[DECL]],
+ // CK1: store i32** [[PVTV]], i32*** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load i32**, i32*** [[PVT]],
+ // CK1: [[TT2:%.+]] = load i32*, i32** [[TT1]],
+ // CK1: getelementptr inbounds i32, i32* [[TT2]], i32 1
+ #pragma omp target data map(tr[:10]) use_device_ptr(tr)
+ {
+ ++tr;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE07]]
+ // CK1: [[TTT:%.+]] = load i32**, i32*** [[DECL]],
+ // CK1: [[TTTT:%.+]] = load i32*, i32** [[TTT]],
+ // CK1: getelementptr inbounds i32, i32* [[TTTT]], i32 1
+ ++tr;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE08]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float**
+ // CK1: [[VAL:%.+]] = load float*, float** [[CBP]],
+ // CK1-NOT: store float* [[VAL]], float** [[DECL]],
+ // CK1: store float* [[VAL]], float** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load float*, float** [[PVT]],
+ // CK1: getelementptr inbounds float, float* [[TT1]], i32 1
+ #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l)
+ {
+ ++l; ++t;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE08]]
+ // CK1: [[TTT:%.+]] = load float*, float** [[DECL]],
+ // CK1: getelementptr inbounds float, float* [[TTT]], i32 1
+ ++l; ++t;
+
+
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE09]]
+ // CK1: [[_CBP:%.+]] = bitcast i8** {{%.+}} to float**
+ // CK1: [[_VAL:%.+]] = load float*, float** [[_CBP]],
+ // CK1: store float* [[_VAL]], float** [[_PVT:%.+]],
+ // CK1: [[CBP:%.+]] = bitcast i8** {{%.+}} to i32**
+ // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
+ // CK1: store i32* [[VAL]], i32** [[PVT:%.+]],
+ // CK1: [[_TT1:%.+]] = load float*, float** [[_PVT]],
+ // CK1: getelementptr inbounds float, float* [[_TT1]], i32 1
+ // CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]],
+ // CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1
+ #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l) use_device_ptr(t)
+ {
+ ++l; ++t;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE09]]
+ // CK1: [[_TTT:%.+]] = load float*, float** {{%.+}},
+ // CK1: getelementptr inbounds float, float* [[_TTT]], i32 1
+ // CK1: [[TTT:%.+]] = load i32*, i32** {{%.+}},
+ // CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1
+ ++l; ++t;
+
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE10]]
+ // CK1: [[_CBP:%.+]] = bitcast i8** {{%.+}} to float**
+ // CK1: [[_VAL:%.+]] = load float*, float** [[_CBP]],
+ // CK1: store float* [[_VAL]], float** [[_PVT:%.+]],
+ // CK1: [[CBP:%.+]] = bitcast i8** {{%.+}} to i32**
+ // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
+ // CK1: store i32* [[VAL]], i32** [[PVT:%.+]],
+ // CK1: [[_TT1:%.+]] = load float*, float** [[_PVT]],
+ // CK1: getelementptr inbounds float, float* [[_TT1]], i32 1
+ // CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]],
+ // CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1
+ #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l,t)
+ {
+ ++l; ++t;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE10]]
+ // CK1: [[_TTT:%.+]] = load float*, float** {{%.+}},
+ // CK1: getelementptr inbounds float, float* [[_TTT]], i32 1
+ // CK1: [[TTT:%.+]] = load i32*, i32** {{%.+}},
+ // CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1
+ ++l; ++t;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load i32*, i32** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE11]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
+ // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
+ // CK1-NOT: store i32* [[VAL]], i32** [[DECL]],
+ // CK1: store i32* [[VAL]], i32** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]],
+ // CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1
+ #pragma omp target data map(l[:10]) use_device_ptr(t)
+ {
+ ++l; ++t;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE11]]
+ // CK1: [[TTT:%.+]] = load i32*, i32** [[DECL]],
+ // CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1
+ ++l; ++t;
+
+ // CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8*
+ // CK1-DAG: [[T1]] = load i32*, i32** [[T2:%.+]],
+ // CK1-DAG: [[T2]] = load i32**, i32*** [[DECL:%.+]],
+ // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK1: store i8* [[RVAL]], i8** [[BP]],
+ // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE12]]
+ // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
+ // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]],
+ // CK1: store i32* [[VAL]], i32** [[PVTV:%.+]],
+ // CK1-NOT: store i32** [[PVTV]], i32*** [[DECL]],
+ // CK1: store i32** [[PVTV]], i32*** [[PVT:%.+]],
+ // CK1: [[TT1:%.+]] = load i32**, i32*** [[PVT]],
+ // CK1: [[TT2:%.+]] = load i32*, i32** [[TT1]],
+ // CK1: getelementptr inbounds i32, i32* [[TT2]], i32 1
+ #pragma omp target data map(l[:10]) use_device_ptr(tr)
+ {
+ ++l; ++tr;
+ }
+ // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE12]]
+ // CK1: [[TTT:%.+]] = load i32**, i32*** [[DECL]],
+ // CK1: [[TTTT:%.+]] = load i32*, i32** [[TTT]],
+ // CK1: getelementptr inbounds i32, i32* [[TTTT]], i32 1
+ ++l; ++tr;
+
+}
+
+void bar(float *&a, int *&b) {
+ foo<int>(a,b);
+}
+
+#endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+#ifdef CK2
+
+// CK2: [[ST:%.+]] = type { double*, double** }
+// CK2: [[MTYPE00:@.+]] = {{.*}}constant [2 x i32] [i32 35, i32 83]
+// CK2: [[MTYPE01:@.+]] = {{.*}}constant [3 x i32] [i32 32, i32 19, i32 83]
+// CK2: [[MTYPE02:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35]
+// CK2: [[MTYPE03:@.+]] = {{.*}}constant [4 x i32] [i32 96, i32 32, i32 19, i32 83]
+
+template <typename T>
+struct ST {
+ T *a;
+ double *&b;
+ ST(double *&b) : a(0), b(b) {}
+
+ // CK2-LABEL: @{{.*}}foo{{.*}}
+ void foo(double *&arg) {
+ int *la = 0;
+
+ // CK2: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1
+ // CK2: store i8* [[RVAL:%.+]], i8** [[BP]],
+ // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]]
+ // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
+ // CK2: [[VAL:%.+]] = load double*, double** [[CBP]],
+ // CK2: store double* [[VAL]], double** [[PVT:%.+]],
+ // CK2: store double** [[PVT]], double*** [[PVT2:%.+]],
+ // CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]],
+ // CK2: [[TT2:%.+]] = load double*, double** [[TT1]],
+ // CK2: getelementptr inbounds double, double* [[TT2]], i32 1
+ #pragma omp target data map(a[:10]) use_device_ptr(a)
+ {
+ a++;
+ }
+ // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE00]]
+ // CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0
+ // CK2: [[TTT:%.+]] = load double*, double** [[DECL]],
+ // CK2: getelementptr inbounds double, double* [[TTT]], i32 1
+ a++;
+
+ // CK2: [[BP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* %{{.+}}, i32 0, i32 2
+ // CK2: store i8* [[RVAL:%.+]], i8** [[BP]],
+ // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE01]]
+ // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
+ // CK2: [[VAL:%.+]] = load double*, double** [[CBP]],
+ // CK2: store double* [[VAL]], double** [[PVT:%.+]],
+ // CK2: store double** [[PVT]], double*** [[PVT2:%.+]],
+ // CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]],
+ // CK2: [[TT2:%.+]] = load double*, double** [[TT1]],
+ // CK2: getelementptr inbounds double, double* [[TT2]], i32 1
+ #pragma omp target data map(b[:10]) use_device_ptr(b)
+ {
+ b++;
+ }
+ // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE01]]
+ // CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %{{.+}}, i32 0, i32 1
+ // CK2: [[TTT:%.+]] = load double**, double*** [[DECL]],
+ // CK2: [[TTTT:%.+]] = load double*, double** [[TTT]],
+ // CK2: getelementptr inbounds double, double* [[TTTT]], i32 1
+ b++;
+
+ // CK2: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK2: store i8* [[RVAL:%.+]], i8** [[BP]],
+ // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE02]]
+ // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
+ // CK2: [[VAL:%.+]] = load double*, double** [[CBP]],
+ // CK2: store double* [[VAL]], double** [[PVT:%.+]],
+ // CK2: store double** [[PVT]], double*** [[PVT2:%.+]],
+ // CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]],
+ // CK2: [[TT2:%.+]] = load double*, double** [[TT1]],
+ // CK2: getelementptr inbounds double, double* [[TT2]], i32 1
+ #pragma omp target data map(la[:10]) use_device_ptr(a)
+ {
+ a++;
+ la++;
+ }
+ // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE02]]
+ // CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0
+ // CK2: [[TTT:%.+]] = load double*, double** [[DECL]],
+ // CK2: getelementptr inbounds double, double* [[TTT]], i32 1
+ a++;
+ la++;
+
+ // CK2: [[BP:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* %{{.+}}, i32 0, i32 0
+ // CK2: store i8* [[RVAL:%.+]], i8** [[BP]],
+ // CK2: [[_BP:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* %{{.+}}, i32 0, i32 3
+ // CK2: store i8* [[_RVAL:%.+]], i8** [[_BP]],
+ // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE03]]
+ // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
+ // CK2: [[VAL:%.+]] = load double*, double** [[CBP]],
+ // CK2: store double* [[VAL]], double** [[PVT:%.+]],
+ // CK2: store double** [[PVT]], double*** [[PVT2:%.+]],
+ // CK2: [[_CBP:%.+]] = bitcast i8** [[_BP]] to double**
+ // CK2: [[_VAL:%.+]] = load double*, double** [[_CBP]],
+ // CK2: store double* [[_VAL]], double** [[_PVT:%.+]],
+ // CK2: store double** [[_PVT]], double*** [[_PVT2:%.+]],
+ // CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]],
+ // CK2: [[TT2:%.+]] = load double*, double** [[TT1]],
+ // CK2: getelementptr inbounds double, double* [[TT2]], i32 1
+ // CK2: [[_TT1:%.+]] = load double**, double*** [[_PVT2]],
+ // CK2: [[_TT2:%.+]] = load double*, double** [[_TT1]],
+ // CK2: getelementptr inbounds double, double* [[_TT2]], i32 1
+ #pragma omp target data map(b[:10]) use_device_ptr(a, b)
+ {
+ a++;
+ b++;
+ }
+ // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE03]]
+ // CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0
+ // CK2: [[TTT:%.+]] = load double*, double** [[DECL]],
+ // CK2: getelementptr inbounds double, double* [[TTT]], i32 1
+ // CK2: [[_DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 1
+ // CK2: [[_TTT:%.+]] = load double**, double*** [[_DECL]],
+ // CK2: [[_TTTT:%.+]] = load double*, double** [[_TTT]],
+ // CK2: getelementptr inbounds double, double* [[_TTTT]], i32 1
+ a++;
+ b++;
+ }
+};
+
+void bar(double *arg){
+ ST<double> A(arg);
+ A.foo(arg);
+ ++arg;
+}
+#endif
+#endif
More information about the cfe-commits
mailing list