[clang] 172f146 - [OPENMP]Reduce number of captured global vars.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 13 07:52:01 PDT 2020
Author: Alexey Bataev
Date: 2020-03-13T10:47:54-04:00
New Revision: 172f1460ae05ab5c33c757142c8bdb10acfbdbe1
URL: https://github.com/llvm/llvm-project/commit/172f1460ae05ab5c33c757142c8bdb10acfbdbe1
DIFF: https://github.com/llvm/llvm-project/commit/172f1460ae05ab5c33c757142c8bdb10acfbdbe1.diff
LOG: [OPENMP]Reduce number of captured global vars.
Try to reduce the number of global vars captured in OpenMP regions
by capturing them only in the regions that mark them as not-shared.
Added:
Modified:
clang/include/clang/Sema/Sema.h
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/lib/Sema/SemaExpr.cpp
clang/lib/Sema/SemaOpenMP.cpp
clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp
clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp
clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp
clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index ee02d3189816..ead15bf7da2d 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9858,6 +9858,13 @@ class Sema final {
bool isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level,
unsigned CaptureLevel) const;
+ /// Check if the specified global variable must be captured by outer capture
+ /// regions.
+ /// \param Level Relative level of nested OpenMP construct for which
+ /// the check is performed.
+ bool isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level,
+ unsigned CaptureLevel) const;
+
ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc,
Expr *Op);
/// Called on start of new data sharing attribute block.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 55057c19df4f..83807f609e90 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -57,7 +57,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
return CGF.LambdaCaptureFields.lookup(VD) ||
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
- (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
+ (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
+ cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
}
public:
@@ -5551,7 +5552,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
// Emit outlined function for task construct.
const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
- Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ Address CapturedStruct = Address::invalid();
+ {
+ OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
+ CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ }
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index b2fe4fe2c588..8cc5ac267774 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16523,13 +16523,19 @@ bool Sema::tryCaptureVariable(
!IsOpenMPPrivateDecl &&
isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel,
RSI->OpenMPCaptureLevel);
+ // Do not capture global if it is not privatized in outer regions.
+ bool IsGlobalCap =
+ IsGlobal && isOpenMPGlobalCapturedDecl(Var, RSI->OpenMPLevel,
+ RSI->OpenMPCaptureLevel);
+
// When we detect target captures we are looking from inside the
// target region, therefore we need to propagate the capture from the
// enclosing region. Therefore, the capture is not initially nested.
if (IsTargetCap)
adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel);
- if (IsTargetCap || IsOpenMPPrivateDecl) {
+ if (IsTargetCap || IsOpenMPPrivateDecl ||
+ (IsGlobal && !IsGlobalCap)) {
Nested = !IsTargetCap;
DeclRefType = DeclRefType.getUnqualifiedType();
CaptureType = Context.getLValueReferenceType(DeclRefType);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 89b7c0b1cd0d..a3506bf046f7 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -498,6 +498,8 @@ class DSAStackTy {
const DSAVarData getTopDSA(ValueDecl *D, bool FromParent);
/// Returns data-sharing attributes for the specified declaration.
const DSAVarData getImplicitDSA(ValueDecl *D, bool FromParent) const;
+ /// Returns data-sharing attributes for the specified declaration, looked up from the stack entry selected by \a Level.
+ const DSAVarData getImplicitDSA(ValueDecl *D, unsigned Level) const;
/// Checks if the specified variables has data-sharing attributes which
/// match specified \a CPred predicate in any directive which matches \a DPred
/// predicate.
@@ -1552,6 +1554,15 @@ const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
return getDSA(StartI, D);
}
+const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
+ unsigned Level) const {
+ if (getStackSize() <= Level) // Level is not on the stack: default result.
+ return DSAVarData();
+ D = getCanonicalDecl(D); // Look up by the canonical declaration.
+ const_iterator StartI = std::next(begin(), getStackSize() - 1 - Level); // Entry for Level.
+ return getDSA(StartI, D);
+}
+
const DSAStackTy::DSAVarData
DSAStackTy::hasDSA(ValueDecl *D,
const llvm::function_ref<bool(OpenMPClauseKind)> CPred,
@@ -2108,9 +2119,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
void Sema::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex,
unsigned Level) const {
- SmallVector<OpenMPDirectiveKind, 4> Regions;
- getOpenMPCaptureRegions(Regions, DSAStack->getDirective(Level));
- FunctionScopesIndex -= Regions.size();
+ FunctionScopesIndex -= getOpenMPCaptureLevels(DSAStack->getDirective(Level));
}
void Sema::startOpenMPLoop() {
@@ -2213,6 +2222,29 @@ bool Sema::isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level,
Regions[CaptureLevel] != OMPD_task;
}
+bool Sema::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level,
+ unsigned CaptureLevel) const {
+ assert(LangOpts.OpenMP && "OpenMP is not allowed");
+ // Only variables without local storage are inspected; any other declaration yields true.

+ if (const auto *VD = dyn_cast<VarDecl>(D)) {
+ if (!VD->hasLocalStorage()) {
+ DSAStackTy::DSAVarData TopDVar =
+ DSAStack->getTopDSA(D, /*FromParent=*/false);
+ unsigned NumLevels =
+ getOpenMPCaptureLevels(DSAStack->getDirective(Level));
+ if (Level == 0) // Level 0: need CaptureLevel + 1 == NumLevels and a non-shared top DSA.
+ return (NumLevels == CaptureLevel + 1) && TopDVar.CKind != OMPC_shared;
+ DSAStackTy::DSAVarData DVar = DSAStack->getImplicitDSA(D, Level - 1);
+ return DVar.CKind != OMPC_shared || // Not shared at Level - 1; else recurse for Level - 1.
+ isOpenMPGlobalCapturedDecl(
+ D, Level - 1,
+ getOpenMPCaptureLevels(DSAStack->getDirective(Level - 1)) - 1);
+ }
+ }
+ return true;
+}
+
void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }
void Sema::finalizeOpenMPDelayedAnalysis() {
@@ -3575,7 +3607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
};
// Start a captured region for 'parallel'.
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- ParamsParallel, /*OpenMPCaptureLevel=*/1);
+ ParamsParallel, /*OpenMPCaptureLevel=*/0);
QualType Args[] = {VoidPtrTy};
FunctionProtoType::ExtProtoInfo EPI;
EPI.Variadic = true;
@@ -3596,7 +3628,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params, /*OpenMPCaptureLevel=*/2);
+ Params, /*OpenMPCaptureLevel=*/1);
// Mark this captured region as inlined, because we don't use outlined
// function directly.
getCurCapturedRegion()->TheCapturedDecl->addAttr(
diff --git a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp
index ec40af192b27..39c3184f2f89 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp
@@ -206,7 +206,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
-// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
+// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}},
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],
// Allocate task.
diff --git a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp
index 52cd2662e142..e74030da788f 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp
@@ -187,7 +187,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
-// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
+// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]],
// Allocate task.
// Returns struct kmp_task_t {
diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp
index 80897ff1fcfb..689d3bb966cc 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp
@@ -206,7 +206,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
-// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
+// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}},
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],
// Allocate task.
diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
index 9014ce378281..efb32f025215 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
@@ -187,7 +187,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
-// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
+// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]],
// Allocate task.
// Returns struct kmp_task_t {
More information about the cfe-commits
mailing list