[clang] e6d2583 - [OPENMP50]Track changes of lastprivate conditional in parallel-based
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 27 11:57:19 PST 2020
Author: Alexey Bataev
Date: 2020-01-27T14:53:25-05:00
New Revision: e6d2583e45ef3af7646d4a58bada23333e639121
URL: https://github.com/llvm/llvm-project/commit/e6d2583e45ef3af7646d4a58bada23333e639121
DIFF: https://github.com/llvm/llvm-project/commit/e6d2583e45ef3af7646d4a58bada23333e639121.diff
LOG: [OPENMP50]Track changes of lastprivate conditional in parallel-based
regions with reduction, lastprivate or linear clauses.
If the lastprivate conditional variable is updated in an inner parallel
region with a reduction, lastprivate or linear clause, the value must be
considered as a candidate for lastprivate conditional. Also, tracking in
inner parallel regions is not required.
Added:
clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp
Modified:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CGOpenMPRuntime.h
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/for_lastprivate_codegen.cpp
clang/test/OpenMP/sections_lastprivate_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 896cf378c16b..aecf150401bb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -11357,25 +11357,7 @@ CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
}
}
Data.IVLVal = IVLVal;
- // In simd only mode or for simd directives no need to generate threadprivate
- // references for the loop iteration counter, we can use the original one
- // since outlining cannot happen in simd regions.
- if (CGF.getLangOpts().OpenMPSimd ||
- isOpenMPSimdDirective(S.getDirectiveKind())) {
- Data.UseOriginalIV = true;
- return;
- }
- PresumedLoc PLoc =
- CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
- assert(PLoc.isValid() && "Source location is expected to be always valid.");
-
- llvm::sys::fs::UniqueID ID;
- if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
- CGM.getDiags().Report(diag::err_cannot_open_file)
- << PLoc.getFilename() << EC.message();
- Data.IVName = CGM.getOpenMPRuntime().getName(
- {"pl_cond", llvm::utostr(ID.getDevice()), llvm::utostr(ID.getFile()),
- llvm::utostr(PLoc.getLine()), llvm::utostr(PLoc.getColumn()), "iv"});
+ Data.CGF = &CGF;
}
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
@@ -11384,27 +11366,6 @@ CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
}
-void CGOpenMPRuntime::initLastprivateConditionalCounter(
- CodeGenFunction &CGF, const OMPExecutableDirective &S) {
- if (CGM.getLangOpts().OpenMPSimd ||
- !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
- [](const OMPLastprivateClause *C) {
- return C->getKind() == OMPC_LASTPRIVATE_conditional;
- }))
- return;
- const CGOpenMPRuntime::LastprivateConditionalData &Data =
- LastprivateConditionalStack.back();
- if (Data.UseOriginalIV)
- return;
- // Global loop counter. Required to handle inner parallel-for regions.
- // global_iv = iv;
- Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
- CGF, Data.IVLVal.getType(), Data.IVName);
- LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
- llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
- CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
-}
-
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
@@ -11415,9 +11376,7 @@ class LastprivateConditionalRefChecker final
const Decl *FoundD = nullptr;
StringRef UniqueDeclName;
LValue IVLVal;
- StringRef IVName;
SourceLocation Loc;
- bool UseOriginalIV = false;
public:
bool VisitDeclRefExpr(const DeclRefExpr *E) {
@@ -11430,8 +11389,6 @@ class LastprivateConditionalRefChecker final
FoundD = E->getDecl()->getCanonicalDecl();
UniqueDeclName = It->getSecond();
IVLVal = D.IVLVal;
- IVName = D.IVName;
- UseOriginalIV = D.UseOriginalIV;
break;
}
return FoundE == E;
@@ -11448,8 +11405,6 @@ class LastprivateConditionalRefChecker final
FoundD = E->getMemberDecl()->getCanonicalDecl();
UniqueDeclName = It->getSecond();
IVLVal = D.IVLVal;
- IVName = D.IVName;
- UseOriginalIV = D.UseOriginalIV;
break;
}
return FoundE == E;
@@ -11470,17 +11425,17 @@ class LastprivateConditionalRefChecker final
CodeGenFunction &CGF,
ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
: CGF(CGF), LPM(LPM) {}
- std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
+ std::tuple<const Expr *, const Decl *, StringRef, LValue>
getFoundData() const {
- return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
- UseOriginalIV);
+ return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal);
}
};
} // namespace
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
const Expr *LHS) {
- if (CGF.getLangOpts().OpenMP < 50)
+ if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty() ||
+ LastprivateConditionalStack.back().CGF != &CGF)
return;
LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
if (!Checker.Visit(LHS))
@@ -11489,10 +11444,7 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
const Decl *FoundD;
StringRef UniqueDeclName;
LValue IVLVal;
- StringRef IVName;
- bool UseOriginalIV;
- std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
- Checker.getFoundData();
+ std::tie(FoundE, FoundD, UniqueDeclName, IVLVal) = Checker.getFoundData();
// Last updated loop counter for the lastprivate conditional var.
// int<xx> last_iv = 0;
@@ -11517,11 +11469,6 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
// Global loop counter. Required to handle inner parallel-for regions.
// global_iv
- if (!UseOriginalIV) {
- Address IVAddr =
- getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
- IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
- }
llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
// #pragma omp critical(a)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 8159f5e8b790..dbbf253c2dd0 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -233,11 +233,7 @@ class CGOpenMPRuntime {
llvm::SmallDenseMap<CanonicalDeclPtr<const Decl>, SmallString<16>>
DeclToUniqeName;
LValue IVLVal;
- SmallString<16> IVName;
- /// True if original lvalue for loop counter can be used in codegen (simd
- /// region or simd only mode) and no need to create threadprivate
- /// references.
- bool UseOriginalIV = false;
+ CodeGenFunction *CGF = nullptr;
};
/// Manages list of lastprivate conditional decls for the specified directive.
class LastprivateConditionalRAII {
@@ -1692,11 +1688,6 @@ class CGOpenMPRuntime {
/// current context.
bool isNontemporalDecl(const ValueDecl *VD) const;
- /// Initializes global counter for lastprivate conditional.
- virtual void
- initLastprivateConditionalCounter(CodeGenFunction &CGF,
- const OMPExecutableDirective &S);
-
/// Checks if the provided \p LVal is lastprivate conditional and emits the
/// code to update the value of the original variable.
/// \code
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f44405a03622..0e41d520da20 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -18,6 +18,7 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/PrettyStackTrace.h"
@@ -1332,6 +1333,19 @@ static void emitCommonOMPParallelDirective(
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
CapturedVars, IfCond);
+ // Check for outer lastprivate conditional update.
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ for (const Expr *Ref : C->varlists())
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
+ for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
+ for (const Expr *Ref : C->varlists())
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
+ for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
+ for (const Expr *Ref : C->varlists())
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
}
static void emitEmptyBoundParameters(CodeGenFunction &,
@@ -1890,7 +1904,6 @@ void CodeGenFunction::EmitOMPSimdFinal(
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
const OMPLoopDirective &S,
CodeGenFunction::JumpDest LoopExit) {
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
CGF.EmitOMPLoopBody(S, LoopExit);
CGF.EmitStopPoint(&S);
}
@@ -2011,8 +2024,6 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPInnerLoop(
S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
[&S](CodeGenFunction &CGF) {
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(
- CGF, S);
CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
CGF.EmitStopPoint(&S);
},
@@ -2667,8 +2678,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
: S.getCond(),
StaticChunkedOne ? S.getDistInc() : S.getInc(),
[&S, LoopExit](CodeGenFunction &CGF) {
- CGF.CGM.getOpenMPRuntime()
- .initLastprivateConditionalCounter(CGF, S);
CGF.EmitOMPLoopBody(S, LoopExit);
CGF.EmitStopPoint(&S);
},
@@ -2851,7 +2860,6 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// break;
// }
// .omp.sections.exit:
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
llvm::SwitchInst *SwitchStmt =
CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp
index b310055447be..6a2f4d6a0f95 100644
--- a/clang/test/OpenMP/for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp
@@ -176,7 +176,6 @@ char cnt;
// CHECK-DAG: [[X:@.+]] = global double 0.0
// CHECK-DAG: [[F:@.+]] = global float 0.0
// CHECK-DAG: [[CNT:@.+]] = global i8 0
-// OMP50-DAG: [[IV_REF:@.+]] = {{.*}}common global i32 0
// OMP50-DAG: [[LAST_IV_F:@.+]] = {{.*}}common global i32 0
// OMP50-DAG: [[LAST_F:@.+]] = {{.*}}common global float 0.000000e+00,
@@ -674,16 +673,10 @@ int main() {
// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// <Skip loop body>
-// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
-// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
-// OMP50: store i32 %{{.+}}, i32* [[BC]],
// CHECK: store float 0.000000e+00, float* [[F_PRIV:%.+]],
-// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
-// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
-// OMP50: [[IV:%.+]] = load i32, i32* [[BC]],
// OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[F_REGION:@.+]])
// OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_F]],
-// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]]
+// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV:%.+]]
// OMP50: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]]
// OMP50: [[LP_THEN]]:
diff --git a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp
new file mode 100644
index 000000000000..e05a5b977a4c
--- /dev/null
+++ b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+int main() {
+ int a = 0;
+#pragma omp parallel for lastprivate(conditional: a)
+ for (int i = 0; i < 10; ++i) {
+ if (i < 5) {
+ a = 0;
+#pragma omp parallel reduction(+:a) num_threads(10)
+ a += i;
+ }
+ }
+ return 0;
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](
+// CHECK: call void @__kmpc_push_num_threads(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 10)
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @{{.+}} to void (i32*, i32*, ...)*), i32* {{.+}} i32* %{{.+}})
+// CHECK: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 %{{.+}}, [8 x i32]* @{{.+}})
+// CHECK: [[LAST_IV_VAL:%.+]] = load i32, i32* [[LAST_IV:@.+]],
+// CHECK: [[RES:%.+]] = icmp sle i32 [[LAST_IV_VAL]], [[IV:%.+]]
+// CHECK: br i1 [[RES]], label %[[THEN:.+]], label %[[DONE:.+]]
+// CHECK: [[THEN]]:
+// CHECK: store i32 [[IV]], i32* [[LAST_IV]],
+// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PRIV:%.+]],
+// CHECK: store i32 [[A_VAL]], i32* [[A_GLOB:@.+]],
+// CHECK: br label %[[DONE]]
+// CHECK: [[DONE]]:
+// CHECK: call void @__kmpc_end_critical(%struct.ident_t* @{{.+}}, i32 %{{.+}}, [8 x i32]* @{{.+}})
+// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK: [[IS_LAST:%.+]] = load i32, i32* %{{.+}},
+// CHECK: [[RES:%.+]] = icmp ne i32 [[IS_LAST]], 0
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK: br i1 [[RES]], label %[[THEN:.+]], label %[[DONE:.+]]
+// CHECK: [[THEN]]:
+// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_GLOB]],
+// CHECK: store i32 [[A_VAL]], i32* [[A_PRIV]],
+// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PRIV]],
+// CHECK: store i32 [[A_VAL]], i32* %{{.+}},
+// CHECK: br label %[[DONE]]
+// CHECK: [[DONE]]:
+// CHECK: ret void
+
+#endif // HEADER
diff --git a/clang/test/OpenMP/sections_lastprivate_codegen.cpp b/clang/test/OpenMP/sections_lastprivate_codegen.cpp
index 93b417ad0293..5e323ad9f00f 100644
--- a/clang/test/OpenMP/sections_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/sections_lastprivate_codegen.cpp
@@ -46,7 +46,6 @@ volatile int g = 1212;
// CHECK: [[S_INT_TY:%.+]] = type { i32 }
// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
// CHECK-DAG: [[X:@.+]] = global double 0.0
-// OMP50-DAG: [[IV_REF:@.+]] = common global i32 0
// OMP50-DAG: [[LAST_IV_X:@.+]] = {{.*}}common global i32 0
// OMP50-DAG: [[LAST_X:@.+]] = {{.*}}common global double 0.000000e+00,
template <typename T>
@@ -294,15 +293,9 @@ int main() {
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// <Skip loop body>
-// OMP50: [[IV_GLOB_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
-// OMP50: [[BC:%.+]] = bitcast i8* [[IV_GLOB_REF]] to i32*
-// OMP50: store i32 %{{.+}}, i32* [[BC]],
-// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
-// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
-// OMP50: [[IV:%.+]] = load i32, i32* [[BC]],
// OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[X_REGION:@.+]])
// OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_X]],
-// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]]
+// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV:%.+]]
// OMP50: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]]
// OMP50: [[LP_THEN]]:
More information about the cfe-commits
mailing list