r260877 - [OPENMP] Remove extra sync barriers for 'firstprivate' clause.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Mon Feb 15 00:07:18 PST 2016
Author: abataev
Date: Mon Feb 15 02:07:17 2016
New Revision: 260877
URL: http://llvm.org/viewvc/llvm-project?rev=260877&view=rev
Log:
[OPENMP] Remove extra sync barriers for 'firstprivate' clause.
Sync barrier will be emitted after generation of firstprivate variables
only if one of the firstprivate vars is used in lastprivate clause.
Modified:
cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
cfe/trunk/test/OpenMP/for_firstprivate_codegen.cpp
cfe/trunk/test/OpenMP/parallel_firstprivate_codegen.cpp
cfe/trunk/test/OpenMP/sections_firstprivate_codegen.cpp
cfe/trunk/test/OpenMP/single_firstprivate_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=260877&r1=260876&r2=260877&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Mon Feb 15 02:07:17 2016
@@ -376,14 +376,23 @@ bool CodeGenFunction::EmitOMPFirstprivat
OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return false;
+ bool FirstprivateIsLastprivate = false;
+ llvm::DenseSet<const VarDecl *> Lastprivates;
+ for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
+ for (const auto *D : C->varlists())
+ Lastprivates.insert(
+ cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+ }
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
auto IRef = C->varlist_begin();
auto InitsRef = C->inits().begin();
for (auto IInit : C->private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
- if (EmittedAsFirstprivate.count(OrigVD) == 0) {
- EmittedAsFirstprivate.insert(OrigVD);
+ FirstprivateIsLastprivate =
+ FirstprivateIsLastprivate ||
+ (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
+ if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
bool IsRegistered;
@@ -443,7 +452,7 @@ bool CodeGenFunction::EmitOMPFirstprivat
++IRef, ++InitsRef;
}
}
- return !EmittedAsFirstprivate.empty();
+ return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
void CodeGenFunction::EmitOMPPrivateClause(
@@ -490,7 +499,6 @@ bool CodeGenFunction::EmitOMPCopyinClaus
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = VD->getType();
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
-
// Get the address of the master variable. If we are emitting code with
// TLS support, the address is passed from the master as field in the
// captured declaration.
@@ -964,12 +972,11 @@ void CodeGenFunction::EmitOMPParallelDir
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
OMPPrivateScope PrivateScope(CGF);
bool Copyins = CGF.EmitOMPCopyinClause(S);
- bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
- if (Copyins || Firstprivates) {
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ if (Copyins) {
// Emit implicit barrier to synchronize threads and avoid data races on
- // initialization of firstprivate variables or propagation master's thread
- // values of threadprivate variables to local instances of that variables
- // of all other implicit threads.
+ // propagation master's thread values of threadprivate variables to local
+ // instances of that variables of all other implicit threads.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@@ -1605,7 +1612,8 @@ bool CodeGenFunction::EmitOMPWorksharing
OMPPrivateScope LoopScope(*this);
if (EmitOMPFirstprivateClause(S, LoopScope)) {
// Emit implicit barrier to synchronize threads and avoid data races on
- // initialization of firstprivate variables.
+ // initialization of firstprivate variables and post-update of
+ // lastprivate variables.
CGM.getOpenMPRuntime().emitBarrierCall(
*this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@@ -1804,7 +1812,8 @@ CodeGenFunction::EmitSections(const OMPE
CodeGenFunction::OMPPrivateScope LoopScope(CGF);
if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
// Emit implicit barrier to synchronize threads and avoid data races on
- // initialization of firstprivate variables.
+ // initialization of firstprivate variables and post-update of lastprivate
+ // variables.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@@ -1883,8 +1892,7 @@ void CodeGenFunction::EmitOMPSingleDirec
llvm::SmallVector<const Expr *, 8> SrcExprs;
llvm::SmallVector<const Expr *, 8> AssignmentOps;
// Check if there are any 'copyprivate' clauses associated with this
- // 'single'
- // construct.
+ // 'single' construct.
// Build a list of copyprivate variables along with helper expressions
// (<source>, <destination>, <destination>=<source> expressions)
for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
@@ -1897,10 +1905,9 @@ void CodeGenFunction::EmitOMPSingleDirec
}
LexicalScope Scope(*this, S.getSourceRange());
// Emit code for 'single' region along with 'copyprivate' clauses
- bool HasFirstprivates;
- auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF) {
CodeGenFunction::OMPPrivateScope SingleScope(CGF);
- HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
+ (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
CGF.EmitOMPPrivateClause(S, SingleScope);
(void)SingleScope.Privatize();
@@ -1909,10 +1916,9 @@ void CodeGenFunction::EmitOMPSingleDirec
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
CopyprivateVars, DestExprs, SrcExprs,
AssignmentOps);
- // Emit an implicit barrier at the end (to avoid data race on firstprivate
- // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
- if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
- CopyprivateVars.empty()) {
+ // Emit an implicit barrier at the end (to avoid data race if no 'nowait'
+ // clause was specified and no 'copyprivate' clause).
+ if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
CGM.getOpenMPRuntime().emitBarrierCall(
*this, S.getLocStart(),
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
Modified: cfe/trunk/test/OpenMP/for_firstprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/for_firstprivate_codegen.cpp?rev=260877&r1=260876&r2=260877&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/for_firstprivate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/for_firstprivate_codegen.cpp Mon Feb 15 02:07:17 2016
@@ -95,7 +95,7 @@ int main() {
// LAMBDA: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR_REF]]
// LAMBDA: store i{{[0-9]+}} [[SIVAR2_VAL]], i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]]
- // LAMBDA: call void @__kmpc_barrier(
+ // LAMBDA-NOT: call void @__kmpc_barrier(
g = 1;
g1 = 1;
sivar = 2;
@@ -158,7 +158,7 @@ int main() {
// BLOCKS: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF_ADDRR]]
// BLOCKS: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]]
- // BLOCKS: call void @__kmpc_barrier(
+ // BLOCKS-NOT: call void @__kmpc_barrier(
g = 1;
g1 =1;
sivar = 2;
@@ -246,7 +246,7 @@ int main() {
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIV]]
// Synchronization for initialization.
-// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_for_static_fini(
@@ -310,10 +310,8 @@ int main() {
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
-// Synchronization for initialization.
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// No synchronization for initialization.
+// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_for_static_fini(
Modified: cfe/trunk/test/OpenMP/parallel_firstprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/parallel_firstprivate_codegen.cpp?rev=260877&r1=260876&r2=260877&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/parallel_firstprivate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/parallel_firstprivate_codegen.cpp Mon Feb 15 02:07:17 2016
@@ -31,7 +31,6 @@ struct S {
// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
-// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
template <typename T>
T tmain() {
@@ -70,7 +69,7 @@ int main() {
// LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
// LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]]
- // LAMBDA: call {{.*}}void @__kmpc_barrier(
+ // LAMBDA-NOT: call {{.*}}void @__kmpc_barrier(
g = 1;
sivar = 2;
// LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
@@ -115,7 +114,7 @@ int main() {
// BLOCK: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_REF_ADDR]]
// BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]],
// BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]],
- // BLOCKS: call {{.*}}void @__kmpc_barrier(
+ // BLOCKS-NOT: call {{.*}}void @__kmpc_barrier(
g = 1;
sivar = 2;
// BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
@@ -244,9 +243,7 @@ int main() {
// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK-NOT: call {{.*}}void @__kmpc_barrier(
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
// CHECK: ret void
Modified: cfe/trunk/test/OpenMP/sections_firstprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/sections_firstprivate_codegen.cpp?rev=260877&r1=260876&r2=260877&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/sections_firstprivate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/sections_firstprivate_codegen.cpp Mon Feb 15 02:07:17 2016
@@ -59,7 +59,6 @@ S<float> s_arr[] = {1, 2};
// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
S<float> var(3);
// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
-// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
@@ -94,7 +93,7 @@ int main() {
// LAMBDA: [[SIVAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR1_REF]]
// LAMBDA: store i{{[0-9]+}} [[SIVAR1_VAL]], i{{[0-9]+}}* [[SIVAR1_PRIVATE_ADDR]]
- // LAMBDA: call void @__kmpc_barrier(
+ // LAMBDA-NOT: call void @__kmpc_barrier(
{
g = 1;
sivar = 10;
@@ -154,7 +153,7 @@ int main() {
// BLOCKS: [[SIVAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR1_REF]],
// BLOCKS: store i{{[0-9]+}} [[SIVAR1_VAL]], i{{[0-9]+}}* [[SIVAR1_PRIVATE_ADDR]],
- // BLOCKS: call void @__kmpc_barrier(
+ // BLOCKS-NOT: call void @__kmpc_barrier(
{
g = 1;
sivar = 10;
@@ -242,7 +241,7 @@ int main() {
// CHECK: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR]],
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIV]],
-// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_for_static_fini(
@@ -304,10 +303,8 @@ int main() {
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
-// Synchronization for initialization.
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// No synchronization for initialization.
+// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_for_static_fini(
Modified: cfe/trunk/test/OpenMP/single_firstprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/single_firstprivate_codegen.cpp?rev=260877&r1=260876&r2=260877&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/single_firstprivate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/single_firstprivate_codegen.cpp Mon Feb 15 02:07:17 2016
@@ -57,7 +57,6 @@ int vec[] = {1, 2};
S<float> s_arr[] = {1, 2};
// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
S<float> var(3);
-// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
// CHECK-DAG: [[SINGLE_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8*
// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
@@ -215,7 +214,7 @@ int main() {
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
// CHECK: call void @__kmpc_end_single(
-// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
More information about the cfe-commits
mailing list