[flang-commits] [flang] aa41460 - [flang][OpenMP] Handle the data race for firstprivate and lastprivate

Peixin Qiao via flang-commits flang-commits at lists.llvm.org
Sat Aug 20 08:32:36 PDT 2022


Author: Peixin Qiao
Date: 2022-08-20T23:31:13+08:00
New Revision: aa4146031132c8007bc9b9109221036c24923d2a

URL: https://github.com/llvm/llvm-project/commit/aa4146031132c8007bc9b9109221036c24923d2a
DIFF: https://github.com/llvm/llvm-project/commit/aa4146031132c8007bc9b9109221036c24923d2a.diff

LOG: [flang][OpenMP] Handle the data race for firstprivate and lastprivate

Remove barriers for firstprivate except if the variable is also
lastprivatized. Re-arrange code and put all last-privates inside a
single scf.if.

As OpenMP Spec 5.0, to avoid the data races, concurrent updates of the
original list item must be synchronized with the read of the original
list item that occurs as a result of the firstprivate clause. Adding
barrier(s) before and/or after the worksharing region would remove the
data races, and it is the application(user)'s job. However, when
one list item is in both firstprivate and lastprivate clauses, the
standard (https://www.openmp.org/spec-html/5.0/openmpsu105.html) states
the following:
```
If a list item appears in both firstprivate and lastprivate clauses, the
update required for the lastprivate clause occurs after all
initializations for the firstprivate clause.
```

So, the synchronization should be ensured by compiler such as emiting
one barrier since the lastprivate clause follows the reads of the
original list item performed for the initialization of each of the
firstprivate list item.

Add FIXME for two special cases, sections construct and linear clause.

The data race problem for single construct will be handled later.

This implementation is based on the discussion with OpenMP committee and
clang code (clang/lib/CodeGen/CGStmtOpenMP.cpp).

Reviewed By: kiranchandramohan, NimishMishra

Differential Revision: https://reviews.llvm.org/D131832

Added: 
    

Modified: 
    flang/lib/Lower/OpenMP.cpp
    flang/test/Lower/OpenMP/default-clause.f90
    flang/test/Lower/OpenMP/omp-parallel-firstprivate-clause-scalar.f90
    flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90
    flang/test/Lower/OpenMP/omp-parallel-wsloop-firstpriv.f90
    flang/test/Lower/OpenMP/omp-parallel-wsloop.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index b79156ed37fb8..77a952614d161 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -60,28 +60,21 @@ getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject) {
   return sym;
 }
 
-template <typename T>
 static void
-createPrivateVarSyms(Fortran::lower::AbstractConverter &converter,
-                     const T *clause,
-                     [[maybe_unused]] Block *lastPrivBlock = nullptr) {
-  const Fortran::parser::OmpObjectList &ompObjectList = clause->v;
-  for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) {
-    Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
-    // Privatization for symbols which are pre-determined (like loop index
-    // variables) happen separately, for everything else privatize here.
-    if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined))
-      continue;
-    bool success = converter.createHostAssociateVarClone(*sym);
-    (void)success;
-    assert(success && "Privatization failed due to existing binding");
-    if constexpr (std::is_same_v<T, Fortran::parser::OmpClause::Firstprivate>) {
-      converter.copyHostAssociateVar(*sym);
-    } else if constexpr (std::is_same_v<
-                             T, Fortran::parser::OmpClause::Lastprivate>) {
-      converter.copyHostAssociateVar(*sym, lastPrivBlock);
-    }
-  }
+privatizeSymbol(Fortran::lower::AbstractConverter &converter,
+                const Fortran::semantics::Symbol *sym,
+                [[maybe_unused]] mlir::Block *lastPrivBlock = nullptr) {
+  // Privatization for symbols which are pre-determined (like loop index
+  // variables) happen separately, for everything else privatize here.
+  if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined))
+    return;
+  bool success = converter.createHostAssociateVarClone(*sym);
+  (void)success;
+  assert(success && "Privatization failed due to existing binding");
+  if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate))
+    converter.copyHostAssociateVar(*sym);
+  if (sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate))
+    converter.copyHostAssociateVar(*sym, lastPrivBlock);
 }
 
 template <typename Op>
@@ -90,10 +83,7 @@ static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter,
                           Fortran::lower::pft::Evaluation &eval) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   auto insPt = firOpBuilder.saveInsertionPoint();
-  firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
-  bool hasFirstPrivateOp = false;
-  bool hasLastPrivateOp = false;
-  // Symbols in private and/or firstprivate clauses.
+  // Symbols in private, firstprivate, and/or lastprivate clauses.
   llvm::SetVector<const Fortran::semantics::Symbol *> privatizedSymbols;
   auto collectOmpObjectListSymbol =
       [&](const Fortran::parser::OmpObjectList &ompObjectList,
@@ -105,7 +95,8 @@ static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter,
       };
   // We need just one ICmpOp for multiple LastPrivate clauses.
   mlir::arith::CmpIOp cmpOp;
-
+  mlir::Block *lastPrivBlock = nullptr;
+  bool hasLastPrivateOp = false;
   for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
     if (const auto &privateClause =
             std::get_if<Fortran::parser::OmpClause::Private>(&clause.u)) {
@@ -114,15 +105,14 @@ static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter,
                    std::get_if<Fortran::parser::OmpClause::Firstprivate>(
                        &clause.u)) {
       collectOmpObjectListSymbol(firstPrivateClause->v, privatizedSymbols);
-      hasFirstPrivateOp = true;
     } else if (const auto &lastPrivateClause =
                    std::get_if<Fortran::parser::OmpClause::Lastprivate>(
                        &clause.u)) {
       // TODO: Add lastprivate support for sections construct, simd construct
       if (std::is_same_v<Op, omp::WsLoopOp>) {
         omp::WsLoopOp *wsLoopOp = dyn_cast<omp::WsLoopOp>(&op);
-        fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-        auto insPt = firOpBuilder.saveInsertionPoint();
+        mlir::Operation *lastOper = wsLoopOp->region().back().getTerminator();
+        firOpBuilder.setInsertionPoint(lastOper);
 
         // Our goal here is to introduce the following control flow
         // just before exiting the worksharing loop.
@@ -146,10 +136,6 @@ static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter,
         //    omp.yield
         // }
 
-        Operation *lastOper = wsLoopOp->region().back().getTerminator();
-
-        firOpBuilder.setInsertionPoint(lastOper);
-
         // TODO: The following will not work when there is collapse present.
         // Have to modify this in future.
         for (const Fortran::parser::OmpClause &clause : opClauseList.v)
@@ -158,7 +144,7 @@ static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter,
             TODO(converter.getCurrentLocation(),
                  "Collapse clause with lastprivate");
         // Only generate the compare once in presence of multiple LastPrivate
-        // clauses
+        // clauses.
         if (!hasLastPrivateOp) {
           cmpOp = firOpBuilder.create<mlir::arith::CmpIOp>(
               wsLoopOp->getLoc(), mlir::arith::CmpIPredicate::eq,
@@ -167,14 +153,12 @@ static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter,
         }
         mlir::scf::IfOp ifOp = firOpBuilder.create<mlir::scf::IfOp>(
             wsLoopOp->getLoc(), cmpOp, /*else*/ false);
-
-        firOpBuilder.restoreInsertionPoint(insPt);
-        createPrivateVarSyms(converter, lastPrivateClause,
-                             &(ifOp.getThenRegion().front()));
+        lastPrivBlock = &ifOp.getThenRegion().front();
       } else {
         TODO(converter.getCurrentLocation(),
-             "lastprivate clause in constructs other than work-share loop");
+             "lastprivate clause in constructs other than worksharing-loop");
       }
+      collectOmpObjectListSymbol(lastPrivateClause->v, privatizedSymbols);
       hasLastPrivateOp = true;
     }
   }
@@ -215,30 +199,31 @@ static bool privatizeVars(Op &op, Fortran::lower::AbstractConverter &converter,
     }
   }
 
-  auto privatizeSymbol = [&](const Fortran::semantics::Symbol *sym) {
-    // Privatization for symbols which are pre-determined (like loop index
-    // variables) happen separately, for everything else privatize here.
-    if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined))
-      return;
-    bool success = converter.createHostAssociateVarClone(*sym);
-    (void)success;
-    assert(success && "Privatization failed due to existing binding");
-    if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
-      converter.copyHostAssociateVar(*sym);
-      hasFirstPrivateOp = true;
-    }
-  };
-
-  for (auto sym : privatizedSymbols)
-    privatizeSymbol(sym);
+  bool needBarrier = false;
+  firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
+  for (auto sym : privatizedSymbols) {
+    privatizeSymbol(converter, sym, lastPrivBlock);
+    if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) &&
+        sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate))
+      needBarrier = true;
+  }
 
   for (auto sym : defaultSymbols)
     if (!symbolsInNestedRegions.contains(sym) &&
         !symbolsInParentRegions.contains(sym) &&
         !privatizedSymbols.contains(sym))
-      privatizeSymbol(sym);
-  if (hasFirstPrivateOp)
+      privatizeSymbol(converter, sym);
+
+  // Emit implicit barrier to synchronize threads and avoid data races on
+  // initialization of firstprivate variables and post-update of lastprivate
+  // variables.
+  // FIXME: Emit barrier for lastprivate clause when 'sections' directive has
+  // 'nowait' clause. Otherwise, emit barrier when 'sections' directive has
+  // both firstprivate and lastprivate clause.
+  // Emit implicit barrier for linear clause. Maybe on somewhere else.
+  if (needBarrier)
     firOpBuilder.create<mlir::omp::BarrierOp>(converter.getCurrentLocation());
+
   firOpBuilder.restoreInsertionPoint(insPt);
   return hasLastPrivateOp;
 }

diff  --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90
index cc1451b6262db..7ad357c63d194 100644
--- a/flang/test/Lower/OpenMP/default-clause.f90
+++ b/flang/test/Lower/OpenMP/default-clause.f90
@@ -16,7 +16,6 @@
 !CHECK: fir.store %[[const]] to %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
 !CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
-!CHECK: omp.barrier
 !CHECK: %[[const:.*]] = arith.constant 2 : i32
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: %[[result:.*]] = arith.muli %[[const]], %[[temp]] : i32
@@ -65,7 +64,6 @@ program default_clause_lowering
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: omp.terminator
@@ -83,7 +81,6 @@ program default_clause_lowering
 !CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
 !CHECK: %[[temp:.*]] = fir.load %[[W]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_W]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[const:.*]] = arith.constant 2 : i32
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: %[[result:.*]] = arith.muli %[[const]], %[[temp]] : i32
@@ -115,7 +112,6 @@ program default_clause_lowering
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_W]] : !fir.ref<i32>
 !CHECK: omp.terminator
@@ -149,7 +145,6 @@ subroutine nested_default_clause_tests
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
 !CHECK: %[[PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
-!CHECK: omp.barrier
 !CHECK: omp.parallel {
 !CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[INNER_PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
@@ -167,7 +162,6 @@ subroutine nested_default_clause_tests
 !CHECK: %[[INNER_PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_K]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[INNER_PRIVATE_K]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[const:.*]] = arith.constant 30 : i32
 !CHECK: fir.store %[[const]] to %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: %[[const:.*]] = arith.constant 40 : i32
@@ -207,7 +201,6 @@ subroutine nested_default_clause_tests
 !CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[INNER_PRIVATE_Y]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[INNER_PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_INNER_X]] : !fir.ref<i32>
 !CHECK: omp.terminator
@@ -243,7 +236,6 @@ subroutine nested_default_clause_tests
 !CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[INNER_PRIVATE_Y]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[INNER_PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[INNER_PRIVATE_X]] : !fir.ref<i32>
 !CHECK: omp.terminator
@@ -273,7 +265,6 @@ subroutine nested_default_clause_tests
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[temp:.*]] = fir.load %[[Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_Y]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: omp.single {
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>

diff  --git a/flang/test/Lower/OpenMP/omp-parallel-firstprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/omp-parallel-firstprivate-clause-scalar.f90
index 0fb1b3ea7105f..378ae8e07c4b8 100644
--- a/flang/test/Lower/OpenMP/omp-parallel-firstprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/omp-parallel-firstprivate-clause-scalar.f90
@@ -11,7 +11,6 @@
 !FIRDialect:     %[[ARG2_PVT:.*]] = fir.alloca !fir.complex<8> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_complexEarg2"}
 !FIRDialect:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2]] : !fir.ref<!fir.complex<8>>
 !FIRDialect:     fir.store %[[ARG2_VAL]] to %[[ARG2_PVT]] : !fir.ref<!fir.complex<8>>
-!FIRDialect:     omp.barrier
 !FIRDialect:     fir.call @_QPfoo(%[[ARG1_PVT]], %[[ARG2_PVT]]) : (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<8>>) -> ()
 !FIRDialect:     omp.terminator
 !FIRDialect:   }
@@ -46,7 +45,6 @@ subroutine firstprivate_complex(arg1, arg2)
 !FIRDialect:    %[[ARG6_PVT:.*]] = fir.alloca i128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_integerEarg6"}
 !FIRDialect:    %[[ARG6_VAL:.*]] = fir.load %[[ARG6]] : !fir.ref<i128>
 !FIRDialect:    fir.store %[[ARG6_VAL]] to %[[ARG6_PVT]] : !fir.ref<i128>
-!FIRDialect:    omp.barrier
 !FIRDialect:    fir.call @_QPbar(%[[ARG1_PVT]], %[[ARG2_PVT]], %[[ARG3_PVT]], %[[ARG4_PVT]], %[[ARG5_PVT]], %[[ARG6_PVT]]) : (!fir.ref<i32>, !fir.ref<i8>, !fir.ref<i16>, !fir.ref<i32>, !fir.ref<i64>, !fir.ref<i128>) -> ()
 !FIRDialect:    omp.terminator
 !FIRDialect:  }
@@ -82,7 +80,6 @@ subroutine firstprivate_integer(arg1, arg2, arg3, arg4, arg5, arg6)
 !FIRDialect:     %[[ARG5_PVT:.*]] = fir.alloca !fir.logical<8> {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_logicalEarg5"}
 !FIRDialect:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5]] : !fir.ref<!fir.logical<8>>
 !FIRDialect:     fir.store %[[ARG5_VAL]] to %[[ARG5_PVT]] : !fir.ref<!fir.logical<8>>
-!FIRDialect:     omp.barrier
 !FIRDialect:     fir.call @_QPbaz(%[[ARG1_PVT]], %[[ARG2_PVT]], %[[ARG3_PVT]], %[[ARG4_PVT]], %[[ARG5_PVT]]) : (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<1>>, !fir.ref<!fir.logical<2>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<8>>) -> ()
 !FIRDialect:     omp.terminator
 !FIRDialect:   }
@@ -120,7 +117,6 @@ subroutine firstprivate_logical(arg1, arg2, arg3, arg4, arg5)
 !FIRDialect:     %[[ARG6_PVT:.*]] = fir.alloca f128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_realEarg6"}
 !FIRDialect:     %[[ARG6_VAL:.*]] = fir.load %[[ARG6]] : !fir.ref<f128>
 !FIRDialect:     fir.store %[[ARG6_VAL]] to %[[ARG6_PVT]] : !fir.ref<f128>
-!FIRDialect:     omp.barrier
 !FIRDialect:     fir.call @_QPqux(%[[ARG1_PVT]], %[[ARG2_PVT]], %[[ARG3_PVT]], %[[ARG4_PVT]], %[[ARG5_PVT]], %[[ARG6_PVT]]) : (!fir.ref<f32>, !fir.ref<f16>, !fir.ref<f32>, !fir.ref<f64>, !fir.ref<f80>, !fir.ref<f128>) -> ()
 !FIRDialect:     omp.terminator
 !FIRDialect:   }
@@ -149,8 +145,6 @@ subroutine firstprivate_real(arg1, arg2, arg3, arg4, arg5, arg6)
 !FIRDialect:             %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFmultiple_firstprivateEb"}
 !FIRDialect:             %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<i32>
 !FIRDialect:             fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref<i32>
-!FIRDialect:             omp.barrier
-!FIRDialect-NOT:         omp.barrier
 !FIRDialect:             fir.call @_QPquux(%[[A_PRIV_ADDR]], %[[B_PRIV_ADDR]]) : (!fir.ref<i32>, !fir.ref<i32>) -> ()
 !FIRDialect:             omp.terminator
 !FIRDialect:           }

diff  --git a/flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90
index bf42bf57dc033..237ed8183b4a8 100644
--- a/flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90
@@ -1,5 +1,4 @@
 ! This test checks lowering of `FIRSTPRIVATE` clause for scalar types.
-! TODO: Add a test for same var being first and lastprivate when support is there.
 
 ! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
 ! RUN: flang-new -fc1 -fopenmp -emit-fir %s -o - | FileCheck %s
@@ -84,24 +83,21 @@ subroutine lastprivate_int(arg1)
 end subroutine
 
 !CHECK: func.func @_QPmult_lastprivate_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
-!CHECK-DAG: omp.parallel  {
+!CHECK: omp.parallel  {
 !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
-!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
-!CHECK-DAG: scf.if %[[IV_CMP1]] {
-! Testing lastprivate val update
-!CHECK-NEXT: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
-!CHECK-DAG: }
-!CHECK-DAG: scf.if %[[IV_CMP1]] {
+!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
+!CHECK-NEXT: scf.if %[[IV_CMP1]] {
 ! Testing lastprivate val update
-!CHECK-NEXT: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
-!CHECK-DAG: }
-!CHECK-DAG: omp.yield
+!CHECK-DAG: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
+!CHECK-DAG: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
+!CHECK: }
+!CHECK: omp.yield
 
 subroutine mult_lastprivate_int(arg1, arg2)
         integer :: arg1, arg2
@@ -118,21 +114,21 @@ subroutine mult_lastprivate_int(arg1, arg2)
 end subroutine
 
 !CHECK: func.func @_QPmult_lastprivate_int2(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
-!CHECK-DAG: omp.parallel  {
+!CHECK: omp.parallel  {
 !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
 
-! Testing last iteration check
-!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
-!CHECK-DAG: scf.if %[[IV_CMP1]] {
-! Testing lastprivate val update
-!CHECK-NEXT: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
-!CHECK-NEXT: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
-!CHECK-NEXT: }
-!CHECK-NEXT: omp.yield
+!Testing last iteration check
+!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
+!CHECK-NEXT: scf.if %[[IV_CMP1]] {
+!Testing lastprivate val update
+!CHECK-DAG: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
+!CHECK-DAG: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
+!CHECK: }
+!CHECK: omp.yield
 
 subroutine mult_lastprivate_int2(arg1, arg2)
         integer :: arg1, arg2
@@ -149,19 +145,19 @@ subroutine mult_lastprivate_int2(arg1, arg2)
 end subroutine
 
 !CHECK: func.func @_QPfirstpriv_lastpriv_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
-!CHECK-DAG: omp.parallel  {
-! Lastprivate Allocation
-!CHECK-NEXT: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
-!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK: omp.parallel  {
 ! Firstprivate update
-!CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref<i32>
-!CHECK-NEXT: omp.barrier
+!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK-DAG: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref<i32>
+! Lastprivate Allocation
+!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
+!CHECK-NOT: omp.barrier
 !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
-!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
-!CHECK-DAG: scf.if %[[IV_CMP1]] {
+!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
+!CHECK-NEXT: scf.if %[[IV_CMP1]] {
 ! Testing lastprivate val update
 !CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
 !CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG2]] : !fir.ref<i32>
@@ -182,4 +178,32 @@ subroutine firstpriv_lastpriv_int(arg1, arg2)
 print *, arg1, arg2
 end subroutine
 
+!CHECK: func.func @_QPfirstpriv_lastpriv_int2(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}) {
+!CHECK: omp.parallel  {
+! Firstprivate update
+!CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+! Testing last iteration check
+!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
+!CHECK-NEXT: scf.if %[[IV_CMP1]] {
+! Testing lastprivate val update
+!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
+!CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref<i32>
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.yield
 
+subroutine firstpriv_lastpriv_int2(arg1)
+        integer :: arg1
+!$OMP PARALLEL 
+!$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1)
+do n = 1, 5
+        arg1 = 2
+        print *, arg1
+end do
+!$OMP END DO
+!$OMP END PARALLEL
+print *, arg1
+end subroutine

diff  --git a/flang/test/Lower/OpenMP/omp-parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/omp-parallel-wsloop-firstpriv.f90
index 79feaa7c32179..8826932d90a77 100644
--- a/flang/test/Lower/OpenMP/omp-parallel-wsloop-firstpriv.f90
+++ b/flang/test/Lower/OpenMP/omp-parallel-wsloop-firstpriv.f90
@@ -13,7 +13,6 @@ subroutine omp_do_firstprivate(a)
   ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned
   ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref<i32>
-  ! CHECK-NEXT: omp.barrier
   ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
   ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
@@ -43,7 +42,6 @@ subroutine omp_do_firstprivate2(a, n)
   ! CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "n", pinned
   ! CHECK-NEXT: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[LD1]] to %[[CLONE1]] : !fir.ref<i32>
-  ! CHECK-NEXT: omp.barrier
   ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 
 

diff  --git a/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
index cacd0d19df599..39a90333d3180 100644
--- a/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
+++ b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
@@ -87,7 +87,6 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt)
   ! CHECK:    %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
   ! CHECK:    %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
   ! CHECK:    fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
-  ! CHECK:    omp.barrier
   ! CHECK:    %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:    %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:    %[[WS_STEP:.*]] = arith.constant 1 : i32
@@ -138,7 +137,6 @@ end subroutine parallel_private_do
 ! CHECK:             %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_private_doEnt"}
 ! CHECK:             %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[NT]] to %[[NT_ADDR]] : !fir.ref<i32>
-! CHECK:             omp.barrier
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
@@ -153,8 +151,7 @@ end subroutine parallel_private_do
 ! CHECK:         }
 
 !===============================================================================
-! Checking for the following construct - multiple firstprivate clauses must emit
-! only one barrier
+! Checking for the following construct
 !   !$omp parallel
 !   !$omp do firstprivate(...) firstprivate(...)
 !===============================================================================
@@ -182,7 +179,6 @@ end subroutine omp_parallel_multiple_firstprivate_do
 ! CHECK:             %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEb"}
 ! CHECK:             %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref<i32>
-! CHECK:             omp.barrier
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
@@ -224,7 +220,6 @@ end subroutine parallel_do_private
 ! CHECK:             %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"}
 ! CHECK:             %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[NT]] to %[[NT_ADDR]] : !fir.ref<i32>
-! CHECK:             omp.barrier
 ! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
@@ -240,8 +235,7 @@ end subroutine parallel_do_private
 ! CHECK:         }
 
 !===============================================================================
-! Checking for the following construct - multiple firstprivate clauses must emit
-! only one barrier
+! Checking for the following construct
 !   !$omp parallel
 !   !$omp do firstprivate(...) firstprivate(...)
 !===============================================================================
@@ -268,8 +262,6 @@ end subroutine omp_parallel_do_multiple_firstprivate
 ! CHECK:             %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"}
 ! CHECK:             %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref<i32>
-! CHECK:             omp.barrier
-! CHECK-NOT:         omp.barrier
 ! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32


        


More information about the flang-commits mailing list