[flang-commits] [flang] [flang][OpenMP] Move privatizations out of sections (PR #88191)

Leandro Lupori via flang-commits flang-commits at lists.llvm.org
Mon Apr 15 12:49:48 PDT 2024


https://github.com/luporl updated https://github.com/llvm/llvm-project/pull/88191

>From 1d56d787dba92ac94774b2cf378dadd52e081bb1 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Tue, 9 Apr 2024 17:24:35 -0300
Subject: [PATCH 1/4] [flang][OpenMP] Move privatizations out of sections

Besides duplicating code, privatizing variables in every section
causes problems when synchronization barriers are used. This
happens because each section is executed by a given thread, which
will cause the program to hang if not all running threads execute
the barrier operation.

Fixes https://github.com/llvm/llvm-project/issues/72824
---
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp | 94 +++----------------
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 63 ++++++++++---
 flang/test/Lower/OpenMP/FIR/sections.f90      | 56 ++++-------
 flang/test/Lower/OpenMP/sections.f90          | 76 +++++----------
 4 files changed, 102 insertions(+), 187 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 5a42e6a6aa4175..efbc265ffb3e62 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -34,9 +34,12 @@ void DataSharingProcessor::processStep1(
 }
 
 void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
-  insPt = firOpBuilder.saveInsertionPoint();
-  copyLastPrivatize(op);
-  firOpBuilder.restoreInsertionPoint(insPt);
+  // 'sections' lastprivate is handled by genOMP()
+  if (!mlir::isa<mlir::omp::SectionsOp>(op)) {
+    insPt = firOpBuilder.saveInsertionPoint();
+    copyLastPrivatize(op);
+    firOpBuilder.restoreInsertionPoint(insPt);
+  }
 
   if (isLoop) {
     // push deallocs out of the loop
@@ -114,6 +117,10 @@ void DataSharingProcessor::collectSymbolsForPrivatization() {
 }
 
 bool DataSharingProcessor::needBarrier() {
+  // Emit implicit barrier to synchronize threads and avoid data races on
+  // initialization of firstprivate variables and post-update of lastprivate
+  // variables.
+  // Emit implicit barrier for linear clause. Maybe on somewhere else.
   for (const Fortran::semantics::Symbol *sym : privatizedSymbols) {
     if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) &&
         sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate))
@@ -123,13 +130,6 @@ bool DataSharingProcessor::needBarrier() {
 }
 
 void DataSharingProcessor::insertBarrier() {
-  // Emit implicit barrier to synchronize threads and avoid data races on
-  // initialization of firstprivate variables and post-update of lastprivate
-  // variables.
-  // FIXME: Emit barrier for lastprivate clause when 'sections' directive has
-  // 'nowait' clause. Otherwise, emit barrier when 'sections' directive has
-  // both firstprivate and lastprivate clause.
-  // Emit implicit barrier for linear clause. Maybe on somewhere else.
   if (needBarrier())
     firOpBuilder.create<mlir::omp::BarrierOp>(converter.getCurrentLocation());
 }
@@ -141,77 +141,7 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
     if (clause.id != llvm::omp::OMPC_lastprivate)
       continue;
     // TODO: Add lastprivate support for simd construct
-    if (mlir::isa<mlir::omp::SectionOp>(op)) {
-      if (&eval == &eval.parentConstruct->getLastNestedEvaluation()) {
-        // For `omp.sections`, lastprivatized variables occur in
-        // lexically final `omp.section` operation. The following FIR
-        // shall be generated for the same:
-        //
-        // omp.sections lastprivate(...) {
-        //  omp.section {...}
-        //  omp.section {...}
-        //  omp.section {
-        //      fir.allocate for `private`/`firstprivate`
-        //      <More operations here>
-        //      fir.if %true {
-        //          ^%lpv_update_blk
-        //      }
-        //  }
-        // }
-        //
-        // To keep code consistency while handling privatization
-        // through this control flow, add a `fir.if` operation
-        // that always evaluates to true, in order to create
-        // a dedicated sub-region in `omp.section` where
-        // lastprivate FIR can reside. Later canonicalizations
-        // will optimize away this operation.
-        if (!eval.lowerAsUnstructured()) {
-          auto ifOp = firOpBuilder.create<fir::IfOp>(
-              op->getLoc(),
-              firOpBuilder.createIntegerConstant(
-                  op->getLoc(), firOpBuilder.getIntegerType(1), 0x1),
-              /*else*/ false);
-          firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-
-          const Fortran::parser::OpenMPConstruct *parentOmpConstruct =
-              eval.parentConstruct->getIf<Fortran::parser::OpenMPConstruct>();
-          assert(parentOmpConstruct &&
-                 "Expected a valid enclosing OpenMP construct");
-          const Fortran::parser::OpenMPSectionsConstruct *sectionsConstruct =
-              std::get_if<Fortran::parser::OpenMPSectionsConstruct>(
-                  &parentOmpConstruct->u);
-          assert(sectionsConstruct &&
-                 "Expected an enclosing omp.sections construct");
-          const Fortran::parser::OmpClauseList &sectionsEndClauseList =
-              std::get<Fortran::parser::OmpClauseList>(
-                  std::get<Fortran::parser::OmpEndSectionsDirective>(
-                      sectionsConstruct->t)
-                      .t);
-          for (const Fortran::parser::OmpClause &otherClause :
-               sectionsEndClauseList.v)
-            if (std::get_if<Fortran::parser::OmpClause::Nowait>(&otherClause.u))
-              // Emit implicit barrier to synchronize threads and avoid data
-              // races on post-update of lastprivate variables when `nowait`
-              // clause is present.
-              firOpBuilder.create<mlir::omp::BarrierOp>(
-                  converter.getCurrentLocation());
-          firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-          lastPrivIP = firOpBuilder.saveInsertionPoint();
-          firOpBuilder.setInsertionPoint(ifOp);
-          insPt = firOpBuilder.saveInsertionPoint();
-        } else {
-          // Lastprivate operation is inserted at the end
-          // of the lexically last section in the sections
-          // construct
-          mlir::OpBuilder::InsertPoint unstructuredSectionsIP =
-              firOpBuilder.saveInsertionPoint();
-          mlir::Operation *lastOper = op->getRegion(0).back().getTerminator();
-          firOpBuilder.setInsertionPoint(lastOper);
-          lastPrivIP = firOpBuilder.saveInsertionPoint();
-          firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP);
-        }
-      }
-    } else if (mlir::isa<mlir::omp::WsloopOp>(op)) {
+    if (mlir::isa<mlir::omp::WsloopOp>(op)) {
       // Update the original variable just before exiting the worksharing
       // loop. Conversion as follows:
       //
@@ -262,6 +192,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
       assert(loopIV && "loopIV was not set");
       firOpBuilder.create<fir::StoreOp>(op->getLoc(), v, loopIV);
       lastPrivIP = firOpBuilder.saveInsertionPoint();
+    } else if (mlir::isa<mlir::omp::SectionsOp>(op)) {
+      // Already handled by genOMP()
     } else {
       TODO(converter.getCurrentLocation(),
            "lastprivate clause in constructs other than "
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 3dcfe0fd775dc5..01b46c4489f265 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -851,14 +851,10 @@ static mlir::omp::SectionOp
 genSectionOp(Fortran::lower::AbstractConverter &converter,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, bool genNested,
-             mlir::Location currentLocation,
-             const Fortran::parser::OmpClauseList &sectionsClauseList) {
-  // Currently only private/firstprivate clause is handled, and
-  // all privatization is done within `omp.section` operations.
+             mlir::Location currentLocation) {
   return genOpWithBody<mlir::omp::SectionOp>(
       OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
-          .setGenNested(genNested)
-          .setClauses(&sectionsClauseList));
+          .setGenNested(genNested));
 }
 
 static mlir::omp::SingleOp
@@ -2239,6 +2235,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
           .v;
 
   // Parallel wrapper of PARALLEL SECTIONS construct
+  bool hasNowait = false;
   if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
     genParallelOp(converter, symTable, semaCtx, eval,
                   /*genNested=*/false, currentLocation, sectionsClauseList,
@@ -2248,28 +2245,66 @@ genOMP(Fortran::lower::AbstractConverter &converter,
         std::get<Fortran::parser::OmpEndSectionsDirective>(sectionsConstruct.t);
     const auto &endSectionsClauseList =
         std::get<Fortran::parser::OmpClauseList>(endSectionsDirective.t);
-    ClauseProcessor(converter, semaCtx, endSectionsClauseList)
+    hasNowait = ClauseProcessor(converter, semaCtx, endSectionsClauseList)
         .processNowait(clauseOps);
   }
 
+  // Insert privatizations before SECTIONS
+  symTable.pushScope();
+  DataSharingProcessor dsp(converter, semaCtx, sectionsClauseList, eval);
+  dsp.processStep1();
+
   // SECTIONS construct
-  genOpWithBody<mlir::omp::SectionsOp>(
+  auto sectionsOp = genOpWithBody<mlir::omp::SectionsOp>(
       OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
           .setGenNested(false),
       clauseOps);
 
+  std::optional<Clause> lastPrivateClause;
+  for (const Fortran::parser::OmpClause &clause : sectionsClauseList.v) {
+    if (std::holds_alternative<Fortran::parser::OmpClause::Lastprivate>(
+            clause.u)) {
+      lastPrivateClause = makeClause(clause, semaCtx);
+    }
+  }
+
   const auto &sectionBlocks =
       std::get<Fortran::parser::OmpSectionBlocks>(sectionsConstruct.t);
   auto &firOpBuilder = converter.getFirOpBuilder();
   auto ip = firOpBuilder.saveInsertionPoint();
-  for (const auto &[nblock, neval] :
-       llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
-    symTable.pushScope();
-    genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation,
-                 sectionsClauseList);
-    symTable.popScope();
+  auto zippy = llvm::zip(sectionBlocks.v, eval.getNestedEvaluations());
+  auto it = zippy.begin(), next = it, end = zippy.end();
+  ++next;
+  for (; it != end; it = next, ++next) {
+    const auto &[nblock, neval] = *it;
+    mlir::omp::SectionOp sectionOp = genSectionOp(
+        converter, semaCtx, neval, /*genNested=*/true, currentLocation);
+    // For `omp.sections`, lastprivatized variables occur in
+    // lexically final `omp.section` operation.
+    if (next == end && lastPrivateClause) {
+      clause::Lastprivate &lastPrivate =
+          std::get<clause::Lastprivate>(lastPrivateClause.value().u);
+      firOpBuilder.setInsertionPoint(
+          sectionOp.getRegion().back().getTerminator());
+      mlir::OpBuilder::InsertPoint lastPrivIP =
+          converter.getFirOpBuilder().saveInsertionPoint();
+      for (const Object &obj : lastPrivate.v) {
+        Fortran::semantics::Symbol *sym = obj.id();
+        converter.copyHostAssociateVar(*sym, &lastPrivIP);
+      }
+    }
     firOpBuilder.restoreInsertionPoint(ip);
   }
+
+  // Perform DataSharingProcessor's step2 out of SECTIONS
+  firOpBuilder.setInsertionPointAfter(sectionsOp.getOperation());
+  dsp.processStep2(sectionsOp, false);
+  // Emit implicit barrier to synchronize threads and avoid data
+  // races on post-update of lastprivate variables when `nowait`
+  // clause is present.
+  if (hasNowait && lastPrivateClause)
+    firOpBuilder.create<mlir::omp::BarrierOp>(converter.getCurrentLocation());
+  symTable.popScope();
 }
 
 static void
diff --git a/flang/test/Lower/OpenMP/FIR/sections.f90 b/flang/test/Lower/OpenMP/FIR/sections.f90
index 7b313f3dc0b41f..9d8edb5c75d5ce 100644
--- a/flang/test/Lower/OpenMP/FIR/sections.f90
+++ b/flang/test/Lower/OpenMP/FIR/sections.f90
@@ -8,10 +8,10 @@
 !CHECK:   %[[COUNT:.*]] = fir.address_of(@_QFEcount) : !fir.ref<i32>
 !CHECK:   %[[ETA:.*]] = fir.alloca f32 {bindc_name = "eta", uniq_name = "_QFEeta"}
 !CHECK:   %[[CONST_1:.*]] = arith.constant 4 : i64
+!CHECK:   %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
+!CHECK:   %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} 
 !CHECK:   omp.sections allocate(%[[CONST_1]] : i64 -> %0 : !fir.ref<i32>)  {
 !CHECK:     omp.section {
-!CHECK:       %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} 
 !CHECK:       %[[const:.*]] = arith.constant 5 : i32
 !CHECK:       fir.store %[[const]] to %[[COUNT]] : !fir.ref<i32>
 !CHECK:       %[[temp_count:.*]] = fir.load %[[COUNT]] : !fir.ref<i32>
@@ -22,8 +22,6 @@
 !CHECK:       omp.terminator
 !CHECK:     }
 !CHECK:     omp.section {
-!CHECK:       %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} 
 !CHECK:       %[[temp:.*]] = fir.load %[[PRIVATE_DOUBLE_COUNT]] : !fir.ref<i32>
 !CHECK:       %[[const:.*]] = arith.constant 1 : i32
 !CHECK:       %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
@@ -31,8 +29,6 @@
 !CHECK:       omp.terminator
 !CHECK:     }
 !CHECK:     omp.section {
-!CHECK:       %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} 
 !CHECK:       %[[temp:.*]] = fir.load %[[PRIVATE_ETA]] : !fir.ref<f32>
 !CHECK:       %[[const:.*]] = arith.constant 7.000000e+00 : f32
 !CHECK:       %[[result:.*]] = arith.subf %[[temp]], %[[const]] {{.*}}: f32
@@ -79,11 +75,11 @@ program sample
 end program sample
 
 !CHECK: func @_QPfirstprivate(%[[ARG:.*]]: !fir.ref<f32> {fir.bindc_name = "alpha"}) {
+!CHECK:   %[[PRIVATE_ALPHA:.*]] = fir.alloca f32 {bindc_name = "alpha", pinned, uniq_name = "_QFfirstprivateEalpha"}
+!CHECK:   %[[temp:.*]] = fir.load %[[ARG]] : !fir.ref<f32>
+!CHECK:   fir.store %[[temp]] to %[[PRIVATE_ALPHA]] : !fir.ref<f32>
 !CHECK:   omp.sections {
 !CHECK:     omp.section  {
-!CHECK:         %[[PRIVATE_ALPHA:.*]] = fir.alloca f32 {bindc_name = "alpha", pinned, uniq_name = "_QFfirstprivateEalpha"}
-!CHECK:         %[[temp:.*]] = fir.load %[[ARG]] : !fir.ref<f32>
-!CHECK:         fir.store %[[temp]] to %[[PRIVATE_ALPHA]] : !fir.ref<f32>
 !CHECK:       omp.terminator
 !CHECK:     }
 !CHECK:     omp.terminator
@@ -114,10 +110,10 @@ subroutine firstprivate(alpha)
 subroutine lastprivate()
 	integer :: x
 !CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlastprivateEx"}
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: omp.sections   {
 	!$omp sections lastprivate(x)
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[const:.*]] = arith.constant 10 : i32
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[result:.*]] = arith.muli %c10_i32, %[[temp]] : i32
@@ -127,16 +123,12 @@ subroutine lastprivate()
         !$omp section
             x = x * 10
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[const:.*]] = arith.constant 1 : i32
 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: %[[true:.*]] = arith.constant true
-!CHECK: fir.if %[[true]] {
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
-!CHECK: }
 !CHECK: omp.terminator
 !CHECK: }
         !$omp section
@@ -145,13 +137,13 @@ subroutine lastprivate()
 !CHECK: }
     !$omp end sections
     
-!CHECK: omp.sections   {
-    !$omp sections firstprivate(x) lastprivate(x)
-!CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: omp.barrier
+!CHECK: omp.sections   {
+    !$omp sections firstprivate(x) lastprivate(x)
+!CHECK: omp.section {
 !CHECK: %[[const:.*]] = arith.constant 10 : i32
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[result:.*]] = arith.muli %c10_i32, %[[temp]] : i32
@@ -161,19 +153,12 @@ subroutine lastprivate()
         !$omp section
             x = x * 10
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
-!CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
-!CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[const:.*]] = arith.constant 1 : i32
 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: %[[true:.*]] = arith.constant true
-!CHECK: fir.if %true {
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
-!CHECK: }
 !CHECK: omp.terminator
 !CHECK: }
         !$omp section
@@ -182,13 +167,13 @@ subroutine lastprivate()
 !CHECK: }
     !$omp end sections
 
-!CHECK: omp.sections nowait {
-    !$omp sections firstprivate(x) lastprivate(x)
-!CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: omp.barrier
+!CHECK: omp.sections nowait {
+    !$omp sections firstprivate(x) lastprivate(x)
+!CHECK: omp.section {
 !CHECK: %[[const:.*]] = arith.constant 10 : i32
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[result:.*]] = arith.muli %c10_i32, %[[temp]] : i32
@@ -198,31 +183,24 @@ subroutine lastprivate()
         !$omp section
             x = x * 10
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
-!CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
-!CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[const:.*]] = arith.constant 1 : i32
 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: %[[true:.*]] = arith.constant true
-!CHECK: fir.if %true {
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
-!CHECK: omp.barrier
-!CHECK: }
 !CHECK: omp.terminator
 !CHECK: }
         !$omp section
             x = x + 1
 !CHECK: omp.terminator
 !CHECK: }
+!CHECK: omp.barrier
     !$omp end sections nowait
 
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: omp.sections {
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: cf.br ^bb1
 !CHECK: ^bb1:  // pred: ^bb0
 !CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
@@ -247,9 +225,9 @@ subroutine lastprivate()
 
 subroutine unstructured_sections_privatization()
 !CHECK: %[[X:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFunstructured_sections_privatizationEx"}
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFunstructured_sections_privatizationEx"}
 !CHECK: omp.sections {
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFunstructured_sections_privatizationEx"}
 !CHECK: cf.br ^bb1
 !CHECK: ^bb1:  // pred: ^bb0
 !CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<f32>
@@ -265,11 +243,11 @@ subroutine unstructured_sections_privatization()
             goto 40
         40  x = x + 1
     !$omp end sections
-!CHECK: omp.sections {
-!CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFunstructured_sections_privatizationEx"}
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<f32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<f32>
+!CHECK: omp.sections {
+!CHECK: omp.section {
 !CHECK: cf.br ^bb1
 !CHECK: ^bb1:  // pred: ^bb0
 !CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<f32>
diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90
index 018848e635733b..f598e1f64f88f8 100644
--- a/flang/test/Lower/OpenMP/sections.f90
+++ b/flang/test/Lower/OpenMP/sections.f90
@@ -10,12 +10,12 @@
 !CHECK:   %[[COUNT_DECL:.*]]:2 = hlfir.declare %[[COUNT]] {uniq_name = "_QFEcount"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:   %[[ETA:.*]] = fir.alloca f32 {bindc_name = "eta", uniq_name = "_QFEeta"}
 !CHECK:   %[[CONST_1:.*]] = arith.constant 4 : i64
+!CHECK:   %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
+!CHECK:   %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:   %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} 
+!CHECK:   %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:   omp.sections allocate(%[[CONST_1]] : i64 -> %[[COUNT_DECL]]#1 : !fir.ref<i32>)  {
 !CHECK:     omp.section {
-!CHECK:       %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
-!CHECK:       %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} 
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:       %[[CONST5:.*]] = arith.constant 5 : i32
 !CHECK:       hlfir.assign %[[CONST5]] to %[[COUNT_DECL]]#0 : i32, !fir.ref<i32>
 !CHECK:       %[[TEMP_COUNT:.*]] = fir.load %[[COUNT_DECL]]#0 : !fir.ref<i32>
@@ -26,10 +26,6 @@
 !CHECK:       omp.terminator
 !CHECK:     }
 !CHECK:     omp.section {
-!CHECK:       %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
-!CHECK:       %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} 
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:       %[[TEMP:.*]] = fir.load %[[PRIVATE_DOUBLE_COUNT_DECL]]#0 : !fir.ref<i32>
 !CHECK:       %[[CONST:.*]] = arith.constant 1 : i32
 !CHECK:       %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
@@ -37,10 +33,6 @@
 !CHECK:       omp.terminator
 !CHECK:     }
 !CHECK:     omp.section {
-!CHECK:       %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
-!CHECK:       %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} 
-!CHECK:       %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:       %[[TEMP:.*]] = fir.load %[[PRIVATE_ETA_DECL]]#0 : !fir.ref<f32>
 !CHECK:       %[[CONST:.*]] = arith.constant 7.000000e+00 : f32
 !CHECK:       %[[RESULT:.*]] = arith.subf %[[TEMP]], %[[CONST]] {{.*}}: f32
@@ -88,12 +80,12 @@ end program sample
 
 !CHECK: func @_QPfirstprivate(%[[ARG:.*]]: !fir.ref<f32> {fir.bindc_name = "alpha"}) {
 !CHECK:   %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]] {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>) 
+!CHECK:   %[[PRIVATE_ALPHA:.*]] = fir.alloca f32 {bindc_name = "alpha", pinned, uniq_name = "_QFfirstprivateEalpha"}
+!CHECK:   %[[PRIVATE_ALPHA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ALPHA]] {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:   %[[TEMP:.*]] = fir.load %[[ARG_DECL]]#0 : !fir.ref<f32>
+!CHECK:   hlfir.assign %[[TEMP]] to %[[PRIVATE_ALPHA_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
 !CHECK:   omp.sections {
 !CHECK:     omp.section  {
-!CHECK:         %[[PRIVATE_ALPHA:.*]] = fir.alloca f32 {bindc_name = "alpha", pinned, uniq_name = "_QFfirstprivateEalpha"}
-!CHECK:         %[[PRIVATE_ALPHA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ALPHA]] {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:         %[[TEMP:.*]] = fir.load %[[ARG_DECL]]#0 : !fir.ref<f32>
-!CHECK:         hlfir.assign %[[TEMP]] to %[[PRIVATE_ALPHA_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
 !CHECK:       omp.terminator
 !CHECK:     }
 !CHECK:     omp.terminator
@@ -126,11 +118,11 @@ subroutine lastprivate()
         integer :: x
 !CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: omp.sections   {
 	!$omp sections lastprivate(x)
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
-!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[CONST10:.*]] = arith.constant 10 : i32
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[RESULT:.*]] = arith.muli %[[CONST10]], %[[TEMP]] : i32
@@ -141,17 +133,12 @@ subroutine lastprivate()
             x = x * 10
 
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
-!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[CONST:.*]] = arith.constant 1 : i32
 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
-!CHECK: %[[TRUE:.*]] = arith.constant true
-!CHECK: fir.if %[[TRUE]] {
 !CHECK: %[[TEMP1:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP1]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK: }
 !CHECK: omp.terminator
 !CHECK: }
         !$omp section
@@ -160,14 +147,14 @@ subroutine lastprivate()
 !CHECK: }
     !$omp end sections
 
-!CHECK: omp.sections   {
-    !$omp sections firstprivate(x) lastprivate(x)
-!CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK: omp.barrier
+!CHECK: omp.sections {
+    !$omp sections firstprivate(x) lastprivate(x)
+!CHECK: omp.section {
 !CHECK: %[[CONST:.*]] = arith.constant 10 : i32
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[RESULT:.*]] = arith.muli %[[CONST]], %[[TEMP]] : i32
@@ -177,20 +164,12 @@ subroutine lastprivate()
         !$omp section
             x = x * 10
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
-!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
-!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[CONST:.*]] = arith.constant 1 : i32
 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
-!CHECK: %[[TRUE:.*]] = arith.constant true
-!CHECK: fir.if %[[TRUE]] {
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK: }
 !CHECK: omp.terminator
 !CHECK: }
         !$omp section
@@ -199,14 +178,14 @@ subroutine lastprivate()
 !CHECK: }
     !$omp end sections
 
-!CHECK: omp.sections nowait {
-    !$omp sections firstprivate(x) lastprivate(x)
-!CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK: omp.barrier
+!CHECK: omp.sections nowait {
+    !$omp sections firstprivate(x) lastprivate(x)
+!CHECK: omp.section {
 !CHECK: %[[CONST:.*]] = arith.constant 10 : i32
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[RESULT:.*]] = arith.muli %[[CONST]], %[[TEMP]] : i32
@@ -216,33 +195,25 @@ subroutine lastprivate()
         !$omp section
             x = x * 10
 !CHECK: omp.section {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
-!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
-!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[CONST:.*]] = arith.constant 1 : i32
 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
-!CHECK: %[[TRUE:.*]] = arith.constant true
-!CHECK: fir.if %[[TRUE]] {
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK: omp.barrier
-!CHECK: }
 !CHECK: omp.terminator
 !CHECK: }
         !$omp section
             x = x + 1
 !CHECK: omp.terminator
 !CHECK: }
+!CHECK: omp.barrier
      !$omp end sections nowait
 
-!CHECK: omp.sections {
-!CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.sections {
+!CHECK: omp.section {
 !CHECK: cf.br ^bb1
 !CHECK: ^bb1:  // pred: ^bb0
 !CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
@@ -257,7 +228,6 @@ subroutine lastprivate()
 !CHECK: }
 !CHECK: return
 !CHECK: }
-
     !$omp sections lastprivate(x)
         !$omp section
                 goto 30
@@ -269,10 +239,10 @@ subroutine lastprivate()
 subroutine unstructured_sections_privatization()
 !CHECK: %[[X:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFunstructured_sections_privatizationEx"}
 !CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFunstructured_sections_privatizationEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK: omp.sections {
-!CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFunstructured_sections_privatizationEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFunstructured_sections_privatizationEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK: omp.sections {
+!CHECK: omp.section {
 !CHECK: cf.br ^bb1
 !CHECK: ^bb1:  // pred: ^bb0
 !CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<f32>
@@ -288,12 +258,12 @@ subroutine unstructured_sections_privatization()
             goto 40
         40  x = x + 1
     !$omp end sections
-!CHECK: omp.sections {
-!CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFunstructured_sections_privatizationEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFunstructured_sections_privatizationEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<f32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK: omp.sections {
+!CHECK: omp.section {
 !CHECK: cf.br ^bb1
 !CHECK: ^bb1:
 !CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<f32>

>From 7d24f1e002158b249891ace414fdfa7e885bc52e Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Wed, 10 Apr 2024 11:10:23 -0300
Subject: [PATCH 2/4] Move lastprivatization out of section ops loop

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 51 +++++++++++++++----------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 01b46c4489f265..c53c17904533ec 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2260,40 +2260,39 @@ genOMP(Fortran::lower::AbstractConverter &converter,
           .setGenNested(false),
       clauseOps);
 
+  const auto &sectionBlocks =
+      std::get<Fortran::parser::OmpSectionBlocks>(sectionsConstruct.t);
+  auto &firOpBuilder = converter.getFirOpBuilder();
+  auto ip = firOpBuilder.saveInsertionPoint();
+  mlir::omp::SectionOp lastSectionOp;
+  for (const auto &[nblock, neval] :
+       llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
+    lastSectionOp = genSectionOp(converter, semaCtx, neval, /*genNested=*/true,
+                                 currentLocation);
+    firOpBuilder.restoreInsertionPoint(ip);
+  }
+
+  // For `omp.sections`, lastprivatized variables occur in
+  // lexically final `omp.section` operation.
   std::optional<Clause> lastPrivateClause;
   for (const Fortran::parser::OmpClause &clause : sectionsClauseList.v) {
     if (std::holds_alternative<Fortran::parser::OmpClause::Lastprivate>(
             clause.u)) {
       lastPrivateClause = makeClause(clause, semaCtx);
+      break;
     }
   }
-
-  const auto &sectionBlocks =
-      std::get<Fortran::parser::OmpSectionBlocks>(sectionsConstruct.t);
-  auto &firOpBuilder = converter.getFirOpBuilder();
-  auto ip = firOpBuilder.saveInsertionPoint();
-  auto zippy = llvm::zip(sectionBlocks.v, eval.getNestedEvaluations());
-  auto it = zippy.begin(), next = it, end = zippy.end();
-  ++next;
-  for (; it != end; it = next, ++next) {
-    const auto &[nblock, neval] = *it;
-    mlir::omp::SectionOp sectionOp = genSectionOp(
-        converter, semaCtx, neval, /*genNested=*/true, currentLocation);
-    // For `omp.sections`, lastprivatized variables occur in
-    // lexically final `omp.section` operation.
-    if (next == end && lastPrivateClause) {
-      clause::Lastprivate &lastPrivate =
-          std::get<clause::Lastprivate>(lastPrivateClause.value().u);
-      firOpBuilder.setInsertionPoint(
-          sectionOp.getRegion().back().getTerminator());
-      mlir::OpBuilder::InsertPoint lastPrivIP =
-          converter.getFirOpBuilder().saveInsertionPoint();
-      for (const Object &obj : lastPrivate.v) {
-        Fortran::semantics::Symbol *sym = obj.id();
-        converter.copyHostAssociateVar(*sym, &lastPrivIP);
-      }
+  if (lastSectionOp && lastPrivateClause) {
+    clause::Lastprivate &lastPrivate =
+        std::get<clause::Lastprivate>(lastPrivateClause.value().u);
+    firOpBuilder.setInsertionPoint(
+        lastSectionOp.getRegion().back().getTerminator());
+    mlir::OpBuilder::InsertPoint lastPrivIP =
+        converter.getFirOpBuilder().saveInsertionPoint();
+    for (const Object &obj : lastPrivate.v) {
+      Fortran::semantics::Symbol *sym = obj.id();
+      converter.copyHostAssociateVar(*sym, &lastPrivIP);
     }
-    firOpBuilder.restoreInsertionPoint(ip);
   }
 
   // Perform DataSharingProcessor's step2 out of SECTIONS

>From 7b82b917d92cbb760ff10620c34daa162e2bf302 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Mon, 15 Apr 2024 19:10:53 +0000
Subject: [PATCH 3/4] Handle multiple lastprivate clauses

---
 flang/lib/Lower/OpenMP/OpenMP.cpp    | 38 ++++++++++++++--------------
 flang/test/Lower/OpenMP/sections.f90 | 28 ++++++++++++++++++++
 2 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index c53c17904533ec..d042ceb1809181 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2274,24 +2274,24 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   // For `omp.sections`, lastprivatized variables occur in
   // lexically final `omp.section` operation.
-  std::optional<Clause> lastPrivateClause;
-  for (const Fortran::parser::OmpClause &clause : sectionsClauseList.v) {
-    if (std::holds_alternative<Fortran::parser::OmpClause::Lastprivate>(
-            clause.u)) {
-      lastPrivateClause = makeClause(clause, semaCtx);
-      break;
-    }
-  }
-  if (lastSectionOp && lastPrivateClause) {
-    clause::Lastprivate &lastPrivate =
-        std::get<clause::Lastprivate>(lastPrivateClause.value().u);
-    firOpBuilder.setInsertionPoint(
-        lastSectionOp.getRegion().back().getTerminator());
-    mlir::OpBuilder::InsertPoint lastPrivIP =
-        converter.getFirOpBuilder().saveInsertionPoint();
-    for (const Object &obj : lastPrivate.v) {
-      Fortran::semantics::Symbol *sym = obj.id();
-      converter.copyHostAssociateVar(*sym, &lastPrivIP);
+  bool hasLastPrivate = false;
+  if (lastSectionOp) {
+    for (const Fortran::parser::OmpClause &clause : sectionsClauseList.v) {
+      if (std::holds_alternative<Fortran::parser::OmpClause::Lastprivate>(
+              clause.u)) {
+        hasLastPrivate = true;
+        Clause lastPrivateClause = makeClause(clause, semaCtx);
+        clause::Lastprivate &lastPrivate =
+            std::get<clause::Lastprivate>(lastPrivateClause.u);
+        firOpBuilder.setInsertionPoint(
+            lastSectionOp.getRegion().back().getTerminator());
+        mlir::OpBuilder::InsertPoint lastPrivIP =
+            converter.getFirOpBuilder().saveInsertionPoint();
+        for (const Object &obj : lastPrivate.v) {
+          Fortran::semantics::Symbol *sym = obj.id();
+          converter.copyHostAssociateVar(*sym, &lastPrivIP);
+        }
+      }
     }
   }
 
@@ -2301,7 +2301,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   // Emit implicit barrier to synchronize threads and avoid data
   // races on post-update of lastprivate variables when `nowait`
   // clause is present.
-  if (hasNowait && lastPrivateClause)
+  if (hasNowait && hasLastPrivate)
     firOpBuilder.create<mlir::omp::BarrierOp>(converter.getCurrentLocation());
   symTable.popScope();
 }
diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90
index f598e1f64f88f8..a93d6e34e3742c 100644
--- a/flang/test/Lower/OpenMP/sections.f90
+++ b/flang/test/Lower/OpenMP/sections.f90
@@ -235,6 +235,34 @@ subroutine lastprivate()
     !$omp end sections
 end subroutine
 
+!CHECK-LABEL: func @_QPlastprivate2
+!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlastprivate2Ex"}
+!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFlastprivate2Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFlastprivate2Ey"}
+!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFlastprivate2Ey"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivate2Ex"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivate2Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate2Ey"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFlastprivate2Ey"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.sections {
+!CHECK:   omp.section {
+!CHECK:     %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
+!CHECK:     hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:     %[[TEMP2:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
+!CHECK:     hlfir.assign %[[TEMP2]] to %[[Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:     omp.terminator
+!CHECK:   }
+!CHECK:   omp.terminator
+!CHECK: }
+subroutine lastprivate2()
+    integer :: x, y
+
+    !$omp sections lastprivate(x) lastprivate(y)
+        !$omp section
+          x = y + 1
+    !$omp end sections
+end subroutine
+
 !CHECK-LABEL: func @_QPunstructured_sections_privatization
 subroutine unstructured_sections_privatization()
 !CHECK: %[[X:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFunstructured_sections_privatizationEx"}

>From c2c06a1931bbb7f7f5cce954dfb0e6648b3f2d41 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Mon, 15 Apr 2024 19:20:01 +0000
Subject: [PATCH 4/4] Rebase, fix build error and format

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index d042ceb1809181..153a4fee64012a 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2246,7 +2246,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
     const auto &endSectionsClauseList =
         std::get<Fortran::parser::OmpClauseList>(endSectionsDirective.t);
     hasNowait = ClauseProcessor(converter, semaCtx, endSectionsClauseList)
-        .processNowait(clauseOps);
+                    .processNowait(clauseOps);
   }
 
   // Insert privatizations before SECTIONS
@@ -2283,11 +2283,12 @@ genOMP(Fortran::lower::AbstractConverter &converter,
         Clause lastPrivateClause = makeClause(clause, semaCtx);
         clause::Lastprivate &lastPrivate =
             std::get<clause::Lastprivate>(lastPrivateClause.u);
+        const auto &objList = std::get<1>(lastPrivate.t);
         firOpBuilder.setInsertionPoint(
             lastSectionOp.getRegion().back().getTerminator());
         mlir::OpBuilder::InsertPoint lastPrivIP =
             converter.getFirOpBuilder().saveInsertionPoint();
-        for (const Object &obj : lastPrivate.v) {
+        for (const Object &obj : objList) {
           Fortran::semantics::Symbol *sym = obj.id();
           converter.copyHostAssociateVar(*sym, &lastPrivIP);
         }



More information about the flang-commits mailing list