[llvm-branch-commits] [flang] [Flang][OpenMP] Restructure recursive lowering in `createBodyOfOp` (PR #77761)

Krzysztof Parzyszek via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jan 11 05:19:51 PST 2024


https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/77761

>From 1b5524ae8874e389d373a55417919afa56beb2b5 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Mon, 8 Jan 2024 15:53:07 -0600
Subject: [PATCH] [Flang][OpenMP] Restructure recursive lowering in
 `createBodyOfOp`

This brings `createBodyOfOp` to its final intended form. First, input
privatization is performed, then the recursive lowering takes place,
and finally the output privatization (lastprivate) is done.

This enables fixing a known issue with infinite loops inside of an
OpenMP region, and the fix is included in this patch.

Fixes https://github.com/llvm/llvm-project/issues/74348.

Recursive lowering [5/5]
---
 flang/include/flang/Lower/OpenMP.h            |   4 +-
 flang/lib/Lower/OpenMP.cpp                    | 133 ++++++++++++------
 flang/test/Lower/OpenMP/FIR/sections.f90      |   6 +-
 .../OpenMP/infinite-loop-in-construct.f90     |  19 +++
 flang/test/Lower/OpenMP/sections.f90          |   6 +-
 5 files changed, 119 insertions(+), 49 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/infinite-loop-in-construct.f90

diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h
index 6e772c43d8c46e..477d3e7d9da3a8 100644
--- a/flang/include/flang/Lower/OpenMP.h
+++ b/flang/include/flang/Lower/OpenMP.h
@@ -50,8 +50,8 @@ struct Variable;
 } // namespace pft
 
 // Generate the OpenMP terminator for Operation at Location.
-void genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *,
-                         mlir::Location);
+mlir::Operation *genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *,
+                                     mlir::Location);
 
 void genOpenMPConstruct(AbstractConverter &, Fortran::lower::SymMap &,
                         semantics::SemanticsContext &, pft::Evaluation &,
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 6e93387fbff7cc..bb92fdce4ac56e 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -383,7 +383,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
             // construct
             mlir::OpBuilder::InsertPoint unstructuredSectionsIP =
                 firOpBuilder.saveInsertionPoint();
-            firOpBuilder.setInsertionPointToStart(&op->getRegion(0).back());
+            mlir::Operation *lastOper = op->getRegion(0).back().getTerminator();
+            firOpBuilder.setInsertionPoint(lastOper);
             lastPrivIP = firOpBuilder.saveInsertionPoint();
             firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP);
           }
@@ -2133,15 +2134,6 @@ static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter,
   return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize);
 }
 
-static void resetBeforeTerminator(fir::FirOpBuilder &firOpBuilder,
-                                  mlir::Operation *storeOp,
-                                  mlir::Block &block) {
-  if (storeOp)
-    firOpBuilder.setInsertionPointAfter(storeOp);
-  else
-    firOpBuilder.setInsertionPointToStart(&block);
-}
-
 static mlir::Operation *
 createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter,
                               mlir::Location loc, mlir::Value indexVal,
@@ -2183,11 +2175,43 @@ static void createBodyOfOp(
     const llvm::SmallVector<const Fortran::semantics::Symbol *> &args = {},
     bool outerCombined = false, DataSharingProcessor *dsp = nullptr) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
+  auto insertMarker = [](fir::FirOpBuilder &builder) {
+    mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(),
+                                                     builder.getIndexType());
+    return undef.getDefiningOp();
+  };
+
+  // Find the block where the OMP terminator should go. In simple cases
+  // it is the single block in the operation's region. When the region
+  // is more complicated, especially with unstructured control flow, there
+  // may be multiple blocks, and some of them may have non-OMP terminators
+  // resulting from lowering of the code contained within the operation.
+  // By OpenMP rules, there should be a single exit point from the region:
+  // here exit means transfering control to the code following the operation.
+  // STOP statement is allowed and does not count as exit for the purpose of
+  // inserting terminators.
+  auto findExitBlock = [&](mlir::Region &region) -> mlir::Block * {
+    auto isTerminated = [](mlir::Block &block) -> bool {
+      if (block.empty())
+        return false;
+      return block.back().hasTrait<mlir::OpTrait::IsTerminator>();
+    };
+
+    mlir::Block *exit = nullptr;
+    for (auto &block : region) {
+      if (!isTerminated(block)) {
+        assert(exit == nullptr && "Multiple exit block in OpenMP region");
+        exit = █
+      }
+    }
+    return exit;
+  };
+
   // If an argument for the region is provided then create the block with that
   // argument. Also update the symbol's address with the mlir argument value.
   // e.g. For loops the argument is the induction variable. And all further
   // uses of the induction variable should use this mlir value.
-  mlir::Operation *storeOp = nullptr;
   if (args.size()) {
     std::size_t loopVarTypeSize = 0;
     for (const Fortran::semantics::Symbol *arg : args)
@@ -2198,20 +2222,20 @@ static void createBodyOfOp(
     firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs);
     // The argument is not currently in memory, so make a temporary for the
     // argument, and store it there, then bind that location to the argument.
+    mlir::Operation *storeOp = nullptr;
     for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
       mlir::Value indexVal =
           fir::getBase(op.getRegion().front().getArgument(argIndex));
       storeOp =
           createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
     }
+    firOpBuilder.setInsertionPointAfter(storeOp);
   } else {
     firOpBuilder.createBlock(&op.getRegion());
   }
-  // Set the insert for the terminator operation to go at the end of the
-  // block - this is either empty or the block with the stores above,
-  // the end of the block works for both.
-  mlir::Block &block = op.getRegion().back();
-  firOpBuilder.setInsertionPointToEnd(&block);
+
+  // Mark the earliest insertion point.
+  mlir::Operation *marker = insertMarker(firOpBuilder);
 
   // If it is an unstructured region and is not the outer region of a combined
   // construct, create empty blocks for all evaluations.
@@ -2220,37 +2244,64 @@ static void createBodyOfOp(
                                             mlir::omp::YieldOp>(
         firOpBuilder, eval.getNestedEvaluations());
 
-  // Insert the terminator.
-  Fortran::lower::genOpenMPTerminator(firOpBuilder, op.getOperation(), loc);
-  // Reset the insert point to before the terminator.
-  resetBeforeTerminator(firOpBuilder, storeOp, block);
+  // Start with privatization, so that the lowering of the nested
+  // code will use the right symbols.
+  constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> ||
+                          std::is_same_v<Op, mlir::omp::SimdLoopOp>;
+  bool privatize = clauses && !outerCombined;
 
-  // Handle privatization. Do not privatize if this is the outer operation.
-  if (clauses && !outerCombined) {
-    constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> ||
-                            std::is_same_v<Op, mlir::omp::SimdLoopOp>;
+  firOpBuilder.setInsertionPoint(marker);
+  std::optional<DataSharingProcessor> tempDsp;
+  if (privatize) {
     if (!dsp) {
-      DataSharingProcessor proc(converter, *clauses, eval);
-      proc.processStep1();
-      proc.processStep2(op, isLoop);
-    } else {
-      if (isLoop && args.size() > 0)
-        dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
-      dsp->processStep2(op, isLoop);
+      tempDsp.emplace(converter, *clauses, eval);
+      tempDsp->processStep1();
     }
-
-    if (storeOp)
-      firOpBuilder.setInsertionPointAfter(storeOp);
   }
 
   if constexpr (std::is_same_v<Op, mlir::omp::ParallelOp>) {
     threadPrivatizeVars(converter, eval);
-    if (clauses)
+    if (clauses) {
+      firOpBuilder.setInsertionPoint(marker);
       ClauseProcessor(converter, *clauses).processCopyin();
+    }
   }
 
-  if (genNested)
+  if (genNested) {
+    // genFIR(Evaluation&) tries to patch up unterminated blocks, causing
+    // a lot of trouble if the terminator generation is delayed past this
+    // point. Insert a temporary terminator here, then delete it.
+    firOpBuilder.setInsertionPointToEnd(&op.getRegion().back());
+    auto *temp = Fortran::lower::genOpenMPTerminator(firOpBuilder,
+                                                     op.getOperation(), loc);
+    firOpBuilder.setInsertionPointAfter(marker);
     genNestedEvaluations(converter, eval);
+    temp->erase();
+  }
+
+  if (auto *exitBlock = findExitBlock(op.getRegion())) {
+    firOpBuilder.setInsertionPointToEnd(exitBlock);
+    auto *term = Fortran::lower::genOpenMPTerminator(firOpBuilder,
+                                                     op.getOperation(), loc);
+    // Only insert lastprivate code when there actually is an exit block.
+    // Such a block may not exist if the nested code produced an infinite
+    // loop (this may not make sense in production code, but a user could
+    // write that and we should handle it).
+    firOpBuilder.setInsertionPoint(term);
+    if (privatize) {
+      if (!dsp) {
+        assert(tempDsp.has_value());
+        tempDsp->processStep2(op, isLoop);
+      } else {
+        if (isLoop && args.size() > 0)
+          dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
+        dsp->processStep2(op, isLoop);
+      }
+    }
+  }
+
+  firOpBuilder.setInsertionPointAfter(marker);
+  marker->erase();
 }
 
 static void genBodyOfTargetDataOp(
@@ -3664,14 +3715,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 // Public functions
 //===----------------------------------------------------------------------===//
 
-void Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
-                                         mlir::Operation *op,
-                                         mlir::Location loc) {
+mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
+                                                     mlir::Operation *op,
+                                                     mlir::Location loc) {
   if (mlir::isa<mlir::omp::WsLoopOp, mlir::omp::ReductionDeclareOp,
                 mlir::omp::AtomicUpdateOp, mlir::omp::SimdLoopOp>(op))
-    builder.create<mlir::omp::YieldOp>(loc);
+    return builder.create<mlir::omp::YieldOp>(loc);
   else
-    builder.create<mlir::omp::TerminatorOp>(loc);
+    return builder.create<mlir::omp::TerminatorOp>(loc);
 }
 
 void Fortran::lower::genOpenMPConstruct(
diff --git a/flang/test/Lower/OpenMP/FIR/sections.f90 b/flang/test/Lower/OpenMP/FIR/sections.f90
index 87d34e58321be0..640ec34a05bc21 100644
--- a/flang/test/Lower/OpenMP/FIR/sections.f90
+++ b/flang/test/Lower/OpenMP/FIR/sections.f90
@@ -126,11 +126,11 @@ subroutine lastprivate()
             x = x * 10
 !CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
-!CHECK: %[[true:.*]] = arith.constant true
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[const:.*]] = arith.constant 1 : i32
 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
+!CHECK: %[[true:.*]] = arith.constant true
 !CHECK: fir.if %[[true]] {
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
@@ -163,11 +163,11 @@ subroutine lastprivate()
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: omp.barrier
-!CHECK: %[[true:.*]] = arith.constant true
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[const:.*]] = arith.constant 1 : i32
 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
+!CHECK: %[[true:.*]] = arith.constant true
 !CHECK: fir.if %true {
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
@@ -200,11 +200,11 @@ subroutine lastprivate()
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: omp.barrier
-!CHECK: %[[true:.*]] = arith.constant true
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[const:.*]] = arith.constant 1 : i32
 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
+!CHECK: %[[true:.*]] = arith.constant true
 !CHECK: fir.if %true {
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90
new file mode 100644
index 00000000000000..d44b440a72d90d
--- /dev/null
+++ b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90
@@ -0,0 +1,19 @@
+! RUN: bbc -fopenmp -o - %s | FileCheck %s
+
+! Check that this test can be lowered successfully.
+! See https://github.com/llvm/llvm-project/issues/74348
+
+! CHECK-LABEL: func.func @_QPsb
+! CHECK: omp.parallel
+subroutine sb(ninter, numnod)
+  integer :: ninter, numnod
+  integer, dimension(:), allocatable :: indx_nm
+
+  !$omp parallel
+  if (ninter>0) then
+    allocate(indx_nm(numnod))
+  endif
+  220 continue
+  goto 220
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90
index 16426d070d9a94..6bad688058d282 100644
--- a/flang/test/Lower/OpenMP/sections.f90
+++ b/flang/test/Lower/OpenMP/sections.f90
@@ -141,11 +141,11 @@ subroutine lastprivate()
 !CHECK: omp.section {
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: %[[TRUE:.*]] = arith.constant true
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[CONST:.*]] = arith.constant 1 : i32
 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TRUE:.*]] = arith.constant true
 !CHECK: fir.if %[[TRUE]] {
 !CHECK: %[[TEMP1:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP1]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
@@ -180,11 +180,11 @@ subroutine lastprivate()
 !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK: omp.barrier
-!CHECK: %[[TRUE:.*]] = arith.constant true
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[CONST:.*]] = arith.constant 1 : i32
 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TRUE:.*]] = arith.constant true
 !CHECK: fir.if %[[TRUE]] {
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
@@ -219,11 +219,11 @@ subroutine lastprivate()
 !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK: omp.barrier
-!CHECK: %[[TRUE:.*]] = arith.constant true
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: %[[CONST:.*]] = arith.constant 1 : i32
 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TRUE:.*]] = arith.constant true
 !CHECK: fir.if %[[TRUE]] {
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>



More information about the llvm-branch-commits mailing list