[flang-commits] [flang] 55cb52b - [Flang][OpenMP] Restructure recursive lowering in `createBodyOfOp` (#77761)
via flang-commits
flang-commits at lists.llvm.org
Mon Jan 22 06:41:12 PST 2024
Author: Krzysztof Parzyszek
Date: 2024-01-22T08:41:07-06:00
New Revision: 55cb52bbc539777aeb176f22fef0b4bfc70883fc
URL: https://github.com/llvm/llvm-project/commit/55cb52bbc539777aeb176f22fef0b4bfc70883fc
DIFF: https://github.com/llvm/llvm-project/commit/55cb52bbc539777aeb176f22fef0b4bfc70883fc.diff
LOG: [Flang][OpenMP] Restructure recursive lowering in `createBodyOfOp` (#77761)
This brings `createBodyOfOp` to its final intended form. First, input
privatization is performed, then the recursive lowering takes place, and
finally the output privatization (lastprivate) is done.
This enables fixing a known issue with infinite loops inside of an
OpenMP region, and the fix is included in this patch.
Fixes https://github.com/llvm/llvm-project/issues/74348.
Recursive lowering [5/5]
---------
Co-authored-by: Kiran Chandramohan <kiran.chandramohan at arm.com>
Added:
flang/test/Lower/OpenMP/infinite-loop-in-construct.f90
flang/test/Lower/OpenMP/wsloop-unstructured.f90
Modified:
flang/include/flang/Lower/OpenMP.h
flang/lib/Lower/OpenMP.cpp
flang/test/Lower/OpenMP/FIR/sections.f90
flang/test/Lower/OpenMP/sections.f90
Removed:
################################################################################
diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h
index 872b7d50c3e7a8..d2b7a423d77810 100644
--- a/flang/include/flang/Lower/OpenMP.h
+++ b/flang/include/flang/Lower/OpenMP.h
@@ -50,8 +50,8 @@ struct Variable;
} // namespace pft
// Generate the OpenMP terminator for Operation at Location.
-void genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *,
- mlir::Location);
+mlir::Operation *genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *,
+ mlir::Location);
void genOpenMPConstruct(AbstractConverter &, Fortran::lower::SymMap &,
semantics::SemanticsContext &, pft::Evaluation &,
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 7dd25f75d9eb76..3deeb6019b19bb 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -27,6 +27,7 @@
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/openmp-directive-sets.h"
#include "flang/Semantics/tools.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Transforms/RegionUtils.h"
@@ -385,7 +386,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
// construct
mlir::OpBuilder::InsertPoint unstructuredSectionsIP =
firOpBuilder.saveInsertionPoint();
- firOpBuilder.setInsertionPointToStart(&op->getRegion(0).back());
+ mlir::Operation *lastOper = op->getRegion(0).back().getTerminator();
+ firOpBuilder.setInsertionPoint(lastOper);
lastPrivIP = firOpBuilder.saveInsertionPoint();
firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP);
}
@@ -2206,15 +2208,6 @@ static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter,
return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize);
}
-static void resetBeforeTerminator(fir::FirOpBuilder &firOpBuilder,
- mlir::Operation *storeOp,
- mlir::Block &block) {
- if (storeOp)
- firOpBuilder.setInsertionPointAfter(storeOp);
- else
- firOpBuilder.setInsertionPointToStart(&block);
-}
-
static mlir::Operation *
createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter,
mlir::Location loc, mlir::Value indexVal,
@@ -2257,11 +2250,17 @@ static void createBodyOfOp(
const llvm::SmallVector<const Fortran::semantics::Symbol *> &args = {},
bool outerCombined = false, DataSharingProcessor *dsp = nullptr) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
+ auto insertMarker = [](fir::FirOpBuilder &builder) {
+ mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(),
+ builder.getIndexType());
+ return undef.getDefiningOp();
+ };
+
// If an argument for the region is provided then create the block with that
// argument. Also update the symbol's address with the mlir argument value.
// e.g. For loops the argument is the induction variable. And all further
// uses of the induction variable should use this mlir value.
- mlir::Operation *storeOp = nullptr;
if (args.size()) {
std::size_t loopVarTypeSize = 0;
for (const Fortran::semantics::Symbol *arg : args)
@@ -2272,20 +2271,20 @@ static void createBodyOfOp(
firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs);
// The argument is not currently in memory, so make a temporary for the
// argument, and store it there, then bind that location to the argument.
+ mlir::Operation *storeOp = nullptr;
for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
mlir::Value indexVal =
fir::getBase(op.getRegion().front().getArgument(argIndex));
storeOp =
createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
}
+ firOpBuilder.setInsertionPointAfter(storeOp);
} else {
firOpBuilder.createBlock(&op.getRegion());
}
- // Set the insert for the terminator operation to go at the end of the
- // block - this is either empty or the block with the stores above,
- // the end of the block works for both.
- mlir::Block &block = op.getRegion().back();
- firOpBuilder.setInsertionPointToEnd(&block);
+
+ // Mark the earliest insertion point.
+ mlir::Operation *marker = insertMarker(firOpBuilder);
// If it is an unstructured region and is not the outer region of a combined
// construct, create empty blocks for all evaluations.
@@ -2294,37 +2293,100 @@ static void createBodyOfOp(
mlir::omp::YieldOp>(
firOpBuilder, eval.getNestedEvaluations());
- // Insert the terminator.
- Fortran::lower::genOpenMPTerminator(firOpBuilder, op.getOperation(), loc);
- // Reset the insert point to before the terminator.
- resetBeforeTerminator(firOpBuilder, storeOp, block);
+ // Start with privatization, so that the lowering of the nested
+ // code will use the right symbols.
+ constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> ||
+ std::is_same_v<Op, mlir::omp::SimdLoopOp>;
+ bool privatize = clauses && !outerCombined;
- // Handle privatization. Do not privatize if this is the outer operation.
- if (clauses && !outerCombined) {
- constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> ||
- std::is_same_v<Op, mlir::omp::SimdLoopOp>;
+ firOpBuilder.setInsertionPoint(marker);
+ std::optional<DataSharingProcessor> tempDsp;
+ if (privatize) {
if (!dsp) {
- DataSharingProcessor proc(converter, *clauses, eval);
- proc.processStep1();
- proc.processStep2(op, isLoop);
- } else {
- if (isLoop && args.size() > 0)
- dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
- dsp->processStep2(op, isLoop);
+ tempDsp.emplace(converter, *clauses, eval);
+ tempDsp->processStep1();
}
-
- if (storeOp)
- firOpBuilder.setInsertionPointAfter(storeOp);
}
if constexpr (std::is_same_v<Op, mlir::omp::ParallelOp>) {
threadPrivatizeVars(converter, eval);
- if (clauses)
+ if (clauses) {
+ firOpBuilder.setInsertionPoint(marker);
ClauseProcessor(converter, *clauses).processCopyin();
+ }
}
- if (genNested)
+ if (genNested) {
+ // genFIR(Evaluation&) tries to patch up unterminated blocks, causing
+ // a lot of trouble if the terminator generation is delayed past this
+ // point. Insert a temporary terminator here, then delete it.
+ firOpBuilder.setInsertionPointToEnd(&op.getRegion().back());
+ auto *temp = Fortran::lower::genOpenMPTerminator(firOpBuilder,
+ op.getOperation(), loc);
+ firOpBuilder.setInsertionPointAfter(marker);
genNestedEvaluations(converter, eval);
+ temp->erase();
+ }
+
+ // Get or create a unique exiting block from the given region, or
+ // return nullptr if there is no exiting block.
+ auto getUniqueExit = [&](mlir::Region ®ion) -> mlir::Block * {
+ // Find the blocks where the OMP terminator should go. In simple cases
+ // it is the single block in the operation's region. When the region
+ // is more complicated, especially with unstructured control flow, there
+ // may be multiple blocks, and some of them may have non-OMP terminators
+ // resulting from lowering of the code contained within the operation.
+ // All the remaining blocks are potential exit points from the op's region.
+ //
+ // Explicit control flow cannot exit any OpenMP region (other than via
+ // STOP), and that is enforced by semantic checks prior to lowering. STOP
+ // statements are lowered to a function call.
+
+ // Collect unterminated blocks.
+ llvm::SmallVector<mlir::Block *> exits;
+ for (mlir::Block &b : region) {
+ if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>())
+ exits.push_back(&b);
+ }
+
+ if (exits.empty())
+ return nullptr;
+ // If there already is a unique exiting block, do not create another one.
+ // Additionally, some ops (e.g. omp.sections) require only 1 block in
+ // its region.
+ if (exits.size() == 1)
+ return exits[0];
+ mlir::Block *exit = firOpBuilder.createBlock(®ion);
+ for (mlir::Block *b : exits) {
+ firOpBuilder.setInsertionPointToEnd(b);
+ firOpBuilder.create<mlir::cf::BranchOp>(loc, exit);
+ }
+ return exit;
+ };
+
+ if (auto *exitBlock = getUniqueExit(op.getRegion())) {
+ firOpBuilder.setInsertionPointToEnd(exitBlock);
+ auto *term = Fortran::lower::genOpenMPTerminator(firOpBuilder,
+ op.getOperation(), loc);
+ // Only insert lastprivate code when there actually is an exit block.
+ // Such a block may not exist if the nested code produced an infinite
+ // loop (this may not make sense in production code, but a user could
+ // write that and we should handle it).
+ firOpBuilder.setInsertionPoint(term);
+ if (privatize) {
+ if (!dsp) {
+ assert(tempDsp.has_value());
+ tempDsp->processStep2(op, isLoop);
+ } else {
+ if (isLoop && args.size() > 0)
+ dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
+ dsp->processStep2(op, isLoop);
+ }
+ }
+ }
+
+ firOpBuilder.setInsertionPointAfter(marker);
+ marker->erase();
}
static void genBodyOfTargetDataOp(
@@ -3756,14 +3818,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
// Public functions
//===----------------------------------------------------------------------===//
-void Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
- mlir::Operation *op,
- mlir::Location loc) {
+mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
+ mlir::Operation *op,
+ mlir::Location loc) {
if (mlir::isa<mlir::omp::WsLoopOp, mlir::omp::ReductionDeclareOp,
mlir::omp::AtomicUpdateOp, mlir::omp::SimdLoopOp>(op))
- builder.create<mlir::omp::YieldOp>(loc);
+ return builder.create<mlir::omp::YieldOp>(loc);
else
- builder.create<mlir::omp::TerminatorOp>(loc);
+ return builder.create<mlir::omp::TerminatorOp>(loc);
}
void Fortran::lower::genOpenMPConstruct(
diff --git a/flang/test/Lower/OpenMP/FIR/sections.f90 b/flang/test/Lower/OpenMP/FIR/sections.f90
index 87d34e58321be0..640ec34a05bc21 100644
--- a/flang/test/Lower/OpenMP/FIR/sections.f90
+++ b/flang/test/Lower/OpenMP/FIR/sections.f90
@@ -126,11 +126,11 @@ subroutine lastprivate()
x = x * 10
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
-!CHECK: %[[true:.*]] = arith.constant true
!CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 1 : i32
!CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
!CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
+!CHECK: %[[true:.*]] = arith.constant true
!CHECK: fir.if %[[true]] {
!CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
!CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
@@ -163,11 +163,11 @@ subroutine lastprivate()
!CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
!CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
!CHECK: omp.barrier
-!CHECK: %[[true:.*]] = arith.constant true
!CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 1 : i32
!CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
!CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
+!CHECK: %[[true:.*]] = arith.constant true
!CHECK: fir.if %true {
!CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
!CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
@@ -200,11 +200,11 @@ subroutine lastprivate()
!CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
!CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
!CHECK: omp.barrier
-!CHECK: %[[true:.*]] = arith.constant true
!CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 1 : i32
!CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32
!CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32>
+!CHECK: %[[true:.*]] = arith.constant true
!CHECK: fir.if %true {
!CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
!CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90
new file mode 100644
index 00000000000000..16b400a2318609
--- /dev/null
+++ b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90
@@ -0,0 +1,26 @@
+! RUN: bbc -fopenmp -o - %s | FileCheck %s
+
+! Check that this test can be lowered successfully.
+! See https://github.com/llvm/llvm-project/issues/74348
+
+! CHECK-LABEL: func.func @_QPsb
+! CHECK: omp.parallel
+! CHECK: cf.cond_br %{{[0-9]+}}, ^bb1, ^bb2
+! CHECK-NEXT: ^bb1: // pred: ^bb0
+! CHECK: cf.br ^bb2
+! CHECK-NEXT: ^bb2: // 3 preds: ^bb0, ^bb1, ^bb2
+! CHECK-NEXT: cf.br ^bb2
+! CHECK-NEXT: }
+
+subroutine sb(ninter, numnod)
+ integer :: ninter, numnod
+ integer, dimension(:), allocatable :: indx_nm
+
+ !$omp parallel
+ if (ninter>0) then
+ allocate(indx_nm(numnod))
+ endif
+ 220 continue
+ goto 220
+ !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90
index 16426d070d9a94..6bad688058d282 100644
--- a/flang/test/Lower/OpenMP/sections.f90
+++ b/flang/test/Lower/OpenMP/sections.f90
@@ -141,11 +141,11 @@ subroutine lastprivate()
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[CONST:.*]] = arith.constant 1 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: fir.if %[[TRUE]] {
!CHECK: %[[TEMP1:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP1]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
@@ -180,11 +180,11 @@ subroutine lastprivate()
!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: omp.barrier
-!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[CONST:.*]] = arith.constant 1 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: fir.if %[[TRUE]] {
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
@@ -219,11 +219,11 @@ subroutine lastprivate()
!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: omp.barrier
-!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[CONST:.*]] = arith.constant 1 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: fir.if %[[TRUE]] {
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90
new file mode 100644
index 00000000000000..7fe63a1fe607c2
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90
@@ -0,0 +1,61 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s
+
+subroutine sub(imax, jmax, x, y)
+ integer, intent(in) :: imax, jmax
+ real, intent(in), dimension(1:imax, 1:jmax) :: x, y
+
+ integer :: i, j, ii
+
+ ! collapse(2) is needed to reproduce the issue
+ !$omp parallel do collapse(2)
+ do j = 1, jmax
+ do i = 1, imax
+ do ii = 1, imax ! note that this loop is not collapsed
+ if (x(i,j) < y(ii,j)) then
+ ! exit needed to force unstructured control flow
+ exit
+ endif
+ enddo
+ enddo
+ enddo
+end subroutine sub
+
+! this is testing that we don't crash generating code for this: in particular
+! that all blocks are terminated
+
+! CHECK-LABEL: func.func @_QPsub(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<i32> {fir.bindc_name = "imax"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "jmax"},
+! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "y"}) {
+! [...]
+! CHECK: omp.wsloop for (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) {
+! [...]
+! CHECK: cf.br ^bb1
+! CHECK: ^bb1:
+! CHECK: cf.br ^bb2
+! CHECK: ^bb2:
+! [...]
+! CHECK: cf.br ^bb3
+! CHECK: ^bb3:
+! [...]
+! CHECK: %[[VAL_63:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32
+! CHECK: cf.cond_br %[[VAL_63]], ^bb4, ^bb7
+! CHECK: ^bb4:
+! [...]
+! CHECK: %[[VAL_76:.*]] = arith.cmpf olt, %{{.*}}, %{{.*}} fastmath<contract> : f32
+! CHECK: cf.cond_br %[[VAL_76]], ^bb5, ^bb6
+! CHECK: ^bb5:
+! CHECK: cf.br ^bb7
+! CHECK: ^bb6:
+! [...]
+! CHECK: cf.br ^bb3
+! CHECK: ^bb7:
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: cf.br ^bb1
+! CHECK: ^bb1:
+! CHECK: return
+! CHECK: }
More information about the flang-commits
mailing list