[flang-commits] [flang] 4202d69 - [Flang][OpenMP] Upstream the lowering of the parallel do combined construct
Kiran Chandramohan via flang-commits
flang-commits at lists.llvm.org
Thu May 19 14:19:01 PDT 2022
Author: Kiran Chandramohan
Date: 2022-05-19T21:13:50Z
New Revision: 4202d69d9efe450f6f9f09ef58fcc008b707d24e
URL: https://github.com/llvm/llvm-project/commit/4202d69d9efe450f6f9f09ef58fcc008b707d24e
DIFF: https://github.com/llvm/llvm-project/commit/4202d69d9efe450f6f9f09ef58fcc008b707d24e.diff
LOG: [Flang][OpenMP] Upstream the lowering of the parallel do combined construct
When parallel is used in a combined construct, use a separate
function to create the parallel operation. It handles the
parallel-specific clauses and leaves the rest to be handled by the
inner operations.
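For illustration (a minimal sketch combining the clauses exercised by
the new tests below), given

  !$omp parallel do if(cond) num_threads(nt) schedule(dynamic)
  do i = 1, 9
    print *, i
  end do
  !$omp end parallel do

the IF, NUM_THREADS and PROC_BIND clauses are handled on the created
omp.parallel operation, while SCHEDULE and the loop bounds are handled
when the inner omp.wsloop operation is created.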
Reviewed By: peixin, shraiysh
Differential Revision: https://reviews.llvm.org/D125465
Co-authored-by: Sourabh Singh Tomar <SourabhSingh.Tomar at amd.com>
Co-authored-by: Eric Schweitz <eschweitz at nvidia.com>
Co-authored-by: Valentin Clement <clementval at gmail.com>
Co-authored-by: Nimish Mishra <neelam.nimish at gmail.com>
Added:
flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
Modified:
flang/lib/Lower/OpenMP.cpp
flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
flang/test/Lower/OpenMP/parallel-sections.f90
Removed:
################################################################################
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 571cf5c85a87c..35e0efb7ecca7 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -278,6 +278,80 @@ genOMP(Fortran::lower::AbstractConverter &converter,
standaloneConstruct.u);
}
+static omp::ClauseProcBindKindAttr genProcBindKindAttr(
+ fir::FirOpBuilder &firOpBuilder,
+ const Fortran::parser::OmpClause::ProcBind *procBindClause) {
+ omp::ClauseProcBindKind pbKind;
+ switch (procBindClause->v.v) {
+ case Fortran::parser::OmpProcBindClause::Type::Master:
+ pbKind = omp::ClauseProcBindKind::Master;
+ break;
+ case Fortran::parser::OmpProcBindClause::Type::Close:
+ pbKind = omp::ClauseProcBindKind::Close;
+ break;
+ case Fortran::parser::OmpProcBindClause::Type::Spread:
+ pbKind = omp::ClauseProcBindKind::Spread;
+ break;
+ case Fortran::parser::OmpProcBindClause::Type::Primary:
+ pbKind = omp::ClauseProcBindKind::Primary;
+ break;
+ }
+ return omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
+}
+
+/* When parallel is used in a combined construct, use this function to
+ * create the parallel operation. It handles the parallel-specific
+ * clauses and leaves the rest to be handled by the inner operations.
+ * TODO: Refactor clause handling
+ */
+template <typename Directive>
+static void
+createCombinedParallelOp(Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::pft::Evaluation &eval,
+ const Directive &directive) {
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ mlir::Location currentLocation = converter.getCurrentLocation();
+ Fortran::lower::StatementContext stmtCtx;
+ llvm::ArrayRef<mlir::Type> argTy;
+ mlir::Value ifClauseOperand, numThreadsClauseOperand;
+ SmallVector<Value> allocatorOperands, allocateOperands;
+ mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
+ const auto &opClauseList =
+ std::get<Fortran::parser::OmpClauseList>(directive.t);
+ // TODO: Handle the following clauses
+ // 1. default
+ // 2. copyin
+ // Note: rest of the clauses are handled when the inner operation is created
+ for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
+ if (const auto &ifClause =
+ std::get_if<Fortran::parser::OmpClause::If>(&clause.u)) {
+ auto &expr = std::get<Fortran::parser::ScalarLogicalExpr>(ifClause->v.t);
+ mlir::Value ifVal = fir::getBase(
+ converter.genExprValue(*Fortran::semantics::GetExpr(expr), stmtCtx));
+ ifClauseOperand = firOpBuilder.createConvert(
+ currentLocation, firOpBuilder.getI1Type(), ifVal);
+ } else if (const auto &numThreadsClause =
+ std::get_if<Fortran::parser::OmpClause::NumThreads>(
+ &clause.u)) {
+ numThreadsClauseOperand = fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx));
+ } else if (const auto &procBindClause =
+ std::get_if<Fortran::parser::OmpClause::ProcBind>(
+ &clause.u)) {
+ procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
+ }
+ }
+ // Create and insert the operation.
+ auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
+ currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand,
+ allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
+ /*reductions=*/nullptr, procBindKindAttr);
+
+ createBodyOfOp<omp::ParallelOp>(parallelOp, converter, currentLocation,
+ &opClauseList, /*iv=*/{},
+ /*isCombined=*/true);
+}
+
static void
genOMP(Fortran::lower::AbstractConverter &converter,
Fortran::lower::pft::Evaluation &eval,
@@ -318,23 +392,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
} else if (const auto &procBindClause =
std::get_if<Fortran::parser::OmpClause::ProcBind>(
&clause.u)) {
- omp::ClauseProcBindKind pbKind;
- switch (procBindClause->v.v) {
- case Fortran::parser::OmpProcBindClause::Type::Master:
- pbKind = omp::ClauseProcBindKind::Master;
- break;
- case Fortran::parser::OmpProcBindClause::Type::Close:
- pbKind = omp::ClauseProcBindKind::Close;
- break;
- case Fortran::parser::OmpProcBindClause::Type::Spread:
- pbKind = omp::ClauseProcBindKind::Spread;
- break;
- case Fortran::parser::OmpProcBindClause::Type::Primary:
- pbKind = omp::ClauseProcBindKind::Primary;
- break;
- }
- procBindKindAttr =
- omp::ClauseProcBindKindAttr::get(firOpBuilder.getContext(), pbKind);
+ procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause);
} else if (const auto &allocateClause =
std::get_if<Fortran::parser::OmpClause::Allocate>(
&clause.u)) {
@@ -419,11 +477,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
noWaitClauseOperand, orderedClauseOperand, orderClauseOperand;
const auto &wsLoopOpClauseList = std::get<Fortran::parser::OmpClauseList>(
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t);
- if (llvm::omp::OMPD_do !=
+
+ const auto ompDirective =
std::get<Fortran::parser::OmpLoopDirective>(
std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t).t)
- .v) {
- TODO(converter.getCurrentLocation(), "Combined worksharing loop construct");
+ .v;
+ if (llvm::omp::OMPD_parallel_do == ompDirective) {
+ createCombinedParallelOp<Fortran::parser::OmpBeginLoopDirective>(
+ converter, eval,
+ std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t));
+ } else if (llvm::omp::OMPD_do != ompDirective) {
+ TODO(converter.getCurrentLocation(), "Construct enclosing do loop");
}
int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
@@ -648,15 +712,14 @@ genOMP(Fortran::lower::AbstractConverter &converter,
// Parallel Sections Construct
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
- auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(
- currentLocation, /*if_expr_var*/ nullptr, /*num_threads_var*/ nullptr,
- allocateOperands, allocatorOperands, /*reduction_vars=*/ValueRange(),
- /*reductions=*/nullptr, /*proc_bind_val*/ nullptr);
- createBodyOfOp(parallelOp, converter, currentLocation);
+ createCombinedParallelOp<Fortran::parser::OmpBeginSectionsDirective>(
+ converter, eval,
+ std::get<Fortran::parser::OmpBeginSectionsDirective>(
+ sectionsConstruct.t));
auto sectionsOp = firOpBuilder.create<mlir::omp::SectionsOp>(
currentLocation, /*reduction_vars*/ ValueRange(),
- /*reductions=*/nullptr, /*allocate_vars*/ ValueRange(),
- /*allocators_vars*/ ValueRange(), /*nowait=*/nullptr);
+ /*reductions=*/nullptr, allocateOperands, allocatorOperands,
+ /*nowait=*/nullptr);
createBodyOfOp(sectionsOp, converter, currentLocation);
// Sections Construct
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index 6d2cf700d81ea..601cf70815cf0 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -71,3 +71,36 @@ func.func @_QPsb2(%arg0: !fir.ref<i32> {fir.bindc_name = "x"}, %arg1: !fir.ref<i
// CHECK: }
// CHECK: llvm.return
// CHECK: }
+
+
+// -----
+
+func.func @_QPsb(%arr: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr"}) {
+ %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsbEi"}
+ omp.parallel {
+ %c1 = arith.constant 1 : i32
+ %c50 = arith.constant 50 : i32
+ omp.wsloop for (%indx) : i32 = (%c1) to (%c50) inclusive step (%c1) {
+ %1 = fir.convert %indx : (i32) -> i64
+ %c1_i64 = arith.constant 1 : i64
+ %2 = arith.subi %1, %c1_i64 : i64
+ %3 = fir.coordinate_of %arr, %2 : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+ fir.store %indx to %3 : !fir.ref<i32>
+ omp.yield
+ }
+ omp.terminator
+ }
+ return
+}
+
+// Check only for the structure of the OpenMP portion and the feasibility of the conversion
+// CHECK-LABEL: @_QPsb
+// CHECK-SAME: %{{.*}}: !llvm.ptr<struct<({{.*}})>> {fir.bindc_name = "arr"}
+// CHECK: omp.parallel {
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
+// CHECK: %[[C50:.*]] = llvm.mlir.constant(50 : i32) : i32
+// CHECK: omp.wsloop for (%[[INDX:.*]]) : i32 = (%[[C1]]) to (%[[C50]]) inclusive step (%[[C1]]) {
+// CHECK: llvm.store %[[INDX]], %{{.*}} : !llvm.ptr<i32>
+// CHECK: omp.yield
+// CHECK: omp.terminator
+// CHECK: llvm.return
diff --git a/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90 b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
new file mode 100644
index 0000000000000..80bb1ca19daeb
--- /dev/null
+++ b/flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
@@ -0,0 +1,96 @@
+! This test checks lowering of the OpenMP PARALLEL DO combined directive (worksharing loop).
+
+! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPsimple_parallel_do()
+subroutine simple_parallel_do
+ integer :: i
+ ! CHECK: omp.parallel
+ ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
+ ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
+ ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+ !$OMP PARALLEL DO
+ do i=1, 9
+ ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+ print*, i
+ end do
+ ! CHECK: omp.yield
+ ! CHECK: omp.terminator
+ !$OMP END PARALLEL DO
+end subroutine
+
+! CHECK-LABEL: func @_QPparallel_do_with_parallel_clauses
+! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
+subroutine parallel_do_with_parallel_clauses(cond, nt)
+ logical :: cond
+ integer :: nt
+ integer :: i
+ ! CHECK: %[[COND:.*]] = fir.load %[[COND_REF]] : !fir.ref<!fir.logical<4>>
+ ! CHECK: %[[COND_CVT:.*]] = fir.convert %[[COND]] : (!fir.logical<4>) -> i1
+ ! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
+ ! CHECK: omp.parallel if(%[[COND_CVT]] : i1) num_threads(%[[NT]] : i32) proc_bind(close)
+ ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
+ ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
+ ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+ !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
+ do i=1, 9
+ ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+ print*, i
+ end do
+ ! CHECK: omp.yield
+ ! CHECK: omp.terminator
+ !$OMP END PARALLEL DO
+end subroutine
+
+! CHECK-LABEL: func @_QPparallel_do_with_clauses
+! CHECK-SAME: %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
+subroutine parallel_do_with_clauses(nt)
+ integer :: nt
+ integer :: i
+ ! CHECK: %[[NT:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
+ ! CHECK: omp.parallel num_threads(%[[NT]] : i32)
+ ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
+ ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
+ ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+ !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
+ do i=1, 9
+ ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+ print*, i
+ end do
+ ! CHECK: omp.yield
+ ! CHECK: omp.terminator
+ !$OMP END PARALLEL DO
+end subroutine
+
+! CHECK-LABEL: func @_QPparallel_do_with_privatisation_clauses
+! CHECK-SAME: %[[COND_REF:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}, %[[NT_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}
+subroutine parallel_do_with_privatisation_clauses(cond,nt)
+ logical :: cond
+ integer :: nt
+ integer :: i
+ ! CHECK: omp.parallel
+ ! CHECK: %[[PRIVATE_COND_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEcond"}
+ ! CHECK: %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
+ ! CHECK: %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
+ ! CHECK: fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
+ ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
+ ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
+ ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
+ ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+ !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
+ do i=1, 9
+ ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) : (!fir.ref<i8>, i32) -> i1
+ ! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
+ ! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
+ ! CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) : (!fir.ref<i8>, i1) -> i1
+ ! CHECK: %[[PRIVATE_NT_VAL:.*]] = fir.load %[[PRIVATE_NT_REF]] : !fir.ref<i32>
+ ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[PRIVATE_NT_VAL]]) : (!fir.ref<i8>, i32) -> i1
+ print*, i, cond, nt
+ end do
+ ! CHECK: omp.yield
+ ! CHECK: omp.terminator
+ !$OMP END PARALLEL DO
+end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-sections.f90 b/flang/test/Lower/OpenMP/parallel-sections.f90
index c88d60cc8f88b..e9759072c5234 100644
--- a/flang/test/Lower/OpenMP/parallel-sections.f90
+++ b/flang/test/Lower/OpenMP/parallel-sections.f90
@@ -40,8 +40,8 @@ subroutine omp_parallel_sections_allocate(x, y)
integer, intent(inout) :: x, y
!FIRDialect: %[[allocator:.*]] = arith.constant 1 : i32
!LLVMDialect: %[[allocator:.*]] = llvm.mlir.constant(1 : i32) : i32
- !OMPDialect: omp.parallel allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
- !OMPDialect: omp.sections {
+ !OMPDialect: omp.parallel {
+ !OMPDialect: omp.sections allocate(%[[allocator]] : i32 -> %{{.*}} : !fir.ref<i32>) {
!$omp parallel sections allocate(omp_high_bw_mem_alloc: x)
!OMPDialect: omp.section {
!$omp section