[flang-commits] [flang] dd42112 - [Flang][OpenMP] MLIR lowering support for grainsize and num_tasks clause (#128490)

via flang-commits flang-commits at lists.llvm.org
Fri May 9 04:56:32 PDT 2025


Author: Kaviya Rajendiran
Date: 2025-05-09T17:26:28+05:30
New Revision: dd42112c82d7b12669513dca4048167664b211b2

URL: https://github.com/llvm/llvm-project/commit/dd42112c82d7b12669513dca4048167664b211b2
DIFF: https://github.com/llvm/llvm-project/commit/dd42112c82d7b12669513dca4048167664b211b2.diff

LOG: [Flang][OpenMP] MLIR lowering support for grainsize and num_tasks clause (#128490)

- Added MLIR lowering for the grainsize and num_tasks clauses of the taskloop construct.

Added: 
    flang/test/Lower/OpenMP/taskloop-grainsize.f90
    flang/test/Lower/OpenMP/taskloop-numtasks.f90

Modified: 
    flang/lib/Lower/OpenMP/ClauseProcessor.cpp
    flang/lib/Lower/OpenMP/ClauseProcessor.h
    flang/lib/Lower/OpenMP/OpenMP.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 318455f0afe80..79b5087e4da68 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -388,6 +388,27 @@ bool ClauseProcessor::processNowait(mlir::omp::NowaitClauseOps &result) const {
   return markClauseOccurrence<omp::clause::Nowait>(result.nowait);
 }
 
+bool ClauseProcessor::processNumTasks(
+    lower::StatementContext &stmtCtx,
+    mlir::omp::NumTasksClauseOps &result) const {
+  using NumTasks = omp::clause::NumTasks;
+  if (auto *clause = findUniqueClause<NumTasks>()) {
+    fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+    mlir::MLIRContext *context = firOpBuilder.getContext();
+    const auto &modifier =
+        std::get<std::optional<NumTasks::Prescriptiveness>>(clause->t);
+    if (modifier && *modifier == NumTasks::Prescriptiveness::Strict) {
+      result.numTasksMod = mlir::omp::ClauseNumTasksTypeAttr::get(
+          context, mlir::omp::ClauseNumTasksType::Strict);
+    }
+    const auto &numtasksExpr = std::get<omp::SomeExpr>(clause->t);
+    result.numTasks =
+        fir::getBase(converter.genExprValue(numtasksExpr, stmtCtx));
+    return true;
+  }
+  return false;
+}
+
 bool ClauseProcessor::processNumTeams(
     lower::StatementContext &stmtCtx,
     mlir::omp::NumTeamsClauseOps &result) const {
@@ -934,6 +955,27 @@ bool ClauseProcessor::processDepend(lower::SymMap &symMap,
   return findRepeatableClause<omp::clause::Depend>(process);
 }
 
+bool ClauseProcessor::processGrainsize(
+    lower::StatementContext &stmtCtx,
+    mlir::omp::GrainsizeClauseOps &result) const {
+  using Grainsize = omp::clause::Grainsize;
+  if (auto *clause = findUniqueClause<Grainsize>()) {
+    fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+    mlir::MLIRContext *context = firOpBuilder.getContext();
+    const auto &modifier =
+        std::get<std::optional<Grainsize::Prescriptiveness>>(clause->t);
+    if (modifier && *modifier == Grainsize::Prescriptiveness::Strict) {
+      result.grainsizeMod = mlir::omp::ClauseGrainsizeTypeAttr::get(
+          context, mlir::omp::ClauseGrainsizeType::Strict);
+    }
+    const auto &grainsizeExpr = std::get<omp::SomeExpr>(clause->t);
+    result.grainsize =
+        fir::getBase(converter.genExprValue(grainsizeExpr, stmtCtx));
+    return true;
+  }
+  return false;
+}
+
 bool ClauseProcessor::processHasDeviceAddr(
     lower::StatementContext &stmtCtx, mlir::omp::HasDeviceAddrClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &hasDeviceSyms) const {

diff  --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 3d3f26f06da26..2e4d911aab35e 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -73,6 +73,8 @@ class ClauseProcessor {
                      mlir::omp::FilterClauseOps &result) const;
   bool processFinal(lower::StatementContext &stmtCtx,
                     mlir::omp::FinalClauseOps &result) const;
+  bool processGrainsize(lower::StatementContext &stmtCtx,
+                        mlir::omp::GrainsizeClauseOps &result) const;
   bool processHasDeviceAddr(
       lower::StatementContext &stmtCtx,
       mlir::omp::HasDeviceAddrClauseOps &result,
@@ -82,6 +84,8 @@ class ClauseProcessor {
                         mlir::omp::InclusiveClauseOps &result) const;
   bool processMergeable(mlir::omp::MergeableClauseOps &result) const;
   bool processNowait(mlir::omp::NowaitClauseOps &result) const;
+  bool processNumTasks(lower::StatementContext &stmtCtx,
+                       mlir::omp::NumTasksClauseOps &result) const;
   bool processNumTeams(lower::StatementContext &stmtCtx,
                        mlir::omp::NumTeamsClauseOps &result) const;
   bool processNumThreads(lower::StatementContext &stmtCtx,

diff  --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 099d5c604060f..1a326345379f5 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1783,17 +1783,19 @@ static void genTaskgroupClauses(
 
 static void genTaskloopClauses(lower::AbstractConverter &converter,
                                semantics::SemanticsContext &semaCtx,
+                               lower::StatementContext &stmtCtx,
                                const List<Clause> &clauses, mlir::Location loc,
                                mlir::omp::TaskloopOperands &clauseOps) {
 
   ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processGrainsize(stmtCtx, clauseOps);
+  cp.processNumTasks(stmtCtx, clauseOps);
 
   cp.processTODO<clause::Allocate, clause::Collapse, clause::Default,
-                 clause::Final, clause::Grainsize, clause::If,
-                 clause::InReduction, clause::Lastprivate, clause::Mergeable,
-                 clause::Nogroup, clause::NumTasks, clause::Priority,
-                 clause::Reduction, clause::Shared, clause::Untied>(
-      loc, llvm::omp::Directive::OMPD_taskloop);
+                 clause::Final, clause::If, clause::InReduction,
+                 clause::Lastprivate, clause::Mergeable, clause::Nogroup,
+                 clause::Priority, clause::Reduction, clause::Shared,
+                 clause::Untied>(loc, llvm::omp::Directive::OMPD_taskloop);
 }
 
 static void genTaskwaitClauses(lower::AbstractConverter &converter,
@@ -3270,12 +3272,12 @@ genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
 
 static mlir::omp::TaskloopOp genStandaloneTaskloop(
     lower::AbstractConverter &converter, lower::SymMap &symTable,
-    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
-    mlir::Location loc, const ConstructQueue &queue,
-    ConstructQueue::const_iterator item) {
+    lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
+    lower::pft::Evaluation &eval, mlir::Location loc,
+    const ConstructQueue &queue, ConstructQueue::const_iterator item) {
   mlir::omp::TaskloopOperands taskloopClauseOps;
-  genTaskloopClauses(converter, semaCtx, item->clauses, loc, taskloopClauseOps);
-
+  genTaskloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
+                     taskloopClauseOps);
   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                            /*shouldCollectPreDeterminedSymbols=*/true,
                            enableDelayedPrivatization, symTable);
@@ -3736,8 +3738,8 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
         genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_taskloop:
-    newOp = genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc,
-                                  queue, item);
+    newOp = genStandaloneTaskloop(converter, symTable, stmtCtx, semaCtx, eval,
+                                  loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_taskwait:
     newOp = genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);

diff  --git a/flang/test/Lower/OpenMP/taskloop-grainsize.f90 b/flang/test/Lower/OpenMP/taskloop-grainsize.f90
new file mode 100644
index 0000000000000..43db8acdeceac
--- /dev/null
+++ b/flang/test/Lower/OpenMP/taskloop-grainsize.f90
@@ -0,0 +1,51 @@
+! This test checks lowering of grainsize clause in taskloop directive.
+
+! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+
+! CHECK-LABEL:  omp.private
+! CHECK-SAME:       {type = private} @[[I_PRIVATE_TEST2:.*]] : i32
+
+! CHECK-LABEL:  omp.private
+! CHECK-SAME:        {type = firstprivate} @[[X_FIRSTPRIVATE_TEST2:.*]] : i32
+! CHECK-SAME:   copy {
+! CHECK:         hlfir.assign
+
+! CHECK-LABEL:  omp.private
+! CHECK-SAME:       {type = private} @[[I_PRIVATE:.*]] : i32
+
+! CHECK-LABEL:  omp.private
+! CHECK-SAME:        {type = firstprivate} @[[X_FIRSTPRIVATE:.*]] : i32
+! CHECK-SAME:   copy {
+! CHECK:         hlfir.assign
+
+! CHECK-LABEL:  func.func @_QPtest_grainsize
+! CHECK:          %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_grainsizeEi"}
+! CHECK:          %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFtest_grainsizeEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:          %[[ALLOCA_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_grainsizeEx"}
+! CHECK:          %[[DECL_X:.*]]:2 = hlfir.declare %[[ALLOCA_X]] {uniq_name = "_QFtest_grainsizeEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:          %[[GRAINSIZE:.*]] = arith.constant 10 : i32
+subroutine test_grainsize
+   integer :: i, x
+   ! CHECK:          omp.taskloop grainsize(%[[GRAINSIZE]]: i32) 
+   ! CHECK-SAME:        private(@[[X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:            omp.loop_nest (%[[ARG2:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+   !$omp taskloop grainsize(10)
+   do i = 1, 1000
+      x = x + 1
+   end do
+   !$omp end taskloop
+end subroutine test_grainsize
+
+!CHECK-LABEL: func.func @_QPtest_grainsize_strict()
+subroutine test_grainsize_strict
+  integer :: i, x
+  ! CHECK: %[[GRAINSIZE:.*]] = arith.constant 10 : i32
+  ! CHECK: omp.taskloop grainsize(strict, %[[GRAINSIZE]]: i32)
+  !$omp taskloop grainsize(strict:10)
+  do i = 1, 1000
+     !CHECK: arith.addi
+     x = x + 1
+  end do
+  !$omp end taskloop
+end subroutine

diff  --git a/flang/test/Lower/OpenMP/taskloop-numtasks.f90 b/flang/test/Lower/OpenMP/taskloop-numtasks.f90
new file mode 100644
index 0000000000000..f68f3a2d6ad26
--- /dev/null
+++ b/flang/test/Lower/OpenMP/taskloop-numtasks.f90
@@ -0,0 +1,51 @@
+! This test checks lowering of num_tasks clause in taskloop directive.
+
+! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+
+! CHECK-LABEL:  omp.private
+! CHECK-SAME:       {type = private} @[[I_PRIVATE_TEST2:.*]] : i32
+
+! CHECK-LABEL:  omp.private
+! CHECK-SAME:        {type = firstprivate} @[[X_FIRSTPRIVATE_TEST2:.*]] : i32
+! CHECK-SAME:   copy {
+! CHECK:         hlfir.assign
+
+! CHECK-LABEL:  omp.private
+! CHECK-SAME:       {type = private} @[[I_PRIVATE:.*]] : i32
+
+! CHECK-LABEL:  omp.private
+! CHECK-SAME:        {type = firstprivate} @[[X_FIRSTPRIVATE:.*]] : i32
+! CHECK-SAME:   copy {
+! CHECK:         hlfir.assign
+
+! CHECK-LABEL:  func.func @_QPtest_num_tasks
+! CHECK:          %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_num_tasksEi"}
+! CHECK:          %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFtest_num_tasksEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:          %[[ALLOCA_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_num_tasksEx"}
+! CHECK:          %[[DECL_X:.*]]:2 = hlfir.declare %[[ALLOCA_X]] {uniq_name = "_QFtest_num_tasksEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:          %[[VAL_NUMTASKS:.*]] = arith.constant 10 : i32
+subroutine test_num_tasks
+   integer :: i, x
+   ! CHECK:          omp.taskloop num_tasks(%[[VAL_NUMTASKS]]: i32) 
+   ! CHECK-SAME:        private(@[[X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:            omp.loop_nest (%[[ARG2:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+   !$omp taskloop num_tasks(10)
+   do i = 1, 1000
+      x = x + 1
+   end do
+   !$omp end taskloop
+end subroutine test_num_tasks
+
+! CHECK-LABEL:  func.func @_QPtest_num_tasks_strict
+subroutine test_num_tasks_strict
+  integer :: x, i
+  ! CHECK:  %[[NUM_TASKS:.*]] = arith.constant 10 : i32
+  ! CHECK: omp.taskloop num_tasks(strict, %[[NUM_TASKS]]: i32)
+  !$omp taskloop num_tasks(strict:10)
+  do i = 1, 100
+     !CHECK: arith.addi
+     x = x + 1
+  end do
+  !$omp end taskloop
+end subroutine


        


More information about the flang-commits mailing list