[flang-commits] [flang] 18dd299 - [Flang][MLIR][OpenMP] Host-evaluation of omp.loop bounds (#133908)

via flang-commits flang-commits at lists.llvm.org
Thu Apr 3 07:06:22 PDT 2025


Author: Sergio Afonso
Date: 2025-04-03T15:06:19+01:00
New Revision: 18dd299fb109792d0716156af0a2d8c0ca781c57

URL: https://github.com/llvm/llvm-project/commit/18dd299fb109792d0716156af0a2d8c0ca781c57
DIFF: https://github.com/llvm/llvm-project/commit/18dd299fb109792d0716156af0a2d8c0ca781c57.diff

LOG: [Flang][MLIR][OpenMP] Host-evaluation of omp.loop bounds (#133908)

This patch updates Flang lowering and the identification of kernel execution
flags in MLIR so that the loop bounds of `target teams loop` constructs are
evaluated on the host. This makes the trip count available to the
corresponding `__tgt_target_kernel` call emitted for the target region.

This is necessary to properly execute these constructs as
`target teams distribute parallel do`.

Co-authored-by: Kareem Ergawy <kareem.ergawy at amd.com>

Added: 
    

Modified: 
    flang/lib/Lower/OpenMP/OpenMP.cpp
    flang/test/Lower/OpenMP/generic-loop-rewriting.f90
    flang/test/Lower/OpenMP/host-eval.f90
    mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
    mlir/test/Dialect/OpenMP/ops.mlir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index ab90b4609e855..b04d57ec30e4f 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -557,7 +557,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
     HostEvalInfo &hostInfo = hostEvalInfo.back();
 
     switch (extractOmpDirective(*ompEval)) {
-    // Cases where 'teams' and target SPMD clauses might be present.
     case OMPD_teams_distribute_parallel_do:
     case OMPD_teams_distribute_parallel_do_simd:
       cp.processThreadLimit(stmtCtx, hostInfo.ops);
@@ -575,18 +574,16 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
       cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
       break;
 
-    // Cases where 'teams' clauses might be present, and target SPMD is
-    // possible by looking at nested evaluations.
     case OMPD_teams:
       cp.processThreadLimit(stmtCtx, hostInfo.ops);
       [[fallthrough]];
     case OMPD_target_teams:
       cp.processNumTeams(stmtCtx, hostInfo.ops);
-      processSingleNestedIf(
-          [](Directive nestedDir) { return topDistributeSet.test(nestedDir); });
+      processSingleNestedIf([](Directive nestedDir) {
+        return topDistributeSet.test(nestedDir) || topLoopSet.test(nestedDir);
+      });
       break;
 
-    // Cases where only 'teams' host-evaluated clauses might be present.
     case OMPD_teams_distribute:
     case OMPD_teams_distribute_simd:
       cp.processThreadLimit(stmtCtx, hostInfo.ops);
@@ -597,6 +594,16 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
       cp.processNumTeams(stmtCtx, hostInfo.ops);
       break;
 
+    case OMPD_teams_loop:
+      cp.processThreadLimit(stmtCtx, hostInfo.ops);
+      [[fallthrough]];
+    case OMPD_target_teams_loop:
+      cp.processNumTeams(stmtCtx, hostInfo.ops);
+      [[fallthrough]];
+    case OMPD_loop:
+      cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
+      break;
+
     // Standalone 'target' case.
     case OMPD_target: {
       processSingleNestedIf(

diff  --git a/flang/test/Lower/OpenMP/generic-loop-rewriting.f90 b/flang/test/Lower/OpenMP/generic-loop-rewriting.f90
index e1adf5afb0eba..eaf31e3ffb779 100644
--- a/flang/test/Lower/OpenMP/generic-loop-rewriting.f90
+++ b/flang/test/Lower/OpenMP/generic-loop-rewriting.f90
@@ -11,7 +11,7 @@ subroutine target_teams_loop
     implicit none
     integer :: x, i
 
-    !$omp target teams loop
+    !$omp teams loop
     do i = 0, 10
       x = x + i
     end do
@@ -22,19 +22,15 @@ subroutine target_teams_loop
     implicit none
     integer :: x, i
 
-    !$omp target teams loop bind(teams)
+    !$omp teams loop bind(teams)
     do i = 0, 10
       x = x + i
     end do
 end subroutine target_teams_loop
 
 !CHECK-LABEL: func.func @_QPtarget_teams_loop
-!CHECK:         omp.target map_entries(
-!CHECK-SAME:      %{{.*}} -> %[[I_ARG:[^[:space:]]+]],
-!CHECK-SAME:      %{{.*}} -> %[[X_ARG:[^[:space:]]+]] : {{.*}}) {
-
-!CHECK:           %[[I_DECL:.*]]:2 = hlfir.declare %[[I_ARG]]
-!CHECK:           %[[X_DECL:.*]]:2 = hlfir.declare %[[X_ARG]]
+!CHECK:           %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}i"}
+!CHECK:           %[[X_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}x"}
 
 !CHECK:           omp.teams {
 
@@ -51,6 +47,7 @@ end subroutine target_teams_loop
 !CHECK-SAME:                (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
 !CHECK:                     %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ARG]]
 !CHECK:                     hlfir.assign %{{.*}} to %[[I_PRIV_DECL]]#0 : i32, !fir.ref<i32>
+!CHECK:                     hlfir.assign %{{.*}} to %[[X_DECL]]#0 : i32, !fir.ref<i32>
 !CHECK:                   }
 !CHECK:                 }
 !CHECK:               }

diff  --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90
index 65258c91e5daf..fe5b9597f8620 100644
--- a/flang/test/Lower/OpenMP/host-eval.f90
+++ b/flang/test/Lower/OpenMP/host-eval.f90
@@ -258,3 +258,28 @@ subroutine distribute_simd()
   !$omp end distribute simd
   !$omp end teams
 end subroutine distribute_simd
+
+! BOTH-LABEL: func.func @_QPloop
+subroutine loop()
+  ! BOTH: omp.target
+  
+  ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32)
+  
+  ! DEVICE-NOT: host_eval({{.*}})
+  ! DEVICE-SAME: {
+
+  ! BOTH: omp.teams
+  !$omp target teams
+
+  ! BOTH: omp.parallel
+
+  ! BOTH: omp.distribute
+  ! BOTH-NEXT: omp.wsloop
+  ! BOTH-NEXT: omp.loop_nest
+
+  ! HOST-SAME: (%{{.*}}) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+  !$omp loop
+  do i=1,10
+  end do
+  !$omp end target teams
+end subroutine loop

diff  --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 882bc4071482f..4ac9f49f12161 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2058,8 +2058,9 @@ TargetOp::getKernelExecFlags(Operation *capturedOp) {
   long numWrappers = std::distance(innermostWrapper, wrappers.end());
 
   // Detect Generic-SPMD: target-teams-distribute[-simd].
+  // Detect SPMD:         target-teams-loop.
   if (numWrappers == 1) {
-    if (!isa<DistributeOp>(innermostWrapper))
+    if (!isa<DistributeOp, LoopOp>(innermostWrapper))
       return OMP_TGT_EXEC_MODE_GENERIC;
 
     Operation *teamsOp = (*innermostWrapper)->getParentOp();
@@ -2067,7 +2068,9 @@ TargetOp::getKernelExecFlags(Operation *capturedOp) {
       return OMP_TGT_EXEC_MODE_GENERIC;
 
     if (teamsOp->getParentOp() == targetOp.getOperation())
-      return OMP_TGT_EXEC_MODE_GENERIC_SPMD;
+      return isa<DistributeOp>(innermostWrapper)
+                 ? OMP_TGT_EXEC_MODE_GENERIC_SPMD
+                 : OMP_TGT_EXEC_MODE_SPMD;
   }
 
   // Detect SPMD: target-teams-distribute-parallel-wsloop[-simd].

diff  --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index a5cf789402726..0a10626cd4877 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2879,6 +2879,22 @@ func.func @omp_target_host_eval(%x : i32) {
     }
     omp.terminator
   }
+
+  // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) {
+  // CHECK: omp.teams {
+  // CHECK: omp.loop {
+  // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%[[HOST_ARG]]) to (%[[HOST_ARG]]) step (%[[HOST_ARG]]) {
+  omp.target host_eval(%x -> %arg0 : i32) {
+    omp.teams {
+      omp.loop {
+        omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) {
+          omp.yield
+        }
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }
   return
 }
 


        


More information about the flang-commits mailing list