[flang-commits] [flang] [flang][OpenMP] Support `parallel loop` construct. (PR #127588)

via flang-commits flang-commits at lists.llvm.org
Tue Feb 18 00:09:24 PST 2025


llvmbot wrote:


@llvm/pr-subscribers-flang-openmp

Author: Kareem Ergawy (ergawy)

Changes:

Extends support for the `loop` directive by adding support for the combined
`parallel loop` directive.

Parent PR: #127489. Only the latest commit is relevant.
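
For illustration, a minimal Fortran sketch (not taken from the patch; the subroutine and variable names are made up) of the kind of combined construct whose lowering this enables:

```fortran
! Hypothetical example: a combined `parallel loop` construct that the
! generic-loop conversion pass can now rewrite (names are illustrative).
subroutine parallel_loop_example
  implicit none
  integer :: i, a(10)

  !$omp parallel loop
  do i = 1, 10
    a(i) = i
  end do
end subroutine
```

With this change, the `loop` part of such a construct is rewritten to an `omp.wsloop` nested inside the enclosing `omp.parallel`, as exercised by the new `generic-loop-rewriting.mlir` test below.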

---
Full diff: https://github.com/llvm/llvm-project/pull/127588.diff


4 Files Affected:

- (modified) flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp (+56-24) 
- (modified) flang/test/Lower/OpenMP/loop-directive.f90 (+120-2) 
- (modified) flang/test/Transforms/generic-loop-rewriting-todo.mlir (-16) 
- (modified) flang/test/Transforms/generic-loop-rewriting.mlir (+33) 


``````````diff
diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index d2581e3ad0a0a..fd8203d4ea64a 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -52,11 +52,13 @@ class GenericLoopConversionPattern
       rewriteStandaloneLoop(loopOp, rewriter);
       break;
     case GenericLoopCombinedInfo::ParallelLoop:
-      llvm_unreachable(
-          "not yet implemented: Combined `parallel loop` directive");
+      rewriteToWsloop(loopOp, rewriter);
       break;
     case GenericLoopCombinedInfo::TeamsLoop:
-      rewriteToDistributeParallelDo(loopOp, rewriter);
+      if (teamsLoopCanBeParallelFor(loopOp))
+        rewriteToDistributeParallelDo(loopOp, rewriter);
+      else
+        rewriteToDistrbute(loopOp, rewriter);
       break;
     }
 
@@ -66,39 +68,18 @@ class GenericLoopConversionPattern
 
   static mlir::LogicalResult
   checkLoopConversionSupportStatus(mlir::omp::LoopOp loopOp) {
-    GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
-
-    switch (combinedInfo) {
-    case GenericLoopCombinedInfo::Standalone:
-      break;
-    case GenericLoopCombinedInfo::ParallelLoop:
-      return loopOp.emitError(
-          "not yet implemented: Combined `parallel loop` directive");
-    case GenericLoopCombinedInfo::TeamsLoop:
-      break;
-    }
-
     auto todo = [&loopOp](mlir::StringRef clauseName) {
       return loopOp.emitError()
              << "not yet implemented: Unhandled clause " << clauseName << " in "
              << loopOp->getName() << " operation";
     };
 
-    // For `loop` and `teams loop` directives, `bind` is supported.
-    // Additionally, for `teams loop`, semantic checking verifies that the
-    // `bind` clause modifier is `teams`, so no need to check this here again.
-    if (combinedInfo == GenericLoopCombinedInfo::ParallelLoop &&
-        loopOp.getBindKind())
-      return todo("bind");
-
     if (loopOp.getOrder())
       return todo("order");
 
     if (!loopOp.getReductionVars().empty())
       return todo("reduction");
 
-    // TODO For `teams loop`, check similar constrains to what is checked
-    // by `TeamsLoopChecker` in SemaOpenMP.cpp.
     return mlir::success();
   }
 
@@ -118,6 +99,57 @@ class GenericLoopConversionPattern
     return result;
   }
 
+  /// Checks whether a `teams loop` construct can be rewritten to `teams
+  /// distribute parallel do` or has to be converted to `teams distribute`.
+  ///
+  /// This checks constraints similar to those checked by `TeamsLoopChecker` in
+  /// SemaOpenMP.cpp in clang.
+  static bool teamsLoopCanBeParallelFor(mlir::omp::LoopOp loopOp) {
+    bool canBeParallelFor = true;
+    loopOp.walk([&](mlir::omp::LoopOp nestedLoopOp) {
+      if (nestedLoopOp == loopOp)
+        return mlir::WalkResult::advance();
+
+      GenericLoopCombinedInfo combinedInfo =
+          findGenericLoopCombineInfo(nestedLoopOp);
+
+      // Worksharing loops cannot be nested inside each other. Therefore, if the
+      // current `loop` directive nests another `loop` whose `bind` modifier is
+      // `parallel`, this `loop` directive cannot be mapped to `distribute
+      // parallel for` but rather only to `distribute`.
+      if (combinedInfo == GenericLoopCombinedInfo::Standalone &&
+          nestedLoopOp.getBindKind() &&
+          *nestedLoopOp.getBindKind() == mlir::omp::ClauseBindKind::Parallel)
+        canBeParallelFor = false;
+
+      if (combinedInfo == GenericLoopCombinedInfo::ParallelLoop)
+        canBeParallelFor = false;
+
+      return canBeParallelFor ? mlir::WalkResult::advance()
+                              : mlir::WalkResult::interrupt();
+    });
+
+    loopOp.walk([&](mlir::CallOpInterface callOp) {
+      // Calls to non-OpenMP API runtime functions inhibit transformation to
+      // `teams distribute parallel do` since the called functions might have
+      // nested parallelism themselves.
+      bool isOpenMPAPI = false;
+      mlir::CallInterfaceCallable callable = callOp.getCallableForCallee();
+
+      if (auto callableSymRef = mlir::dyn_cast<mlir::SymbolRefAttr>(callable))
+        isOpenMPAPI =
+            callableSymRef.getRootReference().strref().find("omp_") == 0;
+
+      if (!isOpenMPAPI)
+        canBeParallelFor = false;
+
+      return canBeParallelFor ? mlir::WalkResult::advance()
+                              : mlir::WalkResult::interrupt();
+    });
+
+    return canBeParallelFor;
+  }
+
   void rewriteStandaloneLoop(mlir::omp::LoopOp loopOp,
                              mlir::ConversionPatternRewriter &rewriter) const {
     using namespace mlir::omp;
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index 785f732e1b4f5..f7e7537c4985b 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -1,7 +1,8 @@
 ! This test checks lowering of OpenMP loop Directive.
 
-! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
-! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+! REQUIRES: openmp_runtime
+
+! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
 
 ! CHECK: omp.declare_reduction @[[RED:add_reduction_i32]] : i32
 ! CHECK: omp.private {type = private} @[[DUMMY_PRIV:.*test_privateEdummy_private.*]] : i32
@@ -179,3 +180,120 @@ subroutine test_standalone_bind_parallel
     c(i) = a(i) * b(i)
   end do
 end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_cannot_be_parallel_for
+subroutine teams_loop_cannot_be_parallel_for
+  implicit none
+  integer :: iter, iter2, val(20)
+  val = 0
+  ! CHECK: omp.teams {
+
+  ! Verify the outer `loop` directive was mapped to only `distribute`.
+  ! CHECK-NOT: omp.parallel {{.*}}
+  ! CHECK:     omp.distribute {{.*}} {
+  ! CHECK-NOT:   omp.wsloop
+  ! CHECK:       omp.loop_nest {{.*}} {
+
+  ! Verify the inner `loop` directive was mapped to a worksharing loop.
+  ! CHECK:         omp.wsloop {{.*}} {
+  ! CHECK:           omp.loop_nest {{.*}} {
+  ! CHECK:           }
+  ! CHECK:         }
+
+  ! CHECK:       }
+  ! CHECK:     }
+
+  ! CHECK: }
+  !$omp target teams loop map(tofrom:val)
+  DO iter = 1, 5
+    !$omp loop bind(parallel)
+    DO iter2 = 1, 5
+      val(iter+iter2) = iter+iter2
+    END DO
+  END DO
+end subroutine
+
+subroutine foo()
+end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_cannot_be_parallel_for_2
+subroutine teams_loop_cannot_be_parallel_for_2
+  implicit none
+  integer :: iter, val(20)
+  val = 0
+
+  ! CHECK: omp.teams {
+
+  ! Verify the `loop` directive was mapped to only `distribute`.
+  ! CHECK-NOT: omp.parallel {{.*}}
+  ! CHECK:     omp.distribute {{.*}} {
+  ! CHECK-NOT:   omp.wsloop
+  ! CHECK:       omp.loop_nest {{.*}} {
+  ! CHECK:         fir.call @_QPfoo
+  ! CHECK:       }
+  ! CHECK:     }
+
+  ! CHECK: }
+  !$omp target teams loop map(tofrom:val)
+  DO iter = 1, 5
+    call foo()
+  END DO
+end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_cannot_be_parallel_for_3
+subroutine teams_loop_cannot_be_parallel_for_3
+  use omp_lib
+  implicit none
+  integer :: iter, tid, val(20)
+  val = 0
+
+  !CHECK: omp.teams {
+  !CHECK:   omp.parallel {{.*}} {
+  !CHECK:     omp.distribute {
+  !CHECK:       omp.wsloop {
+  !CHECK:         omp.loop_nest {{.*}} {
+  !CHECK:           fir.call @omp_get_thread_num()
+  !CHECK:         }
+  !CHECK:       }
+  !CHECK:     }
+  !CHECK:   }
+  !CHECK: }
+  !$omp target teams loop map(tofrom:val)
+  DO iter = 1, 5
+    tid = omp_get_thread_num()
+  END DO
+end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_cannot_be_parallel_for_4
+subroutine teams_loop_cannot_be_parallel_for_4
+  implicit none
+  integer :: iter, iter2, tid, val(20)
+
+  ! CHECK: omp.teams {
+
+  ! Verify the outer `loop` directive was mapped to only `distribute`.
+  ! CHECK-NOT: omp.parallel {{.*}}
+  ! CHECK:     omp.distribute {{.*}} {
+  ! CHECK-NOT:   omp.wsloop
+  ! CHECK:       omp.loop_nest {{.*}} {
+
+  ! Verify the inner `loop` directive was mapped to a worksharing loop.
+  ! CHECK:         omp.wsloop {{.*}} {
+  ! CHECK:           omp.loop_nest {{.*}} {
+  ! CHECK:           }
+  ! CHECK:         }
+
+  ! CHECK:       }
+  ! CHECK:     }
+
+  ! CHECK: }
+  !$omp target teams loop map(tofrom:val)
+  DO iter = 1, 5
+    !$omp parallel
+    !$omp loop
+    DO iter2 = 1, 5
+      val(iter+iter2) = iter+iter2
+    END DO
+    !$omp end parallel
+  END DO
+end subroutine
diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
index 25baffe34e394..e992296c9a837 100644
--- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -1,21 +1,5 @@
 // RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
 
-func.func @_QPparallel_loop() {
-  omp.parallel {
-    %c0 = arith.constant 0 : i32
-    %c10 = arith.constant 10 : i32
-    %c1 = arith.constant 1 : i32
-    // expected-error at below {{not yet implemented: Combined `parallel loop` directive}}
-    omp.loop {
-      omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
-        omp.yield
-      }
-    }
-    omp.terminator
-  }
-  return
-}
-
 omp.declare_reduction @add_reduction_i32 : i32 init {
   ^bb0(%arg0: i32):
     %c0_i32 = arith.constant 0 : i32
diff --git a/flang/test/Transforms/generic-loop-rewriting.mlir b/flang/test/Transforms/generic-loop-rewriting.mlir
index 49caf242fe324..6996beeb9f430 100644
--- a/flang/test/Transforms/generic-loop-rewriting.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting.mlir
@@ -45,3 +45,36 @@ func.func @_QPteams_loop() {
 // CHECK:             }
 // CHECK:           }
 // CHECK:         }
+
+func.func @_QPparallel_loop() {
+  %i = fir.alloca i32
+  omp.parallel {
+    %c0 = arith.constant 0 : i32
+    %c10 = arith.constant 10 : i32
+    %c1 = arith.constant 1 : i32
+    omp.loop private(@_QFteams_loopEi_private_i32 %i -> %arg2 : !fir.ref<i32>) {
+      omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
+        fir.store %arg3 to %arg2 : !fir.ref<i32>
+        omp.yield
+      }
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: func.func @_QPparallel_loop
+// CHECK:         %[[I:.*]] = fir.alloca i32
+// CHECK:         omp.parallel {
+
+// CHECK:           %[[LB:.*]] = arith.constant 0 : i32
+// CHECK:           %[[UB:.*]] = arith.constant 10 : i32
+// CHECK:           %[[STEP:.*]] = arith.constant 1 : i32
+// CHECK:           omp.wsloop private(@{{.*}} %[[I]]
+// CHECK-SAME:        -> %[[I_PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>) {
+// CHECK:              omp.loop_nest (%{{.*}}) : i32 =
+// CHECK-SAME:           (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+// CHECK:                fir.store %{{.*}} to %[[I_PRIV_ARG]] : !fir.ref<i32>
+// CHECK:              }
+// CHECK:           }
+// CHECK:         }

``````````



https://github.com/llvm/llvm-project/pull/127588


More information about the flang-commits mailing list