[flang-commits] [flang] bff6b92 - [flang][OpenMP] Map `teams loop` to `teams distribute` when required. (#127489)
via flang-commits
flang-commits at lists.llvm.org
Fri Feb 21 06:39:56 PST 2025
Author: Kareem Ergawy
Date: 2025-02-21T15:39:52+01:00
New Revision: bff6b926e2a42c15451058828fca313ff40e36dd
URL: https://github.com/llvm/llvm-project/commit/bff6b926e2a42c15451058828fca313ff40e36dd
DIFF: https://github.com/llvm/llvm-project/commit/bff6b926e2a42c15451058828fca313ff40e36dd.diff
LOG: [flang][OpenMP] Map `teams loop` to `teams distribute` when required. (#127489)
This extends support for generic `loop` rewriting by:
1. Preventing nesting multiple worksharing loops inside each other. This
is checked by walking the `teams loop` region searching for any `loop`
directive whose `bind` modifier is `parallel`.
2. Preventing conversion to a worksharing loop if calls to unknown functions
are found in the `loop` directive's body.
We walk the `teams loop` body to identify either of the above 2
conditions, if either of them is found to be true, we map the `loop`
directive to `distribute`.
Added:
Modified:
flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
flang/test/Lower/OpenMP/loop-directive.f90
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index d2581e3ad0a0a..d3aece73b274f 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -56,7 +56,10 @@ class GenericLoopConversionPattern
"not yet implemented: Combined `parallel loop` directive");
break;
case GenericLoopCombinedInfo::TeamsLoop:
- rewriteToDistributeParallelDo(loopOp, rewriter);
+ if (teamsLoopCanBeParallelFor(loopOp))
+ rewriteToDistributeParallelDo(loopOp, rewriter);
+ else
+ rewriteToDistrbute(loopOp, rewriter);
break;
}
@@ -97,8 +100,6 @@ class GenericLoopConversionPattern
if (!loopOp.getReductionVars().empty())
return todo("reduction");
- // TODO For `teams loop`, check similar constrains to what is checked
- // by `TeamsLoopChecker` in SemaOpenMP.cpp.
return mlir::success();
}
@@ -118,6 +119,62 @@ class GenericLoopConversionPattern
return result;
}
+ /// Checks whether a `teams loop` construct can be rewriten to `teams
+ /// distribute parallel do` or it has to be converted to `teams distribute`.
+ ///
+ /// This checks similar constrains to what is checked by `TeamsLoopChecker` in
+ /// SemaOpenMP.cpp in clang.
+ static bool teamsLoopCanBeParallelFor(mlir::omp::LoopOp loopOp) {
+ bool canBeParallelFor =
+ !loopOp
+ .walk<mlir::WalkOrder::PreOrder>([&](mlir::Operation *nestedOp) {
+ if (nestedOp == loopOp)
+ return mlir::WalkResult::advance();
+
+ if (auto nestedLoopOp =
+ mlir::dyn_cast<mlir::omp::LoopOp>(nestedOp)) {
+ GenericLoopCombinedInfo combinedInfo =
+ findGenericLoopCombineInfo(nestedLoopOp);
+
+ // Worksharing loops cannot be nested inside each other.
+ // Therefore, if the current `loop` directive nests another
+ // `loop` whose `bind` modifier is `parallel`, this `loop`
+ // directive cannot be mapped to `distribute parallel for`
+ // but rather only to `distribute`.
+ if (combinedInfo == GenericLoopCombinedInfo::Standalone &&
+ nestedLoopOp.getBindKind() &&
+ *nestedLoopOp.getBindKind() ==
+ mlir::omp::ClauseBindKind::Parallel)
+ return mlir::WalkResult::interrupt();
+
+ // TODO check for combined `parallel loop` when we support
+ // it.
+ } else if (auto callOp =
+ mlir::dyn_cast<mlir::CallOpInterface>(nestedOp)) {
+ // Calls to non-OpenMP API runtime functions inhibits
+ // transformation to `teams distribute parallel do` since the
+ // called functions might have nested parallelism themselves.
+ bool isOpenMPAPI = false;
+ mlir::CallInterfaceCallable callable =
+ callOp.getCallableForCallee();
+
+ if (auto callableSymRef =
+ mlir::dyn_cast<mlir::SymbolRefAttr>(callable))
+ isOpenMPAPI =
+ callableSymRef.getRootReference().strref().starts_with(
+ "omp_");
+
+ if (!isOpenMPAPI)
+ return mlir::WalkResult::interrupt();
+ }
+
+ return mlir::WalkResult::advance();
+ })
+ .wasInterrupted();
+
+ return canBeParallelFor;
+ }
+
void rewriteStandaloneLoop(mlir::omp::LoopOp loopOp,
mlir::ConversionPatternRewriter &rewriter) const {
using namespace mlir::omp;
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index 785f732e1b4f5..13cf2bfd15135 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -1,7 +1,8 @@
! This test checks lowering of OpenMP loop Directive.
-! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
-! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+! REQUIRES: openmp_runtime
+
+! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
! CHECK: omp.declare_reduction @[[RED:add_reduction_i32]] : i32
! CHECK: omp.private {type = private} @[[DUMMY_PRIV:.*test_privateEdummy_private.*]] : i32
@@ -179,3 +180,84 @@ subroutine test_standalone_bind_parallel
c(i) = a(i) * b(i)
end do
end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_cannot_be_parallel_for
+subroutine teams_loop_cannot_be_parallel_for
+ implicit none
+ integer :: iter, iter2, val(20)
+ val = 0
+ ! CHECK: omp.teams {
+
+ ! Verify the outer `loop` directive was mapped to only `distribute`.
+ ! CHECK-NOT: omp.parallel {{.*}}
+ ! CHECK: omp.distribute {{.*}} {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+
+ ! Verify the inner `loop` directive was mapped to a worksharing loop.
+ ! CHECK: omp.wsloop {{.*}} {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+ ! CHECK: }
+ ! CHECK: }
+
+ ! CHECK: }
+ ! CHECK: }
+
+ ! CHECK: }
+ !$omp target teams loop map(tofrom:val)
+ DO iter = 1, 5
+ !$omp loop bind(parallel)
+ DO iter2 = 1, 5
+ val(iter+iter2) = iter+iter2
+ END DO
+ END DO
+end subroutine
+
+subroutine foo()
+end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_cannot_be_parallel_for_2
+subroutine teams_loop_cannot_be_parallel_for_2
+ implicit none
+ integer :: iter, val(20)
+ val = 0
+
+ ! CHECK: omp.teams {
+
+ ! Verify the `loop` directive was mapped to only `distribute`.
+ ! CHECK-NOT: omp.parallel {{.*}}
+ ! CHECK: omp.distribute {{.*}} {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+ ! CHECK: fir.call @_QPfoo
+ ! CHECK: }
+ ! CHECK: }
+
+ ! CHECK: }
+ !$omp target teams loop map(tofrom:val)
+ DO iter = 1, 5
+ call foo()
+ END DO
+end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_can_be_parallel_for
+subroutine teams_loop_can_be_parallel_for
+ use omp_lib
+ implicit none
+ integer :: iter, tid, val(20)
+ val = 0
+
+ !CHECK: omp.teams {
+ !CHECK: omp.parallel {{.*}} {
+ !CHECK: omp.distribute {
+ !CHECK: omp.wsloop {
+ !CHECK: omp.loop_nest {{.*}} {
+ !CHECK: fir.call @omp_get_thread_num()
+ !CHECK: }
+ !CHECK: }
+ !CHECK: }
+ !CHECK: }
+ !CHECK: }
+ !$omp target teams loop map(tofrom:val)
+ DO iter = 1, 5
+ tid = omp_get_thread_num()
+ END DO
+end subroutine
More information about the flang-commits
mailing list