[flang-commits] [flang] [flang][OpenMP] Support `bind` clause code-gen for standalone `loop`s (PR #122674)
via flang-commits
flang-commits at lists.llvm.org
Mon Jan 13 00:15:25 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-flang-openmp
@llvm/pr-subscribers-flang-fir-hlfir
Author: Kareem Ergawy (ergawy)
<details>
<summary>Changes</summary>
Extends rewriting of `loop` directives by supporting the `bind` clause on
standalone directives. This follows both the spec and clang's current
behavior, as shown below:
* No `bind` or `bind(thread)`: the `loop` is rewritten to `simd`.
* `bind(parallel)`: the `loop` is rewritten to `do`.
* `bind(teams)`: the `loop` is rewritten to `distribute`.
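For reference, here is a minimal Fortran sketch of the three cases (hypothetical code, not taken from the PR's tests; the subroutine and variable names are made up for illustration):

```fortran
! Hypothetical example (not part of the PR's tests): each standalone `loop`
! below is expected to be rewritten by the generic-loop conversion pass as
! noted in the comments.
subroutine standalone_loop_bind_examples(a, n)
  implicit none
  integer, intent(in) :: n
  integer, intent(inout) :: a(n)
  integer :: i

  ! No `bind` clause (same for `bind(thread)`): rewritten to `simd`.
  !$omp loop
  do i = 1, n
    a(i) = a(i) + 1
  end do

  ! `bind(parallel)`: rewritten to `do` (a worksharing loop).
  !$omp loop bind(parallel)
  do i = 1, n
    a(i) = a(i) + 2
  end do

  ! `bind(teams)`: rewritten to `distribute`.
  !$omp loop bind(teams)
  do i = 1, n
    a(i) = a(i) + 3
  end do
end subroutine standalone_loop_bind_examples
```

The new `test_standalone_bind_teams` and `test_standalone_bind_parallel` tests in the diff exercise the `bind(teams)` and `bind(parallel)` paths, while the existing `test_bind` test covers `bind(thread)`.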
This is a follow-up to https://github.com/llvm/llvm-project/pull/122632; only the latest commit in this PR is relevant.
---
Full diff: https://github.com/llvm/llvm-project/pull/122674.diff
3 Files Affected:
- (modified) flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp (+124-7)
- (modified) flang/test/Lower/OpenMP/loop-directive.f90 (+83-4)
- (modified) flang/test/Transforms/generic-loop-rewriting-todo.mlir (-13)
``````````diff
diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index c3c1f3b2848b82..949d2d37c85114 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -30,19 +30,37 @@ class GenericLoopConversionPattern
: public mlir::OpConversionPattern<mlir::omp::LoopOp> {
public:
enum class GenericLoopCombinedInfo {
- None,
+ Standalone,
TargetTeamsLoop,
TargetParallelLoop
};
using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;
+ explicit GenericLoopConversionPattern(mlir::MLIRContext *ctx)
+ : mlir::OpConversionPattern<mlir::omp::LoopOp>{ctx} {
+ this->setHasBoundedRewriteRecursion(true);
+ }
+
mlir::LogicalResult
matchAndRewrite(mlir::omp::LoopOp loopOp, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
assert(mlir::succeeded(checkLoopConversionSupportStatus(loopOp)));
- rewriteToDistributeParallelDo(loopOp, rewriter);
+ GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
+
+ switch (combinedInfo) {
+ case GenericLoopCombinedInfo::Standalone:
+ rewriteStandaloneLoop(loopOp, rewriter);
+ break;
+ case GenericLoopCombinedInfo::TargetParallelLoop:
+ assert(false);
+ break;
+ case GenericLoopCombinedInfo::TargetTeamsLoop:
+ rewriteToDistributeParallelDo(loopOp, rewriter);
+ break;
+ }
+
rewriter.eraseOp(loopOp);
return mlir::success();
}
@@ -52,9 +70,8 @@ class GenericLoopConversionPattern
GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
switch (combinedInfo) {
- case GenericLoopCombinedInfo::None:
- return loopOp.emitError(
- "not yet implemented: Standalone `omp loop` directive");
+ case GenericLoopCombinedInfo::Standalone:
+ break;
case GenericLoopCombinedInfo::TargetParallelLoop:
return loopOp.emitError(
"not yet implemented: Combined `omp target parallel loop` directive");
@@ -68,7 +85,10 @@ class GenericLoopConversionPattern
<< loopOp->getName() << " operation";
};
- if (loopOp.getBindKind())
+ // For standalone directives, `bind` is already supported. For combined
+ // forms, it will be supported in a follow-up PR.
+ if (combinedInfo != GenericLoopCombinedInfo::Standalone &&
+ loopOp.getBindKind())
return todo("bind");
if (loopOp.getOrder())
@@ -86,7 +106,7 @@ class GenericLoopConversionPattern
static GenericLoopCombinedInfo
findGenericLoopCombineInfo(mlir::omp::LoopOp loopOp) {
mlir::Operation *parentOp = loopOp->getParentOp();
- GenericLoopCombinedInfo result = GenericLoopCombinedInfo::None;
+ GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;
if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp()))
@@ -100,6 +120,103 @@ class GenericLoopConversionPattern
return result;
}
+ void rewriteStandaloneLoop(mlir::omp::LoopOp loopOp,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ using namespace mlir::omp;
+ std::optional<ClauseBindKind> bindKind = loopOp.getBindKind();
+
+ if (!bindKind.has_value())
+ return rewriteToSimdLoop(loopOp, rewriter);
+
+ switch (*loopOp.getBindKind()) {
+ case ClauseBindKind::Parallel:
+ return rewriteToWsloop(loopOp, rewriter);
+ case ClauseBindKind::Teams:
+ return rewriteToDistrbute(loopOp, rewriter);
+ case ClauseBindKind::Thread:
+ return rewriteToSimdLoop(loopOp, rewriter);
+ }
+ }
+
+ /// Rewrites standalone `loop` directives (ones with no `bind` clause or with
+ /// `bind(thread)`) to equivalent `simd` constructs.
+ ///
+ /// The reasoning behind this decision is that according to the spec (version
+ /// 5.2, section 11.7.1):
+ ///
+ /// "If the bind clause is not specified on a construct for which it may be
+ /// specified and the construct is closely nested inside a teams or parallel
+ /// construct, the effect is as if binding is teams or parallel. If none of
+ /// those conditions hold, the binding region is not defined."
+ ///
+ /// which means that standalone `loop` directives have undefined binding
+ /// region. Moreover, the spec says (in the next paragraph):
+ ///
+ /// "The specified binding region determines the binding thread set.
+ /// Specifically, if the binding region is a teams region, then the binding
+ /// thread set is the set of initial threads that are executing that region
+ /// while if the binding region is a parallel region, then the binding thread
+ /// set is the team of threads that are executing that region. If the binding
+ /// region is not defined, then the binding thread set is the encountering
+ /// thread."
+ ///
+ /// which means that the binding thread set for a standalone `loop` directive
+ /// is only the encountering thread.
+ ///
+ /// Since the encountering thread is the binding thread (set) for a
+ /// standalone `loop` directive, the best we can do in such a case is to
+ /// "simd" the directive.
+ void rewriteToSimdLoop(mlir::omp::LoopOp loopOp,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ loopOp.emitWarning(
+ "Detected standalone OpenMP `loop` directive with thread binding, "
+ "the associated loop will be rewritten to `simd`.");
+ rewriteToSingleWrapperOp<mlir::omp::SimdOp, mlir::omp::SimdOperands>(
+ loopOp, rewriter);
+ }
+
+ void rewriteToDistrbute(mlir::omp::LoopOp loopOp,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ rewriteToSingleWrapperOp<mlir::omp::DistributeOp,
+ mlir::omp::DistributeOperands>(loopOp, rewriter);
+ }
+
+ void rewriteToWsloop(mlir::omp::LoopOp loopOp,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ rewriteToSingleWrapperOp<mlir::omp::WsloopOp, mlir::omp::WsloopOperands>(
+ loopOp, rewriter);
+ }
+
+ template <typename OpTy, typename OpOperandsTy>
+ void
+ rewriteToSingleWrapperOp(mlir::omp::LoopOp loopOp,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ OpOperandsTy distributeClauseOps;
+ distributeClauseOps.privateVars = loopOp.getPrivateVars();
+
+ auto privateSyms = loopOp.getPrivateSyms();
+ if (privateSyms)
+ distributeClauseOps.privateSyms.assign(privateSyms->begin(),
+ privateSyms->end());
+
+ Fortran::common::openmp::EntryBlockArgs distributeArgs;
+ distributeArgs.priv.vars = distributeClauseOps.privateVars;
+
+ auto distributeOp =
+ rewriter.create<OpTy>(loopOp.getLoc(), distributeClauseOps);
+ mlir::Block *distributeBlock =
+ genEntryBlock(rewriter, distributeArgs, distributeOp.getRegion());
+
+ mlir::IRMapping mapper;
+ mlir::Block &loopBlock = *loopOp.getRegion().begin();
+
+ for (auto [loopOpArg, distributeOpArg] : llvm::zip_equal(
+ loopBlock.getArguments(), distributeBlock->getArguments()))
+ mapper.map(loopOpArg, distributeOpArg);
+
+ rewriter.clone(*loopOp.begin(), mapper);
+ }
+
void rewriteToDistributeParallelDo(
mlir::omp::LoopOp loopOp,
mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index 4b4d640e449eeb..845905da0fcba2 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -11,7 +11,7 @@
subroutine test_no_clauses()
integer :: i, j, dummy = 1
- ! CHECK: omp.loop private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
+ ! CHECK: omp.simd private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
! CHECK-NEXT: omp.loop_nest (%[[IV:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
! CHECK: %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]]
! CHECK: fir.store %[[IV]] to %[[ARG_DECL]]#1 : !fir.ref<i32>
@@ -27,7 +27,7 @@ subroutine test_no_clauses()
! CHECK-LABEL: func.func @_QPtest_collapse
subroutine test_collapse()
integer :: i, j, dummy = 1
- ! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
+ ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
! CHECK-NEXT: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 {{.*}} {
! CHECK: }
! CHECK: }
@@ -43,7 +43,7 @@ subroutine test_collapse()
! CHECK-LABEL: func.func @_QPtest_private
subroutine test_private()
integer :: i, dummy = 1
- ! CHECK: omp.loop private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
+ ! CHECK: omp.simd private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_privateEdummy"}
! CHECK: %{{.*}} = fir.load %[[DUMMY_DECL]]#0
@@ -92,7 +92,7 @@ subroutine test_reduction()
! CHECK-LABEL: func.func @_QPtest_bind
subroutine test_bind()
integer :: i, dummy = 1
- ! CHECK: omp.loop bind(thread) private(@{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
+ ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
! CHECK: }
!$omp loop bind(thread)
do i=1,10
@@ -100,3 +100,82 @@ subroutine test_bind()
end do
!$omp end loop
end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_nested_directives
+subroutine test_nested_directives
+ implicit none
+ integer, parameter :: N = 100000
+ integer a(N), b(N), c(N)
+ integer j,i, num, flag;
+ num = N
+
+ ! CHECK: omp.teams {
+
+ ! Verify the first `loop` directive was combined with `target teams` into
+ ! `target teams distribute parallel do`.
+ ! CHECK: omp.parallel {{.*}} {
+ ! CHECK: omp.distribute {
+ ! CHECK: omp.wsloop {
+ ! CHECK: omp.loop_nest {{.*}} {
+
+ ! Verify the second `loop` directive was rewritten to `simd`.
+ ! CHECK: omp.simd {{.*}} {
+ ! CHECK: omp.loop_nest {{.*}} {
+ ! CHECK: }
+ ! CHECK: }
+
+ ! CHECK: }
+ ! CHECK: } {omp.composite}
+ ! CHECK: } {omp.composite}
+ ! CHECK: } {omp.composite}
+ ! CHECK: }
+ !$omp target teams map(to: a,b) map(from: c)
+ !$omp loop
+ do j=1,1000
+ !$omp loop
+ do i=1,N
+ c(i) = a(i) * b(i)
+ end do
+ end do
+ !$omp end target teams
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_standalone_bind_teams
+subroutine test_standalone_bind_teams
+ implicit none
+ integer, parameter :: N = 100000
+ integer a(N), b(N), c(N)
+ integer j,i, num, flag;
+ num = N
+
+ ! CHECK: omp.distribute
+ ! CHECK-SAME: private(@{{.*}}Ea_private_ref_100000xi32 {{[^,]*}},
+ ! CHECK-SAME: @{{.*}}Ei_private_ref_i32 {{.*}} : {{.*}}) {
+ ! CHECK: omp.loop_nest {{.*}} {
+ ! CHECK: }
+ ! CHECK: }
+ !$omp loop bind(teams) private(a)
+ do i=1,N
+ c(i) = a(i) * b(i)
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_standalone_bind_parallel
+subroutine test_standalone_bind_parallel
+ implicit none
+ integer, parameter :: N = 100000
+ integer a(N), b(N), c(N)
+ integer j,i, num, flag;
+ num = N
+
+ ! CHECK: omp.wsloop
+ ! CHECK-SAME: private(@{{.*}}Ea_private_ref_100000xi32 {{[^,]*}},
+ ! CHECK-SAME: @{{.*}}Ei_private_ref_i32 {{.*}} : {{.*}}) {
+ ! CHECK: omp.loop_nest {{.*}} {
+ ! CHECK: }
+ ! CHECK: }
+ !$omp loop bind(parallel) private(a)
+ do i=1,N
+ c(i) = a(i) * b(i)
+ end do
+end subroutine
diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
index 9ea6bf001b6685..becd6b8dcb5cb4 100644
--- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -1,18 +1,5 @@
// RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
-func.func @_QPtarget_loop() {
- %c0 = arith.constant 0 : i32
- %c10 = arith.constant 10 : i32
- %c1 = arith.constant 1 : i32
- // expected-error at below {{not yet implemented: Standalone `omp loop` directive}}
- omp.loop {
- omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
- omp.yield
- }
- }
- return
-}
-
func.func @_QPtarget_parallel_loop() {
omp.target {
omp.parallel {
``````````
</details>
https://github.com/llvm/llvm-project/pull/122674