[flang-commits] [flang] [flang][OpenMP] Clone reduction info from nested `loop` ops to parent `teams` ops (PR #132003)

Kareem Ergawy via flang-commits flang-commits at lists.llvm.org
Wed Mar 19 21:13:15 PDT 2025


https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/132003

>From aed180b0b761023b0baf84ada2a9ee2fd5ca5ed8 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 19 Mar 2025 06:02:27 -0500
Subject: [PATCH 1/2] [flang][OpenMP] Clone reduction info from nested `loop`
 ops to parent `teams` ops

Fixes a bug in reductions when converting `teams loop` constructs with
`reduction` clauses.

According to the spec (v5.2, p340, 36):

```
The effect of the reduction clause is as if it is applied to all leaf
constructs that permit the clause, except for the following constructs:
* ....
* The teams construct, when combined with the loop construct.
```

Therefore, for a combined directive similar to: `!$omp teams loop
reduction(...)`, the earlier stages of the compiler assign the `reduction`
clauses only to the `loop` leaf and not to the `teams` leaf.

On the other hand, if we have a combined construct similar to: `!$omp teams
distribute parallel do`, the `reduction` clauses are assigned both to the
`teams` and the `do` leaves. We need to match this behavior when we convert
`teams` op with a nested `loop` op since the target set of constructs/ops
will be incorrect without moving the reductions up to the `teams` op as
well.

This PR introduces a pattern that does exactly this. Given the following input:
```
omp.teams {
  omp.loop reduction(@red_sym %red_op -> %red_arg : !fir.ref<i32>) {
    omp.loop_nest ... {
      ...
    }
  }
}
```
this pattern updates the `omp.teams` op in-place to:
```
omp.teams reduction(@red_sym %red_op -> %teams_red_arg : !fir.ref<i32>) {
  omp.loop reduction(@red_sym %teams_red_arg -> %red_arg : !fir.ref<i32>) {
    omp.loop_nest ... {
      ...
    }
  }
}
```

Note the following:
* The nested `omp.loop` is not rewritten by this pattern, this happens
  through `GenericLoopConversionPattern`.
* The reduction info are cloned from the nested `omp.loop` op to the parent
  `omp.teams` op.
* The reduction operand of the `omp.loop` op is updated to be the **new**
  reduction block argument of the `omp.teams` op.
---
 .../OpenMP/GenericLoopConversion.cpp          | 123 +++++++++++++++++-
 flang/test/Lower/OpenMP/loop-directive.f90    |   4 +-
 2 files changed, 124 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index b0014a3aced6b..38ae90519c5cd 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -343,6 +343,115 @@ class GenericLoopConversionPattern
   }
 };
 
+/// According to the spec (v5.2, p340, 36):
+///
+/// ```
+/// The effect of the reduction clause is as if it is applied to all leaf
+/// constructs that permit the clause, except for the following constructs:
+/// * ....
+/// * The teams construct, when combined with the loop construct.
+/// ```
+///
+/// Therefore, for a combined directive similar to: `!$omp teams loop
+/// reduction(...)`, the earlier stages of the compiler assign the `reduction`
+/// clauses only to the `loop` leaf and not to the `teams` leaf.
+///
+/// On the other hand, if we have a combined construct similar to: `!$omp teams
+/// distribute parallel do`, the `reduction` clauses are assigned both to the
+/// `teams` and the `do` leaves. We need to match this behavior when we convert
+/// `teams` op with a nested `loop` op since the target set of constructs/ops
+/// will be incorrect without moving the reductions up to the `teams` op as
+/// well.
+///
+/// This pattern does exactly this. Given the following input:
+/// ```
+/// omp.teams {
+///   omp.loop reduction(@red_sym %red_op -> %red_arg : !fir.ref<i32>) {
+///     omp.loop_nest ... {
+///       ...
+///     }
+///   }
+/// }
+/// ```
+/// this pattern updates the `omp.teams` op in-place to:
+/// ```
+/// omp.teams reduction(@red_sym %red_op -> %teams_red_arg : !fir.ref<i32>) {
+///   omp.loop reduction(@red_sym %teams_red_arg -> %red_arg : !fir.ref<i32>) {
+///     omp.loop_nest ... {
+///       ...
+///     }
+///   }
+/// }
+/// ```
+///
+/// Note the following:
+/// * The nested `omp.loop` is not rewritten by this pattern, this happens
+///   through `GenericLoopConversionPattern`.
+/// * The reduction info are cloned from the nested `omp.loop` op to the parent
+///   `omp.teams` op.
+/// * The reduction operand of the `omp.loop` op is updated to be the **new**
+///   reduction block argument of the `omp.teams` op.
+class ReductionsMoverConverPattern
+    : public mlir::OpConversionPattern<mlir::omp::TeamsOp> {
+public:
+  using mlir::OpConversionPattern<mlir::omp::TeamsOp>::OpConversionPattern;
+
+  static mlir::omp::LoopOp
+  tryToFindNestedLoopWithReuctions(mlir::omp::TeamsOp teamsOp) {
+    assert(!teamsOp.getRegion().empty() &&
+           teamsOp.getRegion().getBlocks().size() == 1);
+
+    mlir::Block &teamsBlock = *teamsOp.getRegion().begin();
+    auto loopOpIter = llvm::find_if(teamsBlock, [](mlir::Operation &op) {
+      auto nestedLoopOp = llvm::dyn_cast<mlir::omp::LoopOp>(&op);
+
+      if (!nestedLoopOp)
+        return false;
+
+      return !nestedLoopOp.getReductionVars().empty();
+    });
+
+    if (loopOpIter == teamsBlock.end())
+      return nullptr;
+
+    // TODO return error if more than one loop op is nested. We need to
+    // coalesce reductions in this case.
+    return llvm::cast<mlir::omp::LoopOp>(loopOpIter);
+  }
+
+  mlir::LogicalResult
+  matchAndRewrite(mlir::omp::TeamsOp teamsOp, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    mlir::omp::LoopOp nestedLoopOp = tryToFindNestedLoopWithReuctions(teamsOp);
+
+    rewriter.modifyOpInPlace(teamsOp, [&]() {
+      teamsOp.setReductionMod(nestedLoopOp.getReductionMod());
+      teamsOp.getReductionVarsMutable().assign(nestedLoopOp.getReductionVars());
+      teamsOp.setReductionByref(nestedLoopOp.getReductionByref());
+      teamsOp.setReductionSymsAttr(nestedLoopOp.getReductionSymsAttr());
+
+      auto blockArgIface =
+          llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*teamsOp);
+      unsigned reductionArgsStart = blockArgIface.getPrivateBlockArgsStart() +
+                                    blockArgIface.numPrivateBlockArgs();
+      llvm::SmallVector<mlir::Value> newLoopOpReductionOperands;
+
+      for (auto [idx, reductionVar] :
+           llvm::enumerate(nestedLoopOp.getReductionVars())) {
+        mlir::BlockArgument newTeamsOpReductionBlockArg =
+            teamsOp.getRegion().insertArgument(reductionArgsStart + idx,
+                                               reductionVar.getType(),
+                                               reductionVar.getLoc());
+        newLoopOpReductionOperands.push_back(newTeamsOpReductionBlockArg);
+      }
+
+      nestedLoopOp.getReductionVarsMutable().assign(newLoopOpReductionOperands);
+    });
+
+    return mlir::success();
+  }
+};
+
 class GenericLoopConversionPass
     : public flangomp::impl::GenericLoopConversionPassBase<
           GenericLoopConversionPass> {
@@ -357,11 +466,23 @@ class GenericLoopConversionPass
 
     mlir::MLIRContext *context = &getContext();
     mlir::RewritePatternSet patterns(context);
-    patterns.insert<GenericLoopConversionPattern>(context);
+    patterns.insert<ReductionsMoverConverPattern, GenericLoopConversionPattern>(
+        context);
     mlir::ConversionTarget target(*context);
 
     target.markUnknownOpDynamicallyLegal(
         [](mlir::Operation *) { return true; });
+
+    target.addDynamicallyLegalOp<mlir::omp::TeamsOp>(
+        [](mlir::omp::TeamsOp teamsOp) {
+          // If teamsOp's reductions are already populated, then the op is
+          // legal. Additionally, the op is legal if it does not nest a LoopOp
+          // with reductions.
+          return !teamsOp.getReductionVars().empty() ||
+                 ReductionsMoverConverPattern::tryToFindNestedLoopWithReuctions(
+                     teamsOp) == nullptr;
+        });
+
     target.addDynamicallyLegalOp<mlir::omp::LoopOp>(
         [](mlir::omp::LoopOp loopOp) {
           return mlir::failed(
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index 123dfffa350d7..61ac59d577c63 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -318,12 +318,12 @@ subroutine loop_parallel_bind_reduction
 subroutine loop_teams_loop_reduction
   implicit none
   integer :: x, i
-  ! CHECK: omp.teams {
+  ! CHECK: omp.teams reduction(@add_reduction_i32 %{{.*}}#0 -> %[[TEAMS_RED_ARG:.*]] : !fir.ref<i32>) {
   ! CHECK:   omp.parallel
   ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>) {
   ! CHECK:      omp.distribute {
   ! CHECK:        omp.wsloop
-  ! CHECK-SAME:     reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+  ! CHECK-SAME:     reduction(@add_reduction_i32 %[[TEAMS_RED_ARG]] -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
   ! CHECK-NEXT:     omp.loop_nest {{.*}} {
   ! CHECK-NEXT:       hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"}
   ! CHECK-NEXT:       hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}

>From 37f2f8ebaeb64a812875eea041176480464d9fa6 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 19 Mar 2025 23:12:59 -0500
Subject: [PATCH 2/2] Handle review comments

---
 flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index 38ae90519c5cd..727d48b986e7f 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -397,7 +397,7 @@ class ReductionsMoverConverPattern
   using mlir::OpConversionPattern<mlir::omp::TeamsOp>::OpConversionPattern;
 
   static mlir::omp::LoopOp
-  tryToFindNestedLoopWithReuctions(mlir::omp::TeamsOp teamsOp) {
+  tryToFindNestedLoopWithReduction(mlir::omp::TeamsOp teamsOp) {
     assert(!teamsOp.getRegion().empty() &&
            teamsOp.getRegion().getBlocks().size() == 1);
 
@@ -422,7 +422,7 @@ class ReductionsMoverConverPattern
   mlir::LogicalResult
   matchAndRewrite(mlir::omp::TeamsOp teamsOp, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
-    mlir::omp::LoopOp nestedLoopOp = tryToFindNestedLoopWithReuctions(teamsOp);
+    mlir::omp::LoopOp nestedLoopOp = tryToFindNestedLoopWithReduction(teamsOp);
 
     rewriter.modifyOpInPlace(teamsOp, [&]() {
       teamsOp.setReductionMod(nestedLoopOp.getReductionMod());
@@ -479,7 +479,7 @@ class GenericLoopConversionPass
           // legal. Additionally, the op is legal if it does not nest a LoopOp
           // with reductions.
           return !teamsOp.getReductionVars().empty() ||
-                 ReductionsMoverConverPattern::tryToFindNestedLoopWithReuctions(
+                 ReductionsMoverConverPattern::tryToFindNestedLoopWithReduction(
                      teamsOp) == nullptr;
         });
 



More information about the flang-commits mailing list