[flang-commits] [flang] 6333f84 - [flang][OpenMP] Move reductions from `loop` to `teams` when `loop` is mapped to `distribute` (#132920)

via flang-commits flang-commits at lists.llvm.org
Thu Apr 3 21:20:55 PDT 2025


Author: Kareem Ergawy
Date: 2025-04-04T06:20:51+02:00
New Revision: 6333f8457c43f5a2d19d6552689726e86196dea7

URL: https://github.com/llvm/llvm-project/commit/6333f8457c43f5a2d19d6552689726e86196dea7
DIFF: https://github.com/llvm/llvm-project/commit/6333f8457c43f5a2d19d6552689726e86196dea7.diff

LOG: [flang][OpenMP] Move reductions from `loop` to `teams` when `loop` is mapped to `distribute` (#132920)

Follow-up to #132003; in particular, see
https://github.com/llvm/llvm-project/pull/132003#issuecomment-2739701936.

This PR extends reduction support for `loop` directives. Consider the
following scenario:
```fortran
subroutine bar
  implicit none
  integer :: x, i

  !$omp teams loop reduction(+: x)
  DO i = 1, 5
    call foo()
  END DO
end subroutine
```
Note the following:
* According to the spec, the `reduction` clause will be attached to
`loop` during earlier stages in the compiler.
* Additionally, `loop` cannot be mapped to `distribute parallel do` due
to the call to a foreign function inside the loop's body.
* Therefore, `loop` must be mapped to `distribute`.
* However, `distribute` does not accept a `reduction` clause.
* As a result, we have to move the `reduction`s from the `loop` to its
parent `teams` directive, which is what is done by this PR.

Added: 
    

Modified: 
    flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
    flang/test/Lower/OpenMP/loop-directive.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index 74ad6330b11a7..3009746954984 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -57,10 +57,38 @@ class GenericLoopConversionPattern
       rewriteToWsloop(loopOp, rewriter);
       break;
     case GenericLoopCombinedInfo::TeamsLoop:
-      if (teamsLoopCanBeParallelFor(loopOp))
+      if (teamsLoopCanBeParallelFor(loopOp)) {
         rewriteToDistributeParallelDo(loopOp, rewriter);
-      else
+      } else {
+        auto teamsOp = llvm::cast<mlir::omp::TeamsOp>(loopOp->getParentOp());
+        auto teamsBlockArgIface =
+            llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*teamsOp);
+        auto loopBlockArgIface =
+            llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);
+
+        for (unsigned i = 0; i < loopBlockArgIface.numReductionBlockArgs();
+             ++i) {
+          mlir::BlockArgument loopRedBlockArg =
+              loopBlockArgIface.getReductionBlockArgs()[i];
+          mlir::BlockArgument teamsRedBlockArg =
+              teamsBlockArgIface.getReductionBlockArgs()[i];
+          rewriter.replaceAllUsesWith(loopRedBlockArg, teamsRedBlockArg);
+        }
+
+        for (unsigned i = 0; i < loopBlockArgIface.numReductionBlockArgs();
+             ++i) {
+          loopOp.getRegion().eraseArgument(
+              loopBlockArgIface.getReductionBlockArgsStart());
+        }
+
+        loopOp.removeReductionModAttr();
+        loopOp.getReductionVarsMutable().clear();
+        loopOp.removeReductionByrefAttr();
+        loopOp.removeReductionSymsAttr();
+
         rewriteToDistribute(loopOp, rewriter);
+      }
+
       break;
     }
 

diff  --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index 954985e2d64f1..a974f264cc040 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -358,3 +358,40 @@ subroutine multi_block_teams
   end select
   !$omp end target teams
 end subroutine
+
+
+! Verifies that reductions are hoisted to the parent `teams` directive and removed
+! from the `loop` directive when `loop` is mapped to `distribute`.
+
+! CHECK-LABEL: func.func @_QPteams_loop_cannot_be_parallel_for_with_reductions
+subroutine teams_loop_cannot_be_parallel_for_with_reductions
+  implicit none
+  integer :: x, y, i, p
+
+  ! CHECK: %[[ADD_RED:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QF{{.*}}Ex"}
+  ! CHECK: %[[MUL_RED:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QF{{.*}}Ey"}
+  ! CHECK: omp.teams reduction(
+  ! CHECK-SAME:  @add_reduction_i32 %[[ADD_RED]]#0 -> %[[ADD_RED_ARG:[^[:space:]]*]], 
+  ! CHECK-SAME:  @multiply_reduction_i32 %[[MUL_RED]]#0 -> %[[MUL_RED_ARG:.*]] : {{.*}}) {
+
+  ! CHECK:       omp.distribute private(@{{.*}} %{{.*}} -> %{{.*}}, @{{.*}} %{{.*}} -> %{{.*}} : {{.*}}) {
+  ! CHECK:         %[[ADD_RED_DECL:.*]]:2 = hlfir.declare %[[ADD_RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+  ! CHECK:         %[[MUL_RED_DECL:.*]]:2 = hlfir.declare %[[MUL_RED_ARG]] {uniq_name = "_QF{{.*}}Ey"}
+
+  ! CHECK:         %[[ADD_RES:.*]] = arith.addi %{{.*}}, %{{.*}} : i32
+  ! CHECK:         hlfir.assign %[[ADD_RES]] to %[[ADD_RED_DECL]]#0 : i32, !fir.ref<i32>
+
+  ! CHECK:         %[[MUL_RES:.*]] = arith.muli %{{.*}}, %{{.*}} : i32
+  ! CHECK:         hlfir.assign %[[MUL_RES]] to %[[MUL_RED_DECL]]#0 : i32, !fir.ref<i32>
+  ! CHECK:         omp.yield
+  ! CHECK:       }
+  ! CHECK:       omp.terminator
+  ! CHECK: }
+  !$omp teams loop reduction(+: x) reduction(*: y) private(p)
+  do i = 1, 5
+    call foo()
+    x = x + i
+    y = y * i
+    p = 42
+  end do
+end subroutine


        


More information about the flang-commits mailing list