[flang-commits] [flang] [NFC][mlir][OpenMP] Remove mentions of `target` from generic `loop` rewrite (PR #124528)

Mon Jan 27 02:59:48 PST 2025

https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/124528

This removes mentions of `target` from the generic `loop` rewrite pass
since there is not need for it anyway. It is enough to detect `loop`'s
nesting within `teams` or `parallel` directives.

>From 09195daa445df9b804c7abb24b59e69b8815616e Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Sun, 12 Jan 2025 23:47:09 -0600
Subject: [PATCH 1/2] [flang][OpenMP] Support `bind` clause code-gen for
 standalone `loop`s

Extends rewriting of `loop` directives by supporting `bind` clause for
standalone directives. This follows both the spec and the current state
of clang as follows:

* No `bind` or `bind(thread)`: the `loop` is rewritten to `simd`.
* `bind(parallel)`: the `loop` is rewritten to `do`.
* `bind(teams)`: the `loop` is rewritten to `distribute`.
---
 .../OpenMP/GenericLoopConversion.cpp          | 77 ++++++++++++++-----
 flang/test/Lower/OpenMP/loop-directive.f90    | 42 +++++++++-
 2 files changed, 100 insertions(+), 19 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index 555601c5e92df6..13e9f8dc04e032 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -53,7 +53,7 @@ class GenericLoopConversionPattern
 
     switch (combinedInfo) {
     case GenericLoopCombinedInfo::Standalone:
-      rewriteToSimdLoop(loopOp, rewriter);
+      rewriteStandaloneLoop(loopOp, rewriter);
       break;
     case GenericLoopCombinedInfo::TargetParallelLoop:
       llvm_unreachable("not yet implemented: `parallel loop` direcitve");
@@ -87,7 +87,10 @@ class GenericLoopConversionPattern
              << loopOp->getName() << " operation";
     };
 
-    if (loopOp.getBindKind())
+    // For standalone directives, `bind` is already supported. Other combined
+    // forms will be supported in a follow-up PR.
+    if (combinedInfo != GenericLoopCombinedInfo::Standalone &&
+        loopOp.getBindKind())
       return todo("bind");
 
     if (loopOp.getOrder())
@@ -119,7 +122,27 @@ class GenericLoopConversionPattern
     return result;
   }
 
-  /// Rewrites standalone `loop` directives to equivalent `simd` constructs.
+  void rewriteStandaloneLoop(mlir::omp::LoopOp loopOp,
+                             mlir::ConversionPatternRewriter &rewriter) const {
+    using namespace mlir::omp;
+    std::optional<ClauseBindKind> bindKind = loopOp.getBindKind();
+
+    if (!bindKind.has_value())
+      return rewriteToSimdLoop(loopOp, rewriter);
+
+    switch (*loopOp.getBindKind()) {
+    case ClauseBindKind::Parallel:
+      return rewriteToWsloop(loopOp, rewriter);
+    case ClauseBindKind::Teams:
+      return rewriteToDistrbute(loopOp, rewriter);
+    case ClauseBindKind::Thread:
+      return rewriteToSimdLoop(loopOp, rewriter);
+    }
+  }
+
+  /// Rewrites standalone `loop` (without `bind` clause or with
+  /// `bind(parallel)`) directives to equivalent `simd` constructs.
+  ///
   /// The reasoning behind this decision is that according to the spec (version
   /// 5.2, section 11.7.1):
   ///
@@ -147,30 +170,48 @@ class GenericLoopConversionPattern
   /// the directive.
   void rewriteToSimdLoop(mlir::omp::LoopOp loopOp,
                          mlir::ConversionPatternRewriter &rewriter) const {
-    loopOp.emitWarning("Detected standalone OpenMP `loop` directive, the "
-                       "associated loop will be rewritten to `simd`.");
-    mlir::omp::SimdOperands simdClauseOps;
-    simdClauseOps.privateVars = loopOp.getPrivateVars();
+    loopOp.emitWarning(
+        "Detected standalone OpenMP `loop` directive with thread binding, "
+        "the associated loop will be rewritten to `simd`.");
+    rewriteToSingleWrapperOp<mlir::omp::SimdOp, mlir::omp::SimdOperands>(
+        loopOp, rewriter);
+  }
+
+  void rewriteToDistrbute(mlir::omp::LoopOp loopOp,
+                          mlir::ConversionPatternRewriter &rewriter) const {
+    rewriteToSingleWrapperOp<mlir::omp::DistributeOp,
+                             mlir::omp::DistributeOperands>(loopOp, rewriter);
+  }
+
+  void rewriteToWsloop(mlir::omp::LoopOp loopOp,
+                       mlir::ConversionPatternRewriter &rewriter) const {
+    rewriteToSingleWrapperOp<mlir::omp::WsloopOp, mlir::omp::WsloopOperands>(
+        loopOp, rewriter);
+  }
+
+  template <typename OpTy, typename OpOperandsTy>
+  void
+  rewriteToSingleWrapperOp(mlir::omp::LoopOp loopOp,
+                           mlir::ConversionPatternRewriter &rewriter) const {
+    OpOperandsTy clauseOps;
+    clauseOps.privateVars = loopOp.getPrivateVars();
 
     auto privateSyms = loopOp.getPrivateSyms();
     if (privateSyms)
-      simdClauseOps.privateSyms.assign(privateSyms->begin(),
-                                       privateSyms->end());
+      clauseOps.privateSyms.assign(privateSyms->begin(), privateSyms->end());
 
-    Fortran::common::openmp::EntryBlockArgs simdArgs;
-    simdArgs.priv.vars = simdClauseOps.privateVars;
+    Fortran::common::openmp::EntryBlockArgs args;
+    args.priv.vars = clauseOps.privateVars;
 
-    auto simdOp =
-        rewriter.create<mlir::omp::SimdOp>(loopOp.getLoc(), simdClauseOps);
-    mlir::Block *simdBlock =
-        genEntryBlock(rewriter, simdArgs, simdOp.getRegion());
+    auto wrapperOp = rewriter.create<OpTy>(loopOp.getLoc(), clauseOps);
+    mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion());
 
     mlir::IRMapping mapper;
     mlir::Block &loopBlock = *loopOp.getRegion().begin();
 
-    for (auto [loopOpArg, simdopArg] :
-         llvm::zip_equal(loopBlock.getArguments(), simdBlock->getArguments()))
-      mapper.map(loopOpArg, simdopArg);
+    for (auto [loopOpArg, opArg] :
+         llvm::zip_equal(loopBlock.getArguments(), opBlock->getArguments()))
+      mapper.map(loopOpArg, opArg);
 
     rewriter.clone(*loopOp.begin(), mapper);
   }
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index 9fa0de3bfe171a..845905da0fcba2 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -92,7 +92,7 @@ subroutine test_reduction()
 ! CHECK-LABEL: func.func @_QPtest_bind
 subroutine test_bind()
   integer :: i, dummy = 1
-  ! CHECK: omp.loop bind(thread) private(@{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
+  ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
   ! CHECK: }
   !$omp loop bind(thread)
   do i=1,10
@@ -139,3 +139,43 @@ subroutine test_nested_directives
   end do
   !$omp end target teams
 end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_standalone_bind_teams
+subroutine test_standalone_bind_teams
+  implicit none
+  integer, parameter :: N = 100000
+  integer a(N), b(N), c(N)
+  integer j,i, num, flag;
+  num = N
+
+  ! CHECK:     omp.distribute
+  ! CHECK-SAME:  private(@{{.*}}Ea_private_ref_100000xi32 {{[^,]*}},
+  ! CHECK-SAME:          @{{.*}}Ei_private_ref_i32 {{.*}} : {{.*}}) {
+  ! CHECK:       omp.loop_nest {{.*}} {
+  ! CHECK:       }
+  ! CHECK:     }
+  !$omp loop bind(teams) private(a)
+  do i=1,N
+    c(i) = a(i) * b(i)
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_standalone_bind_parallel
+subroutine test_standalone_bind_parallel
+  implicit none
+  integer, parameter :: N = 100000
+  integer a(N), b(N), c(N)
+  integer j,i, num, flag;
+  num = N
+
+  ! CHECK:     omp.wsloop
+  ! CHECK-SAME:  private(@{{.*}}Ea_private_ref_100000xi32 {{[^,]*}},
+  ! CHECK-SAME:          @{{.*}}Ei_private_ref_i32 {{.*}} : {{.*}}) {
+  ! CHECK:       omp.loop_nest {{.*}} {
+  ! CHECK:       }
+  ! CHECK:     }
+  !$omp loop bind(parallel) private(a)
+  do i=1,N
+    c(i) = a(i) * b(i)
+  end do
+end subroutine

>From 14ebcfdf2da1cae38f67c83b0624a963961948ca Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 27 Jan 2025 04:56:13 -0600
Subject: [PATCH 2/2] [NFC][mlir][OpenMP] Remove mentions of `target` from
 generic loop rewrite

This removes mentions of `target` from the generic `loop` rewrite pass
since there is not need for it anyway. It is enough to detect `loop`'s
nesting within `teams` or `parallel` directives.
---
 .../OpenMP/GenericLoopConversion.cpp          | 24 +++++++------------
 .../generic-loop-rewriting-todo.mlir          |  2 +-
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index 13e9f8dc04e032..36d6c5a4e6b3b2 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -29,11 +29,7 @@ namespace {
 class GenericLoopConversionPattern
     : public mlir::OpConversionPattern<mlir::omp::LoopOp> {
 public:
-  enum class GenericLoopCombinedInfo {
-    Standalone,
-    TargetTeamsLoop,
-    TargetParallelLoop
-  };
+  enum class GenericLoopCombinedInfo { Standalone, TeamsLoop, ParallelLoop };
 
   using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;
 
@@ -55,10 +51,10 @@ class GenericLoopConversionPattern
     case GenericLoopCombinedInfo::Standalone:
       rewriteStandaloneLoop(loopOp, rewriter);
       break;
-    case GenericLoopCombinedInfo::TargetParallelLoop:
+    case GenericLoopCombinedInfo::ParallelLoop:
       llvm_unreachable("not yet implemented: `parallel loop` direcitve");
       break;
-    case GenericLoopCombinedInfo::TargetTeamsLoop:
+    case GenericLoopCombinedInfo::TeamsLoop:
       rewriteToDistributeParallelDo(loopOp, rewriter);
       break;
     }
@@ -74,10 +70,10 @@ class GenericLoopConversionPattern
     switch (combinedInfo) {
     case GenericLoopCombinedInfo::Standalone:
       break;
-    case GenericLoopCombinedInfo::TargetParallelLoop:
+    case GenericLoopCombinedInfo::ParallelLoop:
       return loopOp.emitError(
-          "not yet implemented: Combined `omp target parallel loop` directive");
-    case GenericLoopCombinedInfo::TargetTeamsLoop:
+          "not yet implemented: Combined `parallel loop` directive");
+    case GenericLoopCombinedInfo::TeamsLoop:
       break;
     }
 
@@ -99,7 +95,7 @@ class GenericLoopConversionPattern
     if (!loopOp.getReductionVars().empty())
       return todo("reduction");
 
-    // TODO For `target teams loop`, check similar constrains to what is checked
+    // TODO For `teams loop`, check similar constrains to what is checked
     // by `TeamsLoopChecker` in SemaOpenMP.cpp.
     return mlir::success();
   }
@@ -111,13 +107,11 @@ class GenericLoopConversionPattern
     GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;
 
     if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
-      if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp()))
-        result = GenericLoopCombinedInfo::TargetTeamsLoop;
+      result = GenericLoopCombinedInfo::TeamsLoop;
 
     if (auto parallelOp =
             mlir::dyn_cast_if_present<mlir::omp::ParallelOp>(parentOp))
-      if (mlir::isa_and_present<mlir::omp::TargetOp>(parallelOp->getParentOp()))
-        result = GenericLoopCombinedInfo::TargetParallelLoop;
+      result = GenericLoopCombinedInfo::ParallelLoop;
 
     return result;
   }
diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
index becd6b8dcb5cb4..3259ceca70d50d 100644
--- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -6,7 +6,7 @@ func.func @_QPtarget_parallel_loop() {
       %c0 = arith.constant 0 : i32
       %c10 = arith.constant 10 : i32
       %c1 = arith.constant 1 : i32
-      // expected-error at below {{not yet implemented: Combined `omp target parallel loop` directive}}
+      // expected-error at below {{not yet implemented: Combined `parallel loop` directive}}
       omp.loop {
         omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
           omp.yield