[flang-commits] [flang] [NFC][mlir][OpenMP] Remove mentions of `target` from generic `loop` rewrite (PR #124528)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Mon Jan 27 06:38:55 PST 2025
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/124528
>From 00cba1e8595fd353e3ebae1c547a53243efff62c Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 27 Jan 2025 04:56:13 -0600
Subject: [PATCH] [NFC][mlir][OpenMP] Remove mentions of `target` from generic
loop rewrite
This removes mentions of `target` from the generic `loop` rewrite pass
since there is no need for it anyway. It is enough to detect `loop`'s
nesting within `teams` or `parallel` directives.
---
.../OpenMP/GenericLoopConversion.cpp | 26 +++-----
.../generic-loop-rewriting-todo.mlir | 66 ++++++++-----------
.../Transforms/generic-loop-rewriting.mlir | 53 +++++++--------
3 files changed, 61 insertions(+), 84 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index c95d625d7240b4..87d4f20af4bbac 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -29,11 +29,7 @@ namespace {
class GenericLoopConversionPattern
: public mlir::OpConversionPattern<mlir::omp::LoopOp> {
public:
- enum class GenericLoopCombinedInfo {
- Standalone,
- TargetTeamsLoop,
- TargetParallelLoop
- };
+ enum class GenericLoopCombinedInfo { Standalone, TeamsLoop, ParallelLoop };
using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;
@@ -55,10 +51,10 @@ class GenericLoopConversionPattern
case GenericLoopCombinedInfo::Standalone:
rewriteStandaloneLoop(loopOp, rewriter);
break;
- case GenericLoopCombinedInfo::TargetParallelLoop:
- llvm_unreachable("not yet implemented: `parallel loop` direcitve");
+ case GenericLoopCombinedInfo::ParallelLoop:
+ llvm_unreachable("not yet implemented: Combined `parallel loop` directive");
break;
- case GenericLoopCombinedInfo::TargetTeamsLoop:
+ case GenericLoopCombinedInfo::TeamsLoop:
rewriteToDistributeParallelDo(loopOp, rewriter);
break;
}
@@ -74,10 +70,10 @@ class GenericLoopConversionPattern
switch (combinedInfo) {
case GenericLoopCombinedInfo::Standalone:
break;
- case GenericLoopCombinedInfo::TargetParallelLoop:
+ case GenericLoopCombinedInfo::ParallelLoop:
return loopOp.emitError(
- "not yet implemented: Combined `omp target parallel loop` directive");
- case GenericLoopCombinedInfo::TargetTeamsLoop:
+ "not yet implemented: Combined `parallel loop` directive");
+ case GenericLoopCombinedInfo::TeamsLoop:
break;
}
@@ -99,7 +95,7 @@ class GenericLoopConversionPattern
if (!loopOp.getReductionVars().empty())
return todo("reduction");
- // TODO For `target teams loop`, check similar constrains to what is checked
+ // TODO For `teams loop`, check similar constrains to what is checked
// by `TeamsLoopChecker` in SemaOpenMP.cpp.
return mlir::success();
}
@@ -111,13 +107,11 @@ class GenericLoopConversionPattern
GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;
if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
- if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp()))
- result = GenericLoopCombinedInfo::TargetTeamsLoop;
+ result = GenericLoopCombinedInfo::TeamsLoop;
if (auto parallelOp =
mlir::dyn_cast_if_present<mlir::omp::ParallelOp>(parentOp))
- if (mlir::isa_and_present<mlir::omp::TargetOp>(parallelOp->getParentOp()))
- result = GenericLoopCombinedInfo::TargetParallelLoop;
+ result = GenericLoopCombinedInfo::ParallelLoop;
return result;
}
diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
index becd6b8dcb5cb4..cbde981c4c49d3 100644
--- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -1,37 +1,31 @@
// RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
-func.func @_QPtarget_parallel_loop() {
- omp.target {
- omp.parallel {
- %c0 = arith.constant 0 : i32
- %c10 = arith.constant 10 : i32
- %c1 = arith.constant 1 : i32
- // expected-error at below {{not yet implemented: Combined `omp target parallel loop` directive}}
- omp.loop {
- omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
- omp.yield
- }
+func.func @_QPparallel_loop() {
+ omp.parallel {
+ %c0 = arith.constant 0 : i32
+ %c10 = arith.constant 10 : i32
+ %c1 = arith.constant 1 : i32
+ // expected-error at below {{not yet implemented: Combined `parallel loop` directive}}
+ omp.loop {
+ omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
+ omp.yield
}
- omp.terminator
}
omp.terminator
}
return
}
-func.func @_QPtarget_loop_bind() {
- omp.target {
- omp.teams {
- %c0 = arith.constant 0 : i32
- %c10 = arith.constant 10 : i32
- %c1 = arith.constant 1 : i32
- // expected-error at below {{not yet implemented: Unhandled clause bind in omp.loop operation}}
- omp.loop bind(thread) {
- omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
- omp.yield
- }
+func.func @_QPloop_bind() {
+ omp.teams {
+ %c0 = arith.constant 0 : i32
+ %c10 = arith.constant 10 : i32
+ %c1 = arith.constant 1 : i32
+ // expected-error at below {{not yet implemented: Unhandled clause bind in omp.loop operation}}
+ omp.loop bind(thread) {
+ omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
+ omp.yield
}
- omp.terminator
}
omp.terminator
}
@@ -48,22 +42,18 @@ omp.declare_reduction @add_reduction_i32 : i32 init {
omp.yield(%0 : i32)
}
-func.func @_QPtarget_loop_order() {
+func.func @_QPloop_order() {
+ omp.teams {
+ %c0 = arith.constant 0 : i32
+ %c10 = arith.constant 10 : i32
+ %c1 = arith.constant 1 : i32
+ %sum = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_orderEi"}
- omp.target {
- omp.teams {
- %c0 = arith.constant 0 : i32
- %c10 = arith.constant 10 : i32
- %c1 = arith.constant 1 : i32
- %sum = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_orderEi"}
-
- // expected-error at below {{not yet implemented: Unhandled clause reduction in omp.loop operation}}
- omp.loop reduction(@add_reduction_i32 %sum -> %arg2 : !fir.ref<i32>) {
- omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
- omp.yield
- }
+ // expected-error at below {{not yet implemented: Unhandled clause reduction in omp.loop operation}}
+ omp.loop reduction(@add_reduction_i32 %sum -> %arg2 : !fir.ref<i32>) {
+ omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
+ omp.yield
}
- omp.terminator
}
omp.terminator
}
diff --git a/flang/test/Transforms/generic-loop-rewriting.mlir b/flang/test/Transforms/generic-loop-rewriting.mlir
index a18ea9853602ac..842136444fc154 100644
--- a/flang/test/Transforms/generic-loop-rewriting.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting.mlir
@@ -1,55 +1,48 @@
// RUN: fir-opt --omp-generic-loop-conversion %s | FileCheck %s
-omp.private {type = private} @_QFtarget_teams_loopEi_private_ref_i32 : !fir.ref<i32> alloc {
+omp.private {type = private} @_QFteams_loopEi_private_ref_i32 : !fir.ref<i32> alloc {
^bb0(%arg0: !fir.ref<i32>):
omp.yield(%arg0 : !fir.ref<i32>)
}
-func.func @_QPtarget_teams_loop() {
+func.func @_QPteams_loop() {
%i = fir.alloca i32
- %i_map = omp.map.info var_ptr(%i : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
- omp.target map_entries(%i_map -> %arg0 : !fir.ref<i32>) {
- omp.teams {
- %c0 = arith.constant 0 : i32
- %c10 = arith.constant 10 : i32
- %c1 = arith.constant 1 : i32
- omp.loop private(@_QFtarget_teams_loopEi_private_ref_i32 %arg0 -> %arg2 : !fir.ref<i32>) {
- omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
- fir.store %arg3 to %arg2 : !fir.ref<i32>
- omp.yield
- }
+ omp.teams {
+ %c0 = arith.constant 0 : i32
+ %c10 = arith.constant 10 : i32
+ %c1 = arith.constant 1 : i32
+ omp.loop private(@_QFteams_loopEi_private_ref_i32 %i -> %arg2 : !fir.ref<i32>) {
+ omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
+ fir.store %arg3 to %arg2 : !fir.ref<i32>
+ omp.yield
}
- omp.terminator
}
omp.terminator
}
return
}
-// CHECK-LABEL: func.func @_QPtarget_teams_loop
-// CHECK: omp.target map_entries(
-// CHECK-SAME: %{{.*}} -> %[[I_ARG:[^[:space:]]+]] : {{.*}}) {
-//
-// CHECK: omp.teams {
+// CHECK-LABEL: func.func @_QPteams_loop
+// CHECK: %[[I:.*]] = fir.alloca i32
+// CHECK: omp.teams {
//
// TODO we probably need to move the `loop_nest` bounds ops from the `teams`
// region to the `parallel` region to avoid making these values `shared`. We can
// find the backward slices of these bounds that are within the `teams` region
// and move these slices to the `parallel` op.
-// CHECK: %[[LB:.*]] = arith.constant 0 : i32
-// CHECK: %[[UB:.*]] = arith.constant 10 : i32
-// CHECK: %[[STEP:.*]] = arith.constant 1 : i32
+// CHECK: %[[LB:.*]] = arith.constant 0 : i32
+// CHECK: %[[UB:.*]] = arith.constant 10 : i32
+// CHECK: %[[STEP:.*]] = arith.constant 1 : i32
//
-// CHECK: omp.parallel private(@{{.*}} %[[I_ARG]]
-// CHECK-SAME: -> %[[I_PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>) {
-// CHECK: omp.distribute {
-// CHECK: omp.wsloop {
+// CHECK: omp.parallel private(@{{.*}} %[[I]]
+// CHECK-SAME: -> %[[I_PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>) {
+// CHECK: omp.distribute {
+// CHECK: omp.wsloop {
//
-// CHECK: omp.loop_nest (%{{.*}}) : i32 =
-// CHECK-SAME: (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
-// CHECK: fir.store %{{.*}} to %[[I_PRIV_ARG]] : !fir.ref<i32>
-// CHECK: }
+// CHECK: omp.loop_nest (%{{.*}}) : i32 =
+// CHECK-SAME: (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+// CHECK: fir.store %{{.*}} to %[[I_PRIV_ARG]] : !fir.ref<i32>
// CHECK: }
// CHECK: }
// CHECK: }
More information about the flang-commits
mailing list