[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

Mon Aug 19 20:19:31 PDT 2024

https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748

>From bf0e09f9cfc3159517b1ebec9d39e1143fa935b9 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Tue, 20 Aug 2024 09:28:15 +0900
Subject: [PATCH 1/7] Iterate backwards to find all trivially dead ops

---
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp |  3 +-
 .../Transforms/OpenMP/lower-workshare4.mlir   | 56 ++++++++++---------
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index 9557dd200cacee..bfb9708af70923 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -200,7 +200,8 @@ static bool isTransitivelyUsedOutside(Value v, SingleRegion sr) {
 /// We clone pure operations in both the parallel and single blocks. this
 /// functions cleans them up if they end up with no uses
 static void cleanupBlock(Block *block) {
-  for (Operation &op : llvm::make_early_inc_range(*block))
+  for (Operation &op : llvm::make_early_inc_range(
+           llvm::make_range(block->rbegin(), block->rend())))
     if (isOpTriviallyDead(&op))
       op.erase();
 }
diff --git a/flang/test/Transforms/OpenMP/lower-workshare4.mlir b/flang/test/Transforms/OpenMP/lower-workshare4.mlir
index 44f68cd2ca3654..81bc20cb34b65d 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare4.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare4.mlir
@@ -1,8 +1,33 @@
 // RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s
 
-// Check that we cleanup unused pure operations from either the parallel or
-// single regions
+// Check that we cleanup unused pure operations from the parallel and single
+// regions
 
+// CHECK-LABEL:   func.func @wsfunc() {
+// CHECK:           %[[VAL_0:.*]] = fir.alloca i32
+// CHECK:           omp.parallel {
+// CHECK:             omp.single {
+// CHECK:               %[[VAL_1:.*]] = "test.test1"() : () -> i32
+// CHECK:               %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:               %[[VAL_3:.*]] = arith.constant 3 : index
+// CHECK:               %[[VAL_4:.*]] = arith.addi %[[VAL_2]], %[[VAL_3]] : index
+// CHECK:               "test.test3"(%[[VAL_4]]) : (index) -> ()
+// CHECK:               omp.terminator
+// CHECK:             }
+// CHECK:             %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:             %[[VAL_6:.*]] = arith.constant 42 : index
+// CHECK:             omp.wsloop nowait {
+// CHECK:               omp.loop_nest (%[[VAL_7:.*]]) : index = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_5]]) {
+// CHECK:                 "test.test2"() : () -> ()
+// CHECK:                 omp.yield
+// CHECK:               }
+// CHECK:               omp.terminator
+// CHECK:             }
+// CHECK:             omp.barrier
+// CHECK:             omp.terminator
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
 func.func @wsfunc() {
   %a = fir.alloca i32
   omp.parallel {
@@ -13,7 +38,9 @@ func.func @wsfunc() {
       %c42 = arith.constant 42 : index
 
       %c2 = arith.constant 2 : index
-      "test.test3"(%c2) : (index) -> ()
+      %c3 = arith.constant 3 : index
+      %add = arith.addi %c2, %c3 : index
+      "test.test3"(%add) : (index) -> ()
 
       omp.workshare_loop_wrapper {
         omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) {
@@ -29,27 +56,4 @@ func.func @wsfunc() {
   return
 }
 
-// CHECK-LABEL:   func.func @wsfunc() {
-// CHECK:           %[[VAL_0:.*]] = fir.alloca i32
-// CHECK:           omp.parallel {
-// CHECK:             omp.single {
-// CHECK:               %[[VAL_1:.*]] = "test.test1"() : () -> i32
-// CHECK:               %[[VAL_2:.*]] = arith.constant 2 : index
-// CHECK:               "test.test3"(%[[VAL_2]]) : (index) -> ()
-// CHECK:               omp.terminator
-// CHECK:             }
-// CHECK:             %[[VAL_3:.*]] = arith.constant 1 : index
-// CHECK:             %[[VAL_4:.*]] = arith.constant 42 : index
-// CHECK:             omp.wsloop nowait {
-// CHECK:               omp.loop_nest (%[[VAL_5:.*]]) : index = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_3]]) {
-// CHECK:                 "test.test2"() : () -> ()
-// CHECK:                 omp.yield
-// CHECK:               }
-// CHECK:               omp.terminator
-// CHECK:             }
-// CHECK:             omp.barrier
-// CHECK:             omp.terminator
-// CHECK:           }
-// CHECK:           return
-// CHECK:         }
 

>From 90cd77c1c7ffdadfe4f088b358c0ee9ee1958872 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Tue, 20 Aug 2024 12:17:45 +0900
Subject: [PATCH 2/7] Add expalanation comment for createCopyFun

---
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index bfb9708af70923..284b2bed9628fe 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -136,6 +136,9 @@ static bool isSafeToParallelize(Operation *op) {
          isMemoryEffectFree(op);
 }
 
+/// Simple shallow copies suffice for our purposes in this pass, so we implement
+/// this simpler alternative to the full fledged `createCopyFunc` in the
+/// frontend
 static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type varType,
                                          fir::FirOpBuilder builder) {
   mlir::ModuleOp module = builder.getModule();

>From b9ba0d69b7c7559464e0f4601928bd9a38b56735 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 3/7] Add workshare loop wrapper lowerings

---
 .../lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp  |  6 ++++--
 .../HLFIR/Transforms/OptimizedBufferization.cpp        | 10 +++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index b608677c526310..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,12 +26,13 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 namespace hlfir {
@@ -792,7 +793,8 @@ struct ElementalOpConversion
     // Generate a loop nest looping around the fir.elemental shape and clone
     // fir.elemental region inside the inner loop.
     hlfir::LoopNest loopNest =
-        hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+        hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+                           flangomp::shouldUseWorkshareLowering(elemental));
     auto insPt = builder.saveInsertionPoint();
     builder.setInsertionPointToStart(loopNest.body);
     auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index c4aed6b79df923..150e3e91197241 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "flang/Optimizer/Transforms/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
   // Generate a loop nest looping around the hlfir.elemental shape and clone
   // hlfir.elemental region inside the inner loop
   hlfir::LoopNest loopNest =
-      hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+      hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+                         flangomp::shouldUseWorkshareLowering(elemental));
   builder.setInsertionPointToStart(loopNest.body);
   auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
                                         loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
   llvm::SmallVector<mlir::Value> extents =
       hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-      hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+      hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+                         flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto arrayElement =
       hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
@@ -648,7 +651,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
   llvm::SmallVector<mlir::Value> extents =
       hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-      hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+      hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+                         flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto rhsArrayElement =
       hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);

>From 7f6e8237c2d60e7873dfa82c126ea96e687457a8 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 19 Aug 2024 15:01:31 +0900
Subject: [PATCH 4/7] Bufferize test

---
 flang/test/HLFIR/bufferize-workshare.fir | 58 ++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir

diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir
new file mode 100644
index 00000000000000..86a2f031478dd7
--- /dev/null
+++ b/flang/test/HLFIR/bufferize-workshare.fir
@@ -0,0 +1,58 @@
+// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
+
+// CHECK-LABEL:   func.func @simple(
+// CHECK-SAME:                      %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
+// CHECK:           omp.parallel {
+// CHECK:             omp.workshare {
+// CHECK:               %[[VAL_1:.*]] = arith.constant 42 : index
+// CHECK:               %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK:               %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK:               %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+// CHECK:               %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK:               %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
+// CHECK:               %[[VAL_7:.*]] = arith.constant true
+// CHECK:               %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK:               omp.wsloop {
+// CHECK:                 omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
+// CHECK:                   %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK:                   %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+// CHECK:                   %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
+// CHECK:                   %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]])  : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK:                   hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
+// CHECK:                   omp.yield
+// CHECK:                 }
+// CHECK:                 omp.terminator
+// CHECK:               }
+// CHECK:               %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK:               %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK:               %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK:               hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
+// CHECK:               fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
+// CHECK:               omp.terminator
+// CHECK:             }
+// CHECK:             omp.terminator
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
+func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
+  omp.parallel {
+    omp.workshare {
+      %c42 = arith.constant 42 : index
+      %c1_i32 = arith.constant 1 : i32
+      %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+      %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+      %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+      ^bb0(%i: index):
+        %ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+        %val = fir.load %ref : !fir.ref<i32>
+        %sub = arith.subi %val, %c1_i32 : i32
+        hlfir.yield_element %sub : i32
+      }
+      hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+      hlfir.destroy %elemental : !hlfir.expr<42xi32>
+      omp.terminator
+    }
+    omp.terminator
+  }
+  return
+}

>From e71d30be67541e9735db1db2cd8223f65c8e834f Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 19 Aug 2024 15:03:42 +0900
Subject: [PATCH 5/7] Bufferize test

---
 flang/test/HLFIR/bufferize-workshare.fir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir
index 86a2f031478dd7..33b368a62eaabf 100644
--- a/flang/test/HLFIR/bufferize-workshare.fir
+++ b/flang/test/HLFIR/bufferize-workshare.fir
@@ -12,7 +12,7 @@
 // CHECK:               %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
 // CHECK:               %[[VAL_7:.*]] = arith.constant true
 // CHECK:               %[[VAL_8:.*]] = arith.constant 1 : index
-// CHECK:               omp.wsloop {
+// CHECK:               "omp.workshare_loop_wrapper"() ({
 // CHECK:                 omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
 // CHECK:                   %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
 // CHECK:                   %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
@@ -22,7 +22,7 @@
 // CHECK:                   omp.yield
 // CHECK:                 }
 // CHECK:                 omp.terminator
-// CHECK:               }
+// CHECK:               }) : () -> ()
 // CHECK:               %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
 // CHECK:               %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
 // CHECK:               %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>

>From eb8b7a78d39d2ac779898a5080a2c0da575a3d69 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 19 Aug 2024 15:04:39 +0900
Subject: [PATCH 6/7] Bufferize test

---
 flang/test/HLFIR/bufferize-workshare.fir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir
index 33b368a62eaabf..02fe6b1d1799c3 100644
--- a/flang/test/HLFIR/bufferize-workshare.fir
+++ b/flang/test/HLFIR/bufferize-workshare.fir
@@ -12,7 +12,7 @@
 // CHECK:               %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
 // CHECK:               %[[VAL_7:.*]] = arith.constant true
 // CHECK:               %[[VAL_8:.*]] = arith.constant 1 : index
-// CHECK:               "omp.workshare_loop_wrapper"() ({
+// CHECK:               omp.workshare_loop_wrapper {
 // CHECK:                 omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
 // CHECK:                   %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
 // CHECK:                   %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
@@ -22,7 +22,7 @@
 // CHECK:                   omp.yield
 // CHECK:                 }
 // CHECK:                 omp.terminator
-// CHECK:               }) : () -> ()
+// CHECK:               }
 // CHECK:               %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
 // CHECK:               %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
 // CHECK:               %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>

>From 0ee9e96472dd034b7214dc2edd21fcc98389cdb8 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 19 Aug 2024 16:53:40 +0900
Subject: [PATCH 7/7] Add test for should use workshare lowering

---
 .../OpenMP/should-use-workshare-lowering.mlir | 140 ++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir

diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
new file mode 100644
index 00000000000000..2ba445faf780ea
--- /dev/null
+++ b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
@@ -0,0 +1,140 @@
+// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
+
+// Checks that we correctly identify when to use the lowering to
+// omp.workshare_loop_wrapper
+
+// CHECK-LABEL: @should_parallelize_0
+// CHECK: omp.workshare_loop_wrapper
+func.func @should_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+  omp.workshare {
+    %c42 = arith.constant 42 : index
+    %c1_i32 = arith.constant 1 : i32
+    %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+    %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+    %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+    ^bb0(%i: index):
+      hlfir.yield_element %c1_i32 : i32
+    }
+    hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+    hlfir.destroy %elemental : !hlfir.expr<42xi32>
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @should_parallelize_1
+// CHECK: omp.workshare_loop_wrapper
+func.func @should_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+  omp.parallel {
+    omp.workshare {
+      %c42 = arith.constant 42 : index
+      %c1_i32 = arith.constant 1 : i32
+      %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+      %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+      %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+      ^bb0(%i: index):
+        hlfir.yield_element %c1_i32 : i32
+      }
+      hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+      hlfir.destroy %elemental : !hlfir.expr<42xi32>
+      omp.terminator
+    }
+    omp.terminator
+  }
+  return
+}
+
+
+// CHECK-LABEL: @should_not_parallelize_0
+// CHECK-NOT: omp.workshare_loop_wrapper
+func.func @should_not_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+  omp.workshare {
+    omp.single {
+      %c42 = arith.constant 42 : index
+      %c1_i32 = arith.constant 1 : i32
+      %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+      %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+      %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+      ^bb0(%i: index):
+        hlfir.yield_element %c1_i32 : i32
+      }
+      hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+      hlfir.destroy %elemental : !hlfir.expr<42xi32>
+      omp.terminator
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @should_not_parallelize_1
+// CHECK-NOT: omp.workshare_loop_wrapper
+func.func @should_not_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+  omp.workshare {
+    omp.critical {
+      %c42 = arith.constant 42 : index
+      %c1_i32 = arith.constant 1 : i32
+      %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+      %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+      %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+      ^bb0(%i: index):
+        hlfir.yield_element %c1_i32 : i32
+      }
+      hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+      hlfir.destroy %elemental : !hlfir.expr<42xi32>
+      omp.terminator
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @should_not_parallelize_2
+// CHECK-NOT: omp.workshare_loop_wrapper
+func.func @should_not_parallelize_2(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+  omp.workshare {
+    omp.parallel {
+      %c42 = arith.constant 42 : index
+      %c1_i32 = arith.constant 1 : i32
+      %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+      %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+      %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+      ^bb0(%i: index):
+        hlfir.yield_element %c1_i32 : i32
+      }
+      hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+      hlfir.destroy %elemental : !hlfir.expr<42xi32>
+      omp.terminator
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @should_not_parallelize_3
+// CHECK-NOT: omp.workshare_loop_wrapper
+func.func @should_not_parallelize_3(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+  omp.workshare {
+    omp.parallel {
+      omp.workshare {
+        omp.parallel {
+          %c42 = arith.constant 42 : index
+          %c1_i32 = arith.constant 1 : i32
+          %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+          %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+          %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+          ^bb0(%i: index):
+            hlfir.yield_element %c1_i32 : i32
+          }
+          hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+          hlfir.destroy %elemental : !hlfir.expr<42xi32>
+          omp.terminator
+        }
+        omp.terminator
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }
+  return
+}