[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
Ivan R. Ivanov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Oct 3 23:14:35 PDT 2024
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748
>From 07a9eb3581f480c47ce4de3de00c7cef15df3cdc Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 4 Oct 2024 14:21:14 +0900
Subject: [PATCH 1/7] Fix dst src in copy function
---
flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index cf1867311cc236..baf8346e7608a9 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -162,8 +162,8 @@ static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type varType,
{loc, loc});
builder.setInsertionPointToStart(&funcOp.getRegion().back());
- Value loaded = builder.create<fir::LoadOp>(loc, funcOp.getArgument(0));
- builder.create<fir::StoreOp>(loc, loaded, funcOp.getArgument(1));
+ Value loaded = builder.create<fir::LoadOp>(loc, funcOp.getArgument(1));
+ builder.create<fir::StoreOp>(loc, loaded, funcOp.getArgument(0));
builder.create<mlir::func::ReturnOp>(loc);
return funcOp;
>From c3ff901b31806c73228e4f47a47f420c2d2465ed Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 4 Oct 2024 14:38:48 +0900
Subject: [PATCH 2/7] Use omp.single to handle CFG cases
---
flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 77 +++++++++++++------
1 file changed, 53 insertions(+), 24 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index baf8346e7608a9..34399abbcd20ea 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#include "flang/Optimizer/Builder/Todo.h"
#include <flang/Optimizer/Builder/FIRBuilder.h>
#include <flang/Optimizer/Dialect/FIROps.h>
#include <flang/Optimizer/Dialect/FIRType.h>
@@ -39,7 +38,6 @@
#include <mlir/IR/Visitors.h>
#include <mlir/Interfaces/SideEffectInterfaces.h>
#include <mlir/Support/LLVM.h>
-#include <mlir/Transforms/GreedyPatternRewriteDriver.h>
#include <variant>
@@ -96,6 +94,12 @@ bool shouldUseWorkshareLowering(Operation *op) {
if (isNestedIn<omp::SingleOp>(parentWorkshare, op))
return false;
+ if (parentWorkshare.getRegion().getBlocks().size() != 1) {
+ parentWorkshare->emitWarning(
+ "omp workshare with unstructured control flow currently unsupported.");
+ return false;
+ }
+
return true;
}
@@ -408,15 +412,6 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) {
OpBuilder rootBuilder(wsOp);
- // This operation is just a placeholder which will be erased later. We need it
- // because our `parallelizeRegion` function works on regions and not blocks.
- omp::WorkshareOp newOp =
- rootBuilder.create<omp::WorkshareOp>(loc, omp::WorkshareOperands());
- if (!wsOp.getNowait())
- rootBuilder.create<omp::BarrierOp>(loc);
-
- parallelizeRegion(wsOp.getRegion(), newOp.getRegion(), rootMapping, loc, di);
-
// FIXME Currently, we only support workshare constructs with structured
// control flow. The transformation itself supports CFG, however, once we
// transform the MLIR region in the omp.workshare, we need to inline that
@@ -427,19 +422,53 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) {
// time when fir ops get lowered to CFG. However, SCF is not registered in
// flang so we cannot use it. Remove this requirement once we have
// scf.execute_region or an alternative operation available.
- if (wsOp.getRegion().getBlocks().size() != 1)
- TODO(wsOp->getLoc(), "omp workshare with unstructured control flow");
-
- // Inline the contents of the placeholder workshare op into its parent block.
- Block *theBlock = &newOp.getRegion().front();
- Operation *term = theBlock->getTerminator();
- Block *parentBlock = wsOp->getBlock();
- parentBlock->getOperations().splice(newOp->getIterator(),
- theBlock->getOperations());
- assert(term->getNumOperands() == 0);
- term->erase();
- newOp->erase();
- wsOp->erase();
+ if (wsOp.getRegion().getBlocks().size() == 1) {
+ // This operation is just a placeholder which will be erased later. We need
+ // it because our `parallelizeRegion` function works on regions and not
+ // blocks.
+ omp::WorkshareOp newOp =
+ rootBuilder.create<omp::WorkshareOp>(loc, omp::WorkshareOperands());
+ if (!wsOp.getNowait())
+ rootBuilder.create<omp::BarrierOp>(loc);
+
+ parallelizeRegion(wsOp.getRegion(), newOp.getRegion(), rootMapping, loc,
+ di);
+
+ // Inline the contents of the placeholder workshare op into its parent
+ // block.
+ Block *theBlock = &newOp.getRegion().front();
+ Operation *term = theBlock->getTerminator();
+ Block *parentBlock = wsOp->getBlock();
+ parentBlock->getOperations().splice(newOp->getIterator(),
+ theBlock->getOperations());
+ assert(term->getNumOperands() == 0);
+ term->erase();
+ newOp->erase();
+ wsOp->erase();
+ } else {
+ // Otherwise just change the operation to an omp.single.
+
+ // `shouldUseWorkshareLowering` should have guaranteed that there are no
+ // omp.workshare.loop_wrapper ops that bind to this omp.workshare.
+ assert(!wsOp->walk([&](Operation *op) {
+ // Nested omp.workshare ops can have their own
+ // omp.workshare.loop_wrapper ops.
+ if (isa<omp::WorkshareOp>(op))
+ return WalkResult::skip();
+ if (isa<omp::WorkshareLoopWrapperOp>(op))
+ return WalkResult::interrupt();
+ return WalkResult::advance();
+ })
+ .wasInterrupted());
+
+ omp::SingleOperands operands;
+ operands.nowait = wsOp.getNowaitAttr();
+ omp::SingleOp newOp = rootBuilder.create<omp::SingleOp>(loc, operands);
+
+ newOp.getRegion().getBlocks().splice(newOp.getRegion().getBlocks().begin(),
+ wsOp.getRegion().getBlocks());
+ wsOp->erase();
+ }
return success();
}
>From 76b6a9f6fbcfdaded965134bb5ca9c775c840562 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 4 Oct 2024 15:12:14 +0900
Subject: [PATCH 3/7] Fix lower workshare tests
---
flang/test/Transforms/OpenMP/lower-workshare-alloca.mlir | 4 ++--
.../Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir | 7 ++++---
flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir | 7 ++++---
3 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/flang/test/Transforms/OpenMP/lower-workshare-alloca.mlir b/flang/test/Transforms/OpenMP/lower-workshare-alloca.mlir
index d1bef3a359e487..618b8d9c19b6b1 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare-alloca.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare-alloca.mlir
@@ -24,8 +24,8 @@ func.func @wsfunc() {
// CHECK-LABEL: func.func private @_workshare_copy_i32(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<i32>,
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32>) {
-// CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32>
-// CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<i32>
+// CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
+// CHECK: fir.store %[[VAL_2]] to %[[VAL_0]] : !fir.ref<i32>
// CHECK: return
// CHECK: }
diff --git a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir
index d10996167ae623..62d9da6c520f85 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir
@@ -1,8 +1,9 @@
-// RUN: %not_todo_cmd fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | FileCheck %s
+// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | FileCheck %s
-// CHECK: not yet implemented: omp workshare with unstructured control flow
+// CHECK: omp.parallel
+// CHECK-NEXT: omp.single
-// Check that the definition of %r dominates its use post-transform
+// TODO Check that the definition of %r dominates its use post-transform
func.func @wsfunc() {
%a = fir.alloca i32
omp.parallel {
diff --git a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir
index 46d2a8e8d48a8a..d9551eb99f0762 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir
@@ -1,8 +1,9 @@
-// RUN: %not_todo_cmd fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | FileCheck %s
+// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | FileCheck %s
-// CHECK: not yet implemented: omp workshare with unstructured control flow
+// CHECK: omp.parallel
+// CHECK-NEXT: omp.single
-// Check transforming a simple CFG
+// TODO Check transforming a simple CFG
func.func @wsfunc() {
%a = fir.alloca i32
omp.parallel {
>From b6da4eb2954b0a0e68d61e8985b4484d33e129a6 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 4/7] Add workshare loop wrapper lowerings
Bufferize test
Bufferize test
Bufferize test
Add test for should use workshare lowering
---
.../HLFIR/Transforms/BufferizeHLFIR.cpp | 4 +-
.../Transforms/OptimizedBufferization.cpp | 10 +-
flang/test/HLFIR/bufferize-workshare.fir | 58 ++++++++
.../OpenMP/should-use-workshare-lowering.mlir | 140 ++++++++++++++++++
4 files changed, 208 insertions(+), 4 deletions(-)
create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 07794828fce267..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,6 +26,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/PatternMatch.h"
@@ -792,7 +793,8 @@ struct ElementalOpConversion
// Generate a loop nest looping around the fir.elemental shape and clone
// fir.elemental region inside the inner loop.
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+ hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
auto insPt = builder.saveInsertionPoint();
builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 3a0a98dc594463..f014724861e333 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/Transforms/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
// Generate a loop nest looping around the hlfir.elemental shape and clone
// hlfir.elemental region inside the inner loop
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+ hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+ hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
builder.setInsertionPointToStart(loopNest.body);
auto arrayElement =
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
@@ -648,7 +651,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+ hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
builder.setInsertionPointToStart(loopNest.body);
auto rhsArrayElement =
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir
new file mode 100644
index 00000000000000..9b7341ae43398a
--- /dev/null
+++ b/flang/test/HLFIR/bufferize-workshare.fir
@@ -0,0 +1,58 @@
+// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
+
+// CHECK-LABEL: func.func @simple(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
+// CHECK: omp.parallel {
+// CHECK: omp.workshare {
+// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
+// CHECK: %[[VAL_7:.*]] = arith.constant true
+// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK: omp.workshare.loop_wrapper {
+// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
+// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
+// CHECK: omp.yield
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
+// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: return
+// CHECK: }
+func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
+ omp.parallel {
+ omp.workshare {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ %ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+ %val = fir.load %ref : !fir.ref<i32>
+ %sub = arith.subi %val, %c1_i32 : i32
+ hlfir.yield_element %sub : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
new file mode 100644
index 00000000000000..229fe592a02b9b
--- /dev/null
+++ b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
@@ -0,0 +1,140 @@
+// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
+
+// Checks that we correctly identify when to use the lowering to
+// omp.workshare.loop_wrapper
+
+// CHECK-LABEL: @should_parallelize_0
+// CHECK: omp.workshare.loop_wrapper
+func.func @should_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_parallelize_1
+// CHECK: omp.workshare.loop_wrapper
+func.func @should_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.parallel {
+ omp.workshare {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+
+// CHECK-LABEL: @should_not_parallelize_0
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ omp.single {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_not_parallelize_1
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ omp.critical {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_not_parallelize_2
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_2(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ omp.parallel {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_not_parallelize_3
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_3(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ omp.parallel {
+ omp.workshare {
+ omp.parallel {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
>From c7a843ab3155df3e1cea544edc09f8dc18cddb8b Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Mon, 23 Sep 2024 12:56:11 +0900
Subject: [PATCH 5/7] Add integration test for workshare
---
flang/test/Integration/OpenMP/workshare.f90 | 57 +++++++++++++++++++++
1 file changed, 57 insertions(+)
create mode 100644 flang/test/Integration/OpenMP/workshare.f90
diff --git a/flang/test/Integration/OpenMP/workshare.f90 b/flang/test/Integration/OpenMP/workshare.f90
new file mode 100644
index 00000000000000..0c4524f8552906
--- /dev/null
+++ b/flang/test/Integration/OpenMP/workshare.f90
@@ -0,0 +1,57 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
+!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
+
+subroutine sb1(a, x, y, z)
+ integer :: a
+ integer :: x(:)
+ integer :: y(:)
+ integer :: z(:)
+ !$omp parallel workshare
+ z = a * x + y
+ !$omp end parallel workshare
+end subroutine
+
+! HLFIR: func.func @_QPsb1
+! HLFIR: omp.parallel {
+! HLFIR: omp.workshare {
+! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! HLFIR: hlfir.assign
+! HLFIR: hlfir.destroy
+! HLFIR: hlfir.destroy
+! HLFIR-NOT: omp.barrier
+! HLFIR: omp.terminator
+! HLFIR: }
+! HLFIR-NOT: omp.barrier
+! HLFIR: omp.terminator
+! HLFIR: }
+! HLFIR: return
+! HLFIR: }
+! HLFIR:}
+
+
+! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
+! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>
+
+! FIR: func.func @_QPsb1
+! FIR: omp.parallel {
+! FIR: omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
+! FIR: fir.allocmem
+! FIR: omp.wsloop {
+! FIR: omp.loop_nest
+! FIR: omp.single nowait {
+! FIR: fir.call @_FortranAAssign
+! FIR: fir.freemem
+! FIR: omp.terminator
+! FIR: }
+! FIR: omp.barrier
+! FIR: omp.terminator
+! FIR: }
>From ac636abbad85c3e765ca485eb0f7291da38460a6 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 4 Oct 2024 15:02:54 +0900
Subject: [PATCH 6/7] One more integration test
---
.../OpenMP/workshare-scalar-array-mul.f90 | 67 +++++++++++++++++++
1 file changed, 67 insertions(+)
create mode 100644 flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
new file mode 100644
index 00000000000000..2fb9a029bf93a5
--- /dev/null
+++ b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
@@ -0,0 +1,67 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3
+!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0
+!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0
+
+program test
+ real :: arr_01(10)
+ !$omp parallel workshare
+ arr_01 = arr_01*2
+ !$omp end parallel workshare
+end program
+
+! HLFIR-O3: omp.parallel {
+! HLFIR-O3: omp.workshare {
+! HLFIR-O3: hlfir.elemental
+! HLFIR-O3: hlfir.assign
+! HLFIR-O3: hlfir.destroy
+! HLFIR-O3: omp.terminator
+! HLFIR-O3: omp.terminator
+
+! FIR-O3: omp.parallel {
+! FIR-O3: omp.wsloop nowait {
+! FIR-O3: omp.loop_nest
+! FIR-O3: omp.terminator
+! FIR-O3: omp.barrier
+! FIR-O3: omp.terminator
+
+! HLFIR-O0: omp.parallel {
+! HLFIR-O0: omp.workshare {
+! HLFIR-O0: hlfir.elemental
+! HLFIR-O0: hlfir.assign
+! HLFIR-O0: hlfir.destroy
+! HLFIR-O0: omp.terminator
+! HLFIR-O0: omp.terminator
+
+! Check the copyprivate copy function
+! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}})
+! FIR-O0: fir.load %[[SRC]]
+! FIR-O0: fir.store {{.*}} to %[[DST]]
+
+! Check that we properly handle the temporary array
+! FIR-O0: omp.parallel {
+! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>>
+! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_
+! FIR-O0: fir.allocmem
+! FIR-O0: fir.store
+! FIR-O0: omp.terminator
+! FIR-O0: fir.load %[[CP]]
+! FIR-O0: omp.wsloop {
+! FIR-O0: omp.loop_nest
+! FIR-O0: omp.yield
+! FIR-O0: omp.terminator
+! FIR-O0: omp.single nowait {
+! FIR-O0: fir.call @_FortranAAssign
+! FIR-O0: fir.freemem
+! FIR-O0: omp.terminator
+! FIR-O0: omp.barrier
+! FIR-O0: omp.terminator
>From 41c738afbf9b18974d9774c05e24adb853d90ca6 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 4 Oct 2024 15:12:43 +0900
Subject: [PATCH 7/7] Add test for cfg workshare bufferization
---
.../should-use-workshare-lowering-cfg.mlir | 22 +++++++++++++++++++
1 file changed, 22 insertions(+)
create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir
diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir
new file mode 100644
index 00000000000000..8b6d8097caad87
--- /dev/null
+++ b/flang/test/Transforms/OpenMP/should-use-workshare-lowering-cfg.mlir
@@ -0,0 +1,22 @@
+// RUN: fir-opt --bufferize-hlfir %s 2>&1 | FileCheck %s
+
+// CHECK: warning: omp workshare with unstructured control flow currently unsupported.
+func.func @warn_cfg(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ ^bb1:
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ cf.br ^bb2
+ ^bb2:
+ omp.terminator
+ }
+ return
+}
More information about the llvm-branch-commits
mailing list