[llvm-branch-commits] [flang] [mlir] [flang] Lower omp.workshare to other omp constructs (PR #101446)
Ivan R. Ivanov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Aug 2 01:18:04 PDT 2024
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446
>From 62057f90e1e6e9e89df1bb666a3676421e2e52ac Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 2 Aug 2024 16:10:25 +0900
Subject: [PATCH 1/9] Add custom omp loop wrapper
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5199ff50abb95..76f0c472cfdb1 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [
let hasVerifier = 1;
}
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [
+ DeclareOpInterfaceMethods<LoopWrapperInterface>,
+ RecursiveMemoryEffects, SingleBlock
+ ], singleRegion = true> {
+ let summary = "contains loop nests to be parallelized by workshare";
+
+ let builders = [
+ OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+ ];
+}
+
//===----------------------------------------------------------------------===//
// Loop Nest
//===----------------------------------------------------------------------===//
>From d882f2b7413a9ad306334cc69691671b498985fc Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 2 Aug 2024 16:08:58 +0900
Subject: [PATCH 2/9] Add recursive memory effects trait to workshare
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 76f0c472cfdb1..7d1c80333855e 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [
// 2.8.3 Workshare Construct
//===----------------------------------------------------------------------===//
-def WorkshareOp : OpenMP_Op<"workshare", clauses = [
+def WorkshareOp : OpenMP_Op<"workshare", traits = [
+ RecursiveMemoryEffects,
+ ], clauses = [
OpenMP_NowaitClause,
], singleRegion = true> {
let summary = "workshare directive";
>From 14878e80f5bcf8dac5100951de803ce584a33b25 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 3/9] [flang][omp] Emit omp.workshare in frontend
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++++++++++++++++++++++++++----
1 file changed, 26 insertions(+), 4 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4..f7bc565ea8cbc 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter,
loc, llvm::omp::Directive::OMPD_taskwait);
}
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx,
+ lower::StatementContext &stmtCtx,
+ const List<Clause> &clauses, mlir::Location loc,
+ mlir::omp::WorkshareOperands &clauseOps) {
+ ClauseProcessor cp(converter, semaCtx, clauses);
+ cp.processNowait(clauseOps);
+}
+
static void genTeamsClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
}
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+ mlir::Location loc, const ConstructQueue &queue,
+ ConstructQueue::iterator item) {
+ lower::StatementContext stmtCtx;
+ mlir::omp::WorkshareOperands clauseOps;
+ genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps);
+
+ return genOpWithBody<mlir::omp::WorkshareOp>(
+ OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+ llvm::omp::Directive::OMPD_workshare)
+ .setClauses(&item->clauses),
+ queue, item, clauseOps);
+}
+
static mlir::omp::TeamsOp
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
llvm::omp::getOpenMPDirectiveName(dir) + ")");
// case llvm::omp::Directive::OMPD_workdistribute:
case llvm::omp::Directive::OMPD_workshare:
- // FIXME: Workshare is not a commonly used OpenMP construct, an
- // implementation for this feature will come later. For the codes
- // that use this construct, add a single construct for now.
- genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+ genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
// Composite constructs
>From 16f7146a45ee9b31c00d9d54be4859df312dcb1b Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 4/9] [flang] Introduce ws loop nest generation for HLFIR
lowering
---
.../flang/Optimizer/Builder/HLFIRTools.h | 12 +++--
flang/lib/Lower/ConvertCall.cpp | 2 +-
flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +-
flang/lib/Optimizer/Builder/HLFIRTools.cpp | 52 ++++++++++++++-----
.../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +-
.../LowerHLFIROrderedAssignments.cpp | 30 +++++------
.../Transforms/OptimizedBufferization.cpp | 6 +--
7 files changed, 69 insertions(+), 40 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea078..14e42c6f358e4 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
/// Structure to describe a loop nest.
struct LoopNest {
- fir::DoLoopOp outerLoop;
- fir::DoLoopOp innerLoop;
+ mlir::Operation *outerOp;
+ mlir::Block *body;
llvm::SmallVector<mlir::Value> oneBasedIndices;
};
@@ -366,11 +366,13 @@ struct LoopNest {
/// \p isUnordered specifies whether the loops in the loop nest
/// are unordered.
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::Value shape, bool isUnordered = false) {
+ mlir::Value shape, bool isUnordered = false,
+ bool emitWsLoop = false) {
return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
}
/// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844..0689d6e033dd9 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
auto insPt = builder.saveInsertionPoint();
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
callContext.stmtCtx.pushScope();
for (auto &preparedActual : loweredActuals)
if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c2..72a90dd0d6f29 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
// know this won't miss any opportuinties for clever elemental inlining
hlfir::LoopNest nest = hlfir::genLoopNest(
loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
- builder.setInsertionPointToStart(nest.innerLoop.getBody());
+ builder.setInsertionPointToStart(nest.body);
mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
auto lhsEleAddr = builder.create<fir::ArrayCoorOp>(
loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
builder, loc, redId, refTy, lhsEle, rhsEle);
builder.create<fir::StoreOp>(loc, scalarReduction, lhsEleAddr);
- builder.setInsertionPointAfter(nest.outerLoop);
+ builder.setInsertionPointAfter(nest.outerOp);
builder.create<mlir::omp::YieldOp>(loc, lhsAddr);
}
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178..cd07cb741eb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
#include "mlir/IR/IRMapping.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/TypeSwitch.h"
+#include <mlir/Dialect/OpenMP/OpenMPDialect.h>
#include <optional>
// Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered) {
+ mlir::ValueRange extents, bool isUnordered,
+ bool emitWsLoop) {
hlfir::LoopNest loopNest;
assert(!extents.empty() && "must have at least one extent");
- auto insPt = builder.saveInsertionPoint();
+ mlir::OpBuilder::InsertionGuard guard(builder);
loopNest.oneBasedIndices.assign(extents.size(), mlir::Value{});
// Build loop nest from column to row.
auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1);
mlir::Type indexType = builder.getIndexType();
- unsigned dim = extents.size() - 1;
- for (auto extent : llvm::reverse(extents)) {
- auto ub = builder.createConvert(loc, indexType, extent);
- loopNest.innerLoop =
- builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered);
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
- // Reverse the indices so they are in column-major order.
- loopNest.oneBasedIndices[dim--] = loopNest.innerLoop.getInductionVar();
- if (!loopNest.outerLoop)
- loopNest.outerLoop = loopNest.innerLoop;
+ if (emitWsLoop) {
+ auto wsloop = builder.create<mlir::omp::WsloopOp>(
+ loc, mlir::ArrayRef<mlir::NamedAttribute>());
+ loopNest.outerOp = wsloop;
+ builder.createBlock(&wsloop.getRegion());
+ mlir::omp::LoopNestOperands lnops;
+ lnops.loopInclusive = builder.getUnitAttr();
+ for (auto extent : llvm::reverse(extents)) {
+ lnops.loopLowerBounds.push_back(one);
+ lnops.loopUpperBounds.push_back(extent);
+ lnops.loopSteps.push_back(one);
+ }
+ auto lnOp = builder.create<mlir::omp::LoopNestOp>(loc, lnops);
+ builder.create<mlir::omp::TerminatorOp>(loc);
+ mlir::Block *block = builder.createBlock(&lnOp.getRegion());
+ for (auto extent : llvm::reverse(extents))
+ block->addArgument(extent.getType(), extent.getLoc());
+ loopNest.body = block;
+ builder.create<mlir::omp::YieldOp>(loc);
+ for (unsigned dim = 0; dim < extents.size(); dim++)
+ loopNest.oneBasedIndices[extents.size() - dim - 1] =
+ lnOp.getRegion().front().getArgument(dim);
+ } else {
+ unsigned dim = extents.size() - 1;
+ for (auto extent : llvm::reverse(extents)) {
+ auto ub = builder.createConvert(loc, indexType, extent);
+ auto doLoop =
+ builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered);
+ loopNest.body = doLoop.getBody();
+ builder.setInsertionPointToStart(loopNest.body);
+ // Reverse the indices so they are in column-major order.
+ loopNest.oneBasedIndices[dim--] = doLoop.getInductionVar();
+ if (!loopNest.outerOp)
+ loopNest.outerOp = doLoop;
+ }
}
- builder.restoreInsertionPoint(insPt);
return loopNest;
}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index a70a6b388c4b1..b608677c52631 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -31,6 +31,7 @@
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "llvm/ADT/TypeSwitch.h"
namespace hlfir {
@@ -793,7 +794,7 @@ struct ElementalOpConversion
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
auto insPt = builder.saveInsertionPoint();
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
loopNest.oneBasedIndices);
hlfir::Entity elementValue(yield.getElementValue());
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 85dd517cb5791..645abf65d10a3 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -464,7 +464,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
// if the LHS is not).
mlir::Value shape = hlfir::genShape(loc, builder, lhsEntity);
elementalLoopNest = hlfir::genLoopNest(loc, builder, shape);
- builder.setInsertionPointToStart(elementalLoopNest->innerLoop.getBody());
+ builder.setInsertionPointToStart(elementalLoopNest->body);
lhsEntity = hlfir::getElementAt(loc, builder, lhsEntity,
elementalLoopNest->oneBasedIndices);
rhsEntity = hlfir::getElementAt(loc, builder, rhsEntity,
@@ -484,7 +484,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
for (auto &cleanupConversion : argConversionCleanups)
cleanupConversion();
if (elementalLoopNest)
- builder.setInsertionPointAfter(elementalLoopNest->outerLoop);
+ builder.setInsertionPointAfter(elementalLoopNest->outerOp);
} else {
// TODO: preserve allocatable assignment aspects for forall once
// they are conveyed in hlfir.region_assign.
@@ -493,7 +493,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
generateCleanupIfAny(loweredLhs.elementalCleanup);
if (loweredLhs.vectorSubscriptLoopNest)
builder.setInsertionPointAfter(
- loweredLhs.vectorSubscriptLoopNest->outerLoop);
+ loweredLhs.vectorSubscriptLoopNest->outerOp);
generateCleanupIfAny(oldRhsYield);
generateCleanupIfAny(loweredLhs.nonElementalCleanup);
}
@@ -518,8 +518,8 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
hlfir::Entity savedMask{maybeSaved->first};
mlir::Value shape = hlfir::genShape(loc, builder, savedMask);
whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
- constructStack.push_back(whereLoopNest->outerLoop.getOperation());
- builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
+ constructStack.push_back(whereLoopNest->outerOp);
+ builder.setInsertionPointToStart(whereLoopNest->body);
mlir::Value cdt = hlfir::getElementAt(loc, builder, savedMask,
whereLoopNest->oneBasedIndices);
generateMaskIfOp(cdt);
@@ -527,7 +527,7 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
// If this is the same run as the one that saved the value, the clean-up
// was left-over to be done now.
auto insertionPoint = builder.saveInsertionPoint();
- builder.setInsertionPointAfter(whereLoopNest->outerLoop);
+ builder.setInsertionPointAfter(whereLoopNest->outerOp);
generateCleanupIfAny(maybeSaved->second);
builder.restoreInsertionPoint(insertionPoint);
}
@@ -539,8 +539,8 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
mask.generateNoneElementalPart(builder, mapper);
mlir::Value shape = mask.generateShape(builder, mapper);
whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
- constructStack.push_back(whereLoopNest->outerLoop.getOperation());
- builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
+ constructStack.push_back(whereLoopNest->outerOp);
+ builder.setInsertionPointToStart(whereLoopNest->body);
mlir::Value cdt = generateMaskedEntity(mask);
generateMaskIfOp(cdt);
return;
@@ -754,7 +754,7 @@ OrderedAssignmentRewriter::generateYieldedLHS(
loweredLhs.vectorSubscriptLoopNest = hlfir::genLoopNest(
loc, builder, loweredLhs.vectorSubscriptShape.value());
builder.setInsertionPointToStart(
- loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody());
+ loweredLhs.vectorSubscriptLoopNest->body);
}
loweredLhs.lhs = temp->second.fetch(loc, builder);
return loweredLhs;
@@ -772,7 +772,7 @@ OrderedAssignmentRewriter::generateYieldedLHS(
hlfir::genLoopNest(loc, builder, *loweredLhs.vectorSubscriptShape,
!elementalAddrLhs.isOrdered());
builder.setInsertionPointToStart(
- loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody());
+ loweredLhs.vectorSubscriptLoopNest->body);
mapper.map(elementalAddrLhs.getIndices(),
loweredLhs.vectorSubscriptLoopNest->oneBasedIndices);
for (auto &op : elementalAddrLhs.getBody().front().without_terminator())
@@ -798,11 +798,11 @@ OrderedAssignmentRewriter::generateMaskedEntity(MaskedArrayExpr &maskedExpr) {
if (!maskedExpr.noneElementalPartWasGenerated) {
// Generate none elemental part before the where loops (but inside the
// current forall loops if any).
- builder.setInsertionPoint(whereLoopNest->outerLoop);
+ builder.setInsertionPoint(whereLoopNest->outerOp);
maskedExpr.generateNoneElementalPart(builder, mapper);
}
// Generate the none elemental part cleanup after the where loops.
- builder.setInsertionPointAfter(whereLoopNest->outerLoop);
+ builder.setInsertionPointAfter(whereLoopNest->outerOp);
maskedExpr.generateNoneElementalCleanupIfAny(builder, mapper);
// Generate the value of the current element for the masked expression
// at the current insertion point (inside the where loops, and any fir.if
@@ -1242,7 +1242,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide(
LhsValueAndCleanUp loweredLhs = generateYieldedLHS(loc, region);
fir::factory::TemporaryStorage *temp = nullptr;
if (loweredLhs.vectorSubscriptLoopNest)
- constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerLoop);
+ constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerOp);
if (loweredLhs.vectorSubscriptLoopNest && !rhsIsArray(regionAssignOp)) {
// Vector subscripted entity for which the shape must also be saved on top
// of the element addresses (e.g. the shape may change in each forall
@@ -1265,7 +1265,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide(
// subscripted LHS.
auto &vectorTmp = temp->cast<fir::factory::AnyVectorSubscriptStack>();
auto insertionPoint = builder.saveInsertionPoint();
- builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerLoop);
+ builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerOp);
vectorTmp.pushShape(loc, builder, shape);
builder.restoreInsertionPoint(insertionPoint);
} else {
@@ -1291,7 +1291,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide(
if (loweredLhs.vectorSubscriptLoopNest) {
constructStack.pop_back();
builder.setInsertionPointAfter(
- loweredLhs.vectorSubscriptLoopNest->outerLoop);
+ loweredLhs.vectorSubscriptLoopNest->outerOp);
}
}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index c5b809514c54c..c4aed6b79df92 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -483,7 +483,7 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
// hlfir.elemental region inside the inner loop
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
loopNest.oneBasedIndices);
hlfir::Entity elementValue{yield.getElementValue()};
@@ -554,7 +554,7 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
auto arrayElement =
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
builder.create<hlfir::AssignOp>(loc, rhs, arrayElement);
@@ -649,7 +649,7 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
auto rhsArrayElement =
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
>From decd0c5b35dcd5175e06319a793fedd2935b14ca Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 2 Aug 2024 16:08:34 +0900
Subject: [PATCH 5/9] Emit loop nests in a custom wrapper
---
flang/include/flang/Optimizer/Builder/HLFIRTools.h | 6 +++---
flang/lib/Optimizer/Builder/HLFIRTools.cpp | 11 +++++------
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 14e42c6f358e4..6987471957218 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -367,12 +367,12 @@ struct LoopNest {
/// are unordered.
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
mlir::ValueRange extents, bool isUnordered = false,
- bool emitWsLoop = false);
+ bool emitWorkshareLoop = false);
inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
mlir::Value shape, bool isUnordered = false,
- bool emitWsLoop = false) {
+ bool emitWorkshareLoop = false) {
return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered, emitWsLoop);
+ isUnordered, emitWorkshareLoop);
}
/// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index cd07cb741eb4b..91b1b3d774a01 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -857,7 +857,7 @@ mlir::Value hlfir::inlineElementalOp(
hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
mlir::ValueRange extents, bool isUnordered,
- bool emitWsLoop) {
+ bool emitWorkshareLoop) {
hlfir::LoopNest loopNest;
assert(!extents.empty() && "must have at least one extent");
mlir::OpBuilder::InsertionGuard guard(builder);
@@ -865,11 +865,10 @@ hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
// Build loop nest from column to row.
auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1);
mlir::Type indexType = builder.getIndexType();
- if (emitWsLoop) {
- auto wsloop = builder.create<mlir::omp::WsloopOp>(
- loc, mlir::ArrayRef<mlir::NamedAttribute>());
- loopNest.outerOp = wsloop;
- builder.createBlock(&wsloop.getRegion());
+ if (emitWorkshareLoop) {
+ auto wslw = builder.create<mlir::omp::WorkshareLoopWrapperOp>(loc);
+ loopNest.outerOp = wslw;
+ builder.createBlock(&wslw.getRegion());
mlir::omp::LoopNestOperands lnops;
lnops.loopInclusive = builder.getUnitAttr();
for (auto extent : llvm::reverse(extents)) {
>From 3ab40e1600aecd5e39f9379941dabf67667a32e9 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Wed, 31 Jul 2024 14:44:31 +0900
Subject: [PATCH 6/9] [flang] Lower omp.workshare to other omp constructs
---
flang/include/flang/Optimizer/CMakeLists.txt | 1 +
.../flang/Optimizer/OpenMP/CMakeLists.txt | 4 +
flang/include/flang/Optimizer/OpenMP/Passes.h | 30 ++
.../include/flang/Optimizer/OpenMP/Passes.td | 18 ++
flang/include/flang/Tools/CLOptions.inc | 2 +
flang/lib/Frontend/CMakeLists.txt | 1 +
flang/lib/Optimizer/CMakeLists.txt | 1 +
.../HLFIR/Transforms/BufferizeHLFIR.cpp | 6 +-
flang/lib/Optimizer/OpenMP/CMakeLists.txt | 26 ++
flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 259 ++++++++++++++++++
flang/test/HLFIR/bufferize-workshare.fir | 58 ++++
.../Transforms/OpenMP/lower-workshare.mlir | 81 ++++++
flang/tools/bbc/CMakeLists.txt | 1 +
flang/tools/fir-opt/CMakeLists.txt | 1 +
flang/tools/fir-opt/fir-opt.cpp | 2 +
flang/tools/tco/CMakeLists.txt | 1 +
16 files changed, 490 insertions(+), 2 deletions(-)
create mode 100644 flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.h
create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.td
create mode 100644 flang/lib/Optimizer/OpenMP/CMakeLists.txt
create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir
diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt
index 89e43a9ee8d62..3336ac935e101 100644
--- a/flang/include/flang/Optimizer/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/CMakeLists.txt
@@ -2,3 +2,4 @@ add_subdirectory(CodeGen)
add_subdirectory(Dialect)
add_subdirectory(HLFIR)
add_subdirectory(Transforms)
+add_subdirectory(OpenMP)
diff --git a/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
new file mode 100644
index 0000000000000..d59573f0f7fd9
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name FlangOpenMP)
+
+add_public_tablegen_target(FlangOpenMPPassesIncGen)
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h
new file mode 100644
index 0000000000000..95a05b3005073
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -0,0 +1,30 @@
+//===- Passes.h - OpenMP pass entry points ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header declares OpenMP pass entry points.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H
+#define FORTRAN_OPTIMIZER_OPENMP_PASSES_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+#include <memory>
+
+namespace flangomp {
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
+} // namespace flangomp
+
+#endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td
new file mode 100644
index 0000000000000..6f636ec1df616
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -0,0 +1,18 @@
+//===-- Passes.td - OpenMP pass definition file ------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_DIALECT_OPENMP_PASSES
+#define FORTRAN_DIALECT_OPENMP_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+def LowerWorkshare : Pass<"lower-workshare"> {
+ let summary = "Lower workshare construct";
+}
+
+#endif //FORTRAN_DIALECT_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 7df5044949463..594369fc2ffe5 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -17,6 +17,7 @@
#include "mlir/Transforms/Passes.h"
#include "flang/Optimizer/CodeGen/CodeGen.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "llvm/Passes/OptimizationLevel.h"
#include "llvm/Support/CommandLine.h"
@@ -344,6 +345,7 @@ inline void createHLFIRToFIRPassPipeline(
pm.addPass(hlfir::createLowerHLFIRIntrinsics());
pm.addPass(hlfir::createBufferizeHLFIR());
pm.addPass(hlfir::createConvertHLFIRtoFIR());
+ pm.addPass(flangomp::createLowerWorkshare());
}
/// Create a pass pipeline for handling certain OpenMP transformations needed
diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt
index c20b9096aff49..ecdcc73d61ec1 100644
--- a/flang/lib/Frontend/CMakeLists.txt
+++ b/flang/lib/Frontend/CMakeLists.txt
@@ -38,6 +38,7 @@ add_flang_library(flangFrontend
FIRTransforms
HLFIRDialect
HLFIRTransforms
+ FlangOpenMPTransforms
MLIRTransforms
MLIRBuiltinToLLVMIRTranslation
MLIRLLVMToLLVMIRTranslation
diff --git a/flang/lib/Optimizer/CMakeLists.txt b/flang/lib/Optimizer/CMakeLists.txt
index 4a602162ed2b7..dd153ac33c0fb 100644
--- a/flang/lib/Optimizer/CMakeLists.txt
+++ b/flang/lib/Optimizer/CMakeLists.txt
@@ -5,3 +5,4 @@ add_subdirectory(HLFIR)
add_subdirectory(Support)
add_subdirectory(Transforms)
add_subdirectory(Analysis)
+add_subdirectory(OpenMP)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index b608677c52631..1848dbe2c7a2c 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,12 +26,13 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "llvm/ADT/TypeSwitch.h"
namespace hlfir {
@@ -792,7 +793,8 @@ struct ElementalOpConversion
// Generate a loop nest looping around the fir.elemental shape and clone
// fir.elemental region inside the inner loop.
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+ hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
auto insPt = builder.saveInsertionPoint();
builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
new file mode 100644
index 0000000000000..74419327d76d0
--- /dev/null
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -0,0 +1,26 @@
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+
+add_flang_library(FlangOpenMPTransforms
+ LowerWorkshare.cpp
+
+ DEPENDS
+ FIRDialect
+ FlangOpenMPPassesIncGen
+ ${dialect_libs}
+
+ LINK_LIBS
+ FIRAnalysis
+ FIRDialect
+ FIRBuilder
+ FIRDialectSupport
+ FIRSupport
+ FIRTransforms
+ HLFIRDialect
+ MLIRIR
+ ${dialect_libs}
+
+ LINK_COMPONENTS
+ AsmParser
+ AsmPrinter
+ Remarks
+)
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
new file mode 100644
index 0000000000000..40975552d1fe3
--- /dev/null
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - lower the omp.workshare construct -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Lower omp workshare construct.
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include <variant>
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+ auto workshare = dyn_cast<omp::WorkshareOp>(op->getParentOp());
+ if (!workshare)
+ return false;
+ return workshare->getParentOfType<omp::ParallelOp>();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+ Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+ return !isa<fir::ReferenceType>(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+ if (isa<fir::DeclareOp>(op))
+ return true;
+
+ llvm::SmallVector<MemoryEffects::EffectInstance> effects;
+ MemoryEffectOpInterface interface = dyn_cast<MemoryEffectOpInterface>(op);
+ if (!interface) {
+ return false;
+ }
+ interface.getEffects(effects);
+ if (effects.empty())
+ return true;
+
+ return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+/// %a = fir.allocmem
+/// omp.wsloop {}
+/// fir.call Assign %b %a
+/// fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+/// %a = fir.allocmem
+/// fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+/// fir.call Assign %b %a_reloaded
+/// fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values defined in omp.single
+/// regions that need to be accessed by all threads in the closest omp.parallel.
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
+ assert(wsOp.getRegion().getBlocks().size() == 1);
+
+ Location loc = wsOp->getLoc();
+
+ omp::ParallelOp parallelOp = wsOp->getParentOfType<omp::ParallelOp>();
+ if (!parallelOp) {
+ wsOp.emitWarning("cannot handle workshare, converting to single");
+ Operation *terminator = wsOp.getRegion().front().getTerminator();
+ wsOp->getBlock()->getOperations().splice(
+ wsOp->getIterator(), wsOp.getRegion().front().getOperations());
+ terminator->erase();
+ return;
+ }
+
+ OpBuilder allocBuilder(parallelOp);
+ OpBuilder rootBuilder(wsOp);
+ IRMapping rootMapping;
+
+ omp::SingleOp singleOp = nullptr;
+
+ auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
+ IRMapping singleMapping) {
+ if (auto reloaded = rootMapping.lookupOrNull(v))
+ return;
+ Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext());
+ Type ty = v.getType();
+ Value alloc, reloaded;
+ if (isSupportedByFirAlloca(ty)) {
+ alloc = allocBuilder.create<fir::AllocaOp>(loc, ty);
+ singleBuilder.create<fir::StoreOp>(loc, singleMapping.lookup(v), alloc);
+ reloaded = rootBuilder.create<fir::LoadOp>(loc, ty, alloc);
+ } else {
+ auto one = allocBuilder.create<LLVM::ConstantOp>(
+ loc, allocBuilder.getI32Type(), 1);
+ alloc =
+ allocBuilder.create<LLVM::AllocaOp>(loc, llvmPtrTy, llvmPtrTy, one);
+ Value toStore = singleBuilder
+ .create<UnrealizedConversionCastOp>(
+ loc, llvmPtrTy, singleMapping.lookup(v))
+ .getResult(0);
+ singleBuilder.create<LLVM::StoreOp>(loc, toStore, alloc);
+ reloaded = rootBuilder.create<LLVM::LoadOp>(loc, llvmPtrTy, alloc);
+ reloaded =
+ rootBuilder.create<UnrealizedConversionCastOp>(loc, ty, reloaded)
+ .getResult(0);
+ }
+ rootMapping.map(v, reloaded);
+ };
+
+ auto moveToSingle = [&](SingleRegion sr, OpBuilder singleBuilder) {
+ IRMapping singleMapping = rootMapping;
+
+ for (Operation &op : llvm::make_range(sr.begin, sr.end)) {
+ singleBuilder.clone(op, singleMapping);
+ if (isSafeToParallelize(&op)) {
+ rootBuilder.clone(op, rootMapping);
+ } else {
+ // Prepare reloaded values for results of operations that cannot be
+ // safely parallelized and which are used after the region `sr`
+ for (auto res : op.getResults()) {
+ for (auto &use : res.getUses()) {
+ Operation *user = use.getOwner();
+ while (user->getParentOp() != wsOp)
+ user = user->getParentOp();
+ if (!user->isBeforeInBlock(&*sr.end)) {
+ // We need to reload
+ mapReloadedValue(use.get(), singleBuilder, singleMapping);
+ }
+ }
+ }
+ }
+ }
+ singleBuilder.create<omp::TerminatorOp>(loc);
+ };
+
+ Block *wsBlock = &wsOp.getRegion().front();
+ assert(wsBlock->getTerminator()->getNumOperands() == 0);
+ Operation *terminator = wsBlock->getTerminator();
+
+ SmallVector<std::variant<SingleRegion, omp::WsloopOp>> regions;
+
+ auto it = wsBlock->begin();
+ auto getSingleRegion = [&]() {
+ if (&*it == terminator)
+ return false;
+ if (auto pop = dyn_cast<omp::WsloopOp>(&*it)) {
+ regions.push_back(pop);
+ it++;
+ return true;
+ }
+ SingleRegion sr;
+ sr.begin = it;
+ while (&*it != terminator && !isa<omp::WsloopOp>(&*it))
+ it++;
+ sr.end = it;
+ assert(sr.begin != sr.end);
+ regions.push_back(sr);
+ return true;
+ };
+ while (getSingleRegion())
+ ;
+
+ for (auto [i, loopOrSingle] : llvm::enumerate(regions)) {
+ bool isLast = i + 1 == regions.size();
+ if (std::holds_alternative<SingleRegion>(loopOrSingle)) {
+ omp::SingleOperands singleOperands;
+ if (isLast)
+ singleOperands.nowait = rootBuilder.getUnitAttr();
+ singleOp = rootBuilder.create<omp::SingleOp>(loc, singleOperands);
+ OpBuilder singleBuilder(singleOp);
+ singleBuilder.createBlock(&singleOp.getRegion());
+ moveToSingle(std::get<SingleRegion>(loopOrSingle), singleBuilder);
+ } else {
+ rootBuilder.clone(*std::get<omp::WsloopOp>(loopOrSingle), rootMapping);
+ if (!isLast)
+ rootBuilder.create<omp::BarrierOp>(loc);
+ }
+ }
+
+ if (!wsOp.getNowait())
+ rootBuilder.create<omp::BarrierOp>(loc);
+
+ wsOp->erase();
+
+ return;
+}
+
+class LowerWorksharePass
+ : public flangomp::impl::LowerWorkshareBase<LowerWorksharePass> {
+public:
+ void runOnOperation() override {
+ SmallPtrSet<Operation *, 8> parents;
+ getOperation()->walk([&](mlir::omp::WorkshareOp wsOp) {
+ Operation *isolatedParent =
+ wsOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>();
+ parents.insert(isolatedParent);
+
+ lowerWorkshare(wsOp);
+ });
+
+ // Do folding
+ for (Operation *isolatedParent : parents) {
+ RewritePatternSet patterns(&getContext());
+ GreedyRewriteConfig config;
+ // prevent the pattern driver from merging blocks
+ config.enableRegionSimplification =
+ mlir::GreedySimplifyRegionLevel::Disabled;
+ if (failed(applyPatternsAndFoldGreedily(isolatedParent,
+ std::move(patterns), config))) {
+ emitError(isolatedParent->getLoc(), "error in lower workshare\n");
+ signalPassFailure();
+ }
+ }
+ }
+};
+} // namespace
diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir
new file mode 100644
index 0000000000000..86a2f031478dd
--- /dev/null
+++ b/flang/test/HLFIR/bufferize-workshare.fir
@@ -0,0 +1,58 @@
+// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
+
+// CHECK-LABEL: func.func @simple(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
+// CHECK: omp.parallel {
+// CHECK: omp.workshare {
+// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
+// CHECK: %[[VAL_7:.*]] = arith.constant true
+// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK: omp.wsloop {
+// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
+// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
+// CHECK: omp.yield
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
+// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: return
+// CHECK: }
+func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
+ omp.parallel {
+ omp.workshare {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ %ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+ %val = fir.load %ref : !fir.ref<i32>
+ %sub = arith.subi %val, %c1_i32 : i32
+ hlfir.yield_element %sub : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
diff --git a/flang/test/Transforms/OpenMP/lower-workshare.mlir b/flang/test/Transforms/OpenMP/lower-workshare.mlir
new file mode 100644
index 0000000000000..a8d36443f08bd
--- /dev/null
+++ b/flang/test/Transforms/OpenMP/lower-workshare.mlir
@@ -0,0 +1,81 @@
+// RUN: fir-opt --lower-workshare %s | FileCheck %s
+
+module {
+// CHECK-LABEL: func.func @simple(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = arith.constant 42 : index
+// CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i32) : i32
+// CHECK: %[[VAL_5:.*]] = llvm.alloca %[[VAL_4]] x !llvm.ptr : (i32) -> !llvm.ptr
+// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.heap<!fir.array<42xi32>>
+// CHECK: omp.parallel {
+// CHECK: omp.single {
+// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_7]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+// CHECK: %[[VAL_9:.*]] = builtin.unrealized_conversion_cast %[[VAL_8]]#0 : !fir.ref<!fir.array<42xi32>> to !llvm.ptr
+// CHECK: llvm.store %[[VAL_9]], %[[VAL_5]] : !llvm.ptr, !llvm.ptr
+// CHECK: %[[VAL_10:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_7]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
+// CHECK: fir.store %[[VAL_11]]#0 to %[[VAL_6]] : !fir.ref<!fir.heap<!fir.array<42xi32>>>
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: %[[VAL_12:.*]] = llvm.load %[[VAL_5]] : !llvm.ptr -> !llvm.ptr
+// CHECK: %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.ptr to !fir.ref<!fir.array<42xi32>>
+// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]] : !fir.ref<!fir.heap<!fir.array<42xi32>>>
+// CHECK: omp.wsloop {
+// CHECK: omp.loop_nest (%[[VAL_15:.*]]) : index = (%[[VAL_1]]) to (%[[VAL_3]]) inclusive step (%[[VAL_1]]) {
+// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_15]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
+// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_14]] (%[[VAL_15]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_18]] to %[[VAL_19]] temporary_lhs : i32, !fir.ref<i32>
+// CHECK: omp.yield
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: omp.barrier
+// CHECK: omp.single nowait {
+// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_13]] : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
+// CHECK: fir.freemem %[[VAL_14]] : !fir.heap<!fir.array<42xi32>>
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: omp.barrier
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: return
+// CHECK: }
+ func.func @simple(%arg0: !fir.ref<!fir.array<42xi32>>) {
+ omp.parallel {
+ omp.workshare {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.shape %c42 : (index) -> !fir.shape<1>
+ %1:2 = hlfir.declare %arg0(%0) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %2 = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
+ %3:2 = hlfir.declare %2(%0) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
+ %true = arith.constant true
+ %c1 = arith.constant 1 : index
+ omp.wsloop {
+ omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) {
+ %7 = hlfir.designate %1#0 (%arg1) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+ %8 = fir.load %7 : !fir.ref<i32>
+ %9 = arith.subi %8, %c1_i32 : i32
+ %10 = hlfir.designate %3#0 (%arg1) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+ hlfir.assign %9 to %10 temporary_lhs : i32, !fir.ref<i32>
+ omp.yield
+ }
+ omp.terminator
+ }
+ %4 = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
+ %5 = fir.insert_value %4, %true, [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+ %6 = fir.insert_value %5, %3#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+ hlfir.assign %3#0 to %1#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
+ fir.freemem %3#0 : !fir.heap<!fir.array<42xi32>>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+ }
+}
diff --git a/flang/tools/bbc/CMakeLists.txt b/flang/tools/bbc/CMakeLists.txt
index 9410fd0056600..69316d4dc61de 100644
--- a/flang/tools/bbc/CMakeLists.txt
+++ b/flang/tools/bbc/CMakeLists.txt
@@ -25,6 +25,7 @@ FIRTransforms
FIRBuilder
HLFIRDialect
HLFIRTransforms
+FlangOpenMPTransforms
${dialect_libs}
${extension_libs}
MLIRAffineToStandard
diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt
index 43679a9d53578..4c6dbf7d9c8c3 100644
--- a/flang/tools/fir-opt/CMakeLists.txt
+++ b/flang/tools/fir-opt/CMakeLists.txt
@@ -19,6 +19,7 @@ target_link_libraries(fir-opt PRIVATE
FIRCodeGen
HLFIRDialect
HLFIRTransforms
+ FlangOpenMPTransforms
FIRAnalysis
${test_libs}
${dialect_libs}
diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp
index 1846c1b317848..f75fba27c68f0 100644
--- a/flang/tools/fir-opt/fir-opt.cpp
+++ b/flang/tools/fir-opt/fir-opt.cpp
@@ -14,6 +14,7 @@
#include "mlir/Tools/mlir-opt/MlirOptMain.h"
#include "flang/Optimizer/CodeGen/CodeGen.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/Support/InitFIR.h"
#include "flang/Optimizer/Transforms/Passes.h"
@@ -34,6 +35,7 @@ int main(int argc, char **argv) {
fir::registerOptCodeGenPasses();
fir::registerOptTransformPasses();
hlfir::registerHLFIRPasses();
+ flangomp::registerFlangOpenMPPasses();
#ifdef FLANG_INCLUDE_TESTS
fir::test::registerTestFIRAliasAnalysisPass();
mlir::registerSideEffectTestPasses();
diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt
index 808219ac361f2..698a398547c77 100644
--- a/flang/tools/tco/CMakeLists.txt
+++ b/flang/tools/tco/CMakeLists.txt
@@ -17,6 +17,7 @@ target_link_libraries(tco PRIVATE
FIRBuilder
HLFIRDialect
HLFIRTransforms
+ FlangOpenMPTransforms
${dialect_libs}
${extension_libs}
MLIRIR
>From db4fc37ae430a138d1401b587aa1c54184e84dc9 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 2 Aug 2024 16:41:09 +0900
Subject: [PATCH 7/9] Change to workshare loop wrapper op
---
flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 24 ++++++++++++-------
flang/test/HLFIR/bufferize-workshare.fir | 4 ++--
.../Transforms/OpenMP/lower-workshare.mlir | 5 ++--
3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index 40975552d1fe3..cb342b60de4e8 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator_range.h"
+#include <mlir/Dialect/OpenMP/OpenMPClauseOperands.h>
#include <variant>
namespace flangomp {
@@ -73,7 +74,7 @@ static bool isSafeToParallelize(Operation *op) {
///
/// omp.workshare {
/// %a = fir.allocmem
-/// omp.wsloop {}
+/// omp.workshare_loop_wrapper {}
/// fir.call Assign %b %a
/// fir.freemem %a
/// }
@@ -85,7 +86,7 @@ static bool isSafeToParallelize(Operation *op) {
/// fir.store %a %tmp
/// }
/// %a_reloaded = fir.load %tmp
-/// omp.wsloop {}
+/// omp.workshare_loop_wrapper {}
/// omp.single {
/// fir.call Assign %b %a_reloaded
/// fir.freemem %a_reloaded
@@ -180,20 +181,20 @@ void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
assert(wsBlock->getTerminator()->getNumOperands() == 0);
Operation *terminator = wsBlock->getTerminator();
- SmallVector<std::variant<SingleRegion, omp::WsloopOp>> regions;
+ SmallVector<std::variant<SingleRegion, omp::WorkshareLoopWrapperOp>> regions;
auto it = wsBlock->begin();
auto getSingleRegion = [&]() {
if (&*it == terminator)
return false;
- if (auto pop = dyn_cast<omp::WsloopOp>(&*it)) {
+ if (auto pop = dyn_cast<omp::WorkshareLoopWrapperOp>(&*it)) {
regions.push_back(pop);
it++;
return true;
}
SingleRegion sr;
sr.begin = it;
- while (&*it != terminator && !isa<omp::WsloopOp>(&*it))
+ while (&*it != terminator && !isa<omp::WorkshareLoopWrapperOp>(&*it))
it++;
sr.end = it;
assert(sr.begin != sr.end);
@@ -214,9 +215,16 @@ void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
singleBuilder.createBlock(&singleOp.getRegion());
moveToSingle(std::get<SingleRegion>(loopOrSingle), singleBuilder);
} else {
- rootBuilder.clone(*std::get<omp::WsloopOp>(loopOrSingle), rootMapping);
- if (!isLast)
- rootBuilder.create<omp::BarrierOp>(loc);
+ omp::WsloopOperands wsloopOperands;
+ if (isLast)
+ wsloopOperands.nowait = rootBuilder.getUnitAttr();
+ auto wsloop =
+ rootBuilder.create<mlir::omp::WsloopOp>(loc, wsloopOperands);
+ auto wslw = std::get<omp::WorkshareLoopWrapperOp>(loopOrSingle);
+ auto clonedWslw = cast<omp::WorkshareLoopWrapperOp>(
+ rootBuilder.clone(*wslw, rootMapping));
+ wsloop.getRegion().takeBody(clonedWslw.getRegion());
+ clonedWslw->erase();
}
}
diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir
index 86a2f031478dd..33b368a62eaab 100644
--- a/flang/test/HLFIR/bufferize-workshare.fir
+++ b/flang/test/HLFIR/bufferize-workshare.fir
@@ -12,7 +12,7 @@
// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
// CHECK: %[[VAL_7:.*]] = arith.constant true
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
-// CHECK: omp.wsloop {
+// CHECK: "omp.workshare_loop_wrapper"() ({
// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
@@ -22,7 +22,7 @@
// CHECK: omp.yield
// CHECK: }
// CHECK: omp.terminator
-// CHECK: }
+// CHECK: }) : () -> ()
// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
diff --git a/flang/test/Transforms/OpenMP/lower-workshare.mlir b/flang/test/Transforms/OpenMP/lower-workshare.mlir
index a8d36443f08bd..cb5791d35916a 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare.mlir
@@ -34,7 +34,6 @@ module {
// CHECK: }
// CHECK: omp.terminator
// CHECK: }
-// CHECK: omp.barrier
// CHECK: omp.single nowait {
// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_13]] : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
// CHECK: fir.freemem %[[VAL_14]] : !fir.heap<!fir.array<42xi32>>
@@ -56,7 +55,7 @@ module {
%3:2 = hlfir.declare %2(%0) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
%true = arith.constant true
%c1 = arith.constant 1 : index
- omp.wsloop {
+ "omp.workshare_loop_wrapper"() ({
omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) {
%7 = hlfir.designate %1#0 (%arg1) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
%8 = fir.load %7 : !fir.ref<i32>
@@ -66,7 +65,7 @@ module {
omp.yield
}
omp.terminator
- }
+ }) : () -> ()
%4 = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
%5 = fir.insert_value %4, %true, [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
%6 = fir.insert_value %5, %3#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
>From 1dfdab56b0250bc3e4b0869451f7c21c847f9aee Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 2 Aug 2024 16:47:27 +0900
Subject: [PATCH 8/9] Move single op declaration
---
flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index cb342b60de4e8..2322d2acbc013 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -120,8 +120,6 @@ void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
OpBuilder rootBuilder(wsOp);
IRMapping rootMapping;
- omp::SingleOp singleOp = nullptr;
-
auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
IRMapping singleMapping) {
if (auto reloaded = rootMapping.lookupOrNull(v))
@@ -210,7 +208,8 @@ void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
omp::SingleOperands singleOperands;
if (isLast)
singleOperands.nowait = rootBuilder.getUnitAttr();
- singleOp = rootBuilder.create<omp::SingleOp>(loc, singleOperands);
+ omp::SingleOp singleOp =
+ rootBuilder.create<omp::SingleOp>(loc, singleOperands);
OpBuilder singleBuilder(singleOp);
singleBuilder.createBlock(&singleOp.getRegion());
moveToSingle(std::get<SingleRegion>(loopOrSingle), singleBuilder);
>From 386157c154d25d22e4e3ea083d0421496072316a Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Fri, 2 Aug 2024 17:13:58 +0900
Subject: [PATCH 9/9] Schedule pass properly
---
flang/include/flang/Tools/CLOptions.inc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 594369fc2ffe5..2edaf0c6a0ae8 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -345,7 +345,7 @@ inline void createHLFIRToFIRPassPipeline(
pm.addPass(hlfir::createLowerHLFIRIntrinsics());
pm.addPass(hlfir::createBufferizeHLFIR());
pm.addPass(hlfir::createConvertHLFIRtoFIR());
- pm.addPass(flangomp::createLowerWorkshare());
+ addNestedPassToAllTopLevelOperations(pm, flangomp::createLowerWorkshare);
}
/// Create a pass pipeline for handling certain OpenMP transformations needed
More information about the llvm-branch-commits
mailing list