[flang-commits] [flang] 8bb21ae - [flang] Introduce custom loop nest generation for loops in workshare construct (#101445)
via flang-commits
flang-commits at lists.llvm.org
Tue Nov 19 00:00:08 PST 2024
Author: Ivan R. Ivanov
Date: 2024-11-19T17:00:04+09:00
New Revision: 8bb21ae6c92c03b2487ee9b0df584c7a17446863
URL: https://github.com/llvm/llvm-project/commit/8bb21ae6c92c03b2487ee9b0df584c7a17446863
DIFF: https://github.com/llvm/llvm-project/commit/8bb21ae6c92c03b2487ee9b0df584c7a17446863.diff
LOG: [flang] Introduce custom loop nest generation for loops in workshare construct (#101445)
This alternative loop nest generation is used to generate an OpenMP loop nest instead of fir loops to facilitate parallelizing statements in an OpenMP `workshare` construct.
Added:
Modified:
flang/include/flang/Optimizer/Builder/HLFIRTools.h
flang/lib/Lower/ConvertCall.cpp
flang/lib/Lower/OpenMP/ReductionProcessor.cpp
flang/lib/Optimizer/Builder/HLFIRTools.cpp
flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..f073f494b3fb21 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
/// Structure to describe a loop nest.
struct LoopNest {
- fir::DoLoopOp outerLoop;
- fir::DoLoopOp innerLoop;
+ mlir::Operation *outerOp = nullptr;
+ mlir::Block *body = nullptr;
llvm::SmallVector<mlir::Value> oneBasedIndices;
};
@@ -366,11 +366,13 @@ struct LoopNest {
/// \p isUnordered specifies whether the loops in the loop nest
/// are unordered.
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWorkshareLoop = false);
inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::Value shape, bool isUnordered = false) {
+ mlir::Value shape, bool isUnordered = false,
+ bool emitWorkshareLoop = false) {
return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWorkshareLoop);
}
/// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 9f5b58590fb79e..e84e7afbe82e09 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2135,7 +2135,7 @@ class ElementalCallBuilder {
hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
auto insPt = builder.saveInsertionPoint();
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
callContext.stmtCtx.pushScope();
for (auto &preparedActual : loweredActuals)
if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 6b98ea3d0615b6..736de2ee511bef 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -374,7 +374,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
// know this won't miss any opportunities for clever elemental inlining
hlfir::LoopNest nest = hlfir::genLoopNest(
loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
- builder.setInsertionPointToStart(nest.innerLoop.getBody());
+ builder.setInsertionPointToStart(nest.body);
mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
auto lhsEleAddr = builder.create<fir::ArrayCoorOp>(
loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -388,7 +388,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
builder, loc, redId, refTy, lhsEle, rhsEle);
builder.create<fir::StoreOp>(loc, scalarReduction, lhsEleAddr);
- builder.setInsertionPointAfter(nest.outerLoop);
+ builder.setInsertionPointAfter(nest.outerOp);
builder.create<mlir::omp::YieldOp>(loc, lhsAddr);
}
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..7425ccf7fc0e30 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
#include "mlir/IR/IRMapping.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/TypeSwitch.h"
+#include <mlir/Dialect/OpenMP/OpenMPDialect.h>
#include <optional>
// Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,50 @@ mlir::Value hlfir::inlineElementalOp(
hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered) {
+ mlir::ValueRange extents, bool isUnordered,
+ bool emitWorkshareLoop) {
+ emitWorkshareLoop = emitWorkshareLoop && isUnordered;
hlfir::LoopNest loopNest;
assert(!extents.empty() && "must have at least one extent");
- auto insPt = builder.saveInsertionPoint();
+ mlir::OpBuilder::InsertionGuard guard(builder);
loopNest.oneBasedIndices.assign(extents.size(), mlir::Value{});
// Build loop nest from column to row.
auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1);
mlir::Type indexType = builder.getIndexType();
- unsigned dim = extents.size() - 1;
- for (auto extent : llvm::reverse(extents)) {
- auto ub = builder.createConvert(loc, indexType, extent);
- loopNest.innerLoop =
- builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered);
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
- // Reverse the indices so they are in column-major order.
- loopNest.oneBasedIndices[dim--] = loopNest.innerLoop.getInductionVar();
- if (!loopNest.outerLoop)
- loopNest.outerLoop = loopNest.innerLoop;
+ if (emitWorkshareLoop) {
+ auto wslw = builder.create<mlir::omp::WorkshareLoopWrapperOp>(loc);
+ loopNest.outerOp = wslw;
+ builder.createBlock(&wslw.getRegion());
+ mlir::omp::LoopNestOperands lnops;
+ lnops.loopInclusive = builder.getUnitAttr();
+ for (auto extent : llvm::reverse(extents)) {
+ lnops.loopLowerBounds.push_back(one);
+ lnops.loopUpperBounds.push_back(extent);
+ lnops.loopSteps.push_back(one);
+ }
+ auto lnOp = builder.create<mlir::omp::LoopNestOp>(loc, lnops);
+ mlir::Block *block = builder.createBlock(&lnOp.getRegion());
+ for (auto extent : llvm::reverse(extents))
+ block->addArgument(extent.getType(), extent.getLoc());
+ loopNest.body = block;
+ builder.create<mlir::omp::YieldOp>(loc);
+ for (unsigned dim = 0; dim < extents.size(); dim++)
+ loopNest.oneBasedIndices[extents.size() - dim - 1] =
+ lnOp.getRegion().front().getArgument(dim);
+ } else {
+ unsigned dim = extents.size() - 1;
+ for (auto extent : llvm::reverse(extents)) {
+ auto ub = builder.createConvert(loc, indexType, extent);
+ auto doLoop =
+ builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered);
+ loopNest.body = doLoop.getBody();
+ builder.setInsertionPointToStart(loopNest.body);
+ // Reverse the indices so they are in column-major order.
+ loopNest.oneBasedIndices[dim--] = doLoop.getInductionVar();
+ if (!loopNest.outerOp)
+ loopNest.outerOp = doLoop;
+ }
}
- builder.restoreInsertionPoint(insPt);
return loopNest;
}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index a70a6b388c4b1a..07794828fce267 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,6 +26,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
@@ -793,7 +794,7 @@ struct ElementalOpConversion
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
auto insPt = builder.saveInsertionPoint();
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
loopNest.oneBasedIndices);
hlfir::Entity elementValue(yield.getElementValue());
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 85dd517cb57914..424566462e8fe0 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -464,7 +464,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
// if the LHS is not).
mlir::Value shape = hlfir::genShape(loc, builder, lhsEntity);
elementalLoopNest = hlfir::genLoopNest(loc, builder, shape);
- builder.setInsertionPointToStart(elementalLoopNest->innerLoop.getBody());
+ builder.setInsertionPointToStart(elementalLoopNest->body);
lhsEntity = hlfir::getElementAt(loc, builder, lhsEntity,
elementalLoopNest->oneBasedIndices);
rhsEntity = hlfir::getElementAt(loc, builder, rhsEntity,
@@ -484,7 +484,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
for (auto &cleanupConversion : argConversionCleanups)
cleanupConversion();
if (elementalLoopNest)
- builder.setInsertionPointAfter(elementalLoopNest->outerLoop);
+ builder.setInsertionPointAfter(elementalLoopNest->outerOp);
} else {
// TODO: preserve allocatable assignment aspects for forall once
// they are conveyed in hlfir.region_assign.
@@ -492,8 +492,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
}
generateCleanupIfAny(loweredLhs.elementalCleanup);
if (loweredLhs.vectorSubscriptLoopNest)
- builder.setInsertionPointAfter(
- loweredLhs.vectorSubscriptLoopNest->outerLoop);
+ builder.setInsertionPointAfter(loweredLhs.vectorSubscriptLoopNest->outerOp);
generateCleanupIfAny(oldRhsYield);
generateCleanupIfAny(loweredLhs.nonElementalCleanup);
}
@@ -518,8 +517,8 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
hlfir::Entity savedMask{maybeSaved->first};
mlir::Value shape = hlfir::genShape(loc, builder, savedMask);
whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
- constructStack.push_back(whereLoopNest->outerLoop.getOperation());
- builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
+ constructStack.push_back(whereLoopNest->outerOp);
+ builder.setInsertionPointToStart(whereLoopNest->body);
mlir::Value cdt = hlfir::getElementAt(loc, builder, savedMask,
whereLoopNest->oneBasedIndices);
generateMaskIfOp(cdt);
@@ -527,7 +526,7 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
// If this is the same run as the one that saved the value, the clean-up
// was left-over to be done now.
auto insertionPoint = builder.saveInsertionPoint();
- builder.setInsertionPointAfter(whereLoopNest->outerLoop);
+ builder.setInsertionPointAfter(whereLoopNest->outerOp);
generateCleanupIfAny(maybeSaved->second);
builder.restoreInsertionPoint(insertionPoint);
}
@@ -539,8 +538,8 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
mask.generateNoneElementalPart(builder, mapper);
mlir::Value shape = mask.generateShape(builder, mapper);
whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
- constructStack.push_back(whereLoopNest->outerLoop.getOperation());
- builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
+ constructStack.push_back(whereLoopNest->outerOp);
+ builder.setInsertionPointToStart(whereLoopNest->body);
mlir::Value cdt = generateMaskedEntity(mask);
generateMaskIfOp(cdt);
return;
@@ -754,7 +753,7 @@ OrderedAssignmentRewriter::generateYieldedLHS(
loweredLhs.vectorSubscriptLoopNest = hlfir::genLoopNest(
loc, builder, loweredLhs.vectorSubscriptShape.value());
builder.setInsertionPointToStart(
- loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody());
+ loweredLhs.vectorSubscriptLoopNest->body);
}
loweredLhs.lhs = temp->second.fetch(loc, builder);
return loweredLhs;
@@ -771,8 +770,7 @@ OrderedAssignmentRewriter::generateYieldedLHS(
loweredLhs.vectorSubscriptLoopNest =
hlfir::genLoopNest(loc, builder, *loweredLhs.vectorSubscriptShape,
!elementalAddrLhs.isOrdered());
- builder.setInsertionPointToStart(
- loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody());
+ builder.setInsertionPointToStart(loweredLhs.vectorSubscriptLoopNest->body);
mapper.map(elementalAddrLhs.getIndices(),
loweredLhs.vectorSubscriptLoopNest->oneBasedIndices);
for (auto &op : elementalAddrLhs.getBody().front().without_terminator())
@@ -798,11 +796,11 @@ OrderedAssignmentRewriter::generateMaskedEntity(MaskedArrayExpr &maskedExpr) {
if (!maskedExpr.noneElementalPartWasGenerated) {
// Generate none elemental part before the where loops (but inside the
// current forall loops if any).
- builder.setInsertionPoint(whereLoopNest->outerLoop);
+ builder.setInsertionPoint(whereLoopNest->outerOp);
maskedExpr.generateNoneElementalPart(builder, mapper);
}
// Generate the none elemental part cleanup after the where loops.
- builder.setInsertionPointAfter(whereLoopNest->outerLoop);
+ builder.setInsertionPointAfter(whereLoopNest->outerOp);
maskedExpr.generateNoneElementalCleanupIfAny(builder, mapper);
// Generate the value of the current element for the masked expression
// at the current insertion point (inside the where loops, and any fir.if
@@ -1242,7 +1240,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide(
LhsValueAndCleanUp loweredLhs = generateYieldedLHS(loc, region);
fir::factory::TemporaryStorage *temp = nullptr;
if (loweredLhs.vectorSubscriptLoopNest)
- constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerLoop);
+ constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerOp);
if (loweredLhs.vectorSubscriptLoopNest && !rhsIsArray(regionAssignOp)) {
// Vector subscripted entity for which the shape must also be saved on top
// of the element addresses (e.g. the shape may change in each forall
@@ -1265,7 +1263,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide(
// subscripted LHS.
auto &vectorTmp = temp->cast<fir::factory::AnyVectorSubscriptStack>();
auto insertionPoint = builder.saveInsertionPoint();
- builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerLoop);
+ builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerOp);
vectorTmp.pushShape(loc, builder, shape);
builder.restoreInsertionPoint(insertionPoint);
} else {
@@ -1290,8 +1288,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide(
generateCleanupIfAny(loweredLhs.elementalCleanup);
if (loweredLhs.vectorSubscriptLoopNest) {
constructStack.pop_back();
- builder.setInsertionPointAfter(
- loweredLhs.vectorSubscriptLoopNest->outerLoop);
+ builder.setInsertionPointAfter(loweredLhs.vectorSubscriptLoopNest->outerOp);
}
}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index d05a3258cf293c..166649d955dabd 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -483,7 +483,7 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
// hlfir.elemental region inside the inner loop
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
loopNest.oneBasedIndices);
hlfir::Entity elementValue{yield.getElementValue()};
@@ -554,7 +554,7 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
auto arrayElement =
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
builder.create<hlfir::AssignOp>(loc, rhs, arrayElement);
@@ -652,7 +652,7 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
- builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.body);
auto rhsArrayElement =
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
More information about the flang-commits
mailing list