[flang-commits] [flang] 5d0c5c5 - [flang][hlfir] Lower hlfir.where when there are no conflicts
Jean Perier via flang-commits
flang-commits at lists.llvm.org
Mon May 22 03:51:08 PDT 2023
Author: Jean Perier
Date: 2023-05-22T12:50:54+02:00
New Revision: 5d0c5c5928409306c860c8d0506de81b9ade854c
URL: https://github.com/llvm/llvm-project/commit/5d0c5c5928409306c860c8d0506de81b9ade854c
DIFF: https://github.com/llvm/llvm-project/commit/5d0c5c5928409306c860c8d0506de81b9ade854c.diff
LOG: [flang][hlfir] Lower hlfir.where when there are no conflicts
Lower hlfir.where when the scheduling analysis determined that no
temporary storage is needed.
Differential Revision: https://reviews.llvm.org/D150881
Added:
flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir
Modified:
flang/include/flang/Optimizer/Builder/HLFIRTools.h
flang/include/flang/Optimizer/HLFIR/HLFIROps.td
flang/lib/Lower/ConvertCall.cpp
flang/lib/Optimizer/Builder/HLFIRTools.cpp
flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 0b1e36590f10b..06bd4d8ccd2d3 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -23,6 +23,10 @@ namespace fir {
class FirOpBuilder;
}
+namespace mlir {
+class IRMapping;
+}
+
namespace hlfir {
class AssociateOp;
@@ -359,13 +363,18 @@ hlfir::ElementalOp genElementalOp(mlir::Location loc,
mlir::ValueRange typeParams,
const ElementalKernelGenerator &genKernel);
+/// Structure to describe a loop nest.
+struct LoopNest {
+ fir::DoLoopOp outerLoop;
+ fir::DoLoopOp innerLoop;
+ llvm::SmallVector<mlir::Value> oneBasedIndices;
+};
+
/// Generate a fir.do_loop nest looping from 1 to extents[i].
-/// Return the inner fir.do_loop and the indices of the loops.
-std::pair<fir::DoLoopOp, llvm::SmallVector<mlir::Value>>
-genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents);
-inline std::pair<fir::DoLoopOp, llvm::SmallVector<mlir::Value>>
-genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value shape) {
+LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::ValueRange extents);
+inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::Value shape) {
return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape));
}
@@ -379,6 +388,20 @@ hlfir::YieldElementOp inlineElementalOp(mlir::Location loc,
hlfir::ElementalOp elemental,
mlir::ValueRange oneBasedIndices);
+/// Inline the body of an hlfir.elemental without cloning the resulting
+/// hlfir.yield_element, and return the cloned operand of the
+/// hlfir.yield_element. The mapper must be provided to cover complex cases
+/// where the inlined elemental is not defined in the current context and uses
+/// values that have been cloned already.
+/// A callback is provided to indicate if an hlfir.apply inside the
+/// hlfir.elemental must be immediately replaced by the inlining of the
+/// applied hlfir.elemental.
+mlir::Value inlineElementalOp(
+ mlir::Location loc, fir::FirOpBuilder &builder,
+ hlfir::ElementalOp elemental, mlir::ValueRange oneBasedIndices,
+ mlir::IRMapping &mapper,
+ const std::function<bool(hlfir::ElementalOp)> &mustRecursivelyInline);
+
std::pair<fir::ExtendedValue, std::optional<hlfir::CleanupFunction>>
convertToValue(mlir::Location loc, fir::FirOpBuilder &builder,
const hlfir::Entity &entity);
diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index 572faf06d14c2..15b92385a7720 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -644,10 +644,14 @@ def hlfir_ElementalOp : hlfir_Op<"elemental", [RecursiveMemoryEffects]> {
let extraClassDeclaration = [{
mlir::Block *getBody() { return &getRegion().front(); }
- // Get the indices iterating over the shape.
+ /// Get the indices iterating over the shape.
mlir::Block::BlockArgListType getIndices() {
return getBody()->getArguments();
}
+
+ /// Must this elemental be evaluated in order?
+ /// TODO: add attribute and set it in lowering.
+ bool isOrdered() {return true;}
}];
let skipDefaultBuilders = 1;
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 17fbdc437aa82..ad7e177020522 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -1517,11 +1517,10 @@ class ElementalCallBuilder {
// iterations are cleaned up inside the iterations.
if (!callContext.resultType) {
// Subroutine case. Generate call inside loop nest.
- auto [innerLoop, oneBasedIndicesVector] =
- hlfir::genLoopNest(loc, builder, shape);
- mlir::ValueRange oneBasedIndices = oneBasedIndicesVector;
+ hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, shape);
+ mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
auto insPt = builder.saveInsertionPoint();
- builder.setInsertionPointToStart(innerLoop.getBody());
+ builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
callContext.stmtCtx.pushScope();
for (auto &preparedActual : loweredActuals)
if (preparedActual)
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 30bae873a5eaa..7fd41a214ac45 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -764,26 +764,62 @@ hlfir::inlineElementalOp(mlir::Location loc, fir::FirOpBuilder &builder,
return yield;
}
-std::pair<fir::DoLoopOp, llvm::SmallVector<mlir::Value>>
-hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents) {
+mlir::Value hlfir::inlineElementalOp(
+ mlir::Location loc, fir::FirOpBuilder &builder,
+ hlfir::ElementalOp elemental, mlir::ValueRange oneBasedIndices,
+ mlir::IRMapping &mapper,
+ const std::function<bool(hlfir::ElementalOp)> &mustRecursivelyInline) {
+ mlir::Region &region = elemental.getRegion();
+ // hlfir.elemental region is a SizedRegion<1>.
+ assert(region.hasOneBlock() && "elemental region must have one block");
+ mapper.map(elemental.getIndices(), oneBasedIndices);
+ mlir::Block::OpListType &ops = region.back().getOperations();
+ assert(!ops.empty() && "elemental block cannot be empty");
+ auto end = ops.end();
+ for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt) {
+ if (auto apply = mlir::dyn_cast<hlfir::ApplyOp>(*opIt))
+ if (auto appliedElemental =
+ apply.getExpr().getDefiningOp<hlfir::ElementalOp>())
+ if (mustRecursivelyInline(appliedElemental)) {
+ llvm::SmallVector<mlir::Value> clonedApplyIndices;
+ for (auto indice : apply.getIndices())
+ clonedApplyIndices.push_back(mapper.lookupOrDefault(indice));
+ mlir::Value inlined = inlineElementalOp(
+ loc, builder, appliedElemental, clonedApplyIndices, mapper,
+ mustRecursivelyInline);
+ mapper.map(apply.getResult(), inlined);
+ continue;
+ }
+ (void)builder.clone(*opIt, mapper);
+ }
+ auto oldYield = mlir::dyn_cast_or_null<hlfir::YieldElementOp>(
+ region.back().getOperations().back());
+ assert(oldYield && "must terminate with yieldElementalOp");
+ return mapper.lookupOrDefault(oldYield.getElementValue());
+}
+
+hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ mlir::ValueRange extents) {
+ hlfir::LoopNest loopNest;
assert(!extents.empty() && "must have at least one extent");
auto insPt = builder.saveInsertionPoint();
- llvm::SmallVector<mlir::Value> indices(extents.size());
+ loopNest.oneBasedIndices.assign(extents.size(), mlir::Value{});
// Build loop nest from column to row.
auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1);
mlir::Type indexType = builder.getIndexType();
unsigned dim = extents.size() - 1;
- fir::DoLoopOp innerLoop;
for (auto extent : llvm::reverse(extents)) {
auto ub = builder.createConvert(loc, indexType, extent);
- innerLoop = builder.create<fir::DoLoopOp>(loc, one, ub, one);
- builder.setInsertionPointToStart(innerLoop.getBody());
+ loopNest.innerLoop = builder.create<fir::DoLoopOp>(loc, one, ub, one);
+ builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
// Reverse the indices so they are in column-major order.
- indices[dim--] = innerLoop.getInductionVar();
+ loopNest.oneBasedIndices[dim--] = loopNest.innerLoop.getInductionVar();
+ if (!loopNest.outerLoop)
+ loopNest.outerLoop = loopNest.innerLoop;
}
builder.restoreInsertionPoint(insPt);
- return {innerLoop, indices};
+ return loopNest;
}
static fir::ExtendedValue
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 7b12fad984f4e..12cc236384111 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -552,12 +552,11 @@ struct ElementalOpConversion
adaptor.getTypeparams());
// Generate a loop nest looping around the fir.elemental shape and clone
// fir.elemental region inside the inner loop.
- auto [innerLoop, oneBasedLoopIndices] =
- hlfir::genLoopNest(loc, builder, extents);
+ hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents);
auto insPt = builder.saveInsertionPoint();
- builder.setInsertionPointToStart(innerLoop.getBody());
- auto yield =
- hlfir::inlineElementalOp(loc, builder, elemental, oneBasedLoopIndices);
+ builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+ auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
+ loopNest.oneBasedIndices);
hlfir::Entity elementValue(yield.getElementValue());
// Skip final AsExpr if any. It would create an element temporary,
// which is no needed since the element will be assigned right away in
@@ -572,7 +571,7 @@ struct ElementalOpConversion
rewriter.eraseOp(yield);
// Assign the element value to the temp element for this iteration.
auto tempElement =
- hlfir::getElementAt(loc, builder, temp, oneBasedLoopIndices);
+ hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices);
builder.create<hlfir::AssignOp>(loc, elementValue, tempElement);
// hlfir.yield_element implicitly marks the end-of-life its operand if
// it is an expression created in the hlfir.elemental (since it is its
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 5cea4d743841b..d49bc1e6bdff3 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -19,11 +19,13 @@
#include "ScheduleOrderedAssignments.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
@@ -42,6 +44,52 @@ static llvm::cl::opt<bool> dbgScheduleOnly(
llvm::cl::desc("Only run ordered assignment scheduling with no codegen"),
llvm::cl::init(false));
+namespace {
+
+/// Structure that represents a masked expression being lowered. Masked
+/// expressions are any expressions inside an hlfir.where. As described in
+/// Fortran 2018 section 10.2.3.2, the evaluation of the elemental parts of such
+/// expressions must be masked, while the evaluation of none elemental parts
+/// must not be masked. This structure analyzes the region evaluating the
+/// expression and allows splitting the generation of the none elemental part
+/// from the elemental part.
+struct MaskedArrayExpr {
+ MaskedArrayExpr(mlir::Location loc, mlir::Region &region);
+
+ /// Generate the none elemental part. Must be called outside of the
+ /// loops created for the WHERE construct.
+ void generateNoneElementalPart(fir::FirOpBuilder &builder,
+ mlir::IRMapping &mapper);
+
+ /// Methods below can only be called once generateNoneElementalPart has been
+ /// called.
+
+ /// Return the shape of the expression.
+ mlir::Value generateShape(fir::FirOpBuilder &builder,
+ mlir::IRMapping &mapper);
+ /// Return the value of an element value for this expression given the current
+ /// where loop indices.
+ mlir::Value generateElementalParts(fir::FirOpBuilder &builder,
+ mlir::ValueRange oneBasedIndices,
+ mlir::IRMapping &mapper);
+ /// Generate the cleanup for the none elemental parts, if any. This must be
+ /// called after the loops created for the WHERE construct.
+ void generateNoneElementalCleanupIfAny(fir::FirOpBuilder &builder,
+ mlir::IRMapping &mapper);
+
+ mlir::Location loc;
+ mlir::Region &region;
+ /// Was generateNoneElementalPart called?
+ bool noneElementalPartWasGenerated = false;
+ /// Set of operations that form the elemental parts of the
+ /// expression evaluation. These are the hlfir.elemental and
+ /// hlfir.elemental_addr that form the elemental tree producing
+ /// the expression value. hlfir.elemental that produce values
+ /// used inside transformational operations are not part of this set.
+ llvm::SmallSet<mlir::Operation *, 4> elementalParts{};
+};
+} // namespace
+
namespace {
/// Structure that visits an ordered assignment tree and generates code for
/// it according to a schedule.
@@ -76,6 +124,8 @@ class OrderedAssignmentRewriter {
/// Generate code when leaving a given ordered assignment node.
void post(hlfir::ForallOp);
void post(hlfir::ForallMaskOp);
+ void post(hlfir::WhereOp);
+ void post(hlfir::ElseWhereOp);
/// Is this an assignment to a vector subscripted entity?
static bool hasVectorSubscriptedLhs(hlfir::RegionAssignOp regionAssignOp);
@@ -105,9 +155,23 @@ class OrderedAssignmentRewriter {
/// at the current insertion point (by cloning).
void generateCleanupIfAny(std::optional<hlfir::YieldOp> maybeYield);
+ /// Generate a masked entity. This can only be called when whereLoopNest was
+ /// set (When an hlfir.where is being visited).
+ /// This method returns the scalar element (that may have been previously
+ /// saved) for the current indices inside the where loop.
+ mlir::Value generateMaskedEntity(mlir::Location loc, mlir::Region &region) {
+ MaskedArrayExpr maskedExpr(loc, region);
+ return generateMaskedEntity(maskedExpr);
+ }
+ mlir::Value generateMaskedEntity(MaskedArrayExpr &maskedExpr);
+
+ /// Create a fir.if at the current position inside the where loop nest
+ /// given a mask expression.
+ void generateMaskIfOp(MaskedArrayExpr &mask);
+
fir::FirOpBuilder &builder;
- /// Map containg the mapping between the original order assignment tree
+ /// Map containing the mapping between the original order assignment tree
/// operations and the operations that have been cloned in the current run.
/// It is reset between two runs.
mlir::IRMapping mapper;
@@ -115,6 +179,9 @@ class OrderedAssignmentRewriter {
/// point correctly when leaving a node that requires a fir.do_loop or fir.if
/// operation.
llvm::SmallVector<mlir::Operation *> constructStack;
+ /// Current where loop nest, if any.
+ std::optional<hlfir::LoopNest> whereLoopNest;
+
/// Root of the order assignment tree being lowered.
hlfir::OrderedAssignmentTreeOpInterface root;
/// Pointer to the current run of the schedule being lowered.
@@ -139,8 +206,8 @@ void OrderedAssignmentRewriter::walk(
mlir::dyn_cast<hlfir::OrderedAssignmentTreeOpInterface>(op))
walk(subNode);
llvm::TypeSwitch<mlir::Operation *, void>(node.getOperation())
- .Case<hlfir::ForallOp, hlfir::ForallMaskOp>(
- [&](auto concreteOp) { post(concreteOp); })
+ .Case<hlfir::ForallOp, hlfir::ForallMaskOp, hlfir::WhereOp,
+ hlfir::ElseWhereOp>([&](auto concreteOp) { post(concreteOp); })
.Default([](auto) {});
}
}
@@ -218,19 +285,78 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
generateCleanupIfAny(oldLhsYield);
}
+void OrderedAssignmentRewriter::generateMaskIfOp(MaskedArrayExpr &mask) {
+ assert(whereLoopNest.has_value() && "must be inside a WHERE");
+ mlir::Location loc = mask.loc;
+ hlfir::Entity maskVal{generateMaskedEntity(mask)};
+ maskVal = hlfir::loadTrivialScalar(loc, builder, maskVal);
+ mlir::Value cdt = builder.createConvert(loc, builder.getI1Type(), maskVal);
+ // Else region is added when visiting nested hlfir.elseWhereOp, if any.
+ auto ifOp = builder.create<fir::IfOp>(loc, std::nullopt, cdt,
+ /*withElseRegion=*/false);
+ constructStack.push_back(ifOp.getOperation());
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+}
+
void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
mlir::Location loc = whereOp.getLoc();
- TODO(loc, "WHERE in HLFIR");
+ MaskedArrayExpr mask(loc, whereOp.getMaskRegion());
+ if (!whereLoopNest) {
+ // Start a loop nest iterating on the shape of the where mask.
+ mask.generateNoneElementalPart(builder, mapper);
+ mlir::Value shape = mask.generateShape(builder, mapper);
+ whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
+ constructStack.push_back(whereLoopNest->outerLoop.getOperation());
+ builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
+ }
+ // Generate a fir.if with the value of the current element of the mask
+ // inside the loops.
+ generateMaskIfOp(mask);
+}
+
+void OrderedAssignmentRewriter::post(hlfir::WhereOp whereOp) {
+ assert(!constructStack.empty() && "must contain a fir.if");
+ builder.setInsertionPointAfter(constructStack.pop_back_val());
+ // If all where/elsewhere fir.if have been popped, this is the outer whereOp,
+ // and the where loop must be exited.
+ assert(!constructStack.empty() && "must contain a fir.do_loop or fir.if");
+ if (mlir::isa<fir::DoLoopOp>(constructStack.back())) {
+ builder.setInsertionPointAfter(constructStack.pop_back_val());
+ whereLoopNest.reset();
+ }
}
void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) {
+ assert(!constructStack.empty() && "cannot be empty inside a where");
mlir::Location loc = elseWhereOp.getLoc();
- TODO(loc, "ELSEWHERE in HLFIR");
+ // Create an "else" region for the current where/elsewhere fir.if.
+ auto ifOp = mlir::dyn_cast<fir::IfOp>(constructStack.back());
+ assert(ifOp && ifOp.getElseRegion().empty() && "must be an if without else");
+ builder.createBlock(&ifOp.getElseRegion());
+ auto end = builder.create<fir::ResultOp>(loc);
+ builder.setInsertionPoint(end);
+ if (elseWhereOp.getMaskRegion().empty())
+ return;
+ // Create new nested fir.if with elsewhere mask if any.
+ MaskedArrayExpr mask(loc, elseWhereOp.getMaskRegion());
+ generateMaskIfOp(mask);
+}
+
+void OrderedAssignmentRewriter::post(hlfir::ElseWhereOp elseWhereOp) {
+ // Exit ifOp that was created for the elseWhereOp mask, if any.
+ if (elseWhereOp.getMaskRegion().empty())
+ return;
+ assert(!constructStack.empty() && "must contain a fir.if");
+ builder.setInsertionPointAfter(constructStack.pop_back_val());
}
std::pair<mlir::Value, std::optional<hlfir::YieldOp>>
OrderedAssignmentRewriter::generateYieldedEntity(mlir::Region &region) {
// TODO: if the region was saved, use that instead of generating code again.
+ if (whereLoopNest.has_value()) {
+ mlir::Location loc = region.getParentOp()->getLoc();
+ return {generateMaskedEntity(loc, region), std::nullopt};
+ }
assert(region.hasOneBlock() && "region must contain one block");
// Clone all operations except the final hlfir.yield.
mlir::Block::OpListType &ops = region.back().getOperations();
@@ -258,6 +384,27 @@ OrderedAssignmentRewriter::generateYieldedScalarValue(mlir::Region &region) {
return value;
}
+mlir::Value
+OrderedAssignmentRewriter::generateMaskedEntity(MaskedArrayExpr &maskedExpr) {
+ assert(whereLoopNest.has_value() && "must be inside WHERE loop nest");
+ auto insertionPoint = builder.saveInsertionPoint();
+ if (!maskedExpr.noneElementalPartWasGenerated) {
+ // Generate none elemental part before the where loops (but inside the
+ // current forall loops if any).
+ builder.setInsertionPoint(whereLoopNest->outerLoop);
+ maskedExpr.generateNoneElementalPart(builder, mapper);
+ }
+ // Generate the none elemental part cleanup after the where loops.
+ builder.setInsertionPointAfter(whereLoopNest->outerLoop);
+ maskedExpr.generateNoneElementalCleanupIfAny(builder, mapper);
+ // Generate the value of the current element for the masked expression
+ // at the current insertion point (inside the where loops, and any fir.if
+ // generated for previous masks).
+ builder.restoreInsertionPoint(insertionPoint);
+ return maskedExpr.generateElementalParts(
+ builder, whereLoopNest->oneBasedIndices, mapper);
+}
+
void OrderedAssignmentRewriter::generateCleanupIfAny(
std::optional<hlfir::YieldOp> maybeYield) {
if (maybeYield.has_value())
@@ -310,6 +457,127 @@ bool OrderedAssignmentRewriter::isRequiredInCurrentRun(
return false;
}
+/// Is the apply using all the elemental indices in order?
+static bool isInOrderApply(hlfir::ApplyOp apply, hlfir::ElementalOp elemental) {
+ if (elemental.getIndices().size() != apply.getIndices().size())
+ return false;
+ for (auto [elementalIdx, applyIdx] :
+ llvm::zip(elemental.getIndices(), apply.getIndices()))
+ if (elementalIdx != applyIdx)
+ return false;
+ return true;
+}
+
+/// Gather the chain of hlfir::ElementalOp, if any, that produced \p value.
+static void
+gatherElementalTree(mlir::Value value,
+ llvm::SmallPtrSetImpl<mlir::Operation *> &elementalOps,
+ bool isOutOfOrder) {
+ if (auto elemental = value.getDefiningOp<hlfir::ElementalOp>()) {
+ // Only inline an applied elemental that must be executed in order if the
+ // applying indices are in order. An hlfir::Elemental may have been created
+ // for a transformational like transpose, and Fortran 2018 standard
+ // section 10.2.3.2, point 10 imply that impure elemental sub-expression
+ // evaluations should not be masked if they are the arguments of
+ // transformational expressions.
+ if (isOutOfOrder && elemental.isOrdered())
+ return;
+ elementalOps.insert(elemental.getOperation());
+ for (mlir::Operation &op : elemental.getBody()->getOperations())
+ if (auto apply = mlir::dyn_cast<hlfir::ApplyOp>(op)) {
+ bool isUnorderedApply =
+ isOutOfOrder || !isInOrderApply(apply, elemental);
+ gatherElementalTree(apply.getExpr(), elementalOps, isUnorderedApply);
+ }
+ }
+}
+
+MaskedArrayExpr::MaskedArrayExpr(mlir::Location loc, mlir::Region &region)
+ : loc{loc}, region{region} {
+ mlir::Operation &terminator = region.back().back();
+ // TODO: clarify if vector subscripts must be inlined or not here.
+ // In case of x(elemental(A), :), this could lead to more elemental(A)
+ // evaluation than needed, which is not OK if "elemental" is impure.
+ // The standard is not very clear here.
+ if (mlir::isa<hlfir::ElementalAddrOp>(terminator))
+ TODO(loc, "vector subscripted assignments inside WHERE");
+ mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
+ gatherElementalTree(entity, elementalParts, /*isOutOfOrder=*/false);
+}
+
+void MaskedArrayExpr::generateNoneElementalPart(fir::FirOpBuilder &builder,
+ mlir::IRMapping &mapper) {
+ assert(!noneElementalPartWasGenerated &&
+ "none elemental parts already generated");
+ // Clone all operations, except the elemental and the final yield.
+ mlir::Block::OpListType &ops = region.back().getOperations();
+ assert(!ops.empty() && "yield block cannot be empty");
+ auto end = ops.end();
+ for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt)
+ if (!elementalParts.contains(&*opIt))
+ (void)builder.clone(*opIt, mapper);
+ noneElementalPartWasGenerated = true;
+}
+
+mlir::Value MaskedArrayExpr::generateShape(fir::FirOpBuilder &builder,
+ mlir::IRMapping &mapper) {
+ assert(noneElementalPartWasGenerated &&
+ "non elemental part must have been generated");
+ mlir::Operation &terminator = region.back().back();
+ // If the operation that produced the yielded entity is elemental, it was not
+ // cloned, but it holds a shape argument that was cloned. Return the cloned
+ // shape.
+ if (auto elementalAddrOp = mlir::dyn_cast<hlfir::ElementalAddrOp>(terminator))
+ return mapper.lookupOrDefault(elementalAddrOp.getShape());
+ mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
+ if (auto elemental = entity.getDefiningOp<hlfir::ElementalOp>())
+ return mapper.lookupOrDefault(elemental.getShape());
+ // Otherwise, the whole entity was cloned, and the shape can be generated
+ // from it.
+ hlfir::Entity clonedEntity{mapper.lookupOrDefault(entity)};
+ return hlfir::genShape(loc, builder, hlfir::Entity{clonedEntity});
+}
+
+mlir::Value
+MaskedArrayExpr::generateElementalParts(fir::FirOpBuilder &builder,
+ mlir::ValueRange oneBasedIndices,
+ mlir::IRMapping &mapper) {
+ assert(noneElementalPartWasGenerated &&
+ "non elemental part must have been generated");
+ mlir::Operation &terminator = region.back().back();
+ if (mlir::isa<hlfir::ElementalAddrOp>(terminator))
+ TODO(loc, "vector subscripted assignments inside WHERE");
+ mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
+ auto elemental = entity.getDefiningOp<hlfir::ElementalOp>();
+ if (!elemental) {
+ hlfir::Entity clonedEntity{mapper.lookupOrDefault(entity)};
+ return hlfir::getElementAt(loc, builder, clonedEntity, oneBasedIndices);
+ }
+ auto mustRecursivelyInline =
+ [&](hlfir::ElementalOp appliedElemental) -> bool {
+ return elementalParts.contains(appliedElemental.getOperation());
+ };
+ return inlineElementalOp(loc, builder, elemental, oneBasedIndices, mapper,
+ mustRecursivelyInline);
+}
+
+void MaskedArrayExpr::generateNoneElementalCleanupIfAny(
+ fir::FirOpBuilder &builder, mlir::IRMapping &mapper) {
+ mlir::Operation &terminator = region.back().back();
+ if (mlir::isa<hlfir::ElementalAddrOp>(terminator))
+ TODO(loc, "vector subscripted assignments inside WHERE");
+ auto yieldOp = mlir::cast<hlfir::YieldOp>(terminator);
+ if (yieldOp.getCleanup().empty())
+ return;
+ for (mlir::Operation &op : yieldOp.getCleanup().getOps()) {
+ if (auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(op))
+ if (elementalParts.contains(destroy.getExpr().getDefiningOp()))
+ continue;
+ if (!mlir::isa<fir::FirEndOp>(op))
+ (void)builder.clone(op, mapper);
+ }
+}
+
/// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given
/// a schedule.
static void lower(hlfir::OrderedAssignmentTreeOpInterface root,
diff --git a/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir b/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir
new file mode 100644
index 0000000000000..ac93e6828096a
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir
@@ -0,0 +1,309 @@
+// Test code generation of hlfir.where and hlfir.elsewhere when there
+// is no need to create temporary storage.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s
+
+// Simplest case: a WHERE construct assigning a scalar constant to "x" under
+// the control of array "mask". The CHECK lines below expect a single
+// fir.do_loop over the mask extent with a fir.if guarding each element-wise
+// hlfir.assign — no temporary storage is created.
+func.func @test_simple(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+  %cst = arith.constant 4.200000e+01 : f32
+  %0:2 = hlfir.declare %arg1 {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+  %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  hlfir.where {
+    hlfir.yield %0#0 : !fir.box<!fir.array<?x!fir.logical<4>>>
+  } do {
+    hlfir.region_assign {
+      hlfir.yield %cst : f32
+    } to {
+      hlfir.yield %1#0 : !fir.box<!fir.array<?xf32>>
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @test_simple(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+// CHECK: %[[VAL_2:.*]] = arith.constant 4.200000e+01 : f32
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_5]] : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_6]]#1 : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_8]] to %[[VAL_6]]#1 step %[[VAL_8]] {
+// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_9]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_12]] {
+// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_13]] : f32, !fir.ref<f32>
+// CHECK: }
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
+
+// WHERE / ELSEWHERE(mask2) / ELSEWHERE chain over three 100-element arrays.
+// The CHECK lines below expect one fir.do_loop per assignment, with nested
+// fir.if conditions: the first guarded by "mask", the second by
+// NOT "mask" AND "mask2", the last by NOT "mask" AND NOT "mask2" (expressed
+// as fir.if/else nests with empty then-branches).
+func.func @test_elsewhere(%arg0: !fir.ref<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>, %arg2: !fir.ref<!fir.array<100xf32>>, %arg3: !fir.ref<!fir.array<100x!fir.logical<4>>>, %arg4: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "mask2"}) {
+  %c100 = arith.constant 100 : index
+  %0 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %1:2 = hlfir.declare %arg3(%0) {uniq_name = "mask"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+  %2:2 = hlfir.declare %arg4(%0) {uniq_name = "mask2"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+  %3:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+  %4:2 = hlfir.declare %arg1(%0) {uniq_name = "y"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+  %5:2 = hlfir.declare %arg2(%0) {uniq_name = "z"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+  hlfir.where {
+    hlfir.yield %1#0 : !fir.ref<!fir.array<100x!fir.logical<4>>>
+  } do {
+    hlfir.region_assign {
+      hlfir.yield %4#0 : !fir.ref<!fir.array<100xf32>>
+    } to {
+      hlfir.yield %3#0 : !fir.ref<!fir.array<100xf32>>
+    }
+    hlfir.elsewhere mask {
+      hlfir.yield %2#0 : !fir.ref<!fir.array<100x!fir.logical<4>>>
+    } do {
+      hlfir.region_assign {
+        hlfir.yield %3#0 : !fir.ref<!fir.array<100xf32>>
+      } to {
+        hlfir.yield %4#0 : !fir.ref<!fir.array<100xf32>>
+      }
+      hlfir.elsewhere do {
+        hlfir.region_assign {
+          hlfir.yield %4#0 : !fir.ref<!fir.array<100xf32>>
+        } to {
+          hlfir.yield %5#0 : !fir.ref<!fir.array<100xf32>>
+        }
+      }
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @test_elsewhere(
+// CHECK-SAME: %[[VAL_0:[^:]*]]: !fir.ref<!fir.array<100xf32>>,
+// CHECK-SAME: %[[VAL_1:[^:]*]]: !fir.ref<!fir.array<100xf32>>,
+// CHECK-SAME: %[[VAL_2:[^:]*]]: !fir.ref<!fir.array<100xf32>>,
+// CHECK-SAME: %[[VAL_3:[^:]*]]: !fir.ref<!fir.array<100x!fir.logical<4>>>,
+// CHECK-SAME: %[[VAL_4:[^:]*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "mask2"}) {
+// CHECK: %[[VAL_5:.*]] = arith.constant 100 : index
+// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_3]](%[[VAL_6]]) {uniq_name = "mask"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_6]]) {uniq_name = "mask2"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "x"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_6]]) {uniq_name = "y"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_6]]) {uniq_name = "z"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_13:.*]] = %[[VAL_12]] to %[[VAL_5]] step %[[VAL_12]] {
+// CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_16]] {
+// CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_13]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_13]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_18]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_19:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_20:.*]] = %[[VAL_19]] to %[[VAL_5]] step %[[VAL_19]] {
+// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_23]] {
+// CHECK: } else {
+// CHECK: %[[VAL_24:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_26]] {
+// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_28]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_29:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_29]] to %[[VAL_5]] step %[[VAL_29]] {
+// CHECK: %[[VAL_31:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_30]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_31]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_33]] {
+// CHECK: } else {
+// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_30]]) : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_36]] {
+// CHECK: } else {
+// CHECK: %[[VAL_37:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_30]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_30]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK: hlfir.assign %[[VAL_37]] to %[[VAL_38]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
+// WHERE whose mask is a chain of hlfir.elemental operations (abs > 0
+// combined with "mask"). The CHECK lines below expect all four elementals to
+// be inlined into a single fir.do_loop body; the hlfir.destroy cleanups in
+// the hlfir.yield cleanup region are elided since no hlfir.expr temporary is
+// materialized.
+func.func @expr_tree(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: !fir.box<!fir.array<?xf32>>, %arg2: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+  %cst = arith.constant 0.000000e+00 : f32
+  %c-1 = arith.constant -1 : index
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %0:2 = hlfir.declare %arg2 {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+  %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "y"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  hlfir.where {
+    %3 = fir.shape %c10 : (index) -> !fir.shape<1>
+    %4 = hlfir.designate %2#0 (%c10:%c1:%c-1)  shape %3 : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<10xf32>>
+    %5 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
+    ^bb0(%arg3: index):
+      %9 = hlfir.designate %4 (%arg3)  : (!fir.box<!fir.array<10xf32>>, index) -> !fir.ref<f32>
+      %10 = fir.load %9 : !fir.ref<f32>
+      %11 = math.absf %10 fastmath<contract> : f32
+      hlfir.yield_element %11 : f32
+    }
+    %6 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> {
+    ^bb0(%arg3: index):
+      %9 = hlfir.apply %5, %arg3 : (!hlfir.expr<10xf32>, index) -> f32
+      %10 = arith.cmpf ogt, %9, %cst : f32
+      %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %11 : !fir.logical<4>
+    }
+    %7 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> {
+    ^bb0(%arg3: index):
+      %9 = hlfir.apply %6, %arg3 : (!hlfir.expr<10x!fir.logical<4>>, index) -> !fir.logical<4>
+      %10 = hlfir.no_reassoc %9 : !fir.logical<4>
+      hlfir.yield_element %10 : !fir.logical<4>
+    }
+    %8 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> {
+    ^bb0(%arg3: index):
+      %9 = hlfir.apply %7, %arg3 : (!hlfir.expr<10x!fir.logical<4>>, index) -> !fir.logical<4>
+      %10 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+      %11 = fir.load %10 : !fir.ref<!fir.logical<4>>
+      %12 = fir.convert %9 : (!fir.logical<4>) -> i1
+      %13 = fir.convert %11 : (!fir.logical<4>) -> i1
+      %14 = arith.andi %12, %13 : i1
+      %15 = fir.convert %14 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %15 : !fir.logical<4>
+    }
+    hlfir.yield %8 : !hlfir.expr<10x!fir.logical<4>> cleanup {
+      hlfir.destroy %8 : !hlfir.expr<10x!fir.logical<4>>
+      hlfir.destroy %7 : !hlfir.expr<10x!fir.logical<4>>
+      hlfir.destroy %6 : !hlfir.expr<10x!fir.logical<4>>
+      hlfir.destroy %5 : !hlfir.expr<10xf32>
+    }
+  } do {
+    hlfir.region_assign {
+      hlfir.yield %2#0 : !fir.box<!fir.array<?xf32>>
+    } to {
+      hlfir.yield %1#0 : !fir.box<!fir.array<?xf32>>
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @expr_tree(
+// CHECK-SAME: %[[VAL_0:[^:]*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME: %[[VAL_1:[^:]*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME: %[[VAL_2:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VAL_4:.*]] = arith.constant -1 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_6:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "y"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_6]]:%[[VAL_5]]:%[[VAL_4]]) shape %[[VAL_10]] : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<10xf32>>
+// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_13:.*]] = %[[VAL_12]] to %[[VAL_6]] step %[[VAL_12]] {
+// CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_13]]) : (!fir.box<!fir.array<10xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<f32>
+// CHECK: %[[VAL_16:.*]] = math.absf %[[VAL_15]] fastmath<contract> : f32
+// CHECK: %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_3]] : f32
+// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i1) -> !fir.logical<4>
+// CHECK: %[[VAL_19:.*]] = hlfir.no_reassoc %[[VAL_18]] : !fir.logical<4>
+// CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+// CHECK: %[[VAL_24:.*]] = arith.andi %[[VAL_22]], %[[VAL_23]] : i1
+// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i1) -> !fir.logical<4>
+// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_26]] {
+// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_13]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_13]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_28]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK: }
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
+// WHERE nested inside a FORALL. The CHECK lines below expect an outer
+// fir.do_loop for the forall index, with the non-elemental LHS designator
+// hoisted before the inner mask loop, and an inner fir.do_loop evaluating
+// the inlined elemental mask and guarding each hlfir.assign with fir.if.
+func.func @inside_forall(%arg0: !fir.ref<!fir.array<10x20xf32>>, %arg1: !fir.ref<!fir.array<20xf32>>) {
+  %c1 = arith.constant 1 : index
+  %cst = arith.constant 0.000000e+00 : f32
+  %c10_i32 = arith.constant 10 : i32
+  %c1_i32 = arith.constant 1 : i32
+  %c10 = arith.constant 10 : index
+  %c20 = arith.constant 20 : index
+  %0 = fir.shape %c10, %c20 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<10x20xf32>>, !fir.ref<!fir.array<10x20xf32>>)
+  %2 = fir.shape %c20 : (index) -> !fir.shape<1>
+  %3:2 = hlfir.declare %arg1(%2) {uniq_name = "y"} : (!fir.ref<!fir.array<20xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<20xf32>>, !fir.ref<!fir.array<20xf32>>)
+  hlfir.forall lb {
+    hlfir.yield %c1_i32 : i32
+  } ub {
+    hlfir.yield %c10_i32 : i32
+  }  (%arg2: i32) {
+    hlfir.where {
+      %4 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr<20x!fir.logical<4>> {
+      ^bb0(%arg3: index):
+        %5 = hlfir.designate %3#0 (%arg3)  : (!fir.ref<!fir.array<20xf32>>, index) -> !fir.ref<f32>
+        %6 = fir.load %5 : !fir.ref<f32>
+        %7 = arith.cmpf ogt, %6, %cst : f32
+        %8 = fir.convert %7 : (i1) -> !fir.logical<4>
+        hlfir.yield_element %8 : !fir.logical<4>
+      }
+      hlfir.yield %4 : !hlfir.expr<20x!fir.logical<4>> cleanup {
+        hlfir.destroy %4 : !hlfir.expr<20x!fir.logical<4>>
+      }
+    } do {
+      hlfir.region_assign {
+        hlfir.yield %3#0 : !fir.ref<!fir.array<20xf32>>
+      } to {
+        %4 = fir.convert %arg2 : (i32) -> i64
+        %5 = hlfir.designate %1#0 (%4, %c1:%c20:%c1)  shape %2 : (!fir.ref<!fir.array<10x20xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<20xf32>>
+        hlfir.yield %5 : !fir.box<!fir.array<20xf32>>
+      }
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @inside_forall(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<10x20xf32>>,
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<!fir.array<20xf32>>) {
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_6:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_7:.*]] = arith.constant 20 : index
+// CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_6]], %[[VAL_7]] : (index, index) -> !fir.shape<2>
+// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_8]]) {uniq_name = "x"} : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<10x20xf32>>, !fir.ref<!fir.array<10x20xf32>>)
+// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_10]]) {uniq_name = "y"} : (!fir.ref<!fir.array<20xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<20xf32>>, !fir.ref<!fir.array<20xf32>>)
+// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_5]] : (i32) -> index
+// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
+// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_14]] {
+// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (i32) -> i64
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_18]], %[[VAL_2]]:%[[VAL_7]]:%[[VAL_2]]) shape %[[VAL_10]] : (!fir.ref<!fir.array<10x20xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<20xf32>>
+// CHECK: fir.do_loop %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_7]] step %[[VAL_17]] {
+// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<20xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<f32>
+// CHECK: %[[VAL_23:.*]] = arith.cmpf ogt, %[[VAL_22]], %[[VAL_3]] : f32
+// CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i1) -> !fir.logical<4>
+// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_25]] {
+// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_20]]) : (!fir.ref<!fir.array<20xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_19]] (%[[VAL_20]]) : (!fir.box<!fir.array<20xf32>>, index) -> !fir.ref<f32>
+// CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_27]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: return
+// CHECK: }
More information about the flang-commits
mailing list