[flang-commits] [flang] d757811 - [flang][hlfir] Generate temporary storage in Forall/Where [1/2]
Jean Perier via flang-commits
flang-commits at lists.llvm.org
Thu May 25 02:55:20 PDT 2023
Author: Jean Perier
Date: 2023-05-25T11:51:32+02:00
New Revision: d7578116b89fcffe8db4b2512ceda8c6fbf1ea7f
URL: https://github.com/llvm/llvm-project/commit/d7578116b89fcffe8db4b2512ceda8c6fbf1ea7f
DIFF: https://github.com/llvm/llvm-project/commit/d7578116b89fcffe8db4b2512ceda8c6fbf1ea7f.diff
LOG: [flang][hlfir] Generate temporary storage in Forall/Where [1/2]
Generate temporary storage inline inside WHERE and FORALL when possible.
A following patch will use the runtime to cover the generic cases.
Reviewed By: vzakhari
Differential Revision: https://reviews.llvm.org/D151247
Added:
flang/test/HLFIR/order_assignments/impure-where.fir
flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
Modified:
flang/include/flang/Optimizer/Builder/TemporaryStorage.h
flang/lib/Optimizer/Builder/TemporaryStorage.cpp
flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
Removed:
flang/test/HLFIR/ordered-assignments-codegen-todo.fir
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
index 4a96b11d44804..88bf4af382724 100644
--- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
+++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
@@ -93,5 +93,52 @@ class HomogeneousScalarStack {
/// Temporary storage.
mlir::Value temp;
};
+
+/// Structure to hold the value of a single entity.
+class SimpleCopy {
+public:
+ SimpleCopy(mlir::Location loc, fir::FirOpBuilder &builder,
+ hlfir::Entity source, llvm::StringRef tempName);
+
+ void pushValue(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::Value value) {
+ assert(false && "must not be called: value already set");
+ }
+ void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder){};
+ mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) {
+ return copy.getBase();
+ }
+ void destroy(mlir::Location loc, fir::FirOpBuilder &builder);
+
+public:
+ /// Temporary storage for the copy.
+ hlfir::AssociateOp copy;
+};
+
+/// Generic wrapper over the different sorts of temporary storages.
+class TemporaryStorage {
+public:
+ template <typename T>
+ TemporaryStorage(T &&impl) : impl{std::forward<T>(impl)} {}
+
+ void pushValue(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::Value value) {
+ std::visit([&](auto &temp) { temp.pushValue(loc, builder, value); }, impl);
+ }
+ void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder) {
+ std::visit([&](auto &temp) { temp.resetFetchPosition(loc, builder); },
+ impl);
+ }
+ mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) {
+ return std::visit([&](auto &temp) { return temp.fetch(loc, builder); },
+ impl);
+ }
+ void destroy(mlir::Location loc, fir::FirOpBuilder &builder) {
+ std::visit([&](auto &temp) { temp.destroy(loc, builder); }, impl);
+ }
+
+private:
+ std::variant<HomogeneousScalarStack, SimpleCopy> impl;
+};
} // namespace fir::factory
#endif // FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H
diff --git a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
index d707d623bc9c8..b4e01556af086 100644
--- a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
+++ b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
@@ -10,8 +10,8 @@
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Builder/TemporaryStorage.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
-#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
@@ -133,3 +133,24 @@ hlfir::Entity fir::factory::HomogeneousScalarStack::moveStackAsArrayExpr(
auto hlfirExpr = builder.create<hlfir::AsExprOp>(loc, temp, mustFree);
return hlfir::Entity{hlfirExpr};
}
+
+//===----------------------------------------------------------------------===//
+// fir::factory::SimpleCopy implementation.
+//===----------------------------------------------------------------------===//
+
+fir::factory::SimpleCopy::SimpleCopy(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ hlfir::Entity source,
+ llvm::StringRef tempName) {
+ // Use hlfir.as_expr and hlfir.associate to create a copy and leave
+ // bufferization deals with how best to make the copy.
+ if (source.isVariable())
+ source = hlfir::Entity{builder.create<hlfir::AsExprOp>(loc, source)};
+ copy = hlfir::genAssociateExpr(loc, builder, source,
+ source.getFortranElementType(), tempName);
+}
+
+void fir::factory::SimpleCopy::destroy(mlir::Location loc,
+ fir::FirOpBuilder &builder) {
+ builder.create<hlfir::EndAssociateOp>(loc, copy);
+}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 0317f83063f5e..1ec3aca640cb4 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -20,9 +20,11 @@
#include "ScheduleOrderedAssignments.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/TemporaryStorage.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/IR/Dominance.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallSet.h"
@@ -106,8 +108,20 @@ class OrderedAssignmentRewriter {
currentRun = nullptr;
assert(constructStack.empty() && "must exit constructs after a run");
mapper.clear();
+ savedInCurrentRunBeforeUse.clear();
}
+ /// After all run have been lowered, clean-up all the temporary
+ /// storage that were created (do not call final routines).
+ void cleanupSavedEntities() {
+ for (auto &temp : savedEntities)
+ temp.second.destroy(root.getLoc(), builder);
+ }
+
+ /// Lowered value for an expression, and the original hlfir.yield if any
+ /// clean-up needs to be cloned after usage.
+ using ValueAndCleanUp = std::pair<mlir::Value, std::optional<hlfir::YieldOp>>;
+
private:
/// Walk the part of an order assignment tree node that needs
/// to be evaluated in the current run.
@@ -126,11 +140,16 @@ class OrderedAssignmentRewriter {
void post(hlfir::ForallMaskOp);
void post(hlfir::WhereOp);
void post(hlfir::ElseWhereOp);
+ /// Enter (and maybe create) the fir.if else block of an ElseWhereOp,
+ /// but do not generate the elswhere mask or the new fir.if.
+ void enterElsewhere(hlfir::ElseWhereOp);
/// Is this an assignment to a vector subscripted entity?
static bool hasVectorSubscriptedLhs(hlfir::RegionAssignOp regionAssignOp);
/// Are they any leaf region in node that must be saved in the current run?
- bool mustSavedRegionIn(hlfir::OrderedAssignmentTreeOpInterface node) const;
+ bool mustSaveRegionIn(
+ hlfir::OrderedAssignmentTreeOpInterface node,
+ llvm::SmallVectorImpl<hlfir::SaveEntity> &saveEntities) const;
/// Should this node be evaluated in the current run? Saving a region in a
/// node does not imply the node needs to be evaluated.
bool
@@ -154,7 +173,7 @@ class OrderedAssignmentRewriter {
/// should be done after using the entity. Like, generateYieldedScalarValue,
/// this will return the saved value if the region was saved in a previous
/// run.
- std::pair<mlir::Value, std::optional<hlfir::YieldOp>>
+ ValueAndCleanUp
generateYieldedEntity(mlir::Region ®ion,
std::optional<mlir::Type> castToType = std::nullopt);
@@ -173,8 +192,43 @@ class OrderedAssignmentRewriter {
mlir::Value generateMaskedEntity(MaskedArrayExpr &maskedExpr);
/// Create a fir.if at the current position inside the where loop nest
- /// given a mask expression.
- void generateMaskIfOp(MaskedArrayExpr &mask);
+ /// given the element value of a mask.
+ void generateMaskIfOp(mlir::Value cdt);
+
+ /// Save a value for subsequent runs.
+ void generateSaveEntity(hlfir::SaveEntity savedEntity,
+ bool willUseSavedEntityInSameRun);
+
+ /// Get a value if it was saved in this run or a previous run. Returns
+ /// nullopt if it has not been saved.
+ std::optional<ValueAndCleanUp> getIfSaved(mlir::Region ®ion);
+
+ /// Generate code before the loop nest for the current run, if any.
+ void doBeforeLoopNest(const std::function<void()> &callback) {
+ if (constructStack.empty()) {
+ callback();
+ return;
+ }
+ auto insertionPoint = builder.saveInsertionPoint();
+ builder.setInsertionPoint(constructStack[0]);
+ callback();
+ builder.restoreInsertionPoint(insertionPoint);
+ }
+
+ /// Can the current loop nest iteration number be computed? For simplicity,
+ /// this is true if an only if all the bounds and steps of the fir.do_loop
+ /// nest dominates the outer loop. The argument is filled with the current
+ /// loop nest on success.
+ bool currentLoopNestIterationNumberCanBeComputed(
+ llvm::SmallVectorImpl<fir::DoLoopOp> &loopNest);
+
+ template <typename T>
+ fir::factory::TemporaryStorage *insertSavedEntity(mlir::Region ®ion,
+ T &&temp) {
+ auto inserted = savedEntities.try_emplace(®ion, std::forward<T>(temp));
+ assert(inserted.second && "temp must have been emplaced");
+ return &inserted.first->second;
+ }
fir::FirOpBuilder &builder;
@@ -182,6 +236,10 @@ class OrderedAssignmentRewriter {
/// operations and the operations that have been cloned in the current run.
/// It is reset between two runs.
mlir::IRMapping mapper;
+ /// Dominance info is used to determine if inner loop bounds are all computed
+ /// before outer loop for the current loop. It does not need to be reset
+ /// between runs.
+ mlir::DominanceInfo dominanceInfo;
/// Construct stack in the current run. This allows setting back the insertion
/// point correctly when leaving a node that requires a fir.do_loop or fir.if
/// operation.
@@ -189,20 +247,50 @@ class OrderedAssignmentRewriter {
/// Current where loop nest, if any.
std::optional<hlfir::LoopNest> whereLoopNest;
+ /// Map of temporary storage to keep track of saved entity once the run
+ /// that saves them has been lowered. It is kept in-between runs.
+ llvm::DenseMap<mlir::Region *, fir::factory::TemporaryStorage> savedEntities;
+ /// Map holding the value that were saved in the current run and that also
+ /// need to be used (because their construct will be visited). It is reset
+ /// after each run. It avoids having to store and fetch in the temporary
+ /// during the same run, which would required the temporary to have different
+ /// fetching and storing counters.
+ llvm::DenseMap<mlir::Region *, ValueAndCleanUp> savedInCurrentRunBeforeUse;
+
/// Root of the order assignment tree being lowered.
hlfir::OrderedAssignmentTreeOpInterface root;
/// Pointer to the current run of the schedule being lowered.
hlfir::Run *currentRun = nullptr;
+
+ /// When allocating temporary storage inlined, indicate if the storage should
+ /// be heap or stack allocated. Temporary allocated with the runtime are heap
+ /// allocated by the runtime.
+ bool allocateOnHeap = true;
};
} // namespace
void OrderedAssignmentRewriter::walk(
hlfir::OrderedAssignmentTreeOpInterface node) {
- if (mustSavedRegionIn(node))
- TODO(node.getLoc(),
- "creating temporary storage in FORALL or WHERE constructs");
- if (isRequiredInCurrentRun(node) || mlir::isa<hlfir::ForallIndexOp>(node)) {
- llvm::TypeSwitch<mlir::Operation *, void>(node.getOperation())
+ bool mustVisit =
+ isRequiredInCurrentRun(node) || mlir::isa<hlfir::ForallIndexOp>(node);
+ llvm::SmallVector<hlfir::SaveEntity> saveEntities;
+ mlir::Operation *nodeOp = node.getOperation();
+ if (mustSaveRegionIn(node, saveEntities)) {
+ mlir::IRRewriter::InsertPoint insertionPoint;
+ if (auto elseWhereOp = mlir::dyn_cast<hlfir::ElseWhereOp>(nodeOp)) {
+ // ElseWhere mask to save must be evaluated inside the fir.if else
+ // for the previous where/elsewehere (its evaluation must be
+ // masked by the "pending control mask").
+ insertionPoint = builder.saveInsertionPoint();
+ enterElsewhere(elseWhereOp);
+ }
+ for (hlfir::SaveEntity saveEntity : saveEntities)
+ generateSaveEntity(saveEntity, mustVisit);
+ if (insertionPoint.isSet())
+ builder.restoreInsertionPoint(insertionPoint);
+ }
+ if (mustVisit) {
+ llvm::TypeSwitch<mlir::Operation *, void>(nodeOp)
.Case<hlfir::ForallOp, hlfir::ForallIndexOp, hlfir::ForallMaskOp,
hlfir::RegionAssignOp, hlfir::WhereOp, hlfir::ElseWhereOp>(
[&](auto concreteOp) { pre(concreteOp); })
@@ -212,7 +300,7 @@ void OrderedAssignmentRewriter::walk(
if (auto subNode =
mlir::dyn_cast<hlfir::OrderedAssignmentTreeOpInterface>(op))
walk(subNode);
- llvm::TypeSwitch<mlir::Operation *, void>(node.getOperation())
+ llvm::TypeSwitch<mlir::Operation *, void>(nodeOp)
.Case<hlfir::ForallOp, hlfir::ForallMaskOp, hlfir::WhereOp,
hlfir::ElseWhereOp>([&](auto concreteOp) { post(concreteOp); })
.Default([](auto) {});
@@ -292,14 +380,11 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
generateCleanupIfAny(oldLhsYield);
}
-void OrderedAssignmentRewriter::generateMaskIfOp(MaskedArrayExpr &mask) {
- assert(whereLoopNest.has_value() && "must be inside a WHERE");
- mlir::Location loc = mask.loc;
- hlfir::Entity maskVal{generateMaskedEntity(mask)};
- maskVal = hlfir::loadTrivialScalar(loc, builder, maskVal);
- mlir::Value cdt = builder.createConvert(loc, builder.getI1Type(), maskVal);
- // Else region is added when visiting nested hlfir.elseWhereOp, if any.
- auto ifOp = builder.create<fir::IfOp>(loc, std::nullopt, cdt,
+void OrderedAssignmentRewriter::generateMaskIfOp(mlir::Value cdt) {
+ mlir::Location loc = cdt.getLoc();
+ cdt = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{cdt});
+ cdt = builder.createConvert(loc, builder.getI1Type(), cdt);
+ auto ifOp = builder.create<fir::IfOp>(cdt.getLoc(), std::nullopt, cdt,
/*withElseRegion=*/false);
constructStack.push_back(ifOp.getOperation());
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
@@ -307,18 +392,46 @@ void OrderedAssignmentRewriter::generateMaskIfOp(MaskedArrayExpr &mask) {
void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
mlir::Location loc = whereOp.getLoc();
- MaskedArrayExpr mask(loc, whereOp.getMaskRegion());
if (!whereLoopNest) {
- // Start a loop nest iterating on the shape of the where mask.
+ // This is the top-level WHERE. Start a loop nest iterating on the shape of
+ // the where mask.
+ if (auto maybeSaved = getIfSaved(whereOp.getMaskRegion())) {
+ // Use the saved value to get the shape and condition element.
+ hlfir::Entity savedMask{maybeSaved->first};
+ mlir::Value shape = hlfir::genShape(loc, builder, savedMask);
+ whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
+ constructStack.push_back(whereLoopNest->outerLoop.getOperation());
+ builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
+ mlir::Value cdt = hlfir::getElementAt(loc, builder, savedMask,
+ whereLoopNest->oneBasedIndices);
+ generateMaskIfOp(cdt);
+ if (maybeSaved->second) {
+ // If this is the same run as the one that saved the value, the clean-up
+ // was left-over to be done now.
+ auto insertionPoint = builder.saveInsertionPoint();
+ builder.setInsertionPointAfter(whereLoopNest->outerLoop);
+ generateCleanupIfAny(maybeSaved->second);
+ builder.restoreInsertionPoint(insertionPoint);
+ }
+ return;
+ }
+ // The mask was not evaluated yet or can be safely re-evaluated.
+ MaskedArrayExpr mask(loc, whereOp.getMaskRegion());
mask.generateNoneElementalPart(builder, mapper);
mlir::Value shape = mask.generateShape(builder, mapper);
whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
constructStack.push_back(whereLoopNest->outerLoop.getOperation());
builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
+ mlir::Value cdt = generateMaskedEntity(mask);
+ generateMaskIfOp(cdt);
+ return;
}
+ // Where Loops have been already created by a parent WHERE.
// Generate a fir.if with the value of the current element of the mask
- // inside the loops.
- generateMaskIfOp(mask);
+ // inside the loops. The case where the mask was saved is handled in the
+ // generateYieldedScalarValue call.
+ mlir::Value cdt = generateYieldedScalarValue(whereOp.getMaskRegion());
+ generateMaskIfOp(cdt);
}
void OrderedAssignmentRewriter::post(hlfir::WhereOp whereOp) {
@@ -333,20 +446,27 @@ void OrderedAssignmentRewriter::post(hlfir::WhereOp whereOp) {
}
}
-void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) {
- assert(!constructStack.empty() && "cannot be empty inside a where");
- mlir::Location loc = elseWhereOp.getLoc();
+void OrderedAssignmentRewriter::enterElsewhere(hlfir::ElseWhereOp elseWhereOp) {
// Create an "else" region for the current where/elsewhere fir.if.
auto ifOp = mlir::dyn_cast<fir::IfOp>(constructStack.back());
- assert(ifOp && ifOp.getElseRegion().empty() && "must be an if without else");
- builder.createBlock(&ifOp.getElseRegion());
- auto end = builder.create<fir::ResultOp>(loc);
- builder.setInsertionPoint(end);
+ assert(ifOp && "must be an if");
+ if (ifOp.getElseRegion().empty()) {
+ mlir::Location loc = elseWhereOp.getLoc();
+ builder.createBlock(&ifOp.getElseRegion());
+ auto end = builder.create<fir::ResultOp>(loc);
+ builder.setInsertionPoint(end);
+ } else {
+ builder.setInsertionPoint(&ifOp.getElseRegion().back().back());
+ }
+}
+
+void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) {
+ enterElsewhere(elseWhereOp);
if (elseWhereOp.getMaskRegion().empty())
return;
// Create new nested fir.if with elsewhere mask if any.
- MaskedArrayExpr mask(loc, elseWhereOp.getMaskRegion());
- generateMaskIfOp(mask);
+ mlir::Value cdt = generateYieldedScalarValue(elseWhereOp.getMaskRegion());
+ generateMaskIfOp(cdt);
}
void OrderedAssignmentRewriter::post(hlfir::ElseWhereOp elseWhereOp) {
@@ -370,14 +490,51 @@ static bool isForallIndex(mlir::Value value) {
return value.getDefiningOp<hlfir::ForallIndexOp>();
}
-std::pair<mlir::Value, std::optional<hlfir::YieldOp>>
+static OrderedAssignmentRewriter::ValueAndCleanUp
+castIfNeeded(mlir::Location loc, fir::FirOpBuilder &builder,
+ OrderedAssignmentRewriter::ValueAndCleanUp valueAndCleanUp,
+ std::optional<mlir::Type> castToType) {
+ if (!castToType.has_value())
+ return valueAndCleanUp;
+ mlir::Value cast =
+ builder.createConvert(loc, *castToType, valueAndCleanUp.first);
+ return {cast, valueAndCleanUp.second};
+}
+
+std::optional<OrderedAssignmentRewriter::ValueAndCleanUp>
+OrderedAssignmentRewriter::getIfSaved(mlir::Region ®ion) {
+ mlir::Location loc = region.getParentOp()->getLoc();
+ // If the region was saved in the same run, use the value that was evaluated
+ // instead of fetching the temp, and do clean-up, if any, that were delayed.
+ // This is done to avoid requiring the temporary stack to have different
+ // fetching and storing counters, and also because it produces slightly better
+ // code.
+ if (auto savedInSameRun = savedInCurrentRunBeforeUse.find(®ion);
+ savedInSameRun != savedInCurrentRunBeforeUse.end())
+ return savedInSameRun->second;
+ // If the region was saved in a previous run, fetch the saved value.
+ if (auto temp = savedEntities.find(®ion); temp != savedEntities.end()) {
+ doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); });
+ return ValueAndCleanUp{temp->second.fetch(loc, builder), std::nullopt};
+ }
+ return std::nullopt;
+}
+
+OrderedAssignmentRewriter::ValueAndCleanUp
OrderedAssignmentRewriter::generateYieldedEntity(
mlir::Region ®ion, std::optional<mlir::Type> castToType) {
- // TODO: if the region was saved, use that instead of generating code again.
+ mlir::Location loc = region.getParentOp()->getLoc();
+ if (auto maybeValueAndCleanUp = getIfSaved(region))
+ return castIfNeeded(loc, builder, *maybeValueAndCleanUp, castToType);
+ // Otherwise, evaluate the region now.
+
+ // Masked expression must not evaluate the elemental parts that are masked,
+ // they have custom code generation.
if (whereLoopNest.has_value()) {
- mlir::Location loc = region.getParentOp()->getLoc();
- return {generateMaskedEntity(loc, region), std::nullopt};
+ mlir::Value maskedValue = generateMaskedEntity(loc, region);
+ return castIfNeeded(loc, builder, {maskedValue, std::nullopt}, castToType);
}
+
assert(region.hasOneBlock() && "region must contain one block");
auto oldYield = mlir::dyn_cast_or_null<hlfir::YieldOp>(
region.back().getOperations().back());
@@ -434,7 +591,9 @@ OrderedAssignmentRewriter::generateYieldedEntity(
mlir::Value OrderedAssignmentRewriter::generateYieldedScalarValue(
mlir::Region ®ion, std::optional<mlir::Type> castToType) {
+ mlir::Location loc = region.getParentOp()->getLoc();
auto [value, maybeYield] = generateYieldedEntity(region, castToType);
+ value = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{value});
assert(fir::isa_trivial(value.getType()) && "not a trivial scalar value");
generateCleanupIfAny(maybeYield);
return value;
@@ -468,7 +627,8 @@ void OrderedAssignmentRewriter::generateCleanupIfAny(
assert(maybeYield->getCleanup().hasOneBlock() &&
"region must contain one block");
for (auto &op : maybeYield->getCleanup().back().getOperations())
- builder.clone(op, mapper);
+ if (!mlir::isa<fir::FirEndOp>(op))
+ builder.clone(op, mapper);
}
}
@@ -478,14 +638,15 @@ bool OrderedAssignmentRewriter::hasVectorSubscriptedLhs(
regionAssignOp.getLhsRegion().back().back());
}
-bool OrderedAssignmentRewriter::mustSavedRegionIn(
- hlfir::OrderedAssignmentTreeOpInterface node) const {
+bool OrderedAssignmentRewriter::mustSaveRegionIn(
+ hlfir::OrderedAssignmentTreeOpInterface node,
+ llvm::SmallVectorImpl<hlfir::SaveEntity> &saveEntities) const {
for (auto &action : currentRun->actions)
if (hlfir::SaveEntity *savedEntity =
std::get_if<hlfir::SaveEntity>(&action))
if (node.getOperation() == savedEntity->yieldRegion->getParentOp())
- return true;
- return false;
+ saveEntities.push_back(*savedEntity);
+ return !saveEntities.empty();
}
bool OrderedAssignmentRewriter::isRequiredInCurrentRun(
@@ -634,6 +795,125 @@ void MaskedArrayExpr::generateNoneElementalCleanupIfAny(
}
}
+static bool isLeftHandSide(mlir::Region ®ion) {
+ auto assign = mlir::dyn_cast<hlfir::RegionAssignOp>(region.getParentOp());
+ return assign && (&assign.getLhsRegion() == ®ion);
+}
+
+bool OrderedAssignmentRewriter::currentLoopNestIterationNumberCanBeComputed(
+ llvm::SmallVectorImpl<fir::DoLoopOp> &loopNest) {
+ if (constructStack.empty())
+ return true;
+ mlir::Operation *outerLoop = constructStack[0];
+ mlir::Operation *currentConstruct = constructStack.back();
+ // Loop through the loops until the outer construct is met, and test if the
+ // loop operands dominate the outer construct.
+ while (currentConstruct) {
+ if (auto doLoop = mlir::dyn_cast<fir::DoLoopOp>(currentConstruct)) {
+ if (llvm::any_of(doLoop->getOperands(), [&](mlir::Value value) {
+ return !dominanceInfo.properlyDominates(value, outerLoop);
+ })) {
+ return false;
+ }
+ loopNest.push_back(doLoop);
+ }
+ if (currentConstruct == outerLoop)
+ currentConstruct = nullptr;
+ else
+ currentConstruct = currentConstruct->getParentOp();
+ }
+ return true;
+}
+
+static mlir::Value
+computeLoopNestIterationNumber(mlir::Location loc, fir::FirOpBuilder &builder,
+ llvm::ArrayRef<fir::DoLoopOp> loopNest) {
+ mlir::Value loopExtent;
+ for (fir::DoLoopOp doLoop : loopNest) {
+ mlir::Value extent = builder.genExtentFromTriplet(
+ loc, doLoop.getLowerBound(), doLoop.getUpperBound(), doLoop.getStep(),
+ builder.getIndexType());
+ if (!loopExtent)
+ loopExtent = extent;
+ else
+ loopExtent = builder.create<mlir::arith::MulIOp>(loc, loopExtent, extent);
+ }
+ assert(loopExtent && "loopNest must not be empty");
+ return loopExtent;
+}
+
+void OrderedAssignmentRewriter::generateSaveEntity(
+ hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun) {
+ mlir::Region ®ion = *savedEntity.yieldRegion;
+ mlir::Location loc = region.getParentOp()->getLoc();
+
+ if (!mlir::isa<hlfir::YieldOp>(region.back().back()))
+ TODO(loc, "creating temporary storage for vector subscripted LHS");
+
+ // Evaluate the region inside the loop nest (if any).
+ auto [clonedValue, oldYield] = generateYieldedEntity(region);
+ hlfir::Entity entity{clonedValue};
+ if (isLeftHandSide(region)) // Need to save the address, not the values.
+ TODO(loc, "creating temporary storage for LHS");
+ else
+ entity = hlfir::loadTrivialScalar(loc, builder, entity);
+ mlir::Type entityType = entity.getType();
+
+ static constexpr char tempName[] = ".tmp.forall";
+ if (constructStack.empty()) {
+ // Value evaluated outside of any loops (this may be the first MASK of a
+ // WHERE construct, or an LHS/RHS temp of hlfir.region_assign outside of
+ // WHERE/FORALL).
+ insertSavedEntity(region,
+ fir::factory::SimpleCopy(loc, builder, entity, tempName));
+ } else {
+ // Need to create a temporary for values computed inside loops.
+ // Create temporary storage outside of the loop nest given the entity
+ // type (and the loop context).
+ fir::factory::TemporaryStorage *temp;
+ llvm::SmallVector<fir::DoLoopOp> loopNest;
+ bool loopShapeCanBePreComputed =
+ currentLoopNestIterationNumberCanBeComputed(loopNest);
+ doBeforeLoopNest([&] {
+ /// For simple scalars inside loops whose total iteration number can be
+ /// pre-computed, create a rank-1 array outside of the loops. It will be
+ /// assigned/fetched inside the loops like a normal Fortran array given
+ /// the iteration count.
+ if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) {
+ mlir::Value loopExtent =
+ computeLoopNestIterationNumber(loc, builder, loopNest);
+ auto sequenceType =
+ builder.getVarLenSeqTy(entityType).cast<fir::SequenceType>();
+ temp = insertSavedEntity(region,
+ fir::factory::HomogeneousScalarStack{
+ loc, builder, sequenceType, loopExtent,
+ /*lenParams=*/{}, allocateOnHeap,
+ /*stackThroughLoops=*/true, tempName});
+
+ } else {
+ // If the number of iteration is not known, or if the values at each
+ // iterations are values that may have different shape, type parameters
+ // or dynamic type, use the runtime to create and manage a stack-like
+ // temporary.
+ TODO(loc, "use runtime to create temporary storage in FORALL or WHERE");
+ }
+ });
+ // Inside the loop nest (and any fir.if if there are active masks), copy
+ // the value to the temp and do clean-ups for the value if any.
+ temp->pushValue(loc, builder, entity);
+ }
+
+ // Delay the clean-up if the entity will be used in the same run (i.e., the
+ // parent construct will be visited and needs to be lowered).
+ if (willUseSavedEntityInSameRun) {
+ auto inserted =
+ savedInCurrentRunBeforeUse.try_emplace(®ion, entity, oldYield);
+ assert(inserted.second && "entity must have been emplaced");
+ } else {
+ generateCleanupIfAny(oldYield);
+ }
+}
+
/// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given
/// a schedule.
static void lower(hlfir::OrderedAssignmentTreeOpInterface root,
@@ -643,6 +923,7 @@ static void lower(hlfir::OrderedAssignmentTreeOpInterface root,
OrderedAssignmentRewriter assignmentRewriter(builder, root);
for (auto &run : schedule)
assignmentRewriter.lowerRun(run);
+ assignmentRewriter.cleanupSavedEntities();
}
/// Shared rewrite entry point for all the ordered assignment tree root
diff --git a/flang/test/HLFIR/order_assignments/impure-where.fir b/flang/test/HLFIR/order_assignments/impure-where.fir
new file mode 100644
index 0000000000000..537fd48282cf8
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/impure-where.fir
@@ -0,0 +1,73 @@
+// Test code generation of hlfir.where/hflir.elsewhere when an
+// "impure" mask is used and several runs are needed. The mask
+// must be saved so that the impure function is only evaluated once.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s
+
+func.func private @impure() -> !fir.heap<!fir.array<10x!fir.logical<4>>>
+func.func @test_elsewhere_impure_mask(%x: !fir.ref<!fir.array<10xi32>>, %y: !fir.ref<!fir.array<10xi32>>, %z: !fir.ref<!fir.array<10xi32>>, %mask: !fir.ref<!fir.array<10x!fir.logical<4>>>) {
+ %c-1 = arith.constant -1 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ hlfir.where {
+ hlfir.yield %mask : !fir.ref<!fir.array<10x!fir.logical<4>>>
+ } do {
+ hlfir.elsewhere mask {
+ %mask2 = fir.call @impure() : () -> !fir.heap<!fir.array<10x!fir.logical<4>>>
+ hlfir.yield %mask2 : !fir.heap<!fir.array<10x!fir.logical<4>>> cleanup {
+ fir.freemem %mask2 : !fir.heap<!fir.array<10x!fir.logical<4>>>
+ }
+ } do {
+ hlfir.region_assign {
+ hlfir.yield %y : !fir.ref<!fir.array<10xi32>>
+ } to {
+ hlfir.yield %x : !fir.ref<!fir.array<10xi32>>
+ }
+ hlfir.region_assign {
+ hlfir.yield %x : !fir.ref<!fir.array<10xi32>>
+ } to {
+ hlfir.yield %z : !fir.ref<!fir.array<10xi32>>
+ }
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @test_elsewhere_impure_mask(
+// CHECK: %[[VAL_12:.*]] = fir.call @impure() : () -> !fir.heap<!fir.array<10x!fir.logical<4>>>
+// CHECK: %[[VAL_21:.*]] = fir.allocmem !fir.array<?x!fir.logical<4>>
+// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_21]](%{{.*}}) {uniq_name = ".tmp.forall"}
+// CHECK: fir.do_loop
+// CHECK: fir.if {{.*}} {
+// CHECK: } else {
+// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]] (%{{.*}})
+// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_32]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK: }
+// CHECK: }
+// CHECK-NOT: fir.call @impure
+// CHECK: fir.do_loop
+// CHECK: fir.if {{.*}} {
+// CHECK: } else {
+// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_44]] {
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK-NOT: fir.call @impure
+// CHECK: fir.do_loop
+// CHECK: fir.if {{.*}} {
+// CHECK: } else {
+// CHECK: %[[VAL_52:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_53:.*]] = fir.load %[[VAL_52]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_54]] {
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: fir.freemem %[[VAL_21]] : !fir.heap<!fir.array<?x!fir.logical<4>>>
+// CHECK: fir.freemem %[[VAL_12]] : !fir.heap<!fir.array<10x!fir.logical<4>>>
+// CHECK: return
+// CHECK: }
diff --git a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
new file mode 100644
index 0000000000000..6566620a51bfc
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
@@ -0,0 +1,332 @@
+// Test code generation of hlfir.forall and hlfir.where when temporary
+// storage is needed and can be allocated inline.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s
+
+func.func @test_scalar_save(%arg0: !fir.box<!fir.array<?xi32>>) {
+ %c10_i32 = arith.constant 10 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ hlfir.forall lb {
+ hlfir.yield %c1_i32 : i32
+ } ub {
+ hlfir.yield %c10_i32 : i32
+ } (%arg1: i32) {
+ hlfir.region_assign {
+ %1 = fir.convert %arg1 : (i32) -> i64
+ %2 = hlfir.designate %0#0 (%1) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+ %3 = fir.load %2 : !fir.ref<i32>
+ hlfir.yield %3 : i32
+ } to {
+ %1 = arith.addi %arg1, %c1_i32 : i32
+ %2 = fir.convert %1 : (i32) -> i64
+ %3 = hlfir.designate %0#0 (%2) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+ hlfir.yield %3 : !fir.ref<i32>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @test_scalar_save(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
+// CHECK: %[[VAL_1:.*]] = fir.alloca index
+// CHECK: %[[VAL_2:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
+// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index
+// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index
+// CHECK: %[[VAL_11:.*]] = arith.divsi %[[VAL_10]], %[[VAL_7]] : index
+// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_8]] : index
+// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_8]] : index
+// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_15:.*]] = arith.constant 1 : index
+// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_16:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_13]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_16]](%[[VAL_17]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_7]] {
+// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (index) -> i32
+// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+// CHECK: %[[VAL_22:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_21]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_15]] : index
+// CHECK: fir.store %[[VAL_25]] to %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_24]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_26]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
+// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_29:.*]] = arith.constant 1 : index
+// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref<index>
+// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_27]] to %[[VAL_28]] step %[[VAL_29]] {
+// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (index) -> i32
+// CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_15]] : index
+// CHECK: fir.store %[[VAL_33]] to %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_32]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_31]], %[[VAL_3]] : i32
+// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> i64
+// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_37]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_38]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: fir.freemem %[[VAL_16]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: return
+// CHECK: }
+
+func.func @mask_and_rhs_conflict(%arg0: !fir.box<!fir.array<?xi32>>) {
+ %c42_i32 = arith.constant 42 : i32
+ %c10_i32 = arith.constant 10 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ hlfir.forall lb {
+ hlfir.yield %c1_i32 : i32
+ } ub {
+ hlfir.yield %c10_i32 : i32
+ } (%arg1: i32) {
+ hlfir.forall_mask {
+ %1 = fir.convert %arg1 : (i32) -> i64
+ %2 = hlfir.designate %0#0 (%1) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+ %3 = fir.load %2 : !fir.ref<i32>
+ %4 = arith.cmpi sgt, %3, %c42_i32 : i32
+ hlfir.yield %4 : i1
+ } do {
+ hlfir.region_assign {
+ %1 = fir.convert %arg1 : (i32) -> i64
+ %2 = hlfir.designate %0#0 (%1) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+ %3 = fir.load %2 : !fir.ref<i32>
+ hlfir.yield %3 : i32
+ } to {
+ %1 = arith.addi %arg1, %c1_i32 : i32
+ %2 = fir.convert %1 : (i32) -> i64
+ %3 = hlfir.designate %0#0 (%2) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+ hlfir.yield %3 : !fir.ref<i32>
+ }
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @mask_and_rhs_conflict(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
+// CHECK: %[[VAL_1:.*]] = fir.alloca index
+// CHECK: %[[VAL_2:.*]] = fir.alloca index
+// CHECK: %[[VAL_3:.*]] = arith.constant 42 : i32
+// CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_5]] : (i32) -> index
+// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
+// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_10:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index
+// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_9]] : index
+// CHECK: %[[VAL_13:.*]] = arith.divsi %[[VAL_12]], %[[VAL_9]] : index
+// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_10]] : index
+// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14]], %[[VAL_13]], %[[VAL_10]] : index
+// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
+// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_18:.*]] = fir.allocmem !fir.array<?xi1>, %[[VAL_15]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_15]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[VAL_19]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
+// CHECK: %[[VAL_21:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index
+// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_22]], %[[VAL_9]] : index
+// CHECK: %[[VAL_24:.*]] = arith.divsi %[[VAL_23]], %[[VAL_9]] : index
+// CHECK: %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_24]], %[[VAL_21]] : index
+// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_24]], %[[VAL_21]] : index
+// CHECK: %[[VAL_27:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_28:.*]] = arith.constant 1 : index
+// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_29:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_26]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[VAL_30:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%[[VAL_30]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_32:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_9]] {
+// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (index) -> i32
+// CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64
+// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_34]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<i32>
+// CHECK: %[[VAL_37:.*]] = arith.cmpi sgt, %[[VAL_36]], %[[VAL_3]] : i32
+// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_17]] : index
+// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: hlfir.assign %[[VAL_37]] to %[[VAL_40]] : i1, !fir.ref<i1>
+// CHECK: fir.if %[[VAL_37]] {
+// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64
+// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_41]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<i32>
+// CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_45:.*]] = arith.addi %[[VAL_44]], %[[VAL_28]] : index
+// CHECK: fir.store %[[VAL_45]] to %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_44]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_43]] to %[[VAL_46]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_5]] : (i32) -> index
+// CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
+// CHECK: %[[VAL_49:.*]] = arith.constant 1 : index
+// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref<index>
+// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref<index>
+// CHECK: fir.do_loop %[[VAL_50:.*]] = %[[VAL_47]] to %[[VAL_48]] step %[[VAL_49]] {
+// CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (index) -> i32
+// CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_17]] : index
+// CHECK: fir.store %[[VAL_53]] to %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_54:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_52]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_54]] : !fir.ref<i1>
+// CHECK: fir.if %[[VAL_55]] {
+// CHECK: %[[VAL_56:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_56]], %[[VAL_28]] : index
+// CHECK: fir.store %[[VAL_57]] to %[[VAL_1]] : !fir.ref<index>
+// CHECK: %[[VAL_58:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_56]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_59:.*]] = fir.load %[[VAL_58]] : !fir.ref<i32>
+// CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_51]], %[[VAL_5]] : i32
+// CHECK: %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (i32) -> i64
+// CHECK: %[[VAL_62:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_61]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_59]] to %[[VAL_62]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK-DAG: fir.freemem %[[VAL_18]] : !fir.heap<!fir.array<?xi1>>
+// CHECK-DAG: fir.freemem %[[VAL_29]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: return
+// CHECK: }
+
+func.func @test_where_mask_save(%arg0: !fir.box<!fir.array<?xi32>>) {
+ %c0 = arith.constant 0 : index
+ %c42_i32 = arith.constant 42 : i32
+ %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ hlfir.where {
+ %1:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+ %2 = fir.shape %1#1 : (index) -> !fir.shape<1>
+ %3 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+ ^bb0(%arg1: index):
+ %4 = hlfir.designate %0#0 (%arg1) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+ %5 = fir.load %4 : !fir.ref<i32>
+ %6 = arith.cmpi sgt, %5, %c42_i32 : i32
+ %7 = fir.convert %6 : (i1) -> !fir.logical<4>
+ hlfir.yield_element %7 : !fir.logical<4>
+ }
+ hlfir.yield %3 : !hlfir.expr<?x!fir.logical<4>> cleanup {
+ hlfir.destroy %3 : !hlfir.expr<?x!fir.logical<4>>
+ }
+ } do {
+ hlfir.region_assign {
+ hlfir.yield %c42_i32 : i32
+ } to {
+ hlfir.yield %0#0 : !fir.box<!fir.array<?xi32>>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @test_where_mask_save(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
+// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant 42 : i32
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+// CHECK: ^bb0(%[[VAL_7:.*]]: index):
+// CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_7]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_8]] : !fir.ref<i32>
+// CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i1) -> !fir.logical<4>
+// CHECK: hlfir.yield_element %[[VAL_11]] : !fir.logical<4>
+// CHECK: }
+// CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_13:.*]](%[[VAL_5]]) {uniq_name = ".tmp.forall"} : (!hlfir.expr<?x!fir.logical<4>>, !fir.shape<1>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.ref<!fir.array<?x!fir.logical<4>>>, i1)
+// CHECK: hlfir.destroy %[[VAL_13]] : !hlfir.expr<?x!fir.logical<4>>
+// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_14]] to %[[VAL_4]]#1 step %[[VAL_14]] {
+// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_15]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_18]] {
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_15]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_19]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: hlfir.end_associate %[[VAL_12]]#1, %[[VAL_12]]#2 : !fir.ref<!fir.array<?x!fir.logical<4>>>, i1
+// CHECK: return
+// CHECK: }
+
+func.func @test_where_rhs_save(%x: !fir.ref<!fir.array<10xi32>>, %mask: !fir.ref<!fir.array<10x!fir.logical<4>>>) {
+ %c-1 = arith.constant -1 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ hlfir.where {
+ hlfir.yield %mask : !fir.ref<!fir.array<10x!fir.logical<4>>>
+ } do {
+ hlfir.region_assign {
+ %2 = hlfir.designate %x (%c10:%c1:%c-1) shape %1 :
+(!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
+ hlfir.yield %2 : !fir.ref<!fir.array<10xi32>>
+ } to {
+ hlfir.yield %x : !fir.ref<!fir.array<10xi32>>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @test_where_rhs_save(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<10xi32>>,
+// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<!fir.array<10x!fir.logical<4>>>) {
+// CHECK: %[[VAL_2:.*]] = fir.alloca index
+// CHECK: %[[VAL_3:.*]] = arith.constant -1 : index
+// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_7:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_5]]:%[[VAL_4]]:%[[VAL_3]]) shape %[[VAL_6]] : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
+// CHECK: %[[VAL_11:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_7]], %[[VAL_9]] : index
+// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_9]] : index
+// CHECK: %[[VAL_14:.*]] = arith.divsi %[[VAL_13]], %[[VAL_9]] : index
+// CHECK: %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_11]] : index
+// CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_11]] : index
+// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
+// CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_19:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_16]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[VAL_20:.*]] = fir.shape %[[VAL_16]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_19]](%[[VAL_20]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_22:.*]] = %[[VAL_9]] to %[[VAL_7]] step %[[VAL_9]] {
+// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_22]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_25]] {
+// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_22]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<i32>
+// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_18]] : index
+// CHECK: fir.store %[[VAL_29]] to %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_21]]#0 (%[[VAL_28]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_30]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_31:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_32:.*]] = fir.shape %[[VAL_31]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_33:.*]] = arith.constant 1 : index
+// CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref<index>
+// CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_33]] to %[[VAL_31]] step %[[VAL_33]] {
+// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_34]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_37]] {
+// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_18]] : index
+// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref<index>
+// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_21]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<i32>
+// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_41]] to %[[VAL_42]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: fir.freemem %[[VAL_19]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: return
+// CHECK: }
diff --git a/flang/test/HLFIR/ordered-assignments-codegen-todo.fir b/flang/test/HLFIR/ordered-assignments-codegen-todo.fir
deleted file mode 100644
index 6557a03219fb3..0000000000000
--- a/flang/test/HLFIR/ordered-assignments-codegen-todo.fir
+++ /dev/null
@@ -1,24 +0,0 @@
-// Just test that Ordered assignment pass TODOs are properly reported.
-// RUN: %not_todo_cmd fir-opt --lower-hlfir-ordered-assignments %s 2>&1 | FileCheck %s
-
-
-// CHECK: not yet implemented: creating temporary storage in FORALL or WHERE constructs
-
-func.func @forall_todo(%arg0: !fir.ref<!fir.array<10xf32>>) {
- %c1 = arith.constant 1 : index
- %c10 = arith.constant 10 : index
- hlfir.forall lb {
- hlfir.yield %c1 : index
- } ub {
- hlfir.yield %c10 : index
- } (%arg2: i64) {
- hlfir.region_assign {
- %1 = hlfir.designate %arg0 (%arg2) : (!fir.ref<!fir.array<10xf32>>, i64) -> !fir.ref<f32>
- hlfir.yield %1 : !fir.ref<f32>
- } to {
- %1 = hlfir.designate %arg0 (%arg2) : (!fir.ref<!fir.array<10xf32>>, i64) -> !fir.ref<f32>
- hlfir.yield %1 : !fir.ref<f32>
- }
- }
- return
-}
More information about the flang-commits
mailing list