[flang-commits] [flang] [Flang] Minloc elemental intrinsic lowering (PR #74828)
David Green via flang-commits
flang-commits at lists.llvm.org
Tue Dec 12 11:07:46 PST 2023
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/74828
>From dafa6a56cfbf23423a23ef9a4659fc37bf8bf178 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 8 Dec 2023 11:19:02 +0000
Subject: [PATCH 1/2] [Flang] Minloc elemental intrinsic lowering
Currently the lowering of a minloc intrinsic with a mask will look something like
%e = hlfir.elemental %shape ({
...
})
%m = hlfir.minloc %array mask %e
hlfir.assign %m to %result
hlfir.destroy %m
The elemental will be expanded into a temporary+loop, the minloc into a
FortranAMinloc call (which hopefully gets simplified to a specialized call that
can be inlined at the call site), and the assign might get expanded to a
FortranAAssign. The assign we could inline too, but it would be better to
generate the entire construct as a single loop if we can - one that performs the
minloc calculation with the mask elemental computed inline and assigns directly
to the output array.
This patch attempts to do that, adding a hlfir version of the expansion code
from SimplifyIntrinsics that turns an assign+minloc+elemental into a single
combined loop nest. It attempts to reuse the methods in genMinlocReductionLoop
for constructing the loop with a modified loop body. The declaration for the
function is currently in Optimizer/Support/Utils.h, but there might be a better
place for it.
It is currently added as part of the OptimizedBufferizationPass. I originally
had it as part of the SimplifyHLFIRIntrinsics pass, but there were already some
methods doing similar things in OptimizedBufferization. It just needs to happen
before the elementals are expanded. I think I would like to do a similar thing
for maxloc and any/all/count too if this looks OK. I will rebase over #74436
once that goes in.
---
flang/include/flang/Optimizer/Support/Utils.h | 16 +
.../Transforms/OptimizedBufferization.cpp | 370 +++++++++++++-----
.../Transforms/SimplifyIntrinsics.cpp | 188 +++++----
flang/test/HLFIR/minloc-elemental.fir | 327 ++++++++++++++++
flang/test/Transforms/simplifyintrinsics.fir | 5 +-
5 files changed, 720 insertions(+), 186 deletions(-)
create mode 100644 flang/test/HLFIR/minloc-elemental.fir
diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 34c8e79173bcd4..93caa8b23d320c 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -133,6 +133,22 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
fir::numericMlirTypeToFortran(builder, type, loc, intrinsicName) +
" in " + intrinsicName);
}
+
+using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
+ fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
+ mlir::Value, mlir::Value, const llvm::SmallVectorImpl<mlir::Value> &)>;
+using InitValGeneratorTy = llvm::function_ref<mlir::Value(
+ fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
+
+// Produces a loop nest for a Minloc intrinsic.
+void genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
+ InitValGeneratorTy initVal,
+ MinlocBodyOpGeneratorTy genBody, unsigned rank,
+ mlir::Type elementType, mlir::Location loc,
+ mlir::Type maskElemType, mlir::Value resultArr,
+ bool maskMayBeLogicalScalar);
+
+
} // namespace fir
#endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 7abfa20493c736..218ddd2a6a7b7e 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/Support/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/PatternMatch.h"
@@ -98,7 +99,8 @@ class ElementalAssignBufferization
/// the same block. If any operations with unknown effects are found,
/// std::nullopt is returned
static std::optional<mlir::SmallVector<mlir::MemoryEffects::EffectInstance>>
-getEffectsBetween(mlir::Operation *start, mlir::Operation *end) {
+getEffectsBetween(mlir::Operation *start, mlir::Operation *end,
+ mlir::Operation *ignoring) {
mlir::SmallVector<mlir::MemoryEffects::EffectInstance> ret;
if (start == end)
return ret;
@@ -108,6 +110,10 @@ getEffectsBetween(mlir::Operation *start, mlir::Operation *end) {
mlir::Operation *nextOp = start;
while (nextOp && nextOp != end) {
+ if (nextOp == ignoring) {
+ nextOp = nextOp->getNextNode();
+ continue;
+ }
std::optional<mlir::SmallVector<mlir::MemoryEffects::EffectInstance>>
effects = mlir::getEffectsRecursively(nextOp);
if (!effects)
@@ -293,80 +299,10 @@ static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) {
return false;
}
-std::optional<ElementalAssignBufferization::MatchInfo>
-ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
- mlir::Operation::user_range users = elemental->getUsers();
- // the only uses of the elemental should be the assignment and the destroy
- if (std::distance(users.begin(), users.end()) != 2) {
- LLVM_DEBUG(llvm::dbgs() << "Too many uses of the elemental\n");
- return std::nullopt;
- }
-
- // If the ElementalOp must produce a temporary (e.g. for
- // finalization purposes), then we cannot inline it.
- if (hlfir::elementalOpMustProduceTemp(elemental)) {
- LLVM_DEBUG(llvm::dbgs() << "ElementalOp must produce a temp\n");
- return std::nullopt;
- }
-
- MatchInfo match;
- for (mlir::Operation *user : users)
- mlir::TypeSwitch<mlir::Operation *, void>(user)
- .Case([&](hlfir::AssignOp op) { match.assign = op; })
- .Case([&](hlfir::DestroyOp op) { match.destroy = op; });
-
- if (!match.assign || !match.destroy) {
- LLVM_DEBUG(llvm::dbgs() << "Couldn't find assign or destroy\n");
- return std::nullopt;
- }
-
- // the array is what the elemental is assigned into
- // TODO: this could be extended to also allow hlfir.expr by first bufferizing
- // the incoming expression
- match.array = match.assign.getLhs();
- mlir::Type arrayType = mlir::dyn_cast<fir::SequenceType>(
- fir::unwrapPassByRefType(match.array.getType()));
- if (!arrayType)
- return std::nullopt;
-
- // require that the array elements are trivial
- // TODO: this is just to make the pass easier to think about. Not an inherent
- // limitation
- mlir::Type eleTy = hlfir::getFortranElementType(arrayType);
- if (!fir::isa_trivial(eleTy))
- return std::nullopt;
-
- // the array must have the same shape as the elemental. CSE should have
- // deduplicated the fir.shape operations where they are provably the same
- // so we just have to check for the same ssa value
- // TODO: add more ways of getting the shape of the array
- mlir::Value arrayShape;
- if (match.array.getDefiningOp())
- arrayShape =
- mlir::TypeSwitch<mlir::Operation *, mlir::Value>(
- match.array.getDefiningOp())
- .Case([](hlfir::DesignateOp designate) {
- return designate.getShape();
- })
- .Case([](hlfir::DeclareOp declare) { return declare.getShape(); })
- .Default([](mlir::Operation *) { return mlir::Value{}; });
- if (!arrayShape) {
- LLVM_DEBUG(llvm::dbgs() << "Can't get shape of " << match.array << " at "
- << elemental->getLoc() << "\n");
- return std::nullopt;
- }
- if (arrayShape != elemental.getShape()) {
- // f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be
- // conformable unless the lhs is an allocatable array. In HLFIR we can
- // see this from the presence or absence of the realloc attribute on
- // hlfir.assign. If it is not a realloc assignment, we can trust that
- // the shapes do conform
- if (match.assign.getRealloc())
- return std::nullopt;
- }
-
- // the transformation wants to apply the elemental in a do-loop at the
- // hlfir.assign, check there are no effects which make this unsafe
+static bool checkForElementalEffectsBetween(hlfir::ElementalOp elemental,
+ hlfir::AssignOp assign,
+ mlir::Value array,
+ mlir::Operation *ignoring) {
// keep track of any values written to in the elemental, as these can't be
// read from between the elemental and the assignment
@@ -375,20 +311,21 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
mlir::SmallVector<mlir::Value, 1> notToBeAccessedBeforeAssign;
// any accesses to the array between the array and the assignment means it
// would be unsafe to move the elemental to the assignment
- notToBeAccessedBeforeAssign.push_back(match.array);
+ notToBeAccessedBeforeAssign.push_back(array);
// 1) side effects in the elemental body - it isn't sufficient to just look
// for ordered elementals because we also cannot support out of order reads
std::optional<mlir::SmallVector<mlir::MemoryEffects::EffectInstance>>
- effects = getEffectsBetween(&elemental.getBody()->front(),
- elemental.getBody()->getTerminator());
+ effects =
+ getEffectsBetween(&elemental.getBody()->front(),
+ elemental.getBody()->getTerminator(), nullptr);
if (!effects) {
LLVM_DEBUG(llvm::dbgs()
<< "operation with unknown effects inside elemental\n");
- return std::nullopt;
+ return false;
}
for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
- mlir::AliasResult res = containsReadOrWriteEffectOn(effect, match.array);
+ mlir::AliasResult res = containsReadOrWriteEffectOn(effect, array);
if (res.isNo()) {
if (mlir::isa<mlir::MemoryEffects::Write, mlir::MemoryEffects::Read>(
effect.getEffect()))
@@ -402,7 +339,7 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
// don't allow any aliasing writes in the elemental
if (mlir::isa<mlir::MemoryEffects::Write>(effect.getEffect())) {
LLVM_DEBUG(llvm::dbgs() << "write inside the elemental body\n");
- return std::nullopt;
+ return false;
}
// allow if and only if the reads are from the elemental indices, in order
@@ -417,17 +354,17 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
if (!res.isPartial()) {
if (auto designate =
effect.getValue().getDefiningOp<hlfir::DesignateOp>()) {
- if (!areIdenticalOrDisjointSlices(match.array, designate.getMemref())) {
+ if (!areIdenticalOrDisjointSlices(array, designate.getMemref())) {
LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate
<< " at " << elemental.getLoc() << "\n");
- return std::nullopt;
+ return false;
}
auto indices = designate.getIndices();
auto elementalIndices = elemental.getIndices();
if (indices.size() != elementalIndices.size()) {
LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate
<< " at " << elemental.getLoc() << "\n");
- return std::nullopt;
+ return false;
}
if (std::equal(indices.begin(), indices.end(), elementalIndices.begin(),
elementalIndices.end()))
@@ -436,16 +373,16 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
}
LLVM_DEBUG(llvm::dbgs() << "disallowed side-effect: " << effect.getValue()
<< " for " << elemental.getLoc() << "\n");
- return std::nullopt;
+ return false;
}
// 2) look for conflicting effects between the elemental and the assignment
- effects = getEffectsBetween(elemental->getNextNode(), match.assign);
+ effects = getEffectsBetween(elemental->getNextNode(), assign, ignoring);
if (!effects) {
LLVM_DEBUG(
llvm::dbgs()
<< "operation with unknown effects between elemental and assign\n");
- return std::nullopt;
+ return false;
}
for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
// not safe to access anything written in the elemental as this write
@@ -456,11 +393,92 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
LLVM_DEBUG(llvm::dbgs()
<< "diasllowed side-effect: " << effect.getValue() << " for "
<< elemental.getLoc() << "\n");
- return std::nullopt;
+ return false;
}
}
}
+ return true;
+}
+
+std::optional<ElementalAssignBufferization::MatchInfo>
+ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
+ mlir::Operation::user_range users = elemental->getUsers();
+ // the only uses of the elemental should be the assignment and the destroy
+ if (std::distance(users.begin(), users.end()) != 2) {
+ LLVM_DEBUG(llvm::dbgs() << "Too many uses of the elemental\n");
+ return std::nullopt;
+ }
+
+ // If the ElementalOp must produce a temporary (e.g. for
+ // finalization purposes), then we cannot inline it.
+ if (hlfir::elementalOpMustProduceTemp(elemental)) {
+ LLVM_DEBUG(llvm::dbgs() << "ElementalOp must produce a temp\n");
+ return std::nullopt;
+ }
+
+ MatchInfo match;
+ for (mlir::Operation *user : users)
+ mlir::TypeSwitch<mlir::Operation *, void>(user)
+ .Case([&](hlfir::AssignOp op) { match.assign = op; })
+ .Case([&](hlfir::DestroyOp op) { match.destroy = op; });
+
+ if (!match.assign || !match.destroy) {
+ LLVM_DEBUG(llvm::dbgs() << "Couldn't find assign or destroy\n");
+ return std::nullopt;
+ }
+
+ // the array is what the elemental is assigned into
+ // TODO: this could be extended to also allow hlfir.expr by first bufferizing
+ // the incoming expression
+ match.array = match.assign.getLhs();
+ mlir::Type arrayType = mlir::dyn_cast<fir::SequenceType>(
+ fir::unwrapPassByRefType(match.array.getType()));
+ if (!arrayType)
+ return std::nullopt;
+
+ // require that the array elements are trivial
+ // TODO: this is just to make the pass easier to think about. Not an inherent
+ // limitation
+ mlir::Type eleTy = hlfir::getFortranElementType(arrayType);
+ if (!fir::isa_trivial(eleTy))
+ return std::nullopt;
+
+ // the array must have the same shape as the elemental. CSE should have
+ // deduplicated the fir.shape operations where they are provably the same
+ // so we just have to check for the same ssa value
+ // TODO: add more ways of getting the shape of the array
+ mlir::Value arrayShape;
+ if (match.array.getDefiningOp())
+ arrayShape =
+ mlir::TypeSwitch<mlir::Operation *, mlir::Value>(
+ match.array.getDefiningOp())
+ .Case([](hlfir::DesignateOp designate) {
+ return designate.getShape();
+ })
+ .Case([](hlfir::DeclareOp declare) { return declare.getShape(); })
+ .Default([](mlir::Operation *) { return mlir::Value{}; });
+ if (!arrayShape) {
+ LLVM_DEBUG(llvm::dbgs() << "Can't get shape of " << match.array << " at "
+ << elemental->getLoc() << "\n");
+ return std::nullopt;
+ }
+ if (arrayShape != elemental.getShape()) {
+ // f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be
+ // conformable unless the lhs is an allocatable array. In HLFIR we can
+ // see this from the presence or absence of the realloc attribute on
+ // hlfir.assign. If it is not a realloc assignment, we can trust that
+ // the shapes do conform
+ if (match.assign.getRealloc())
+ return std::nullopt;
+ }
+
+ // the transformation wants to apply the elemental in a do-loop at the
+ // hlfir.assign, check there are no effects which make this unsafe
+ if (!checkForElementalEffectsBetween(elemental, match.assign, match.array,
+ nullptr))
+ return std::nullopt;
+
return match;
}
@@ -659,6 +677,181 @@ mlir::LogicalResult VariableAssignBufferization::matchAndRewrite(
return mlir::success();
}
+// Look for assign(minloc(mask=elemental)) and generate the minloc loop with
+// inlined elemental and no extra temporaries.
+// %e = hlfir.elemental %shape ({ ... })
+// %m = hlfir.minloc %array mask %e
+// hlfir.assign %m to %result
+// hlfir.destroy %m
+class AssignMinMaxlocElementalConversion
+ : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+ using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(hlfir::AssignOp assign,
+ mlir::PatternRewriter &rewriter) const override {
+ auto minloc = assign.getOperand(0).getDefiningOp<hlfir::MinlocOp>();
+ if (!minloc || !minloc.getMask() || minloc.getDim() || minloc.getBack())
+ return rewriter.notifyMatchFailure(assign,
+ "Did not find minloc with kind");
+
+ auto elemental = minloc.getMask().getDefiningOp<hlfir::ElementalOp>();
+ if (!elemental || hlfir::elementalOpMustProduceTemp(elemental))
+ return rewriter.notifyMatchFailure(assign, "Did not find elemental");
+
+ mlir::Operation::user_range users = minloc->getUsers();
+ if (std::distance(users.begin(), users.end()) != 2)
+ return rewriter.notifyMatchFailure(assign, "Did not find minloc users");
+ auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(
+ (*users.begin()) == minloc ? *++users.begin() : *users.begin());
+ if (!destroy)
+ return rewriter.notifyMatchFailure(assign, "Did not find destroy");
+
+ if (!checkForElementalEffectsBetween(elemental, assign, minloc.getArray(),
+ minloc))
+ return rewriter.notifyMatchFailure(assign, "Had unhandled effects");
+
+ mlir::Value resultArr = assign.getOperand(1);
+ mlir::Value array = minloc.getArray();
+
+ unsigned rank = mlir::cast<hlfir::ExprType>(minloc.getType()).getShape()[0];
+ mlir::Type arrayType = array.getType();
+ if (!arrayType.isa<fir::BoxType>())
+ return rewriter.notifyMatchFailure(
+ assign, "Currently requires a boxed type input");
+ mlir::Type elementType = hlfir::getFortranElementType(arrayType);
+ if (!fir::isa_trivial(elementType))
+ return rewriter.notifyMatchFailure(
+ assign, "Character arrays are currently not handled");
+
+ auto init = [](fir::FirOpBuilder builder, mlir::Location loc,
+ mlir::Type elementType) {
+ if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
+ const llvm::fltSemantics &sem = ty.getFloatSemantics();
+ return builder.createRealConstant(
+ loc, elementType,
+ llvm::APFloat::getLargest(sem, /*Negative=*/false));
+ }
+ unsigned bits = elementType.getIntOrFloatBitWidth();
+ int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue();
+ return builder.createIntegerConstant(loc, elementType, maxInt);
+ };
+
+ auto genBodyOp =
+ [&rank, &resultArr, &elemental](
+ fir::FirOpBuilder builder, mlir::Location loc,
+ mlir::Type elementType, mlir::Value array, mlir::Value flagRef,
+ mlir::Value reduction,
+ const llvm::SmallVectorImpl<mlir::Value> &indices)
+ -> mlir::Value {
+ // We are in the innermost loop: generate the elemental inline
+ mlir::Value oneIdx =
+ builder.createIntegerConstant(loc, builder.getIndexType(), 1);
+ llvm::SmallVector<mlir::Value> oneBasedIndices;
+ llvm::transform(
+ indices, std::back_inserter(oneBasedIndices), [&](mlir::Value V) {
+ return builder.create<mlir::arith::AddIOp>(loc, V, oneIdx);
+ });
+ hlfir::YieldElementOp yield =
+ hlfir::inlineElementalOp(loc, builder, elemental, oneBasedIndices);
+ mlir::Value maskElem = yield.getElementValue();
+ yield->erase();
+
+ mlir::Type ifCompatType = builder.getI1Type();
+ mlir::Value ifCompatElem =
+ builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
+
+ llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
+ fir::IfOp maskIfOp =
+ builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
+ /*withElseRegion=*/true);
+ builder.setInsertionPointToStart(&maskIfOp.getThenRegion().front());
+
+ // Set flag that mask was true at some point
+ mlir::Value flagSet = builder.createIntegerConstant(
+ loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
+ builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+ mlir::Type eleRefTy = builder.getRefType(elementType);
+ mlir::Value addr =
+ builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
+ mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
+
+ // Compare with the max reduction value
+ mlir::Value cmp;
+ if (elementType.isa<mlir::FloatType>()) {
+ cmp = builder.create<mlir::arith::CmpFOp>(
+ loc, mlir::arith::CmpFPredicate::OLT, elem, reduction);
+ } else if (elementType.isa<mlir::IntegerType>()) {
+ cmp = builder.create<mlir::arith::CmpIOp>(
+ loc, mlir::arith::CmpIPredicate::slt, elem, reduction);
+ } else {
+ llvm_unreachable("unsupported type");
+ }
+
+ // Set the new coordinate to the result
+ fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp,
+ /*withElseRegion*/ true);
+
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+ mlir::Type resultElemTy =
+ hlfir::getFortranElementType(resultArr.getType());
+ mlir::Type returnRefTy = builder.getRefType(resultElemTy);
+ mlir::IndexType idxTy = builder.getIndexType();
+
+ mlir::Value one = builder.createIntegerConstant(loc, resultElemTy, 1);
+
+ for (unsigned int i = 0; i < rank; ++i) {
+ mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
+ mlir::Value resultElemAddr = builder.create<fir::CoordinateOp>(
+ loc, returnRefTy, resultArr, index);
+ mlir::Value convert =
+ builder.create<fir::ConvertOp>(loc, resultElemTy, indices[i]);
+ mlir::Value fortranIndex =
+ builder.create<mlir::arith::AddIOp>(loc, convert, one);
+ builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
+ }
+ builder.create<fir::ResultOp>(loc, elem);
+ builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+ builder.create<fir::ResultOp>(loc, reduction);
+ builder.setInsertionPointAfter(ifOp);
+
+ // Close the mask if
+ builder.create<fir::ResultOp>(loc, ifOp.getResult(0));
+ builder.setInsertionPointToStart(&maskIfOp.getElseRegion().front());
+ builder.create<fir::ResultOp>(loc, reduction);
+ builder.setInsertionPointAfter(maskIfOp);
+
+ return maskIfOp.getResult(0);
+ };
+
+ mlir::Location loc = assign.getLoc();
+ fir::FirOpBuilder builder{rewriter, assign.getOperation()};
+
+ // Initialize the result
+ mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
+ mlir::Type resultRefTy = builder.getRefType(resultElemTy);
+ mlir::Value returnValue =
+ builder.createIntegerConstant(loc, resultElemTy, 0);
+ for (unsigned int i = 0; i < rank; ++i) {
+ mlir::Value index =
+ builder.createIntegerConstant(loc, builder.getIndexType(), i);
+ mlir::Value resultElemAddr =
+ builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index);
+ builder.create<fir::StoreOp>(loc, returnValue, resultElemAddr);
+ }
+
+ fir::genMinlocReductionLoop(builder, array, init, genBodyOp, rank,
+ elementType, loc, builder.getI1Type(),
+ resultArr, false);
+
+ rewriter.eraseOp(assign);
+ rewriter.eraseOp(destroy);
+ rewriter.eraseOp(minloc);
+ return mlir::success();
+ }
+};
+
class OptimizedBufferizationPass
: public hlfir::impl::OptimizedBufferizationBase<
OptimizedBufferizationPass> {
@@ -681,6 +874,7 @@ class OptimizedBufferizationPass
patterns.insert<ElementalAssignBufferization>(context);
patterns.insert<BroadcastAssignBufferization>(context);
patterns.insert<VariableAssignBufferization>(context);
+ patterns.insert<AssignMinMaxlocElementalConversion>(context);
if (mlir::failed(mlir::applyPatternsAndFoldGreedily(
func, std::move(patterns), config))) {
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index 3eddb9e61ae3b3..c88b71baf202e8 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -32,6 +32,7 @@
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/Transforms/Passes.h"
+#include "flang/Optimizer/Support/Utils.h"
#include "flang/Runtime/entry-names.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Matchers.h"
@@ -243,8 +244,6 @@ static std::optional<mlir::Type> getArgElementType(mlir::Value val) {
using BodyOpGeneratorTy = llvm::function_ref<mlir::Value(
fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
mlir::Value)>;
-using InitValGeneratorTy = llvm::function_ref<mlir::Value(
- fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
using ContinueLoopGenTy = llvm::function_ref<llvm::SmallVector<mlir::Value>(
fir::FirOpBuilder &, mlir::Location, mlir::Value)>;
@@ -266,7 +265,7 @@ using ContinueLoopGenTy = llvm::function_ref<llvm::SmallVector<mlir::Value>(
template <typename OP, typename T, int resultIndex>
static void
genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
- InitValGeneratorTy initVal, ContinueLoopGenTy loopCond,
+ fir::InitValGeneratorTy initVal, ContinueLoopGenTy loopCond,
T unorderedOrInitialLoopCond, BodyOpGeneratorTy genBody,
unsigned rank, mlir::Type elementType, mlir::Location loc) {
@@ -353,29 +352,23 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
// Return the reduction value from the function.
builder.create<mlir::func::ReturnOp>(loc, results[resultIndex]);
}
-using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
- fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
- mlir::Value, llvm::SmallVector<mlir::Value, Fortran::common::maxRank> &)>;
-
-static void
-genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
- InitValGeneratorTy initVal,
- MinlocBodyOpGeneratorTy genBody, unsigned rank,
- mlir::Type elementType, mlir::Location loc, bool hasMask,
- mlir::Type maskElemType, mlir::Value resultArr) {
+void fir::genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
+ fir::InitValGeneratorTy initVal,
+ fir::MinlocBodyOpGeneratorTy genBody,
+ unsigned rank, mlir::Type elementType,
+ mlir::Location loc, mlir::Type maskElemType,
+ mlir::Value resultArr,
+ bool maskMayBeLogicalScalar) {
mlir::IndexType idxTy = builder.getIndexType();
- mlir::Block::BlockArgListType args = funcOp.front().getArguments();
- mlir::Value arg = args[1];
-
mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
fir::SequenceType::Shape flatShape(rank,
fir::SequenceType::getUnknownExtent());
mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
mlir::Type boxArrTy = fir::BoxType::get(arrTy);
- mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, arg);
+ array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);
mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
@@ -383,13 +376,6 @@ genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
builder.create<fir::StoreOp>(loc, zero, flagRef);
- mlir::Value mask;
- if (hasMask) {
- mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType);
- mlir::Type boxMaskTy = fir::BoxType::get(maskTy);
- mask = builder.create<fir::ConvertOp>(loc, boxMaskTy, args[2]);
- }
-
mlir::Value init = initVal(builder, loc, elementType);
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
@@ -432,44 +418,8 @@ genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
// Reverse the indices such that they are ordered as:
// <dim-0-idx, dim-1-idx, ...>
std::reverse(indices.begin(), indices.end());
- // We are in the innermost loop: generate the reduction body.
- if (hasMask) {
- mlir::Type logicalRef = builder.getRefType(maskElemType);
- mlir::Value maskAddr =
- builder.create<fir::CoordinateOp>(loc, logicalRef, mask, indices);
- mlir::Value maskElem = builder.create<fir::LoadOp>(loc, maskAddr);
-
- // fir::IfOp requires argument to be I1 - won't accept logical or any other
- // Integer.
- mlir::Type ifCompatType = builder.getI1Type();
- mlir::Value ifCompatElem =
- builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
-
- llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
- fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
- /*withElseRegion=*/true);
- builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
- }
-
- // Set flag that mask was true at some point
- builder.create<fir::StoreOp>(loc, flagSet, flagRef);
- mlir::Type eleRefTy = builder.getRefType(elementType);
- mlir::Value addr =
- builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
- mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
-
mlir::Value reductionVal =
- genBody(builder, loc, elementType, elem, init, indices);
-
- if (hasMask) {
- fir::IfOp ifOp =
- mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
- builder.create<fir::ResultOp>(loc, reductionVal);
- builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
- builder.create<fir::ResultOp>(loc, init);
- reductionVal = ifOp.getResult(0);
- builder.setInsertionPointAfter(ifOp);
- }
+ genBody(builder, loc, elementType, array, flagRef, init, indices);
// Unwind the loop nest and insert ResultOp on each level
// to return the updated value of the reduction to the enclosing
@@ -484,13 +434,15 @@ genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
builder.setInsertionPointAfter(loop.getOperation());
}
// End of loop nest. The insertion point is after the outermost loop.
- if (fir::IfOp ifOp =
- mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
- builder.create<fir::ResultOp>(loc, reductionVal);
- builder.setInsertionPointAfter(ifOp);
- // Redefine flagSet to escape scope of ifOp
- flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
- reductionVal = ifOp.getResult(0);
+ if (maskMayBeLogicalScalar) {
+ if (fir::IfOp ifOp =
+ mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
+ builder.create<fir::ResultOp>(loc, reductionVal);
+ builder.setInsertionPointAfter(ifOp);
+ // Redefine flagSet to escape scope of ifOp
+ flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
+ reductionVal = ifOp.getResult(0);
+ }
}
// Check for case where array was full of max values.
@@ -523,27 +475,12 @@ genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
// Load output array with 1s instead of 0s
for (unsigned int i = 0; i < rank; ++i) {
mlir::Type resultRefTy = builder.getRefType(resultElemType);
- // mlir::Value one = builder.createIntegerConstant(loc, resultElemType, 1);
mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
mlir::Value resultElemAddr =
builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index);
builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
}
builder.setInsertionPointAfter(ifMaskTrueOp);
- // Store newly created output array to the reference passed in
- fir::SequenceType::Shape resultShape(1, rank);
- mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemType);
- mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy);
- mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy);
- mlir::Type outputRefTy = builder.getRefType(outputBoxTy);
-
- mlir::Value outputArrNone = args[0];
- mlir::Value outputArr =
- builder.create<fir::ConvertOp>(loc, outputRefTy, outputArrNone);
-
- // Store nearly created array to output array
- builder.create<fir::StoreOp>(loc, resultArr, outputArr);
- builder.create<mlir::func::ReturnOp>(loc);
}
static llvm::SmallVector<mlir::Value> nopLoopCond(fir::FirOpBuilder &builder,
@@ -789,6 +726,14 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
mlir::Type resultRefTy = builder.getRefType(resultElemTy);
+ if (maskRank > 0) {
+ fir::SequenceType::Shape flatShape(rank,
+ fir::SequenceType::getUnknownExtent());
+ mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType);
+ mlir::Type boxMaskTy = fir::BoxType::get(maskTy);
+ mask = builder.create<fir::ConvertOp>(loc, boxMaskTy, mask);
+ }
+
for (unsigned int i = 0; i < rank; ++i) {
mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
mlir::Value resultElemAddr =
@@ -797,18 +742,46 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
}
auto genBodyOp =
- [&rank, &resultArr](
+ [&rank, &resultArr, &mask, &maskElemType, &maskRank](
fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType,
- mlir::Value elem1, mlir::Value elem2,
- llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices)
+ mlir::Value array, mlir::Value flagRef, mlir::Value reduction,
+ const llvm::SmallVectorImpl<mlir::Value> &indices)
-> mlir::Value {
+ // We are in the innermost loop: generate the reduction body.
+ if (maskRank > 0) {
+ mlir::Type logicalRef = builder.getRefType(maskElemType);
+ mlir::Value maskAddr =
+ builder.create<fir::CoordinateOp>(loc, logicalRef, mask, indices);
+ mlir::Value maskElem = builder.create<fir::LoadOp>(loc, maskAddr);
+
+ // fir::IfOp requires argument to be I1 - won't accept logical or any
+ // other Integer.
+ mlir::Type ifCompatType = builder.getI1Type();
+ mlir::Value ifCompatElem =
+ builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
+
+ llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
+ fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
+ /*withElseRegion=*/true);
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+ }
+
+ // Set flag that mask was true at some point
+ mlir::Value flagSet = builder.createIntegerConstant(
+ loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
+ builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+ mlir::Type eleRefTy = builder.getRefType(elementType);
+ mlir::Value addr =
+ builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
+ mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
+
mlir::Value cmp;
if (elementType.isa<mlir::FloatType>()) {
cmp = builder.create<mlir::arith::CmpFOp>(
- loc, mlir::arith::CmpFPredicate::OLT, elem1, elem2);
+ loc, mlir::arith::CmpFPredicate::OLT, elem, reduction);
} else if (elementType.isa<mlir::IntegerType>()) {
cmp = builder.create<mlir::arith::CmpIOp>(
- loc, mlir::arith::CmpIPredicate::slt, elem1, elem2);
+ loc, mlir::arith::CmpIPredicate::slt, elem, reduction);
} else {
llvm_unreachable("unsupported type");
}
@@ -833,11 +806,24 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
builder.create<mlir::arith::AddIOp>(loc, convert, one);
builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
}
- builder.create<fir::ResultOp>(loc, elem1);
+ builder.create<fir::ResultOp>(loc, elem);
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
- builder.create<fir::ResultOp>(loc, elem2);
+ builder.create<fir::ResultOp>(loc, reduction);
builder.setInsertionPointAfter(ifOp);
- return ifOp.getResult(0);
+ mlir::Value reductionVal = ifOp.getResult(0);
+
+ // Close the mask if needed
+ if (maskRank > 0) {
+ fir::IfOp ifOp =
+ mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
+ builder.create<fir::ResultOp>(loc, reductionVal);
+ builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+ builder.create<fir::ResultOp>(loc, reduction);
+ reductionVal = ifOp.getResult(0);
+ builder.setInsertionPointAfter(ifOp);
+ }
+
+ return reductionVal;
};
// if mask is a logical scalar, we can check its value before the main loop
@@ -872,12 +858,22 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
}
- // bit of a hack - maskRank is set to -1 for absent mask arg, so don't
- // generate high level mask or element by element mask.
- bool hasMask = maskRank > 0;
+ genMinlocReductionLoop(builder, funcOp.front().getArgument(1), init,
+ genBodyOp, rank, elementType, loc, maskElemType,
+ resultArr, maskRank == 0);
- genMinlocReductionLoop(builder, funcOp, init, genBodyOp, rank, elementType,
- loc, hasMask, maskElemType, resultArr);
+ // Store newly created output array to the reference passed in
+ fir::SequenceType::Shape resultShape(1, rank);
+ mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemTy);
+ mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy);
+ mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy);
+ mlir::Type outputRefTy = builder.getRefType(outputBoxTy);
+ mlir::Value outputArr = builder.create<fir::ConvertOp>(
+ loc, outputRefTy, funcOp.front().getArgument(0));
+
+ // Store newly created array to output array
+ builder.create<fir::StoreOp>(loc, resultArr, outputArr);
+ builder.create<mlir::func::ReturnOp>(loc);
}
/// Generate function type for the simplified version of RTNAME(DotProduct)
diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir
new file mode 100644
index 00000000000000..2375a1529cf923
--- /dev/null
+++ b/flang/test/HLFIR/minloc-elemental.fir
@@ -0,0 +1,327 @@
+// RUN: fir-opt %s -opt-bufferization | FileCheck %s
+
+func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+ %c0 = arith.constant 0 : index
+ %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3 = fir.load %2#0 : !fir.ref<i32>
+ %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+ %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+ %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+ ^bb0(%arg3: index):
+ %8 = hlfir.designate %0#0 (%arg3) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+ %9 = fir.load %8 : !fir.ref<i32>
+ %10 = arith.cmpi sge, %9, %3 : i32
+ %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+ hlfir.yield_element %11 : !fir.logical<4>
+ }
+ %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+ hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+ hlfir.destroy %7 : !hlfir.expr<1xi32>
+ hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+ return
+}
+// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT: %c1 = arith.constant 1 : index
+// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32
+// CHECK-NEXT: %c1_i32 = arith.constant 1 : i32
+// CHECK-NEXT: %c0_i32 = arith.constant 0 : i32
+// CHECK-NEXT: %c0 = arith.constant 0 : index
+// CHECK-NEXT: %0 = fir.alloca i32
+// CHECK-NEXT: %1:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT: %2:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT: %3:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT: %4 = fir.load %3#0 : !fir.ref<i32>
+// CHECK-NEXT: %5:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT: %6 = fir.shape %5#1 : (index) -> !fir.shape<1>
+// CHECK-NEXT: %7 = hlfir.elemental %6 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+// CHECK-NEXT: ^bb0(%arg3: index):
+// CHECK-NEXT: %14 = hlfir.designate %1#0 (%arg3) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %15 = fir.load %14 : !fir.ref<i32>
+// CHECK-NEXT: %16 = arith.cmpi sge, %15, %4 : i32
+// CHECK-NEXT: %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+// CHECK-NEXT: hlfir.yield_element %17 : !fir.logical<4>
+// CHECK-NEXT: }
+// CHECK-NEXT: %8 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: fir.store %c0_i32 to %8 : !fir.ref<i32>
+// CHECK-NEXT: fir.store %c0_i32 to %0 : !fir.ref<i32>
+// CHECK-NEXT: %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT: %10 = arith.subi %9#1, %c1 : index
+// CHECK-NEXT: %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT: %14 = arith.addi %arg3, %c1 : index
+// CHECK-NEXT: %15 = hlfir.designate %1#0 (%14) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %16 = fir.load %15 : !fir.ref<i32>
+// CHECK-NEXT: %17 = arith.cmpi sge, %16, %4 : i32
+// CHECK-NEXT: %18 = fir.if %17 -> (i32) {
+// CHECK-NEXT: fir.store %c1_i32 to %0 : !fir.ref<i32>
+// CHECK-NEXT: %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %20 = fir.load %19 : !fir.ref<i32>
+// CHECK-NEXT: %21 = arith.cmpi slt, %20, %arg4 : i32
+// CHECK-NEXT: %22 = fir.if %21 -> (i32) {
+// CHECK-NEXT: %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %24 = fir.convert %arg3 : (index) -> i32
+// CHECK-NEXT: %25 = arith.addi %24, %c1_i32 : i32
+// CHECK-NEXT: fir.store %25 to %23 : !fir.ref<i32>
+// CHECK-NEXT: fir.result %20 : i32
+// CHECK-NEXT: } else {
+// CHECK-NEXT: fir.result %arg4 : i32
+// CHECK-NEXT: }
+// CHECK-NEXT: fir.result %22 : i32
+// CHECK-NEXT: } else {
+// CHECK-NEXT: fir.result %arg4 : i32
+// CHECK-NEXT: }
+// CHECK-NEXT: fir.result %18 : i32
+// CHECK-NEXT: }
+// CHECK-NEXT: %12 = fir.load %0 : !fir.ref<i32>
+// CHECK-NEXT: %13 = arith.cmpi eq, %12, %c1_i32 : i32
+// CHECK-NEXT: fir.if %13 {
+// CHECK-NEXT: %14 = arith.cmpi eq, %11, %c2147483647_i32 : i32
+// CHECK-NEXT: fir.if %14 {
+// CHECK-NEXT: %15 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: fir.store %c1_i32 to %15 : !fir.ref<i32>
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: hlfir.destroy %7 : !hlfir.expr<?x!fir.logical<4>>
+// CHECK-NEXT: return
+// CHECK-NEXT: }
+
+
+func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
+ %c0 = arith.constant 0 : index
+ %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
+ %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3 = fir.load %2#0 : !fir.ref<i32>
+ %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+ %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+ %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+ ^bb0(%arg3: index):
+ %8 = hlfir.designate %0#0 (%arg3) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+ %9 = fir.load %8 : !fir.ref<i32>
+ %10 = arith.cmpi sge, %9, %3 : i32
+ %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+ hlfir.yield_element %11 : !fir.logical<4>
+ }
+ %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+ hlfir.assign %7 to %1#0 : !hlfir.expr<1xi16>, !fir.box<!fir.array<?xi16>>
+ hlfir.destroy %7 : !hlfir.expr<1xi16>
+ hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+ return
+}
+// CHECK-LABEL: func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT: %c1 = arith.constant 1 : index
+// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32
+// CHECK-NEXT: %c1_i16 = arith.constant 1 : i16
+// CHECK-NEXT: %c0_i16 = arith.constant 0 : i16
+// CHECK-NEXT: %c0 = arith.constant 0 : index
+// CHECK-NEXT: %0 = fir.alloca i16
+// CHECK-NEXT: %1:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT: %2:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
+// CHECK-NEXT: %3:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT: %4 = fir.load %3#0 : !fir.ref<i32>
+// CHECK-NEXT: %5:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT: %6 = fir.shape %5#1 : (index) -> !fir.shape<1>
+// CHECK-NEXT: %7 = hlfir.elemental %6 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+// CHECK-NEXT: ^bb0(%arg3: index):
+// CHECK-NEXT: %14 = hlfir.designate %1#0 (%arg3) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %15 = fir.load %14 : !fir.ref<i32>
+// CHECK-NEXT: %16 = arith.cmpi sge, %15, %4 : i32
+// CHECK-NEXT: %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+// CHECK-NEXT: hlfir.yield_element %17 : !fir.logical<4>
+// CHECK-NEXT: }
+// CHECK-NEXT: %8 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT: fir.store %c0_i16 to %8 : !fir.ref<i16>
+// CHECK-NEXT: fir.store %c0_i16 to %0 : !fir.ref<i16>
+// CHECK-NEXT: %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT: %10 = arith.subi %9#1, %c1 : index
+// CHECK-NEXT: %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT: %14 = arith.addi %arg3, %c1 : index
+// CHECK-NEXT: %15 = hlfir.designate %1#0 (%14) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %16 = fir.load %15 : !fir.ref<i32>
+// CHECK-NEXT: %17 = arith.cmpi sge, %16, %4 : i32
+// CHECK-NEXT: %18 = fir.if %17 -> (i32) {
+// CHECK-NEXT: fir.store %c1_i16 to %0 : !fir.ref<i16>
+// CHECK-NEXT: %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %20 = fir.load %19 : !fir.ref<i32>
+// CHECK-NEXT: %21 = arith.cmpi slt, %20, %arg4 : i32
+// CHECK-NEXT: %22 = fir.if %21 -> (i32) {
+// CHECK-NEXT: %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT: %24 = fir.convert %arg3 : (index) -> i16
+// CHECK-NEXT: %25 = arith.addi %24, %c1_i16 : i16
+// CHECK-NEXT: fir.store %25 to %23 : !fir.ref<i16>
+// CHECK-NEXT: fir.result %20 : i32
+// CHECK-NEXT: } else {
+// CHECK-NEXT: fir.result %arg4 : i32
+// CHECK-NEXT: }
+// CHECK-NEXT: fir.result %22 : i32
+// CHECK-NEXT: } else {
+// CHECK-NEXT: fir.result %arg4 : i32
+// CHECK-NEXT: }
+// CHECK-NEXT: fir.result %18 : i32
+// CHECK-NEXT: }
+// CHECK-NEXT: %12 = fir.load %0 : !fir.ref<i16>
+// CHECK-NEXT: %13 = arith.cmpi eq, %12, %c1_i16 : i16
+// CHECK-NEXT: fir.if %13 {
+// CHECK-NEXT: %14 = arith.cmpi eq, %11, %c2147483647_i32 : i32
+// CHECK-NEXT: fir.if %14 {
+// CHECK-NEXT: %15 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT: fir.store %c1_i16 to %15 : !fir.ref<i16>
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: hlfir.destroy %7 : !hlfir.expr<?x!fir.logical<4>>
+// CHECK-NEXT: return
+
+
+func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3 = fir.load %2#0 : !fir.ref<i32>
+ %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+ %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+ %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+ ^bb0(%arg3: index):
+ %10 = hlfir.designate %0#0 (%arg3) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+ %11 = fir.load %10 : !fir.ref<i32>
+ %12 = arith.cmpi sge, %11, %3 : i32
+ %13 = fir.convert %12 : (i1) -> !fir.logical<4>
+ hlfir.yield_element %13 : !fir.logical<4>
+ }
+ %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+ %8 = fir.shape %c1 : (index) -> !fir.shape<1>
+ %9 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+ ^bb0(%arg3: index):
+ %10 = hlfir.apply %7, %arg3 : (!hlfir.expr<1xi16>, index) -> i16
+ %11 = fir.convert %10 : (i16) -> i32
+ hlfir.yield_element %11 : i32
+ }
+ hlfir.assign %9 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+ hlfir.destroy %9 : !hlfir.expr<?xi32>
+ hlfir.destroy %7 : !hlfir.expr<1xi16>
+ hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+ return
+}
+// Doesn't transform due to the convert after the minloc
+// CHECK-LABEL: _QPtest_kind2_convert
+// CHECK: hlfir.minloc
+
+
+func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+ %c0 = arith.constant 0 : index
+ %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+ %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+ %3 = fir.load %2#0 : !fir.ref<f32>
+ %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+ %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+ %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+ ^bb0(%arg3: index):
+ %8 = hlfir.designate %0#0 (%arg3) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %9 = fir.load %8 : !fir.ref<f32>
+ %10 = arith.cmpf oge, %9, %3 : f32
+ %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+ hlfir.yield_element %11 : !fir.logical<4>
+ }
+ %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+ hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+ hlfir.destroy %7 : !hlfir.expr<1xi32>
+ hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+ return
+}
+// CHECK-LABEL: _QPtest_float
+// CHECK: %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %cst) -> (f32) {
+// CHECK-NEXT: %14 = arith.addi %arg3, %c1 : index
+// CHECK-NEXT: %15 = hlfir.designate %1#0 (%14) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK-NEXT: %16 = fir.load %15 : !fir.ref<f32>
+// CHECK-NEXT: %17 = arith.cmpf oge, %16, %4 : f32
+// CHECK-NEXT: %18 = fir.if %17 -> (f32) {
+// CHECK-NEXT: fir.store %c1_i32 to %0 : !fir.ref<i32>
+// CHECK-NEXT: %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK-NEXT: %20 = fir.load %19 : !fir.ref<f32>
+// CHECK-NEXT: %21 = arith.cmpf olt, %20, %arg4 : f32
+// CHECK-NEXT: %22 = fir.if %21 -> (f32) {
+// CHECK-NEXT: %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %24 = fir.convert %arg3 : (index) -> i32
+// CHECK-NEXT: %25 = arith.addi %24, %c1_i32 : i32
+// CHECK-NEXT: fir.store %25 to %23 : !fir.ref<i32>
+// CHECK-NEXT: fir.result %20 : f32
+// CHECK-NEXT: } else {
+// CHECK-NEXT: fir.result %arg4 : f32
+// CHECK-NEXT: }
+// CHECK-NEXT: fir.result %22 : f32
+// CHECK-NEXT: } else {
+// CHECK-NEXT: fir.result %arg4 : f32
+// CHECK-NEXT: }
+// CHECK-NEXT: fir.result %18 : f32
+// CHECK-NEXT: }
+
+
+func.func @_QPtest_assignshape(%arg0: !fir.ref<!fir.array<3x3xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.ref<!fir.array<3xi32>> {fir.bindc_name = "m"}) {
+ %c2 = arith.constant 2 : index
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %0 = fir.shape %c3, %c3 : (index, index) -> !fir.shape<2>
+ %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFtestEarray"} : (!fir.ref<!fir.array<3x3xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x3xf32>>, !fir.ref<!fir.array<3x3xf32>>)
+ %2 = fir.shape %c3 : (index) -> !fir.shape<1>
+ %3:2 = hlfir.declare %arg2(%2) {uniq_name = "_QFtestEm"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+ %4:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+ %5 = fir.load %4#0 : !fir.ref<f32>
+ %6 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<3x3x!fir.logical<4>> {
+ ^bb0(%arg3: index, %arg4: index):
+ %10 = hlfir.designate %1#0 (%arg3, %arg4) : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
+ %11 = fir.load %10 : !fir.ref<f32>
+ %12 = arith.cmpf oge, %11, %5 : f32
+ %13 = fir.convert %12 : (i1) -> !fir.logical<4>
+ hlfir.yield_element %13 : !fir.logical<4>
+ }
+ %7 = hlfir.minloc %1#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.ref<!fir.array<3x3xf32>>, !hlfir.expr<3x3x!fir.logical<4>>) -> !hlfir.expr<2xi32>
+ %8 = fir.shape %c2 : (index) -> !fir.shape<1>
+ %9 = hlfir.designate %3#0 (%c1:%c2:%c1) shape %8 : (!fir.ref<!fir.array<3xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<2xi32>>
+ hlfir.assign %7 to %9 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
+ hlfir.destroy %7 : !hlfir.expr<2xi32>
+ hlfir.destroy %6 : !hlfir.expr<3x3x!fir.logical<4>>
+ return
+}
+// Not supported as the input is not a box
+// CHECK-LABEL: _QPtest_assignshape
+// CHECK: hlfir.minloc
+
+
+func.func @_QFPtest_character(%arg0: !fir.box<!fir.array<?x!fir.char<1>>> {fir.bindc_name = "b"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "c"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0:2 = hlfir.declare %arg0 typeparams %c1 {uniq_name = "_QFFtestEb"} : (!fir.box<!fir.array<?x!fir.char<1>>>, index) -> (!fir.box<!fir.array<?x!fir.char<1>>>, !fir.box<!fir.array<?x!fir.char<1>>>)
+ %1:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestEc"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+ %2 = fir.alloca !fir.array<1xi32> {bindc_name = "m", uniq_name = "_QFFtestEm"}
+ %3 = fir.shape %c1 : (index) -> !fir.shape<1>
+ %4:2 = hlfir.declare %2(%3) {uniq_name = "_QFFtestEm"} : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
+ %5 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+ %6:2 = hlfir.declare %5 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %7:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %8 = fir.load %7#0 : !fir.ref<i32>
+ %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+ %10 = fir.shape %9#1 : (index) -> !fir.shape<1>
+ %11 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+ ^bb0(%arg3: index):
+ %16 = hlfir.designate %1#0 (%arg3) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+ %17 = fir.load %16 : !fir.ref<i32>
+ %18 = arith.cmpi eq, %17, %8 : i32
+ %19 = fir.convert %18 : (i1) -> !fir.logical<4>
+ hlfir.yield_element %19 : !fir.logical<4>
+ }
+ %12 = hlfir.minloc %0#0 mask %11 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x!fir.char<1>>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+ hlfir.assign %12 to %4#0 : !hlfir.expr<1xi32>, !fir.ref<!fir.array<1xi32>>
+ hlfir.destroy %12 : !hlfir.expr<1xi32>
+ hlfir.destroy %11 : !hlfir.expr<?x!fir.logical<4>>
+ %13 = hlfir.designate %4#0 (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+ %14 = fir.load %13 : !fir.ref<i32>
+ hlfir.assign %14 to %6#0 : i32, !fir.ref<i32>
+ %15 = fir.load %6#1 : !fir.ref<i32>
+ return %15 : i32
+}
+// Characters are not supported at the moment
+// CHECK-LABEL: _QFPtest_character
+// CHECK: hlfir.minloc
diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index 39483a9cc18fe8..da54dcbedc0432 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -1760,6 +1760,7 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
// CHECK: %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32>
// CHECK: %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1>
// CHECK: %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<1xi32>>>
+// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
// CHECK: %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
// CHECK: %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
// CHECK: fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref<i32>
@@ -1768,7 +1769,6 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
// CHECK: %[[FLAG_SET:.*]] = arith.constant 1 : i32
// CHECK: %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
// CHECK: fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
// CHECK: %[[MAX:.*]] = arith.constant 2147483647 : i32
// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index
// CHECK: %[[DIM_INDEX0:.*]] = arith.constant 0 : index
@@ -1779,7 +1779,8 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
// CHECK: %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref<!fir.logical<4>>
// CHECK: %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1
// CHECK: %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) {
-// CHECK: fir.store %[[FLAG_SET]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK: %[[FLAG_SET2:.*]] = arith.constant 1 : i32
+// CHECK: fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
// CHECK: %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
// CHECK: %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN]] : i32
>From 381097d6c3fa243c4b376dba294706d1a2a69a57 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 12 Dec 2023 19:07:22 +0000
Subject: [PATCH 2/2] Use Designate and remove dead elementals
---
flang/include/flang/Optimizer/Support/Utils.h | 1 -
.../Transforms/OptimizedBufferization.cpp | 39 ++--
.../Transforms/SimplifyIntrinsics.cpp | 5 +-
flang/test/HLFIR/minloc-elemental.fir | 206 ++++++++----------
4 files changed, 120 insertions(+), 131 deletions(-)
diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 93caa8b23d320c..e567a81b724ed5 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -148,7 +148,6 @@ void genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
mlir::Type maskElemType, mlir::Value resultArr,
bool maskMayBeLogicalScalar);
-
} // namespace fir
#endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 218ddd2a6a7b7e..6ea1d0eca243e6 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -704,7 +704,7 @@ class AssignMinMaxlocElementalConversion
if (std::distance(users.begin(), users.end()) != 2)
return rewriter.notifyMatchFailure(assign, "Did not find minloc users");
auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(
- (*users.begin()) == minloc ? *++users.begin() : *users.begin());
+ *users.begin() == minloc ? *++users.begin() : *users.begin());
if (!destroy)
return rewriter.notifyMatchFailure(assign, "Did not find destroy");
@@ -721,9 +721,9 @@ class AssignMinMaxlocElementalConversion
return rewriter.notifyMatchFailure(
assign, "Currently requires a boxed type input");
mlir::Type elementType = hlfir::getFortranElementType(arrayType);
- if (!fir::isa_trivial(elementType))
- return rewriter.notifyMatchFailure(
- assign, "Character arrays are currently not handled");
+ if (!fir::isa_trivial(elementType))
+ return rewriter.notifyMatchFailure(
+ assign, "Character arrays are currently not handled");
auto init = [](fir::FirOpBuilder builder, mlir::Location loc,
mlir::Type elementType) {
@@ -743,8 +743,7 @@ class AssignMinMaxlocElementalConversion
fir::FirOpBuilder builder, mlir::Location loc,
mlir::Type elementType, mlir::Value array, mlir::Value flagRef,
mlir::Value reduction,
- const llvm::SmallVectorImpl<mlir::Value> &indices)
- -> mlir::Value {
+ const llvm::SmallVectorImpl<mlir::Value> &indices) -> mlir::Value {
// We are in the innermost loop: generate the elemental inline
mlir::Value oneIdx =
builder.createIntegerConstant(loc, builder.getIndexType(), 1);
@@ -773,8 +772,8 @@ class AssignMinMaxlocElementalConversion
loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
builder.create<fir::StoreOp>(loc, flagSet, flagRef);
mlir::Type eleRefTy = builder.getRefType(elementType);
- mlir::Value addr =
- builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
+ mlir::Value addr = builder.create<hlfir::DesignateOp>(
+ loc, eleRefTy, array, oneBasedIndices);
mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
// Compare with the max reduction value
@@ -802,8 +801,8 @@ class AssignMinMaxlocElementalConversion
mlir::Value one = builder.createIntegerConstant(loc, resultElemTy, 1);
for (unsigned int i = 0; i < rank; ++i) {
- mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
- mlir::Value resultElemAddr = builder.create<fir::CoordinateOp>(
+ mlir::Value index = builder.createIntegerConstant(loc, idxTy, i + 1);
+ mlir::Value resultElemAddr = builder.create<hlfir::DesignateOp>(
loc, returnRefTy, resultArr, index);
mlir::Value convert =
builder.create<fir::ConvertOp>(loc, resultElemTy, indices[i]);
@@ -835,9 +834,9 @@ class AssignMinMaxlocElementalConversion
builder.createIntegerConstant(loc, resultElemTy, 0);
for (unsigned int i = 0; i < rank; ++i) {
mlir::Value index =
- builder.createIntegerConstant(loc, builder.getIndexType(), i);
- mlir::Value resultElemAddr =
- builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index);
+ builder.createIntegerConstant(loc, builder.getIndexType(), i + 1);
+ mlir::Value resultElemAddr = builder.create<hlfir::DesignateOp>(
+ loc, resultRefTy, resultArr, index);
builder.create<fir::StoreOp>(loc, returnValue, resultElemAddr);
}
@@ -845,9 +844,23 @@ class AssignMinMaxlocElementalConversion
elementType, loc, builder.getI1Type(),
resultArr, false);
+ // Check if the minloc was the only user of the elemental (apart from a
+ // destroy), and remove it if so.
+ mlir::Operation::user_range elemUsers = elemental->getUsers();
+ hlfir::DestroyOp elemDestroy;
+ if (std::distance(elemUsers.begin(), elemUsers.end()) == 2) {
+ elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*elemUsers.begin());
+ if (!elemDestroy)
+ elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*++elemUsers.begin());
+ }
+
rewriter.eraseOp(assign);
rewriter.eraseOp(destroy);
rewriter.eraseOp(minloc);
+ if (elemDestroy) {
+ rewriter.eraseOp(elemDestroy);
+ rewriter.eraseOp(elemental);
+ }
return mlir::success();
}
};
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index c88b71baf202e8..1d7e092b615f4a 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -31,8 +31,8 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
-#include "flang/Optimizer/Transforms/Passes.h"
#include "flang/Optimizer/Support/Utils.h"
+#include "flang/Optimizer/Transforms/Passes.h"
#include "flang/Runtime/entry-names.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Matchers.h"
@@ -745,8 +745,7 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
[&rank, &resultArr, &mask, &maskElemType, &maskRank](
fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType,
mlir::Value array, mlir::Value flagRef, mlir::Value reduction,
- const llvm::SmallVectorImpl<mlir::Value> &indices)
- -> mlir::Value {
+ const llvm::SmallVectorImpl<mlir::Value> &indices) -> mlir::Value {
// We are in the innermost loop: generate the reduction body.
if (maskRank > 0) {
mlir::Type logicalRef = builder.getRefType(maskElemType);
diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir
index 2375a1529cf923..123a05071ba5d4 100644
--- a/flang/test/HLFIR/minloc-elemental.fir
+++ b/flang/test/HLFIR/minloc-elemental.fir
@@ -23,66 +23,55 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
return
}
// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-// CHECK-NEXT: %c1 = arith.constant 1 : index
// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32
// CHECK-NEXT: %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT: %c0_i32 = arith.constant 0 : i32
// CHECK-NEXT: %c0 = arith.constant 0 : index
-// CHECK-NEXT: %0 = fir.alloca i32
-// CHECK-NEXT: %1:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT: %2:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT: %3:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT: %4 = fir.load %3#0 : !fir.ref<i32>
-// CHECK-NEXT: %5:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT: %6 = fir.shape %5#1 : (index) -> !fir.shape<1>
-// CHECK-NEXT: %7 = hlfir.elemental %6 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-// CHECK-NEXT: ^bb0(%arg3: index):
-// CHECK-NEXT: %14 = hlfir.designate %1#0 (%arg3) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: %15 = fir.load %14 : !fir.ref<i32>
-// CHECK-NEXT: %16 = arith.cmpi sge, %15, %4 : i32
-// CHECK-NEXT: %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-// CHECK-NEXT: hlfir.yield_element %17 : !fir.logical<4>
-// CHECK-NEXT: }
-// CHECK-NEXT: %8 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: fir.store %c0_i32 to %8 : !fir.ref<i32>
-// CHECK-NEXT: fir.store %c0_i32 to %0 : !fir.ref<i32>
-// CHECK-NEXT: %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT: %10 = arith.subi %9#1, %c1 : index
-// CHECK-NEXT: %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
-// CHECK-NEXT: %14 = arith.addi %arg3, %c1 : index
-// CHECK-NEXT: %15 = hlfir.designate %1#0 (%14) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: %16 = fir.load %15 : !fir.ref<i32>
-// CHECK-NEXT: %17 = arith.cmpi sge, %16, %4 : i32
-// CHECK-NEXT: %18 = fir.if %17 -> (i32) {
-// CHECK-NEXT: fir.store %c1_i32 to %0 : !fir.ref<i32>
-// CHECK-NEXT: %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: %20 = fir.load %19 : !fir.ref<i32>
-// CHECK-NEXT: %21 = arith.cmpi slt, %20, %arg4 : i32
-// CHECK-NEXT: %22 = fir.if %21 -> (i32) {
-// CHECK-NEXT: %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: %24 = fir.convert %arg3 : (index) -> i32
-// CHECK-NEXT: %25 = arith.addi %24, %c1_i32 : i32
-// CHECK-NEXT: fir.store %25 to %23 : !fir.ref<i32>
-// CHECK-NEXT: fir.result %20 : i32
+// CHECK-NEXT: %c1 = arith.constant 1 : index
+// CHECK-NEXT: %c0_i32 = arith.constant 0 : i32
+// CHECK-NEXT: %[[V0:.*]] = fir.alloca i32
+// CHECK-NEXT: %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT: %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT: %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT: %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
+// CHECK-NEXT: %[[V8:.*]] = hlfir.designate %[[V2]]#0 (%c1) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: fir.store %c0_i32 to %[[V8]] : !fir.ref<i32>
+// CHECK-NEXT: fir.store %c0_i32 to %[[V0]] : !fir.ref<i32>
+// CHECK-NEXT: %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT: %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
+// CHECK-NEXT: %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT: %[[V14:.*]] = arith.addi %arg3, %c1 : index
+// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
+// CHECK-NEXT: %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
+// CHECK-NEXT: %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
+// CHECK-NEXT: fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
+// CHECK-NEXT: %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
+// CHECK-NEXT: %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
+// CHECK-NEXT: %[[V22:.*]] = fir.if %[[V21]] -> (i32) {
+// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[V2]]#0 (%c1) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %[[V24:.*]] = fir.convert %arg3 : (index) -> i32
+// CHECK-NEXT: %[[V25:.*]] = arith.addi %[[V24]], %c1_i32 : i32
+// CHECK-NEXT: fir.store %[[V25]] to %[[V23]] : !fir.ref<i32>
+// CHECK-NEXT: fir.result %[[V20]] : i32
// CHECK-NEXT: } else {
// CHECK-NEXT: fir.result %arg4 : i32
// CHECK-NEXT: }
-// CHECK-NEXT: fir.result %22 : i32
+// CHECK-NEXT: fir.result %[[V22]] : i32
// CHECK-NEXT: } else {
// CHECK-NEXT: fir.result %arg4 : i32
// CHECK-NEXT: }
-// CHECK-NEXT: fir.result %18 : i32
+// CHECK-NEXT: fir.result %[[V18]] : i32
// CHECK-NEXT: }
-// CHECK-NEXT: %12 = fir.load %0 : !fir.ref<i32>
-// CHECK-NEXT: %13 = arith.cmpi eq, %12, %c1_i32 : i32
-// CHECK-NEXT: fir.if %13 {
-// CHECK-NEXT: %14 = arith.cmpi eq, %11, %c2147483647_i32 : i32
-// CHECK-NEXT: fir.if %14 {
-// CHECK-NEXT: %15 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: fir.store %c1_i32 to %15 : !fir.ref<i32>
+// CHECK-NEXT: %[[V12:.*]] = fir.load %[[V0]] : !fir.ref<i32>
+// CHECK-NEXT: %[[V13:.*]] = arith.cmpi eq, %[[V12]], %c1_i32 : i32
+// CHECK-NEXT: fir.if %[[V13]] {
+// CHECK-NEXT: %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c2147483647_i32 : i32
+// CHECK-NEXT: fir.if %[[V14]] {
+// CHECK-NEXT: %[[V15:.*]] = fir.coordinate_of %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: fir.store %c1_i32 to %[[V15]] : !fir.ref<i32>
// CHECK-NEXT: }
// CHECK-NEXT: }
-// CHECK-NEXT: hlfir.destroy %7 : !hlfir.expr<?x!fir.logical<4>>
// CHECK-NEXT: return
// CHECK-NEXT: }
@@ -110,66 +99,55 @@ func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a
return
}
// CHECK-LABEL: func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
-// CHECK-NEXT: %c1 = arith.constant 1 : index
// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32
// CHECK-NEXT: %c1_i16 = arith.constant 1 : i16
-// CHECK-NEXT: %c0_i16 = arith.constant 0 : i16
// CHECK-NEXT: %c0 = arith.constant 0 : index
-// CHECK-NEXT: %0 = fir.alloca i16
-// CHECK-NEXT: %1:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT: %2:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
-// CHECK-NEXT: %3:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT: %4 = fir.load %3#0 : !fir.ref<i32>
-// CHECK-NEXT: %5:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT: %6 = fir.shape %5#1 : (index) -> !fir.shape<1>
-// CHECK-NEXT: %7 = hlfir.elemental %6 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-// CHECK-NEXT: ^bb0(%arg3: index):
-// CHECK-NEXT: %14 = hlfir.designate %1#0 (%arg3) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: %15 = fir.load %14 : !fir.ref<i32>
-// CHECK-NEXT: %16 = arith.cmpi sge, %15, %4 : i32
-// CHECK-NEXT: %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-// CHECK-NEXT: hlfir.yield_element %17 : !fir.logical<4>
-// CHECK-NEXT: }
-// CHECK-NEXT: %8 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT: fir.store %c0_i16 to %8 : !fir.ref<i16>
-// CHECK-NEXT: fir.store %c0_i16 to %0 : !fir.ref<i16>
-// CHECK-NEXT: %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT: %10 = arith.subi %9#1, %c1 : index
-// CHECK-NEXT: %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
-// CHECK-NEXT: %14 = arith.addi %arg3, %c1 : index
-// CHECK-NEXT: %15 = hlfir.designate %1#0 (%14) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: %16 = fir.load %15 : !fir.ref<i32>
-// CHECK-NEXT: %17 = arith.cmpi sge, %16, %4 : i32
-// CHECK-NEXT: %18 = fir.if %17 -> (i32) {
-// CHECK-NEXT: fir.store %c1_i16 to %0 : !fir.ref<i16>
-// CHECK-NEXT: %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: %20 = fir.load %19 : !fir.ref<i32>
-// CHECK-NEXT: %21 = arith.cmpi slt, %20, %arg4 : i32
-// CHECK-NEXT: %22 = fir.if %21 -> (i32) {
-// CHECK-NEXT: %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT: %24 = fir.convert %arg3 : (index) -> i16
-// CHECK-NEXT: %25 = arith.addi %24, %c1_i16 : i16
-// CHECK-NEXT: fir.store %25 to %23 : !fir.ref<i16>
-// CHECK-NEXT: fir.result %20 : i32
+// CHECK-NEXT: %c1 = arith.constant 1 : index
+// CHECK-NEXT: %c0_i16 = arith.constant 0 : i16
+// CHECK-NEXT: %[[V0:.*]] = fir.alloca i16
+// CHECK-NEXT: %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT: %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
+// CHECK-NEXT: %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT: %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
+// CHECK-NEXT: %[[V8:.*]] = hlfir.designate %[[V2]]#0 (%c1) : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT: fir.store %c0_i16 to %[[V8]] : !fir.ref<i16>
+// CHECK-NEXT: fir.store %c0_i16 to %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT: %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT: %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
+// CHECK-NEXT: %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT: %[[V14:.*]] = arith.addi %arg3, %c1 : index
+// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
+// CHECK-NEXT: %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
+// CHECK-NEXT: %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
+// CHECK-NEXT: fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT: %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
+// CHECK-NEXT: %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
+// CHECK-NEXT: %[[V22:.*]] = fir.if %[[V21]] -> (i32) {
+// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[V2]]#0 (%c1) : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT: %[[V24:.*]] = fir.convert %arg3 : (index) -> i16
+// CHECK-NEXT: %[[V25:.*]] = arith.addi %[[V24]], %c1_i16 : i16
+// CHECK-NEXT: fir.store %[[V25]] to %[[V23]] : !fir.ref<i16>
+// CHECK-NEXT: fir.result %[[V20]] : i32
// CHECK-NEXT: } else {
// CHECK-NEXT: fir.result %arg4 : i32
// CHECK-NEXT: }
-// CHECK-NEXT: fir.result %22 : i32
+// CHECK-NEXT: fir.result %[[V22]] : i32
// CHECK-NEXT: } else {
// CHECK-NEXT: fir.result %arg4 : i32
// CHECK-NEXT: }
-// CHECK-NEXT: fir.result %18 : i32
+// CHECK-NEXT: fir.result %[[V18]] : i32
// CHECK-NEXT: }
-// CHECK-NEXT: %12 = fir.load %0 : !fir.ref<i16>
-// CHECK-NEXT: %13 = arith.cmpi eq, %12, %c1_i16 : i16
-// CHECK-NEXT: fir.if %13 {
-// CHECK-NEXT: %14 = arith.cmpi eq, %11, %c2147483647_i32 : i32
-// CHECK-NEXT: fir.if %14 {
-// CHECK-NEXT: %15 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT: fir.store %c1_i16 to %15 : !fir.ref<i16>
+// CHECK-NEXT: %[[V12:.*]] = fir.load %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT: %[[V13:.*]] = arith.cmpi eq, %[[V12]], %c1_i16 : i16
+// CHECK-NEXT: fir.if %[[V13]] {
+// CHECK-NEXT: %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c2147483647_i32 : i32
+// CHECK-NEXT: fir.if %[[V14]] {
+// CHECK-NEXT: %[[V15:.*]] = fir.coordinate_of %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT: fir.store %c1_i16 to %[[V15]] : !fir.ref<i16>
// CHECK-NEXT: }
// CHECK-NEXT: }
-// CHECK-NEXT: hlfir.destroy %7 : !hlfir.expr<?x!fir.logical<4>>
// CHECK-NEXT: return
@@ -232,30 +210,30 @@ func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a
return
}
// CHECK-LABEL: _QPtest_float
-// CHECK: %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %cst) -> (f32) {
-// CHECK-NEXT: %14 = arith.addi %arg3, %c1 : index
-// CHECK-NEXT: %15 = hlfir.designate %1#0 (%14) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT: %16 = fir.load %15 : !fir.ref<f32>
-// CHECK-NEXT: %17 = arith.cmpf oge, %16, %4 : f32
-// CHECK-NEXT: %18 = fir.if %17 -> (f32) {
-// CHECK-NEXT: fir.store %c1_i32 to %0 : !fir.ref<i32>
-// CHECK-NEXT: %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT: %20 = fir.load %19 : !fir.ref<f32>
-// CHECK-NEXT: %21 = arith.cmpf olt, %20, %arg4 : f32
-// CHECK-NEXT: %22 = fir.if %21 -> (f32) {
-// CHECK-NEXT: %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT: %24 = fir.convert %arg3 : (index) -> i32
-// CHECK-NEXT: %25 = arith.addi %24, %c1_i32 : i32
-// CHECK-NEXT: fir.store %25 to %23 : !fir.ref<i32>
-// CHECK-NEXT: fir.result %20 : f32
+// CHECK: %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = %cst) -> (f32) {
+// CHECK-NEXT: %[[V14:.*]] = arith.addi %arg3, %c1 : index
+// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK-NEXT: %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<f32>
+// CHECK-NEXT: %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32
+// CHECK-NEXT: %[[V18:.*]] = fir.if %[[V17]] -> (f32) {
+// CHECK-NEXT: fir.store %c1_i32 to %[[V0:.*]] : !fir.ref<i32>
+// CHECK-NEXT: %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK-NEXT: %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<f32>
+// CHECK-NEXT: %[[V21:.*]] = arith.cmpf olt, %[[V20]], %arg4 : f32
+// CHECK-NEXT: %[[V22:.*]] = fir.if %[[V21]] -> (f32) {
+// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[V2:.*]]#0 (%c1) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT: %[[V24:.*]] = fir.convert %arg3 : (index) -> i32
+// CHECK-NEXT: %[[V25:.*]] = arith.addi %[[V24]], %c1_i32 : i32
+// CHECK-NEXT: fir.store %[[V25]] to %[[V23]] : !fir.ref<i32>
+// CHECK-NEXT: fir.result %[[V20]] : f32
// CHECK-NEXT: } else {
// CHECK-NEXT: fir.result %arg4 : f32
// CHECK-NEXT: }
-// CHECK-NEXT: fir.result %22 : f32
+// CHECK-NEXT: fir.result %[[V22]] : f32
// CHECK-NEXT: } else {
// CHECK-NEXT: fir.result %arg4 : f32
// CHECK-NEXT: }
-// CHECK-NEXT: fir.result %18 : f32
+// CHECK-NEXT: fir.result %[[V18]] : f32
// CHECK-NEXT: }
More information about the flang-commits
mailing list