[flang-commits] [flang] [flang][HLFIR] Relax InlineElementals to support more than two users (PR #186916)
Tom Eccles via flang-commits
flang-commits at lists.llvm.org
Tue Apr 21 04:31:08 PDT 2026
================
@@ -31,29 +33,217 @@ namespace hlfir {
#include "flang/Optimizer/HLFIR/Passes.h.inc"
} // namespace hlfir
+/// Gathers the memory values (buffers/references) read by the body of
+/// \p elemental and appends them to \p deps. Duplicates are not filtered.
+static void getReadDependencies(hlfir::ElementalOp elemental,
+                                llvm::SmallVectorImpl<mlir::Value> &deps) {
+  mlir::Region &body = elemental.getRegion();
+  body.walk([&](mlir::Operation *nested) {
+    // The memref of every designate/load is recorded, wherever it is defined.
+    if (auto designateOp = mlir::dyn_cast<hlfir::DesignateOp>(nested)) {
+      deps.push_back(designateOp.getMemref());
+    } else if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(nested)) {
+      deps.push_back(loadOp.getMemref());
+    }
+
+    // Additionally record every operand of reference, pointer, heap or box
+    // type whose parent region is not the elemental's own region.
+    for (mlir::Value value : nested->getOperands()) {
+      if (value.getParentRegion() == &body)
+        continue;
+      if (!mlir::isa<fir::ReferenceType, fir::PointerType, fir::HeapType,
+                     fir::BoxType>(value.getType()))
+        continue;
+      deps.push_back(value);
+    }
+  });
+}
+
+/// Checks whether operation \p op may modify (write to or free) any memory
+/// location that the elemental reads from (captured in \p deps).
+///
+/// \param op   Operation to inspect; operations nested in its regions are
+///             inspected recursively.
+/// \param deps Values the elemental body reads from.
+/// \param aa   Alias analysis used to compare each written/freed value
+///             against every entry of \p deps.
+/// \returns true when a conflicting write cannot be ruled out, false
+///          otherwise.
+static bool isConflictingWrite(mlir::Operation *op,
+                               const llvm::SmallVectorImpl<mlir::Value> &deps,
+                               mlir::AliasAnalysis &aa) {
+  // Operations explicitly marked as having no memory effects are safe.
+  if (mlir::isMemoryEffectFree(op))
+    return false;
+
+  // Explicitly allow safe HLFIR/FIR metadata/lifetime operations.
+  // While these may have internal effects (e.g. allocating a descriptor),
+  // they do not modify the user data being read by the elemental.
+  if (mlir::isa<hlfir::DeclareOp, hlfir::AssociateOp, hlfir::EndAssociateOp,
+                fir::AllocaOp, hlfir::NoReassocOp>(op))
+    return false;
+
+  // Check for explicit memory effects via the MemoryEffectOpInterface.
+  if (auto memInterface = mlir::dyn_cast<mlir::MemoryEffectOpInterface>(op)) {
+    llvm::SmallVector<mlir::MemoryEffects::EffectInstance, 4> effects;
+    memInterface.getEffects(effects);
+
+    for (const auto &effect : effects) {
+      // Analyze effects that modify memory or release resources.
+      if (mlir::isa<mlir::MemoryEffects::Write>(effect.getEffect()) ||
+          mlir::isa<mlir::MemoryEffects::Free>(effect.getEffect())) {
+
+        mlir::Value accessedValue = effect.getValue();
+        // If the effect is not attached to a specific value (e.g. an
+        // external call), assume a conflict.
+        if (!accessedValue)
+          return true;
+
+        // Perform alias analysis against all read dependencies; anything
+        // other than a definite "no alias" counts as a conflict.
+        for (mlir::Value dep : deps) {
+          if (!aa.alias(accessedValue, dep).isNo())
+            return true;
+        }
+      }
+    }
+  } else if (op->getNumRegions() == 0) {
+    // Conservative fallback: if an operation lacks the interface and has no
+    // regions (e.g. a fir.call to an external function), assume it can
+    // potentially modify any memory.
+    return true;
+  }
+
+  // Recursive analysis into structured control flow regions
+  // (e.g. fir.if, fir.do_loop) to find nested conflicting writes.
+  for (mlir::Region &region : op->getRegions()) {
+    for (mlir::Block &block : region) {
+      for (mlir::Operation &nestedOp : block) {
+        if (isConflictingWrite(&nestedOp, deps, aa))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/// Decides whether the body of \p producer can be inlined at \p applySite.
+///
+/// Inlining is rejected when the producer does not properly dominate the
+/// apply site, when the producer is not nested in a func.func, or when an
+/// operation that is properly dominated by the producer and dominates the
+/// apply site may write to (or free) memory the elemental body reads.
+///
+/// \param producer  Elemental whose body would be inlined.
+/// \param applySite Apply operation at which the body would be inlined.
+/// \param aa        Alias analysis used for the conflict checks.
+/// \returns true if no conflicting write was found, false otherwise.
+bool isSafeToInline(hlfir::ElementalOp producer, hlfir::ApplyOp applySite,
+                    mlir::AliasAnalysis &aa) {
+  mlir::DominanceInfo domInfo(producer->getParentOp());
+  if (!domInfo.properlyDominates(producer.getOperation(),
+                                 applySite.getOperation()))
+    return false;
+
+  llvm::SmallVector<mlir::Value> deps;
+  getReadDependencies(producer, deps);
+
+  mlir::Operation *func = producer->getParentOfType<mlir::func::FuncOp>();
+  // The producer may not be nested in a func.func at all; without an
+  // enclosing function to scan, conservatively refuse to inline rather than
+  // dereference a null operation.
+  if (!func)
+    return false;
+
+  bool conflict = false;
+
+  func->walk([&](mlir::Operation *op) {
+    // Skip the producer and applySite themselves.
+    if (op == producer.getOperation() || op == applySite.getOperation())
+      return mlir::WalkResult::advance();
+
+    // Skip any operation that contains the applySite.
+    // We only care about operations that execute before the applySite
+    // starts or between the producer and the start of the loop.
+    if (op->isAncestor(applySite.getOperation()))
+      return mlir::WalkResult::advance();
+
+    // Only check operations that strictly execute between definition and use.
+    if (domInfo.properlyDominates(producer.getOperation(), op) &&
+        domInfo.dominates(op, applySite.getOperation())) {
+      if (isConflictingWrite(op, deps, aa)) {
+        conflict = true;
+        return mlir::WalkResult::interrupt();
+      }
+    }
+    return mlir::WalkResult::advance();
+  });
+
+  return !conflict;
+}
+
/// If the elemental has only two uses and those two are an apply operation and
/// a destroy operation, return those two, otherwise return {}
static std::optional<std::pair<hlfir::ApplyOp, hlfir::DestroyOp>>
-getTwoUses(hlfir::ElementalOp elemental) {
- mlir::Operation::user_range users = elemental->getUsers();
- // don't inline anything with more than one use (plus hfir.destroy)
- if (std::distance(users.begin(), users.end()) != 2) {
- return std::nullopt;
- }
-
+getTwoUses(hlfir::ElementalOp elemental, mlir::AliasAnalysis &aliasAnalysis) {
// If the ElementalOp must produce a temporary (e.g. for
// finalization purposes), then we cannot inline it.
if (hlfir::elementalOpMustProduceTemp(elemental))
return std::nullopt;
hlfir::ApplyOp apply;
hlfir::DestroyOp destroy;
- for (mlir::Operation *user : users)
- mlir::TypeSwitch<mlir::Operation *, void>(user)
- .Case([&](hlfir::ApplyOp op) { apply = op; })
- .Case([&](hlfir::DestroyOp op) { destroy = op; });
+ unsigned applyCount = 0;
+
+ llvm::SmallVector<mlir::Value> worklist;
+ worklist.push_back(elemental.getResult());
+ llvm::SmallPtrSet<mlir::Value, 16> visited;
- if (!apply || !destroy)
+ while (!worklist.empty()) {
+ mlir::Value current = worklist.pop_back_val();
+ if (!current || !visited.insert(current).second)
+ continue;
+
+ for (mlir::OpOperand &use : current.getUses()) {
+ mlir::Operation *user = use.getOwner();
+
+ mlir::TypeSwitch<mlir::Operation *, void>(user)
+ .Case<hlfir::ApplyOp>([&](hlfir::ApplyOp op) {
+ apply = op;
+ applyCount++;
+ })
+ .Case<hlfir::DestroyOp>([&](hlfir::DestroyOp op) {
+ // Track the mandatory destroy operation for the elemental expr.
+ destroy = op;
+ })
+ .Case<hlfir::DeclareOp>([&](hlfir::DeclareOp op) {
+ // Follow the dataflow through variable declarations.
+ worklist.push_back(op.getBase());
+ })
+ .Case<fir::ConvertOp>([&](fir::ConvertOp op) {
+ // Follow the dataflow through type conversions.
+ worklist.push_back(op.getResult());
+ })
+ .Case<mlir::BranchOpInterface>([&](mlir::BranchOpInterface branch) {
+ for (unsigned i = 0; i < branch->getNumSuccessors(); ++i) {
+ mlir::SuccessorOperands operands = branch.getSuccessorOperands(i);
+ for (unsigned j = 0; j < operands.size(); ++j) {
+ if (operands[j] == current) {
+ // The j-th operand of the branch maps to the j-th block
+ // argument of the successor block.
+ mlir::Block *successor = branch->getSuccessor(i);
+ worklist.push_back(successor->getArgument(j));
+ }
+ }
+ }
+ })
+ .Case<fir::ResultOp>([&](fir::ResultOp op) {
+ mlir::Operation *parent = op->getParentOp();
+ if (parent) {
+ for (auto it : llvm::enumerate(op.getOperands())) {
+ if (it.value() == current) {
+ // 'current' is being yielded. The value outside the loop is
+ // the i-th result of the parent operation.
+ unsigned i = it.index();
+ if (i < parent->getNumResults()) {
+ worklist.push_back(parent->getResult(i));
+ }
+ }
+ }
+ }
+ })
+ .Default([&](mlir::Operation *op) {
+ // If the elemental result is used by an operation with regions
+ // (like fir.if or fir.do_loop), the apply site may be nested
+ // inside.
+ if (op->getNumRegions() > 0) {
+ op->walk([&](hlfir::ApplyOp nestedApply) {
+ if (nestedApply.getExpr() == current) {
+ apply = nestedApply;
+ applyCount++;
+ }
+ });
+ }
+ });
+ }
+ }
+
+ // Only inline if there is a unique 'apply' site. Other users (such as
+ // intrinsic operations) are allowed because scalarizing the elemental
+ // renders the original array result redundant.
+ if (applyCount != 1 || !destroy)
+ return std::nullopt;
----------------
tblah wrote:
Do we need some tracking to ensure the destroy op is not removed for intrinsic operations?
https://github.com/llvm/llvm-project/pull/186916
More information about the flang-commits
mailing list