[flang-commits] [flang] [flang][HLFIR] Relax InlineElementals to support more than two users (PR #186916)
via flang-commits
flang-commits at lists.llvm.org
Wed Apr 22 21:51:58 PDT 2026
================
@@ -31,29 +33,217 @@ namespace hlfir {
#include "flang/Optimizer/HLFIR/Passes.h.inc"
} // namespace hlfir
+/// Collects the memory values (buffers/references) that the body of
+/// \p elemental reads from and appends them to \p deps.
+///
+/// Three kinds of values are recorded:
+///  - the memref operand of every hlfir.designate found in the body,
+///  - the memref operand of every fir.load found in the body,
+///  - every reference/pointer/heap/box-typed operand whose definition lives
+///    outside the elemental's region tree.
+///
+/// \p deps is not deduplicated; the same value may be appended several times.
+static void getReadDependencies(hlfir::ElementalOp elemental,
+                                llvm::SmallVectorImpl<mlir::Value> &deps) {
+  mlir::Region &body = elemental.getRegion();
+  body.walk([&](mlir::Operation *op) {
+    if (auto designate = mlir::dyn_cast<hlfir::DesignateOp>(op))
+      deps.push_back(designate.getMemref());
+    else if (auto load = mlir::dyn_cast<fir::LoadOp>(op))
+      deps.push_back(load.getMemref());
+
+    // Capture any memory-like value defined outside the elemental but used
+    // inside it. An ancestor query is used instead of pointer equality so
+    // that values defined in regions nested inside the body (e.g. within a
+    // fir.if) are not mistaken for external captures.
+    for (mlir::Value operand : op->getOperands()) {
+      mlir::Region *definingRegion = operand.getParentRegion();
+      if (definingRegion && body.isAncestor(definingRegion))
+        continue;
+      if (mlir::isa<fir::ReferenceType, fir::PointerType, fir::HeapType,
+                    fir::BoxType>(operand.getType()))
+        deps.push_back(operand);
+    }
+  });
+}
+
+/// Checks whether operation \p op potentially modifies (writes or frees) any
+/// memory location that the elemental reads from.
+///
+/// \param op   the operation to inspect; its nested regions are inspected
+///             recursively.
+/// \param deps the values the elemental reads from (see getReadDependencies).
+/// \param aa   alias analysis used to compare written values against \p deps.
+/// \returns true if a conflict cannot be ruled out, false otherwise.
+static bool isConflictingWrite(mlir::Operation *op,
+                               const llvm::SmallVectorImpl<mlir::Value> &deps,
+                               mlir::AliasAnalysis &aa) {
+  // Operations explicitly marked as having no memory effects are safe.
+  if (mlir::isMemoryEffectFree(op))
+    return false;
+
+  // Explicitly allow safe HLFIR/FIR metadata/lifetime operations.
+  // While these may have internal effects (e.g. allocating a descriptor),
+  // they do not modify the user data being read by the elemental.
+  if (mlir::isa<hlfir::DeclareOp, hlfir::AssociateOp, hlfir::EndAssociateOp,
+                fir::AllocaOp, hlfir::NoReassocOp>(op))
+    return false;
+
+  // Check for explicit memory effects via the MemoryEffectOpInterface.
+  if (auto memInterface = mlir::dyn_cast<mlir::MemoryEffectOpInterface>(op)) {
+    llvm::SmallVector<mlir::MemoryEffects::EffectInstance, 4> effects;
+    memInterface.getEffects(effects);
+
+    for (const auto &effect : effects) {
+      // Only effects that modify memory or release resources matter here.
+      if (!mlir::isa<mlir::MemoryEffects::Write, mlir::MemoryEffects::Free>(
+              effect.getEffect()))
+        continue;
+
+      mlir::Value accessedValue = effect.getValue();
+      // If the effect is on an unknown resource (e.g. external call),
+      // assume a conflict.
+      if (!accessedValue)
+        return true;
+
+      // Perform alias analysis against all read dependencies.
+      for (mlir::Value dep : deps) {
+        if (!aa.alias(accessedValue, dep).isNo())
+          return true;
+      }
+    }
+  } else if (op->getNumRegions() == 0) {
+    // Conservative fallback: if an operation lacks the interface and has no
+    // regions (e.g. a fir.call to an external function), assume it can
+    // potentially modify any memory.
+    return true;
+  }
+
+  // Recurse into structured control flow regions (e.g. fir.if, fir.do_loop)
+  // to find nested conflicting writes.
+  for (mlir::Region &region : op->getRegions()) {
+    for (mlir::Block &block : region) {
+      for (mlir::Operation &nestedOp : block) {
+        if (isConflictingWrite(&nestedOp, deps, aa))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/// Decides whether the body of \p producer can be inlined at \p applySite.
+///
+/// Inlining is rejected when:
+///  - \p producer does not properly dominate \p applySite,
+///  - \p producer is not nested inside a func.func (nothing to scan), or
+///  - some operation that is properly dominated by \p producer and dominates
+///    \p applySite may write to or free memory the elemental body reads
+///    (see getReadDependencies / isConflictingWrite).
+///
+/// \param producer  the hlfir.elemental whose body would be inlined.
+/// \param applySite the hlfir.apply that would be replaced by the body.
+/// \param aa        alias analysis used for the conflict check.
+/// \returns true if no conflicting write was found, false otherwise.
+bool isSafeToInline(hlfir::ElementalOp producer, hlfir::ApplyOp applySite,
+                    mlir::AliasAnalysis &aa) {
+  mlir::DominanceInfo domInfo(producer->getParentOp());
+  if (!domInfo.properlyDominates(producer.getOperation(),
+                                 applySite.getOperation()))
+    return false;
+
+  // getParentOfType returns null when the elemental is not nested in a
+  // func.func; be conservative rather than dereferencing a null pointer.
+  mlir::Operation *func = producer->getParentOfType<mlir::func::FuncOp>();
+  if (!func)
+    return false;
+
+  llvm::SmallVector<mlir::Value> deps;
+  getReadDependencies(producer, deps);
+
+  mlir::WalkResult result =
+      func->walk([&](mlir::Operation *op) -> mlir::WalkResult {
+        // Skip the producer and applySite themselves.
+        if (op == producer.getOperation() || op == applySite.getOperation())
+          return mlir::WalkResult::advance();
+
+        // Skip any operation that contains the applySite.
+        // We only care about operations that execute before the applySite
+        // starts or between the producer and the start of the loop.
+        if (op->isAncestor(applySite.getOperation()))
+          return mlir::WalkResult::advance();
+
+        // Only check operations that execute between definition and use.
+        if (domInfo.properlyDominates(producer.getOperation(), op) &&
+            domInfo.dominates(op, applySite.getOperation()) &&
+            isConflictingWrite(op, deps, aa))
+          return mlir::WalkResult::interrupt();
+
+        return mlir::WalkResult::advance();
+      });
+
+  // The walk is interrupted only when a conflicting write was found.
+  return !result.wasInterrupted();
+}
+
/// If the elemental has only two uses and those two are an apply operation and
/// a destroy operation, return those two, otherwise return {}
static std::optional<std::pair<hlfir::ApplyOp, hlfir::DestroyOp>>
-getTwoUses(hlfir::ElementalOp elemental) {
- mlir::Operation::user_range users = elemental->getUsers();
- // don't inline anything with more than one use (plus hfir.destroy)
- if (std::distance(users.begin(), users.end()) != 2) {
- return std::nullopt;
- }
-
+getTwoUses(hlfir::ElementalOp elemental, mlir::AliasAnalysis &aliasAnalysis) {
// If the ElementalOp must produce a temporary (e.g. for
// finalization purposes), then we cannot inline it.
if (hlfir::elementalOpMustProduceTemp(elemental))
return std::nullopt;
hlfir::ApplyOp apply;
hlfir::DestroyOp destroy;
- for (mlir::Operation *user : users)
- mlir::TypeSwitch<mlir::Operation *, void>(user)
- .Case([&](hlfir::ApplyOp op) { apply = op; })
- .Case([&](hlfir::DestroyOp op) { destroy = op; });
+ unsigned applyCount = 0;
+
+ llvm::SmallVector<mlir::Value> worklist;
+ worklist.push_back(elemental.getResult());
+ llvm::SmallPtrSet<mlir::Value, 16> visited;
- if (!apply || !destroy)
+ while (!worklist.empty()) {
+ mlir::Value current = worklist.pop_back_val();
+ if (!current || !visited.insert(current).second)
+ continue;
+
+ for (mlir::OpOperand &use : current.getUses()) {
+ mlir::Operation *user = use.getOwner();
+
+ mlir::TypeSwitch<mlir::Operation *, void>(user)
+ .Case<hlfir::ApplyOp>([&](hlfir::ApplyOp op) {
+ apply = op;
+ applyCount++;
+ })
+ .Case<hlfir::DestroyOp>([&](hlfir::DestroyOp op) {
+ // Track the mandatory destroy operation for the elemental expr.
+ destroy = op;
+ })
+ .Case<hlfir::DeclareOp>([&](hlfir::DeclareOp op) {
+ // Follow the dataflow through variable declarations.
+ worklist.push_back(op.getBase());
+ })
+ .Case<fir::ConvertOp>([&](fir::ConvertOp op) {
+ // Follow the dataflow through type conversions.
+ worklist.push_back(op.getResult());
+ })
+ .Case<mlir::BranchOpInterface>([&](mlir::BranchOpInterface branch) {
+ for (unsigned i = 0; i < branch->getNumSuccessors(); ++i) {
+ mlir::SuccessorOperands operands = branch.getSuccessorOperands(i);
+ for (unsigned j = 0; j < operands.size(); ++j) {
+ if (operands[j] == current) {
+ // The j-th operand of the branch maps to the j-th block
+ // argument of the successor block.
+ mlir::Block *successor = branch->getSuccessor(i);
+ worklist.push_back(successor->getArgument(j));
+ }
+ }
+ }
+ })
+ .Case<fir::ResultOp>([&](fir::ResultOp op) {
+ mlir::Operation *parent = op->getParentOp();
+ if (parent) {
+ for (auto it : llvm::enumerate(op.getOperands())) {
+ if (it.value() == current) {
+ // 'current' is being yielded. The value outside the loop is
+ // the i-th result of the parent operation.
+ unsigned i = it.index();
+ if (i < parent->getNumResults()) {
+ worklist.push_back(parent->getResult(i));
+ }
+ }
+ }
+ }
+ })
+ .Default([&](mlir::Operation *op) {
+ // If the elemental result is used by an operation with regions
+ // (like fir.if or fir.do_loop), the apply site may be nested
+ // inside.
+ if (op->getNumRegions() > 0) {
+ op->walk([&](hlfir::ApplyOp nestedApply) {
+ if (nestedApply.getExpr() == current) {
+ apply = nestedApply;
+ applyCount++;
+ }
+ });
+ }
+ });
+ }
+ }
+
+ // Only inline if there is a unique 'apply' site. Other users (such as
+ // intrinsic operations) are allowed because scalarizing the elemental
+ // renders the original array result redundant.
+ if (applyCount != 1 || !destroy)
+ return std::nullopt;
----------------
anoopkg6 wrote:
I added logic to track hasOtherUsers. We now only return the destroy operation for erasure if there is exactly one apply and no other consumers. This ensures memory safety for intrinsics while still allowing the apply site to be inlined.
https://github.com/llvm/llvm-project/pull/186916
More information about the flang-commits
mailing list