[flang-commits] [flang] [Flang] - Add optional inlining of allocatable assignments with hlfir.expr RHS (PR #186880)
Pranav Bhandarkar via flang-commits
flang-commits at lists.llvm.org
Tue Mar 17 07:16:43 PDT 2026
https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/186880
>From 8f7a2a3052a8921bfe6326aeaf3ab09c9d53989f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 27 Jan 2026 00:19:42 -0600
Subject: [PATCH] [Flang] - Add optional inlining of allocatable assignments
with hlfir.expr RHS
Add support for inlining hlfir.assign operations where the LHS is an
allocatable and the RHS is an hlfir.expr (e.g., from hlfir.elemental).
Since hlfir.expr values cannot alias with memory locations, these
assignments can be safely inlined without alias analysis.
The optimization is disabled by default and is enabled with the
-inline-hlfir-allocatable-expr-assign flag:
fir-opt -inline-hlfir-allocatable-expr-assign --inline-hlfir-assign file.fir
flang -mmlir -inline-hlfir-allocatable-expr-assign file.f90
The generated code uses fir::factory::genReallocIfNeeded to properly
handle Fortran allocatable assignment semantics (F2018 10.2.1.3):
- If not allocated: allocate with RHS shape
- If allocated with same shape: reuse existing allocation
- If allocated with different shape: reallocate
---
.../HLFIR/Transforms/InlineHLFIRAssign.cpp | 185 ++++++++++++++++++
.../inline-hlfir-assign-allocatable-expr.fir | 97 +++++++++
2 files changed, 282 insertions(+)
create mode 100644 flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
index 356552fac249d..db9826257cd8c 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
@@ -12,8 +12,11 @@
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
#include "flang/Optimizer/Analysis/ArraySectionAnalyzer.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
@@ -21,6 +24,8 @@
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
namespace hlfir {
#define GEN_PASS_DEF_INLINEHLFIRASSIGN
@@ -29,6 +34,12 @@ namespace hlfir {
#define DEBUG_TYPE "inline-hlfir-assign"
+// Opt-in switch (off by default). When set, the pass additionally registers
+// the pattern that inlines allocatable assignments whose RHS is an hlfir.expr.
+static llvm::cl::opt<bool> inlineAllocatableExprAssignFlag(
+    "inline-hlfir-allocatable-expr-assign", llvm::cl::init(false),
+    llvm::cl::desc("Enable inlining of allocatable assignments when RHS is an "
+                   "hlfir.expr (e.g., from hlfir.elemental)"));
+
namespace {
/// Expand hlfir.assign of array RHS to array LHS into a loop nest
/// of element-by-element assignments:
@@ -125,9 +136,175 @@ class InlineHLFIRAssignConversion
}
};
+/// Expand hlfir.assign of hlfir.expr RHS to allocatable LHS.
+/// When RHS is an hlfir.expr (e.g., from hlfir.elemental), there is no
+/// aliasing concern because expressions don't represent memory locations.
+/// This allows us to inline the assignment even for allocatables.
+///
+/// The pattern only matches when all of the following hold (otherwise it
+/// reports a match failure and leaves the operation untouched):
+///   - the assign is an allocatable assignment,
+///   - the RHS is an array-valued hlfir.expr,
+///   - the LHS and RHS element types are trivial and identical,
+///   - the LHS is the address of a box.
+///
+/// The generated code:
+///   1. Gets the shape from the RHS expression
+///   2. Uses genReallocIfNeeded to handle allocation/reallocation properly
+///   3. Generates a loop nest to assign elements (via storage handler callback)
+///   4. Finalizes the reallocation
+///
+/// Example transformation for: allocatable_array = elemental_expr
+///   hlfir.assign %expr to %alloc realloc : !hlfir.expr<?xf64>,
+///       !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+/// into:
+///   // Check allocation status and reallocate if needed
+///   // ... (genReallocIfNeeded handles this) ...
+///   // Loop over elements
+///   fir.do_loop %i = %c1 to %extent step %c1 unordered {
+///     %rhs_val = hlfir.apply %expr, %i : ...
+///     %lhs_elem = hlfir.designate %lhs_box (%i) : ...
+///     hlfir.assign %rhs_val to %lhs_elem : f64, !fir.ref<f64>
+///   }
+class InlineAllocatableExprAssignConversion
+    : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(hlfir::AssignOp assign,
+                  mlir::PatternRewriter &rewriter) const override {
+    // This pattern only handles allocatable assignments
+    if (!assign.isAllocatableAssignment())
+      return rewriter.notifyMatchFailure(
+          assign, "AssignOp is not an allocatable assignment");
+
+    hlfir::Entity rhs{assign.getRhs()};
+    hlfir::Entity lhs{assign.getLhs()};
+
+    // RHS must be an hlfir.expr (this is the key condition - no aliasing)
+    if (!mlir::isa<hlfir::ExprType>(rhs.getType()))
+      return rewriter.notifyMatchFailure(
+          assign,
+          "RHS is not an hlfir.expr - cannot inline allocatable assign");
+
+    // RHS must be an array
+    if (!rhs.isArray())
+      return rewriter.notifyMatchFailure(assign,
+                                         "AssignOp's RHS is not an array");
+
+    // Check element types are trivial and match
+    mlir::Type rhsEleTy = rhs.getFortranElementType();
+    if (!fir::isa_trivial(rhsEleTy))
+      return rewriter.notifyMatchFailure(
+          assign, "AssignOp's RHS data type is not trivial");
+
+    mlir::Type lhsEleTy = lhs.getFortranElementType();
+    if (!fir::isa_trivial(lhsEleTy))
+      return rewriter.notifyMatchFailure(
+          assign, "AssignOp's LHS data type is not trivial");
+
+    if (lhsEleTy != rhsEleTy)
+      return rewriter.notifyMatchFailure(assign,
+                                         "RHS/LHS element types mismatch");
+
+    // LHS must be a reference to a box (allocatable)
+    mlir::Type lhsType = lhs.getType();
+    if (!fir::isBoxAddress(lhsType))
+      return rewriter.notifyMatchFailure(assign,
+                                         "LHS is not a reference to a box");
+
+    LLVM_DEBUG(llvm::dbgs()
+               << "InlineHLFIRAssign: inlining allocatable expr assignment\n");
+
+    mlir::Location loc = assign->getLoc();
+    fir::FirOpBuilder builder(rewriter, assign.getOperation());
+    builder.setInsertionPoint(assign);
+
+    // Get the shape of the RHS expression
+    mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
+    llvm::SmallVector<mlir::Value> rhsExtents =
+        hlfir::getIndexExtents(loc, builder, rhsShape);
+
+    // Create a MutableBoxValue for the LHS allocatable. For trivial element
+    // types no length parameters are needed.
+    mlir::Value lhsBoxRef = lhs.getFirBase();
+    fir::MutableBoxValue mutableBox(lhsBoxRef, /*nonDeferredParams=*/{},
+                                    /*mutableProperties=*/{});
+
+    // Generate ones for lower bounds (Fortran default), one per RHS dimension
+    mlir::Value one =
+        builder.createIntegerConstant(loc, builder.getIndexType(), 1);
+    llvm::SmallVector<mlir::Value> lbounds(rhsExtents.size(), one);
+
+    // Use genReallocIfNeeded to handle allocation/reallocation properly.
+    // This implements Fortran 10.2.1.3 point 3:
+    // - If not allocated, allocate with RHS shape
+    // - If allocated with same shape, keep existing allocation
+    // - If allocated with different shape, reallocate
+    //
+    // The storage handler callback performs the actual assignment loop.
+    bool useWorkshare = flangomp::shouldUseWorkshareLowering(assign);
+    auto storageHandler = [&](fir::ExtendedValue storage) {
+      // Create an hlfir.declare for the storage so that the element
+      // addressing below can work on an hlfir::Entity.
+      auto declare = hlfir::genDeclare(loc, builder, storage, ".tmp.assign",
+                                       /*flags=*/{});
+      hlfir::Entity lhsEntity{declare.getBase()};
+
+      llvm::SmallVector<mlir::Value> extents =
+          fir::factory::getExtents(loc, builder, storage);
+
+      // Generate loop nest to assign elements
+      hlfir::LoopNest loopNest =
+          hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+                             useWorkshare);
+      builder.setInsertionPointToStart(loopNest.body);
+
+      // Get RHS element via hlfir.apply
+      hlfir::Entity rhsElement =
+          hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
+      rhsElement = hlfir::loadTrivialScalar(loc, builder, rhsElement);
+
+      // Get LHS element
+      hlfir::Entity lhsElement = hlfir::getElementAt(
+          loc, builder, lhsEntity, loopNest.oneBasedIndices);
+
+      // Assign the element (scalar, non-allocatable)
+      hlfir::AssignOp::create(builder, loc, rhsElement, lhsElement,
+                              /*realloc=*/false,
+                              /*keep_lhs_length_if_realloc=*/false,
+                              /*temporary_lhs=*/false);
+
+      // Restore insertion point after loop
+      builder.setInsertionPointAfter(loopNest.outerOp);
+    };
+
+    // No length params for trivial types
+    llvm::SmallVector<mlir::Value> lenParams;
+
+    // Generate reallocation logic with assignment in the callback
+    fir::factory::MutableBoxReallocation realloc =
+        fir::factory::genReallocIfNeeded(builder, loc, mutableBox, rhsExtents,
+                                         lenParams, storageHandler);
+
+    // Finalize: free old storage if reallocated and update the mutable box
+    fir::factory::finalizeRealloc(builder, loc, mutableBox, lbounds,
+                                  /*takeLboundsIfRealloc=*/true, realloc);
+
+    // Erase the original assign
+    rewriter.eraseOp(assign);
+    return mlir::success();
+  }
+};
+
class InlineHLFIRAssignPass
: public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
public:
+ using InlineHLFIRAssignBase<InlineHLFIRAssignPass>::InlineHLFIRAssignBase;
+
void runOnOperation() override {
mlir::MLIRContext *context = &getContext();
@@ -139,6 +316,14 @@ class InlineHLFIRAssignPass
mlir::RewritePatternSet patterns(context);
patterns.insert<InlineHLFIRAssignConversion>(context);
+ // Optionally add the allocatable expr assignment pattern
+ if (inlineAllocatableExprAssignFlag) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "InlineHLFIRAssign: enabling allocatable expr assignment "
+ "inlining\n");
+ patterns.insert<InlineAllocatableExprAssignConversion>(context);
+ }
+
if (mlir::failed(mlir::applyPatternsGreedily(
getOperation(), std::move(patterns), config))) {
mlir::emitError(getOperation()->getLoc(),
diff --git a/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir b/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir
new file mode 100644
index 0000000000000..452866b617f9a
--- /dev/null
+++ b/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir
@@ -0,0 +1,97 @@
+// Test inlining of hlfir.assign for allocatable LHS with hlfir.expr RHS.
+// This tests the -inline-hlfir-allocatable-expr-assign flag.
+
+// RUN: fir-opt --inline-hlfir-assign %s | FileCheck %s --check-prefix=DEFAULT
+// RUN: fir-opt -inline-hlfir-allocatable-expr-assign --inline-hlfir-assign %s | FileCheck %s --check-prefix=ENABLED
+
+// Test case: c = cos(a) where c is allocatable
+// This is derived from the flang-529628 test case.
+// The hlfir.elemental produces an hlfir.expr which doesn't alias with
+// memory, so the assignment can be safely inlined.
+
+func.func @test_allocatable_elemental_assign(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+
+ // Declare the allocatables: a (%arg0) is read by the elemental, c (%arg1) is the assignment target
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+ %c:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEc"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+
+ // Load a's box and query dimension 0; %dims#1 becomes the extent of the elemental's shape
+ %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+
+ // Create elemental cos(a); the index %i is shifted by (%dims#0 - 1) before designating a
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+ ^bb0(%i: index):
+ %lb_offset = arith.subi %dims#0, %c1 : index
+ %idx = arith.addi %i, %lb_offset : index
+ %a_elem = hlfir.designate %a_box (%idx) : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> !fir.ref<f64>
+ %a_val = fir.load %a_elem : !fir.ref<f64>
+ %cos_val = math.cos %a_val fastmath<contract> : f64
+ hlfir.yield_element %cos_val : f64
+ }
+
+ // Assign the elemental result to allocatable c with realloc: this is the op under test
+ hlfir.assign %elemental to %c#0 realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ hlfir.destroy %elemental : !hlfir.expr<?xf64>
+ return
+}
+
+// DEFAULT-LABEL: func.func @test_allocatable_elemental_assign
+// By default (without the option), the allocatable assign should NOT be inlined
+// DEFAULT: hlfir.assign %{{.*}} to %{{.*}} realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+
+// ENABLED-LABEL: func.func @test_allocatable_elemental_assign
+// With the option enabled, the assign should be inlined.
+// The generated code uses genReallocIfNeeded which creates fir.if for
+// allocation checking and a loop for assignment.
+// ENABLED-NOT: hlfir.assign %{{.*}} realloc
+// ENABLED: fir.if
+// ENABLED: fir.do_loop
+// ENABLED: hlfir.apply
+// ENABLED: hlfir.designate
+// ENABLED: hlfir.assign %{{.*}} : f64, !fir.ref<f64>
+
+
+// Test case: Non-trivial element type should NOT be inlined
+func.func @test_allocatable_nontrivial_type(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) {
+ %c10 = arith.constant 10 : index
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>)
+
+ // Character elemental: the element type is not trivial, so the assign must NOT be inlined even with the option
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.char<1,10>> {
+ ^bb0(%i: index):
+ %str = fir.undefined !fir.char<1,10>
+ hlfir.yield_element %str : !fir.char<1,10>
+ }
+
+ hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?x!fir.char<1,10>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>
+ hlfir.destroy %elemental : !hlfir.expr<?x!fir.char<1,10>>
+ return
+}
+
+// Character types are not trivial, so this should never be inlined
+// ENABLED-LABEL: func.func @test_allocatable_nontrivial_type
+// ENABLED: hlfir.assign %{{.*}} to %{{.*}} realloc : !hlfir.expr<?x!fir.char<1,10>>
+
+
+// Test case: Variable RHS (not hlfir.expr) should NOT be inlined by this pattern
+func.func @test_allocatable_variable_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+ %c10 = arith.constant 10 : index
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+ %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+ // The RHS is a variable (!fir.ref), not an hlfir.expr, so the allocatable pattern must NOT match
+ hlfir.assign %b#0 to %a#0 realloc : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ return
+}
+
+// Variable RHS should keep the original assign (allocatable pattern doesn't match)
+// ENABLED-LABEL: func.func @test_allocatable_variable_rhs
+// ENABLED: hlfir.assign %{{.*}} to %{{.*}} realloc : !fir.ref<!fir.array<10xf64>>
More information about the flang-commits
mailing list