[flang-commits] [flang] 87bab31 - [Flang] - Add optional inlining of allocatable assignments with hlfir.expr RHS (#186880)
via flang-commits
flang-commits at lists.llvm.org
Mon Mar 23 10:49:53 PDT 2026
Author: Pranav Bhandarkar
Date: 2026-03-23T12:49:49-05:00
New Revision: 87bab31c27dee5ec69022df3beaa018744c4e8d1
URL: https://github.com/llvm/llvm-project/commit/87bab31c27dee5ec69022df3beaa018744c4e8d1
DIFF: https://github.com/llvm/llvm-project/commit/87bab31c27dee5ec69022df3beaa018744c4e8d1.diff
LOG: [Flang] - Add optional inlining of allocatable assignments with hlfir.expr RHS (#186880)
Add support for inlining hlfir.assign operations where the LHS is an
allocatable and the RHS is an `hlfir.expr` (e.g., from
`hlfir.elemental`). Since `hlfir.expr` values cannot alias with memory
locations, these assignments can be safely inlined without alias
analysis.
The optimization is controlled by the
-inline-hlfir-allocatable-expr-assign flag:
fir-opt -inline-hlfir-allocatable-expr-assign --inline-hlfir-assign
file.fir
flang -mmlir -inline-hlfir-allocatable-expr-assign file.f90
The generated code uses `fir::factory::genReallocIfNeeded` to properly
handle Fortran allocatable assignment semantics (F2018 10.2.1.3):
- If not allocated: allocate with RHS shape
- If allocated with same shape: reuse existing allocation
- If allocated with different shape: reallocate
This is motivated by the increased link time that I am seeing when
`__FortranAAssign` is called from the user code. The details of the
problem are documented in
https://github.com/llvm/llvm-project/issues/187720
Added:
flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir
Modified:
flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
index 356552fac249d..160efede12bd5 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
@@ -12,8 +12,11 @@
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
#include "flang/Optimizer/Analysis/ArraySectionAnalyzer.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
@@ -21,6 +24,8 @@
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
namespace hlfir {
#define GEN_PASS_DEF_INLINEHLFIRASSIGN
@@ -29,6 +34,12 @@ namespace hlfir {
#define DEBUG_TYPE "inline-hlfir-assign"
+static llvm::cl::opt<bool> inlineAllocatableExprAssignFlag(
+ "inline-hlfir-allocatable-expr-assign",
+ llvm::cl::desc("Enable inlining of allocatable assignments when RHS is an "
+ "hlfir.expr (e.g., from hlfir.elemental)"),
+ llvm::cl::init(false));
+
namespace {
/// Expand hlfir.assign of array RHS to array LHS into a loop nest
/// of element-by-element assignments:
@@ -125,9 +136,158 @@ class InlineHLFIRAssignConversion
}
};
+/// Expand hlfir.assign of hlfir.expr RHS to allocatable LHS.
+/// When RHS is an hlfir.expr (e.g., from hlfir.elemental), there is no
+/// aliasing concern because expressions don't represent memory locations.
+/// This allows us to inline the assignment even for allocatables.
+///
+/// The generated code:
+/// 1. Gets the shape from the RHS expression
+/// 2. Uses genReallocIfNeeded to handle allocation/reallocation properly
+/// 3. Generates a loop nest to assign elements (via storage handler callback)
+/// 4. Finalizes the reallocation
+///
+/// Example transformation for: allocatable_array = elemental_expr
+/// hlfir.assign %expr to %alloc realloc : !hlfir.expr<?xf64>,
+/// !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+/// into:
+/// // Check allocation status and reallocate if needed
+/// // ... (genReallocIfNeeded handles this) ...
+/// // Loop over elements
+/// fir.do_loop %i = %c1 to %extent step %c1 unordered {
+/// %rhs_val = hlfir.apply %expr, %i : ...
+/// %lhs_elem = hlfir.designate %lhs_box (%i) : ...
+/// hlfir.assign %rhs_val to %lhs_elem : f64, !fir.ref<f64>
+/// }
+class InlineAllocatableExprAssignConversion
+ : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+ using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+ llvm::LogicalResult
+ matchAndRewrite(hlfir::AssignOp assign,
+ mlir::PatternRewriter &rewriter) const override {
+ // This pattern only handles allocatable assignments
+ if (!assign.isAllocatableAssignment())
+ return rewriter.notifyMatchFailure(
+ assign, "AssignOp is not an allocatable assignment");
+
+ hlfir::Entity rhs{assign.getRhs()};
+ hlfir::Entity lhs{assign.getLhs()};
+
+ // RHS must be an hlfir.expr (this is the key condition - no aliasing)
+ if (!mlir::isa<hlfir::ExprType>(rhs.getType()))
+ return rewriter.notifyMatchFailure(
+ assign,
+ "RHS is not an hlfir.expr - cannot inline allocatable assign");
+
+ // RHS must be an array
+ if (!rhs.isArray())
+ return rewriter.notifyMatchFailure(assign,
+ "AssignOp's RHS is not an array");
+
+ // Check element types are trivial and match
+ mlir::Type rhsEleTy = rhs.getFortranElementType();
+ if (!fir::isa_trivial(rhsEleTy))
+ return rewriter.notifyMatchFailure(
+ assign, "AssignOp's RHS data type is not trivial");
+
+ mlir::Type lhsEleTy = lhs.getFortranElementType();
+ if (!fir::isa_trivial(lhsEleTy))
+ return rewriter.notifyMatchFailure(
+ assign, "AssignOp's LHS data type is not trivial");
+
+ if (lhsEleTy != rhsEleTy)
+ return rewriter.notifyMatchFailure(assign,
+ "RHS/LHS element types mismatch");
+
+ // LHS must be a reference to a box (allocatable)
+ mlir::Type lhsType = lhs.getType();
+ if (!fir::isBoxAddress(lhsType))
+ return rewriter.notifyMatchFailure(assign,
+ "LHS is not a reference to a box");
+
+ LLVM_DEBUG(llvm::dbgs()
+ << "InlineHLFIRAssign: inlining allocatable expr assignment\n");
+
+ mlir::Location loc = assign->getLoc();
+ fir::FirOpBuilder builder(rewriter, assign.getOperation());
+ builder.setInsertionPoint(assign);
+
+ // Get the shape of the RHS expression
+ mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
+ llvm::SmallVector<mlir::Value> rhsExtents =
+ hlfir::getIndexExtents(loc, builder, rhsShape);
+
+ // Create a MutableBoxValue for the LHS allocatable
+ mlir::Value lhsBoxRef = lhs.getFirBase();
+
+ // Create MutableBoxValue - for trivial types, no length params needed
+ fir::MutableBoxValue mutableBox(lhsBoxRef, /*lenParameters=*/{},
+ /*mutableProperties=*/{});
+
+ // Use genReallocIfNeeded to handle allocation/reallocation properly.
+ // This implements Fortran 10.2.1.3 point 3:
+ // - If not allocated, allocate with RHS shape
+ // - If allocated with same shape, keep existing allocation
+ // - If allocated with different shape, reallocate
+ //
+ // The storage handler callback performs the actual assignment loop.
+ bool useWorkshare = flangomp::shouldUseWorkshareLowering(assign);
+ auto storageHandler = [&](fir::ExtendedValue storage) {
+ hlfir::Entity lhsEntity{
+ fir::getBase(fir::factory::createBoxValue(builder, loc, storage))};
+
+ llvm::SmallVector<mlir::Value> extents =
+ fir::factory::getExtents(loc, builder, storage);
+
+ // Generate loop nest to assign elements
+ hlfir::LoopNest loopNest = hlfir::genLoopNest(
+ loc, builder, extents, /*isUnordered=*/true, useWorkshare);
+ builder.setInsertionPointToStart(loopNest.body);
+
+ // Get RHS element via hlfir.apply
+ hlfir::Entity rhsElement =
+ hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
+ rhsElement = hlfir::loadTrivialScalar(loc, builder, rhsElement);
+
+ // Get LHS element
+ hlfir::Entity lhsElement = hlfir::getElementAt(loc, builder, lhsEntity,
+ loopNest.oneBasedIndices);
+
+ // Assign the element (scalar, non-allocatable)
+ hlfir::AssignOp::create(builder, loc, rhsElement, lhsElement,
+ /*realloc=*/false,
+ /*keep_lhs_length_if_realloc=*/false,
+ /*temporary_lhs=*/false);
+
+ // Restore insertion point after loop
+ builder.setInsertionPointAfter(loopNest.outerOp);
+ };
+
+ // No length params for trivial types
+ llvm::SmallVector<mlir::Value> lenParams;
+
+ // Generate reallocation logic with assignment in the callback
+ fir::factory::MutableBoxReallocation realloc =
+ fir::factory::genReallocIfNeeded(builder, loc, mutableBox, rhsExtents,
+ lenParams, storageHandler);
+
+ // Finalize: free old storage if reallocated and update the mutable box
+ fir::factory::finalizeRealloc(builder, loc, mutableBox, /*lbounds=*/{},
+ /*takeLboundsIfRealloc=*/true, realloc);
+
+ // Erase the original assign
+ rewriter.eraseOp(assign);
+ return mlir::success();
+ }
+};
+
class InlineHLFIRAssignPass
: public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
public:
+ using InlineHLFIRAssignBase<InlineHLFIRAssignPass>::InlineHLFIRAssignBase;
+
void runOnOperation() override {
mlir::MLIRContext *context = &getContext();
@@ -139,6 +299,14 @@ class InlineHLFIRAssignPass
mlir::RewritePatternSet patterns(context);
patterns.insert<InlineHLFIRAssignConversion>(context);
+ // Optionally add the allocatable expr assignment pattern
+ if (inlineAllocatableExprAssignFlag) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "InlineHLFIRAssign: enabling allocatable expr assignment "
+ "inlining\n");
+ patterns.insert<InlineAllocatableExprAssignConversion>(context);
+ }
+
if (mlir::failed(mlir::applyPatternsGreedily(
getOperation(), std::move(patterns), config))) {
mlir::emitError(getOperation()->getLoc(),
diff --git a/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir b/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir
new file mode 100644
index 0000000000000..6f3b5ea0eb794
--- /dev/null
+++ b/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir
@@ -0,0 +1,101 @@
+// Test inlining of hlfir.assign for allocatable LHS with hlfir.expr RHS.
+// This tests the -inline-hlfir-allocatable-expr-assign flag.
+
+// RUN: fir-opt --inline-hlfir-assign %s | FileCheck %s --check-prefix=DEFAULT
+// RUN: fir-opt -inline-hlfir-allocatable-expr-assign --inline-hlfir-assign %s | FileCheck %s --check-prefix=ENABLED
+
+// Test case: c = cos(a) where c is allocatable
+// This is derived from the flang-529628 test case.
+// The hlfir.elemental produces an hlfir.expr which doesn't alias with
+// memory, so the assignment can be safely inlined.
+
+func.func @test_allocatable_elemental_assign(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+
+ // Declare the allocatables
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+ %c:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEc"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+
+ // Load a to get its shape
+ %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+
+ // Create elemental: cos(a)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+ ^bb0(%i: index):
+ %lb_offset = arith.subi %dims#0, %c1 : index
+ %idx = arith.addi %i, %lb_offset : index
+ %a_elem = hlfir.designate %a_box (%idx) : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> !fir.ref<f64>
+ %a_val = fir.load %a_elem : !fir.ref<f64>
+ %cos_val = math.cos %a_val fastmath<contract> : f64
+ hlfir.yield_element %cos_val : f64
+ }
+
+ // Assign elemental result to allocatable c
+ hlfir.assign %elemental to %c#0 realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ hlfir.destroy %elemental : !hlfir.expr<?xf64>
+ return
+}
+
+// DEFAULT-LABEL: func.func @test_allocatable_elemental_assign
+// By default (without the option), the allocatable assign should NOT be inlined
+// DEFAULT: hlfir.assign %{{.*}} to %{{.*}} realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+
+// ENABLED-LABEL: func.func @test_allocatable_elemental_assign
+// With the option enabled, the assign should be inlined.
+// The generated code uses genReallocIfNeeded which creates fir.if for
+// allocation checking and a loop for assignment.
+// ENABLED-NOT: hlfir.assign %{{.*}} realloc
+// ENABLED: hlfir.elemental
+// ENABLED: %[[ORIG_MEM:.*]] = fir.box_addr %{{.*}} : (!fir.box<!fir.heap<!fir.array<?xf64>>>) -> !fir.heap<!fir.array<?xf64>>
+// ENABLED: %[[IF_RET:.*]]:2 = fir.if %[[COND:.*]] -> (i1, !fir.heap<!fir.array<?xf64>>) {
+// ENABLED: fir.do_loop
+// ENABLED: hlfir.apply
+// ENABLED: hlfir.designate
+// ENABLED: hlfir.assign %{{.*}} : f64, !fir.ref<f64>
+// ENABLED: fir.if %[[IF_RET]]#0 {
+// ENABLED: fir.if %[[COND]] {
+// ENABLED: fir.freemem %[[ORIG_MEM]] : !fir.heap<!fir.array<?xf64>>
+
+// Test case: Non-trivial element type should NOT be inlined
+func.func @test_allocatable_nontrivial_type(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) {
+ %c10 = arith.constant 10 : index
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>)
+
+ // Character elemental - should NOT be inlined even with the option
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.char<1,10>> {
+ ^bb0(%i: index):
+ %str = fir.undefined !fir.char<1,10>
+ hlfir.yield_element %str : !fir.char<1,10>
+ }
+
+ hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?x!fir.char<1,10>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>
+ hlfir.destroy %elemental : !hlfir.expr<?x!fir.char<1,10>>
+ return
+}
+
+// Character types are not trivial, so this should never be inlined
+// ENABLED-LABEL: func.func @test_allocatable_nontrivial_type
+// ENABLED: hlfir.assign %{{.*}} to %{{.*}} realloc : !hlfir.expr<?x!fir.char<1,10>>
+
+
+// Test case: Variable RHS (not hlfir.expr) should NOT be inlined by this pattern
+func.func @test_allocatable_variable_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+ %c10 = arith.constant 10 : index
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+ %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+ // Variable RHS - NOT an hlfir.expr, so the allocatable pattern should NOT match
+ hlfir.assign %b#0 to %a#0 realloc : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ return
+}
+
+// Variable RHS should keep the original assign (allocatable pattern doesn't match)
+// ENABLED-LABEL: func.func @test_allocatable_variable_rhs
+// ENABLED: hlfir.assign %{{.*}} to %{{.*}} realloc : !fir.ref<!fir.array<10xf64>>
More information about the flang-commits
mailing list