[flang-commits] [flang] 87bab31 - [Flang] - Add optional inlining of allocatable assignments with hlfir.expr RHS (#186880)

via flang-commits flang-commits at lists.llvm.org
Mon Mar 23 10:49:53 PDT 2026


Author: Pranav Bhandarkar
Date: 2026-03-23T12:49:49-05:00
New Revision: 87bab31c27dee5ec69022df3beaa018744c4e8d1

URL: https://github.com/llvm/llvm-project/commit/87bab31c27dee5ec69022df3beaa018744c4e8d1
DIFF: https://github.com/llvm/llvm-project/commit/87bab31c27dee5ec69022df3beaa018744c4e8d1.diff

LOG: [Flang] - Add optional inlining of allocatable assignments with hlfir.expr RHS (#186880)

Add support for inlining hlfir.assign operations where the LHS is an
allocatable and the RHS is an `hlfir.expr` (e.g., from
`hlfir.elemental`). Since `hlfir.expr` values cannot alias with memory
locations, these assignments can be safely inlined without alias
analysis.

The optimization is controlled by the
-inline-hlfir-allocatable-expr-assign flag:
  fir-opt -inline-hlfir-allocatable-expr-assign --inline-hlfir-assign file.fir
  flang -mmlir -inline-hlfir-allocatable-expr-assign file.f90

The generated code uses `fir::factory::genReallocIfNeeded` to properly
handle Fortran allocatable assignment semantics (F2018 10.2.1.3):
- If not allocated: allocate with RHS shape
- If allocated with same shape: reuse existing allocation
- If allocated with different shape: reallocate

This is motivated by the increased link time that I am seeing when
`__FortranAAssign` is called from the user code. The details of the
problem are documented in
https://github.com/llvm/llvm-project/issues/187720

Added: 
    flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir

Modified: 
    flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp

Removed: 
    


################################################################################
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
index 356552fac249d..160efede12bd5 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
@@ -12,8 +12,11 @@
 
 #include "flang/Optimizer/Analysis/AliasAnalysis.h"
 #include "flang/Optimizer/Analysis/ArraySectionAnalyzer.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
 #include "flang/Optimizer/OpenMP/Passes.h"
@@ -21,6 +24,8 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 
 namespace hlfir {
 #define GEN_PASS_DEF_INLINEHLFIRASSIGN
@@ -29,6 +34,12 @@ namespace hlfir {
 
 #define DEBUG_TYPE "inline-hlfir-assign"
 
+static llvm::cl::opt<bool> inlineAllocatableExprAssignFlag(
+    "inline-hlfir-allocatable-expr-assign",
+    llvm::cl::desc("Enable inlining of allocatable assignments when RHS is an "
+                   "hlfir.expr (e.g., from hlfir.elemental)"),
+    llvm::cl::init(false));
+
 namespace {
 /// Expand hlfir.assign of array RHS to array LHS into a loop nest
 /// of element-by-element assignments:
@@ -125,9 +136,158 @@ class InlineHLFIRAssignConversion
   }
 };
 
+/// Expand hlfir.assign of hlfir.expr RHS to allocatable LHS.
+/// When RHS is an hlfir.expr (e.g., from hlfir.elemental), there is no
+/// aliasing concern because expressions don't represent memory locations.
+/// This allows us to inline the assignment even for allocatables.
+///
+/// The generated code:
+/// 1. Gets the shape from the RHS expression
+/// 2. Uses genReallocIfNeeded to handle allocation/reallocation properly
+/// 3. Generates a loop nest to assign elements (via storage handler callback)
+/// 4. Finalizes the reallocation
+///
+/// Example transformation for: allocatable_array = elemental_expr
+///   hlfir.assign %expr to %alloc realloc : !hlfir.expr<?xf64>,
+///                                          !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+/// into:
+///   // Check allocation status and reallocate if needed
+///   // ... (genReallocIfNeeded handles this) ...
+///   // Loop over elements
+///   fir.do_loop %i = %c1 to %extent step %c1 unordered {
+///     %rhs_val = hlfir.apply %expr, %i : ...
+///     %lhs_elem = hlfir.designate %lhs_box (%i) : ...
+///     hlfir.assign %rhs_val to %lhs_elem : f64, !fir.ref<f64>
+///   }
+class InlineAllocatableExprAssignConversion
+    : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(hlfir::AssignOp assign,
+                  mlir::PatternRewriter &rewriter) const override {
+    // This pattern only handles allocatable assignments
+    if (!assign.isAllocatableAssignment())
+      return rewriter.notifyMatchFailure(
+          assign, "AssignOp is not an allocatable assignment");
+
+    hlfir::Entity rhs{assign.getRhs()};
+    hlfir::Entity lhs{assign.getLhs()};
+
+    // RHS must be an hlfir.expr (this is the key condition - no aliasing)
+    if (!mlir::isa<hlfir::ExprType>(rhs.getType()))
+      return rewriter.notifyMatchFailure(
+          assign,
+          "RHS is not an hlfir.expr - cannot inline allocatable assign");
+
+    // RHS must be an array
+    if (!rhs.isArray())
+      return rewriter.notifyMatchFailure(assign,
+                                         "AssignOp's RHS is not an array");
+
+    // Check element types are trivial and match
+    mlir::Type rhsEleTy = rhs.getFortranElementType();
+    if (!fir::isa_trivial(rhsEleTy))
+      return rewriter.notifyMatchFailure(
+          assign, "AssignOp's RHS data type is not trivial");
+
+    mlir::Type lhsEleTy = lhs.getFortranElementType();
+    if (!fir::isa_trivial(lhsEleTy))
+      return rewriter.notifyMatchFailure(
+          assign, "AssignOp's LHS data type is not trivial");
+
+    if (lhsEleTy != rhsEleTy)
+      return rewriter.notifyMatchFailure(assign,
+                                         "RHS/LHS element types mismatch");
+
+    // LHS must be a reference to a box (allocatable)
+    mlir::Type lhsType = lhs.getType();
+    if (!fir::isBoxAddress(lhsType))
+      return rewriter.notifyMatchFailure(assign,
+                                         "LHS is not a reference to a box");
+
+    LLVM_DEBUG(llvm::dbgs()
+               << "InlineHLFIRAssign: inlining allocatable expr assignment\n");
+
+    mlir::Location loc = assign->getLoc();
+    fir::FirOpBuilder builder(rewriter, assign.getOperation());
+    builder.setInsertionPoint(assign);
+
+    // Get the shape of the RHS expression
+    mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
+    llvm::SmallVector<mlir::Value> rhsExtents =
+        hlfir::getIndexExtents(loc, builder, rhsShape);
+
+    // Create a MutableBoxValue for the LHS allocatable
+    mlir::Value lhsBoxRef = lhs.getFirBase();
+
+    // Create MutableBoxValue - for trivial types, no length params needed
+    fir::MutableBoxValue mutableBox(lhsBoxRef, /*lenParameters=*/{},
+                                    /*mutableProperties=*/{});
+
+    // Use genReallocIfNeeded to handle allocation/reallocation properly.
+    // This implements Fortran 10.2.1.3 point 3:
+    // - If not allocated, allocate with RHS shape
+    // - If allocated with same shape, keep existing allocation
+    // - If allocated with different shape, reallocate
+    //
+    // The storage handler callback performs the actual assignment loop.
+    bool useWorkshare = flangomp::shouldUseWorkshareLowering(assign);
+    auto storageHandler = [&](fir::ExtendedValue storage) {
+      hlfir::Entity lhsEntity{
+          fir::getBase(fir::factory::createBoxValue(builder, loc, storage))};
+
+      llvm::SmallVector<mlir::Value> extents =
+          fir::factory::getExtents(loc, builder, storage);
+
+      // Generate loop nest to assign elements
+      hlfir::LoopNest loopNest = hlfir::genLoopNest(
+          loc, builder, extents, /*isUnordered=*/true, useWorkshare);
+      builder.setInsertionPointToStart(loopNest.body);
+
+      // Get RHS element via hlfir.apply
+      hlfir::Entity rhsElement =
+          hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
+      rhsElement = hlfir::loadTrivialScalar(loc, builder, rhsElement);
+
+      // Get LHS element
+      hlfir::Entity lhsElement = hlfir::getElementAt(loc, builder, lhsEntity,
+                                                     loopNest.oneBasedIndices);
+
+      // Assign the element (scalar, non-allocatable)
+      hlfir::AssignOp::create(builder, loc, rhsElement, lhsElement,
+                              /*realloc=*/false,
+                              /*keep_lhs_length_if_realloc=*/false,
+                              /*temporary_lhs=*/false);
+
+      // Restore insertion point after loop
+      builder.setInsertionPointAfter(loopNest.outerOp);
+    };
+
+    // No length params for trivial types
+    llvm::SmallVector<mlir::Value> lenParams;
+
+    // Generate reallocation logic with assignment in the callback
+    fir::factory::MutableBoxReallocation realloc =
+        fir::factory::genReallocIfNeeded(builder, loc, mutableBox, rhsExtents,
+                                         lenParams, storageHandler);
+
+    // Finalize: free old storage if reallocated and update the mutable box
+    fir::factory::finalizeRealloc(builder, loc, mutableBox, /*lbounds=*/{},
+                                  /*takeLboundsIfRealloc=*/true, realloc);
+
+    // Erase the original assign
+    rewriter.eraseOp(assign);
+    return mlir::success();
+  }
+};
+
 class InlineHLFIRAssignPass
     : public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
 public:
+  using InlineHLFIRAssignBase<InlineHLFIRAssignPass>::InlineHLFIRAssignBase;
+
   void runOnOperation() override {
     mlir::MLIRContext *context = &getContext();
 
@@ -139,6 +299,14 @@ class InlineHLFIRAssignPass
     mlir::RewritePatternSet patterns(context);
     patterns.insert<InlineHLFIRAssignConversion>(context);
 
+    // Optionally add the allocatable expr assignment pattern
+    if (inlineAllocatableExprAssignFlag) {
+      LLVM_DEBUG(llvm::dbgs()
+                 << "InlineHLFIRAssign: enabling allocatable expr assignment "
+                    "inlining\n");
+      patterns.insert<InlineAllocatableExprAssignConversion>(context);
+    }
+
     if (mlir::failed(mlir::applyPatternsGreedily(
             getOperation(), std::move(patterns), config))) {
       mlir::emitError(getOperation()->getLoc(),

diff --git a/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir b/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir
new file mode 100644
index 0000000000000..6f3b5ea0eb794
--- /dev/null
+++ b/flang/test/HLFIR/inline-hlfir-assign-allocatable-expr.fir
@@ -0,0 +1,101 @@
+// Test inlining of hlfir.assign for allocatable LHS with hlfir.expr RHS.
+// This tests the -inline-hlfir-allocatable-expr-assign flag.
+
+// RUN: fir-opt --inline-hlfir-assign %s | FileCheck %s --check-prefix=DEFAULT
+// RUN: fir-opt -inline-hlfir-allocatable-expr-assign --inline-hlfir-assign %s | FileCheck %s --check-prefix=ENABLED
+
+// Test case: c = cos(a) where c is allocatable
+// This is derived from the flang-529628 test case.
+// The hlfir.elemental produces an hlfir.expr which doesn't alias with
+// memory, so the assignment can be safely inlined.
+
+func.func @test_allocatable_elemental_assign(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  // Declare the allocatables
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+  %c:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEc"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+
+  // Load a to get its shape
+  %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+  %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+
+  // Create elemental: cos(a)
+  %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+  ^bb0(%i: index):
+    %lb_offset = arith.subi %dims#0, %c1 : index
+    %idx = arith.addi %i, %lb_offset : index
+    %a_elem = hlfir.designate %a_box (%idx) : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> !fir.ref<f64>
+    %a_val = fir.load %a_elem : !fir.ref<f64>
+    %cos_val = math.cos %a_val fastmath<contract> : f64
+    hlfir.yield_element %cos_val : f64
+  }
+
+  // Assign elemental result to allocatable c
+  hlfir.assign %elemental to %c#0 realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  hlfir.destroy %elemental : !hlfir.expr<?xf64>
+  return
+}
+
+// DEFAULT-LABEL: func.func @test_allocatable_elemental_assign
+// By default (without the option), the allocatable assign should NOT be inlined
+// DEFAULT: hlfir.assign %{{.*}} to %{{.*}} realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+
+// ENABLED-LABEL: func.func @test_allocatable_elemental_assign
+// With the option enabled, the assign should be inlined: genReallocIfNeeded
+// emits fir.if allocation checks around a loop that assigns the elements.
+// The final negative check verifies no realloc assign is left after them.
+// ENABLED: hlfir.elemental
+// ENABLED: %[[ORIG_MEM:.*]] = fir.box_addr %{{.*}} : (!fir.box<!fir.heap<!fir.array<?xf64>>>) -> !fir.heap<!fir.array<?xf64>>
+// ENABLED: %[[IF_RET:.*]]:2 = fir.if %[[COND:.*]] -> (i1, !fir.heap<!fir.array<?xf64>>) {
+// ENABLED: fir.do_loop
+// ENABLED: hlfir.apply
+// ENABLED: hlfir.designate
+// ENABLED: hlfir.assign %{{.*}} : f64, !fir.ref<f64>
+// ENABLED: fir.if %[[IF_RET]]#0 {
+// ENABLED: fir.if %[[COND]] {
+// ENABLED: fir.freemem %[[ORIG_MEM]] : !fir.heap<!fir.array<?xf64>>
+// ENABLED-NOT: hlfir.assign %{{.*}} realloc
+
+// Test case: Non-trivial element type should NOT be inlined
+func.func @test_allocatable_nontrivial_type(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>)
+
+  // Character elemental - should NOT be inlined even with the option
+  %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.char<1,10>> {
+  ^bb0(%i: index):
+    %str = fir.undefined !fir.char<1,10>
+    hlfir.yield_element %str : !fir.char<1,10>
+  }
+
+  hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?x!fir.char<1,10>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>
+  hlfir.destroy %elemental : !hlfir.expr<?x!fir.char<1,10>>
+  return
+}
+
+// Character types are not trivial, so this should never be inlined
+// ENABLED-LABEL: func.func @test_allocatable_nontrivial_type
+// ENABLED: hlfir.assign %{{.*}} to %{{.*}} realloc : !hlfir.expr<?x!fir.char<1,10>>
+
+
+// Test case: Variable RHS (not hlfir.expr) should NOT be inlined by this pattern
+func.func @test_allocatable_variable_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+  %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+  // Variable RHS - NOT an hlfir.expr, so the allocatable pattern should NOT match
+  hlfir.assign %b#0 to %a#0 realloc : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  return
+}
+
+// Variable RHS should keep the original assign (allocatable pattern doesn't match)
+// ENABLED-LABEL: func.func @test_allocatable_variable_rhs
+// ENABLED: hlfir.assign %{{.*}} to %{{.*}} realloc : !fir.ref<!fir.array<10xf64>>


        


More information about the flang-commits mailing list