[flang-commits] [flang] [flang][HLFIR] Add SeparateAllocatableAssign pass (PR #197814)

via flang-commits flang-commits at lists.llvm.org
Tue Jun 2 04:21:34 PDT 2026


https://github.com/khaki3 updated https://github.com/llvm/llvm-project/pull/197814

>From a90568ce053c2e462f0b46cbd19d06789a17fe4f Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Tue, 26 May 2026 22:01:33 -0700
Subject: [PATCH] [flang][HLFIR] Add SeparateAllocatableAssign pass

Add a new HLFIR pass that transforms

  hlfir.assign %rhs to %lhs realloc

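into an explicit conditional [re-]allocation of the LHS followed by a
non-realloc hlfir.assign on the resulting box.  Only array assignments
whose LHS and RHS share the same trivial element type are rewritten.
Lower bounds for the newly allocated LHS are taken from LBOUND(rhs) per
F2018 10.2.1.3.  The transformation is skipped when a variable RHS may
alias the LHS (e.g. a = a(:n)) so that the runtime's overlap-aware
in-place handling stays in effect for those cases.  An hlfir.expr RHS is
always split; keeping its evaluation from moving across the deallocation
is left to the hlfir.assign lowering / expression bufferization.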

The pass runs at every optimization level (including -O0).  Splitting
the allocation from the per-element copy exposes the allocation as
plain FIR ops, which:

  - lets downstream MLIR analyses/transforms reason about the realloc
    (rather than seeing an opaque _FortranAAssign runtime call), and
  - leaves a simple hlfir.assign that the existing InlineHLFIRAssign
    pattern can inline without also having to inline the allocation
    logic.

This is particularly useful for OpenACC/OpenMP offloading, where host-
side allocation has to be separable from the device-side computation.
---
 flang/include/flang/Optimizer/HLFIR/Passes.td |  10 +
 .../Optimizer/HLFIR/Transforms/CMakeLists.txt |   1 +
 .../Transforms/OptimizedBufferization.cpp     |  22 +++
 .../Transforms/SeparateAllocatableAssign.cpp  | 175 +++++++++++++++++
 flang/lib/Optimizer/Passes/Pipelines.cpp      |   2 +
 .../test/Driver/mlir-debug-pass-pipeline.f90  |   5 +
 flang/test/Driver/mlir-pass-pipeline.f90      |   5 +
 flang/test/Fir/basic-program.fir              |   5 +
 .../opt-bufferization-dealloc-conflict.fir    |  32 ++++
 .../HLFIR/separate-allocatable-assign.fir     | 181 ++++++++++++++++++
 .../Integration/OpenMP/workshare-axpy.f90     |  10 +-
 11 files changed, 442 insertions(+), 6 deletions(-)
 create mode 100644 flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
 create mode 100644 flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
 create mode 100644 flang/test/HLFIR/separate-allocatable-assign.fir

diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td
index 7ef4e5e9c4ade..4973715c1055c 100644
--- a/flang/include/flang/Optimizer/HLFIR/Passes.td
+++ b/flang/include/flang/Optimizer/HLFIR/Passes.td
@@ -84,6 +84,16 @@ def InlineElementals : Pass<"inline-elementals"> {
   let summary = "Inline chained hlfir.elemental operations";
 }
 
+def SeparateAllocatableAssign : Pass<"separate-allocatable-assign"> {
+  let summary = "Separate reallocation from allocatable array assignments";
+  let description = [{
+    Transform `hlfir.assign %rhs to %lhs realloc` into a conditional
+    reallocation of the LHS followed by a non-realloc `hlfir.assign`.
+    This separates host-side allocation from device-side computation
+    for OpenACC/OpenMP offloading and runs at all optimization levels.
+  }];
+}
+
 def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
   let summary = "Inline hlfir.assign operations";
   let options = [Option<
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index 5c24fe58b05c4..c0c64c19e3826 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -6,6 +6,7 @@ add_flang_library(HLFIRTransforms
   ExpressionSimplification.cpp
   InlineElementals.cpp
   InlineHLFIRAssign.cpp
+  SeparateAllocatableAssign.cpp
   InlineHLFIRCopyIn.cpp
   LowerHLFIRIntrinsics.cpp
   LowerHLFIROrderedAssignments.cpp
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index ede5aeab2436e..51af673406b4a 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -401,6 +401,28 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) const {
     return std::nullopt;
   }
   for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
+    // A deallocation between the elemental and the assignment would invalidate
+    // memory accessed by the elemental once its evaluation is moved down to the
+    // assignment. containsReadOrWriteEffectOn only covers Read/Write effects,
+    // so MemoryEffects::Free is checked explicitly here.
+    if (mlir::isa<mlir::MemoryEffects::Free>(effect.getEffect())) {
+      mlir::Value freed = effect.getValue();
+      auto mayAccessFreed = [&](llvm::ArrayRef<mlir::Value> vals) {
+        if (!freed)
+          return true; // unknown freed memory - be conservative
+        for (mlir::Value val : vals)
+          if (!aliasAnalysis.alias(val, freed).isNo())
+            return true;
+        return false;
+      };
+      if (mayAccessFreed(notToBeWrittenBeforeAssign) ||
+          mayAccessFreed(notToBeAccessedBeforeAssign)) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "disallowed deallocation between elemental and assign: "
+                   << freed << " for " << elemental.getLoc() << "\n");
+        return std::nullopt;
+      }
+    }
     // not safe to access anything written in the elemental as this write
     // will be moved to the assignment
     for (mlir::Value val : notToBeAccessedBeforeAssign) {
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
new file mode 100644
index 0000000000000..0160ff7d75f76
--- /dev/null
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
@@ -0,0 +1,175 @@
+//===- SeparateAllocatableAssign.cpp - Split realloc from assign ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Transform hlfir.assign with realloc semantics into a conditional
+// reallocation of the LHS followed by a plain hlfir.assign (without realloc).
+//
+// Before:
+//   hlfir.assign %rhs to %lhs realloc
+//
+// After:
+//   %shape = shape_of(%rhs)
+//   %new_lhs = genReallocIfNeeded(%lhs, %shape)  // host-side alloc
+//   hlfir.assign %rhs to %new_lhs                // element copy
+//
+// This is useful for OpenACC/OpenMP offloading where the allocation must
+// happen on the host before entering a device compute region.
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/Debug.h"
+
+namespace hlfir {
+#define GEN_PASS_DEF_SEPARATEALLOCATABLEASSIGN
+#include "flang/Optimizer/HLFIR/Passes.h.inc"
+} // namespace hlfir
+
+#define DEBUG_TYPE "separate-allocatable-assign"
+
+namespace {
+
+class SeparateAllocatableAssignConversion
+    : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(hlfir::AssignOp assign,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (!assign.isAllocatableAssignment())
+      return rewriter.notifyMatchFailure(assign, "not an allocatable assign");
+
+    hlfir::Entity rhs{assign.getRhs()};
+    hlfir::Entity lhs{assign.getLhs()};
+
+    if (!rhs.isArray())
+      return rewriter.notifyMatchFailure(assign, "RHS is not an array");
+
+    if (!lhs.isArray())
+      return rewriter.notifyMatchFailure(assign, "LHS is not an array");
+
+    mlir::Type rhsEleTy = rhs.getFortranElementType();
+    if (!fir::isa_trivial(rhsEleTy))
+      return rewriter.notifyMatchFailure(assign, "RHS type is not trivial");
+
+    mlir::Type lhsEleTy = lhs.getFortranElementType();
+    if (!fir::isa_trivial(lhsEleTy))
+      return rewriter.notifyMatchFailure(assign, "LHS type is not trivial");
+
+    if (lhsEleTy != rhsEleTy)
+      return rewriter.notifyMatchFailure(assign, "element type mismatch");
+
+    if (!fir::isBoxAddress(lhs.getType()))
+      return rewriter.notifyMatchFailure(assign, "LHS is not a box address");
+
+    mlir::Location loc = assign->getLoc();
+    fir::FirOpBuilder builder(rewriter, assign.getOperation());
+    builder.setInsertionPoint(assign);
+
+    // Reallocation frees the old LHS storage. If the RHS reads that same
+    // storage, producing the assigned value would be a use-after-free.
+    //
+    // For a variable RHS, query fir::AliasAnalysis to decide whether the RHS
+    // may access the LHS data and bail out if so. The aliasing question is
+    // about the *data* the allocatable points to, not the descriptor address:
+    // the RHS may reach the same storage through a different descriptor (e.g.
+    // a pointer or a function result whose local descriptor does not alias the
+    // LHS descriptor). To make the analysis reason about the data, materialize
+    // a temporary load of the LHS descriptor (a loaded fir.box is a data view)
+    // and use it as the LHS value in the query, then erase it before acting
+    // on the result so the temporary never survives, even on match failure.
+    //
+    // For an hlfir.expr RHS, the realloc is split out and the (lazy)
+    // expression evaluation is left in place before it. Keeping the expression
+    // evaluation from being moved across the deallocation is the
+    // responsibility of the hlfir.assign lowering / expression bufferization,
+    // so no aliasing analysis is performed here.
+    if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
+      fir::AliasAnalysis aliasAnalysis;
+      auto lhsDataView = fir::LoadOp::create(builder, loc, lhs.getFirBase());
+      mlir::AliasResult aliasRes =
+          aliasAnalysis.alias(lhsDataView.getResult(), assign.getRhs());
+      rewriter.eraseOp(lhsDataView);
+      if (!aliasRes.isNo())
+        return rewriter.notifyMatchFailure(assign, "LHS and RHS may alias");
+    }
+
+    LLVM_DEBUG(llvm::dbgs() << "SeparateAllocatableAssign: splitting realloc "
+                               "from assign\n");
+
+    mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
+    llvm::SmallVector<mlir::Value> rhsExtents =
+        hlfir::getIndexExtents(loc, builder, rhsShape);
+
+    // F2018 10.2.1.3: a (re-)allocated LHS takes its lower bounds from
+    // LBOUND(rhs). A variable RHS supplies them via genBounds; an hlfir.expr
+    // RHS always has LBOUND 1, so rhsLbounds is left empty in that case.
+    llvm::SmallVector<mlir::Value> rhsLbounds;
+    if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
+      auto bounds = hlfir::genBounds(loc, builder, rhs);
+      for (auto &[lb, ub] : bounds)
+        rhsLbounds.push_back(lb);
+    }
+
+    fir::MutableBoxValue mutableBox(lhs.getFirBase(), /*lenParameters=*/{},
+                                    /*mutableProperties=*/{});
+
+    auto noopHandler = [](fir::ExtendedValue) {};
+    llvm::SmallVector<mlir::Value> lenParams;
+    fir::factory::MutableBoxReallocation realloc =
+        fir::factory::genReallocIfNeeded(builder, loc, mutableBox, rhsExtents,
+                                         lenParams, noopHandler);
+    fir::factory::finalizeRealloc(builder, loc, mutableBox, rhsLbounds,
+                                  /*takeLboundsIfRealloc=*/true, realloc);
+
+    mlir::Value lhsBox = fir::LoadOp::create(builder, loc, lhs.getFirBase());
+    hlfir::AssignOp::create(builder, loc, rhs, lhsBox,
+                            /*realloc=*/false,
+                            /*keep_lhs_length_if_realloc=*/false,
+                            assign.isTemporaryLHS());
+
+    rewriter.eraseOp(assign);
+    return mlir::success();
+  }
+};
+
+class SeparateAllocatableAssignPass
+    : public hlfir::impl::SeparateAllocatableAssignBase<
+          SeparateAllocatableAssignPass> {
+public:
+  using SeparateAllocatableAssignBase<
+      SeparateAllocatableAssignPass>::SeparateAllocatableAssignBase;
+
+  void runOnOperation() override {
+    mlir::MLIRContext *context = &getContext();
+
+    mlir::GreedyRewriteConfig config;
+    config.setRegionSimplificationLevel(
+        mlir::GreedySimplifyRegionLevel::Disabled);
+
+    mlir::RewritePatternSet patterns(context);
+    patterns.insert<SeparateAllocatableAssignConversion>(context);
+
+    if (mlir::failed(mlir::applyPatternsGreedily(
+            getOperation(), std::move(patterns), config))) {
+      mlir::emitError(getOperation()->getLoc(),
+                      "failure in separate-allocatable-assign");
+      signalPassFailure();
+    }
+  }
+};
+} // namespace
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 2b80da308a7d4..682e3e48e0a22 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -291,6 +291,8 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
   }
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, hlfir::createInlineElementals);
+  addNestedPassToAllTopLevelOperations<PassConstructor>(
+      pm, hlfir::createSeparateAllocatableAssign);
   if (optLevel.isOptimizingForSpeed()) {
     addCanonicalizerPassWithoutRegionSimplification(pm);
     pm.addPass(mlir::createCSEPass());
diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90
index 62f8f98d028a8..c5e63fdbd9d2b 100644
--- a/flang/test/Driver/mlir-debug-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90
@@ -31,18 +31,23 @@
 ! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_mapper', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT: 'fir.global' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: 'func.func' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: 'omp.declare_mapper' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: 'omp.declare_reduction' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: 'omp.private' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: LowerHLFIROrderedAssignments
 ! ALL-NEXT: LowerHLFIRIntrinsics
diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 3719113367018..a7ea0a9de4867 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -31,22 +31,27 @@
 ! ALL-NEXT:'fir.global' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT:'func.func' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT:'omp.declare_mapper' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT:'omp.declare_reduction' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT:'omp.private' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! O2-NEXT: Canonicalizer
 ! O2-NEXT: CSE
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index d76fbc3df0704..2502a2a89a539 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -21,18 +21,23 @@ func.func @_QQmain() {
 // PASSES-NEXT: 'fir.global' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT: 'func.func' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT:  'omp.declare_mapper' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT: 'omp.declare_reduction' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT: 'omp.private' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT:   Canonicalizer
 // PASSES-NEXT:   CSE
 // PASSES-NEXT:    (S) 0 num-cse'd - Number of operations CSE'd
diff --git a/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir b/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
new file mode 100644
index 0000000000000..2b5eda169a7fc
--- /dev/null
+++ b/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
@@ -0,0 +1,32 @@
+// RUN: fir-opt --opt-bufferization %s | FileCheck %s
+
+// Verify that ElementalAssignBufferization does NOT fuse an elemental into the
+// assignment when a deallocation between the elemental and the assignment frees
+// memory that the elemental reads. Moving the elemental evaluation down to the
+// assignment would read freed memory.
+
+func.func @dealloc_conflict(%dst: !fir.box<!fir.array<?xf32>>, %n: index) {
+  %c1 = arith.constant 1 : index
+  %cst = arith.constant 1.000000e+00 : f32
+  %shape = fir.shape %n : (index) -> !fir.shape<1>
+  %heap = fir.allocmem !fir.array<?xf32>, %n {uniq_name = ".src"}
+  %src = fir.embox %heap(%shape) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+  %elem = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+  ^bb0(%i: index):
+    %d = hlfir.designate %src (%i) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+    %v = fir.load %d : !fir.ref<f32>
+    %s = arith.addf %v, %cst fastmath<contract> : f32
+    hlfir.yield_element %s : f32
+  }
+  // Deallocation of the elemental's read source between elemental and assign.
+  fir.freemem %heap : !fir.heap<!fir.array<?xf32>>
+  hlfir.assign %elem to %dst : !hlfir.expr<?xf32>, !fir.box<!fir.array<?xf32>>
+  hlfir.destroy %elem : !hlfir.expr<?xf32>
+  return
+}
+
+// CHECK-LABEL: func.func @dealloc_conflict
+// The elemental and the array-level assign must be preserved (no fusion).
+// CHECK: hlfir.elemental
+// CHECK: fir.freemem
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !hlfir.expr<?xf32>, !fir.box<!fir.array<?xf32>>
diff --git a/flang/test/HLFIR/separate-allocatable-assign.fir b/flang/test/HLFIR/separate-allocatable-assign.fir
new file mode 100644
index 0000000000000..97c664c38a94f
--- /dev/null
+++ b/flang/test/HLFIR/separate-allocatable-assign.fir
@@ -0,0 +1,181 @@
+// Test the separate-allocatable-assign pass.
+// It should transform hlfir.assign ... realloc into conditional reallocation
+// followed by a non-realloc hlfir.assign.
+
+// RUN: fir-opt --separate-allocatable-assign %s | FileCheck %s
+
+// Test: allocatable array assignment with elemental RHS
+func.func @test_expr_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+  %c:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEc"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+
+  %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+  %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+
+  %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+  ^bb0(%i: index):
+    %lb_offset = arith.subi %dims#0, %c1 : index
+    %idx = arith.addi %i, %lb_offset : index
+    %a_elem = hlfir.designate %a_box (%idx) : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> !fir.ref<f64>
+    %a_val = fir.load %a_elem : !fir.ref<f64>
+    %cos_val = math.cos %a_val fastmath<contract> : f64
+    hlfir.yield_element %cos_val : f64
+  }
+
+  hlfir.assign %elemental to %c#0 realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  hlfir.destroy %elemental : !hlfir.expr<?xf64>
+  return
+}
+
+// CHECK-LABEL: func.func @test_expr_rhs
+// The realloc assign should be separated into realloc + non-realloc assign.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !hlfir.expr<?xf64>, !fir.box<!fir.heap<!fir.array<?xf64>>>
+
+// Test: allocatable array assignment with variable RHS
+func.func @test_var_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+  %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+  hlfir.assign %b#0 to %a#0 realloc : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_var_rhs
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !fir.ref<!fir.array<10xf64>>, !fir.box<!fir.heap<!fir.array<?xf64>>>
+
+// Test: non-trivial element type should NOT be separated
+func.func @test_nontrivial(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>)
+
+  %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.char<1,10>> {
+  ^bb0(%i: index):
+    %str = fir.undefined !fir.char<1,10>
+    hlfir.yield_element %str : !fir.char<1,10>
+  }
+
+  hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?x!fir.char<1,10>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>
+  hlfir.destroy %elemental : !hlfir.expr<?x!fir.char<1,10>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_nontrivial
+// Character types are not trivial, so the assign should remain
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} realloc
+
+// Test: non-allocatable assign should NOT be modified
+func.func @test_non_allocatable(%arg0: !fir.ref<!fir.array<10xf64>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+  %a:2 = hlfir.declare %arg0(%shape) {uniq_name = "_QFEa"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+  %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+  hlfir.assign %b#0 to %a#0 : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_non_allocatable
+// Non-allocatable assign should pass through unchanged
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>
+// CHECK-NOT: fir.if
+
+// Test: self-aliasing (a = a(:n)) should NOT be separated because realloc
+// would free the old LHS storage that the RHS still references.
+func.func @test_self_alias(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c5 = arith.constant 5 : index
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+
+  %shape = fir.shape %c5 : (index) -> !fir.shape<1>
+  %section = hlfir.designate %a_box (%c1:%c5:%c1) shape %shape : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<5xi32>>
+
+  hlfir.assign %section to %a#0 realloc : !fir.box<!fir.array<5xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_self_alias
+// Self-aliasing must keep the original realloc assign
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} realloc
+
+// Test: expr RHS whose producing elemental reads from the LHS
+// (e.g. a = a(:n) + 1).  The pass separates the reallocation regardless of
+// any self-reference in the expression: keeping the (lazy) expression
+// evaluation from being moved across the reallocation's deallocation is the
+// responsibility of the hlfir.assign lowering / expression bufferization, not
+// of this pass.  See the deallocation-conflict check in OptimizedBufferization.
+func.func @test_self_alias_expr(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+  %c1 = arith.constant 1 : index
+  %c5 = arith.constant 5 : index
+  %one_i32 = arith.constant 1 : i32
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %shape = fir.shape %c5 : (index) -> !fir.shape<1>
+
+  %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+  ^bb0(%i: index):
+    %elem = hlfir.designate %a_box (%i) : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+    %val = fir.load %elem : !fir.ref<i32>
+    %sum = arith.addi %val, %one_i32 : i32
+    hlfir.yield_element %sum : i32
+  }
+
+  hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?xi32>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  hlfir.destroy %elemental : !hlfir.expr<?xi32>
+  return
+}
+
+// CHECK-LABEL: func.func @test_self_alias_expr
+// Expression RHS is always separated; alias safety is left to bufferization.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !hlfir.expr<?xi32>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+
+// Test: lower bounds from RHS should be preserved during reallocation.
+// source(10:12) has lower bound 10; dest should get lower bound 10 after
+// dest = source.
+func.func @test_lower_bounds(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.array<3xi32>>) {
+  %c10 = arith.constant 10 : index
+  %c3 = arith.constant 3 : index
+  %shapeshift = fir.shape_shift %c10, %c3 : (index, index) -> !fir.shapeshift<1>
+
+  %dest:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEdest"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  %source:2 = hlfir.declare %arg1(%shapeshift) {uniq_name = "_QFEsource"} : (!fir.ref<!fir.array<3xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+
+  hlfir.assign %source#0 to %dest#0 realloc : !fir.box<!fir.array<3xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_lower_bounds
+// The realloc should be separated with lower bound 10 propagated.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: %[[C10:.*]] = arith.constant 10 : index
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// Lower bound 10 should appear in the embox/store of the new allocation (this is not matched below).
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !fir.box<!fir.array<3xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
diff --git a/flang/test/Integration/OpenMP/workshare-axpy.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90
index 12246e54d3432..846bef5f5082c 100644
--- a/flang/test/Integration/OpenMP/workshare-axpy.f90
+++ b/flang/test/Integration/OpenMP/workshare-axpy.f90
@@ -38,20 +38,18 @@ subroutine sb1(a, x, y, z)
 ! HLFIR:}
 
 
-! FIR:  func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
+! FIR:  func.func private @_workshare_copy_box_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 ! FIR:  func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>
 
 ! FIR:  func.func @_QPsb1
 ! FIR:    omp.parallel {
-! FIR:      omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
+! FIR:      omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
 ! FIR:        fir.allocmem
-! FIR:      omp.wsloop {
-! FIR:        omp.loop_nest
-! FIR:      omp.single nowait {
-! FIR:        fir.call @_FortranAAssign
 ! FIR:        fir.freemem
 ! FIR:        omp.terminator
 ! FIR:      }
+! FIR:      omp.wsloop nowait {
+! FIR:        omp.loop_nest
 ! FIR:      omp.barrier
 ! FIR:      omp.terminator
 ! FIR:    }



More information about the flang-commits mailing list