[flang-commits] [flang] 1872f06 - [flang][HLFIR] Add SeparateAllocatableAssign pass (#197814)

via flang-commits flang-commits at lists.llvm.org
Tue Jun 2 04:55:10 PDT 2026


Author: khaki3
Date: 2026-06-02T04:55:05-07:00
New Revision: 1872f06d60f35133bbc06e047c763b490c2d31be

URL: https://github.com/llvm/llvm-project/commit/1872f06d60f35133bbc06e047c763b490c2d31be
DIFF: https://github.com/llvm/llvm-project/commit/1872f06d60f35133bbc06e047c763b490c2d31be.diff

LOG: [flang][HLFIR] Add SeparateAllocatableAssign pass (#197814)

Example:
```fortran
!$acc kernels
B = A          ! A, B allocatable
!$acc end kernels
```

In this code, `B = A` lowers to `hlfir.assign ... realloc`, which
becomes a `_FortranAAssign` runtime call inside the compute region — the
allocation can't be separated from the copy, and it crashes when `B` is
unallocated.

Fix: add `SeparateAllocatableAssign`, which splits the realloc assign
into an explicit conditional [re-]allocation followed by a plain
`hlfir.assign`, exposing the allocation as plain FIR. For variable RHS
it skips aliasing cases (`a = a(:n)`) by loading the LHS `fir.box` and
querying `fir::AliasAnalysis` on the data. For `hlfir.expr` RHS it
leaves ordering to bufferization, and fixes
`ElementalAssignBufferization` to stop fusing an elemental across a
deallocation.

Added: 
    flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
    flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
    flang/test/HLFIR/separate-allocatable-assign.fir

Modified: 
    flang/include/flang/Optimizer/HLFIR/Passes.td
    flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
    flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
    flang/lib/Optimizer/Passes/Pipelines.cpp
    flang/test/Driver/mlir-debug-pass-pipeline.f90
    flang/test/Driver/mlir-pass-pipeline.f90
    flang/test/Fir/basic-program.fir
    flang/test/Integration/OpenMP/workshare-axpy.f90

Removed: 
    


################################################################################
diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td
index 7ef4e5e9c4ade..4973715c1055c 100644
--- a/flang/include/flang/Optimizer/HLFIR/Passes.td
+++ b/flang/include/flang/Optimizer/HLFIR/Passes.td
@@ -84,6 +84,16 @@ def InlineElementals : Pass<"inline-elementals"> {
   let summary = "Inline chained hlfir.elemental operations";
 }
 
+def SeparateAllocatableAssign : Pass<"separate-allocatable-assign"> {
+  let summary = "Separate reallocation from allocatable array assignments";
+  let description = [{
+    Transform `hlfir.assign %rhs to %lhs realloc` into a conditional
+    reallocation of the LHS followed by a non-realloc `hlfir.assign`.
+    This separates host-side allocation from device-side computation
+    for OpenACC/OpenMP offloading and runs at all optimization levels.
+  }];
+}
+
 def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
   let summary = "Inline hlfir.assign operations";
   let options = [Option<

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index 5c24fe58b05c4..c0c64c19e3826 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -6,6 +6,7 @@ add_flang_library(HLFIRTransforms
   ExpressionSimplification.cpp
   InlineElementals.cpp
   InlineHLFIRAssign.cpp
+  SeparateAllocatableAssign.cpp
   InlineHLFIRCopyIn.cpp
   LowerHLFIRIntrinsics.cpp
   LowerHLFIROrderedAssignments.cpp

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index ede5aeab2436e..51af673406b4a 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -401,6 +401,28 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) const {
     return std::nullopt;
   }
   for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
+    // A deallocation between the elemental and the assignment would invalidate
+    // memory accessed by the elemental once its evaluation is moved down to the
+    // assignment. containsReadOrWriteEffectOn only covers Read/Write effects,
+    // so MemoryEffects::Free is checked explicitly here.
+    if (mlir::isa<mlir::MemoryEffects::Free>(effect.getEffect())) {
+      mlir::Value freed = effect.getValue();
+      auto mayAccessFreed = [&](llvm::ArrayRef<mlir::Value> vals) {
+        if (!freed)
+          return true; // unknown freed memory - be conservative
+        for (mlir::Value val : vals)
+          if (!aliasAnalysis.alias(val, freed).isNo())
+            return true;
+        return false;
+      };
+      if (mayAccessFreed(notToBeWrittenBeforeAssign) ||
+          mayAccessFreed(notToBeAccessedBeforeAssign)) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "disallowed deallocation between elemental and assign: "
+                   << freed << " for " << elemental.getLoc() << "\n");
+        return std::nullopt;
+      }
+    }
     // not safe to access anything written in the elemental as this write
     // will be moved to the assignment
     for (mlir::Value val : notToBeAccessedBeforeAssign) {

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
new file mode 100644
index 0000000000000..0160ff7d75f76
--- /dev/null
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
@@ -0,0 +1,175 @@
+//===- SeparateAllocatableAssign.cpp - Split realloc from assign ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Transform hlfir.assign with realloc semantics into a conditional
+// reallocation of the LHS followed by a plain hlfir.assign (without realloc).
+//
+// Before:
+//   hlfir.assign %rhs to %lhs realloc
+//
+// After:
+//   %shape = shape_of(%rhs)
+//   %new_lhs = genReallocIfNeeded(%lhs, %shape)  // host-side alloc
+//   hlfir.assign %rhs to %new_lhs                // element copy
+//
+// This is useful for OpenACC/OpenMP offloading where the allocation must
+// happen on the host before entering a device compute region.
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/Debug.h"
+
+namespace hlfir {
+#define GEN_PASS_DEF_SEPARATEALLOCATABLEASSIGN
+#include "flang/Optimizer/HLFIR/Passes.h.inc"
+} // namespace hlfir
+
+#define DEBUG_TYPE "separate-allocatable-assign"
+
+namespace {
+
+class SeparateAllocatableAssignConversion
+    : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(hlfir::AssignOp assign,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (!assign.isAllocatableAssignment())
+      return rewriter.notifyMatchFailure(assign, "not an allocatable assign");
+
+    hlfir::Entity rhs{assign.getRhs()};
+    hlfir::Entity lhs{assign.getLhs()};
+
+    if (!rhs.isArray())
+      return rewriter.notifyMatchFailure(assign, "RHS is not an array");
+
+    if (!lhs.isArray())
+      return rewriter.notifyMatchFailure(assign, "LHS is not an array");
+
+    mlir::Type rhsEleTy = rhs.getFortranElementType();
+    if (!fir::isa_trivial(rhsEleTy))
+      return rewriter.notifyMatchFailure(assign, "RHS type is not trivial");
+
+    mlir::Type lhsEleTy = lhs.getFortranElementType();
+    if (!fir::isa_trivial(lhsEleTy))
+      return rewriter.notifyMatchFailure(assign, "LHS type is not trivial");
+
+    if (lhsEleTy != rhsEleTy)
+      return rewriter.notifyMatchFailure(assign, "element type mismatch");
+
+    if (!fir::isBoxAddress(lhs.getType()))
+      return rewriter.notifyMatchFailure(assign, "LHS is not a box address");
+
+    mlir::Location loc = assign->getLoc();
+    fir::FirOpBuilder builder(rewriter, assign.getOperation());
+    builder.setInsertionPoint(assign);
+
+    // Reallocation frees the old LHS storage. If the RHS reads that same
+    // storage, the freed data would be read while producing the value to
+    // assign, causing use-after-free.
+    //
+    // For a variable RHS, query fir::AliasAnalysis to decide whether the RHS
+    // may access the LHS data and bail out if so. The aliasing question is
+    // about the *data* the allocatable points to, not the descriptor address:
+    // the RHS may reach the same storage through a different descriptor (e.g.
+    // a pointer or a function result whose local descriptor does not alias the
+    // LHS descriptor). To make the analysis reason about the data, materialize
+    // a temporary load of the LHS descriptor (a loaded fir.box is a data view)
+    // and use it as the LHS value in the query, then erase it.
+    //
+    // For an hlfir.expr RHS, the realloc is split out and the (lazy)
+    // expression evaluation is left in place before it. Keeping the expression
+    // evaluation from being moved across the deallocation is the
+    // responsibility of the hlfir.assign lowering / expression bufferization,
+    // so no aliasing analysis is performed here.
+    if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
+      fir::AliasAnalysis aliasAnalysis;
+      auto lhsDataView = fir::LoadOp::create(builder, loc, lhs.getFirBase());
+      mlir::AliasResult aliasRes =
+          aliasAnalysis.alias(lhsDataView.getResult(), assign.getRhs());
+      rewriter.eraseOp(lhsDataView);
+      if (!aliasRes.isNo())
+        return rewriter.notifyMatchFailure(assign, "LHS and RHS may alias");
+    }
+
+    LLVM_DEBUG(llvm::dbgs() << "SeparateAllocatableAssign: splitting realloc "
+                               "from assign\n");
+
+    mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
+    llvm::SmallVector<mlir::Value> rhsExtents =
+        hlfir::getIndexExtents(loc, builder, rhsShape);
+
+    // F2018 10.2.1.3: when the LHS is (re-)allocated, its lower bounds
+    // come from LBOUND(rhs).  For variable RHS, extract the actual lower
+    // bounds from the entity; for hlfir.expr RHS, LBOUND is always 1.
+    llvm::SmallVector<mlir::Value> rhsLbounds;
+    if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
+      auto bounds = hlfir::genBounds(loc, builder, rhs);
+      for (auto &[lb, ub] : bounds)
+        rhsLbounds.push_back(lb);
+    }
+
+    fir::MutableBoxValue mutableBox(lhs.getFirBase(), /*lenParameters=*/{},
+                                    /*mutableProperties=*/{});
+
+    auto noopHandler = [](fir::ExtendedValue) {};
+    llvm::SmallVector<mlir::Value> lenParams;
+    fir::factory::MutableBoxReallocation realloc =
+        fir::factory::genReallocIfNeeded(builder, loc, mutableBox, rhsExtents,
+                                         lenParams, noopHandler);
+    fir::factory::finalizeRealloc(builder, loc, mutableBox, rhsLbounds,
+                                  /*takeLboundsIfRealloc=*/true, realloc);
+
+    mlir::Value lhsBox = fir::LoadOp::create(builder, loc, lhs.getFirBase());
+    hlfir::AssignOp::create(builder, loc, rhs, lhsBox,
+                            /*realloc=*/false,
+                            /*keep_lhs_length_if_realloc=*/false,
+                            assign.isTemporaryLHS());
+
+    rewriter.eraseOp(assign);
+    return mlir::success();
+  }
+};
+
+class SeparateAllocatableAssignPass
+    : public hlfir::impl::SeparateAllocatableAssignBase<
+          SeparateAllocatableAssignPass> {
+public:
+  using SeparateAllocatableAssignBase<
+      SeparateAllocatableAssignPass>::SeparateAllocatableAssignBase;
+
+  void runOnOperation() override {
+    mlir::MLIRContext *context = &getContext();
+
+    mlir::GreedyRewriteConfig config;
+    config.setRegionSimplificationLevel(
+        mlir::GreedySimplifyRegionLevel::Disabled);
+
+    mlir::RewritePatternSet patterns(context);
+    patterns.insert<SeparateAllocatableAssignConversion>(context);
+
+    if (mlir::failed(mlir::applyPatternsGreedily(
+            getOperation(), std::move(patterns), config))) {
+      mlir::emitError(getOperation()->getLoc(),
+                      "failure in separate-allocatable-assign");
+      signalPassFailure();
+    }
+  }
+};
+} // namespace

diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 2b80da308a7d4..682e3e48e0a22 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -291,6 +291,8 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
   }
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, hlfir::createInlineElementals);
+  addNestedPassToAllTopLevelOperations<PassConstructor>(
+      pm, hlfir::createSeparateAllocatableAssign);
   if (optLevel.isOptimizingForSpeed()) {
     addCanonicalizerPassWithoutRegionSimplification(pm);
     pm.addPass(mlir::createCSEPass());

diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90
index 62f8f98d028a8..c5e63fdbd9d2b 100644
--- a/flang/test/Driver/mlir-debug-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90
@@ -31,18 +31,23 @@
 ! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_mapper', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT: 'fir.global' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: 'func.func' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: 'omp.declare_mapper' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: 'omp.declare_reduction' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: 'omp.private' Pipeline
 ! ALL-NEXT:   InlineElementals
+! ALL-NEXT:   SeparateAllocatableAssign
 ! ALL-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT: LowerHLFIROrderedAssignments
 ! ALL-NEXT: LowerHLFIRIntrinsics

diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 3719113367018..a7ea0a9de4867 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -31,22 +31,27 @@
 ! ALL-NEXT:'fir.global' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT:'func.func' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT:'omp.declare_mapper' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT:'omp.declare_reduction' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! ALL-NEXT:'omp.private' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
+! ALL-NEXT:  SeparateAllocatableAssign
 ! O0-NEXT:   InlineHLFIRAssign
 ! O2-NEXT: Canonicalizer
 ! O2-NEXT: CSE

diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index d76fbc3df0704..2502a2a89a539 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -21,18 +21,23 @@ func.func @_QQmain() {
 // PASSES-NEXT: 'fir.global' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT: 'func.func' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT:  'omp.declare_mapper' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT: 'omp.declare_reduction' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT: 'omp.private' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
+// PASSES-NEXT:   SeparateAllocatableAssign
 // PASSES-NEXT:   Canonicalizer
 // PASSES-NEXT:   CSE
 // PASSES-NEXT:    (S) 0 num-cse'd - Number of operations CSE'd

diff --git a/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir b/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
new file mode 100644
index 0000000000000..2b5eda169a7fc
--- /dev/null
+++ b/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
@@ -0,0 +1,32 @@
+// RUN: fir-opt --opt-bufferization %s | FileCheck %s
+
+// Verify that ElementalAssignBufferization does NOT fuse an elemental into the
+// assignment when a deallocation between the elemental and the assignment frees
+// memory that the elemental reads. Moving the elemental evaluation down to the
+// assignment would read freed memory.
+
+func.func @dealloc_conflict(%dst: !fir.box<!fir.array<?xf32>>, %n: index) {
+  %c1 = arith.constant 1 : index
+  %cst = arith.constant 1.000000e+00 : f32
+  %shape = fir.shape %n : (index) -> !fir.shape<1>
+  %heap = fir.allocmem !fir.array<?xf32>, %n {uniq_name = ".src"}
+  %src = fir.embox %heap(%shape) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+  %elem = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+  ^bb0(%i: index):
+    %d = hlfir.designate %src (%i) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+    %v = fir.load %d : !fir.ref<f32>
+    %s = arith.addf %v, %cst fastmath<contract> : f32
+    hlfir.yield_element %s : f32
+  }
+  // Deallocation of the elemental's read source between elemental and assign.
+  fir.freemem %heap : !fir.heap<!fir.array<?xf32>>
+  hlfir.assign %elem to %dst : !hlfir.expr<?xf32>, !fir.box<!fir.array<?xf32>>
+  hlfir.destroy %elem : !hlfir.expr<?xf32>
+  return
+}
+
+// CHECK-LABEL: func.func @dealloc_conflict
+// The elemental and the array-level assign must be preserved (no fusion).
+// CHECK: hlfir.elemental
+// CHECK: fir.freemem
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !hlfir.expr<?xf32>, !fir.box<!fir.array<?xf32>>

diff --git a/flang/test/HLFIR/separate-allocatable-assign.fir b/flang/test/HLFIR/separate-allocatable-assign.fir
new file mode 100644
index 0000000000000..97c664c38a94f
--- /dev/null
+++ b/flang/test/HLFIR/separate-allocatable-assign.fir
@@ -0,0 +1,181 @@
+// Test the separate-allocatable-assign pass.
+// It should transform hlfir.assign ... realloc into conditional reallocation
+// followed by a non-realloc hlfir.assign.
+
+// RUN: fir-opt --separate-allocatable-assign %s | FileCheck %s
+
+// Test: allocatable array assignment with elemental RHS
+func.func @test_expr_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+  %c:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEc"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+
+  %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+  %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+
+  %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+  ^bb0(%i: index):
+    %lb_offset = arith.subi %dims#0, %c1 : index
+    %idx = arith.addi %i, %lb_offset : index
+    %a_elem = hlfir.designate %a_box (%idx) : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> !fir.ref<f64>
+    %a_val = fir.load %a_elem : !fir.ref<f64>
+    %cos_val = math.cos %a_val fastmath<contract> : f64
+    hlfir.yield_element %cos_val : f64
+  }
+
+  hlfir.assign %elemental to %c#0 realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  hlfir.destroy %elemental : !hlfir.expr<?xf64>
+  return
+}
+
+// CHECK-LABEL: func.func @test_expr_rhs
+// The realloc assign should be separated into realloc + non-realloc assign.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !hlfir.expr<?xf64>, !fir.box<!fir.heap<!fir.array<?xf64>>>
+
+// Test: allocatable array assignment with variable RHS
+func.func @test_var_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+  %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+  hlfir.assign %b#0 to %a#0 realloc : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_var_rhs
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !fir.ref<!fir.array<10xf64>>, !fir.box<!fir.heap<!fir.array<?xf64>>>
+
+// Test: non-trivial element type should NOT be separated
+func.func @test_nontrivial(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>)
+
+  %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.char<1,10>> {
+  ^bb0(%i: index):
+    %str = fir.undefined !fir.char<1,10>
+    hlfir.yield_element %str : !fir.char<1,10>
+  }
+
+  hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?x!fir.char<1,10>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>
+  hlfir.destroy %elemental : !hlfir.expr<?x!fir.char<1,10>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_nontrivial
+// Character types are not trivial, so the assign should remain
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} realloc
+
+// Test: non-allocatable assign should NOT be modified
+func.func @test_non_allocatable(%arg0: !fir.ref<!fir.array<10xf64>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+  %a:2 = hlfir.declare %arg0(%shape) {uniq_name = "_QFEa"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+  %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+  hlfir.assign %b#0 to %a#0 : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_non_allocatable
+// Non-allocatable assign should pass through unchanged
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>
+// CHECK-NOT: fir.if
+
+// Test: self-aliasing (a = a(:n)) should NOT be separated because realloc
+// would free the old LHS storage that the RHS still references.
+func.func @test_self_alias(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c5 = arith.constant 5 : index
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+
+  %shape = fir.shape %c5 : (index) -> !fir.shape<1>
+  %section = hlfir.designate %a_box (%c1:%c5:%c1) shape %shape : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<5xi32>>
+
+  hlfir.assign %section to %a#0 realloc : !fir.box<!fir.array<5xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_self_alias
+// Self-aliasing must keep the original realloc assign
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} realloc
+
+// Test: expr RHS whose producing elemental reads from the LHS
+// (e.g. a = a(:n) + 1).  The pass separates the reallocation regardless of
+// any self-reference in the expression: keeping the (lazy) expression
+// evaluation from being moved across the reallocation's deallocation is the
+// responsibility of the hlfir.assign lowering / expression bufferization, not
+// of this pass.  See the deallocation-conflict check in OptimizedBufferization.
+func.func @test_self_alias_expr(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+  %c1 = arith.constant 1 : index
+  %c5 = arith.constant 5 : index
+  %one_i32 = arith.constant 1 : i32
+
+  %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %shape = fir.shape %c5 : (index) -> !fir.shape<1>
+
+  %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+  ^bb0(%i: index):
+    %elem = hlfir.designate %a_box (%i) : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+    %val = fir.load %elem : !fir.ref<i32>
+    %sum = arith.addi %val, %one_i32 : i32
+    hlfir.yield_element %sum : i32
+  }
+
+  hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?xi32>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  hlfir.destroy %elemental : !hlfir.expr<?xi32>
+  return
+}
+
+// CHECK-LABEL: func.func @test_self_alias_expr
+// Expression RHS is always separated; alias safety is left to bufferization.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !hlfir.expr<?xi32>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+
+// Test: lower bounds from RHS should be preserved during reallocation.
+// source(10:12) has lower bound 10; dest should get lower bound 10 after
+// dest = source.
+func.func @test_lower_bounds(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.array<3xi32>>) {
+  %c10 = arith.constant 10 : index
+  %c3 = arith.constant 3 : index
+  %shapeshift = fir.shape_shift %c10, %c3 : (index, index) -> !fir.shapeshift<1>
+
+  %dest:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEdest"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  %source:2 = hlfir.declare %arg1(%shapeshift) {uniq_name = "_QFEsource"} : (!fir.ref<!fir.array<3xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+
+  hlfir.assign %source#0 to %dest#0 realloc : !fir.box<!fir.array<3xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  return
+}
+
+// CHECK-LABEL: func.func @test_lower_bounds
+// The realloc should be separated with lower bound 10 propagated.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: %[[C10:.*]] = arith.constant 10 : index
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// Lower bound 10 should appear in the embox/store of the new allocation.
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !fir.box<!fir.array<3xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>

diff --git a/flang/test/Integration/OpenMP/workshare-axpy.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90
index 12246e54d3432..846bef5f5082c 100644
--- a/flang/test/Integration/OpenMP/workshare-axpy.f90
+++ b/flang/test/Integration/OpenMP/workshare-axpy.f90
@@ -38,20 +38,18 @@ subroutine sb1(a, x, y, z)
 ! HLFIR:}
 
 
-! FIR:  func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
+! FIR:  func.func private @_workshare_copy_box_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 ! FIR:  func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>
 
 ! FIR:  func.func @_QPsb1
 ! FIR:    omp.parallel {
-! FIR:      omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
+! FIR:      omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
 ! FIR:        fir.allocmem
-! FIR:      omp.wsloop {
-! FIR:        omp.loop_nest
-! FIR:      omp.single nowait {
-! FIR:        fir.call @_FortranAAssign
 ! FIR:        fir.freemem
 ! FIR:        omp.terminator
 ! FIR:      }
+! FIR:      omp.wsloop nowait {
+! FIR:        omp.loop_nest
 ! FIR:      omp.barrier
 ! FIR:      omp.terminator
 ! FIR:    }


        


More information about the flang-commits mailing list