[flang-commits] [flang] 1872f06 - [flang][HLFIR] Add SeparateAllocatableAssign pass (#197814)
via flang-commits
flang-commits at lists.llvm.org
Tue Jun 2 04:55:10 PDT 2026
Author: khaki3
Date: 2026-06-02T04:55:05-07:00
New Revision: 1872f06d60f35133bbc06e047c763b490c2d31be
URL: https://github.com/llvm/llvm-project/commit/1872f06d60f35133bbc06e047c763b490c2d31be
DIFF: https://github.com/llvm/llvm-project/commit/1872f06d60f35133bbc06e047c763b490c2d31be.diff
LOG: [flang][HLFIR] Add SeparateAllocatableAssign pass (#197814)
Example:
```fortran
!$acc kernels
B = A ! A, B allocatable
!$acc end kernels
```
In this code, `B = A` lowers to `hlfir.assign ... realloc`, which
becomes a `_FortranAAssign` runtime call inside the compute region — the
allocation can't be separated from the copy, and it crashes when `B` is
unallocated.
Fix: add `SeparateAllocatableAssign`, which splits the realloc assign
into an explicit conditional [re-]allocation followed by a plain
`hlfir.assign`, exposing the allocation as plain FIR. For variable RHS
it skips aliasing cases (`a = a(:n)`) by loading the LHS `fir.box` and
querying `fir::AliasAnalysis` on the data. For `hlfir.expr` RHS it
leaves ordering to bufferization, and fixes
`ElementalAssignBufferization` to stop fusing an elemental across a
deallocation.
Added:
flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
flang/test/HLFIR/separate-allocatable-assign.fir
Modified:
flang/include/flang/Optimizer/HLFIR/Passes.td
flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
flang/lib/Optimizer/Passes/Pipelines.cpp
flang/test/Driver/mlir-debug-pass-pipeline.f90
flang/test/Driver/mlir-pass-pipeline.f90
flang/test/Fir/basic-program.fir
flang/test/Integration/OpenMP/workshare-axpy.f90
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td
index 7ef4e5e9c4ade..4973715c1055c 100644
--- a/flang/include/flang/Optimizer/HLFIR/Passes.td
+++ b/flang/include/flang/Optimizer/HLFIR/Passes.td
@@ -84,6 +84,16 @@ def InlineElementals : Pass<"inline-elementals"> {
let summary = "Inline chained hlfir.elemental operations";
}
+def SeparateAllocatableAssign : Pass<"separate-allocatable-assign"> {
+ let summary = "Separate reallocation from allocatable array assignments";
+ let description = [{
+ Transform `hlfir.assign %rhs to %lhs realloc` into a conditional
+ reallocation of the LHS followed by a non-realloc `hlfir.assign`.
+ This separates host-side allocation from device-side computation
+ for OpenACC/OpenMP offloading and runs at all optimization levels.
+ }];
+}
+
def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
let summary = "Inline hlfir.assign operations";
let options = [Option<
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index 5c24fe58b05c4..c0c64c19e3826 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -6,6 +6,7 @@ add_flang_library(HLFIRTransforms
ExpressionSimplification.cpp
InlineElementals.cpp
InlineHLFIRAssign.cpp
+ SeparateAllocatableAssign.cpp
InlineHLFIRCopyIn.cpp
LowerHLFIRIntrinsics.cpp
LowerHLFIROrderedAssignments.cpp
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index ede5aeab2436e..51af673406b4a 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -401,6 +401,28 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) const {
return std::nullopt;
}
for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
+ // A deallocation between the elemental and the assignment would invalidate
+ // memory accessed by the elemental once its evaluation is moved down to the
+ // assignment. containsReadOrWriteEffectOn only covers Read/Write effects,
+ // so MemoryEffects::Free is checked explicitly here.
+ if (mlir::isa<mlir::MemoryEffects::Free>(effect.getEffect())) {
+ mlir::Value freed = effect.getValue();
+ auto mayAccessFreed = [&](llvm::ArrayRef<mlir::Value> vals) {
+ if (!freed)
+ return true; // unknown freed memory - be conservative
+ for (mlir::Value val : vals)
+ if (!aliasAnalysis.alias(val, freed).isNo())
+ return true;
+ return false;
+ };
+ if (mayAccessFreed(notToBeWrittenBeforeAssign) ||
+ mayAccessFreed(notToBeAccessedBeforeAssign)) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "disallowed deallocation between elemental and assign: "
+ << freed << " for " << elemental.getLoc() << "\n");
+ return std::nullopt;
+ }
+ }
// not safe to access anything written in the elemental as this write
// will be moved to the assignment
for (mlir::Value val : notToBeAccessedBeforeAssign) {
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
new file mode 100644
index 0000000000000..0160ff7d75f76
--- /dev/null
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
@@ -0,0 +1,175 @@
+//===- SeparateAllocatableAssign.cpp - Split realloc from assign ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Transform hlfir.assign with realloc semantics into a conditional
+// reallocation of the LHS followed by a plain hlfir.assign (without realloc).
+//
+// Before:
+// hlfir.assign %rhs to %lhs realloc
+//
+// After:
+// %shape = shape_of(%rhs)
+// %new_lhs = genReallocIfNeeded(%lhs, %shape) // host-side alloc
+// hlfir.assign %rhs to %new_lhs // element copy
+//
+// This is useful for OpenACC/OpenMP offloading where the allocation must
+// happen on the host before entering a device compute region.
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/Debug.h"
+
+namespace hlfir {
+#define GEN_PASS_DEF_SEPARATEALLOCATABLEASSIGN
+#include "flang/Optimizer/HLFIR/Passes.h.inc"
+} // namespace hlfir
+
+#define DEBUG_TYPE "separate-allocatable-assign"
+
+namespace {
+
+class SeparateAllocatableAssignConversion
+ : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+ using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+ llvm::LogicalResult
+ matchAndRewrite(hlfir::AssignOp assign,
+ mlir::PatternRewriter &rewriter) const override {
+ if (!assign.isAllocatableAssignment())
+ return rewriter.notifyMatchFailure(assign, "not an allocatable assign");
+
+ hlfir::Entity rhs{assign.getRhs()};
+ hlfir::Entity lhs{assign.getLhs()};
+
+ if (!rhs.isArray())
+ return rewriter.notifyMatchFailure(assign, "RHS is not an array");
+
+ if (!lhs.isArray())
+ return rewriter.notifyMatchFailure(assign, "LHS is not an array");
+
+ mlir::Type rhsEleTy = rhs.getFortranElementType();
+ if (!fir::isa_trivial(rhsEleTy))
+ return rewriter.notifyMatchFailure(assign, "RHS type is not trivial");
+
+ mlir::Type lhsEleTy = lhs.getFortranElementType();
+ if (!fir::isa_trivial(lhsEleTy))
+ return rewriter.notifyMatchFailure(assign, "LHS type is not trivial");
+
+ if (lhsEleTy != rhsEleTy)
+ return rewriter.notifyMatchFailure(assign, "element type mismatch");
+
+ if (!fir::isBoxAddress(lhs.getType()))
+ return rewriter.notifyMatchFailure(assign, "LHS is not a box address");
+
+ mlir::Location loc = assign->getLoc();
+ fir::FirOpBuilder builder(rewriter, assign.getOperation());
+ builder.setInsertionPoint(assign);
+
+ // Reallocation frees the old LHS storage. If the RHS reads that same
+ // storage, the freed data would be read while producing the value to
+ // assign, causing use-after-free.
+ //
+ // For a variable RHS, query fir::AliasAnalysis to decide whether the RHS
+ // may access the LHS data and bail out if so. The aliasing question is
+ // about the *data* the allocatable points to, not the descriptor address:
+ // the RHS may reach the same storage through a different descriptor (e.g.
+ // a pointer or a function result whose local descriptor does not alias the
+ // LHS descriptor). To make the analysis reason about the data, materialize
+ // a temporary load of the LHS descriptor (a loaded fir.box is a data view)
+ // and use it as the LHS value in the query, then erase it.
+ //
+ // For an hlfir.expr RHS, the realloc is split out and the (lazy)
+ // expression evaluation is left in place before it. Keeping the expression
+ // evaluation from being moved across the deallocation is the
+ // responsibility of the hlfir.assign lowering / expression bufferization,
+ // so no aliasing analysis is performed here.
+ if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
+ fir::AliasAnalysis aliasAnalysis;
+ auto lhsDataView = fir::LoadOp::create(builder, loc, lhs.getFirBase());
+ mlir::AliasResult aliasRes =
+ aliasAnalysis.alias(lhsDataView.getResult(), assign.getRhs());
+ rewriter.eraseOp(lhsDataView);
+ if (!aliasRes.isNo())
+ return rewriter.notifyMatchFailure(assign, "LHS and RHS may alias");
+ }
+
+ LLVM_DEBUG(llvm::dbgs() << "SeparateAllocatableAssign: splitting realloc "
+ "from assign\n");
+
+ mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
+ llvm::SmallVector<mlir::Value> rhsExtents =
+ hlfir::getIndexExtents(loc, builder, rhsShape);
+
+ // F2018 10.2.1.3: when the LHS is (re-)allocated, its lower bounds
+ // come from LBOUND(rhs). For variable RHS, extract the actual lower
+ // bounds from the entity; for hlfir.expr RHS, LBOUND is always 1.
+ llvm::SmallVector<mlir::Value> rhsLbounds;
+ if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
+ auto bounds = hlfir::genBounds(loc, builder, rhs);
+ for (auto &[lb, ub] : bounds)
+ rhsLbounds.push_back(lb);
+ }
+
+ fir::MutableBoxValue mutableBox(lhs.getFirBase(), /*lenParameters=*/{},
+ /*mutableProperties=*/{});
+
+ auto noopHandler = [](fir::ExtendedValue) {};
+ llvm::SmallVector<mlir::Value> lenParams;
+ fir::factory::MutableBoxReallocation realloc =
+ fir::factory::genReallocIfNeeded(builder, loc, mutableBox, rhsExtents,
+ lenParams, noopHandler);
+ fir::factory::finalizeRealloc(builder, loc, mutableBox, rhsLbounds,
+ /*takeLboundsIfRealloc=*/true, realloc);
+
+ mlir::Value lhsBox = fir::LoadOp::create(builder, loc, lhs.getFirBase());
+ hlfir::AssignOp::create(builder, loc, rhs, lhsBox,
+ /*realloc=*/false,
+ /*keep_lhs_length_if_realloc=*/false,
+ assign.isTemporaryLHS());
+
+ rewriter.eraseOp(assign);
+ return mlir::success();
+ }
+};
+
+class SeparateAllocatableAssignPass
+ : public hlfir::impl::SeparateAllocatableAssignBase<
+ SeparateAllocatableAssignPass> {
+public:
+ using SeparateAllocatableAssignBase<
+ SeparateAllocatableAssignPass>::SeparateAllocatableAssignBase;
+
+ void runOnOperation() override {
+ mlir::MLIRContext *context = &getContext();
+
+ mlir::GreedyRewriteConfig config;
+ config.setRegionSimplificationLevel(
+ mlir::GreedySimplifyRegionLevel::Disabled);
+
+ mlir::RewritePatternSet patterns(context);
+ patterns.insert<SeparateAllocatableAssignConversion>(context);
+
+ if (mlir::failed(mlir::applyPatternsGreedily(
+ getOperation(), std::move(patterns), config))) {
+ mlir::emitError(getOperation()->getLoc(),
+ "failure in separate-allocatable-assign");
+ signalPassFailure();
+ }
+ }
+};
+} // namespace
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 2b80da308a7d4..682e3e48e0a22 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -291,6 +291,8 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
}
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineElementals);
+ addNestedPassToAllTopLevelOperations<PassConstructor>(
+ pm, hlfir::createSeparateAllocatableAssign);
if (optLevel.isOptimizingForSpeed()) {
addCanonicalizerPassWithoutRegionSimplification(pm);
pm.addPass(mlir::createCSEPass());
diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90
index 62f8f98d028a8..c5e63fdbd9d2b 100644
--- a/flang/test/Driver/mlir-debug-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90
@@ -31,18 +31,23 @@
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_mapper', 'omp.declare_reduction', 'omp.private']
! ALL-NEXT: 'fir.global' Pipeline
! ALL-NEXT: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! ALL-NEXT: InlineHLFIRAssign
! ALL-NEXT: 'func.func' Pipeline
! ALL-NEXT: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! ALL-NEXT: InlineHLFIRAssign
! ALL-NEXT: 'omp.declare_mapper' Pipeline
! ALL-NEXT: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! ALL-NEXT: InlineHLFIRAssign
! ALL-NEXT: 'omp.declare_reduction' Pipeline
! ALL-NEXT: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! ALL-NEXT: InlineHLFIRAssign
! ALL-NEXT: 'omp.private' Pipeline
! ALL-NEXT: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! ALL-NEXT: InlineHLFIRAssign
! ALL-NEXT: LowerHLFIROrderedAssignments
! ALL-NEXT: LowerHLFIRIntrinsics
diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 3719113367018..a7ea0a9de4867 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -31,22 +31,27 @@
! ALL-NEXT:'fir.global' Pipeline
! O2-NEXT: SimplifyHLFIRIntrinsics
! ALL: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! O0-NEXT: InlineHLFIRAssign
! ALL-NEXT:'func.func' Pipeline
! O2-NEXT: SimplifyHLFIRIntrinsics
! ALL: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! O0-NEXT: InlineHLFIRAssign
! ALL-NEXT:'omp.declare_mapper' Pipeline
! O2-NEXT: SimplifyHLFIRIntrinsics
! ALL: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! O0-NEXT: InlineHLFIRAssign
! ALL-NEXT:'omp.declare_reduction' Pipeline
! O2-NEXT: SimplifyHLFIRIntrinsics
! ALL: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! O0-NEXT: InlineHLFIRAssign
! ALL-NEXT:'omp.private' Pipeline
! O2-NEXT: SimplifyHLFIRIntrinsics
! ALL: InlineElementals
+! ALL-NEXT: SeparateAllocatableAssign
! O0-NEXT: InlineHLFIRAssign
! O2-NEXT: Canonicalizer
! O2-NEXT: CSE
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index d76fbc3df0704..2502a2a89a539 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -21,18 +21,23 @@ func.func @_QQmain() {
// PASSES-NEXT: 'fir.global' Pipeline
// PASSES-NEXT: SimplifyHLFIRIntrinsics
// PASSES-NEXT: InlineElementals
+// PASSES-NEXT: SeparateAllocatableAssign
// PASSES-NEXT: 'func.func' Pipeline
// PASSES-NEXT: SimplifyHLFIRIntrinsics
// PASSES-NEXT: InlineElementals
+// PASSES-NEXT: SeparateAllocatableAssign
// PASSES-NEXT: 'omp.declare_mapper' Pipeline
// PASSES-NEXT: SimplifyHLFIRIntrinsics
// PASSES-NEXT: InlineElementals
+// PASSES-NEXT: SeparateAllocatableAssign
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
// PASSES-NEXT: SimplifyHLFIRIntrinsics
// PASSES-NEXT: InlineElementals
+// PASSES-NEXT: SeparateAllocatableAssign
// PASSES-NEXT: 'omp.private' Pipeline
// PASSES-NEXT: SimplifyHLFIRIntrinsics
// PASSES-NEXT: InlineElementals
+// PASSES-NEXT: SeparateAllocatableAssign
// PASSES-NEXT: Canonicalizer
// PASSES-NEXT: CSE
// PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
diff --git a/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir b/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
new file mode 100644
index 0000000000000..2b5eda169a7fc
--- /dev/null
+++ b/flang/test/HLFIR/opt-bufferization-dealloc-conflict.fir
@@ -0,0 +1,32 @@
+// RUN: fir-opt --opt-bufferization %s | FileCheck %s
+
+// Verify that ElementalAssignBufferization does NOT fuse an elemental into the
+// assignment when a deallocation between the elemental and the assignment frees
+// memory that the elemental reads. Moving the elemental evaluation down to the
+// assignment would read freed memory.
+
+func.func @dealloc_conflict(%dst: !fir.box<!fir.array<?xf32>>, %n: index) {
+ %c1 = arith.constant 1 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ %shape = fir.shape %n : (index) -> !fir.shape<1>
+ %heap = fir.allocmem !fir.array<?xf32>, %n {uniq_name = ".src"}
+ %src = fir.embox %heap(%shape) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ %elem = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+ ^bb0(%i: index):
+ %d = hlfir.designate %src (%i) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %v = fir.load %d : !fir.ref<f32>
+ %s = arith.addf %v, %cst fastmath<contract> : f32
+ hlfir.yield_element %s : f32
+ }
+ // Deallocation of the elemental's read source between elemental and assign.
+ fir.freemem %heap : !fir.heap<!fir.array<?xf32>>
+ hlfir.assign %elem to %dst : !hlfir.expr<?xf32>, !fir.box<!fir.array<?xf32>>
+ hlfir.destroy %elem : !hlfir.expr<?xf32>
+ return
+}
+
+// CHECK-LABEL: func.func @dealloc_conflict
+// The elemental and the array-level assign must be preserved (no fusion).
+// CHECK: hlfir.elemental
+// CHECK: fir.freemem
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !hlfir.expr<?xf32>, !fir.box<!fir.array<?xf32>>
diff --git a/flang/test/HLFIR/separate-allocatable-assign.fir b/flang/test/HLFIR/separate-allocatable-assign.fir
new file mode 100644
index 0000000000000..97c664c38a94f
--- /dev/null
+++ b/flang/test/HLFIR/separate-allocatable-assign.fir
@@ -0,0 +1,181 @@
+// Test the separate-allocatable-assign pass.
+// It should transform hlfir.assign ... realloc into conditional reallocation
+// followed by a non-realloc hlfir.assign.
+
+// RUN: fir-opt --separate-allocatable-assign %s | FileCheck %s
+
+// Test: allocatable array assignment with elemental RHS
+func.func @test_expr_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+ %c:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEc"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+
+ %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+ ^bb0(%i: index):
+ %lb_offset = arith.subi %dims#0, %c1 : index
+ %idx = arith.addi %i, %lb_offset : index
+ %a_elem = hlfir.designate %a_box (%idx) : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> !fir.ref<f64>
+ %a_val = fir.load %a_elem : !fir.ref<f64>
+ %cos_val = math.cos %a_val fastmath<contract> : f64
+ hlfir.yield_element %cos_val : f64
+ }
+
+ hlfir.assign %elemental to %c#0 realloc : !hlfir.expr<?xf64>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ hlfir.destroy %elemental : !hlfir.expr<?xf64>
+ return
+}
+
+// CHECK-LABEL: func.func @test_expr_rhs
+// The realloc assign should be separated into realloc + non-realloc assign.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !hlfir.expr<?xf64>, !fir.box<!fir.heap<!fir.array<?xf64>>>
+
+// Test: allocatable array assignment with variable RHS
+func.func @test_var_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+ %c10 = arith.constant 10 : index
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+ %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+ hlfir.assign %b#0 to %a#0 realloc : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+ return
+}
+
+// CHECK-LABEL: func.func @test_var_rhs
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !fir.ref<!fir.array<10xf64>>, !fir.box<!fir.heap<!fir.array<?xf64>>>
+
+// Test: non-trivial element type should NOT be separated
+func.func @test_nontrivial(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) {
+ %c10 = arith.constant 10 : index
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>)
+
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.char<1,10>> {
+ ^bb0(%i: index):
+ %str = fir.undefined !fir.char<1,10>
+ hlfir.yield_element %str : !fir.char<1,10>
+ }
+
+ hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?x!fir.char<1,10>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,10>>>>>
+ hlfir.destroy %elemental : !hlfir.expr<?x!fir.char<1,10>>
+ return
+}
+
+// CHECK-LABEL: func.func @test_nontrivial
+// Character types are not trivial, so the assign should remain
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} realloc
+
+// Test: non-allocatable assign should NOT be modified
+func.func @test_non_allocatable(%arg0: !fir.ref<!fir.array<10xf64>>, %arg1: !fir.ref<!fir.array<10xf64>>) {
+ %c10 = arith.constant 10 : index
+ %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+
+ %a:2 = hlfir.declare %arg0(%shape) {uniq_name = "_QFEa"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+ %b:2 = hlfir.declare %arg1(%shape) {uniq_name = "_QFEb"} : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>)
+
+ hlfir.assign %b#0 to %a#0 : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>
+ return
+}
+
+// CHECK-LABEL: func.func @test_non_allocatable
+// Non-allocatable assign should pass through unchanged
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} : !fir.ref<!fir.array<10xf64>>, !fir.ref<!fir.array<10xf64>>
+// CHECK-NOT: fir.if
+
+// Test: self-aliasing (a = a(:n)) should NOT be separated because realloc
+// would free the old LHS storage that the RHS still references.
+func.func @test_self_alias(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c5 = arith.constant 5 : index
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+ %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ %dims:3 = fir.box_dims %a_box, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+
+ %shape = fir.shape %c5 : (index) -> !fir.shape<1>
+ %section = hlfir.designate %a_box (%c1:%c5:%c1) shape %shape : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<5xi32>>
+
+ hlfir.assign %section to %a#0 realloc : !fir.box<!fir.array<5xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ return
+}
+
+// CHECK-LABEL: func.func @test_self_alias
+// Self-aliasing must keep the original realloc assign
+// CHECK: hlfir.assign %{{.*}} to %{{.*}} realloc
+
+// Test: expr RHS whose producing elemental reads from the LHS
+// (e.g. a = a(:n) + 1). The pass separates the reallocation regardless of
+// any self-reference in the expression: keeping the (lazy) expression
+// evaluation from being moved across the reallocation's deallocation is the
+// responsibility of the hlfir.assign lowering / expression bufferization, not
+// of this pass. See the deallocation-conflict check in OptimizedBufferization.
+func.func @test_self_alias_expr(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+ %c1 = arith.constant 1 : index
+ %c5 = arith.constant 5 : index
+ %one_i32 = arith.constant 1 : i32
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+ %a_box = fir.load %a#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ %shape = fir.shape %c5 : (index) -> !fir.shape<1>
+
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+ ^bb0(%i: index):
+ %elem = hlfir.designate %a_box (%i) : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+ %val = fir.load %elem : !fir.ref<i32>
+ %sum = arith.addi %val, %one_i32 : i32
+ hlfir.yield_element %sum : i32
+ }
+
+ hlfir.assign %elemental to %a#0 realloc : !hlfir.expr<?xi32>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ hlfir.destroy %elemental : !hlfir.expr<?xi32>
+ return
+}
+
+// CHECK-LABEL: func.func @test_self_alias_expr
+// Expression RHS is always separated; alias safety is left to bufferization.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !hlfir.expr<?xi32>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+
+// Test: lower bounds from RHS should be preserved during reallocation.
+// source(10:12) has lower bound 10; dest should get lower bound 10 after
+// dest = source.
+func.func @test_lower_bounds(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.array<3xi32>>) {
+ %c10 = arith.constant 10 : index
+ %c3 = arith.constant 3 : index
+ %shapeshift = fir.shape_shift %c10, %c3 : (index, index) -> !fir.shapeshift<1>
+
+ %dest:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEdest"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+ %source:2 = hlfir.declare %arg1(%shapeshift) {uniq_name = "_QFEsource"} : (!fir.ref<!fir.array<3xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+
+ hlfir.assign %source#0 to %dest#0 realloc : !fir.box<!fir.array<3xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ return
+}
+
+// CHECK-LABEL: func.func @test_lower_bounds
+// The realloc should be separated with lower bound 10 propagated.
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: %[[C10:.*]] = arith.constant 10 : index
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// Lower bound 10 should appear in the embox/store of the new allocation.
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !fir.box<!fir.array<3xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
diff --git a/flang/test/Integration/OpenMP/workshare-axpy.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90
index 12246e54d3432..846bef5f5082c 100644
--- a/flang/test/Integration/OpenMP/workshare-axpy.f90
+++ b/flang/test/Integration/OpenMP/workshare-axpy.f90
@@ -38,20 +38,18 @@ subroutine sb1(a, x, y, z)
! HLFIR:}
-! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
+! FIR: func.func private @_workshare_copy_box_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>
! FIR: func.func @_QPsb1
! FIR: omp.parallel {
-! FIR: omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
+! FIR: omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
! FIR: fir.allocmem
-! FIR: omp.wsloop {
-! FIR: omp.loop_nest
-! FIR: omp.single nowait {
-! FIR: fir.call @_FortranAAssign
! FIR: fir.freemem
! FIR: omp.terminator
! FIR: }
+! FIR: omp.wsloop nowait {
+! FIR: omp.loop_nest
! FIR: omp.barrier
! FIR: omp.terminator
! FIR: }
More information about the flang-commits
mailing list