[flang-commits] [flang] [flang][hlfir] Resolve shape_of users when bufferizing eval_in_mem (PR #201214)
via flang-commits
flang-commits at lists.llvm.org
Wed Jun 3 02:18:51 PDT 2026
https://github.com/khaki3 updated https://github.com/llvm/llvm-project/pull/201214
>From 39e966b014fd36f21185856ed5dcd1eeede711b2 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Tue, 2 Jun 2026 15:01:43 -0700
Subject: [PATCH 1/3] [flang][hlfir] Reuse eval_in_mem shape in
SeparateAllocatableAssign
When the RHS of an allocatable assignment is an hlfir.eval_in_mem, reuse
its shape operand instead of emitting hlfir.shape_of. A shape_of adds an
extra use of the eval_in_mem result, which prevents
EvaluateIntoMemoryAssignBufferization from evaluating the expression in
place and triggers a use-after-erase assertion in OptimizedBufferization.
---
.../Transforms/SeparateAllocatableAssign.cpp | 9 +++++-
.../HLFIR/separate-allocatable-assign.fir | 30 +++++++++++++++++++
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
index 0160ff7d75f76..69ca483642df0 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
@@ -111,7 +111,14 @@ class SeparateAllocatableAssignConversion
LLVM_DEBUG(llvm::dbgs() << "SeparateAllocatableAssign: splitting realloc "
"from assign\n");
- mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
+ // Reuse the evaluate_in_memory shape operand instead of emitting a
+ // shape_of, which would add an extra use and block in-place bufferization.
+ mlir::Value rhsShape;
+ if (auto evalInMem =
+ assign.getRhs().getDefiningOp<hlfir::EvaluateInMemoryOp>())
+ rhsShape = evalInMem.getShape();
+ if (!rhsShape)
+ rhsShape = hlfir::genShape(loc, builder, rhs);
llvm::SmallVector<mlir::Value> rhsExtents =
hlfir::getIndexExtents(loc, builder, rhsShape);
diff --git a/flang/test/HLFIR/separate-allocatable-assign.fir b/flang/test/HLFIR/separate-allocatable-assign.fir
index 97c664c38a94f..406de58724c71 100644
--- a/flang/test/HLFIR/separate-allocatable-assign.fir
+++ b/flang/test/HLFIR/separate-allocatable-assign.fir
@@ -179,3 +179,33 @@ func.func @test_lower_bounds(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32
// Lower bound 10 should appear in the embox/store of the new allocation.
// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !fir.box<!fir.array<3xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+
+// Test: allocatable = hlfir.eval_in_mem (e.g. b = matmul(...)). The pass must
+// reuse the eval_in_mem shape operand and must NOT emit an hlfir.shape_of of
+// the eval_in_mem result: an extra use would prevent
+// EvaluateIntoMemoryAssignBufferization from rewriting the evaluation in place.
+func.func private @sink(!fir.ref<!fir.array<?xf32>>)
+func.func @test_eval_in_mem_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, %n: index) {
+ %shape = fir.shape %n : (index) -> !fir.shape<1>
+
+ %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+
+ %expr = hlfir.eval_in_mem shape %shape : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+ ^bb0(%mem: !fir.ref<!fir.array<?xf32>>):
+ fir.call @sink(%mem) : (!fir.ref<!fir.array<?xf32>>) -> ()
+ }
+
+ hlfir.assign %expr to %a#0 realloc : !hlfir.expr<?xf32>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+ hlfir.destroy %expr : !hlfir.expr<?xf32>
+ return
+}
+
+// CHECK-LABEL: func.func @test_eval_in_mem_rhs
+// The eval_in_mem and its shape operand are reused; no shape_of is emitted.
+// CHECK: hlfir.eval_in_mem
+// CHECK-NOT: hlfir.shape_of
+// CHECK-NOT: hlfir.assign{{.*}}realloc
+// CHECK: fir.if
+// CHECK: fir.allocmem
+// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !hlfir.expr<?xf32>, !fir.box<!fir.heap<!fir.array<?xf32>>>
>From fc7fa65ee56db838440431f6d29659254a40ff19 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Tue, 2 Jun 2026 17:31:40 -0700
Subject: [PATCH 2/3] [flang][hlfir] Resolve shape_of users when bufferizing
eval_in_mem
The RHS of an allocatable assignment is still a transformational intrinsic
(e.g. hlfir.matmul) when SeparateAllocatableAssign runs, so genShape emits
an hlfir.shape_of to size the reallocation. After the intrinsic is lowered
to hlfir.eval_in_mem, that shape_of is an extra user that
EvaluateIntoMemoryAssignBufferization did not expect, causing a
use-after-erase assertion at -O2.
Handle it generally in OptimizedBufferization: a shape_of user of an
eval_in_mem only needs the shape, which is already an operand, so redirect
it to that operand before erasing the eval_in_mem. This keeps the in-place
evaluation for any transformational intrinsic assigned to an allocatable.
This supersedes the earlier eval_in_mem shape-operand reuse in
SeparateAllocatableAssign, which is reverted here since the RHS is not an
eval_in_mem at that point.
---
.../Transforms/OptimizedBufferization.cpp | 24 +++++++++++----
.../Transforms/SeparateAllocatableAssign.cpp | 9 +-----
.../HLFIR/opt-bufferization-eval_in_mem.fir | 30 +++++++++++++++++++
.../HLFIR/separate-allocatable-assign.fir | 30 -------------------
4 files changed, 50 insertions(+), 43 deletions(-)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 51af673406b4a..34a0ade751dfa 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -638,16 +638,26 @@ tryUsingAssignLhsDirectly(hlfir::EvaluateInMemoryOp evalInMem,
mlir::Location loc = evalInMem.getLoc();
hlfir::DestroyOp destroy;
hlfir::AssignOp assign;
- for (auto user : llvm::enumerate(evalInMem->getUsers())) {
- if (user.index() > 2)
+ // A hlfir.shape_of of the result only needs the shape, which the
+ // eval_in_mem already carries as an operand, so it can be redirected to that
+ // operand and does not prevent the in-place rewrite below. Any other user
+ // would dangle when the eval_in_mem is erased, so bail out on it.
+ llvm::SmallVector<hlfir::ShapeOfOp> shapeOfs;
+ for (mlir::Operation *user : evalInMem->getUsers()) {
+ if (auto op = mlir::dyn_cast<hlfir::AssignOp>(user))
+ assign = op;
+ else if (auto op = mlir::dyn_cast<hlfir::DestroyOp>(user))
+ destroy = op;
+ else if (auto op = mlir::dyn_cast<hlfir::ShapeOfOp>(user))
+ shapeOfs.push_back(op);
+ else
return mlir::failure();
- mlir::TypeSwitch<mlir::Operation *, void>(user.value())
- .Case([&](hlfir::AssignOp op) { assign = op; })
- .Case([&](hlfir::DestroyOp op) { destroy = op; });
}
if (!assign || !destroy || destroy.mustFinalizeExpr() ||
assign.isAllocatableAssignment())
return mlir::failure();
+ if (!shapeOfs.empty() && !evalInMem.getShape())
+ return mlir::failure();
hlfir::Entity lhs{assign.getLhs()};
// EvaluateInMemoryOp memory is contiguous, so in general, it can only be
@@ -690,6 +700,10 @@ tryUsingAssignLhsDirectly(hlfir::EvaluateInMemoryOp evalInMem,
fir::FirOpBuilder builder(rewriter, evalInMem.getOperation());
mlir::Value rawLhs = hlfir::genVariableRawAddress(loc, builder, lhs);
hlfir::computeEvaluateOpIn(loc, builder, evalInMem, rawLhs);
+ // Redirect shape_of users to the shape operand so the eval_in_mem can be
+ // erased without leaving dangling uses.
+ for (hlfir::ShapeOfOp shapeOf : shapeOfs)
+ rewriter.replaceOp(shapeOf, evalInMem.getShape());
rewriter.eraseOp(assign);
rewriter.eraseOp(destroy);
rewriter.eraseOp(evalInMem);
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
index 69ca483642df0..0160ff7d75f76 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SeparateAllocatableAssign.cpp
@@ -111,14 +111,7 @@ class SeparateAllocatableAssignConversion
LLVM_DEBUG(llvm::dbgs() << "SeparateAllocatableAssign: splitting realloc "
"from assign\n");
- // Reuse the evaluate_in_memory shape operand instead of emitting a
- // shape_of, which would add an extra use and block in-place bufferization.
- mlir::Value rhsShape;
- if (auto evalInMem =
- assign.getRhs().getDefiningOp<hlfir::EvaluateInMemoryOp>())
- rhsShape = evalInMem.getShape();
- if (!rhsShape)
- rhsShape = hlfir::genShape(loc, builder, rhs);
+ mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
llvm::SmallVector<mlir::Value> rhsExtents =
hlfir::getIndexExtents(loc, builder, rhsShape);
diff --git a/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir b/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir
index fdfb2ce6979dc..9ef8115284514 100644
--- a/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir
+++ b/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir
@@ -60,3 +60,33 @@ func.func @_QPnegative_test_is_target(%arg0: !fir.ref<!fir.array<10xf32>> {fir.b
// CHECK: hlfir.destroy %[[VAL_10]] : !hlfir.expr<10xf32>
// CHECK: return
// CHECK: }
+
+// An hlfir.shape_of user of the eval_in_mem (such as the one left behind by
+// SeparateAllocatableAssign when sizing a reallocation) must not block the
+// in-place rewrite: it is redirected to the eval_in_mem shape operand.
+func.func @_QPtest_shape_of_user(%arg0: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<index>) {
+ %c10 = arith.constant 10 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %2:2 = hlfir.declare %arg0(%1) dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+ %3 = hlfir.eval_in_mem shape %1 : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
+ ^bb0(%arg2: !fir.ref<!fir.array<10xf32>>):
+ %4 = fir.call @_QPfoo() fastmath<contract> : () -> !fir.array<10xf32>
+ fir.save_result %4 to %arg2(%1) : !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>, !fir.shape<1>
+ }
+ %shp = hlfir.shape_of %3 : (!hlfir.expr<10xf32>) -> !fir.shape<1>
+ %ext = hlfir.get_extent %shp {dim = 0 : index} : (!fir.shape<1>) -> index
+ hlfir.assign %3 to %2#0 : !hlfir.expr<10xf32>, !fir.ref<!fir.array<10xf32>>
+ hlfir.destroy %3 : !hlfir.expr<10xf32>
+ fir.store %ext to %arg1 : !fir.ref<index>
+ return
+}
+// CHECK-LABEL: func.func @_QPtest_shape_of_user(
+// CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
+// The shape_of is redirected to the eval_in_mem shape operand...
+// CHECK: hlfir.get_extent %[[SHAPE]]
+// ...and the result is still evaluated directly into the LHS (no temporary).
+// CHECK: %[[CALL:.*]] = fir.call @_QPfoo() fastmath<contract> : () -> !fir.array<10xf32>
+// CHECK: fir.save_result %[[CALL]] to %{{.*}}#0(%[[SHAPE]])
+// CHECK-NOT: hlfir.eval_in_mem
+// CHECK-NOT: hlfir.shape_of
diff --git a/flang/test/HLFIR/separate-allocatable-assign.fir b/flang/test/HLFIR/separate-allocatable-assign.fir
index 406de58724c71..97c664c38a94f 100644
--- a/flang/test/HLFIR/separate-allocatable-assign.fir
+++ b/flang/test/HLFIR/separate-allocatable-assign.fir
@@ -179,33 +179,3 @@ func.func @test_lower_bounds(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32
// Lower bound 10 should appear in the embox/store of the new allocation.
// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !fir.box<!fir.array<3xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
-
-// Test: allocatable = hlfir.eval_in_mem (e.g. b = matmul(...)). The pass must
-// reuse the eval_in_mem shape operand and must NOT emit an hlfir.shape_of of
-// the eval_in_mem result: an extra use would prevent
-// EvaluateIntoMemoryAssignBufferization from rewriting the evaluation in place.
-func.func private @sink(!fir.ref<!fir.array<?xf32>>)
-func.func @test_eval_in_mem_rhs(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, %n: index) {
- %shape = fir.shape %n : (index) -> !fir.shape<1>
-
- %a:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
-
- %expr = hlfir.eval_in_mem shape %shape : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
- ^bb0(%mem: !fir.ref<!fir.array<?xf32>>):
- fir.call @sink(%mem) : (!fir.ref<!fir.array<?xf32>>) -> ()
- }
-
- hlfir.assign %expr to %a#0 realloc : !hlfir.expr<?xf32>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
- hlfir.destroy %expr : !hlfir.expr<?xf32>
- return
-}
-
-// CHECK-LABEL: func.func @test_eval_in_mem_rhs
-// The eval_in_mem and its shape operand are reused; no shape_of is emitted.
-// CHECK: hlfir.eval_in_mem
-// CHECK-NOT: hlfir.shape_of
-// CHECK-NOT: hlfir.assign{{.*}}realloc
-// CHECK: fir.if
-// CHECK: fir.allocmem
-// CHECK: %[[BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
-// CHECK: hlfir.assign %{{.*}} to %[[BOX]] : !hlfir.expr<?xf32>, !fir.box<!fir.heap<!fir.array<?xf32>>>
>From c43e21ce004601e390dc53b1242d8643c94df5f2 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Wed, 3 Jun 2026 02:17:43 -0700
Subject: [PATCH 3/3] [flang][hlfir] Address review comments
Rephrase the comment in tryUsingAssignLhsDirectly and bail out if the
eval_in_mem result has more than one hlfir.assign or hlfir.destroy user,
since the in-place rewrite only handles a single assign/destroy.
---
.../Transforms/OptimizedBufferization.cpp | 20 +++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 34a0ade751dfa..d717b39479380 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -638,20 +638,24 @@ tryUsingAssignLhsDirectly(hlfir::EvaluateInMemoryOp evalInMem,
mlir::Location loc = evalInMem.getLoc();
hlfir::DestroyOp destroy;
hlfir::AssignOp assign;
- // A hlfir.shape_of of the result only needs the shape, which the
- // eval_in_mem already carries as an operand, so it can be redirected to that
- // operand and does not prevent the in-place rewrite below. Any other user
- // would dangle when the eval_in_mem is erased, so bail out on it.
+ // To evaluate the hlfir.eval_in_mem directly into the LHS, its result must
+ // only be used in the assignment, in a destroy, and in hlfir.shape_of (which
+ // can be replaced by a direct use of the shape operand).
llvm::SmallVector<hlfir::ShapeOfOp> shapeOfs;
for (mlir::Operation *user : evalInMem->getUsers()) {
- if (auto op = mlir::dyn_cast<hlfir::AssignOp>(user))
+ if (auto op = mlir::dyn_cast<hlfir::AssignOp>(user)) {
+ if (assign)
+ return mlir::failure();
assign = op;
- else if (auto op = mlir::dyn_cast<hlfir::DestroyOp>(user))
+ } else if (auto op = mlir::dyn_cast<hlfir::DestroyOp>(user)) {
+ if (destroy)
+ return mlir::failure();
destroy = op;
- else if (auto op = mlir::dyn_cast<hlfir::ShapeOfOp>(user))
+ } else if (auto op = mlir::dyn_cast<hlfir::ShapeOfOp>(user)) {
shapeOfs.push_back(op);
- else
+ } else {
return mlir::failure();
+ }
}
if (!assign || !destroy || destroy.mustFinalizeExpr() ||
assign.isAllocatableAssignment())
More information about the flang-commits
mailing list