[flang-commits] [flang] [flang] Use optimal shape for assign expansion as a loop. (PR #143050)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Thu Jun 5 17:04:30 PDT 2025
https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/143050
During `hlfir.assign` inlining and `ElementalAssignBufferization`
we can deduce the optimal shape from `lhs` and `rhs` shapes.
It is probably better be done in a separate pass that propagates
constant shapes, but I have not seen any benchmarks that would
benefit from this yet. So consider this as a workaround for a bigger
TODO issue.
The `ElementalAssignBufferization` case is from 465.tonto,
but I do not have performance results yet (I do not expect much).
>From ef05ca674913377fbcae26b6a597f3e0862ea405 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Thu, 5 Jun 2025 16:50:21 -0700
Subject: [PATCH] [flang] Use optimal shape for assign expansion as a loop.
During `hlfir.assign` inlining and `ElementalAssignBufferization`
we can deduce the optimal shape from `lhs` and `rhs` shapes.
It is probably better be done in a separate pass that propagates
constant shapes, but I have not seen any benchmarks that would
benefit from this yet. So consider this as a workaround for a bigger
TODO issue.
The `ElementalAssignBufferization` case is from 465.tonto,
but I do not have performance results yet (I do not expect much).
---
.../HLFIR/Transforms/InlineHLFIRAssign.cpp | 9 +++-
.../Transforms/OptimizedBufferization.cpp | 13 +++--
flang/test/HLFIR/inline-hlfir-assign.fir | 10 ++--
...t-bufferization-elemental-assign-shape.fir | 54 +++++++++++++++++++
4 files changed, 75 insertions(+), 11 deletions(-)
create mode 100644 flang/test/HLFIR/opt-bufferization-elemental-assign-shape.fir
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
index 6e209cce07ad4..6c4a07be52a48 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
@@ -109,9 +109,14 @@ class InlineHLFIRAssignConversion
builder.setInsertionPoint(assign);
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
- mlir::Value shape = hlfir::genShape(loc, builder, lhs);
+ mlir::Value lhsShape = hlfir::genShape(loc, builder, lhs);
+ llvm::SmallVector<mlir::Value> lhsExtents =
+ hlfir::getIndexExtents(loc, builder, lhsShape);
+ mlir::Value rhsShape = hlfir::genShape(loc, builder, rhs);
+ llvm::SmallVector<mlir::Value> rhsExtents =
+ hlfir::getIndexExtents(loc, builder, rhsShape);
llvm::SmallVector<mlir::Value> extents =
- hlfir::getIndexExtents(loc, builder, shape);
+ fir::factory::deduceOptimalExtents(lhsExtents, rhsExtents);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
flangomp::shouldUseWorkshareLowering(assign));
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index e2ca754a1817a..d7373cc6cd7a2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -688,10 +688,17 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
mlir::Location loc = elemental->getLoc();
fir::FirOpBuilder builder(rewriter, elemental.getOperation());
- auto extents = hlfir::getIndexExtents(loc, builder, elemental.getShape());
+ auto rhsExtents = hlfir::getIndexExtents(loc, builder, elemental.getShape());
// create the loop at the assignment
builder.setInsertionPoint(match->assign);
+ hlfir::Entity lhs{match->array};
+ lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
+ mlir::Value lhsShape = hlfir::genShape(loc, builder, lhs);
+ llvm::SmallVector<mlir::Value> lhsExtents =
+ hlfir::getIndexExtents(loc, builder, lhsShape);
+ llvm::SmallVector<mlir::Value> extents =
+ fir::factory::deduceOptimalExtents(rhsExtents, lhsExtents);
// Generate a loop nest looping around the hlfir.elemental shape and clone
// hlfir.elemental region inside the inner loop
@@ -705,8 +712,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
rewriter.eraseOp(yield);
// Assign the element value to the array element for this iteration.
- auto arrayElement = hlfir::getElementAt(
- loc, builder, hlfir::Entity{match->array}, loopNest.oneBasedIndices);
+ auto arrayElement =
+ hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
builder.create<hlfir::AssignOp>(
loc, elementValue, arrayElement, /*realloc=*/false,
/*keep_lhs_length_if_realloc=*/false, match->assign.getTemporaryLhs());
diff --git a/flang/test/HLFIR/inline-hlfir-assign.fir b/flang/test/HLFIR/inline-hlfir-assign.fir
index f834e7971e3d5..db71720119e71 100644
--- a/flang/test/HLFIR/inline-hlfir-assign.fir
+++ b/flang/test/HLFIR/inline-hlfir-assign.fir
@@ -145,18 +145,16 @@ func.func @_QPtest3(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"})
// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_10]]#1, %[[VAL_1]] : index
// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_12]], %[[VAL_14]] : (index, index) -> !fir.shape<2>
// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_2]]:%[[VAL_9]]#1:%[[VAL_2]], %[[VAL_2]]:%[[VAL_10]]#1:%[[VAL_2]]) shape %[[VAL_15]] : (!fir.box<!fir.array<?x?xf32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box<!fir.array<?x?xf32>>
-// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_14]] step %[[VAL_2]] unordered {
-// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_12]] step %[[VAL_2]] unordered {
+// CHECK: fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
+// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_18]], %[[VAL_17]]) : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<f32>
// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_18]], %[[VAL_17]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
// CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_21]] : f32, !fir.ref<f32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_22:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_1]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
-// CHECK: %[[VAL_23:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
-// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_2]] to %[[VAL_23]]#1 step %[[VAL_2]] unordered {
-// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_2]] to %[[VAL_22]]#1 step %[[VAL_2]] unordered {
+// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
+// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_2]] unordered {
// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_25]], %[[VAL_24]]) : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f32>
// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_25]], %[[VAL_24]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
diff --git a/flang/test/HLFIR/opt-bufferization-elemental-assign-shape.fir b/flang/test/HLFIR/opt-bufferization-elemental-assign-shape.fir
new file mode 100644
index 0000000000000..b55848225a41c
--- /dev/null
+++ b/flang/test/HLFIR/opt-bufferization-elemental-assign-shape.fir
@@ -0,0 +1,54 @@
+// RUN: fir-opt --opt-bufferization %s | FileCheck %s
+
+// Check that the elemental+assign are rewritten into a loop
+// with "optimal" loop bounds, e.g. that we use constants
+// when possible.
+
+// CHECK-LABEL: func.func @_QPtest1(
+// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
+// CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_0]] unordered {
+// CHECK-NOT: hlfir.assign{{.*}}array
+func.func @_QPtest1(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<!fir.array<3xf32>> {fir.bindc_name = "y"}) {
+ %c0 = arith.constant 0 : index
+ %c3 = arith.constant 3 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+ %2 = fir.shape %c3 : (index) -> !fir.shape<1>
+ %3:2 = hlfir.declare %arg1(%2) dummy_scope %0 {uniq_name = "_QFtest1Ey"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
+ %4:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+ %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+ %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+ ^bb0(%arg2: index):
+ %7 = hlfir.designate %1#0 (%arg2) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %8 = fir.load %7 : !fir.ref<f32>
+ %9 = arith.addf %8, %8 fastmath<contract> : f32
+ hlfir.yield_element %9 : f32
+ }
+ hlfir.assign %6 to %3#0 : !hlfir.expr<?xf32>, !fir.ref<!fir.array<3xf32>>
+ hlfir.destroy %6 : !hlfir.expr<?xf32>
+ return
+}
+
+// CHECK-LABEL: func.func @_QPtest2(
+// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
+// CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_0]] to %[[VAL_1]] step %[[VAL_0]] unordered {
+// CHECK-NOT: hlfir.assign{{.*}}array
+func.func @_QPtest2(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<!fir.array<3xf32>> {fir.bindc_name = "y"}) {
+ %c3 = arith.constant 3 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest2Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+ %2 = fir.shape %c3 : (index) -> !fir.shape<1>
+ %3:2 = hlfir.declare %arg1(%2) dummy_scope %0 {uniq_name = "_QFtest2Ey"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
+ %4 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
+ ^bb0(%arg2: index):
+ %5 = hlfir.designate %3#0 (%arg2) : (!fir.ref<!fir.array<3xf32>>, index) -> !fir.ref<f32>
+ %6 = fir.load %5 : !fir.ref<f32>
+ %7 = arith.addf %6, %6 fastmath<contract> : f32
+ hlfir.yield_element %7 : f32
+ }
+ hlfir.assign %4 to %1#0 : !hlfir.expr<3xf32>, !fir.box<!fir.array<?xf32>>
+ hlfir.destroy %4 : !hlfir.expr<3xf32>
+ return
+}
More information about the flang-commits
mailing list