[flang-commits] [flang] [flang][hlfir] Shallow copy elemental results with allocatable components. (PR #68040)
via flang-commits
flang-commits at lists.llvm.org
Mon Oct 2 14:17:41 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
<details>
<summary>Changes</summary>
To avoid the overhead of deallocating the allocatable components of the elemental
temporary result on every iteration of the elemental operation, we can use
a shallow copy instead of a deep-copy assignment.
---
Full diff: https://github.com/llvm/llvm-project/pull/68040.diff
2 Files Affected:
- (modified) flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp (+29-20)
- (added) flang/test/HLFIR/elemental-shallow-copy.fir (+31)
``````````diff
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 3ddaf1f2af8fddb..3da8666d7c53f70 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -791,26 +791,35 @@ struct ElementalOpConversion
// Assign the element value to the temp element for this iteration.
auto tempElement =
hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices);
- // FIXME: if the elemental result is a function result temporary
- // of a derived type, we have to make sure that we are either
- // deallocate any allocatable/automatic components after the assignment
- // or that we do not do the deep copy with the AssignOp. The latter
- // seems to be preferrable, because the deep copy is more expensive.
- // The shallow copy may be done with a load/store of the RecordType scalar.
- builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
- /*realloc=*/false,
- /*keep_lhs_length_if_realloc=*/false,
- /*temporary_lhs=*/true);
- // hlfir.yield_element implicitly marks the end-of-life its operand if
- // it is an expression created in the hlfir.elemental (since it is its
- // last use and an hlfir.destroy could not be created afterwards)
- // Now that this node has been removed and the expression has been used in
- // the assign, insert an hlfir.destroy to mark the expression end-of-life.
- // If the expression creation allocated a buffer on the heap inside the
- // loop, this will ensure the buffer properly deallocated.
- if (elementValue.getType().isa<hlfir::ExprType>() &&
- wasCreatedInCurrentBlock(elementValue, builder))
- builder.create<hlfir::DestroyOp>(loc, elementValue);
+ // If the elemental result is a temporary of a derived type,
+ // we can avoid the deep copy implied by the AssignOp and just
+ // do the shallow copy with load/store. This helps avoiding the overhead
+ // of deallocating allocatable components of the temporary (if any)
+ // on each iteration of the elemental operation.
+ auto asExpr = elementValue.getDefiningOp<hlfir::AsExprOp>();
+ auto elemType = hlfir::getFortranElementType(elementValue.getType());
+ if (asExpr && asExpr.isMove() && mlir::isa<fir::RecordType>(elemType) &&
+ hlfir::mayHaveAllocatableComponent(elemType) &&
+ wasCreatedInCurrentBlock(elementValue, builder)) {
+ auto load = builder.create<fir::LoadOp>(loc, asExpr.getVar());
+ builder.create<fir::StoreOp>(loc, load, tempElement);
+ } else {
+ builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
+ /*realloc=*/false,
+ /*keep_lhs_length_if_realloc=*/false,
+ /*temporary_lhs=*/true);
+
+ // hlfir.yield_element implicitly marks the end-of-life its operand if
+ // it is an expression created in the hlfir.elemental (since it is its
+ // last use and an hlfir.destroy could not be created afterwards)
+ // Now that this node has been removed and the expression has been used in
+ // the assign, insert an hlfir.destroy to mark the expression end-of-life.
+ // If the expression creation allocated a buffer on the heap inside the
+ // loop, this will ensure the buffer properly deallocated.
+ if (elementValue.getType().isa<hlfir::ExprType>() &&
+ wasCreatedInCurrentBlock(elementValue, builder))
+ builder.create<hlfir::DestroyOp>(loc, elementValue);
+ }
builder.restoreInsertionPoint(insPt);
mlir::Value bufferizedExpr =
diff --git a/flang/test/HLFIR/elemental-shallow-copy.fir b/flang/test/HLFIR/elemental-shallow-copy.fir
new file mode 100644
index 000000000000000..c57a2766e318dee
--- /dev/null
+++ b/flang/test/HLFIR/elemental-shallow-copy.fir
@@ -0,0 +1,31 @@
+// Check that an elemental result of a derived type with an allocatable
+// component is shallow-copied into the array result.
+// RUN: fir-opt %s --bufferize-hlfir | FileCheck %s
+
+func.func @_QMtypesPtest() {
+ %false = arith.constant false
+ %c1 = arith.constant 1 : index
+ %0 = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
+ %11 = fir.shape %c1 : (index) -> !fir.shape<1>
+ %18 = fir.alloca !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = "y", uniq_name = "_QMtypesFtestEy"}
+ %19:2 = hlfir.declare %18(%11) {uniq_name = "_QMtypesFtestEy"} : (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
+ %23 = hlfir.elemental %11 : (!fir.shape<1>) -> !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {
+ ^bb0(%arg0: index):
+ %26:2 = hlfir.declare %0 {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
+ %27 = hlfir.as_expr %26#0 move %false : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, i1) -> !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+ hlfir.yield_element %27 : !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+ }
+ hlfir.assign %23 to %19#0 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>
+ hlfir.destroy %23 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+ return
+}
+// CHECK-LABEL: func.func @_QMtypesPtest() {
+// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
+// CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%{{.*}}) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
+// CHECK: fir.do_loop %[[VAL_10:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
+// CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_10]]) : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, index) -> !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK: fir.store %[[VAL_16]] to %[[VAL_15]] : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK: }
``````````
</details>
https://github.com/llvm/llvm-project/pull/68040
More information about the flang-commits
mailing list