[flang-commits] [flang] f857bef - [flang][hlfir] Shallow copy elemental results with allocatable components. (#68040)

via flang-commits flang-commits at lists.llvm.org
Tue Oct 3 13:09:58 PDT 2023


Author: Slava Zakharin
Date: 2023-10-03T13:09:55-07:00
New Revision: f857bef59df9c6e127630b389bfaceeceef92923

URL: https://github.com/llvm/llvm-project/commit/f857bef59df9c6e127630b389bfaceeceef92923
DIFF: https://github.com/llvm/llvm-project/commit/f857bef59df9c6e127630b389bfaceeceef92923.diff

LOG: [flang][hlfir] Shallow copy elemental results with allocatable components. (#68040)

To avoid the overhead of deallocating allocatable components of the
elemental temporary result on every iteration of the elemental operation,
we can use a shallow copy instead of a deep-copy assign.

Added: 
    flang/test/HLFIR/elemental-shallow-copy.fir

Modified: 
    flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 3ddaf1f2af8fddb..3da8666d7c53f70 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -791,26 +791,35 @@ struct ElementalOpConversion
     // Assign the element value to the temp element for this iteration.
     auto tempElement =
         hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices);
-    // FIXME: if the elemental result is a function result temporary
-    // of a derived type, we have to make sure that we are either
-    // deallocate any allocatable/automatic components after the assignment
-    // or that we do not do the deep copy with the AssignOp. The latter
-    // seems to be preferrable, because the deep copy is more expensive.
-    // The shallow copy may be done with a load/store of the RecordType scalar.
-    builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
-                                    /*realloc=*/false,
-                                    /*keep_lhs_length_if_realloc=*/false,
-                                    /*temporary_lhs=*/true);
-    // hlfir.yield_element implicitly marks the end-of-life its operand if
-    // it is an expression created in the hlfir.elemental (since it is its
-    // last use and an hlfir.destroy could not be created afterwards)
-    // Now that this node has been removed and the expression has been used in
-    // the assign, insert an hlfir.destroy to mark the expression end-of-life.
-    // If the expression creation allocated a buffer on the heap inside the
-    // loop, this will ensure the buffer properly deallocated.
-    if (elementValue.getType().isa<hlfir::ExprType>() &&
-        wasCreatedInCurrentBlock(elementValue, builder))
-      builder.create<hlfir::DestroyOp>(loc, elementValue);
+    // If the elemental result is a temporary of a derived type,
+    // we can avoid the deep copy implied by the AssignOp and just
+    // do the shallow copy with load/store. This helps avoid the overhead
+    // of deallocating allocatable components of the temporary (if any)
+    // on each iteration of the elemental operation.
+    auto asExpr = elementValue.getDefiningOp<hlfir::AsExprOp>();
+    auto elemType = hlfir::getFortranElementType(elementValue.getType());
+    if (asExpr && asExpr.isMove() && mlir::isa<fir::RecordType>(elemType) &&
+        hlfir::mayHaveAllocatableComponent(elemType) &&
+        wasCreatedInCurrentBlock(elementValue, builder)) {
+      auto load = builder.create<fir::LoadOp>(loc, asExpr.getVar());
+      builder.create<fir::StoreOp>(loc, load, tempElement);
+    } else {
+      builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
+                                      /*realloc=*/false,
+                                      /*keep_lhs_length_if_realloc=*/false,
+                                      /*temporary_lhs=*/true);
+
+      // hlfir.yield_element implicitly marks the end-of-life of its operand if
+      // it is an expression created in the hlfir.elemental (since it is its
+      // last use and an hlfir.destroy could not be created afterwards)
+      // Now that this node has been removed and the expression has been used in
+      // the assign, insert an hlfir.destroy to mark the expression end-of-life.
+      // If the expression creation allocated a buffer on the heap inside the
+      // loop, this will ensure the buffer is properly deallocated.
+      if (elementValue.getType().isa<hlfir::ExprType>() &&
+          wasCreatedInCurrentBlock(elementValue, builder))
+        builder.create<hlfir::DestroyOp>(loc, elementValue);
+    }
     builder.restoreInsertionPoint(insPt);
 
     mlir::Value bufferizedExpr =

diff  --git a/flang/test/HLFIR/elemental-shallow-copy.fir b/flang/test/HLFIR/elemental-shallow-copy.fir
new file mode 100644
index 000000000000000..c57a2766e318dee
--- /dev/null
+++ b/flang/test/HLFIR/elemental-shallow-copy.fir
@@ -0,0 +1,31 @@
+// Check that an elemental result of a derived type with an allocatable
+// component is shallow-copied into the array result.
+// RUN: fir-opt %s --bufferize-hlfir | FileCheck %s
+
+func.func @_QMtypesPtest() {
+  %false = arith.constant false
+  %c1 = arith.constant 1 : index
+  %0 = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
+  %11 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %18 = fir.alloca !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = "y", uniq_name = "_QMtypesFtestEy"}
+  %19:2 = hlfir.declare %18(%11) {uniq_name = "_QMtypesFtestEy"} : (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
+  %23 = hlfir.elemental %11 : (!fir.shape<1>) -> !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {
+  ^bb0(%arg0: index):
+    %26:2 = hlfir.declare %0 {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
+    %27 = hlfir.as_expr %26#0 move %false : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, i1) -> !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+    hlfir.yield_element %27 : !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+  }
+  hlfir.assign %23 to %19#0 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>
+  hlfir.destroy %23 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+  return
+}
+// CHECK-LABEL:   func.func @_QMtypesPtest() {
+// CHECK:           %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
+// CHECK:           %[[VAL_6:.*]] = fir.allocmem !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%{{.*}}) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
+// CHECK:           fir.do_loop %[[VAL_10:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK:             %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
+// CHECK:             %[[VAL_15:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_10]])  : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, index) -> !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK:             %[[VAL_16:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK:             fir.store %[[VAL_16]] to %[[VAL_15]] : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
+// CHECK:           }


        


More information about the flang-commits mailing list