[flang-commits] [flang] 0e3fda6 - [flang][hlfir] allow assoicate where the expr is also used by shape_of

Fri Jul 7 02:26:46 PDT 2023

Author: Tom Eccles
Date: 2023-07-07T09:25:13Z
New Revision: 0e3fda6c72e3f04046df42f1eb1ad1519a8015ec

URL: https://github.com/llvm/llvm-project/commit/0e3fda6c72e3f04046df42f1eb1ad1519a8015ec
DIFF: https://github.com/llvm/llvm-project/commit/0e3fda6c72e3f04046df42f1eb1ad1519a8015ec.diff

LOG: [flang][hlfir] allow assoicate where the expr is also used by shape_of

This fixes the majority of cases where we hit the "hlfir.associate of
hlfir.expr with more than one use" TODO. In particular, this allows cam4
to be built.

hlfir.shape_of is just a way to delay reading shape information until
after intrinsics have been lowered to FIR runtime calls. It gets the
shape information from reading existing SSA values (e.g. fetching the
shape used when hlfir.declare'ing the variable).

Therefore hlfir.shape_of doesn't affect decisions about when to
deallocate the buffer.

Differential Revision: https://reviews.llvm.org/D154521

Added: 
    

Modified: 
    flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
    flang/test/HLFIR/associate-codegen.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 9b894027a9f203..f589cb8715ff1e 100644

--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -341,11 +341,30 @@ struct GetLengthOpConversion
   }
 };
 
-static bool allOtherUsesAreDestroys(mlir::Value value,
-                                    mlir::Operation *currentUse) {
+/// The current hlfir.associate lowering does not handle multiple uses of a
+/// non-trivial expression value because it generates the cleanup for the
+/// expression bufferization at hlfir.end_associate. If there was more than one
+/// hlfir.end_associate, it would be cleaned up multiple times, perhaps before
+/// one of the other uses.
+static bool allOtherUsesAreSafeForAssociate(mlir::Value value,
+                                            mlir::Operation *currentUse,
+                                            mlir::Operation *endAssociate) {
   for (mlir::Operation *useOp : value.getUsers())
-    if (!mlir::isa<hlfir::DestroyOp>(useOp) && useOp != currentUse)
+    if (!mlir::isa<hlfir::DestroyOp>(useOp) && useOp != currentUse) {
+      // hlfir.shape_of will not disrupt cleanup so it is safe for
+      // hlfir.associate. hlfir.shape_of might read the box dimensions and so it
+      // needs to come before the hflir.end_associate (which may deallocate).
+      if (mlir::isa<hlfir::ShapeOfOp>(useOp)) {
+        if (!endAssociate)
+          continue;
+        // not known to occur in practice:
+        if (useOp->getBlock() != endAssociate->getBlock())
+          TODO(endAssociate->getLoc(), "Associate split over multiple blocks");
+        if (useOp->isBeforeInBlock(endAssociate))
+          continue;
+      }
       return false;
+    }
   return true;
 }
 
@@ -370,6 +389,15 @@ struct AssociateOpConversion
     mlir::Value bufferizedExpr = getBufferizedExprStorage(adaptor.getSource());
     const bool isTrivialValue = fir::isa_trivial(bufferizedExpr.getType());
 
+    auto getEndAssociate =
+        [](hlfir::AssociateOp associate) -> mlir::Operation * {
+      for (mlir::Operation *useOp : associate->getUsers())
+        if (mlir::isa<hlfir::EndAssociateOp>(useOp))
+          return useOp;
+      // happens in some hand coded mlir in tests
+      return nullptr;
+    };
+
     auto replaceWith = [&](mlir::Value hlfirVar, mlir::Value firVar,
                            mlir::Value flag) {
       // 0-dim variables may need special handling:
@@ -382,8 +410,8 @@ struct AssociateOpConversion
       //        !fir.ref<!fir.type<_T{y:i32}>>,
       //        i1)
       //
-      // !fir.box<!fir.heap<!fir.type<_T{y:i32}>>> value must be propagated
-      // as the box address !fir.ref<!fir.type<_T{y:i32}>>.
+      // !fir.box<!fir.heap<!fir.type<_T{y:i32}>>> value must be
+      // propagated as the box address !fir.ref<!fir.type<_T{y:i32}>>.
       mlir::Type associateHlfirVarType = associate.getResultTypes()[0];
       if (hlfirVar.getType().isa<fir::BaseBoxType>() &&
           !associateHlfirVarType.isa<fir::BaseBoxType>())
@@ -410,8 +438,9 @@ struct AssociateOpConversion
     // If this is the last use of the expression value and this is an hlfir.expr
     // that was bufferized, re-use the storage.
     // Otherwise, create a temp and assign the storage to it.
-    if (!isTrivialValue && allOtherUsesAreDestroys(associate.getSource(),
-                                                   associate.getOperation())) {
+    if (!isTrivialValue && allOtherUsesAreSafeForAssociate(
+                               associate.getSource(), associate.getOperation(),
+                               getEndAssociate(associate))) {
       // Re-use hlfir.expr buffer if this is the only use of the hlfir.expr
       // outside of the hlfir.destroy. Take on the cleaning-up responsibility
       // for the related hlfir.end_associate, and erase the hlfir.destroy (if

diff  --git a/flang/test/HLFIR/associate-codegen.fir b/flang/test/HLFIR/associate-codegen.fir
index 5127f78e783cc8..7bd70ad58d8ac3 100644
--- a/flang/test/HLFIR/associate-codegen.fir
+++ b/flang/test/HLFIR/associate-codegen.fir
@@ -194,6 +194,51 @@ func.func @test_0dim_box(%x : !fir.ref<!fir.box<!fir.heap<i32>>>) {
 // CHECK:           return
 // CHECK:         }
 
+// test that we support a hlfir.associate operation where the expr is also used in a hlfir.shape_of op
+func.func @test_shape_of(%arg0: !fir.ref<!fir.array<4x3xi32>>) {
+  %c4 = arith.constant 4 : index
+  %c3 = arith.constant 3 : index
+  %shape = fir.shape %c3, %c4 : (index, index) -> !fir.shape<2>
+  // %0 = hlfir.transpose %arg0 : (!fir.ref<!fir.array<4x3xi32>>) -> !hlfir.expr<3x4xi32>
+  %0 = hlfir.elemental %shape unordered : (!fir.shape<2>) -> !hlfir.expr<3x4xi32> {
+  ^bb0(%arg1: index, %arg2: index):
+    %4 = hlfir.designate %arg0 (%arg2, %arg1) : (!fir.ref<!fir.array<4x3xi32>>, index, index) -> !fir.ref<i32>
+    %5 = fir.load %4 : !fir.ref<i32>
+    hlfir.yield_element %5 : i32
+  }
+  %1 = hlfir.shape_of %0 : (!hlfir.expr<3x4xi32>) -> !fir.shape<2>
+  %2:3 = hlfir.associate %0(%1) {uniq_name = "adapt.valuebyref"} : (!hlfir.expr<3x4xi32>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4xi32>>, !fir.ref<!fir.array<3x4xi32>>, i1)
+  // ...
+  hlfir.end_associate %2#1, %2#2 : !fir.ref<!fir.array<3x4xi32>>, i1
+  return
+}
+// CHECK-LABEL:   func.func @test_shape_of(
+// CHECK-SAME:                             %[[VAL_0:.*]]: !fir.ref<!fir.array<4x3xi32>>) {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 4 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = fir.shape %[[VAL_2]], %[[VAL_1]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_4:.*]] = fir.allocmem !fir.array<3x4xi32> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<3x4xi32>>, !fir.shape<2>) -> (!fir.heap<!fir.array<3x4xi32>>, !fir.heap<!fir.array<3x4xi32>>)
+// CHECK:           %[[VAL_6:.*]] = arith.constant true
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_1]] step %[[VAL_7]] unordered {
+// CHECK:             fir.do_loop %[[VAL_9:.*]] = %[[VAL_7]] to %[[VAL_2]] step %[[VAL_7]] unordered {
+// CHECK:               %[[VAL_10:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_8]], %[[VAL_9]])  : (!fir.ref<!fir.array<4x3xi32>>, index, index) -> !fir.ref<i32>
+// CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+// CHECK:               %[[VAL_12:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_9]], %[[VAL_8]])  : (!fir.heap<!fir.array<3x4xi32>>, index, index) -> !fir.ref<i32>
+// CHECK:               hlfir.assign %[[VAL_11]] to %[[VAL_12]] temporary_lhs : i32, !fir.ref<i32>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[VAL_13:.*]] = fir.undefined tuple<!fir.heap<!fir.array<3x4xi32>>, i1>
+// CHECK:           %[[VAL_14:.*]] = fir.insert_value %[[VAL_13]], %[[VAL_6]], [1 : index] : (tuple<!fir.heap<!fir.array<3x4xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<3x4xi32>>, i1>
+// CHECK:           %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_5]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<3x4xi32>>, i1>, !fir.heap<!fir.array<3x4xi32>>) -> tuple<!fir.heap<!fir.array<3x4xi32>>, i1>
+// CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_5]]#0 : (!fir.heap<!fir.array<3x4xi32>>) -> !fir.ref<!fir.array<3x4xi32>>
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_5]]#1 : (!fir.heap<!fir.array<3x4xi32>>) -> !fir.ref<!fir.array<3x4xi32>>
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (!fir.ref<!fir.array<3x4xi32>>) -> !fir.heap<!fir.array<3x4xi32>>
+// CHECK:           fir.freemem %[[VAL_18]] : !fir.heap<!fir.array<3x4xi32>>
+// CHECK:           return
+// CHECK:         }
+
 func.func private @take_i4(!fir.ref<i32>)
 func.func private @take_r4(!fir.ref<f32>)
 func.func private @take_l4(!fir.ref<!fir.logical<4>>)