[flang-commits] [flang] 8f1671c - [flang][hlfir] Allow hlfir.assign expansion for array slices.

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Fri Sep 1 12:09:36 PDT 2023


Author: Slava Zakharin
Date: 2023-09-01T12:09:23-07:00
New Revision: 8f1671c065539965834132c682da05dd8858b42d

URL: https://github.com/llvm/llvm-project/commit/8f1671c065539965834132c682da05dd8858b42d
DIFF: https://github.com/llvm/llvm-project/commit/8f1671c065539965834132c682da05dd8858b42d.diff

LOG: [flang][hlfir] Allow hlfir.assign expansion for array slices.

This case is important for `Polyhedron/channel2`:
```
    u(2:M-1,1:N,new) = u(2:M-1,1:N,old) &
        +2.d0*dt*f(2:M-1,1:N)*v(2:M-1,1:N,mid) &
        -2.d0*dt/(2.d0*dx)*g*dhdx(2:M-1,1:N)
```

The slices of `u` on the left and the right hand sides are completely
disjoint, but `old` and `new` are unknown runtime values. So the slices
may also be identical rather than disjoint. For the purpose of
hlfir.assign expansion we do not care whether they are identical or
disjoint. Such kind of an answer does not fit well into the alias
analysis definition, so I added a very simplified check to handle
this case. This drops icelake execution time from 120 to 70 seconds.

Reviewed By: tblah

Differential Revision: https://reviews.llvm.org/D159323

Added: 
    flang/test/HLFIR/opt-array-slice-assign.fir

Modified: 
    flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Removed: 
    


################################################################################
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index fd7179e4b3510c..65c66eea221916 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -157,6 +157,83 @@ containsReadOrWriteEffectOn(const mlir::MemoryEffects::EffectInstance &effect,
   return mlir::AliasResult::NoAlias;
 }
 
+// Returns true if the given array references represent identical
+// or completely disjoint array slices. The callers may use this
+// method when the alias analysis reports an alias of some kind,
+// so that we can run Fortran specific analysis on the array slices
+// to see if they are identical or disjoint. Note that the alias
+// analysis are not able to give such an answer about the references.
+static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) {
+  if (ref1 == ref2)
+    return true;
+
+  auto des1 = ref1.getDefiningOp<hlfir::DesignateOp>();
+  auto des2 = ref2.getDefiningOp<hlfir::DesignateOp>();
+  // We only support a pair of designators right now.
+  if (!des1 || !des2)
+    return false;
+
+  if (des1.getMemref() != des2.getMemref()) {
+    // If the bases are different, then there is unknown overlap.
+    LLVM_DEBUG(llvm::dbgs() << "No identical base for:\n"
+                            << des1 << "and:\n"
+                            << des2 << "\n");
+    return false;
+  }
+
+  // Require all components of the designators to be the same.
+  // It might be too strict, e.g. we may probably allow for
+  // different type parameters.
+  if (des1.getComponent() != des2.getComponent() ||
+      des1.getComponentShape() != des2.getComponentShape() ||
+      des1.getSubstring() != des2.getSubstring() ||
+      des1.getComplexPart() != des2.getComplexPart() ||
+      des1.getShape() != des2.getShape() ||
+      des1.getTypeparams() != des2.getTypeparams()) {
+    LLVM_DEBUG(llvm::dbgs() << "Different designator specs for:\n"
+                            << des1 << "and:\n"
+                            << des2 << "\n");
+    return false;
+  }
+
+  if (des1.getIsTriplet() != des2.getIsTriplet()) {
+    LLVM_DEBUG(llvm::dbgs() << "Different sections for:\n"
+                            << des1 << "and:\n"
+                            << des2 << "\n");
+    return false;
+  }
+
+  // Analyze the subscripts.
+  // For example:
+  //   hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %0)  shape %9
+  //   hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %1)  shape %9
+  //
+  // If all the triplets (section speficiers) are the same, then
+  // we do not care if %0 is equal to %1 - the slices are either
+  // identical or completely disjoint.
+  //
+  // TODO: if we can prove that all non-triplet subscripts are different
+  // (by value), then we may return true regardless of the triplet
+  // values - the sections must be completely disjoint.
+  auto des1It = des1.getIndices().begin();
+  auto des2It = des2.getIndices().begin();
+  for (bool isTriplet : des1.getIsTriplet()) {
+    if (isTriplet) {
+      for (int i = 0; i < 3; ++i)
+        if (*des1It++ != *des2It++) {
+          LLVM_DEBUG(llvm::dbgs() << "Triplet mismatch for:\n"
+                                  << des1 << "and:\n"
+                                  << des2 << "\n");
+          return false;
+        }
+    } else {
+      ++des1It;
+      ++des2It;
+    }
+  }
+  return true;
+}
+
 std::optional<ElementalAssignBufferization::MatchInfo>
 ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
   mlir::Operation::user_range users = elemental->getUsers();
@@ -274,7 +351,7 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
     if (!res.isPartial()) {
       if (auto designate =
               effect.getValue().getDefiningOp<hlfir::DesignateOp>()) {
-        if (designate.getMemref() != match.array) {
+        if (!areIdenticalOrDisjointSlices(match.array, designate.getMemref())) {
           LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate
                                   << " at " << elemental.getLoc() << "\n");
           return std::nullopt;
@@ -291,7 +368,7 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
           continue;
       }
     }
-    LLVM_DEBUG(llvm::dbgs() << "diasllowed side-effect: " << effect.getValue()
+    LLVM_DEBUG(llvm::dbgs() << "disallowed side-effect: " << effect.getValue()
                             << " for " << elemental.getLoc() << "\n");
     return std::nullopt;
   }
@@ -484,6 +561,8 @@ mlir::LogicalResult VariableAssignBufferization::matchAndRewrite(
 
   fir::AliasAnalysis aliasAnalysis;
   mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
+  // TODO: use areIdenticalOrDisjointSlices() to check if
+  // we can still do the expansion.
   if (!aliasRes.isNo()) {
     LLVM_DEBUG(llvm::dbgs() << "VariableAssignBufferization:\n"
                             << "\tLHS: " << lhs << "\n"

diff --git a/flang/test/HLFIR/opt-array-slice-assign.fir b/flang/test/HLFIR/opt-array-slice-assign.fir
new file mode 100644
index 00000000000000..dc42cbd302b87e
--- /dev/null
+++ b/flang/test/HLFIR/opt-array-slice-assign.fir
@@ -0,0 +1,130 @@
+// Test optimized bufferization for hlfir.assign of array
+// slices, e.g.:
+//   x(2:7999,1:120,new) = (x(2:7999,1:120,old))
+// We can expand hlfir.assign if the slices are either identical
+// or completely disjoint. In case they are identical, we still
+// need to make sure that the one-based indices are used
+// uniformly for both LHS and RHS.
+// RUN: fir-opt --opt-bufferization %s | FileCheck %s
+
+func.func @_QPtest1(%arg0: !fir.ref<!fir.array<8000x120x3xf32>> {fir.bindc_name = "x"}) {
+  %c7998 = arith.constant 7998 : index
+  %c1 = arith.constant 1 : index
+  %c7999 = arith.constant 7999 : index
+  %c2 = arith.constant 2 : index
+  %c3 = arith.constant 3 : index
+  %c120 = arith.constant 120 : index
+  %c8000 = arith.constant 8000 : index
+  %0 = fir.alloca i32 {bindc_name = "new", uniq_name = "_QFtest1Enew"}
+  %1:2 = hlfir.declare %0 {uniq_name = "_QFtest1Enew"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2 = fir.alloca i32 {bindc_name = "old", uniq_name = "_QFtest1Eold"}
+  %3:2 = hlfir.declare %2 {uniq_name = "_QFtest1Eold"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %4 = fir.shape %c8000, %c120, %c3 : (index, index, index) -> !fir.shape<3>
+  %5:2 = hlfir.declare %arg0(%4) {uniq_name = "_QFtest1Ex"} : (!fir.ref<!fir.array<8000x120x3xf32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<8000x120x3xf32>>, !fir.ref<!fir.array<8000x120x3xf32>>)
+  %6 = fir.load %3#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> i64
+  %8 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2>
+  %9 = hlfir.designate %5#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %7)  shape %8 : (!fir.ref<!fir.array<8000x120x3xf32>>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
+  %10 = hlfir.elemental %8 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> {
+  ^bb0(%arg1: index, %arg2: index):
+    %14 = hlfir.designate %9 (%arg1, %arg2)  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+    %15 = fir.load %14 : !fir.ref<f32>
+    %16 = hlfir.no_reassoc %15 : f32
+    hlfir.yield_element %16 : f32
+  }
+  %11 = fir.load %1#0 : !fir.ref<i32>
+  %12 = fir.convert %11 : (i32) -> i64
+  %13 = hlfir.designate %5#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %12)  shape %8 : (!fir.ref<!fir.array<8000x120x3xf32>>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
+  hlfir.assign %10 to %13 : !hlfir.expr<7998x120xf32>, !fir.box<!fir.array<7998x120xf32>>
+  hlfir.destroy %10 : !hlfir.expr<7998x120xf32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPtest1(
+// CHECK:           fir.do_loop %[[VAL_21:.*]] =
+// CHECK:             fir.do_loop %[[VAL_22:.*]] =
+// CHECK:               %[[VAL_23:.*]] = hlfir.designate %[[VAL_17:.*]] (%[[VAL_22]], %[[VAL_21]])  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f32>
+// CHECK:               %[[VAL_25:.*]] = hlfir.no_reassoc %[[VAL_24]] : f32
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_20:.*]] (%[[VAL_22]], %[[VAL_21]])  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+// CHECK:               hlfir.assign %[[VAL_25]] to %[[VAL_26]] : f32, !fir.ref<f32>
+// CHECK:             }
+// CHECK:           }
+
+func.func @_QPtest2(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "x"}) {
+  %c120 = arith.constant 120 : index
+  %c7998 = arith.constant 7998 : index
+  %c1 = arith.constant 1 : index
+  %c7999 = arith.constant 7999 : index
+  %c2 = arith.constant 2 : index
+  %0:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest2Ex"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>)
+  %1 = fir.load %0#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+  %2 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2>
+  %3 = hlfir.designate %1 (%c2:%c7999:%c1, %c1:%c120:%c1, %c2)  shape %2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
+  %4 = hlfir.elemental %2 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> {
+  ^bb0(%arg1: index, %arg2: index):
+    %6 = hlfir.designate %3 (%arg1, %arg2)  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+    %7 = fir.load %6 : !fir.ref<f32>
+    %8 = hlfir.no_reassoc %7 : f32
+    hlfir.yield_element %8 : f32
+  }
+  %5 = hlfir.designate %1 (%c2:%c7999:%c1, %c1:%c120:%c1, %c1)  shape %2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
+  hlfir.assign %4 to %5 : !hlfir.expr<7998x120xf32>, !fir.box<!fir.array<7998x120xf32>>
+  hlfir.destroy %4 : !hlfir.expr<7998x120xf32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPtest2(
+// CHECK:           fir.do_loop %[[VAL_11:.*]] =
+// CHECK:             fir.do_loop %[[VAL_12:.*]] =
+// CHECK:               %[[VAL_13:.*]] = hlfir.designate %[[VAL_9:.*]] (%[[VAL_12]], %[[VAL_11]])  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<f32>
+// CHECK:               %[[VAL_15:.*]] = hlfir.no_reassoc %[[VAL_14]] : f32
+// CHECK:               %[[VAL_16:.*]] = hlfir.designate %[[VAL_10:.*]] (%[[VAL_12]], %[[VAL_11]])  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+// CHECK:               hlfir.assign %[[VAL_15]] to %[[VAL_16]] : f32, !fir.ref<f32>
+// CHECK:             }
+// CHECK:           }
+
+func.func @_QPtest3(%arg0: !fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>> {fir.bindc_name = "x"}) {
+  %c7998 = arith.constant 7998 : index
+  %c7999 = arith.constant 7999 : index
+  %c2 = arith.constant 2 : index
+  %c3 = arith.constant 3 : index
+  %c120 = arith.constant 120 : index
+  %c8000 = arith.constant 8000 : index
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.alloca i32 {bindc_name = "new", uniq_name = "_QFtest3Enew"}
+  %1:2 = hlfir.declare %0 {uniq_name = "_QFtest3Enew"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2 = fir.alloca i32 {bindc_name = "old", uniq_name = "_QFtest3Eold"}
+  %3:2 = hlfir.declare %2 {uniq_name = "_QFtest3Eold"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %4 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %5:2 = hlfir.declare %arg0(%4) {uniq_name = "_QFtest3Ex"} : (!fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>>, !fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>>)
+  %6 = hlfir.designate %5#0 (%c1)  : (!fir.ref<!fir.array<10x!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>>, index) -> !fir.ref<!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>
+  %7 = fir.shape %c8000, %c120, %c3 : (index, index, index) -> !fir.shape<3>
+  %8 = fir.load %3#0 : !fir.ref<i32>
+  %9 = fir.convert %8 : (i32) -> i64
+  %10 = fir.shape %c7998, %c120 : (index, index) -> !fir.shape<2>
+  %11 = hlfir.designate %6{"x"} <%7> (%c2:%c7999:%c1, %c1:%c120:%c1, %9)  shape %10 : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>, !fir.shape<3>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
+  %12 = hlfir.elemental %10 unordered : (!fir.shape<2>) -> !hlfir.expr<7998x120xf32> {
+  ^bb0(%arg1: index, %arg2: index):
+    %16 = hlfir.designate %11 (%arg1, %arg2)  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+    %17 = fir.load %16 : !fir.ref<f32>
+    %18 = hlfir.no_reassoc %17 : f32
+    hlfir.yield_element %18 : f32
+  }
+  %13 = fir.load %1#0 : !fir.ref<i32>
+  %14 = fir.convert %13 : (i32) -> i64
+  %15 = hlfir.designate %6{"x"} <%7> (%c2:%c7999:%c1, %c1:%c120:%c1, %14)  shape %10 : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.array<8000x120x3xf32>}>>, !fir.shape<3>, index, index, index, index, index, index, i64, !fir.shape<2>) -> !fir.box<!fir.array<7998x120xf32>>
+  hlfir.assign %12 to %15 : !hlfir.expr<7998x120xf32>, !fir.box<!fir.array<7998x120xf32>>
+  hlfir.destroy %12 : !hlfir.expr<7998x120xf32>
+  return
+}
+// CHECK-LABEL:   func.func @_QPtest3(
+// CHECK:           fir.do_loop %[[VAL_24:.*]] =
+// CHECK:             fir.do_loop %[[VAL_25:.*]] =
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_20:.*]] (%[[VAL_25]], %[[VAL_24]])  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f32>
+// CHECK:               %[[VAL_28:.*]] = hlfir.no_reassoc %[[VAL_27]] : f32
+// CHECK:               %[[VAL_29:.*]] = hlfir.designate %[[VAL_23:.*]] (%[[VAL_25]], %[[VAL_24]])  : (!fir.box<!fir.array<7998x120xf32>>, index, index) -> !fir.ref<f32>
+// CHECK:               hlfir.assign %[[VAL_28]] to %[[VAL_29]] : f32, !fir.ref<f32>
+// CHECK:             }
+// CHECK:           }


        


More information about the flang-commits mailing list