[flang-commits] [flang] [flang] Canonicalize fir.array_coor by pulling in embox/rebox. (PR #92858)
via flang-commits
flang-commits at lists.llvm.org
Tue May 21 08:07:57 PDT 2024
================
@@ -417,6 +423,93 @@ mlir::LogicalResult fir::ArrayCoorOp::verify() {
return mlir::success();
}
+// Pull fir.embox and fir.rebox into fir.array_coor when possible.
+struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
+ using mlir::OpRewritePattern<fir::ArrayCoorOp>::OpRewritePattern;
+ mlir::LogicalResult
+ matchAndRewrite(fir::ArrayCoorOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ mlir::Value memref = op.getMemref();
+ if (!mlir::isa<fir::BaseBoxType>(memref.getType()))
+ return mlir::failure();
+
+ mlir::Value boxedMemref, boxedShape, boxedSlice;
+ if (auto emboxOp =
+ mlir::dyn_cast_or_null<fir::EmboxOp>(memref.getDefiningOp())) {
+ boxedMemref = emboxOp.getMemref();
+ boxedShape = emboxOp.getShape();
+ boxedSlice = emboxOp.getSlice();
+ // If any operand that is not currently supported for migration
+ // to ArrayCoorOp is present, do not rewrite.
+ if (!emboxOp.getTypeparams().empty() || emboxOp.getSourceBox() ||
+ emboxOp.getAccessMap())
+ return mlir::failure();
+ } else if (auto reboxOp = mlir::dyn_cast_or_null<fir::ReboxOp>(
+ memref.getDefiningOp())) {
+ boxedMemref = reboxOp.getBox();
+ boxedShape = reboxOp.getShape();
+ boxedSlice = reboxOp.getSlice();
+ } else {
+ return mlir::failure();
+ }
+
+ // Slices changing the number of dimensions are not supported
+ // for array_coor yet.
+ unsigned origBoxRank;
+ if (mlir::isa<fir::BaseBoxType>(boxedMemref.getType()))
+ origBoxRank = fir::getBoxRank(boxedMemref.getType());
+ else if (auto arrTy = mlir::dyn_cast<fir::SequenceType>(
+ fir::unwrapRefType(boxedMemref.getType())))
+ origBoxRank = arrTy.getDimension();
+ else
+ return mlir::failure();
+
+ if (fir::getBoxRank(memref.getType()) != origBoxRank)
+ return mlir::failure();
+
+ // Slices with substring are not supported by array_coor.
+ if (boxedSlice)
+ if (auto sliceOp =
+ mlir::dyn_cast_or_null<fir::SliceOp>(boxedSlice.getDefiningOp()))
+ if (!sliceOp.getSubstr().empty())
+ return mlir::failure();
+
+ // If embox/rebox and array_coor have conflicting shapes or slices,
+ // do nothing.
+ if (op.getShape() && boxedShape && boxedShape != op.getShape())
+ return mlir::failure();
+ if (op.getSlice() && boxedSlice && boxedSlice != op.getSlice())
+ return mlir::failure();
+
+ // TODO: temporarily avoid producing array_coor with a shape shift
+ // and a plain array reference (this seems to be a limitation of the
+ // ArrayCoorOp verifier).
+ if (!mlir::isa<fir::BaseBoxType>(boxedMemref.getType())) {
+ if (boxedShape) {
+ if (mlir::isa<fir::ShiftType>(boxedShape.getType()))
+ return mlir::failure();
+ } else if (op.getShape() &&
+ mlir::isa<fir::ShiftType>(op.getShape().getType())) {
+ return mlir::failure();
+ }
+ }
+
+ rewriter.modifyOpInPlace(op, [&]() {
+ op.getMemrefMutable().assign(boxedMemref);
+ if (boxedShape)
+ op.getShapeMutable().assign(boxedShape);
----------------
jeanPerier wrote:
A fir.array_coor on a fir.box is one based unless there is a fir.shift/fir.shape_shift operand, so the canonicalization pattern should not add lower bounds from the embox/rebox unconditionally: there is no guarantee they match the lower bounds used by the fir.array_coor addressing.
The following code now lowers to different LLVM IR:
```
func.func @test3(%arg0: !fir.ref<!fir.array<10xi32>>) -> !fir.ref<i32> {
%c10 = arith.constant 10 : index
%c12 = arith.constant 12 : index
%c19 = arith.constant 19 : index
%1 = fir.shape_shift %c10, %c19 : (index, index) -> !fir.shapeshift<1>
%2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<10xi32>>
%3 = fir.array_coor %2 %c12 : (!fir.box<!fir.array<10xi32>>, index) -> !fir.ref<i32>
return %3 : !fir.ref<i32>
}
```
- With your patch (`tco -o -`): `getelementptr i32, ptr %0, i64 2`
- Before your patch (`tco -o - | opt -S -O2 -o -`): `getelementptr i8, ptr %0, i64 44`
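(Spelling out the addressing rule above: with no shape operand, the fir.array_coor on the box is one based, so index 12 designates element 11 of the i32 array, i.e. byte offset 44; after the rewrite the shape_shift lower bound 10 is applied to the index instead, giving element offset 12 - 10 = 2, i.e. byte offset 8.)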
Even trickier: if both the fir.array_coor and the embox have lower bounds and there is a slice, there is no way to fold that into a single array_coor at all, because the lower bounds of a fir.array_coor apply to the fir.slice when one is present, not to the fir.array_coor indices.
```
func.func @test4(%arg0: !fir.ref<!fir.array<10xi32>>) -> !fir.ref<i32> {
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%c12 = arith.constant 12 : index
%c19 = arith.constant 19 : index
%1 = fir.shape_shift %c10, %c19 : (index, index) -> !fir.shapeshift<1>
%s = fir.slice %c10, %c19, %c1 : (index, index, index) -> !fir.slice<1>
%2 = fir.embox %arg0(%1) [%s] : (!fir.ref<!fir.array<10xi32>>, !fir.shapeshift<1>, !fir.slice<1>) -> !fir.box<!fir.array<10xi32>>
%3 = fir.array_coor %2(%1) %c12 : (!fir.box<!fir.array<10xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
return %3 : !fir.ref<i32>
}
```
- With your patch (`tco -o -`): `getelementptr i32, ptr %0, i64 11`
- Before your patch (`tco -o - | opt -S -O2 -o -`): `getelementptr i8, ptr %0, i64 8`
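(If I spell out the same arithmetic here: before the patch the box already embeds the slice, so the shape_shift lower bound 10 applies to the index, giving element offset 12 - 10 = 2, i.e. byte offset 8; after the folding, the lower bound applies to the slice triple instead and the index becomes one based relative to the slice, giving element offset (12 - 1) * 1 = 11, i.e. byte offset 44.)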
I am not sure these patterns can currently be generated from Fortran, but we should preserve FIR semantics here to avoid trouble when other optimizations kick in.
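One conservative option (just a sketch, not necessarily the fix you want; `hasLowerBounds` is a hypothetical helper layered on top of the pattern as quoted above) would be to refuse the fold whenever the embox/rebox shape carries explicit lower bounds that the fir.array_coor does not already use, and whenever such lower bounds are combined with a slice:
```cpp
// Hypothetical helper: fir.shift and fir.shape_shift carry explicit lower
// bounds, a plain fir.shape does not.
static bool hasLowerBounds(mlir::Value shape) {
  return shape &&
         mlir::isa<fir::ShiftType, fir::ShapeShiftType>(shape.getType());
}

// Inside SimplifyArrayCoorOp::matchAndRewrite, before modifyOpInPlace:
// do not fold when the embox/rebox applies lower bounds that the
// fir.array_coor does not repeat, since the indices would silently switch
// from one-based to shifted addressing.
if (hasLowerBounds(boxedShape) && boxedShape != op.getShape())
  return mlir::failure();
// With a slice, the lower bounds apply to the slice triple rather than to
// the indices, so the combination cannot be folded at all.
if (boxedSlice && hasLowerBounds(boxedShape))
  return mlir::failure();
```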
https://github.com/llvm/llvm-project/pull/92858