[flang-commits] [flang] [flang] Canonicalize fir.array_coor by pulling in embox/rebox. (PR #92858)
via flang-commits
flang-commits at lists.llvm.org
Tue May 21 08:07:57 PDT 2024
================
@@ -417,6 +423,93 @@ mlir::LogicalResult fir::ArrayCoorOp::verify() {
return mlir::success();
}
+// Pull fir.embox and fir.rebox into fir.array_coor when possible.
+struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
+ using mlir::OpRewritePattern<fir::ArrayCoorOp>::OpRewritePattern;
+ mlir::LogicalResult
+ matchAndRewrite(fir::ArrayCoorOp op,
+ mlir::PatternRewriter &rewriter) const override {
+ mlir::Value memref = op.getMemref();
+ if (!mlir::isa<fir::BaseBoxType>(memref.getType()))
+ return mlir::failure();
+
+ mlir::Value boxedMemref, boxedShape, boxedSlice;
+ if (auto emboxOp =
+ mlir::dyn_cast_or_null<fir::EmboxOp>(memref.getDefiningOp())) {
+ boxedMemref = emboxOp.getMemref();
+ boxedShape = emboxOp.getShape();
+ boxedSlice = emboxOp.getSlice();
+ // If any operand that is not currently supported for migration
+ // to ArrayCoorOp is present, do not rewrite.
+ if (!emboxOp.getTypeparams().empty() || emboxOp.getSourceBox() ||
+ emboxOp.getAccessMap())
+ return mlir::failure();
+ } else if (auto reboxOp = mlir::dyn_cast_or_null<fir::ReboxOp>(
+ memref.getDefiningOp())) {
+ boxedMemref = reboxOp.getBox();
+ boxedShape = reboxOp.getShape();
+ boxedSlice = reboxOp.getSlice();
+ } else {
+ return mlir::failure();
+ }
+
+ // Slices changing the number of dimensions are not supported
+ // for array_coor yet.
+ unsigned origBoxRank;
+ if (mlir::isa<fir::BaseBoxType>(boxedMemref.getType()))
+ origBoxRank = fir::getBoxRank(boxedMemref.getType());
+ else if (auto arrTy = mlir::dyn_cast<fir::SequenceType>(
+ fir::unwrapRefType(boxedMemref.getType())))
+ origBoxRank = arrTy.getDimension();
+ else
+ return mlir::failure();
+
+ if (fir::getBoxRank(memref.getType()) != origBoxRank)
+ return mlir::failure();
+
+ // Slices with substring are not supported by array_coor.
+ if (boxedSlice)
+ if (auto sliceOp =
+ mlir::dyn_cast_or_null<fir::SliceOp>(boxedSlice.getDefiningOp()))
+ if (!sliceOp.getSubstr().empty())
+ return mlir::failure();
+
+ // If embox/rebox and array_coor have conflicting shapes or slices,
+ // do nothing.
+ if (op.getShape() && boxedShape && boxedShape != op.getShape())
+ return mlir::failure();
+ if (op.getSlice() && boxedSlice && boxedSlice != op.getSlice())
+ return mlir::failure();
+
+ // TODO: temporarily avoid producing array_coor with a shape shift
+ // and a plain array reference (this seems to be a limitation of the
+ // ArrayCoorOp verifier).
+ if (!mlir::isa<fir::BaseBoxType>(boxedMemref.getType())) {
+ if (boxedShape) {
+ if (mlir::isa<fir::ShiftType>(boxedShape.getType()))
+ return mlir::failure();
+ } else if (op.getShape() &&
+ mlir::isa<fir::ShiftType>(op.getShape().getType())) {
+ return mlir::failure();
+ }
+ }
+
+ rewriter.modifyOpInPlace(op, [&]() {
+ op.getMemrefMutable().assign(boxedMemref);
+ if (boxedShape)
+ op.getShapeMutable().assign(boxedShape);
----------------
jeanPerier wrote:
A fir.array_coor on a fir.box is one based unless there is a fir.shift/fir.shape_shift operand, so the canonicalization pattern should not add lower bounds from the embox/rebox unconditionally: there is no guarantee they match the lower bounds used by the fir.array_coor addressing.
The following code now lowers to different LLVM IR:
```
func.func @test3(%arg0: !fir.ref<!fir.array<10xi32>>) -> !fir.ref<i32> {
%c10 = arith.constant 10 : index
%c12 = arith.constant 12 : index
%c19 = arith.constant 19 : index
%1 = fir.shape_shift %c10, %c19 : (index, index) -> !fir.shapeshift<1>
%2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<10xi32>>
%3 = fir.array_coor %2 %c12 : (!fir.box<!fir.array<10xi32>>, index) -> !fir.ref<i32>
return %3 : !fir.ref<i32>
}
```
- With your patch (`tco -o -`): `getelementptr i32, ptr %0, i64 2`
- Before your patch (`tco -o - | opt -S -O2 -o -`): `getelementptr i8, ptr %0, i64 44`
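(Spelling out the addressing rule above: with no shape operand, the fir.array_coor on the box is one based, so index 12 designates element 11 of the i32 array, i.e. byte offset 44; after the rewrite the shape_shift lower bound 10 is applied to the index instead, giving element offset 12 - 10 = 2, i.e. byte offset 8.)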
Even trickier: if both the fir.array_coor and the embox have lower bounds and there is a slice, there is no way to fold that into a single array_coor at all, because the lower bounds of a fir.array_coor apply to the fir.slice when one is present, not to the fir.array_coor indices.
```
func.func @test4(%arg0: !fir.ref<!fir.array<10xi32>>) -> !fir.ref<i32> {
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%c12 = arith.constant 12 : index
%c19 = arith.constant 19 : index
%1 = fir.shape_shift %c10, %c19 : (index, index) -> !fir.shapeshift<1>
%s = fir.slice %c10, %c19, %c1 : (index, index, index) -> !fir.slice<1>
%2 = fir.embox %arg0(%1) [%s] : (!fir.ref<!fir.array<10xi32>>, !fir.shapeshift<1>, !fir.slice<1>) -> !fir.box<!fir.array<10xi32>>
%3 = fir.array_coor %2(%1) %c12 : (!fir.box<!fir.array<10xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
return %3 : !fir.ref<i32>
}
```
- With your patch (`tco -o -`): `getelementptr i32, ptr %0, i64 11`
- Before your patch (`tco -o - | opt -S -O2 -o -`): `getelementptr i8, ptr %0, i64 8`
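(If I spell out the same arithmetic here: before the patch the box already embeds the slice, so the shape_shift lower bound 10 applies to the index, giving element offset 12 - 10 = 2, i.e. byte offset 8; after the folding, the lower bound applies to the slice triple instead and the index becomes one based relative to the slice, giving element offset (12 - 1) * 1 = 11, i.e. byte offset 44.)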
I am not sure these patterns can currently be generated from Fortran, but we should preserve FIR semantics here to avoid trouble when other optimizations kick in.
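One conservative option (just a sketch, not necessarily the fix you want; `hasLowerBounds` is a hypothetical helper layered on top of the pattern as quoted above) would be to refuse the fold whenever the embox/rebox shape carries explicit lower bounds that the fir.array_coor does not already use, and whenever such lower bounds are combined with a slice:
```cpp
// Hypothetical helper: fir.shift and fir.shape_shift carry explicit lower
// bounds, a plain fir.shape does not.
static bool hasLowerBounds(mlir::Value shape) {
  return shape &&
         mlir::isa<fir::ShiftType, fir::ShapeShiftType>(shape.getType());
}

// Inside SimplifyArrayCoorOp::matchAndRewrite, before modifyOpInPlace:
// do not fold when the embox/rebox applies lower bounds that the
// fir.array_coor does not repeat, since the indices would silently switch
// from one-based to shifted addressing.
if (hasLowerBounds(boxedShape) && boxedShape != op.getShape())
  return mlir::failure();
// With a slice, the lower bounds apply to the slice triple rather than to
// the indices, so the combination cannot be folded at all.
if (boxedSlice && hasLowerBounds(boxedShape))
  return mlir::failure();
```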
https://github.com/llvm/llvm-project/pull/92858