[flang-commits] [flang] [flang] Canonicalize sliced array access by fir.array_coor. (PR #197845)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Mon May 18 09:54:08 PDT 2026
https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/197845
>From add1b1d0b68e736af3569b1ca38e5812befe6e69 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Thu, 14 May 2026 16:46:40 -0700
Subject: [PATCH 1/2] [flang] Canonicalize sliced array access by
fir.array_coor.
This patch adds a canonicalization pattern for pulling rank-reducing
slices into `fir.array_coor`. Doing so preserves the original
rank of the array in `fir.array_coor`, which in turn helps represent
slice accesses in the memref dialect as accesses to the original array.
This way, further conversion to the affine dialect benefits from
exposing the math applied to the indices of all array dimensions.
For example, if a non-scalar slice depends on a loop IV, pulling
the slice into the original array access allows building access maps
that enable affine dependency analysis.
I considered the following three models for the addressing used
in `fir.array_coor` with rank-reducing slices:
1) Mixed model:
- Encoding
* Scalar-sliced dims: index is slice_lb
* Non-scalar dims: index is section-style (1-based)
- Pros
* Minimal canonicalization math
* Keeps scalar selection explicit in the index list
- Cons
* Semantics are mixed in one array_coor
* Harder to reason about/debug
* Requires special-case logic in later lowering (FIRToMemRef / CodeGen)
to interpret non-scalar dims differently from scalar dims
2) Uniform section-style model (all 1-based):
- Encoding
* Scalar-sliced dims: index is 1
* Non-scalar dims: index is section-style (1-based)
* Scalar selection recovered only from slice_lb
- Pros
* Conceptually clean and consistent (single index convention)
* Easier mental model
- Cons
* Needs coordinated changes in both lowerings
* Lowerings must always reconstruct scalar-selected coordinates
from slice metadata
* Higher migration risk unless all paths are updated together
3) CodeGen-compatible coordinate-space model (the implemented one):
- Encoding
* Scalar-sliced dims: index is slice_lb
* Non-scalar dims: index converted to coordinate-space (lb + idx - 1)
- Pros
* No behavioral changes needed in CodeGen logic
* Aligns canonicalized IR with existing coordinate-space assumptions
* Avoids extra CodeGen compatibility branches
- Cons
* Canonicalization is more complex (must synthesize index transforms)
* FIRToMemRef still needs alignment for full-rank vs reduced-rank
index consumption
* Slightly less "visually uniform" than all-1-based
Assisted by Codex.
---
flang/lib/Optimizer/Dialect/FIROps.cpp | 133 ++++++++++++++--
.../lib/Optimizer/Transforms/FIRToMemRef.cpp | 24 ++-
.../test/Fir/array-coor-canonicalization.fir | 149 ++++++++++++++++--
.../FIRToMemRef/array-coor-slice-shift.mlir | 28 ++++
4 files changed, 310 insertions(+), 24 deletions(-)
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 8b730aba92d2b..0bad76f717211 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -570,12 +570,23 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
matchAndRewrite(fir::ArrayCoorOp op,
mlir::PatternRewriter &rewriter) const override {
mlir::Value memref = op.getMemref();
- if (!mlir::isa<fir::BaseBoxType>(memref.getType()))
- return mlir::failure();
+
+ // Look through fir.box_addr: if the array_coor's memref is a ref-typed
+ // value produced by fir.box_addr, treat the underlying box as the
+ // defining producer for the embox/rebox lookup below. This exposes a
+ // sliced embox that is otherwise hidden by the box_addr indirection,
+ // which is necessary for later lowering (FIRToMemRef) to see the
+ // full-rank access pattern.
+ mlir::Operation *defOp = memref.getDefiningOp();
+ if (!mlir::isa<fir::BaseBoxType>(memref.getType())) {
+ auto boxAddrOp = mlir::dyn_cast_or_null<fir::BoxAddrOp>(defOp);
+ if (!boxAddrOp)
+ return mlir::failure();
+ defOp = boxAddrOp.getVal().getDefiningOp();
+ }
mlir::Value boxedMemref, boxedShape, boxedSlice;
- if (auto emboxOp =
- mlir::dyn_cast_or_null<fir::EmboxOp>(memref.getDefiningOp())) {
+ if (auto emboxOp = mlir::dyn_cast_or_null<fir::EmboxOp>(defOp)) {
boxedMemref = emboxOp.getMemref();
boxedShape = emboxOp.getShape();
boxedSlice = emboxOp.getSlice();
@@ -584,15 +595,14 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
if (!emboxOp.getTypeparams().empty() || emboxOp.getSourceBox() ||
emboxOp.getAccessMap())
return mlir::failure();
- } else if (auto reboxOp = mlir::dyn_cast_or_null<fir::ReboxOp>(
- memref.getDefiningOp())) {
+ } else if (auto reboxOp = mlir::dyn_cast_or_null<fir::ReboxOp>(defOp)) {
// Don't pull in rebox when the array_coor is inside an ACC construct
// and the rebox result is referenced by an ACC data clause.
// The data legalization pipeline relies on the rebox result being the
// copyin var; folding through it would leave the rebox source as an
// unhandled live-in inside the compute region.
if (op->getParentOfType<ACC_COMPUTE_AND_DATA_CONSTRUCT_OPS>() &&
- llvm::any_of(memref.getUsers(), [](mlir::Operation *u) {
+ llvm::any_of(reboxOp->getUsers(), [](mlir::Operation *u) {
return mlir::isa<ACC_DATA_ENTRY_OPS>(u);
}))
return mlir::failure();
@@ -623,8 +633,8 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
bool boxedShapeIsShapeShift =
boxedShape && mlir::isa<fir::ShapeShiftType>(boxedShape.getType());
- // Slices changing the number of dimensions are not supported
- // for array_coor yet.
+ // Compute the rank of the original underlying memref and the rank of
+ // the view the current array_coor operates on.
unsigned origBoxRank;
if (mlir::isa<fir::BaseBoxType>(boxedMemref.getType()))
origBoxRank = fir::getBoxRank(boxedMemref.getType());
@@ -634,7 +644,16 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
else
return mlir::failure();
- if (fir::getBoxRank(memref.getType()) != origBoxRank)
+ unsigned opRank;
+ if (mlir::isa<fir::BaseBoxType>(memref.getType()))
+ opRank = fir::getBoxRank(memref.getType());
+ else if (auto arrTy = mlir::dyn_cast<fir::SequenceType>(
+ fir::unwrapRefType(memref.getType())))
+ opRank = arrTy.getDimension();
+ else
+ return mlir::failure();
+
+ if (opRank > origBoxRank)
return mlir::failure();
// Slices with substring are not supported by array_coor.
@@ -644,6 +663,100 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
if (!sliceOp.getSubstr().empty())
return mlir::failure();
+ // Rank-reducing case: the underlying memref has more dimensions than the
+ // array_coor view because boxedSlice contains scalar triples (whose upper
+ // bound is fir.undefined). Rebuild the array_coor on the original memref
+ // with origBoxRank indices, filling scalar dims from the slice's lower
+ // bounds and the remaining range dims from the existing array_coor
+ // indices.
+ if (opRank < origBoxRank) {
+ auto sliceOp = mlir::dyn_cast_or_null<fir::SliceOp>(
+ boxedSlice ? boxedSlice.getDefiningOp() : nullptr);
+ if (!sliceOp)
+ return mlir::failure();
+ // A component-path slice (substr handled above already) is not
+ // representable in the new array_coor's indices.
+ if (!sliceOp.getFields().empty())
+ return mlir::failure();
+ // Combining the array_coor's own slice with the boxedSlice when the
+ // ranks differ is out of scope.
+ if (op.getSlice())
+ return mlir::failure();
+ if (!boxedShape)
+ return mlir::failure();
+ // Avoid emitting a plain ref array_coor whose shape is a ShiftType:
+ // the verifier rejects this (shift can only pair with fir.box memref).
+ if (!mlir::isa<fir::BaseBoxType>(boxedMemref.getType()) &&
+ mlir::isa<fir::ShiftType>(boxedShape.getType()))
+ return mlir::failure();
+
+ auto triples = sliceOp.getTriples();
+ if (triples.size() != 3 * origBoxRank)
+ return mlir::failure();
+
+ IndicesVectorTy newIndices;
+ newIndices.reserve(origBoxRank);
+ llvm::SmallVector<mlir::Value> lowerBounds;
+ if (auto shiftOp = mlir::dyn_cast_or_null<fir::ShiftOp>(
+ boxedShape.getDefiningOp())) {
+ for (mlir::Value lb : shiftOp.getOrigins())
+ lowerBounds.push_back(lb);
+ } else if (auto shapeShiftOp = mlir::dyn_cast_or_null<fir::ShapeShiftOp>(
+ boxedShape.getDefiningOp())) {
+ for (mlir::Value lb : shapeShiftOp.getOrigins())
+ lowerBounds.push_back(lb);
+ }
+ if (!lowerBounds.empty() && lowerBounds.size() != origBoxRank)
+ return mlir::failure();
+
+ mlir::Type idxTy = rewriter.getIndexType();
+ mlir::Value one =
+ mlir::arith::ConstantIndexOp::create(rewriter, op.getLoc(), 1);
+ auto nsw = mlir::arith::IntegerOverflowFlags::nsw;
+ auto opIndices = op.getIndices();
+ unsigned opIdxPos = 0;
+ for (unsigned i = 0; i < origBoxRank; ++i) {
+ mlir::Value upper = triples[3 * i + 1];
+ bool isScalar =
+ mlir::isa_and_nonnull<fir::UndefOp>(upper.getDefiningOp());
+ if (isScalar) {
+ // fir.array_coor's indices are typed as AnyCoordinateType, so any
+ // signless integer (or index) is accepted directly without an
+ // explicit fir.convert to index.
+ newIndices.push_back(triples[3 * i]);
+ } else {
+ if (opIdxPos >= opIndices.size())
+ return mlir::failure();
+ mlir::Value idx = opIndices[opIdxPos++];
+ if (!lowerBounds.empty()) {
+ mlir::Value lb = lowerBounds[i];
+ auto constLb = fir::getIntIfConstant(lb);
+ if (!(constLb && *constLb == 1)) {
+ mlir::Location loc = op.getLoc();
+ mlir::Value extLb =
+ fir::ConvertOp::create(rewriter, loc, idxTy, lb);
+ mlir::Value extIdx =
+ fir::ConvertOp::create(rewriter, loc, idxTy, idx);
+ mlir::Value add = mlir::arith::AddIOp::create(rewriter, loc,
+ extIdx, extLb, nsw);
+ idx = mlir::arith::SubIOp::create(rewriter, loc, add, one, nsw);
+ }
+ }
+ newIndices.push_back(idx);
+ }
+ }
+ if (opIdxPos != opIndices.size())
+ return mlir::failure();
+
+ rewriter.replaceOpWithNewOp<fir::ArrayCoorOp>(
+ op, op.getType(), boxedMemref, boxedShape, boxedSlice, newIndices,
+ op.getTypeparams());
+ return mlir::success();
+ }
+
+ // Rank-preserving case from here on.
+ assert(opRank == origBoxRank && "expected rank-preserving case");
+
// If embox/rebox and array_coor have conflicting shapes or slices,
// do nothing.
if (op.getShape() && boxedShape && boxedShape != op.getShape())
diff --git a/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp b/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp
index 5b353a2e3f689..77a030f5b2672 100644
--- a/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp
+++ b/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp
@@ -522,14 +522,29 @@ FIRToMemRef::getMemrefIndices(fir::ArrayCoorOp arrayCoorOp, Operation *memref,
}
}
+ const int nonScalarRank = llvm::count(filledPositions, false);
+ const bool hasReducedRankIndices =
+ static_cast<int>(idxs.size()) == nonScalarRank;
+ const bool hasFullRankIndices = static_cast<int>(idxs.size()) == rank;
+ if (!hasReducedRankIndices && !hasFullRankIndices)
+ return failure();
+
int arrayCoorIdx = 0;
for (int i = 0; i < rank; ++i) {
if (filledPositions[i])
continue;
- assert((unsigned int)arrayCoorIdx < idxs.size() &&
- "empty dimension should be eliminated\n");
- Value index = canonicalizeIndex(idxs[arrayCoorIdx], rewriter);
+ Value sourceIndex;
+ if (hasFullRankIndices) {
+ // Canonicalized rank-reducing array_coor may carry full-rank indices
+ // (including scalar-sliced dimensions).
+ sourceIndex = idxs[i];
+ } else {
+ assert((unsigned int)arrayCoorIdx < idxs.size() &&
+ "empty dimension should be eliminated\n");
+ sourceIndex = idxs[arrayCoorIdx];
+ }
+ Value index = canonicalizeIndex(sourceIndex, rewriter);
Type cTy = index.getType();
if (!llvm::isa<IndexType>(cTy)) {
assert(cTy.isSignlessInteger() && "expected signless integer type");
@@ -560,7 +575,8 @@ FIRToMemRef::getMemrefIndices(fir::ArrayCoorOp arrayCoorOp, Operation *memref,
Value finalIndex = arith::AddIOp::create(rewriter, loc, scaled, offset);
indices[i] = finalIndex;
- arrayCoorIdx++;
+ if (hasReducedRankIndices)
+ arrayCoorIdx++;
}
std::reverse(indices.begin(), indices.end());
diff --git a/flang/test/Fir/array-coor-canonicalization.fir b/flang/test/Fir/array-coor-canonicalization.fir
index bc58347bdf392..ce31c7a0609e2 100644
--- a/flang/test/Fir/array-coor-canonicalization.fir
+++ b/flang/test/Fir/array-coor-canonicalization.fir
@@ -90,17 +90,21 @@ func.func @_QPtest3(%arg0: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "u"})
return
}
-// TODO: fir.array_coor with slices changing the number of dimensions
-// is not supported yet.
+// Rank-reducing slice: the middle dimension is scalar-selected by the slice
+// (its upper bound is fir.undefined). The canonicalization pulls in the embox
+// and rewrites the rank-2 array_coor into a rank-3 array_coor on the original
+// 3D reference, with the scalar dim's index synthesized from the slice's
+// lower bound.
// CHECK-LABEL: func.func @_QPtest4() {
-// CHECK: %[[VAL_3:.*]] = fir.alloca !fir.array<100x100x100xi32> {bindc_name = "u", uniq_name = "_QFtest4Eu"}
-// CHECK: %[[VAL_4:.*]] = fir.shape
-// CHECK: %[[VAL_5:.*]] = fir.declare %[[VAL_3]](%[[VAL_4]]) {uniq_name = "_QFtest4Eu"} : (!fir.ref<!fir.array<100x100x100xi32>>, !fir.shape<3>) -> !fir.ref<!fir.array<100x100x100xi32>>
-// CHECK: %[[VAL_7:.*]] = fir.slice
-// CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]](%[[VAL_4]]) {{\[}}%[[VAL_7]]] : (!fir.ref<!fir.array<100x100x100xi32>>, !fir.shape<3>, !fir.slice<3>) -> !fir.box<!fir.array<100x100xi32>>
-// CHECK: fir.do_loop
-// CHECK: fir.do_loop
-// CHECK: %[[VAL_11:.*]] = fir.array_coor %[[VAL_8]]
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 100 : index
+// CHECK: %[[VAL_4:.*]] = fir.alloca !fir.array<100x100x100xi32> {bindc_name = "u", uniq_name = "_QFtest4Eu"}
+// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_3]], %[[VAL_3]], %[[VAL_3]] : (index, index, index) -> !fir.shape<3>
+// CHECK: %[[VAL_6:.*]] = fir.declare %[[VAL_4]](%[[VAL_5]]) {uniq_name = "_QFtest4Eu"} : (!fir.ref<!fir.array<100x100x100xi32>>, !fir.shape<3>) -> !fir.ref<!fir.array<100x100x100xi32>>
+// CHECK: %[[VAL_8:.*]] = fir.slice
+// CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_1]] to %[[VAL_3]]
+// CHECK: fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_3]]
+// CHECK: %[[VAL_11:.*]] = fir.array_coor %[[VAL_6]](%[[VAL_5]]) {{\[}}%[[VAL_8]]] %[[VAL_10]], %[[VAL_1]], %[[VAL_9]] : (!fir.ref<!fir.array<100x100x100xi32>>, !fir.shape<3>, !fir.slice<3>, index, index, index) -> !fir.ref<i32>
func.func @_QPtest4() {
%c1 = arith.constant 1 : index
%c2_i32 = arith.constant 2 : i32
@@ -590,3 +594,128 @@ func.func @test20(%arg0: !fir.ref<!fir.array<10xi32>>) -> !fir.ref<i32> {
%3 = fir.array_coor %2(%1) %c10 : (!fir.box<!fir.array<10xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
return %3 : !fir.ref<i32>
}
+
+// Rank-reducing slice reached through a fir.box_addr indirection.
+// Mirrors the pattern produced by HLFIR-to-FIR lowering for a 2D contiguous
+// section assignment: `cli(:m, i) = ...`. The embox creates a 1D box of a
+// 2D ref by selecting one element in the second dim; box_addr yields a 1D
+// ref; the array_coor accesses elements with one index. The canonicalization
+// must look through the box_addr to find the embox and rewrite the access
+// as a 2D array_coor on the original ref so later lowering can see the
+// full 2D address pattern.
+// CHECK-LABEL: func.func @test21_box_addr_rank_reducing(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<16x6xf32>>,
+// CHECK-SAME: %[[VAL_1:.*]]: index,
+// CHECK-SAME: %[[VAL_2:.*]]: i64) -> !fir.ref<f32> {
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_4:.*]] = arith.constant -2 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant 16 : index
+// CHECK: %[[VAL_6:.*]] = arith.constant 6 : index
+// CHECK: %[[VAL_7:.*]] = fir.shape_shift %[[VAL_3]], %[[VAL_5]], %[[VAL_4]], %[[VAL_6]] : (index, index, index, index) -> !fir.shapeshift<2>
+// CHECK: %[[VAL_8:.*]] = fir.undefined index
+// CHECK: %[[VAL_9:.*]] = fir.slice %[[VAL_3]], %[[VAL_1]], %[[VAL_3]], %[[VAL_2]], %[[VAL_8]], %[[VAL_8]] : (index, index, index, i64, index, index) -> !fir.slice<2>
+// CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_7]]) {{\[}}%[[VAL_9]]] %[[VAL_3]], %[[VAL_2]] : (!fir.ref<!fir.array<16x6xf32>>, !fir.shapeshift<2>, !fir.slice<2>, index, i64) -> !fir.ref<f32>
+// CHECK: return %[[VAL_10]] : !fir.ref<f32>
+// CHECK: }
+func.func @test21_box_addr_rank_reducing(%arg0: !fir.ref<!fir.array<16x6xf32>>, %m: index, %i: i64) -> !fir.ref<f32> {
+ %c1 = arith.constant 1 : index
+ %c_neg2 = arith.constant -2 : index
+ %c16 = arith.constant 16 : index
+ %c6 = arith.constant 6 : index
+ %ss = fir.shape_shift %c1, %c16, %c_neg2, %c6 : (index, index, index, index) -> !fir.shapeshift<2>
+ %u = fir.undefined index
+ %s = fir.slice %c1, %m, %c1, %i, %u, %u : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %b = fir.embox %arg0(%ss) [%s] : (!fir.ref<!fir.array<16x6xf32>>, !fir.shapeshift<2>, !fir.slice<2>) -> !fir.box<!fir.array<?xf32>>
+ %ba = fir.box_addr %b : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+ %sh1 = fir.shape %m : (index) -> !fir.shape<1>
+ %ac = fir.array_coor %ba(%sh1) %c1 : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+ return %ac : !fir.ref<f32>
+}
+
+// Rank-reducing slice via fir.rebox with no shift: 3D source box -> 2D box.
+// The rank-reducing branch currently requires the rebox to carry an explicit
+// shift (so the new array_coor on the source box gets a valid shape operand
+// of source rank). Without a shift we leave the IR unchanged.
+// CHECK-LABEL: func.func @test22_rebox_rank_reducing(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf32>>,
+// CHECK-SAME: %[[VAL_1:.*]]: index) -> !fir.ref<f32> {
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 100 : index
+// CHECK: %[[VAL_4:.*]] = fir.undefined index
+// CHECK: %[[VAL_5:.*]] = fir.slice %[[VAL_2]], %[[VAL_3]], %[[VAL_2]], %[[VAL_1]], %[[VAL_4]], %[[VAL_4]], %[[VAL_2]], %[[VAL_3]], %[[VAL_2]] : (index, index, index, index, index, index, index, index, index) -> !fir.slice<3>
+// CHECK: %[[VAL_6:.*]] = fir.rebox %[[VAL_0]] {{\[}}%[[VAL_5]]] : (!fir.box<!fir.array<?x?x?xf32>>, !fir.slice<3>) -> !fir.box<!fir.array<?x?xf32>>
+// CHECK: %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_2]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+// CHECK: return %[[VAL_7]] : !fir.ref<f32>
+// CHECK: }
+func.func @test22_rebox_rank_reducing(%arg0: !fir.box<!fir.array<?x?x?xf32>>, %j: index) -> !fir.ref<f32> {
+ %c1 = arith.constant 1 : index
+ %c100 = arith.constant 100 : index
+ %u = fir.undefined index
+ %s = fir.slice %c1, %c100, %c1, %j, %u, %u, %c1, %c100, %c1 : (index, index, index, index, index, index, index, index, index) -> !fir.slice<3>
+ %r = fir.rebox %arg0 [%s] : (!fir.box<!fir.array<?x?x?xf32>>, !fir.slice<3>) -> !fir.box<!fir.array<?x?xf32>>
+ %ac = fir.array_coor %r %c1, %c1 : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+ return %ac : !fir.ref<f32>
+}
+
+// Rank-reducing slice via fir.rebox with an explicit shift. The fir.rebox
+// verifier requires the shift's rank to match the *source* box rank when a
+// slice is present, so the shift here is rank-3 (same as the new array_coor
+// that we want to emit on the source box). The canonicalization can fold
+// through to a 3D array_coor on the source box, synthesizing the dropped
+// middle dim's index from the slice's lower bound.
+// CHECK-LABEL: func.func @test23_rebox_rank_reducing_with_shift(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf32>>,
+// CHECK-SAME: %[[VAL_1:.*]]: index) -> !fir.ref<f32> {
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 100 : index
+// CHECK: %[[VAL_4:.*]] = fir.undefined index
+// CHECK: %[[VAL_5:.*]] = fir.slice %[[VAL_2]], %[[VAL_3]], %[[VAL_2]], %[[VAL_1]], %[[VAL_4]], %[[VAL_4]], %[[VAL_2]], %[[VAL_3]], %[[VAL_2]] : (index, index, index, index, index, index, index, index, index) -> !fir.slice<3>
+// CHECK: %[[VAL_6:.*]] = fir.shift %[[VAL_2]], %[[VAL_2]], %[[VAL_2]] : (index, index, index) -> !fir.shift<3>
+// CHECK: %[[VAL_7:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_6]]) {{\[}}%[[VAL_5]]] %[[VAL_2]], %[[VAL_1]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?xf32>>, !fir.shift<3>, !fir.slice<3>, index, index, index) -> !fir.ref<f32>
+// CHECK: return %[[VAL_7]] : !fir.ref<f32>
+// CHECK: }
+func.func @test23_rebox_rank_reducing_with_shift(%arg0: !fir.box<!fir.array<?x?x?xf32>>, %j: index) -> !fir.ref<f32> {
+ %c1 = arith.constant 1 : index
+ %c100 = arith.constant 100 : index
+ %u = fir.undefined index
+ %s = fir.slice %c1, %c100, %c1, %j, %u, %u, %c1, %c100, %c1 : (index, index, index, index, index, index, index, index, index) -> !fir.slice<3>
+ %sh = fir.shift %c1, %c1, %c1 : (index, index, index) -> !fir.shift<3>
+ %r = fir.rebox %arg0(%sh) [%s] : (!fir.box<!fir.array<?x?x?xf32>>, !fir.shift<3>, !fir.slice<3>) -> !fir.box<!fir.array<?x?xf32>>
+ %ac = fir.array_coor %r %c1, %c1 : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+ return %ac : !fir.ref<f32>
+}
+
+// Rank-reducing canonicalization with non-default lower bounds in non-scalar
+// dimensions keeps indices in shape-shift coordinate space so CodeGen can
+// consume them without additional special cases.
+// CHECK-LABEL: func.func @test24_rank_reducing_shifted_nonscalar(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<20x20x20xf32>>,
+// CHECK-SAME: %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index, %[[VAL_3:.*]]: index, %[[VAL_4:.*]]: index, %[[VAL_5:.*]]: index) -> !fir.ref<f32> {
+// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_7:.*]] = arith.constant 20 : index
+// CHECK: %[[VAL_8:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_9:.*]] = arith.constant -2 : index
+// CHECK: %[[VAL_10:.*]] = arith.constant 5 : index
+// CHECK: %[[VAL_11:.*]] = fir.shape_shift %[[VAL_8]], %[[VAL_7]], %[[VAL_9]], %[[VAL_7]], %[[VAL_10]], %[[VAL_7]] : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+// CHECK: %[[VAL_12:.*]] = fir.undefined index
+// CHECK: %[[VAL_13:.*]] = fir.slice %[[VAL_6]], %[[VAL_1]], %[[VAL_6]], %[[VAL_2]], %[[VAL_12]], %[[VAL_12]], %[[VAL_6]], %[[VAL_3]], %[[VAL_6]] : (index, index, index, index, index, index, index, index, index) -> !fir.slice<3>
+// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_4]], %c9 : index
+// CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_5]], %c4 : index
+// CHECK: %[[VAL_16:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_11]]) {{\[}}%[[VAL_13]]] %[[VAL_14]], %[[VAL_2]], %[[VAL_15]] : (!fir.ref<!fir.array<20x20x20xf32>>, !fir.shapeshift<3>, !fir.slice<3>, index, index, index) -> !fir.ref<f32>
+// CHECK: return %[[VAL_16]] : !fir.ref<f32>
+// CHECK: }
+func.func @test24_rank_reducing_shifted_nonscalar(
+ %arg0: !fir.ref<!fir.array<20x20x20xf32>>, %m: index, %j: index, %n: index,
+ %i: index, %k: index) -> !fir.ref<f32> {
+ %c1 = arith.constant 1 : index
+ %c20 = arith.constant 20 : index
+ %c10 = arith.constant 10 : index
+ %c_neg2 = arith.constant -2 : index
+ %c5 = arith.constant 5 : index
+ %ss = fir.shape_shift %c10, %c20, %c_neg2, %c20, %c5, %c20 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+ %u = fir.undefined index
+ %s = fir.slice %c1, %m, %c1, %j, %u, %u, %c1, %n, %c1 : (index, index, index, index, index, index, index, index, index) -> !fir.slice<3>
+ %b = fir.embox %arg0(%ss) [%s] : (!fir.ref<!fir.array<20x20x20xf32>>, !fir.shapeshift<3>, !fir.slice<3>) -> !fir.box<!fir.array<?x?xf32>>
+ %ac = fir.array_coor %b %i, %k : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+ return %ac : !fir.ref<f32>
+}
diff --git a/flang/test/Transforms/FIRToMemRef/array-coor-slice-shift.mlir b/flang/test/Transforms/FIRToMemRef/array-coor-slice-shift.mlir
index 22b69a3fa9e60..ff751cfe26955 100644
--- a/flang/test/Transforms/FIRToMemRef/array-coor-slice-shift.mlir
+++ b/flang/test/Transforms/FIRToMemRef/array-coor-slice-shift.mlir
@@ -74,3 +74,31 @@ func.func @array_coor_slice_shift_section() {
fir.store %c1_i32 to %4 : !fir.ref<i32>
return
}
+
+// Full-rank canonicalized form with scalar dim kept explicit.
+// Scalar dim offset uses (sliceLb-shift), while non-scalar dim consumes its
+// own full-rank coordinate-space index.
+// CHECK-LABEL: func.func @array_coor_slice_scalar_full_rank_dim1_shifted
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
+// CHECK-DAG: %[[CM4:.*]] = arith.constant -4 : index
+// CHECK: arith.subi %[[C5]], %[[C3]] : index
+// CHECK: arith.subi %[[CM4]], %[[CM4]] : index
+// CHECK: memref.store
+// CHECK-NOT: fir.array_coor
+func.func @array_coor_slice_scalar_full_rank_dim1_shifted() {
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c5 = arith.constant 5 : index
+ %c10 = arith.constant 10 : index
+ %c_neg4 = arith.constant -4 : index
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.alloca !fir.array<10x10xi32> {bindc_name = "a", uniq_name = "_QFEa"}
+ %1 = fir.shape_shift %c3, %c10, %c_neg4, %c10 : (index, index, index, index) -> !fir.shapeshift<2>
+ %2 = fir.declare %0(%1) {uniq_name = "_QFEa"} : (!fir.ref<!fir.array<10x10xi32>>, !fir.shapeshift<2>) -> !fir.ref<!fir.array<10x10xi32>>
+ %u = fir.undefined index
+ %3 = fir.slice %c5, %u, %u, %c_neg4, %c10, %c1 : (index, index, index, index, index, index) -> !fir.slice<2>
+ %4 = fir.array_coor %2(%1) [%3] %c5, %c_neg4 : (!fir.ref<!fir.array<10x10xi32>>, !fir.shapeshift<2>, !fir.slice<2>, index, index) -> !fir.ref<i32>
+ fir.store %c1_i32 to %4 : !fir.ref<i32>
+ return
+}
>From 557065491d1d8c70c30f6d11575b7fe2a122359b Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Mon, 18 May 2026 09:53:13 -0700
Subject: [PATCH 2/2] Fixed pulling with typeparams.
---
flang/lib/Optimizer/Dialect/FIROps.cpp | 14 ++-
.../test/Fir/array-coor-canonicalization.fir | 111 ++++++++++++++++++
2 files changed, 124 insertions(+), 1 deletion(-)
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 0bad76f717211..dbe3b3ecd3ee6 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -750,7 +750,7 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
rewriter.replaceOpWithNewOp<fir::ArrayCoorOp>(
op, op.getType(), boxedMemref, boxedShape, boxedSlice, newIndices,
- op.getTypeparams());
+ typeparamsForCanonicalizedMemref(boxedMemref, op.getTypeparams()));
return mlir::success();
}
@@ -994,6 +994,8 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
op.getSliceMutable().assign(boxedSlice);
if (shiftedIndices)
op.getIndicesMutable().assign(*shiftedIndices);
+ op.getTypeparamsMutable().assign(
+ typeparamsForCanonicalizedMemref(boxedMemref, op.getTypeparams()));
});
return mlir::success();
}
@@ -1001,6 +1003,16 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
private:
using IndicesVectorTy = std::vector<mlir::Value>;
+ // array_coor on a fir.box carries length/type info in the descriptor;
+ // explicit typeparams are only valid for plain ref memrefs.
+ static mlir::ValueRange
+ typeparamsForCanonicalizedMemref(mlir::Value memref,
+ mlir::ValueRange typeparams) {
+ if (mlir::isa<fir::BaseBoxType>(memref.getType()))
+ return mlir::ValueRange{};
+ return typeparams;
+ }
+
// If v is a shape_shift operation:
// fir.shape_shift %l1, %e1, %l2, %e2, ...
// create:
diff --git a/flang/test/Fir/array-coor-canonicalization.fir b/flang/test/Fir/array-coor-canonicalization.fir
index ce31c7a0609e2..33b131167b7ac 100644
--- a/flang/test/Fir/array-coor-canonicalization.fir
+++ b/flang/test/Fir/array-coor-canonicalization.fir
@@ -719,3 +719,114 @@ func.func @test24_rank_reducing_shifted_nonscalar(
%ac = fir.array_coor %b %i, %k : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
return %ac : !fir.ref<f32>
}
+
+// Deferred-length character array accessed through box_addr: typeparams on the
+// ref array_coor carry the element LEN, but after pulling the rebox through
+// box_addr the memref becomes a box and must not keep those typeparams.
+// CHECK-LABEL: func.func @test25_box_addr_char_typeparams(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x!fir.char<1,?>>>,
+// CHECK-SAME: %[[VAL_1:.*]]: index) -> !fir.ref<!fir.char<1,?>> {
+// CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_3:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_2]]) %[[VAL_1]] : (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.ref<!fir.char<1,?>>
+// CHECK: return %[[VAL_3]] : !fir.ref<!fir.char<1,?>>
+// CHECK: }
+func.func @test25_box_addr_char_typeparams(%arg0: !fir.box<!fir.array<?x!fir.char<1,?>>>, %idx: index) -> !fir.ref<!fir.char<1,?>> {
+ %c0 = arith.constant 0 : index
+ %rebox = fir.rebox %arg0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
+ %elemsize = fir.box_elesize %rebox : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> index
+ %ref = fir.box_addr %rebox : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+ %shape = fir.shape %idx : (index) -> !fir.shape<1>
+ %co = fir.array_coor %ref(%shape) %idx typeparams %elemsize : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index, index) -> !fir.ref<!fir.char<1,?>>
+ return %co : !fir.ref<!fir.char<1,?>>
+}
+
+// Rank-reducing deferred-length character slice through fir.box_addr: the ref
+// array_coor carries element LEN in typeparams; canonicalization must fold to a
+// full-rank array_coor on the original ref and keep those typeparams.
+// CHECK-LABEL: func.func @test26_box_addr_char_rank_reducing_typeparams(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<16x6x!fir.char<1,?>>>,
+// CHECK-SAME: %[[VAL_1:.*]]: index,
+// CHECK-SAME: %[[VAL_2:.*]]: i64,
+// CHECK-SAME: %[[VAL_3:.*]]: index) -> !fir.ref<!fir.char<1,?>> {
+// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant -2 : index
+// CHECK: %[[VAL_6:.*]] = arith.constant 16 : index
+// CHECK: %[[VAL_7:.*]] = arith.constant 6 : index
+// CHECK: %[[VAL_8:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_6]], %[[VAL_5]], %[[VAL_7]] : (index, index, index, index) -> !fir.shapeshift<2>
+// CHECK: %[[VAL_9:.*]] = fir.undefined index
+// CHECK: %[[VAL_10:.*]] = fir.slice %[[VAL_4]], %[[VAL_1]], %[[VAL_4]], %[[VAL_2]], %[[VAL_9]], %[[VAL_9]] : (index, index, index, i64, index, index) -> !fir.slice<2>
+// CHECK: %[[VAL_11:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_8]]) {{\[}}%[[VAL_10]]] %[[VAL_4]], %[[VAL_2]] typeparams %[[VAL_3]] : (!fir.ref<!fir.array<16x6x!fir.char<1,?>>>, !fir.shapeshift<2>, !fir.slice<2>, index, i64, index) -> !fir.ref<!fir.char<1,?>>
+// CHECK: return %[[VAL_11]] : !fir.ref<!fir.char<1,?>>
+// CHECK: }
+func.func @test26_box_addr_char_rank_reducing_typeparams(
+ %arg0: !fir.ref<!fir.array<16x6x!fir.char<1,?>>>, %m: index, %i: i64,
+ %elemsize: index) -> !fir.ref<!fir.char<1,?>> {
+ %c1 = arith.constant 1 : index
+ %c_neg2 = arith.constant -2 : index
+ %c16 = arith.constant 16 : index
+ %c6 = arith.constant 6 : index
+ %ss = fir.shape_shift %c1, %c16, %c_neg2, %c6 : (index, index, index, index) -> !fir.shapeshift<2>
+ %u = fir.undefined index
+ %s = fir.slice %c1, %m, %c1, %i, %u, %u : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %b = fir.embox %arg0(%ss) [%s] : (!fir.ref<!fir.array<16x6x!fir.char<1,?>>>, !fir.shapeshift<2>, !fir.slice<2>) -> !fir.box<!fir.array<?x!fir.char<1,?>>>
+ %ba = fir.box_addr %b : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.ref<!fir.array<?x!fir.char<1,?>>>
+ %sh1 = fir.shape %m : (index) -> !fir.shape<1>
+ %ac = fir.array_coor %ba(%sh1) %c1 typeparams %elemsize : (!fir.ref<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index, index) -> !fir.ref<!fir.char<1,?>>
+ return %ac : !fir.ref<!fir.char<1,?>>
+}
+
+// Parametric derived type accessed through box_addr: typeparams on the ref
+// array_coor carry the LEN type parameter, but after pulling the rebox through
+// box_addr the memref becomes a box and must not keep those typeparams.
+// CHECK-LABEL: func.func @test27_box_addr_derived_typeparams(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>,
+// CHECK-SAME: %[[VAL_1:.*]]: index,
+// CHECK-SAME: %[[VAL_2:.*]]: i32) -> !fir.ref<!fir.type<dt(l:i32){x:i32}>> {
+// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_4:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_3]]) %[[VAL_1]] : (!fir.box<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>, !fir.shape<1>, index) -> !fir.ref<!fir.type<dt(l:i32){x:i32}>>
+// CHECK: return %[[VAL_4]] : !fir.ref<!fir.type<dt(l:i32){x:i32}>>
+// CHECK: }
+func.func @test27_box_addr_derived_typeparams(
+ %arg0: !fir.box<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>, %idx: index,
+ %len: i32) -> !fir.ref<!fir.type<dt(l:i32){x:i32}>> {
+ %rebox = fir.rebox %arg0 : (!fir.box<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>) -> !fir.box<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>
+ %ref = fir.box_addr %rebox : (!fir.box<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>) -> !fir.ref<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>
+ %shape = fir.shape %idx : (index) -> !fir.shape<1>
+ %co = fir.array_coor %ref(%shape) %idx typeparams %len : (!fir.ref<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>, !fir.shape<1>, index, i32) -> !fir.ref<!fir.type<dt(l:i32){x:i32}>>
+ return %co : !fir.ref<!fir.type<dt(l:i32){x:i32}>>
+}
+
+// Rank-reducing slice of an array of parametric derived types through
+// fir.box_addr: typeparams on the ref array_coor must be kept after folding to
+// a full-rank array_coor on the original ref.
+// CHECK-LABEL: func.func @test28_box_addr_derived_rank_reducing_typeparams(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<16x6x!fir.type<dt(l:i32){x:i32}>>>,
+// CHECK-SAME: %[[VAL_1:.*]]: index,
+// CHECK-SAME: %[[VAL_2:.*]]: i64,
+// CHECK-SAME: %[[VAL_3:.*]]: i32) -> !fir.ref<!fir.type<dt(l:i32){x:i32}>> {
+// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant -2 : index
+// CHECK: %[[VAL_6:.*]] = arith.constant 16 : index
+// CHECK: %[[VAL_7:.*]] = arith.constant 6 : index
+// CHECK: %[[VAL_8:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_6]], %[[VAL_5]], %[[VAL_7]] : (index, index, index, index) -> !fir.shapeshift<2>
+// CHECK: %[[VAL_9:.*]] = fir.undefined index
+// CHECK: %[[VAL_10:.*]] = fir.slice %[[VAL_4]], %[[VAL_1]], %[[VAL_4]], %[[VAL_2]], %[[VAL_9]], %[[VAL_9]] : (index, index, index, i64, index, index) -> !fir.slice<2>
+// CHECK: %[[VAL_11:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_8]]) {{\[}}%[[VAL_10]]] %[[VAL_4]], %[[VAL_2]] typeparams %[[VAL_3]] : (!fir.ref<!fir.array<16x6x!fir.type<dt(l:i32){x:i32}>>>, !fir.shapeshift<2>, !fir.slice<2>, index, i64, i32) -> !fir.ref<!fir.type<dt(l:i32){x:i32}>>
+// CHECK: return %[[VAL_11]] : !fir.ref<!fir.type<dt(l:i32){x:i32}>>
+// CHECK: }
+func.func @test28_box_addr_derived_rank_reducing_typeparams(
+ %arg0: !fir.ref<!fir.array<16x6x!fir.type<dt(l:i32){x:i32}>>>, %m: index,
+ %i: i64, %len: i32) -> !fir.ref<!fir.type<dt(l:i32){x:i32}>> {
+ %c1 = arith.constant 1 : index
+ %c_neg2 = arith.constant -2 : index
+ %c16 = arith.constant 16 : index
+ %c6 = arith.constant 6 : index
+ %ss = fir.shape_shift %c1, %c16, %c_neg2, %c6 : (index, index, index, index) -> !fir.shapeshift<2>
+ %u = fir.undefined index
+ %s = fir.slice %c1, %m, %c1, %i, %u, %u : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %b = fir.embox %arg0(%ss) [%s] : (!fir.ref<!fir.array<16x6x!fir.type<dt(l:i32){x:i32}>>>, !fir.shapeshift<2>, !fir.slice<2>) -> !fir.box<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>
+ %ba = fir.box_addr %b : (!fir.box<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>) -> !fir.ref<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>
+ %sh1 = fir.shape %m : (index) -> !fir.shape<1>
+ %ac = fir.array_coor %ba(%sh1) %c1 typeparams %len : (!fir.ref<!fir.array<?x!fir.type<dt(l:i32){x:i32}>>>, !fir.shape<1>, index, i32) -> !fir.ref<!fir.type<dt(l:i32){x:i32}>>
+ return %ac : !fir.ref<!fir.type<dt(l:i32){x:i32}>>
+}
More information about the flang-commits
mailing list