[flang-commits] [flang] a0150ce - [flang] Canonicalize fir.array_coor for contiguous arrays. (#200106)

Thu May 28 10:50:58 PDT 2026

Author: Slava Zakharin
Date: 2026-05-28T10:50:53-07:00
New Revision: a0150ce371d1d808e0753c954618388fbe7ef7b7

URL: https://github.com/llvm/llvm-project/commit/a0150ce371d1d808e0753c954618388fbe7ef7b7
DIFF: https://github.com/llvm/llvm-project/commit/a0150ce371d1d808e0753c954618388fbe7ef7b7.diff

LOG: [flang] Canonicalize fir.array_coor for contiguous arrays. (#200106)

This patch adds a new canonicalization pattern for `fir.array_coor`
sequences appearing for contiguous array slices like `dx(1:3)`,
`dx(1:3, 2)`, etc. This canonicalization helps expose the original
rank of the array, which makes the MLIR after FIRToMemRef easier to
optimize.

Assisted by Cursor

Added: 
    

Modified: 
    flang/lib/Optimizer/Dialect/FIROps.cpp
    flang/test/Fir/array-coor-canonicalization.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 55cafc2b32a3c..c3c6397c16fca 100644

--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -568,6 +568,33 @@ llvm::LogicalResult fir::ArrayCoorOp::verify() {
   return mlir::success();
 }
 
+// Helper shared by array_coor canonicalization patterns. Returns true if
+// folding `producers` into `consumer` would cross an ACC data-clause or CUDA
+// kernel boundary that the lowering pipeline relies on:
+//
+//   - ACC: when `consumer` lives inside an ACC compute or data construct and
+//     any producer's result is referenced by an ACC data-clause op, the
+//     producer must remain (the data legalization pipeline expects it to be
+//     the copyin var / kernel argument).
+//   - CUF: every producer must share the same enclosing CUDA kernel as the
+//     consumer; otherwise the kernel would end up capturing a host-side
+//     descriptor directly, causing illegal device dereferences at runtime.
+static bool arrayCoorFoldCrossesACCOrCUDABoundary(
+    mlir::Operation *consumer, llvm::ArrayRef<mlir::Operation *> producers) {
+  if (consumer->getParentOfType<ACC_COMPUTE_AND_DATA_CONSTRUCT_OPS>() &&
+      llvm::any_of(producers, [](mlir::Operation *p) {
+        return llvm::any_of(p->getUsers(), [](mlir::Operation *u) {
+          return mlir::isa<ACC_DATA_ENTRY_OPS>(u);
+        });
+      }))
+    return true;
+  auto consumerKernel = consumer->getParentOfType<fir::CUDAKernelOpInterface>();
+  for (mlir::Operation *p : producers)
+    if (p->getParentOfType<fir::CUDAKernelOpInterface>() != consumerKernel)
+      return true;
+  return false;
+}
+
 // Pull in fir.embox and fir.rebox into fir.array_coor when possible.
 struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
   using mlir::OpRewritePattern<fir::ArrayCoorOp>::OpRewritePattern;
@@ -609,24 +636,12 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
           emboxOp.getAccessMap())
         return mlir::failure();
     } else if (auto reboxOp = mlir::dyn_cast_or_null<fir::ReboxOp>(defOp)) {
-      // Don't pull in rebox when the array_coor is inside an ACC construct
-      // and the rebox result is referenced by an ACC data clause.
-      // The data legalization pipeline relies on the rebox result being the
-      // copyin var; folding through it would leave the rebox source as an
-      // unhandled live-in inside the compute region.
-      if (op->getParentOfType<ACC_COMPUTE_AND_DATA_CONSTRUCT_OPS>() &&
-          llvm::any_of(reboxOp->getUsers(), [](mlir::Operation *u) {
-            return mlir::isa<ACC_DATA_ENTRY_OPS>(u);
-          }))
-        return mlir::failure();
-      // Don't pull in rebox defined outside a CUDA kernel boundary when the
-      // array_coor is inside that kernel. CUF lowering converts such a rebox
-      // into a managed-memory descriptor that the kernel needs to receive as
-      // its argument; folding the rebox away would leave the kernel capturing
-      // the host-side descriptor directly, causing illegal device dereferences
-      // at runtime.
-      if (op->getParentOfType<fir::CUDAKernelOpInterface>() !=
-          reboxOp->getParentOfType<fir::CUDAKernelOpInterface>())
+      // Skip when folding the rebox into the array_coor would cross an ACC
+      // data-clause or CUDA kernel boundary that the lowering pipeline
+      // relies on (data legalization expects the rebox to remain as the
+      // copyin var / kernel argument; folding across a CUDA kernel boundary
+      // would leave the kernel capturing a host-side descriptor directly).
+      if (arrayCoorFoldCrossesACCOrCUDABoundary(op, {reboxOp}))
         return mlir::failure();
       boxedMemref = reboxOp.getBox();
       boxedShape = reboxOp.getShape();
@@ -1111,10 +1126,127 @@ struct SimplifyArrayCoorOp : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
   }
 };
 
+// Pull a producer fir.array_coor (via a fir.convert that reinterprets a
+// scalar element address as an M-D array ref) into the consumer fir.array_coor.
+// Mirrors the slice-designator lowering pattern emitted by ConvertHLFIRtoFIR
+// for Fortran array sections like `dx(1:3)` or `dx(1:3, 4)`.
+//
+// Pattern:
+//   %outer = fir.array_coor %base(%base_shape) %ox_0, ..., %ox_{N-1}
+//                : (BaseTy, ShapeTy, index, ...) -> !fir.ref<EleTy>
+//   %view  = fir.convert %outer
+//                : !fir.ref<EleTy> -> !fir.ref<!fir.array<...x EleTy>>  (M-D)
+//   %inner = fir.array_coor %view(%inner_shape) %ix_0, ..., %ix_{M-1}
+//                : (!fir.ref<!fir.array<...>>, !fir.shape<M>, index, ...)
+//                -> !fir.ref<EleTy>
+//
+// In Fortran column-major storage, the M leading dimensions of the original
+// array are exactly the dimensions of the contiguous sub-array; the trailing
+// (N-M) outer indices stay fixed. So %inner is rewritten as:
+//   %combined_i = arith.addi %ox_i, arith.subi(%ix_i, 1)        for i in 0..M-1
+//   %inner = fir.array_coor %base(%base_shape)
+//                %combined_0, ..., %combined_{M-1},
+//                %ox_M, ..., %ox_{N-1}
+//                : (BaseTy, ShapeTy, index, ...) -> !fir.ref<EleTy>
+//
+// Restrictions (kept minimal):
+//   - No slice on outer or inner.
+//   - Inner shape must be a plain fir.shape (default lb=1); shift/shape_shift
+//     on the inner are not handled.
+//   - Outer's result type must be a scalar ref (not an array ref).
+//   - Convert source type must equal outer's result type.
+//   - Convert result type's element type must equal outer's pointee.
+//   - Inner rank M <= outer rank N.
+//   - Neither op carries typeparams.
+//   - Mirrors SimplifyArrayCoorOp's ACC/CUF guards.
+struct MergeArrayCoorOnConvert
+    : public mlir::OpRewritePattern<fir::ArrayCoorOp> {
+  using mlir::OpRewritePattern<fir::ArrayCoorOp>::OpRewritePattern;
+  llvm::LogicalResult
+  matchAndRewrite(fir::ArrayCoorOp inner,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (inner.getSlice() || !inner.getTypeparams().empty())
+      return mlir::failure();
+
+    auto innerShapeOp = mlir::dyn_cast_or_null<fir::ShapeOp>(
+        inner.getShape() ? inner.getShape().getDefiningOp() : nullptr);
+    if (!innerShapeOp)
+      return mlir::failure();
+
+    auto convertOp = inner.getMemref().getDefiningOp<fir::ConvertOp>();
+    if (!convertOp)
+      return mlir::failure();
+
+    auto outer = convertOp.getValue().getDefiningOp<fir::ArrayCoorOp>();
+    if (!outer)
+      return mlir::failure();
+
+    if (outer.getSlice() || !outer.getTypeparams().empty())
+      return mlir::failure();
+
+    mlir::Type outerResTy = outer.getType();
+    mlir::Type outerEleTy = fir::dyn_cast_ptrEleTy(outerResTy);
+    if (!outerEleTy || mlir::isa<fir::SequenceType>(outerEleTy))
+      return mlir::failure();
+
+    if (convertOp.getValue().getType() != outerResTy)
+      return mlir::failure();
+
+    mlir::Type convResTy = convertOp.getType();
+    mlir::Type convResEleTy = fir::dyn_cast_ptrEleTy(convResTy);
+    auto convResSeqTy = mlir::dyn_cast_or_null<fir::SequenceType>(convResEleTy);
+    if (!convResSeqTy || convResSeqTy.getEleTy() != outerEleTy)
+      return mlir::failure();
+
+    unsigned innerRank = inner.getIndices().size();
+    unsigned outerRank = outer.getIndices().size();
+    if (innerRank == 0 || innerRank > outerRank)
+      return mlir::failure();
+
+    if (innerShapeOp.getExtents().size() != innerRank)
+      return mlir::failure();
+
+    // Skip when folding the convert+outer chain into the inner array_coor
+    // would cross an ACC data-clause or CUDA kernel boundary.
+    if (arrayCoorFoldCrossesACCOrCUDABoundary(inner, {convertOp, outer}))
+      return mlir::failure();
+
+    mlir::Location loc = inner.getLoc();
+    mlir::Type idxTy = rewriter.getIndexType();
+    mlir::Value one = mlir::arith::ConstantIndexOp::create(rewriter, loc, 1);
+    auto nsw = mlir::arith::IntegerOverflowFlags::nsw;
+
+    auto toIndex = [&](mlir::Value v) -> mlir::Value {
+      if (v.getType() == idxTy)
+        return v;
+      return fir::ConvertOp::create(rewriter, loc, idxTy, v);
+    };
+
+    llvm::SmallVector<mlir::Value> combinedIndices;
+    combinedIndices.reserve(outerRank);
+    for (unsigned i = 0; i < innerRank; ++i) {
+      mlir::Value outerIdx = toIndex(outer.getIndices()[i]);
+      mlir::Value innerIdx = toIndex(inner.getIndices()[i]);
+      mlir::Value innerMinusOne =
+          mlir::arith::SubIOp::create(rewriter, loc, innerIdx, one, nsw);
+      mlir::Value combined = mlir::arith::AddIOp::create(
+          rewriter, loc, outerIdx, innerMinusOne, nsw);
+      combinedIndices.push_back(combined);
+    }
+    for (unsigned i = innerRank; i < outerRank; ++i)
+      combinedIndices.push_back(outer.getIndices()[i]);
+
+    rewriter.replaceOpWithNewOp<fir::ArrayCoorOp>(
+        inner, inner.getType(), outer.getMemref(), outer.getShape(),
+        /*slice=*/mlir::Value{}, combinedIndices, outer.getTypeparams());
+    return mlir::success();
+  }
+};
+
 void fir::ArrayCoorOp::getCanonicalizationPatterns(
     mlir::RewritePatternSet &patterns, mlir::MLIRContext *context) {
   // TODO: !fir.shape<1> operand may be removed from array_coor always.
-  patterns.add<SimplifyArrayCoorOp>(context);
+  patterns.add<SimplifyArrayCoorOp, MergeArrayCoorOnConvert>(context);
 }
 
 std::optional<std::int64_t> fir::ArrayCoorOp::getViewOffset(mlir::OpResult) {

diff  --git a/flang/test/Fir/array-coor-canonicalization.fir b/flang/test/Fir/array-coor-canonicalization.fir
index 88e23b404f2b6..cb2efa007294f 100644
--- a/flang/test/Fir/array-coor-canonicalization.fir
+++ b/flang/test/Fir/array-coor-canonicalization.fir
@@ -878,3 +878,184 @@ func.func @test30_rebox_rank_reducing_2d(
   %ac = fir.array_coor %b %row : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
   return %ac : !fir.ref<f32>
 }
+
+// Pull a previous fir.array_coor through a fir.convert into the current
+// fir.array_coor. This mirrors how ConvertHLFIRtoFIR lowers a contiguous
+// slice designator (e.g. `dx(1:3)`): the outer array_coor produces the
+// address of the first element, fir.convert reinterprets it as a fixed-size
+// array ref, and the inner array_coor indexes into that view.
+//
+// 1-D case: dx(1:3)(k) -> dx(k). When outer's index is the constant 1, the
+// arith identity c1 + (k - 1) == k makes the canonicalizer collapse the
+// combined index back to the inner index.
+// CHECK-LABEL:   func.func @test31_merge_array_coor_1d(
+// CHECK-SAME:                      %[[VAL_0:.*]]: !fir.ref<!fir.array<?xf64>>,
+// CHECK-SAME:                      %[[VAL_1:.*]]: index,
+// CHECK-SAME:                      %[[VAL_2:.*]]: index) -> !fir.ref<f64> {
+// CHECK:           %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_4:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_3]]) %[[VAL_2]] : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+// CHECK:           return %[[VAL_4]] : !fir.ref<f64>
+// CHECK-NOT:       fir.convert
+// CHECK-NOT:       fir.array_coor %{{.*}} -> !fir.ref<!fir.array
+func.func @test31_merge_array_coor_1d(%base: !fir.ref<!fir.array<?xf64>>,
+                                      %n: index,
+                                      %k: index) -> !fir.ref<f64> {
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %base_shape = fir.shape %n : (index) -> !fir.shape<1>
+  %inner_shape = fir.shape %c3 : (index) -> !fir.shape<1>
+  %outer = fir.array_coor %base(%base_shape) %c1 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %view = fir.convert %outer : (!fir.ref<f64>) -> !fir.ref<!fir.array<3xf64>>
+  %inner = fir.array_coor %view(%inner_shape) %k : (!fir.ref<!fir.array<3xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  return %inner : !fir.ref<f64>
+}
+
+// 2-D base with 1-D inner view: dx(4:6, 4)(k) -> dx(4 + k - 1, 4). Outer
+// index is not 1 here, so the index arithmetic doesn't fully collapse;
+// canonicalizer reassociates to a single arith.addi.
+// CHECK-LABEL:   func.func @test32a_merge_array_coor_2d_offset(
+// CHECK-SAME:        %[[ARG0:[A-Za-z0-9_]+]]: !fir.ref<!fir.array<?x?xf64>>,
+// CHECK-SAME:        %[[ARG1:[A-Za-z0-9_]+]]: index, %[[ARG2:[A-Za-z0-9_]+]]: index, %[[ARG3:[A-Za-z0-9_]+]]: index)
+// CHECK:           %[[C3:.*]] = arith.constant 3 : index
+// CHECK:           %[[C4:.*]] = arith.constant 4 : index
+// CHECK:           %[[SH:.*]] = fir.shape %[[ARG1]], %[[ARG2]]
+// CHECK:           %[[IDX:.*]] = arith.addi %[[ARG3]], %[[C3]]
+// CHECK:           %[[AC:.*]] = fir.array_coor %[[ARG0]](%[[SH]]) %[[IDX]], %[[C4]]
+// CHECK:           return %[[AC]] : !fir.ref<f64>
+// CHECK-NOT:       fir.convert
+func.func @test32a_merge_array_coor_2d_offset(%base: !fir.ref<!fir.array<?x?xf64>>,
+                                              %d1: index, %d2: index,
+                                              %k: index) -> !fir.ref<f64> {
+  %c3 = arith.constant 3 : index
+  %c4 = arith.constant 4 : index
+  %base_shape = fir.shape %d1, %d2 : (index, index) -> !fir.shape<2>
+  %inner_shape = fir.shape %c3 : (index) -> !fir.shape<1>
+  %outer = fir.array_coor %base(%base_shape) %c4, %c4 : (!fir.ref<!fir.array<?x?xf64>>, !fir.shape<2>, index, index) -> !fir.ref<f64>
+  %view = fir.convert %outer : (!fir.ref<f64>) -> !fir.ref<!fir.array<3xf64>>
+  %inner = fir.array_coor %view(%inner_shape) %k : (!fir.ref<!fir.array<3xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  return %inner : !fir.ref<f64>
+}
+
+// 2-D base with 1-D inner view: dx(1:3, 4)(k) -> dx(k, 4). Inner index
+// arithmetic folds away as in test31.
+// CHECK-LABEL:   func.func @test32_merge_array_coor_2d_inner_1d(
+// CHECK-SAME:                      %[[VAL_0:.*]]: !fir.ref<!fir.array<?x?xf64>>,
+// CHECK-SAME:                      %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index,
+// CHECK-SAME:                      %[[VAL_3:.*]]: index) -> !fir.ref<f64> {
+// CHECK:           %[[VAL_4:.*]] = arith.constant 4 : index
+// CHECK:           %[[VAL_5:.*]] = fir.shape %[[VAL_1]], %[[VAL_2]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_6:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_5]]) %[[VAL_3]], %[[VAL_4]] : (!fir.ref<!fir.array<?x?xf64>>, !fir.shape<2>, index, index) -> !fir.ref<f64>
+// CHECK:           return %[[VAL_6]] : !fir.ref<f64>
+// CHECK-NOT:       fir.convert
+func.func @test32_merge_array_coor_2d_inner_1d(%base: !fir.ref<!fir.array<?x?xf64>>,
+                                               %d1: index, %d2: index,
+                                               %k: index) -> !fir.ref<f64> {
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %c4 = arith.constant 4 : index
+  %base_shape = fir.shape %d1, %d2 : (index, index) -> !fir.shape<2>
+  %inner_shape = fir.shape %c3 : (index) -> !fir.shape<1>
+  %outer = fir.array_coor %base(%base_shape) %c1, %c4 : (!fir.ref<!fir.array<?x?xf64>>, !fir.shape<2>, index, index) -> !fir.ref<f64>
+  %view = fir.convert %outer : (!fir.ref<f64>) -> !fir.ref<!fir.array<3xf64>>
+  %inner = fir.array_coor %view(%inner_shape) %k : (!fir.ref<!fir.array<3xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  return %inner : !fir.ref<f64>
+}
+
+// 3-D base with 2-D inner view: dx(1:3, 1:4, 5)(i, j) -> dx(i, j, 5).
+// CHECK-LABEL:   func.func @test33_merge_array_coor_3d_inner_2d(
+// CHECK-SAME:        %[[ARG0:[A-Za-z0-9_]+]]: !fir.ref<!fir.array<?x?x?xf64>>,
+// CHECK-SAME:        %[[ARG1:[A-Za-z0-9_]+]]: index, %[[ARG2:[A-Za-z0-9_]+]]: index, %[[ARG3:[A-Za-z0-9_]+]]: index,
+// CHECK-SAME:        %[[ARG4:[A-Za-z0-9_]+]]: index, %[[ARG5:[A-Za-z0-9_]+]]: index)
+// CHECK:           %[[C5:.*]] = arith.constant 5 : index
+// CHECK:           %[[SH:.*]] = fir.shape %[[ARG1]], %[[ARG2]], %[[ARG3]]
+// CHECK:           %[[AC:.*]] = fir.array_coor %[[ARG0]](%[[SH]]) %[[ARG4]], %[[ARG5]], %[[C5]]
+// CHECK:           return %[[AC]] : !fir.ref<f64>
+func.func @test33_merge_array_coor_3d_inner_2d(%base: !fir.ref<!fir.array<?x?x?xf64>>,
+                                               %d1: index, %d2: index, %d3: index,
+                                               %i: index, %j: index) -> !fir.ref<f64> {
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %c4 = arith.constant 4 : index
+  %c5 = arith.constant 5 : index
+  %base_shape = fir.shape %d1, %d2, %d3 : (index, index, index) -> !fir.shape<3>
+  %inner_shape = fir.shape %c3, %c4 : (index, index) -> !fir.shape<2>
+  %outer = fir.array_coor %base(%base_shape) %c1, %c1, %c5 : (!fir.ref<!fir.array<?x?x?xf64>>, !fir.shape<3>, index, index, index) -> !fir.ref<f64>
+  %view = fir.convert %outer : (!fir.ref<f64>) -> !fir.ref<!fir.array<3x4xf64>>
+  %inner = fir.array_coor %view(%inner_shape) %i, %j : (!fir.ref<!fir.array<3x4xf64>>, !fir.shape<2>, index, index) -> !fir.ref<f64>
+  return %inner : !fir.ref<f64>
+}
+
+// No fold: inner shape carries a shift (non-default lower bound).
+// CHECK-LABEL:   func.func @test34_merge_no_fold_inner_shape_shift(
+// CHECK:           fir.array_coor %{{.*}} -> !fir.ref<f64>
+// CHECK:           fir.convert
+// CHECK:           fir.array_coor %{{.*}} : (!fir.ref<!fir.array<3xf64>>, !fir.shapeshift<1>, index) -> !fir.ref<f64>
+func.func @test34_merge_no_fold_inner_shape_shift(%base: !fir.ref<!fir.array<?xf64>>,
+                                                  %n: index,
+                                                  %k: index) -> !fir.ref<f64> {
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %base_shape = fir.shape %n : (index) -> !fir.shape<1>
+  %inner_shape = fir.shape_shift %c1, %c3 : (index, index) -> !fir.shapeshift<1>
+  %outer = fir.array_coor %base(%base_shape) %c1 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %view = fir.convert %outer : (!fir.ref<f64>) -> !fir.ref<!fir.array<3xf64>>
+  %inner = fir.array_coor %view(%inner_shape) %k : (!fir.ref<!fir.array<3xf64>>, !fir.shapeshift<1>, index) -> !fir.ref<f64>
+  return %inner : !fir.ref<f64>
+}
+
+// No fold: inner has a slice.
+// CHECK-LABEL:   func.func @test35_merge_no_fold_inner_slice(
+// CHECK:           fir.array_coor %{{.*}} -> !fir.ref<f64>
+// CHECK:           fir.convert
+// CHECK:           fir.array_coor %{{.*}} {{\[}}%{{.*}}]
+func.func @test35_merge_no_fold_inner_slice(%base: !fir.ref<!fir.array<?xf64>>,
+                                            %n: index,
+                                            %k: index) -> !fir.ref<f64> {
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %base_shape = fir.shape %n : (index) -> !fir.shape<1>
+  %inner_shape = fir.shape %c3 : (index) -> !fir.shape<1>
+  %slice = fir.slice %c1, %c3, %c1 : (index, index, index) -> !fir.slice<1>
+  %outer = fir.array_coor %base(%base_shape) %c1 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %view = fir.convert %outer : (!fir.ref<f64>) -> !fir.ref<!fir.array<3xf64>>
+  %inner = fir.array_coor %view(%inner_shape) [%slice] %k : (!fir.ref<!fir.array<3xf64>>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref<f64>
+  return %inner : !fir.ref<f64>
+}
+
+// No fold: convert reinterprets to a different element type.
+// CHECK-LABEL:   func.func @test36_merge_no_fold_element_type_mismatch(
+// CHECK:           fir.array_coor %{{.*}} -> !fir.ref<f64>
+// CHECK:           fir.convert
+// CHECK:           fir.array_coor %{{.*}} : (!fir.ref<!fir.array<6xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
+func.func @test36_merge_no_fold_element_type_mismatch(%base: !fir.ref<!fir.array<?xf64>>,
+                                                      %n: index,
+                                                      %k: index) -> !fir.ref<i32> {
+  %c1 = arith.constant 1 : index
+  %c6 = arith.constant 6 : index
+  %base_shape = fir.shape %n : (index) -> !fir.shape<1>
+  %inner_shape = fir.shape %c6 : (index) -> !fir.shape<1>
+  %outer = fir.array_coor %base(%base_shape) %c1 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %view = fir.convert %outer : (!fir.ref<f64>) -> !fir.ref<!fir.array<6xi32>>
+  %inner = fir.array_coor %view(%inner_shape) %k : (!fir.ref<!fir.array<6xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
+  return %inner : !fir.ref<i32>
+}
+
+// No fold: inner rank exceeds outer rank.
+// CHECK-LABEL:   func.func @test37_merge_no_fold_inner_rank_too_large(
+// CHECK:           fir.array_coor %{{.*}} -> !fir.ref<f64>
+// CHECK:           fir.convert
+// CHECK:           fir.array_coor %{{.*}} : (!fir.ref<!fir.array<3x4xf64>>, !fir.shape<2>, index, index) -> !fir.ref<f64>
+func.func @test37_merge_no_fold_inner_rank_too_large(%base: !fir.ref<!fir.array<?xf64>>,
+                                                     %n: index,
+                                                     %i: index,
+                                                     %j: index) -> !fir.ref<f64> {
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %c4 = arith.constant 4 : index
+  %base_shape = fir.shape %n : (index) -> !fir.shape<1>
+  %inner_shape = fir.shape %c3, %c4 : (index, index) -> !fir.shape<2>
+  %outer = fir.array_coor %base(%base_shape) %c1 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %view = fir.convert %outer : (!fir.ref<f64>) -> !fir.ref<!fir.array<3x4xf64>>
+  %inner = fir.array_coor %view(%inner_shape) %i, %j : (!fir.ref<!fir.array<3x4xf64>>, !fir.shape<2>, index, index) -> !fir.ref<f64>
+  return %inner : !fir.ref<f64>
+}