[Mlir-commits] [mlir] [mlir][memref] Rewrite scalar `memref.copy` through reinterpret_cast into load/store (PR #186118)

Fri Mar 13 02:42:14 PDT 2026

================
@@ -0,0 +1,205 @@
+//===- BypassReinterpretCast.cpp - Bypass scalar reinterpret_cast ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Transforms/Passes.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace mlir {
+namespace memref {
+#define GEN_PASS_DEF_BYPASSREINTERPRETCASTPASS
+#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
+} // namespace memref
+} // namespace mlir
+
+using namespace mlir;
+
+namespace {
+
+/// Returns true if `rc` is an (expanded) scalar view of an (expanded) single
+/// row/column memref with the same rank.
+///
+/// The following is required:
+///   * source and result are ranked memrefs of equal rank;
+///   * every result dimension is a static unit dimension, i.e. the view holds
+///     exactly one element;
+///   * every stride is static;
+///   * exactly one source dimension differs from 1;
+///   * rank 1: the stride equals the source dimension;
+///   * rank > 1: interior strides are 1 and exactly one of the first/last
+///     strides is 1, the other one being equal to the source dimension at the
+///     opposite end.
+static bool isScalarSlice(memref::ReinterpretCastOp rc) {
+  auto underlyingType = dyn_cast<MemRefType>(rc.getSource().getType());
+  auto stridedType = dyn_cast<MemRefType>(rc.getType());
+  if (!underlyingType || !stridedType)
+    return false;
+
+  // Compare the ranks before narrowing to avoid a signed/unsigned comparison.
+  if (underlyingType.getRank() != stridedType.getRank())
+    return false;
+  unsigned rank = underlyingType.getRank();
+
+  ArrayRef<int64_t> sizes = rc.getStaticSizes();
+  ArrayRef<int64_t> strides = rc.getStaticStrides();
+  if (sizes.size() != rank || strides.size() != rank)
+    return false;
+
+  // These checks apply to every rank, including rank 1: a view is only a
+  // scalar if all of its sizes are statically 1.
+  bool seenNonUnitDim = false;
+  for (unsigned i = 0; i < rank; ++i) {
+    if (stridedType.getDimSize(i) != 1)
+      return false;
+
+    // A dynamic size is encoded as a sentinel different from 1, so this also
+    // rejects dynamic sizes.
+    if (sizes[i] != 1)
+      return false;
+
+    if (ShapedType::isDynamic(strides[i]))
+      return false;
+
+    // Make sure no more than one dim is non-unit.
+    if (underlyingType.getDimSize(i) != 1) {
+      if (seenNonUnitDim)
+        return false;
+      seenNonUnitDim = true;
+    }
+
+    // Interior strides must be 1.
+    if (i > 0 && i + 1 < rank && strides[i] != 1)
+      return false;
+  }
+
+  // Make sure at least one dim is non-unit. This also rejects rank 0.
+  if (!seenNonUnitDim)
+    return false;
+
+  // Rank-1 special case: the only stride must match the (non-unit) source
+  // dimension. The stride is known to be static here, so a dynamic source
+  // dimension never compares equal.
+  if (rank == 1)
+    return strides[0] == underlyingType.getDimSize(0);
+
+  bool firstStrideIsOne = strides[0] == 1;
+  bool lastStrideIsOne = strides[rank - 1] == 1;
+
+  // Exactly one of the outermost strides has to be the unit stride.
+  if (firstStrideIsOne == lastStrideIsOne)
+    return false;
+
+  // Stride pattern [1, ..., N]: N must be the first source dimension.
+  if (firstStrideIsOne)
+    return underlyingType.getDimSize(0) == strides[rank - 1];
+
+  // Stride pattern [N, ..., 1]: N must be the last source dimension.
+  return underlyingType.getDimSize(rank - 1) == strides[0];
+}
+
+/// Rewrites `memref.copy` of a 1-element MemRef as a scalar load-store pair
+///
+/// The pattern matches a reinterpret_cast that creates a scalar view
+/// (`sizes = [1, ..., 1]`) into a memref with a single non-unit dimension.
+/// Since the view contains only one element, the accessed address is
+/// determined solely by the base pointer and the offset.
+///
+/// Two layouts are supported:
+///   * row-major slice  (stride pattern [N, ..., 1])
+///   * column-major slice (stride pattern [1, ..., N])
+///
+/// BEFORE (row-major slice)
+///   %view = memref.reinterpret_cast %base
+///     to offset: [%off], sizes: [1, ..., 1], strides: [N, ..., 1]
+///       : memref<1x...xNxf32>
+///         to memref<1x...x1xf32, strided<[N, ..., 1], offset: ?>>
+///   memref.copy %src, %view
+///     : memref<1x...x1xf32>
+///       to memref<1x...x1xf32, strided<[N, ..., 1], offset: ?>>
+///
+/// AFTER
+///   %c0 = arith.constant 0 : index
+///   %v  = memref.load %src[%c0, ..., %c0] : memref<1x...x1xf32>
+///   memref.store %v, %base[%c0, ..., %off] : memref<1x...xNxf32>
+///
+/// BEFORE (column-major slice)
+///   %view = memref.reinterpret_cast %base
+///     to offset: [%off], sizes: [1, ..., 1], strides: [1, ..., N]
+///       : memref<Nx...x1xf32>
+///         to memref<1x...x1xf32, strided<[1, ..., N], offset: ?>>
+///   memref.copy %src, %view
+///     : memref<1x...x1xf32>
+///       to memref<1x...x1xf32, strided<[1, ..., N], offset: ?>>
+///
+/// AFTER
+///   %c0 = arith.constant 0 : index
+///   %v  = memref.load %src[%c0, ..., %c0] : memref<1x...x1xf32>
+///   memref.store %v, %base[%off, ..., %c0] : memref<Nx...x1xf32>
+struct CopyToScalarLoadAndStore : public OpRewritePattern<memref::CopyOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::CopyOp op,
+                                PatternRewriter &rewriter) const final {
+    auto rc = op.getTarget().getDefiningOp<memref::ReinterpretCastOp>();
+    if (!rc)
+      return rewriter.notifyMatchFailure(
+          op, "target is not a memref.reinterpret_cast");
+
+    if (!isScalarSlice(rc))
+      return rewriter.notifyMatchFailure(
+          op, "reinterpret_cast does not match scalar slice");
+
+    Location loc = op.getLoc();
+
+    Value src = op.getSource();
+    Value dst = rc.getSource();
+
+    auto dstType = cast<MemRefType>(dst.getType());
+    unsigned rank = dstType.getRank();
+
+    Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+
+    auto srcType = cast<MemRefType>(src.getType());
+    SmallVector<Value> loadIndices(srcType.getRank(), zero);
+
+    OpFoldResult offset = rc.getMixedOffsets()[0];
----------------
banach-space wrote:

What makes the 0th index special?

https://github.com/llvm/llvm-project/pull/186118