[Mlir-commits] [mlir] [mlir][memref] Rewrite scalar `memref.copy` through reinterpret_cast into load/store (PR #186118)
Andrzej Warzyński
llvmlistbot at llvm.org
Fri Mar 13 02:42:14 PDT 2026
================
@@ -0,0 +1,205 @@
+//===- BypassReinterpretCast.cpp - Expansion patterns for MemRef operations
+//----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Transforms/Passes.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace mlir {
+namespace memref {
+#define GEN_PASS_DEF_BYPASSREINTERPRETCASTPASS
+#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
+} // namespace memref
+} // namespace mlir
+
+using namespace mlir;
+
+namespace {
+
+/// Returns true if `rc` is a single-element ("scalar") view into a memref of
+/// the same rank that has exactly one non-unit dimension.
+///
+/// All of the following must hold:
+///   * the source and the result are both `MemRefType`s of equal rank;
+///   * every static size of the cast, and every result dimension, is 1;
+///   * every stride of the cast is static;
+///   * exactly one dimension of the source is non-unit;
+///   * rank 1: the stride equals the source extent;
+///   * rank > 1: all interior strides are 1 and exactly one of the first/last
+///     strides is 1, the other being equal to the source extent at the
+///     opposite end, i.e. strides `[N, 1, ..., 1]` over a `1x...xN` source or
+///     strides `[1, ..., 1, N]` over an `Nx...x1` source.
+///
+/// Returns false for anything else, including rank-0 memrefs.
+static bool isScalarSlice(memref::ReinterpretCastOp rc) {
+  auto underlyingType = dyn_cast<MemRefType>(rc.getSource().getType());
+  auto stridedType = dyn_cast<MemRefType>(rc.getType());
+  if (!underlyingType || !stridedType)
+    return false;
+
+  unsigned rank = underlyingType.getRank();
+  if (rank != stridedType.getRank())
+    return false;
+
+  ArrayRef<int64_t> sizes = rc.getStaticSizes();
+  ArrayRef<int64_t> strides = rc.getStaticStrides();
+  if (sizes.size() != rank || strides.size() != rank)
+    return false;
+
+  // These per-dimension checks apply to every rank. In particular they must
+  // run before the rank-1 shortcut below: otherwise a rank-1 view with more
+  // than one element, or with a dynamic stride over a dynamic extent (both
+  // are encoded with the same sentinel and would compare equal), would be
+  // misclassified as a scalar slice.
+  bool hasNonUnitDim = false;
+  for (unsigned i = 0; i < rank; ++i) {
+    // The view must contain exactly one element. A dynamic size is encoded
+    // with a sentinel different from 1, so it is rejected here as well.
+    if (stridedType.getDimSize(i) != 1 || sizes[i] != 1)
+      return false;
+
+    if (ShapedType::isDynamic(strides[i]))
+      return false;
+
+    // Make sure no more than one source dim is non-unit.
+    if (underlyingType.getDimSize(i) != 1) {
+      if (hasNonUnitDim)
+        return false;
+      hasNonUnitDim = true;
+    }
+
+    // Interior strides (neither first nor last) must be 1.
+    if (i > 0 && i + 1 < rank && strides[i] != 1)
+      return false;
+  }
+
+  // Make sure at least one source dim is non-unit (also rejects rank 0).
+  if (!hasNonUnitDim)
+    return false;
+
+  // Rank-1 special case: the single (static) stride must match the extent.
+  if (rank == 1)
+    return strides[0] == underlyingType.getDimSize(0);
+
+  // Exactly one of the outermost/innermost strides may be 1.
+  bool firstStrideIsOne = strides[0] == 1;
+  bool lastStrideIsOne = strides[rank - 1] == 1;
+  if (firstStrideIsOne == lastStrideIsOne)
+    return false;
+
+  // Strides [1, ..., N]: the non-unit extent N must be the first source dim.
+  if (firstStrideIsOne)
+    return underlyingType.getDimSize(0) == strides[rank - 1];
+
+  // Strides [N, ..., 1]: the non-unit extent N must be the last source dim.
+  return underlyingType.getDimSize(rank - 1) == strides[0];
+}
+
+/// Rewrites `memref.copy` of a 1-element MemRef as a scalar load-store pair
+///
+/// The pattern matches a reinterpret_cast that creates a scalar view
+/// (`sizes = [1, ..., 1]`) into a memref with a single non-unit dimension.
+/// Since the view contains only one element, the accessed address is
+/// determined solely by the base pointer and the offset.
+///
+/// Two layouts are supported:
+/// * row-major slice (stride pattern [N, ..., 1])
+/// * column-major slice (stride pattern [1, ..., N])
+///
+/// BEFORE (row-major slice)
+/// %view = memref.reinterpret_cast %base
+/// to offset: [%off], sizes: [1, ..., 1], strides: [N, ..., 1]
+/// : memref<1x...xNxf32>
+/// to memref<1x...x1xf32, strided<[N, ..., 1], offset: ?>>
+/// memref.copy %src, %view
+/// : memref<1x...x1xf32>
+/// to memref<1x...x1xf32, strided<[N, ..., 1], offset: ?>>
+///
+/// AFTER
+/// %c0 = arith.constant 0 : index
+/// %v = memref.load %src[%c0, ..., %c0] : memref<1x...x1xf32>
+/// memref.store %v, %base[%c0, ..., %off] : memref<1x...xNxf32>
+///
+/// BEFORE (column-major slice)
+/// %view = memref.reinterpret_cast %base
+/// to offset: [%off], sizes: [1, ..., 1], strides: [1, ..., N]
+/// : memref<Nx...x1xf32>
+/// to memref<1x...x1xf32, strided<[1, ..., N], offset: ?>>
+/// memref.copy %src, %view
+/// : memref<1x...x1xf32>
+/// to memref<1x...x1xf32, strided<[1, ..., N], offset: ?>>
+///
+/// AFTER
+/// %c0 = arith.constant 0 : index
+/// %v = memref.load %src[%c0, ..., %c0] : memref<1x...x1xf32>
+/// memref.store %v, %base[%off, ..., %c0] : memref<Nx...x1xf32>
+struct CopyToScalarLoadAndStore : public OpRewritePattern<memref::CopyOp> {
+public:
+ using OpRewritePattern::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(memref::CopyOp op,
+ PatternRewriter &rewriter) const final {
+ auto rc = op.getTarget().getDefiningOp<memref::ReinterpretCastOp>();
+ if (!rc)
+ return rewriter.notifyMatchFailure(
+ op, "target is not a memref.reinterpret_cast");
+
+ if (!isScalarSlice(rc))
+ return rewriter.notifyMatchFailure(
+ op, "reinterpret_cast does not match scalar slice");
+
+ Location loc = op.getLoc();
+
+ Value src = op.getSource();
+ Value dst = rc.getSource();
+
+ auto dstType = cast<MemRefType>(dst.getType());
+ unsigned rank = dstType.getRank();
+
+ Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+
+ auto srcType = cast<MemRefType>(src.getType());
+ SmallVector<Value> loadIndices(srcType.getRank(), zero);
+
+ OpFoldResult offset = rc.getMixedOffsets()[0];
----------------
banach-space wrote:
What makes the 0th index special?
https://github.com/llvm/llvm-project/pull/186118
More information about the Mlir-commits
mailing list