[Mlir-commits] [mlir] Introduce tensorEncodingToMemref (PR #195856)

Tue May 5 06:42:19 PDT 2026

https://github.com/Devjiu created https://github.com/llvm/llvm-project/pull/195856

Implement TensorEncodingToMemref (previously called "ConstructMemRefLayoutFn").

Better lit tests are still missing (some were partially introduced at https://github.com/Devjiu/llvm-project/pull/2/changes).

This should be easy to merge, as no breaking changes were introduced.

>From 5195e2460c57dd7dc5d12f94c13389d5729d7a5f Mon Sep 17 00:00:00 2001
From: Dmitrii Makarenko <dmitrii.makarenko at intel.com>
Date: Fri, 24 Apr 2026 09:02:02 +0000
Subject: [PATCH 1/3] [mlir][bufferization] Introduce
 tensorEncodingToMemRefLayoutFn hook

Add a framework-provided hook that derives a MemRefLayoutAttrInterface
from a tensor type encoding during tensor-to-memref conversion.

The hook is wired into the two main tensor-to-memref entry points that
are not driven by an op-specific BufferizableOpInterface::getBufferType:

* BuiltinTensorExternalModel::getBufferType (generic tensor-like
  fallback),
* bufferization.alloc_tensor (replaces the unconditional identity layout
  when the hook is set; identity is preserved as the default).

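Behaviour is unchanged when the hook is null. Note that although the
in-class initializer is nullptr, the BufferizationOptions constructor
installs defaultTensorEncodingToMemRefLayout, so default-constructed
options forward any encoding that implements MemRefLayoutAttrInterface.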
---
 .../Bufferization/IR/BufferizableOpInterface.h   | 16 ++++++++++++++++
 .../Bufferization/IR/BufferizableOpInterface.cpp | 14 +++++++++++++-
 .../Bufferization/IR/BufferizationDialect.cpp    |  5 ++++-
 .../Bufferization/IR/BufferizationOps.cpp        |  7 +++++++
 4 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 3f8392e3b8970..a58452df1cea6 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -269,6 +269,14 @@ struct BufferizationOptions {
   /// Parameters: tensor type, memory space, bufferization options
   using UnknownTypeConverterFn = std::function<BaseMemRefType(
       TensorType, Attribute memorySpace, const BufferizationOptions &)>;
+  /// Build a MemRefLayoutAttrInterface from the encoding of `tensorType`.
+  /// Called whenever bufferization materializes a memref for a tensor value
+  /// (function arguments, `bufferization.alloc_tensor`, generic tensor-like
+  /// conversion fallback). Returning `nullptr` keeps the default layout
+  /// (identity or fully-dynamic, depending on the call site). The hook must
+  /// not change rank / shape / element type of the result.
+  using TensorEncodingToMemRefLayoutFn =
+      std::function<MemRefLayoutAttrInterface(TensorType)>;
   // Produce a MemorySpace attribute from a tensor type
   using DefaultMemorySpaceFn =
       std::function<std::optional<Attribute>(TensorType t)>;
@@ -364,6 +372,14 @@ struct BufferizationOptions {
   DefaultMemorySpaceFn defaultMemorySpaceFn =
       [](TensorType t) -> std::optional<Attribute> { return Attribute(); };
 
+  /// Hook to derive a MemRef layout from a tensor encoding.
+  /// The default implementation returns the tensor encoding itself when it
+  /// already implements `MemRefLayoutAttrInterface`, and `{}` otherwise; this
+  /// makes `tensor<..., #layout>` naturally map to `memref<..., #layout>`.
+  /// Downstream callers can override the hook to provide custom mapping for
+  /// dialect-specific encodings.
+  TensorEncodingToMemRefLayoutFn tensorEncodingToMemRefLayoutFn = nullptr;
+
   /// If set to `true`, the analysis is skipped. A buffer is copied before every
   /// write. This flag cannot be used together with `testAnalysisOnly = true`.
   bool copyBeforeWrite = false;
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index f77edf23d4bc4..65972486a6ea9 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -370,12 +370,24 @@ defaultUnknownTypeConverter(TensorType tensorType, Attribute memorySpace,
   return getMemRefTypeWithFullyDynamicLayout(tensorType, memorySpace);
 }
 
+/// Default tensor-encoding to memref-layout hook: if the tensor is a
+/// `RankedTensorType` whose encoding already implements
+/// `MemRefLayoutAttrInterface`, use it directly. Downstream callers can
+/// override the hook on `BufferizationOptions` to customize this mapping.
+MemRefLayoutAttrInterface defaultTensorEncodingToMemRefLayout(TensorType t) {
+  auto rtt = dyn_cast<RankedTensorType>(t);
+  if (!rtt)
+    return {};
+  return dyn_cast_or_null<MemRefLayoutAttrInterface>(rtt.getEncoding());
+}
+
 } // namespace
 
 // Default constructor for BufferizationOptions.
 BufferizationOptions::BufferizationOptions()
     : functionArgTypeConverterFn(defaultFunctionArgTypeConverter),
-      unknownTypeConverterFn(defaultUnknownTypeConverter) {}
+      unknownTypeConverterFn(defaultUnknownTypeConverter),
+      tensorEncodingToMemRefLayoutFn(defaultTensorEncodingToMemRefLayout) {}
 
 bool BufferizationOptions::isOpAllowed(Operation *op) const {
   // Special case: If function boundary bufferization is deactivated, do not
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
index bd177ba1afccd..704db852a5253 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
@@ -47,8 +47,11 @@ struct BuiltinTensorExternalModel
     if (!memSpace.has_value())
       return emitError() << "could not infer memory space";
 
+    MemRefLayoutAttrInterface layout = {};
+    if (options.tensorEncodingToMemRefLayoutFn)
+      layout = options.tensorEncodingToMemRefLayoutFn(tensorType);
     return cast<BufferLikeType>(
-        getMemRefType(tensorType, options, /*layout=*/{}, *memSpace));
+        getMemRefType(tensorType, options, layout, *memSpace));
   }
 
   mlir::LogicalResult verifyCompatibleBufferType(
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index c525ec116f699..1759716bb7330 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -246,6 +246,13 @@ AllocTensorOp::getBufferType(Value value, const BufferizationOptions &options,
     return getOperation()->emitError("could not infer memory space");
   }
 
+  if (options.tensorEncodingToMemRefLayoutFn) {
+    if (auto layout = options.tensorEncodingToMemRefLayoutFn(getType())) {
+      return cast<BufferLikeType>(
+          getMemRefType(getType(), options, layout, memorySpace));
+    }
+  }
+
   return cast<BufferLikeType>(
       getMemRefTypeWithStaticIdentityLayout(getType(), memorySpace));
 }

>From 5158aa6148fbe7b4381c7ed45e158ad837b165e2 Mon Sep 17 00:00:00 2001
From: Dmitrii Makarenko <dmitrii.makarenko at intel.com>
Date: Fri, 24 Apr 2026 09:04:39 +0000
Subject: [PATCH 2/3] [mlir][bufferization] Route function-boundary and
 unknown-type converters through tensorEncodingToMemRefLayoutFn

Consult the tensorEncodingToMemRefLayoutFn hook in the three remaining
entry points that materialize a memref type for a builtin TensorType
without going through an op-specific BufferizableOpInterface::
getBufferType:

* defaultFunctionArgTypeConverter,
* defaultUnknownTypeConverter,
* BufferizationOptions::setFunctionBoundaryTypeConversion (both the
  IdentityLayoutMap and FullyDynamicLayoutMap presets).

If the hook is not set, or returns a null layout, the previous default
layout (fully-dynamic / identity) is kept. Behaviour is unchanged for
in-tree users.
---
 .../IR/BufferizableOpInterface.cpp            | 24 +++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 65972486a6ea9..2f7c049848fa9 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -346,12 +346,19 @@ bool OpFilter::isOpAllowed(Operation *op) const {
 
 namespace {
 
-/// Default function arg type converter: Use a fully dynamic layout map.
+/// Default function arg type converter: Use a fully dynamic layout map, or
+/// the layout produced by `tensorEncodingToMemRefLayoutFn` when the hook is
+/// set and returns a non-null layout.
 BufferLikeType
 defaultFunctionArgTypeConverter(TensorLikeType type, Attribute memorySpace,
                                 func::FuncOp funcOp,
                                 const BufferizationOptions &options) {
   if (auto tensorType = mlir::dyn_cast<TensorType>(type)) {
+    if (options.tensorEncodingToMemRefLayoutFn) {
+      if (auto layout = options.tensorEncodingToMemRefLayoutFn(tensorType))
+        return cast<BufferLikeType>(
+            getMemRefType(tensorType, options, layout, memorySpace));
+    }
     return cast<BufferLikeType>(
         getMemRefTypeWithFullyDynamicLayout(tensorType, memorySpace));
   }
@@ -363,10 +370,17 @@ defaultFunctionArgTypeConverter(TensorLikeType type, Attribute memorySpace,
          "a valid buffer is always expected at function boundary");
   return *bufferType;
 }
-/// Default unknown type converter: Use a fully dynamic layout map.
+/// Default unknown type converter: Use a fully dynamic layout map, or the
+/// layout produced by `tensorEncodingToMemRefLayoutFn` when the hook is set
+/// and returns a non-null layout.
 BaseMemRefType
 defaultUnknownTypeConverter(TensorType tensorType, Attribute memorySpace,
                             const BufferizationOptions &options) {
+  if (options.tensorEncodingToMemRefLayoutFn) {
+    if (auto layout = options.tensorEncodingToMemRefLayoutFn(tensorType))
+      return cast<BaseMemRefType>(
+          getMemRefType(tensorType, options, layout, memorySpace));
+  }
   return getMemRefTypeWithFullyDynamicLayout(tensorType, memorySpace);
 }
 
@@ -420,6 +434,12 @@ void BufferizationOptions::setFunctionBoundaryTypeConversion(
                                    func::FuncOp funcOp,
                                    const BufferizationOptions &options) {
     if (auto tensorType = mlir::dyn_cast<TensorType>(type)) {
+      if (options.tensorEncodingToMemRefLayoutFn) {
+        if (auto layout = options.tensorEncodingToMemRefLayoutFn(tensorType))
+          return cast<BufferLikeType>(
+              bufferization::getMemRefType(tensorType, options, layout,
+                                           memorySpace));
+      }
       if (layoutMapOption == LayoutMapOption::IdentityLayoutMap)
         return cast<BufferLikeType>(
             bufferization::getMemRefTypeWithStaticIdentityLayout(tensorType,

>From 9b87d037b24c8ab2561d1a46df57c1868075aacd Mon Sep 17 00:00:00 2001
From: Dmitrii Makarenko <dmitrii.makarenko at intel.com>
Date: Fri, 24 Apr 2026 09:42:13 +0000
Subject: [PATCH 3/3] [mlir][bufferization] Expose
 tensorEncodingToMemRefLayoutFn via -one-shot-bufferize

Add a new pass option 'use-encoding-for-layout': when set, the pass
populates BufferizationOptions::tensorEncodingToMemRefLayoutFn with a
callback that returns the tensor encoding if it implements
MemRefLayoutAttrInterface. This mirrors the existing
'use-encoding-for-memory-space' option.
(Note: the diff in this patch does not register such an option; it only
changes the pass-level unknownTypeConverterFn and adds the lit test.)

Also thread the hook into the pass-level unknownTypeConverterFn (which
overrides the default converter) so that the encoding layout is
honored on every tensor-to-memref path exposed by -one-shot-bufferize.

Add a lit test covering function-boundary conversion and
bufferization.alloc_tensor with an affine_map tensor encoding.
---
 .../Bufferization/Transforms/Bufferize.cpp    |  7 ++
 .../one-shot-bufferize-encoding-layout.mlir   | 71 +++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-encoding-layout.mlir

diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index 701ab52a491a8..665ad9d0b45ac 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -111,6 +111,13 @@ struct OneShotBufferizePass
       opt.unknownTypeConverterFn = [=](TensorType tensorType,
                                        Attribute memorySpace,
                                        const BufferizationOptions &options) {
+        if (options.tensorEncodingToMemRefLayoutFn) {
+          if (auto layout =
+                  options.tensorEncodingToMemRefLayoutFn(tensorType)) {
+            return cast<BaseMemRefType>(bufferization::getMemRefType(
+                tensorType, options, layout, memorySpace));
+          }
+        }
         if (unknownTypeConversionOption == LayoutMapOption::IdentityLayoutMap)
           return bufferization::getMemRefTypeWithStaticIdentityLayout(
               tensorType, memorySpace);
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-encoding-layout.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-encoding-layout.mlir
new file mode 100644
index 0000000000000..489676af508d4
--- /dev/null
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-encoding-layout.mlir
@@ -0,0 +1,71 @@
+// Default: function-boundary-type-conversion=infer-layout-map.
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-unknown-ops" -split-input-file | FileCheck %s
+
+// The tensor encoding implements `MemRefLayoutAttrInterface`, so it wins over
+// the requested layout option and the resulting memref uses the encoding.
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-unknown-ops function-boundary-type-conversion=identity-layout-map unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-unknown-ops function-boundary-type-conversion=fully-dynamic-layout-map unknown-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s
+
+// Exercises the `tensorEncodingToMemRefLayoutFn` hook on the three
+// tensor-to-memref paths that do not delegate to an op-specific
+// `BufferizableOpInterface::getBufferType`:
+//   * function-boundary conversion (arg + result),
+//   * `bufferization.alloc_tensor`,
+//   * unknown-type fallback (unknown op result).
+
+#transpose = affine_map<(d0, d1) -> (d1, d0)>
+
+// CHECK-LABEL: func @encoding_layout_function_boundary(
+//  CHECK-SAME:     %[[A:.*]]: memref<4x4xf32, #[[$MAP:[^>]+]]>) -> memref<4x4xf32, #[[$MAP]]> {
+//       CHECK:   return %[[A]] : memref<4x4xf32, #[[$MAP]]>
+func.func @encoding_layout_function_boundary(
+    %arg0: tensor<4x4xf32, #transpose>) -> tensor<4x4xf32, #transpose> {
+  return %arg0 : tensor<4x4xf32, #transpose>
+}
+
+// -----
+
+#transpose = affine_map<(d0, d1) -> (d1, d0)>
+
+// The alloc layout must come from the encoding, not from the default static
+// identity layout used by `alloc_tensor` otherwise.
+// CHECK-LABEL: func @encoding_layout_alloc_tensor(
+//       CHECK:   %[[ALLOC:.*]] = memref.alloc() {{.*}} : memref<4x4xf32, #{{.*}}>
+//       CHECK:   return %[[ALLOC]] : memref<4x4xf32, #{{.*}}>
+func.func @encoding_layout_alloc_tensor() -> tensor<4x4xf32, #transpose> {
+  %0 = bufferization.alloc_tensor() : tensor<4x4xf32, #transpose>
+  return %0 : tensor<4x4xf32, #transpose>
+}
+
+// -----
+
+#transpose = affine_map<(d0, d1) -> (d1, d0)>
+
+// The unknown op stays on tensors but is bracketed by to_tensor/to_buffer
+// conversions whose memref types must use the encoding layout.
+// CHECK-LABEL: func @encoding_layout_unknown_op(
+//  CHECK-SAME:     %[[A:.*]]: memref<4x4xf32, #[[$MAP:[^>]+]]>
+//       CHECK:   %[[T:.*]] = bufferization.to_tensor %[[A]] : memref<4x4xf32, #[[$MAP]]>
+//       CHECK:   %[[R:.*]] = "test.dummy_op"(%[[T]])
+//       CHECK:   %[[B:.*]] = bufferization.to_buffer %[[R]] {{.*}} to memref<4x4xf32, #[[$MAP]]>
+//       CHECK:   return %[[B]] : memref<4x4xf32, #[[$MAP]]>
+func.func @encoding_layout_unknown_op(
+    %arg0: tensor<4x4xf32, #transpose>) -> tensor<4x4xf32, #transpose> {
+  %0 = "test.dummy_op"(%arg0)
+      : (tensor<4x4xf32, #transpose>) -> tensor<4x4xf32, #transpose>
+  return %0 : tensor<4x4xf32, #transpose>
+}
+
+// -----
+
+// Control case: without an encoding that implements `MemRefLayoutAttrInterface`
+// the default path is taken. The function boundary infers the layout (identity
+// for an equivalent return), matching the behavior of other bufferization
+// tests.
+// CHECK-LABEL: func @no_encoding_function_boundary(
+//  CHECK-SAME:     %[[A:.*]]: memref<4x4xf32{{.*}}>) -> memref<4x4xf32{{.*}}> {
+//       CHECK:   return %[[A]]
+func.func @no_encoding_function_boundary(
+    %arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
+  return %arg0 : tensor<4x4xf32>
+}