[Mlir-commits] [mlir] [mlir] Expose linalg vectorization without replacement (PR #144158)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Jun 13 13:36:24 PDT 2025


https://github.com/Max191 created https://github.com/llvm/llvm-project/pull/144158

Exposes a new `linalg::vectorize` function, which populates the replacement values for the vectorized op, instead of immediately replacing the results of the original operation. This allows for more control over vectorization transformations. The old vectorize function remains the same, and simply calls the new function and replaces the op results in its implementation.

>From fe437426c50173a0782dbc67c4d79ea65845cdc9 Mon Sep 17 00:00:00 2001
From: Max Dawkins <max.dawkins at gmail.com>
Date: Fri, 13 Jun 2025 20:18:16 +0000
Subject: [PATCH] [mlir] Expose linalg vectorization without replacement

Signed-off-by: Max Dawkins <max.dawkins at gmail.com>
---
 .../Dialect/Linalg/Transforms/Transforms.h    |  19 ++-
 .../Linalg/Transforms/Vectorization.cpp       | 115 +++++++++---------
 2 files changed, 73 insertions(+), 61 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 2eef0a06d0eb4..de09dae24eccf 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -772,17 +772,26 @@ LogicalResult deallocateGPUPrivateMemory(OpBuilder &, Value /*buffer*/);
 bool hasVectorizationImpl(Operation *);
 
 /// Emit a suitable vector form for an operation. If provided,
-/// `inputVectorSizes` are used to vectorize this operation. `inputVectorSizes`
-/// must match the rank of the iteration space of the operation and the sizes
-/// must be smaller or equal than their counterpart interation space sizes, if
-/// static. `inputVectorShapes` also allows the vectorization of operations with
-/// dynamic shapes.
+/// `inputVectorSizes` are used to vectorize this operation.
+/// `inputVectorSizes` must match the rank of the iteration space of the
+/// operation and the input vector sizes must be greater than or equal to
+/// their counterpart iteration space sizes, if static. `inputVectorShapes`
+/// also allows the vectorization of operations with dynamic shapes.
 LogicalResult vectorize(RewriterBase &rewriter, Operation *op,
                         ArrayRef<int64_t> inputVectorSizes = {},
                         ArrayRef<bool> inputScalableVecDims = {},
                         bool vectorizeNDExtract = false,
                         bool flatten1DDepthwiseConv = false);
 
+/// Vectorize and store new vectorized results in `newResults`, without replacing
+/// the old `op`.
+LogicalResult vectorize(RewriterBase &rewriter, Operation *op,
+                        SmallVector<Value> &newResults,
+                        ArrayRef<int64_t> inputVectorSizes = {},
+                        ArrayRef<bool> inputScalableVecDims = {},
+                        bool vectorizeNDExtract = false,
+                        bool flatten1DDepthwiseConv = false);
+
 /// Emit a suitable vector form for a Copy op with fully static shape.
 LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp);
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index ff28bd7c48342..3efef3af93fa3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2522,13 +2522,8 @@ bool mlir::linalg::hasVectorizationImpl(Operation *op) {
              tensor::InsertSliceOp>(op);
 }
 
-/// Emit a suitable vector form for an operation. If provided,
-/// `inputVectorSizes` are used to vectorize this operation.
-/// `inputVectorSizes` must match the rank of the iteration space of the
-/// operation and the input vector sizes must be greater than or equal to
-/// their counterpart iteration space sizes, if static. `inputVectorShapes`
-/// also allows the vectorization of operations with dynamic shapes.
 LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
+                                      SmallVector<Value> &newResults,
                                       ArrayRef<int64_t> inputVectorSizes,
                                       ArrayRef<bool> inputScalableVecDims,
                                       bool vectorizeNDExtract,
@@ -2558,57 +2553,65 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
     }
   }
 
-  SmallVector<Value> results;
-  auto vectorizeResult =
-      TypeSwitch<Operation *, LogicalResult>(op)
-          .Case<linalg::LinalgOp>([&](auto linalgOp) {
-            // TODO: isaConvolutionOpInterface that can also infer from
-            // generic features. Will require stride/dilation attributes
-            // inference.
-            if (isa<ConvolutionOpInterface>(linalgOp.getOperation())) {
-              FailureOr<Operation *> convOr = vectorizeConvolution(
-                  rewriter, linalgOp, inputVectorSizes, inputScalableVecDims,
-                  flatten1DDepthwiseConv);
-              if (succeeded(convOr)) {
-                llvm::append_range(results, (*convOr)->getResults());
-                return success();
-              }
-
-              LDBG("Unsupported convolution can't be vectorized.\n");
-              return failure();
-            }
-
-            LDBG("Vectorize generic by broadcasting to the canonical vector "
-                 "shape\n");
-
-            // Pre-process before proceeding.
-            convertAffineApply(rewriter, linalgOp);
-
-            // TODO: 'vectorize' takes in a 'RewriterBase' which is up-casted
-            // to 'OpBuilder' when it is passed over to some methods like
-            // 'vectorizeAsLinalgGeneric'. This is highly problematic: if we
-            // erase an op within these methods, the actual rewriter won't be
-            // notified and we will end up with read-after-free issues!
-            return vectorizeAsLinalgGeneric(rewriter, state, linalgOp, results);
-          })
-          .Case<tensor::PadOp>([&](auto padOp) {
-            return vectorizeAsTensorPadOp(rewriter, padOp, inputVectorSizes,
-                                          results);
-          })
-          .Case<linalg::PackOp>([&](auto packOp) {
-            return vectorizeAsTensorPackOp(rewriter, packOp, inputVectorSizes,
-                                           results);
-          })
-          .Case<linalg::UnPackOp>([&](auto unpackOp) {
-            return vectorizeAsTensorUnpackOp(rewriter, unpackOp,
-                                             inputVectorSizes, results);
-          })
-          .Case<tensor::InsertSliceOp>([&](auto sliceOp) {
-            return vectorizeAsInsertSliceOp(rewriter, sliceOp, inputVectorSizes,
-                                            results);
-          })
-          .Default([](auto) { return failure(); });
+  return TypeSwitch<Operation *, LogicalResult>(op)
+      .Case<linalg::LinalgOp>([&](auto linalgOp) {
+        // TODO: isaConvolutionOpInterface that can also infer from
+        // generic features. Will require stride/dilation attributes
+        // inference.
+        if (isa<ConvolutionOpInterface>(linalgOp.getOperation())) {
+          FailureOr<Operation *> convOr = vectorizeConvolution(
+              rewriter, linalgOp, inputVectorSizes, inputScalableVecDims,
+              flatten1DDepthwiseConv);
+          if (succeeded(convOr)) {
+            llvm::append_range(newResults, (*convOr)->getResults());
+            return success();
+          }
+
+          LDBG("Unsupported convolution can't be vectorized.\n");
+          return failure();
+        }
+
+        LDBG("Vectorize generic by broadcasting to the canonical vector "
+             "shape\n");
+
+        // Pre-process before proceeding.
+        convertAffineApply(rewriter, linalgOp);
+
+        // TODO: 'vectorize' takes in a 'RewriterBase' which is up-casted
+        // to 'OpBuilder' when it is passed over to some methods like
+        // 'vectorizeAsLinalgGeneric'. This is highly problematic: if we
+        // erase an op within these methods, the actual rewriter won't be
+        // notified and we will end up with read-after-free issues!
+        return vectorizeAsLinalgGeneric(rewriter, state, linalgOp, newResults);
+      })
+      .Case<tensor::PadOp>([&](auto padOp) {
+        return vectorizeAsTensorPadOp(rewriter, padOp, inputVectorSizes,
+                                      newResults);
+      })
+      .Case<linalg::PackOp>([&](auto packOp) {
+        return vectorizeAsTensorPackOp(rewriter, packOp, inputVectorSizes,
+                                       newResults);
+      })
+      .Case<linalg::UnPackOp>([&](auto unpackOp) {
+        return vectorizeAsTensorUnpackOp(rewriter, unpackOp, inputVectorSizes,
+                                         newResults);
+      })
+      .Case<tensor::InsertSliceOp>([&](auto sliceOp) {
+        return vectorizeAsInsertSliceOp(rewriter, sliceOp, inputVectorSizes,
+                                        newResults);
+      })
+      .Default([](auto) { return failure(); });
+}
 
+LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
+                                      ArrayRef<int64_t> inputVectorSizes,
+                                      ArrayRef<bool> inputScalableVecDims,
+                                      bool vectorizeNDExtract,
+                                      bool flatten1DDepthwiseConv) {
+  SmallVector<Value> results;
+  LogicalResult vectorizeResult = mlir::linalg::vectorize(
+      rewriter, op, results, inputVectorSizes, inputScalableVecDims,
+      vectorizeNDExtract, flatten1DDepthwiseConv);
   if (failed(vectorizeResult)) {
     LDBG("Vectorization failed\n");
     return failure();



More information about the Mlir-commits mailing list