[Mlir-commits] [mlir] [mlir] Vectorize tensor.pad with low padding for unit dims (PR #133808)
Nirvedh Meshram
llvmlistbot at llvm.org
Mon Mar 31 14:56:02 PDT 2025
https://github.com/nirvedhmeshram created https://github.com/llvm/llvm-project/pull/133808
We currently do not have masked vectorization support for tensor.pad with low padding. However, we can allow it in the special case where the result dimension after padding is a unit dim. The reason is that when we actually have a non-zero low pad on a unit dim, the input size of that dimension must be (or, for the IR to be correct, should be) dynamically zero, and hence we will create a zero mask, which is correct. If the low pad is dynamically zero, then the lowering is correct as well.
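For illustration, here is a minimal sketch of the case this enables (it mirrors the new test added below; %l0, %h0, %h1 are dynamic pad sizes). Because the result type fixes the first padded dim to 1, a non-zero low pad %l0 is only possible when the input's first dim is dynamically zero, so the vector.create_mask built from the input sizes is all-false along that dim and the masked transfer_read correctly reads only pad values:

  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %cst : f32
  } : tensor<?x?xf32> to tensor<1x4xf32>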
From 8b74b2bf1459477bb00fd7a82bbce4c89dd21090 Mon Sep 17 00:00:00 2001
From: Nirvedh <nirvedh at gmail.com>
Date: Mon, 31 Mar 2025 16:50:19 -0500
Subject: [PATCH] [mlir] Vectorize tensor.pad with low padding for unit dims
Signed-off-by: Nirvedh <nirvedh at gmail.com>
---
.../Linalg/Transforms/Vectorization.cpp | 11 +++--
.../Linalg/vectorization-unsupported.mlir | 24 +++++++++++
mlir/test/Dialect/Linalg/vectorization.mlir | 40 +++++++++++++++++++
3 files changed, 71 insertions(+), 4 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 2dcd897330d1e..4adacbe7d45ca 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2178,11 +2178,14 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
inputVectorSizes)))
return failure();
- if (llvm::any_of(padOp.getLow(), [](Value v) {
- std::optional<int64_t> res = getConstantIntValue(v);
- return !res.has_value() || res.value() != 0;
+ if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
+ Value padValue = en.value();
+ unsigned pos = en.index();
+ std::optional<int64_t> res = getConstantIntValue(padValue);
+ return (!res.has_value() || res.value() != 0) &&
+ resultTensorShape[pos] != 1;
})) {
- LDBG("low pad must all be zero: " << padOp << "\n");
+ LDBG("low pad must all be zero for all non unit dims: " << padOp << "\n");
return failure();
}
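In plain terms, the relaxed precondition above amounts to the following standalone C++ sketch (for readability only, not part of the patch; it assumes getConstantIntValue and resultTensorShape as in the surrounding function):

  for (auto [pos, low] : llvm::enumerate(padOp.getLow())) {
    std::optional<int64_t> c = getConstantIntValue(low);
    bool staticallyZero = c.has_value() && *c == 0;
    // Reject a low pad only when it may be non-zero AND the padded
    // result dim is not a unit dim.
    if (!staticallyZero && resultTensorShape[pos] != 1)
      return failure();
  }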
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 2d1f0191eb798..f419d81d8df2b 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -305,6 +305,30 @@ module attributes {transform.with_named_sequence} {
// -----
+func.func @test_masked_vectorize_lowpad(
+ %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+ -> tensor<2x4xf32> {
+ // expected-error @+3 {{Attempted to vectorize, but failed}}
+ %cst = arith.constant 42.43 : f32
+ %c0 = arith.constant 0 : index
+ %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+ ^bb0(%hh1: index, %hh2: index):
+ tensor.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<2x4xf32>
+ return %1: tensor<2x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
// With dynamically shaped source, the vectorizer infers the vector size for
// xfer Ops from the destination tensor and, conservatively, assumes
// out-of-bounds accesses. Out-of-bounds accesses require a pad value, but
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index c6d9ec6215715..efd752e70df03 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -666,6 +666,46 @@ module attributes {transform.with_named_sequence} {
// -----
+// CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
+func.func @test_masked_vectorize_unit_lowpad(
+ %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+ -> tensor<1x4xf32>
+{
+ // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+ // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+ // CHECK: %[[c0_1:.*]] = arith.constant 0 : index
+ // CHECK-DAG: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+ // CHECK-DAG: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+ // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<1x4xi1>
+ // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
+ // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_1]], %[[c0_1]]], %[[c42]]
+ // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
+ // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32>
+ // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<1x4xf32>
+ // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
+ // CHECK: %[[masked_write:.*]] = vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_2]], %[[c0_2]]]
+ // CHECK-SAME: {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+ // CHECK: return %[[masked_write]] : tensor<1x4xf32>
+ %cst = arith.constant 42.43 : f32
+ %c0 = arith.constant 0 : index
+ %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+ ^bb0(%hh1: index, %hh2: index):
+ tensor.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<1x4xf32>
+ return %1: tensor<1x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [1, 4] : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
// Input identical as the test in vectorization-with-patterns.mlir. Output is
// different - vector sizes are inferred (rather than user-specified) and hence
// masking was used.