[Mlir-commits] [mlir] [mlir] Add optimization to bubbleUpPadSlice pattern for no pad case (PR #135859)
Nirvedh Meshram
llvmlistbot at llvm.org
Tue Apr 15 14:20:33 PDT 2025
https://github.com/nirvedhmeshram created https://github.com/llvm/llvm-project/pull/135859
In cases where a dim has no padding, we do not need to compute new offsets, lengths, and padding for it. For example, the new test case added here can be lowered to just
```
%extracted_slice = tensor.extract_slice %arg0[%arg2, 1, 2] [%arg2, 2, 1] [1, 1, 1] : tensor<3x4x5xf32> to tensor<?x2x1xf32>
```
with this PR, whereas without it we produce affine maps like
```
#map = affine_map<()[s0] -> (3, s0)>
#map1 = affine_map<()[s0, s1] -> (-s0 + 3, s1)>
%0 = affine.min #map()[%arg2]
%1 = affine.min #map1()[%0, %arg2]
%extracted_slice = tensor.extract_slice %arg0[%0, 1, 2] [%1, 2, 1] [1, 1, 1] : tensor<3x4x5xf32> to tensor<?x2x1xf32>
```
which are unnecessary.
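For context, the fast path lives inside `tensor::bubbleUpPadSlice`, whose signature the hunks below show only partially. Here is a minimal sketch of how a caller might drive it, assuming the usual upstream parameter list (offsets, sizes, and a zero-slice guard flag); `sliceOp` and `padOp` are hypothetical handles to the ops from the MLIR snippets above, not code from this patch:
```
// Minimal sketch, not from this patch: bubble a tensor.extract_slice
// above a tensor.pad. `sliceOp`/`padOp` are hypothetical handles.
OpBuilder b(padOp);
SmallVector<OpFoldResult> offsets = sliceOp.getMixedOffsets();
SmallVector<OpFoldResult> sizes = sliceOp.getMixedSizes();
// Assumed parameter list from the upstream declaration; the trailing flag
// guards against slices that read none of the original source.
FailureOr<TilingResult> result = tensor::bubbleUpPadSlice(
    b, padOp, offsets, sizes, /*generateZeroSliceGuard=*/true);
if (failed(result))
  return failure();
```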
From dacce41401cc15d2a86d2b79bc56aedb25442cbf Mon Sep 17 00:00:00 2001
From: Nirvedh <nirvedh at gmail.com>
Date: Tue, 15 Apr 2025 16:14:22 -0500
Subject: [PATCH] [mlir] Add optimization to bubbleUpPadSlice pattern for no
pad case
Signed-off-by: Nirvedh <nirvedh at gmail.com>
---
.../Tensor/IR/TensorTilingInterfaceImpl.cpp | 17 +++++++++++++----
.../Linalg/matmul-shared-memory-padding.mlir | 6 +++---
.../Dialect/Linalg/subtensor-of-padtensor.mlir | 17 +++++++++++++++++
3 files changed, 33 insertions(+), 7 deletions(-)
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
index 138e4be6b18e9..7778a02dbeaf4 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -122,7 +122,7 @@ FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
OpFoldResult zero = b.getIndexAttr(0);
// Compute new offsets, lengths, low padding, high padding.
- SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
+ SmallVector<OpFoldResult> newOffsets, newLengths;
SmallVector<OpFoldResult> newLows, newHighs;
// Set to true if the original data source is not read at all.
bool hasZeroLen = false;
@@ -131,6 +131,8 @@ FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
Value dynHasZeroLenCond;
int64_t rank = padOp.getSourceType().getRank();
+ // Only unit stride supported.
+ SmallVector<OpFoldResult> newStrides(rank, b.getIndexAttr(1));
for (unsigned dim = 0; dim < rank; ++dim) {
auto low = padOp.getMixedLowPad()[dim];
bool hasLowPad = !isConstantIntValue(low, 0);
@@ -138,6 +140,16 @@ FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
bool hasHighPad = !isConstantIntValue(high, 0);
auto offset = offsets[dim];
auto length = sizes[dim];
+ // If the dim has no padding, we don't need to compute new values for that
+ // dim, as the existing ones remain correct even after the pattern.
+ if (!hasLowPad && !hasHighPad) {
+ newOffsets.push_back(offset);
+ newLengths.push_back(length);
+ newLows.push_back(low);
+ newHighs.push_back(high);
+ continue;
+ }
+
auto srcSize = tensor::getMixedSize(b, loc, padOp.getSource(), dim);
// The new amount of low padding is `low - offset`. Except for the case
@@ -216,9 +228,6 @@ FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
OpFoldResult newHigh =
hasHighPad ? sub(sub(length, newLength), newLow) : zero;
newHighs.push_back(newHigh);
-
- // Only unit stride supported.
- newStrides.push_back(b.getIndexAttr(1));
}
// The shape of the result can be obtained from the sizes passed in.
diff --git a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
index d6c400dcbf2b9..6cab25b50460d 100644
--- a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
+++ b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
@@ -7,17 +7,17 @@
// CHECK: scf.for
// CHECK: memref.alloc() : memref<128x16xf32, 3>
// CHECK: scf.forall
-// CHECK: vector.create_mask
+// CHECK: vector.constant_mask [16, 4] : vector<128x4xi1>
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: memref.alloc() : memref<16x128xf32, 3>
// CHECK: scf.forall
-// CHECK: vector.create_mask
+// CHECK: vector.constant_mask [16, 4] : vector<128x4xi1>
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: memref.alloc() : memref<128x128xf32, 3>
// CHECK: scf.forall
-// CHECK: vector.create_mask
+// CHECK-NOT: mask
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: linalg.matmul
diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
index b4417641c9f83..d43b9a7ac6c04 100644
--- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
+++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
@@ -216,3 +216,20 @@ func.func @dynamic_zero_high_padding(%arg0 : tensor<?x?xf32>, %pad : f32,
%1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
return %1 : tensor<?x?xf32>
}
+
+// -----
+// CHECK-LABEL: @nopaddim_with_dynamic_extract(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<3x4x5xf32>
+// CHECK-SAME: %[[ARG1:.*]]: f32
+// CHECK-SAME: %[[ARG2:.*]]: index
+// CHECK: %[[RESULT:.*]] = tensor.extract_slice %[[ARG0]][%[[ARG2]], 1, 2] [%[[ARG2]], 2, 1] [1, 1, 1] : tensor<3x4x5xf32> to tensor<?x2x1xf32>
+// CHECK: return %[[RESULT]]
+func.func @nopaddim_with_dynamic_extract(%arg0 : tensor<3x4x5xf32>, %pad : f32, %index : index)
+ -> tensor<?x2x1xf32> {
+ %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 8] {
+ ^bb0(%arg1: index, %arg2: index, %arg3: index):
+ tensor.yield %pad : f32
+ } : tensor<3x4x5xf32> to tensor<3x11x13xf32>
+ %1 = tensor.extract_slice %0[%index, 1, 2] [%index, 2, 1] [1, 1, 1] : tensor<3x11x13xf32> to tensor<?x2x1xf32>
+ return %1 : tensor<?x2x1xf32>
+}
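The updated test file can be exercised with the usual lit driver; the path below assumes a standard build directory named `build` and is illustrative, not something this patch adds:
```
build/bin/llvm-lit -v mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
```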