[Mlir-commits] [mlir] [Tensor] Simplify tensor.pad tiling length calculations. (PR #119039)

Thu Dec 12 09:01:41 PST 2024

https://github.com/nirvedhmeshram updated https://github.com/llvm/llvm-project/pull/119039

>From 6d7024d91a1522d6e53f77a036a1aebda862fbbe Mon Sep 17 00:00:00 2001
From: Nirvedh <nirvedh at gmail.com>
Date: Thu, 5 Dec 2024 18:46:50 -0600
Subject: [PATCH 1/2] [Tensor] Simplify tensor.pad tiling length calculations.

Signed-off-by: Nirvedh <nirvedh at gmail.com>
---
 .../Tensor/IR/TensorTilingInterfaceImpl.cpp   | 22 ++++++-------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
index 68c3d1cabb11cb..e647e53bc33af6 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -746,11 +746,6 @@ FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
   Location loc = padOp->getLoc();
   AffineExpr dim0, dim1;
   bindDims(b.getContext(), dim0, dim1);
-  // Add two integers.
-  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
-  auto add = [&](OpFoldResult v1, OpFoldResult v2) {
-    return affine::makeComposedFoldedAffineApply(b, loc, addMap, {v1, v2});
-  };
   // Subtract two integers.
   auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
   auto sub = [&](OpFoldResult v1, OpFoldResult v2) {
@@ -825,16 +820,13 @@ FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
     // The original read could also have stopped in the high padding zone.
     // In that case, set the end positition of the read should be the end of
     // the source tensor. (Similar to newOffset.)
-    //
-    // endLoc = min(max(offset - low + length, 0), srcSize)
-    //
-    // The new ExtractSliceOp length is `endLoc - newOffset`.
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    OpFoldResult endLoc =
-        hasLowPad ? min(max(add(sub(offset, low), length), zero), srcSize)
-                  : min(add(offset, length), srcSize);
-    OpFoldResult newLength = sub(endLoc, newOffset);
+    // srcSize - newOffset represents how much length we have available
+    // and length - newLow represents how much length we want at most.
+    OpFoldResult newLength = min(sub(srcSize, newOffset), sub(length, newLow));
+    // Optimization: If low = 0, then newLow = 0. then newLength >= 0 assuming
+    // length >= 0.
+    if (hasLowPad)
+      newLength = max(newLength, zero);
     newLengths.push_back(newLength);
 
     // Check if newLength is zero. In that case, no SubTensorOp should be

>From 473c00b175ba55be5802e67584b2f642cf96af44 Mon Sep 17 00:00:00 2001
From: Nirvedh <nirvedh at gmail.com>
Date: Thu, 12 Dec 2024 11:00:15 -0600
Subject: [PATCH 2/2] Address reviewer comments

Signed-off-by: Nirvedh <nirvedh at gmail.com>
---
 mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
index e647e53bc33af6..bcaf2ccdad1378 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -822,6 +822,11 @@ FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
     // the source tensor. (Similar to newOffset.)
     // srcSize - newOffset represents how much length we have available
     // and length - newLow represents how much length we want at most.
+    // In the dynamic case, if ValueBoundsOpInterface is used, this calculation
+    // will lead to an upper bound of length - newLow. If we calculated the
+    // ending location as min(some expression, srcSize) and then subtracted
+    // the new offset, then ValueBoundsOpInterface would upper bound by srcSize,
+    // which is larger than needed.
     OpFoldResult newLength = min(sub(srcSize, newOffset), sub(length, newLow));
     // Optimization: If low = 0, then newLow = 0. then newLength >= 0 assuming
     // length >= 0.