[Mlir-commits] [mlir] [mlir][tosa] Fold PadOp to tensor operations (PR #132700)
llvmlistbot at llvm.org
Mon Mar 24 02:48:46 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-tosa
Author: Georgios Pinitas (GeorgeARM)
Changes:
Add canonicalization patterns that fold an explicit `tosa.pad` operator into the implicit padding attribute of the consuming tensor operation, eliminating the explicit pad when it is otherwise unused.
Folding is supported for the following operations (a before/after sketch follows the list):
- Conv2d
- DepthwiseConv2d
- AvgPool2d
- MaxPool2d
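
For illustration, a minimal before/after sketch of the fold, based on the `max_pool2d` test added in this PR:

```mlir
// Before: an explicit tosa.pad feeds the pool, in addition to the pool's own padding.
%pad_shape = tosa.const_shape { values = dense<[0, 0, 1, 0, 1, 0, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
%pad_const = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
%padded = tosa.pad %input, %pad_shape, %pad_const : (tensor<1x10x8x3xf32>, !tosa.shape<8>, tensor<1xf32>) -> tensor<1x11x9x3xf32>
%pool = tosa.max_pool2d %padded {kernel = array<i64: 2, 2>, pad = array<i64: 0, 1, 0, 1>, stride = array<i64: 2, 2>} : (tensor<1x11x9x3xf32>) -> tensor<1x6x5x3xf32>

// After canonicalization: the H/W padding of tosa.pad is added element-wise to the
// pool's pad attribute (top/bottom: 1 + 0 and 0 + 1, left/right likewise), and the
// explicit pad operator is bypassed.
%pool = tosa.max_pool2d %input {kernel = array<i64: 2, 2>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 2, 2>} : (tensor<1x10x8x3xf32>) -> tensor<1x6x5x3xf32>
```

Padding on the N or C dimensions is not folded and such pads are left in place (see the `@pad_bwh_conv2d_nofold` test below).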
---
Full diff: https://github.com/llvm/llvm-project/pull/132700.diff
3 Files Affected:
- (modified) mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td (+5)
- (modified) mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp (+169-35)
- (modified) mlir/test/Dialect/Tosa/canonicalize.mlir (+79)
``````````diff
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
index 14e15173de7bc..49ee478e1dea9 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -107,6 +107,7 @@ def Tosa_AvgPool2dOp : Tosa_InferShapedTypeOp<"avg_pool2d"> {
LogicalResult verifyOutputZeroPoint(int64_t zp);
}];
+ let hasCanonicalizer = 1;
let hasVerifier = 1;
}
@@ -153,6 +154,8 @@ def Tosa_Conv2DOp : Tosa_ConvOp<"conv2d"> {
}];
let builders = [Tosa_ConvOpQuantInfoBuilder];
+
+ let hasCanonicalizer = 1;
let hasVerifier = 1;
}
@@ -244,6 +247,8 @@ def Tosa_DepthwiseConv2DOp : Tosa_ConvOp<"depthwise_conv2d"> {
}];
let builders = [Tosa_ConvOpQuantInfoBuilder];
+
+ let hasCanonicalizer = 1;
let hasVerifier = 1;
}
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
index 09d2c5d35263c..6a36b7a0cd57d 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
@@ -39,6 +39,175 @@ using namespace mlir::tosa;
// Operator Canonicalizers.
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Tensor Data Engine Operators.
+//===----------------------------------------------------------------------===//
+
+namespace {
+template <typename OpTy>
+struct PoolPadFoldAdaptor;
+
+template <>
+struct PoolPadFoldAdaptor<tosa::AvgPool2dOp> {
+ static void replaceOpWithNewPad(PatternRewriter &rewriter,
+ tosa::AvgPool2dOp op, Value padInput,
+ ArrayRef<int64_t> newPad) {
+ rewriter.replaceOpWithNewOp<tosa::AvgPool2dOp>(
+ op, op.getType(), padInput, op.getInputZp(), op.getOutputZp(),
+ op.getKernel(), op.getStride(), rewriter.getDenseI64ArrayAttr(newPad),
+ op.getAccType());
+ }
+};
+
+template <>
+struct PoolPadFoldAdaptor<tosa::MaxPool2dOp> {
+ static void replaceOpWithNewPad(PatternRewriter &rewriter,
+ tosa::MaxPool2dOp op, Value padInput,
+ ArrayRef<int64_t> newPad) {
+ rewriter.replaceOpWithNewOp<tosa::MaxPool2dOp>(
+ op, op.getType(), padInput, op.getKernel(), op.getStride(),
+ rewriter.getDenseI64ArrayAttr(newPad), op.getNanMode());
+ }
+};
+
+template <typename OpTy>
+struct ConvPadFoldAdaptor {
+ static void replaceOpWithNewPad(PatternRewriter &rewriter, OpTy op,
+ Value padInput, ArrayRef<int64_t> newPad) {
+ rewriter.replaceOpWithNewOp<OpTy>(
+ op, op.getResult().getType(), padInput, op.getWeight(), op.getBias(),
+ op.getInputZp(), op.getWeightZp(), newPad, op.getStrideAttr(),
+ op.getDilationAttr(), op.getAccType(), op.getLocalBound());
+ }
+};
+
+// Pattern attempts to fold a `tosa.pad` operator to a following tensor
+// operation like `tosa.conv2d` by merging the padding associated with the
+// pad operator directly to the implicit padding of the tensor operation.
+// This helps eliminate the explicit padding operator if unused.
+template <typename OpTy, typename AdaptorTy>
+struct FoldPadToTensorOp : public OpRewritePattern<OpTy> {
+ using OpRewritePattern<OpTy>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(OpTy tensorOp,
+ PatternRewriter &rewriter) const override {
+ // Check producer is a tosa::PadOp
+ auto padOp = tensorOp.getInput().template getDefiningOp<tosa::PadOp>();
+ if (!padOp)
+ return rewriter.notifyMatchFailure(tensorOp,
+ "Producer must be a tosa::PadOp.");
+
+ // Validate that tensor operation has sane padding
+ const std::vector<int64_t> &tensorOpPad = tensorOp.getPad().vec();
+ if (tensorOpPad.size() != 4) // pad_top, pad_bottom, pad_left, pad_right
+ return rewriter.notifyMatchFailure(
+ tensorOp, "Tensor operation padding shall have 4 elements.");
+
+ // Validate tosa::PadOp padding
+ DenseIntElementsAttr padOpPadding;
+ if (!matchPattern(padOp.getPadding(), m_Constant(&padOpPadding))) {
+ return rewriter.notifyMatchFailure(
+ tensorOp,
+ "The `padding` input specified on the tosa::PadOp must be constant.");
+ }
+ // N_before, N_after, H_before, H_after, W_before, W_after, C_before,
+ // C_after
+ if (padOpPadding.size() != 8)
+ return rewriter.notifyMatchFailure(tensorOp,
+ "Pad padding should have 8 elements.");
+ int64_t padNBefore = (*(padOpPadding.begin() + 0)).getLimitedValue();
+ int64_t padNAfter = (*(padOpPadding.begin() + 1)).getLimitedValue();
+ int64_t padHBefore = (*(padOpPadding.begin() + 2)).getLimitedValue();
+ int64_t padHAfter = (*(padOpPadding.begin() + 3)).getLimitedValue();
+ int64_t padWBefore = (*(padOpPadding.begin() + 4)).getLimitedValue();
+ int64_t padWAfter = (*(padOpPadding.begin() + 5)).getLimitedValue();
+ int64_t padCBefore = (*(padOpPadding.begin() + 6)).getLimitedValue();
+ int64_t padCAfter = (*(padOpPadding.begin() + 7)).getLimitedValue();
+
+ if (padNBefore != 0 || padNAfter != 0 || padCBefore != 0 || padCAfter != 0)
+ return rewriter.notifyMatchFailure(
+ tensorOp, "Folding padding in N or C dimensions is not supported.");
+
+ // Fold padding from Pad into the tensor operation
+ // 4 elements - pad_top, pad_bottom, pad_left, pad_right
+ SmallVector<int64_t> foldedPad(tensorOpPad.size());
+ foldedPad[0] = padHBefore + tensorOpPad[0];
+ foldedPad[1] = padHAfter + tensorOpPad[1];
+ foldedPad[2] = padWBefore + tensorOpPad[2];
+ foldedPad[3] = padWAfter + tensorOpPad[3];
+
+ // Replace operator
+ AdaptorTy::replaceOpWithNewPad(rewriter, tensorOp, padOp.getInput1(),
+ foldedPad);
+
+ return success();
+ }
+};
+} // namespace
+
+void AvgPool2dOp::getCanonicalizationPatterns(RewritePatternSet &results,
+ MLIRContext *context) {
+ results.add<FoldPadToTensorOp<tosa::AvgPool2dOp,
+ PoolPadFoldAdaptor<tosa::AvgPool2dOp>>>(
+ context);
+}
+
+void Conv2DOp::getCanonicalizationPatterns(RewritePatternSet &results,
+ MLIRContext *context) {
+ results.add<
+ FoldPadToTensorOp<tosa::Conv2DOp, ConvPadFoldAdaptor<tosa::Conv2DOp>>>(
+ context);
+}
+
+void DepthwiseConv2DOp::getCanonicalizationPatterns(RewritePatternSet &results,
+ MLIRContext *context) {
+ results.add<FoldPadToTensorOp<tosa::DepthwiseConv2DOp,
+ ConvPadFoldAdaptor<tosa::DepthwiseConv2DOp>>>(
+ context);
+}
+
+struct MaxPool2dIsNoOp : public OpRewritePattern<tosa::MaxPool2dOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(tosa::MaxPool2dOp op,
+ PatternRewriter &rewriter) const override {
+ Value input = op.getInput();
+ Value output = op.getOutput();
+ ShapedType inputType = llvm::cast<ShapedType>(input.getType());
+ ShapedType outputType = llvm::cast<ShapedType>(output.getType());
+
+ if (!inputType.hasStaticShape() || !outputType.hasStaticShape()) {
+ return failure();
+ }
+
+ // If the output and input shapes are 1x1, then this is a no op.
+ ArrayRef<int64_t> outputShape = outputType.getShape();
+ if (outputShape[1] != 1 || outputShape[2] != 1) {
+ return failure();
+ }
+
+ ArrayRef<int64_t> inputShape = inputType.getShape();
+ if (inputShape[1] != 1 || inputShape[2] != 1) {
+ return failure();
+ }
+
+ rewriter.replaceOp(op, input);
+ return success();
+ }
+};
+
+void MaxPool2dOp::getCanonicalizationPatterns(RewritePatternSet &results,
+ MLIRContext *context) {
+ results.add<MaxPool2dIsNoOp,
+ FoldPadToTensorOp<tosa::MaxPool2dOp,
+ PoolPadFoldAdaptor<tosa::MaxPool2dOp>>>(
+ context);
+}
+
+//===----------------------------------------------------------------------===//
+// Data Layout / Memory Reinterpretation.
+//===----------------------------------------------------------------------===//
+
struct ConcatOptimization : public OpRewritePattern<tosa::ConcatOp> {
using OpRewritePattern<tosa::ConcatOp>::OpRewritePattern;
@@ -175,41 +344,6 @@ void TransposeOp::getCanonicalizationPatterns(RewritePatternSet &results,
results.add<ConsolidateTransposeOptimization, TransposeIsReshape>(context);
}
-struct MaxPool2dIsNoOp : public OpRewritePattern<tosa::MaxPool2dOp> {
- using OpRewritePattern::OpRewritePattern;
-
- LogicalResult matchAndRewrite(tosa::MaxPool2dOp op,
- PatternRewriter &rewriter) const override {
- Value input = op.getInput();
- Value output = op.getOutput();
- ShapedType inputType = llvm::cast<ShapedType>(input.getType());
- ShapedType outputType = llvm::cast<ShapedType>(output.getType());
-
- if (!inputType.hasStaticShape() || !outputType.hasStaticShape()) {
- return failure();
- }
-
- // If the output and input shapes are 1x1, then this is a no op.
- ArrayRef<int64_t> outputShape = outputType.getShape();
- if (outputShape[1] != 1 || outputShape[2] != 1) {
- return failure();
- }
-
- ArrayRef<int64_t> inputShape = inputType.getShape();
- if (inputShape[1] != 1 || inputShape[2] != 1) {
- return failure();
- }
-
- rewriter.replaceOp(op, input);
- return success();
- }
-};
-
-void MaxPool2dOp::getCanonicalizationPatterns(RewritePatternSet &results,
- MLIRContext *context) {
- results.add<MaxPool2dIsNoOp>(context);
-}
-
struct ClampIsNoOp : public OpRewritePattern<tosa::ClampOp> {
using OpRewritePattern::OpRewritePattern;
diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir
index 077a6cee0a1bb..84bc86384ce85 100644
--- a/mlir/test/Dialect/Tosa/canonicalize.mlir
+++ b/mlir/test/Dialect/Tosa/canonicalize.mlir
@@ -9,6 +9,85 @@ func.func @argmax_nofold(%arg0: tensor<?x1xf32>) -> tensor<1xi32> {
// -----
+// CHECK-LABEL: @pad_wh_avg_pool2d_fold
+func.func @pad_wh_avg_pool2d_fold(%input: tensor<1x10x8x3xf32>) -> tensor<1x6x5x3xf32> {
+ // CHECK-NOT: tosa.pad
+ // CHECK: tosa.avg_pool2d
+ // CHECK-SAME: pad = array<i64: 1, 1, 1, 1>
+ %pad_shape = tosa.const_shape { values = dense<[0, 0, 1, 0, 1, 0, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
+ %pad_const = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %input_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %output_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %padded = tosa.pad %input, %pad_shape, %pad_const : (tensor<1x10x8x3xf32>, !tosa.shape<8>, tensor<1xf32>) -> tensor<1x11x9x3xf32>
+ %pool = tosa.avg_pool2d %padded, %input_zp, %output_zp {acc_type = f32, kernel = array<i64: 2, 2>, pad = array<i64: 0, 1, 0, 1>, stride = array<i64: 2, 2>} : (tensor<1x11x9x3xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x6x5x3xf32>
+ return %pool : tensor<1x6x5x3xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @pad_wh_conv2d_fold
+func.func @pad_wh_conv2d_fold(%input: tensor<1x8x4x3xf32>, %weight: tensor<1x3x3x3xf32>, %bias: tensor<1xf32>) -> tensor<1x10x8x1xf32> {
+ // CHECK-NOT: tosa.pad
+ // CHECK: tosa.conv2d
+ // CHECK-SAME: pad = array<i64: 2, 2, 3, 3>
+ %pad_shape = tosa.const_shape { values = dense<[0, 0, 1, 1, 2, 2, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
+ %pad_const = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %input_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %weight_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %padded = tosa.pad %input, %pad_shape, %pad_const : (tensor<1x8x4x3xf32>, !tosa.shape<8>, tensor<1xf32>) -> tensor<1x10x8x3xf32>
+ %conv = tosa.conv2d %padded, %weight, %bias, %input_zp, %weight_zp {acc_type = f32, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>, dilation = array<i64: 1, 1>} : (tensor<1x10x8x3xf32>, tensor<1x3x3x3xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x10x8x1xf32>
+ return %conv : tensor<1x10x8x1xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @pad_bwh_conv2d_nofold
+func.func @pad_bwh_conv2d_nofold(%input: tensor<1x8x4x3xf32>, %weight: tensor<1x3x3x3xf32>, %bias: tensor<1xf32>) -> tensor<3x10x8x1xf32> {
+ // CHECK: tosa.pad
+ // CHECK: tosa.conv2d
+ // CHECK-SAME: pad = array<i64: 1, 1, 1, 1>
+ %pad_shape = tosa.const_shape { values = dense<[1, 1, 1, 1, 2, 2, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
+ %pad_const = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %input_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %weight_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %padded = tosa.pad %input, %pad_shape, %pad_const : (tensor<1x8x4x3xf32>, !tosa.shape<8>, tensor<1xf32>) -> tensor<3x10x8x3xf32>
+ %conv = tosa.conv2d %padded, %weight, %bias, %input_zp, %weight_zp {acc_type = f32, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>, dilation = array<i64: 1, 1>} : (tensor<3x10x8x3xf32>, tensor<1x3x3x3xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<3x10x8x1xf32>
+ return %conv : tensor<3x10x8x1xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @pad_wh_depthwise_conv2d_fold
+func.func @pad_wh_depthwise_conv2d_fold(%input: tensor<1x8x4x3xf32>, %weight: tensor<3x3x3x1xf32>, %bias: tensor<3xf32>) -> tensor<1x10x8x3xf32> {
+ // CHECK-NOT: tosa.pad
+ // CHECK: tosa.depthwise_conv2d
+ // CHECK-SAME: pad = array<i64: 2, 2, 3, 3>
+ %pad_shape = tosa.const_shape { values = dense<[0, 0, 1, 1, 2, 2, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
+ %pad_const = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %input_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %weight_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %padded = tosa.pad %input, %pad_shape, %pad_const : (tensor<1x8x4x3xf32>, !tosa.shape<8>, tensor<1xf32>) -> tensor<1x10x8x3xf32>
+ %conv = tosa.depthwise_conv2d %padded, %weight, %bias, %input_zp, %weight_zp {acc_type = f32, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>, dilation = array<i64: 1, 1>} : (tensor<1x10x8x3xf32>, tensor<3x3x3x1xf32>, tensor<3xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x10x8x3xf32>
+ return %conv : tensor<1x10x8x3xf32>
+}
+
+// -----
+
+
+// CHECK-LABEL: @pad_wh_max_pool2d_fold
+func.func @pad_wh_max_pool2d_fold(%input: tensor<1x10x8x3xf32>) -> tensor<1x6x5x3xf32> {
+ // CHECK-NOT: tosa.pad
+ // CHECK: tosa.max_pool2d
+ // CHECK-SAME: pad = array<i64: 1, 1, 1, 1>
+ %pad_shape = tosa.const_shape { values = dense<[0, 0, 1, 0, 1, 0, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
+ %pad_const = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : ()-> tensor<1xf32>
+ %padded = tosa.pad %input, %pad_shape, %pad_const : (tensor<1x10x8x3xf32>, !tosa.shape<8>, tensor<1xf32>) -> tensor<1x11x9x3xf32>
+ %pool = tosa.max_pool2d %padded {kernel = array<i64: 2, 2>, pad = array<i64: 0, 1, 0, 1>, stride = array<i64: 2, 2>} : (tensor<1x11x9x3xf32>) -> tensor<1x6x5x3xf32>
+ return %pool : tensor<1x6x5x3xf32>
+}
+
+// -----
+
// CHECK-LABEL: @add_bcast_zero_int
func.func @add_bcast_zero_int(%arg0: tensor<4x2x3xi32>) -> tensor<4x2x3xi32> {
// CHECK-NOT: tosa.add
``````````
https://github.com/llvm/llvm-project/pull/132700