[Mlir-commits] [mlir] fd0c6f5 - [mlir] Move linalg::PadTensorOp to tensor::PadOp.
Alexander Belyaev
llvmlistbot at llvm.org
Fri Jan 21 11:02:54 PST 2022
Author: Alexander Belyaev
Date: 2022-01-21T20:02:39+01:00
New Revision: fd0c6f53913f272ddd88948644fae36e63db120c
URL: https://github.com/llvm/llvm-project/commit/fd0c6f53913f272ddd88948644fae36e63db120c
DIFF: https://github.com/llvm/llvm-project/commit/fd0c6f53913f272ddd88948644fae36e63db120c.diff
LOG: [mlir] Move linalg::PadTensorOp to tensor::PadOp.
RFC: https://llvm.discourse.group/t/rfc-move-linalg-padtensorop-to-tensor-padop/5785
Differential Revision: https://reviews.llvm.org/D117892
Added:
mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h
mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt
mlir/lib/Dialect/Tensor/Utils/Utils.cpp
Modified:
mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
mlir/include/mlir/InitAllDialects.h
mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp
mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/lib/Dialect/Tensor/CMakeLists.txt
mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp
mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
mlir/test/Dialect/Linalg/bufferize.mlir
mlir/test/Dialect/Linalg/canonicalize.mlir
mlir/test/Dialect/Linalg/codegen-strategy.mlir
mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
mlir/test/Dialect/Linalg/hoist-padding.mlir
mlir/test/Dialect/Linalg/invalid.mlir
mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
mlir/test/Dialect/Linalg/pad.mlir
mlir/test/Dialect/Linalg/pad_fusion.mlir
mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
mlir/test/Dialect/Linalg/roundtrip.mlir
mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
mlir/test/Dialect/Linalg/vectorization.mlir
mlir/test/Dialect/Tensor/canonicalize.mlir
mlir/test/Dialect/Tensor/invalid.mlir
mlir/test/Dialect/Tensor/ops.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 4b8ae4985ca53..4150dee567fc7 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -18,7 +18,6 @@ include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/Interfaces/TilingInterface.td"
include "mlir/Interfaces/ViewLikeInterface.td"
// Base class for Linalg dialect ops that do not correspond to library calls.
@@ -130,207 +129,6 @@ def Linalg_InitTensorOp : Linalg_Op<"init_tensor",
let hasCanonicalizer = 1;
}
-def Linalg_PadTensorOp : Linalg_Op<"pad_tensor",
- [AttrSizedOperandSegments, NoSideEffect,
- DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
- DeclareOpInterfaceMethods<TilingInterface,
- ["getDestinationOperands", "getLoopIteratorTypes", "getIterationDomain",
- "getTiledImplementation"]>]> {
- let summary = "tensor pad operation";
- let description = [{
- `linalg.pad_tensor` is an operation that pads the `source` tensor
- with given `low` and `high` padding config.
-
- The PadTensor operation supports the following arguments:
-
- * source: the "base" tensor on which to pad.
- * low: A list containing the padding along the start of each
- dimension, i.e. `low`.
- * high: A list containing the padding along the end of each
- dimension, i.e. `high`.
- * nofold: indicates that the operation should not be folded when source and
- result types are equal.
-
- The result tensor dimensions are `low` + `dim` + `high` along that
- dimension. The number of elements of `low` and `high` must match
- the rank of the input tensor. They can be either a constant or a
- dynamic value.
-
- The region of the `pad_tensor` operation returns the value to use
- for the padding. The arguments of the region represent the index
- of the source being accessed. There should be as many arguments as
- the rank of the `source` tensor. The value `yield`-ed by the
- region is used as the value of the view at the given position.
-
- If `nofold` is set, the padding operation will not be folded away even
- if the source type and the padded type have the same static shape. This can
- be used, e.g., for packing or promotion to faster memory.
-
- Example 1:
-
- ```mlir
- %pad_value = ... : f32
- %0 = linalg.pad_tensor %0 low[1, 2] high[2, 3] {
- ^bb0(%arg0 : index, %arg1 : index):
- linalg.yield %pad_value : f32
- } : tensor<?x?xf32> to tensor<?x?xf32>
- ```
-
- Example 2:
-
- ```mlir
- %pad_value = ... : f32
- %0 = linalg.pad_tensor %arg0 low[2, %arg1, 3, 3] high[3, 3, %arg1, 2] {
- ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index):
- linalg.yield %pad_value : f32
- } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
- ```
-
- Example 3:
-
- ```mlir
- %pad_value = ... : f32
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad_value : f32
- } : tensor<2x3xf32> to tensor<?x?xf32>
- ```
-
- Example 4:
-
- ```mlir
- // Force the padded value to always exist with `nofold`.
- %pad_value = ... : f32
- %0 = linalg.pad_tensor %arg0 nofold low[0, 0] high[0, 0] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad_value : f32
- } : tensor<2x3xf32> to tensor<2x3xf32>
- ```
- }];
-
- let arguments = (ins
- AnyTensor:$source,
- Variadic<Index>:$low,
- Variadic<Index>:$high,
- I64ArrayAttr:$static_low,
- I64ArrayAttr:$static_high,
- UnitAttr:$nofold);
-
- let regions = (region SizedRegion<1>:$region);
-
- let results = (outs AnyTensor:$result);
-
- // TODO: Remove custom<InferType> when AllTypesMatch supports opt. operands.
- let assemblyFormat = [{
- $source
- (`nofold` $nofold^)?
- `low` `` custom<OperandsOrIntegersSizesList>($low, $static_low)
- `high` `` custom<OperandsOrIntegersSizesList>($high, $static_high)
- $region attr-dict `:` type($source) `to` type($result)
- }];
-
- let extraClassDeclaration = [{
- static StringRef getStaticLowAttrName() {
- return "static_low";
- }
-
- static StringRef getStaticHighAttrName() {
- return "static_high";
- }
-
- RankedTensorType getSourceType() {
- return source().getType().cast<RankedTensorType>();
- }
- RankedTensorType getResultType() {
- return getResult().getType().cast<RankedTensorType>();
- }
-
- // Infer the shape of the result tensor given the type of the source tensor
- // and paddings. Known result dimensions that cannot necessarily be inferred
- // from low/high padding sizes can be optionally specified. Those will be
- // considered when computing the result type.
- static RankedTensorType inferResultType(
- RankedTensorType sourceType,
- ArrayRef<int64_t> staticLow,
- ArrayRef<int64_t> staticHigh,
- ArrayRef<int64_t> resultShape = {});
-
- // Return a PadTensorOp that pads `source` to `type` size where the static
- // sizes are assumed to be greater than the dynamic sizes. The op performs
- // "high" padding (i.e. it adds trailing padding values until the desired
- // size is met).
- static linalg::PadTensorOp createPadHighOp(
- Type type, Value source, Value pad, bool nofold, Location loc,
- OpBuilder & builder);
-
- // Return a PadTensorOp that pads `source` to `type` size with `pad` value.
- // I.e., a block will be created and the `pad` value will be yielded
- // directly. If the type passed is nullptr, it is inferred.
- static linalg::PadTensorOp createPadScalarOp(
- Type type, Value source, Value pad, ArrayRef<OpFoldResult> low,
- ArrayRef<OpFoldResult> high, bool nofold, Location loc,
- OpBuilder & builder);
-
- // Return the pad value if it is a constant. Return null value otherwise.
- Value getConstantPaddingValue();
-
- // Return a vector of all the static or dynamic values (low/high padding) of
- // the op.
- inline SmallVector<OpFoldResult> getMixedPadImpl(ArrayAttr staticAttrs,
- ValueRange values) {
- SmallVector<OpFoldResult> res;
- unsigned numDynamic = 0;
- unsigned count = staticAttrs.size();
- for (unsigned idx = 0; idx < count; ++idx) {
- if (ShapedType::isDynamic(staticAttrs[idx].cast<IntegerAttr>().getInt()))
- res.push_back(values[numDynamic++]);
- else
- res.push_back(staticAttrs[idx]);
- }
- return res;
- }
- SmallVector<OpFoldResult> getMixedLowPad() {
- return getMixedPadImpl(static_low(), low());
- }
- SmallVector<OpFoldResult> getMixedHighPad() {
- return getMixedPadImpl(static_high(), high());
- }
- // Return true if low padding is guaranteed to be 0.
- bool hasZeroLowPad() {
- return llvm::all_of(getMixedLowPad(), [](OpFoldResult ofr) {
- return getConstantIntValue(ofr) == static_cast<int64_t>(0);
- });
- }
- // Return true if high padding is guaranteed to be 0.
- bool hasZeroHighPad() {
- return llvm::all_of(getMixedHighPad(), [](OpFoldResult ofr) {
- return getConstantIntValue(ofr) == static_cast<int64_t>(0);
- });
- }
- }];
-
- let builders = [
- // Build a PadTensorOp with mixed static and dynamic entries.
- OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$staticLow,
- "ArrayRef<int64_t>":$staticHigh, "ValueRange":$low, "ValueRange":$high,
- CArg<"bool", "false">:$nofold,
- CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
- // Build a PadTensorOp with all dynamic entries.
- OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high,
- CArg<"bool", "false">:$nofold,
- CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
- // Build a PadTensorOp with mixed static and dynamic entries and custom
- // result type. If the type passed is nullptr, it is inferred.
- OpBuilder<(ins "Type":$resultType, "Value":$source,
- "ArrayRef<OpFoldResult>":$low, "ArrayRef<OpFoldResult>":$high,
- CArg<"bool", "false">:$nofold,
- CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
- ];
-
- let hasCanonicalizer = 1;
- let hasFolder = 1;
-}
-
def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, ReturnLike, Terminator]>,
Arguments<(ins Variadic<AnyType>:$values)> {
let summary = "Linalg yield operation";
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
index 90e78ca0e274e..8d3315d5ea971 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
@@ -1,4 +1,4 @@
-//===- HoistPadding.h - Hoisting transformation for PadTensorOp -*- C++ -*-===//
+//===- HoistPadding.h - Hoisting for tensor::PadOp -*- C++ --------------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,8 +14,11 @@
namespace mlir {
class Value;
+namespace tensor {
+class PadOp;
+} // namespace tensor
+
namespace linalg {
-class PadTensorOp;
/// Mechanically hoist padding operations on tensors by `numLoops` into a new,
/// generally larger tensor. This achieves packing of multiple padding ops into
@@ -59,8 +62,8 @@ class PadTensorOp;
/// }
/// }
/// ```
-FailureOr<Value> hoistPaddingOnTensors(PadTensorOp opToHoist, int numLoops,
- PadTensorOp &hoistedOp);
+FailureOr<Value> hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops,
+ tensor::PadOp &hoistedOp);
} // namespace linalg
} // namespace mlir
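For orientation, a minimal sketch (not part of this commit; the helper name, the loop depth, and the use-replacement step are assumptions) of calling the updated hoisting entry point with the tensor::PadOp signature:

```c++
#include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

// Hypothetical helper: try to hoist `padOp` above two enclosing loops.
static void tryHoistPad(mlir::tensor::PadOp padOp) {
  mlir::tensor::PadOp hoistedOp;
  mlir::FailureOr<mlir::Value> newResult =
      mlir::linalg::hoistPaddingOnTensors(padOp, /*numLoops=*/2, hoistedOp);
  if (mlir::succeeded(newResult)) {
    // The returned value stands in for the original pad's result; in-tree
    // callers perform this replacement through a rewriter.
    padOp.result().replaceAllUsesWith(*newResult);
  }
}
```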
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index f52276e8e9e69..f5e99d5afe83e 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1132,18 +1132,18 @@ void populateLinalgDistributeTiledLoopPattern(
// Op-specific patterns.
//===----------------------------------------------------------------------===//
-/// PadTensorOp is not canonicalized away yet, so we provide a transformation to
-/// `linalg.generic`.
-struct PadTensorOpTransformationPattern : public OpRewritePattern<PadTensorOp> {
- using OpRewritePattern<PadTensorOp>::OpRewritePattern;
+/// tensor::PadOp is not canonicalized away yet, so we provide a transformation
+/// to `linalg.generic`.
+struct PadOpTransformationPattern : public OpRewritePattern<tensor::PadOp> {
+ using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(PadTensorOp padOp,
+ LogicalResult matchAndRewrite(tensor::PadOp padOp,
PatternRewriter &rewriter) const override;
};
/// Pad the operands of `opToPad` to a static bounding box. Use `paddingFunc`
/// and `nofoldFunc` to set the padding value and the nofold attribute of the
-/// introduced PadTensorOps, respectively. Update `paddedOp` to the cloned
+/// introduced tensor::PadOps, respectively. Update `paddedOp` to the cloned
/// statically shaped operation and return the extracted dynamically shaped
/// results. If padding fails, return failure.
FailureOr<SmallVector<Value>>
@@ -1153,23 +1153,23 @@ rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
LinalgOp &paddedOp);
using OptimizeCopyFn =
- std::function<LogicalResult(PatternRewriter &, PadTensorOp, Value)>;
+ std::function<LogicalResult(PatternRewriter &, tensor::PadOp, Value)>;
-/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and
+/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and
/// InsertSliceOp. For now, only constant padding values are supported.
/// `OptimizeCopyFn` can be used to customize copying step optimization.
-struct GeneralizePadTensorOpPattern : public OpRewritePattern<PadTensorOp> {
- GeneralizePadTensorOpPattern(MLIRContext *context,
- OptimizeCopyFn optimizeCopyFn = nullptr,
- PatternBenefit benefit = 1)
- : OpRewritePattern<PadTensorOp>(context, benefit),
+struct GeneralizePadOpPattern : public OpRewritePattern<tensor::PadOp> {
+ GeneralizePadOpPattern(MLIRContext *context,
+ OptimizeCopyFn optimizeCopyFn = nullptr,
+ PatternBenefit benefit = 1)
+ : OpRewritePattern<tensor::PadOp>(context, benefit),
optimizeCopyFn(std::move(optimizeCopyFn)) {}
- LogicalResult matchAndRewrite(PadTensorOp padOp,
+ LogicalResult matchAndRewrite(tensor::PadOp padOp,
PatternRewriter &rewriter) const override;
protected:
OptimizeCopyFn optimizeCopyFn;
- Value createFillOrGenerateOp(PatternRewriter &rewriter, PadTensorOp padOp,
+ Value createFillOrGenerateOp(PatternRewriter &rewriter, tensor::PadOp padOp,
Value dest,
const SmallVector<Value> &dynSizes) const;
};
@@ -1179,9 +1179,9 @@ struct GeneralizePadTensorOpPattern : public OpRewritePattern<PadTensorOp> {
/// are used to encode a certain ordering of pattern application. To avoid
/// scattering magic constants throughout the code base, the patterns must be
/// added with this function. `baseBenefit` can be used to offset the benefit
-/// of all PadTensorOp vectorization patterns by a certain value.
-void populatePadTensorOpVectorizationPatterns(RewritePatternSet &patterns,
- PatternBenefit baseBenefit = 1);
+/// of all tensor::PadOp vectorization patterns by a certain value.
+void populatePadOpVectorizationPatterns(RewritePatternSet &patterns,
+ PatternBenefit baseBenefit = 1);
/// Match and rewrite for the pattern:
/// ```
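As a rough usage sketch (not from this commit; the wrapper function is an assumption), the renamed entry points slot into a pattern set in the usual way:

```c++
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

// Hypothetical driver that lowers and vectorizes tensor.pad under `op`.
static void applyPadPatterns(mlir::Operation *op) {
  mlir::RewritePatternSet patterns(op->getContext());
  // GeneralizePadOpPattern rewrites tensor.pad into InitTensorOp + FillOp +
  // InsertSliceOp, as described above.
  patterns.add<mlir::linalg::GeneralizePadOpPattern>(op->getContext());
  // Renamed from populatePadTensorOpVectorizationPatterns.
  mlir::linalg::populatePadOpVectorizationPatterns(patterns);
  (void)mlir::applyPatternsAndFoldGreedily(op, std::move(patterns));
}
```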
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 7646ef2b5df67..b466d7726f502 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -107,12 +107,12 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp(
OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
-/// Create a PadTensorOp that pads `source` to the size of the statically sized
-/// `type` whose static sizes are assumed to be greater than the dynamic
+/// Create a tensor::PadOp that pads `source` to the size of the statically
+/// sized `type` whose static sizes are assumed to be greater than the dynamic
/// `source` size. The padding introduces trailing `pad` values until the target
/// size is met. If `source` is defined by one or more LinalgOps that have been
/// padded with the same value and sizes, return their padded result instead of
-/// creating a PadTensorOp.
+/// creating a tensor::PadOp.
///
/// Example:
/// ```
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
index db8a07a689ee2..cfec22be37e5f 100644
--- a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
+++ b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
@@ -19,6 +19,7 @@
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/TilingInterface.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
index 1a95d921fee22..05cb41d791d35 100644
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -14,6 +14,7 @@ include "mlir/Interfaces/CastInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/TilingInterface.td"
include "mlir/Interfaces/ViewLikeInterface.td"
class Tensor_Op<string mnemonic, list<OpTrait> traits = []>
@@ -777,6 +778,190 @@ def Tensor_CollapseShapeOp : Tensor_ReassociativeReshapeOp<"collapse_shape"> {
let extraClassDeclaration = commonExtraClassDeclaration;
}
+//===----------------------------------------------------------------------===//
+// PadOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect]> {
+ let summary = "tensor pad operation";
+ let description = [{
+ `tensor.pad` is an operation that pads the `source` tensor
+ with given `low` and `high` padding config.
+
+ The `tensor.pad` operation supports the following arguments:
+
+ * source: the "base" tensor on which to pad.
+ * low: A list containing the padding along the start of each
+ dimension, i.e. `low`.
+ * high: A list containing the padding along the end of each
+ dimension, i.e. `high`.
+ * nofold: indicates that the operation should not be folded when source and
+ result types are equal.
+
+ The result tensor dimensions are `low` + `dim` + `high` along that
+ dimension. The number of elements of `low` and `high` must match
+ the rank of the input tensor. They can be either a constant or a
+ dynamic value.
+
+ The region of the `tensor.pad` operation returns the value to use
+ for the padding. The arguments of the region represent the index
+ of the source being accessed. There should be as many arguments as
+ the rank of the `source` tensor. The value `yield`-ed by the
+ region is used as the value of the view at the given position.
+
+ If `nofold` is set, the padding operation will not be folded away even
+ if the source type and the padded type have the same static shape. This can
+ be used, e.g., for packing or promotion to faster memory.
+
+ Example 1:
+
+ ```mlir
+ %pad_value = ... : f32
+ %0 = tensor.pad %0 low[1, 2] high[2, 3] {
+ ^bb0(%arg0 : index, %arg1 : index):
+ tensor.yield %pad_value : f32
+ } : tensor<?x?xf32> to tensor<?x?xf32>
+ ```
+
+ Example 2:
+
+ ```mlir
+ %pad_value = ... : f32
+ %0 = tensor.pad %arg0 low[2, %arg1, 3, 3] high[3, 3, %arg1, 2] {
+ ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index):
+ tensor.yield %pad_value : f32
+ } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+ ```
+
+ Example 3:
+
+ ```mlir
+ %pad_value = ... : f32
+ %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : f32
+ } : tensor<2x3xf32> to tensor<?x?xf32>
+ ```
+
+ Example 4:
+
+ ```mlir
+ // Force the padded value to always exist with `nofold`.
+ %pad_value = ... : f32
+ %0 = tensor.pad %arg0 nofold low[0, 0] high[0, 0] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : f32
+ } : tensor<2x3xf32> to tensor<2x3xf32>
+ ```
+ }];
+
+ let arguments = (ins
+ AnyTensor:$source,
+ Variadic<Index>:$low,
+ Variadic<Index>:$high,
+ I64ArrayAttr:$static_low,
+ I64ArrayAttr:$static_high,
+ UnitAttr:$nofold);
+
+ let regions = (region SizedRegion<1>:$region);
+
+ let results = (outs AnyTensor:$result);
+
+ // TODO: Remove custom<InferType> when AllTypesMatch supports opt. operands.
+ let assemblyFormat = [{
+ $source
+ (`nofold` $nofold^)?
+ `low` `` custom<OperandsOrIntegersSizesList>($low, $static_low)
+ `high` `` custom<OperandsOrIntegersSizesList>($high, $static_high)
+ $region attr-dict `:` type($source) `to` type($result)
+ }];
+
+ let extraClassDeclaration = [{
+ static StringRef getStaticLowAttrName() {
+ return "static_low";
+ }
+
+ static StringRef getStaticHighAttrName() {
+ return "static_high";
+ }
+
+ RankedTensorType getSourceType() {
+ return source().getType().cast<RankedTensorType>();
+ }
+ RankedTensorType getResultType() {
+ return getResult().getType().cast<RankedTensorType>();
+ }
+
+ // Infer the shape of the result tensor given the type of the source tensor
+ // and paddings. Known result dimensions that cannot necessarily be inferred
+ // from low/high padding sizes can be optionally specified. Those will be
+ // considered when computing the result type.
+ static RankedTensorType inferResultType(
+ RankedTensorType sourceType,
+ ArrayRef<int64_t> staticLow,
+ ArrayRef<int64_t> staticHigh,
+ ArrayRef<int64_t> resultShape = {});
+
+ // Return the pad value if it is a constant. Return null value otherwise.
+ Value getConstantPaddingValue();
+
+ // Return a vector of all the static or dynamic values (low/high padding) of
+ // the op.
+ inline SmallVector<OpFoldResult> getMixedPadImpl(ArrayAttr staticAttrs,
+ ValueRange values) {
+ SmallVector<OpFoldResult> res;
+ unsigned numDynamic = 0;
+ unsigned count = staticAttrs.size();
+ for (unsigned idx = 0; idx < count; ++idx) {
+ if (ShapedType::isDynamic(staticAttrs[idx].cast<IntegerAttr>().getInt()))
+ res.push_back(values[numDynamic++]);
+ else
+ res.push_back(staticAttrs[idx]);
+ }
+ return res;
+ }
+ SmallVector<OpFoldResult> getMixedLowPad() {
+ return getMixedPadImpl(static_low(), low());
+ }
+ SmallVector<OpFoldResult> getMixedHighPad() {
+ return getMixedPadImpl(static_high(), high());
+ }
+ // Return true if low padding is guaranteed to be 0.
+ bool hasZeroLowPad() {
+ return llvm::all_of(getMixedLowPad(), [](OpFoldResult ofr) {
+ return getConstantIntValue(ofr) == static_cast<int64_t>(0);
+ });
+ }
+ // Return true if high padding is guaranteed to be 0.
+ bool hasZeroHighPad() {
+ return llvm::all_of(getMixedHighPad(), [](OpFoldResult ofr) {
+ return getConstantIntValue(ofr) == static_cast<int64_t>(0);
+ });
+ }
+ }];
+
+ let builders = [
+ // Build a PadOp with mixed static and dynamic entries.
+ OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$staticLow,
+ "ArrayRef<int64_t>":$staticHigh, "ValueRange":$low, "ValueRange":$high,
+ CArg<"bool", "false">:$nofold,
+ CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+ // Build a PadOp with all dynamic entries.
+ OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high,
+ CArg<"bool", "false">:$nofold,
+ CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+ // Build a PadOp with mixed static and dynamic entries and custom
+ // result type. If the type passed is nullptr, it is inferred.
+ OpBuilder<(ins "Type":$resultType, "Value":$source,
+ "ArrayRef<OpFoldResult>":$low, "ArrayRef<OpFoldResult>":$high,
+ CArg<"bool", "false">:$nofold,
+ CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+ ];
+
+ let hasCanonicalizer = 1;
+ let hasFolder = 1;
+}
+
//===----------------------------------------------------------------------===//
// YieldOp
@@ -784,16 +969,17 @@ def Tensor_CollapseShapeOp : Tensor_ReassociativeReshapeOp<"collapse_shape"> {
def Tensor_YieldOp : Tensor_Op<"yield",
[NoSideEffect, ReturnLike, Terminator,
- HasParent<"::mlir::tensor::GenerateOp">]> {
+ HasParent<"::mlir::tensor::GenerateOp, ::mlir::tensor::PadOp">]> {
let summary = "Yield a value from a region";
let description = [{
This operation is used to yield a single value from within a region. It
is used to create dynamically sized tensors
- (see `tensor.generate` op).
+ (see `tensor.generate` and `tensor.pad` ops).
}];
let arguments = (ins AnyType:$value);
let assemblyFormat = "$value attr-dict `:` type($value)";
+
// Dummy builder to appease code in templated ensureTerminator that
// GenerateOp's auto-generated parser calls.
let builders = [OpBuilder<(ins), [{ /* nothing to do */ }]>];
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h b/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h
new file mode 100644
index 0000000000000..6cd819758eab7
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h
@@ -0,0 +1,36 @@
+//===- TensorTilingInterfaceImpl.h ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TilingInterface for tensor ops via external models.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_
+#define MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_
+
+#include "mlir/IR/Dialect.h"
+
+namespace mlir {
+namespace tensor {
+
+/// Registers external models implementing the TilingInterface for tensor ops.
+/// Currently, it registers:
+///
+/// * TilingInterface for `tensor.pad`.
+///
+/// Unfortunately, a "normal" internal registration is not possible at the
+/// moment, because the interface implementation for these ops depends on
+/// `affine.apply`, and the Affine dialect already depends on TensorOps. To
+/// break the cyclic dependency (TensorOps->AffineOps->TensorOps), the
+/// implementation is moved to a separate library.
+void registerTilingOpInterfaceExternalModels(mlir::DialectRegistry &registry);
+
+} // namespace tensor
+} // namespace mlir
+
+#endif // MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_
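For reference, a minimal sketch (assuming a standalone tool; everything here beyond the registration call itself is illustrative) of making the external tiling model available on a context:

```c++
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/MLIRContext.h"

// Hypothetical setup: attach the TilingInterface model for tensor.pad.
static void setupContext(mlir::MLIRContext &context) {
  mlir::DialectRegistry registry;
  registry.insert<mlir::tensor::TensorDialect>();
  // Registered as an external model to break the
  // TensorOps->AffineOps->TensorOps cycle described above.
  mlir::tensor::registerTilingOpInterfaceExternalModels(registry);
  context.appendDialectRegistry(registry);
}
```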
diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
new file mode 100644
index 0000000000000..4b4c53896b7d8
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
@@ -0,0 +1,34 @@
+//===- Utils.h - Utilities to support the Tensor dialect -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_UTILS_UTILS_H_
+#define MLIR_DIALECT_TENSOR_UTILS_UTILS_H_
+
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+
+namespace mlir {
+namespace tensor {
+
+// Return a PadOp that pads `source` to `type` size where the static
+// sizes are assumed to be greater than the dynamic sizes. The op performs
+// "high" padding (i.e. it adds trailing padding values until the desired
+// size is met).
+PadOp createPadHighOp(Type type, Value source, Value pad, bool nofold,
+ Location loc, OpBuilder &builder);
+
+// Return a PadOp that pads `source` to `type` size with `pad` value.
+// I.e., a block will be created and the `pad` value will be yielded
+// directly. If the type passed is nullptr, it is inferred.
+PadOp createPadScalarOp(Type type, Value source, Value pad,
+ ArrayRef<OpFoldResult> low, ArrayRef<OpFoldResult> high,
+ bool nofold, Location loc, OpBuilder &builder);
+
+} // namespace tensor
+} // namespace mlir
+
+#endif // MLIR_DIALECT_TENSOR_UTILS_UTILS_H_
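A small usage sketch (illustrative only; the helper name and the 8x16 target shape are assumptions) for the relocated createPadHighOp:

```c++
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/IR/Builders.h"

// Hypothetical helper: high-pad `source` with `padValue` up to tensor<8x16xf32>.
static mlir::Value padToFixedShape(mlir::OpBuilder &b, mlir::Location loc,
                                   mlir::Value source, mlir::Value padValue) {
  auto targetType = mlir::RankedTensorType::get({8, 16}, b.getF32Type());
  // createPadHighOp adds trailing padding until the static target size is met.
  return mlir::tensor::createPadHighOp(targetType, source, padValue,
                                       /*nofold=*/false, loc, b)
      .result();
}
```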
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 40e9781edcc90..86f4e1db7a284 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -43,6 +43,7 @@
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/X86Vector/X86VectorDialect.h"
inline void registerAllDialects(DialectRegistry &registry) {
x86vector::X86VectorDialect>();
// clang-format on
tensor::registerInferTypeOpInterfaceExternalModels(registry);
+ tensor::registerTilingOpInterfaceExternalModels(registry);
}
/// Append all the MLIR dialects to the registry contained in the given context.
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
index 0f53b30125625..6833a0c2d72cb 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@@ -17,6 +17,7 @@
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Tosa/Utils/CoversionUtils.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
@@ -1932,7 +1933,7 @@ class PadConverter : public OpRewritePattern<tosa::PadOp> {
highValues.push_back(highVal);
}
- auto newPadOp = linalg::PadTensorOp::createPadScalarOp(
+ auto newPadOp = tensor::createPadScalarOp(
padOp.getType(), input, padConstant, lowValues, highValues,
/*nofold=*/false, loc, rewriter);
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
index 54012c9760eb2..4fcd2cf56c35c 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
@@ -17,6 +17,7 @@
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Tosa/Utils/CoversionUtils.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
@@ -55,9 +56,9 @@ static mlir::Value applyPad(Location loc, Value input, ArrayRef<int64_t> pad,
Value padValue = rewriter.create<arith::ConstantOp>(loc, padAttr);
- return linalg::PadTensorOp::createPadScalarOp(
- RankedTensorType::get(paddedShape, inputETy), input, padValue,
- lowIndices, highIndices, /*nofold=*/false, loc, rewriter)
+ return tensor::createPadScalarOp(RankedTensorType::get(paddedShape, inputETy),
+ input, padValue, lowIndices, highIndices,
+ /*nofold=*/false, loc, rewriter)
.result();
}
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index f41e8487858f3..3ca3932a44eec 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1079,561 +1079,6 @@ LogicalResult InitTensorOp::reifyResultShapes(
return success();
}
-//===----------------------------------------------------------------------===//
-// PadTensorOp
-//===----------------------------------------------------------------------===//
-
-// TODO: Replace custom<InferType> directive with AllTypesMatch as soon as it
-// supports optional types.
-void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand,
- Type typeToInfer, Type typeToInferFrom) {}
-
-ParseResult parseInferType(OpAsmParser &parser,
- Optional<OpAsmParser::OperandType> optOperand,
- Type &typeToInfer, Type typeToInferFrom) {
- if (optOperand)
- typeToInfer = typeToInferFrom;
- return success();
-}
-
-static LogicalResult verify(PadTensorOp op) {
- auto sourceType = op.source().getType().cast<RankedTensorType>();
- auto resultType = op.result().getType().cast<RankedTensorType>();
- auto expectedType = PadTensorOp::inferResultType(
- sourceType, extractFromI64ArrayAttr(op.static_low()),
- extractFromI64ArrayAttr(op.static_high()));
- for (int i = 0, e = sourceType.getRank(); i < e; ++i) {
- if (resultType.getDimSize(i) == expectedType.getDimSize(i))
- continue;
- if (expectedType.isDynamicDim(i))
- continue;
- return op.emitError("specified type ")
- << resultType << " does not match the inferred type "
- << expectedType;
- }
-
- auto &region = op.region();
- unsigned rank = resultType.getRank();
- Block &block = region.front();
- if (block.getNumArguments() != rank)
- return op.emitError("expected the block to have ") << rank << " arguments";
-
- // Note: the number and type of yield values are checked in the YieldOp.
- for (const auto &en : llvm::enumerate(block.getArgumentTypes())) {
- if (!en.value().isIndex())
- return op.emitOpError("expected block argument ")
- << (en.index() + 1) << " to be an index";
- }
-
- return success();
-}
-
-RankedTensorType PadTensorOp::inferResultType(RankedTensorType sourceType,
- ArrayRef<int64_t> staticLow,
- ArrayRef<int64_t> staticHigh,
- ArrayRef<int64_t> resultShape) {
- unsigned rank = sourceType.getRank();
- assert(staticLow.size() == rank && "unexpected staticLow size mismatch");
- assert(staticHigh.size() == rank && "unexpected staticHigh size mismatch");
- assert((resultShape.empty() || resultShape.size() == rank) &&
- "unexpected resultShape size mismatch");
-
- SmallVector<int64_t, 4> inferredShape;
- for (auto i : llvm::seq<unsigned>(0, rank)) {
- if (sourceType.isDynamicDim(i) ||
- staticLow[i] == ShapedType::kDynamicSize ||
- staticHigh[i] == ShapedType::kDynamicSize) {
- inferredShape.push_back(resultShape.empty() ? ShapedType::kDynamicSize
- : resultShape[i]);
- } else {
- int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i];
- assert((resultShape.empty() || size == resultShape[i] ||
- resultShape[i] == ShapedType::kDynamicSize) &&
- "mismatch between inferred shape and result shape");
- inferredShape.push_back(size);
- }
- }
-
- return RankedTensorType::get(inferredShape, sourceType.getElementType());
-}
-
-void PadTensorOp::build(OpBuilder &b, OperationState &result, Value source,
- ArrayRef<int64_t> staticLow,
- ArrayRef<int64_t> staticHigh, ValueRange low,
- ValueRange high, bool nofold,
- ArrayRef<NamedAttribute> attrs) {
- auto sourceType = source.getType().cast<RankedTensorType>();
- auto resultType = inferResultType(sourceType, staticLow, staticHigh);
- build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow),
- b.getI64ArrayAttr(staticHigh), nofold ? b.getUnitAttr() : UnitAttr());
- result.addAttributes(attrs);
-}
-
-void PadTensorOp::build(OpBuilder &b, OperationState &result, Value source,
- ValueRange low, ValueRange high, bool nofold,
- ArrayRef<NamedAttribute> attrs) {
- auto sourceType = source.getType().cast<RankedTensorType>();
- unsigned rank = sourceType.getRank();
- SmallVector<int64_t, 4> staticVector(rank, ShapedType::kDynamicSize);
- build(b, result, source, staticVector, staticVector, low, high, nofold,
- attrs);
-}
-
-void PadTensorOp::build(OpBuilder &b, OperationState &result, Type resultType,
- Value source, ArrayRef<OpFoldResult> low,
- ArrayRef<OpFoldResult> high, bool nofold,
- ArrayRef<NamedAttribute> attrs) {
- assert(resultType.isa<RankedTensorType>());
- auto sourceType = source.getType().cast<RankedTensorType>();
- SmallVector<Value, 4> dynamicLow, dynamicHigh;
- SmallVector<int64_t, 4> staticLow, staticHigh;
- // staticLow and staticHigh have full information of the padding config.
- // This will grow staticLow and staticHigh with 1 value. If the config is
- // dynamic (ie not a constant), dynamicLow and dynamicHigh will grow with 1
- // value as well.
- dispatchIndexOpFoldResults(low, dynamicLow, staticLow,
- ShapedType::kDynamicSize);
- dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh,
- ShapedType::kDynamicSize);
- if (!resultType) {
- resultType =
- PadTensorOp::inferResultType(sourceType, staticLow, staticHigh);
- }
- build(b, result, resultType, source, dynamicLow, dynamicHigh,
- b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh),
- nofold ? b.getUnitAttr() : UnitAttr());
- result.addAttributes(attrs);
-}
-
-PadTensorOp PadTensorOp::createPadScalarOp(Type type, Value source, Value pad,
- ArrayRef<OpFoldResult> low,
- ArrayRef<OpFoldResult> high,
- bool nofold, Location loc,
- OpBuilder &builder) {
- auto padTensorOp =
- builder.create<linalg::PadTensorOp>(loc, type, source, low, high, nofold);
- int rank = padTensorOp.getResultType().getRank();
- SmallVector<Type, 4> blockArgTypes(rank, builder.getIndexType());
- SmallVector<Location, 4> blockArgLocs(rank, loc);
- auto &region = padTensorOp.region();
- // `builder.createBlock` changes the insertion point within the block. Create
- // a guard to reset the insertion point of the builder after it is destroyed.
- OpBuilder::InsertionGuard guard(builder);
- builder.createBlock(&region, region.end(), blockArgTypes, blockArgLocs);
- builder.create<linalg::YieldOp>(loc, pad);
- return padTensorOp;
-}
-
-PadTensorOp PadTensorOp::createPadHighOp(Type type, Value source, Value pad,
- bool nofold, Location loc,
- OpBuilder &b) {
- SmallVector<OpFoldResult, 4> low, high;
- auto rankedTensorType = type.cast<RankedTensorType>();
- assert(rankedTensorType.hasStaticShape());
- for (const auto &en : enumerate(rankedTensorType.getShape())) {
- AffineExpr d0;
- bindDims(b.getContext(), d0);
- auto dimOp = b.createOrFold<tensor::DimOp>(loc, source, en.index());
- Value paddingWidth =
- makeComposedAffineApply(b, loc, en.value() - d0, {dimOp});
- high.push_back(paddingWidth);
- low.push_back(b.createOrFold<arith::ConstantIndexOp>(loc, 0));
- }
- return PadTensorOp::createPadScalarOp(type, source, pad, low, high, nofold,
- loc, b);
-}
-
-LogicalResult PadTensorOp::reifyResultShapes(
- OpBuilder &b, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
- Location loc = getLoc();
- auto lowPad = getMixedLowPad();
- auto highPad = getMixedHighPad();
- SmallVector<Value> shapes;
- for (auto dim : llvm::seq<int64_t>(0, getSourceType().getRank())) {
- // Shape along each dimension is source dim + low pad + high pad.
- SmallVector<Value> mapOperands;
- mapOperands.push_back(b.createOrFold<tensor::DimOp>(loc, source(), dim));
- AffineExpr expr = b.getAffineDimExpr(0);
- unsigned numSymbols = 0;
- auto addOpFoldResult = [&](OpFoldResult valueOrAttr) {
- if (Value v = valueOrAttr.dyn_cast<Value>()) {
- expr = expr + b.getAffineSymbolExpr(numSymbols++);
- mapOperands.push_back(v);
- return;
- }
- int64_t staticValue =
- valueOrAttr.get<Attribute>().cast<IntegerAttr>().getInt();
- expr = expr + staticValue;
- };
- addOpFoldResult(lowPad[dim]);
- addOpFoldResult(highPad[dim]);
- shapes.push_back(applyMapToValues(
- b, loc, AffineMap::get(1, numSymbols, expr), mapOperands)[0]);
- }
- reifiedReturnShapes.emplace_back(std::move(shapes));
- return success();
-}
-
-//===----------------------------------------------------------------------===//
-// Methods related to PadTensor tiling.
-//===----------------------------------------------------------------------===//
-
-SmallVector<Value> PadTensorOp::getDestinationOperands(OpBuilder &b) {
- ReifiedRankedShapedTypeDims reifiedShapes;
- (void)reifyResultShapes(b, reifiedShapes);
- SmallVector<OpFoldResult> mixedSizes = getAsOpFoldResult(reifiedShapes[0]);
- Value initTensor = b.create<InitTensorOp>(getLoc(), mixedSizes,
- getResultType().getElementType());
- return {initTensor};
-}
-
-SmallVector<StringRef> PadTensorOp::getLoopIteratorTypes() {
- SmallVector<StringRef> iteratorTypes(getResultType().getRank(),
- getParallelIteratorTypeName());
- return iteratorTypes;
-}
-
-SmallVector<Range> PadTensorOp::getIterationDomain(OpBuilder &b) {
- ReifiedRankedShapedTypeDims reifiedShapes;
- (void)reifyResultShapes(b, reifiedShapes);
- Value zero = b.create<arith::ConstantIndexOp>(getLoc(), 0);
- Value one = b.create<arith::ConstantIndexOp>(getLoc(), 1);
- // Initialize all the ranges to {zero, one, one}. All the `ub`s are
- // overwritten.
- SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
- for (const auto &ub : enumerate(reifiedShapes[0]))
- loopRanges[ub.index()].size = ub.value();
- return loopRanges;
-}
-
-SmallVector<Operation *> PadTensorOp::getTiledImplementation(
- OpBuilder &b, ValueRange dest, ArrayRef<OpFoldResult> offsets,
- ArrayRef<OpFoldResult> sizes, bool /*tileDestOperands*/) {
- // Only constant padding value supported.
- Value padValue = getConstantPaddingValue();
- if (!padValue)
- return {};
-
- // Helper variables and functions for various arithmetic operations. These are
- // used extensively for computing new offset/length and padding values.
- Location loc = getLoc();
- AffineExpr dim0, dim1;
- bindDims(b.getContext(), dim0, dim1);
- // Add two integers.
- auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
- auto add = [&](Value v1, Value v2) {
- return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
- };
- // Subtract two integers.
- auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
- auto sub = [&](Value v1, Value v2) {
- return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
- };
- // Take the minimum of two integers.
- auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
- auto min = [&](Value v1, Value v2) {
- return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
- };
- // Take the maximum of two integers.
- auto max = [&](Value v1, Value v2) {
- return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
- };
- // Zero index-typed integer.
- auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
-
- // Helper function for filling static/dynamic low/high padding indices vectors
- // of PadTensorOp.
- auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
- SmallVector<int64_t> &staticIndices) {
- if (auto constInt = getConstantIntValue(val)) {
- staticIndices.push_back(*constInt);
- } else {
- staticIndices.push_back(ShapedType::kDynamicSize);
- dynIndices.push_back(val);
- }
- };
-
- // Compute new offsets, lengths, low padding, high padding.
- SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
- SmallVector<Value> newLows, newHighs;
- SmallVector<int64_t> staticNewLows, staticNewHighs;
- // Set to true if the original data source is not read at all.
- bool hasZeroLen = false;
- // Same as hasZeroLen, but for dynamic dimension sizes. This condition
- // is true if the original data source turns out to be unused at runtime.
- Value dynHasZeroLenCond;
-
- int64_t rank = getSourceType().getRank();
- for (unsigned dim = 0; dim < rank; ++dim) {
- auto low = getValueOrCreateConstantIndexOp(b, loc, getMixedLowPad()[dim]);
- bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
- auto high = getValueOrCreateConstantIndexOp(b, loc, getMixedHighPad()[dim]);
- bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
- auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]);
- auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]);
- auto srcSize = b.createOrFold<tensor::DimOp>(loc, source(), dim);
-
- // The new amount of low padding is `low - offset`. Except for the case
- // where none of the low padding is read. In that case, the new amount of
- // low padding is zero.
- //
- // Optimization: If low = 0, then newLow = 0.
- Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
- appendIndex(newLow, newLows, staticNewLows);
-
- // Start reading the data from position `offset - low`. Since the original
- // read may have started in the low padding zone, this value could be
- // negative. Therefore, start reading from:
- //
- // max(offset - low, 0)
- //
- // The original read could also have started in the high padding zone.
- // In that case, set the offset to the end of source tensor. The new
- // ExtractSliceOp length will be zero in that case. (Effectively reading no
- // data from the source.)
- //
- // Optimization: If low = 0, then the formula can be simplified.
- Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
- : min(offset, srcSize);
- newOffsets.push_back(getAsOpFoldResult(newOffset));
-
- // The original ExtractSliceOp was reading until position `offset + length`.
- // Therefore, the corresponding position within the source tensor is:
- //
- // offset + length - low
- //
- // In case the original ExtractSliceOp stopped reading within the low
- // padding zone, this value can be negative. In that case, the end position
- // of the read should be zero. (Similar to newOffset.)
- //
- // The original read could also have stopped in the high padding zone.
- // In that case, the end position of the read should be the end of the
- // source tensor. (Similar to newOffset.)
- //
- // endLoc = min(max(offset - low + length, 0), srcSize)
- //
- // The new ExtractSliceOp length is `endLoc - newOffset`.
- //
- // Optimization: If low = 0, then the formula can be simplified.
- Value endLoc = hasLowPad
- ? min(max(add(sub(offset, low), length), zero), srcSize)
- : min(add(offset, length), srcSize);
- Value newLength = sub(endLoc, newOffset);
- newLengths.push_back(getAsOpFoldResult(newLength));
-
- // Check if newLength is zero. In that case, no SubTensorOp should be
- // executed.
- if (auto newLengthInt = getConstantIntValue(newLength)) {
- hasZeroLen |= *newLengthInt == 0;
- } else {
- Value check = b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
- newLength, zero);
- dynHasZeroLenCond =
- dynHasZeroLenCond
- ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
- : check;
- }
-
- // The amount of high padding is simply the number of elements remaining,
- // so that the result has the same length as the original ExtractSliceOp.
- // As an optimization, if the original high padding is zero, then the new
- // high padding must also be zero.
- Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
- appendIndex(newHigh, newHighs, staticNewHighs);
-
- // Only unit stride supported.
- newStrides.push_back(b.getIndexAttr(1));
- }
-
- // The shape of the result can be obtained from the sizes passed in.
- SmallVector<Value> dynDims;
- SmallVector<int64_t> shape;
- dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize);
- RankedTensorType resultType =
- RankedTensorType::get(shape, getResultType().getElementType());
-
- // Insert cast to ensure that types match. (May be folded away.)
- auto castResult = [&](Value val) -> Operation * {
- auto castOp = b.create<tensor::CastOp>(loc, resultType, val);
- return castOp;
- };
-
- // In cases where the original data source is unused: Emit a GenerateOp and
- // do not generate a SliceOp. (The result shape of the SliceOp would
- // have a dimension of size 0, the semantics of which is unclear.)
- auto createGenerateOp = [&]() {
- // Create GenerateOp.
- auto generateOp = b.create<tensor::GenerateOp>(
- loc, resultType, dynDims,
- [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
- builder.create<tensor::YieldOp>(gLoc, padValue);
- });
- return castResult(generateOp);
- };
-
- // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
- // the result shape of the new SliceOp has a zero dimension.
- auto createPadTensorOfSubTensor = [&]() {
- // Create pad_tensor(subtensor(x)).
- auto newSliceOp = b.create<tensor::ExtractSliceOp>(
- loc, source(), newOffsets, newLengths, newStrides);
- auto newPadTensorOp = b.create<PadTensorOp>(
- loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
-
- // Copy region to new PadTensorOp.
- BlockAndValueMapping bvm;
- region().cloneInto(&newPadTensorOp.getRegion(), bvm);
-
- // Cast result and return.
- return castResult(newPadTensorOp);
- };
-
- // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically known
- // that the original data source x is not used.
- if (hasZeroLen) {
- return {createGenerateOp()};
- }
-
- // If there are dynamic dimensions: Generate an scf.if check to avoid creating
- // SliceOps with result dimensions of size 0 at runtime.
- if (dynHasZeroLenCond) {
- auto result = b.create<scf::IfOp>(
- loc, resultType, dynHasZeroLenCond,
- /*thenBuilder=*/
- [&](OpBuilder &b, Location loc) {
- b.create<scf::YieldOp>(loc, createGenerateOp()->getResult(0));
- },
- /*elseBuilder=*/
- [&](OpBuilder &b, Location loc) {
- b.create<scf::YieldOp>(loc,
- createPadTensorOfSubTensor()->getResult(0));
- });
- return {result};
- }
- return {createPadTensorOfSubTensor()};
-}
-
-namespace {
-// Folds linalg.pad_tensor when padding is static zeros and the attribute
-// doesn't request otherwise.
-struct FoldStaticZeroPadding : public OpRewritePattern<PadTensorOp> {
- using OpRewritePattern<PadTensorOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(PadTensorOp padTensorOp,
- PatternRewriter &rewriter) const override {
- if (!padTensorOp.hasZeroLowPad() || !padTensorOp.hasZeroHighPad())
- return failure();
- if (padTensorOp.nofold())
- return failure();
- rewriter.replaceOpWithNewOp<tensor::CastOp>(
- padTensorOp, padTensorOp.result().getType(), padTensorOp.source());
- return success();
- }
-};
-
-// Fold CastOp into PadTensorOp when adding static information.
-struct FoldSourceTensorCast : public OpRewritePattern<PadTensorOp> {
- using OpRewritePattern<PadTensorOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(PadTensorOp padTensorOp,
- PatternRewriter &rewriter) const override {
- auto castOp = padTensorOp.source().getDefiningOp<tensor::CastOp>();
- if (!tensor::canFoldIntoConsumerOp(castOp))
- return failure();
-
- auto newResultType = PadTensorOp::inferResultType(
- castOp.source().getType().cast<RankedTensorType>(),
- extractFromI64ArrayAttr(padTensorOp.static_low()),
- extractFromI64ArrayAttr(padTensorOp.static_high()),
- padTensorOp.getResultType().getShape());
-
- if (newResultType == padTensorOp.getResultType()) {
- rewriter.updateRootInPlace(padTensorOp, [&]() {
- padTensorOp.sourceMutable().assign(castOp.source());
- });
- } else {
- auto newOp = rewriter.create<PadTensorOp>(
- padTensorOp->getLoc(), newResultType, padTensorOp.source(),
- padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(),
- padTensorOp.static_high(), padTensorOp.nofold());
- BlockAndValueMapping mapper;
- padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper);
-
- rewriter.replaceOpWithNewOp<tensor::CastOp>(
- padTensorOp, padTensorOp.getResultType(), newOp);
- }
- return success();
- }
-};
-
-// Fold CastOp using the result of PadTensorOp back into the latter if it adds
-// static information.
-struct FoldTargetTensorCast : public OpRewritePattern<PadTensorOp> {
- using OpRewritePattern<PadTensorOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(PadTensorOp padTensorOp,
- PatternRewriter &rewriter) const override {
- if (!padTensorOp.result().hasOneUse())
- return failure();
- auto tensorCastOp =
- dyn_cast<tensor::CastOp>(*padTensorOp->getUsers().begin());
- if (!tensorCastOp)
- return failure();
- if (!tensor::preservesStaticInformation(padTensorOp.result().getType(),
- tensorCastOp.dest().getType()))
- return failure();
-
- auto replacementOp = rewriter.create<PadTensorOp>(
- padTensorOp.getLoc(), tensorCastOp.dest().getType(),
- padTensorOp.source(), padTensorOp.low(), padTensorOp.high(),
- padTensorOp.static_low(), padTensorOp.static_high(),
- padTensorOp.nofold());
- replacementOp.region().takeBody(padTensorOp.region());
-
- rewriter.replaceOp(padTensorOp, replacementOp.result());
- rewriter.replaceOp(tensorCastOp, replacementOp.result());
- return success();
- }
-};
-} // namespace
-
-void PadTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
- MLIRContext *context) {
- results.add<FoldStaticZeroPadding, FoldSourceTensorCast>(context);
- results.add<FoldTargetTensorCast>(context);
-}
-
-/// Return the padding value of the PadTensorOp if it is constant. In this context,
-/// "constant" means an actual constant or "defined outside of the block".
-///
-/// Values are considered constant in three cases:
-/// - A ConstantLike value.
-/// - A basic block argument from a different block.
-/// - A value defined outside of the block.
-///
-/// If the padding value is not constant, an empty Value is returned.
-Value PadTensorOp::getConstantPaddingValue() {
- auto yieldOp = dyn_cast<YieldOp>(getRegion().front().getTerminator());
- if (!yieldOp || yieldOp.values().size() != 1)
- return {};
- Value padValue = yieldOp.values().front();
- // Check if yield value is a constant.
- if (matchPattern(padValue, m_Constant()))
- return padValue;
- // Check if yield value is defined inside the PadTensorOp block.
- if (padValue.getParentBlock() == &getRegion().front())
- return {};
- // Else: Yield value defined outside of the PadTensorOp block.
- return padValue;
-}
-
-OpFoldResult PadTensorOp::fold(ArrayRef<Attribute>) {
- if (getResultType().hasStaticShape() && getResultType() == getSourceType() &&
- !nofold())
- return source();
- return {};
-}
-
//===----------------------------------------------------------------------===//
// YieldOp
//===----------------------------------------------------------------------===//
@@ -1687,16 +1132,6 @@ static LogicalResult verify(linalg::YieldOp op) {
if (auto linalgOp = dyn_cast<LinalgOp>(parentOp))
return verifyYield(op, cast<LinalgOp>(parentOp));
- if (auto padTensorOp = dyn_cast<linalg::PadTensorOp>(parentOp)) {
- if (op.getNumOperands() != 1)
- return op.emitOpError("expected single yield operand (got ")
- << op->getNumOperands() << ")";
- if (op.getOperand(0).getType() !=
- padTensorOp.getType().cast<ShapedType>().getElementType())
- return op.emitOpError("expected yield type to match shape element type");
- return success();
- }
-
if (auto tiledLoopOp = dyn_cast<linalg::TiledLoopOp>(parentOp)) {
// Check if output args with tensor types match results types.
SmallVector<Value, 2> tensorOuts;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
index 1283dec2a0a5b..9e45ed2a8fbf0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
@@ -320,7 +320,7 @@ struct LinalgBufferizePass : public LinalgBufferizeBase<LinalgBufferizePass> {
target.addLegalDialect<arith::ArithmeticDialect, AffineDialect,
memref::MemRefDialect, StandardOpsDialect,
tensor::TensorDialect>();
- target.addIllegalOp<InitTensorOp, PadTensorOp, tensor::CollapseShapeOp,
+ target.addIllegalOp<InitTensorOp, tensor::PadOp, tensor::CollapseShapeOp,
tensor::ExpandShapeOp, tensor::ExtractSliceOp,
tensor::InsertSliceOp>();
@@ -363,5 +363,5 @@ void mlir::linalg::populateLinalgBufferizePatterns(
VectorTransferWriteOpConverter
>(typeConverter, patterns.getContext());
// clang-format on
- patterns.add<GeneralizePadTensorOpPattern>(patterns.getContext());
+ patterns.add<GeneralizePadOpPattern>(patterns.getContext());
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 5ea77641b7f06..3c8b9c9606952 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -67,7 +67,7 @@ struct LinalgComprehensiveModuleBufferize
static void applyEnablingTransformations(ModuleOp moduleOp) {
RewritePatternSet patterns(moduleOp.getContext());
- patterns.add<GeneralizePadTensorOpPattern>(moduleOp.getContext());
+ patterns.add<GeneralizePadOpPattern>(moduleOp.getContext());
(void)applyPatternsAndFoldGreedily(moduleOp, std::move(patterns));
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index a4edd8a87bba7..21c92ee304dfa 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -1,4 +1,4 @@
-//===- HoistPadding.cpp - Hoisting transformation for PadTensorOp ---------===//
+//===- HoistPadding.cpp - Hoisting for tensor::PadOp ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -37,7 +37,7 @@ using llvm::dbgs;
using namespace mlir;
using namespace mlir::linalg;
-/// Analysis class to support PadTensorOp hoisting across multiple enclosing
+/// Analysis class to support tensor::PadOp hoisting across multiple enclosing
/// loops. The failure conditions are:
/// 1. Pad op has a use that is not an input of a LinalgOp.
/// 2. Pad op does not have a constant padding value.
@@ -53,7 +53,7 @@ using namespace mlir::linalg;
/// 8. There is no enclosing scf::ForOp that indexes the padded data.
/// Other cases succeed and will trigger hoisting of the pad op.
struct HoistingAnalysis {
- HoistingAnalysis(PadTensorOp padTensorOp, int numLoops);
+ HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops);
bool isValid() { return valid; }
@@ -98,7 +98,7 @@ struct HoistingAnalysis {
/// ```
/// dropNonIndexDependencies(%padded_slice, %slice)
/// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice.
- LogicalResult dropNonIndexDependencies(PadTensorOp padTensorOp,
+ LogicalResult dropNonIndexDependencies(tensor::PadOp padTensorOp,
tensor::ExtractSliceOp sliceOp);
/// Encodes whether the analysis is valid and hoisting can proceed.
@@ -107,7 +107,7 @@ struct HoistingAnalysis {
/// Return true if all uses of `padTensorOp` are an input tensor of some
/// LinalgOp.
-static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) {
+static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padTensorOp) {
for (OpOperand &use : padTensorOp.result().getUses()) {
auto linalgUser = dyn_cast<linalg::LinalgOp>(use.getOwner());
if (!linalgUser || !linalgUser.isInputTensor(&use)) {
@@ -126,7 +126,7 @@ static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) {
/// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm.
/// Control-flow and other containing ops with regions are not modeled atm.
static void
-getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
+getAtMostNEnclosingLoops(tensor::PadOp padTensorOp, int nLevels,
SmallVector<scf::ForOp> &reverseEnclosingLoops) {
AsmState state(padTensorOp->getParentOfType<mlir::FuncOp>());
(void)state;
@@ -143,7 +143,7 @@ getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
}
}
-HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
+HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
valid = false;
// Bail on any use that isn't an input of a Linalg op.
@@ -232,7 +232,7 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
}
LogicalResult
-HoistingAnalysis::dropNonIndexDependencies(PadTensorOp padTensorOp,
+HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
tensor::ExtractSliceOp sliceOp) {
// Set of all values used for index computation.
SetVector<Value> indexEdges;
@@ -373,9 +373,9 @@ static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer,
ValueRange{ivVal, lbVal, stepVal});
}
-FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
+FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
int numLoops,
- PadTensorOp &hoistedOp) {
+ tensor::PadOp &hoistedOp) {
LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops
<< " loops\n");
HoistingAnalysis analysis(opToHoist, numLoops);
@@ -399,7 +399,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
// Create the packed tensor<?x?x..?xpadded_shape> into which we amortize
// padding.
SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamicSize);
- // TODO: go grab dims when necessary, for now PadTensorOp returns a static
+ // TODO: go grab dims when necessary, for now tensor::PadOp returns a static
// tensor.
llvm::append_range(packedShape, paddedTensorType.getShape());
auto packedTensorType =
@@ -463,7 +463,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
// sizes = [1 .. 1, paddedShape].
SmallVector<OpFoldResult> sizes(nPackedLoops, b.getIndexAttr(1));
for (int64_t sz : paddedTensorType.getShape()) {
- // TODO: go grab dims when necessary, for now PadTensorOp returns a static
+ // TODO: go grab dims when necessary, for now tensor::PadOp returns a static
// tensor.
assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes");
sizes.push_back(b.getIndexAttr(sz));
@@ -506,6 +506,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides);
// Make the newly cloned `opToHoist` available to the caller.
- hoistedOp = cast<PadTensorOp>(bvm.lookup(opToHoist.result()).getDefiningOp());
+ hoistedOp =
+ cast<tensor::PadOp>(bvm.lookup(opToHoist.result()).getDefiningOp());
return newResult;
}
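
For readers skimming the new signature, a minimal sketch of how the relocated entry point is typically driven; `padOp`, the loop depth, and the early return are assumed caller scaffolding, only the hoistPaddingOnTensors signature is taken from this change:
```
// Hypothetical caller; mirrors the use in LinalgPaddingPattern further down.
tensor::PadOp hoistedOp;
FailureOr<Value> newResult =
    linalg::hoistPaddingOnTensors(padOp, /*numLoops=*/2, hoistedOp);
if (failed(newResult))
  return failure();
// hoistedOp now refers to the clone of padOp placed above the loop nest.
```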
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
index 31f8fa5b369e3..025adc2c56b2b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -100,7 +100,7 @@ struct LinalgStrategyTilePass
filter);
else
tilingPattern.add<LinalgTilingPattern>(ctx, options, filter);
- if (anchorOpName == linalg::PadTensorOp::getOperationName())
+ if (anchorOpName == tensor::PadOp::getOperationName())
populatePadTensorTilingPatterns(tilingPattern, options);
(void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern));
}
@@ -302,12 +302,12 @@ struct LinalgStrategyVectorizePass
std::move(vectorizationPatterns));
// Apply the pad tensor op vectorization separately to avoid running the
- // GenericPadTensorOpVectorizationPattern too early.
+ // GenericPadOpVectorizationPattern too early.
// TODO: Improve once we have better infrastructure to control pattern
// application.
if (vectorizePadding) {
RewritePatternSet patterns(funcOp.getContext());
- linalg::populatePadTensorOpVectorizationPatterns(patterns);
+ linalg::populatePadOpVectorizationPatterns(patterns);
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp
index 64de5197266be..78b5305c8a1ec 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp
@@ -38,9 +38,9 @@ namespace {
/// ```
///
/// if the `linalg.generic` has all parallel iterator types.
-struct FusePadTensorOp : OpRewritePattern<PadTensorOp> {
- using OpRewritePattern<PadTensorOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(PadTensorOp padOp,
+struct FusePadOp : OpRewritePattern<tensor::PadOp> {
+ using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
+ LogicalResult matchAndRewrite(tensor::PadOp padOp,
PatternRewriter &rewriter) const override {
// Only works on padding op that sets the padded value to a constant.
Value padValue = padOp.getConstantPaddingValue();
@@ -61,7 +61,10 @@ struct FusePadTensorOp : OpRewritePattern<PadTensorOp> {
padOp, "only supported for ops with all parallel iterator types");
}
ReifiedRankedShapedTypeDims resultShape;
- if (failed(padOp.reifyResultShapes(rewriter, resultShape)) ||
+ ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+ dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
+ if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter,
+ resultShape)) ||
resultShape.size() != 1) {
return rewriter.notifyMatchFailure(
padOp, "failed to get shape of pad op result");
@@ -118,5 +121,5 @@ struct FusePadTensorOp : OpRewritePattern<PadTensorOp> {
void mlir::linalg::populateFusePadTensorWithProducerLinalgOpPatterns(
RewritePatternSet &patterns) {
- patterns.add<FusePadTensorOp>(patterns.getContext());
+ patterns.add<FusePadOp>(patterns.getContext());
}
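
A quick usage sketch for the renamed pattern population helper; `funcOp` and the greedy driver are assumed scaffolding, only populateFusePadTensorWithProducerLinalgOpPatterns comes from this file:
```
// Assumed standalone driver for the FusePadOp pattern registered above.
RewritePatternSet patterns(funcOp.getContext());
linalg::populateFusePadTensorWithProducerLinalgOpPatterns(patterns);
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
```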
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index f005846ef4667..22fc7df2d69aa 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -338,18 +338,18 @@ mlir::linalg::tileLinalgOp(RewriterBase &b, LinalgOp op,
return failure();
}
-/// Generate a loop nest around a given PadTensorOp (for tiling). `newPadOp`
-/// and `loopNest` are output parameters that return the new (tiled) PadTensorOp
-/// and the loop nest.
-static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
- PadTensorOp &newPadOp, LoopNest &loopNest,
- const LinalgTilingOptions &options) {
+/// Generate a loop nest around a given tensor::PadOp (for tiling). `newPadOp`
+/// and `loopNest` are output parameters that return the new (tiled)
+/// tensor::PadOp and the loop nest.
+static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op,
+ tensor::PadOp &newPadOp, LoopNest &loopNest,
+ const LinalgTilingOptions &options) {
Location loc = op.getLoc();
OpBuilder::InsertionGuard g(builder);
builder.setInsertionPoint(op);
- // Clone PadTensorOp so that the existing op can be replaced more easily.
- newPadOp = cast<PadTensorOp>(builder.clone(*op.getOperation()));
+ // Clone tensor::PadOp so that the existing op can be replaced more easily.
+ newPadOp = cast<tensor::PadOp>(builder.clone(*op.getOperation()));
// Get rank and tile sizes.
int64_t rank = op.getResultType().getRank();
SmallVector<Value> tileSizes =
@@ -358,7 +358,9 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
tileSizes.append(rank - tileSizes.size(), zero);
// Compute lower and upper bounds of the loop nest.
- SmallVector<Range> ranges = op.getIterationDomain(builder);
+ TilingInterface tilingInterface =
+ dyn_cast<TilingInterface>(op.getOperation());
+ SmallVector<Range> ranges = tilingInterface.getIterationDomain(builder);
SmallVector<Value> lbs, dims, allDims, steps;
for (int64_t i = 0; i < rank; ++i) {
allDims.push_back(ranges[i].size);
@@ -369,7 +371,8 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
}
}
// Generate loop nest: One loop per dimension.
- SmallVector<Value> destOperand = op.getDestinationOperands(builder);
+ SmallVector<Value> destOperand =
+ tilingInterface.getDestinationOperands(builder);
loopNest = mlir::scf::buildLoopNest(
builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(destOperand),
[&](OpBuilder &b, Location loc, ValueRange localIvs,
@@ -379,8 +382,8 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
computeTileOffsets(b, loc, localIvs, tileSizes);
SmallVector<Value> sizes =
computeTileSizes(b, loc, localIvs, tileSizes, allDims);
- // Create ExtractSliceOp: Extract a tile from the PadTensorOp.
- // Note: The PadTensorOp is located outside of the loop nest. It is
+ // Create ExtractSliceOp: Extract a tile from the tensor::PadOp.
+ // Note: The tensor::PadOp is located outside of the loop nest. It is
// later moved inside by ExtractSliceOfPadTensorSwapPattern.
auto map = AffineMap::getMultiDimIdentityMap(rank, b.getContext());
Value tiledOutput =
@@ -399,21 +402,21 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
}
namespace {
-struct PadTensorOpTilingPattern : public OpRewritePattern<PadTensorOp> {
- PadTensorOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt)
- : OpRewritePattern<PadTensorOp>(ctx), options(std::move(opt)) {}
+struct PadOpTilingPattern : public OpRewritePattern<tensor::PadOp> {
+ PadOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt)
+ : OpRewritePattern<tensor::PadOp>(ctx), options(std::move(opt)) {}
- LogicalResult matchAndRewrite(PadTensorOp op,
+ LogicalResult matchAndRewrite(tensor::PadOp op,
PatternRewriter &rewriter) const override {
if (op->hasAttr(LinalgTransforms::kLinalgTransformMarker))
return failure();
- PadTensorOp newPadOp;
+ tensor::PadOp newPadOp;
LoopNest loopNest;
- if (failed(tilePadTensorOp(rewriter, op, newPadOp, loopNest, options)))
+ if (failed(tilePadOp(rewriter, op, newPadOp, loopNest, options)))
return failure();
newPadOp->setAttr(LinalgTransforms::kLinalgTransformMarker,
rewriter.getUnitAttr());
- // Replace all uses of the original PadTensorOp.
+ // Replace all uses of the original tensor::PadOp.
rewriter.replaceOp(op, loopNest.getResults()[0]);
return success();
}
@@ -470,7 +473,7 @@ void mlir::linalg::populateLinalgTilingCanonicalizationPatterns(
tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
InitTensorOp::getCanonicalizationPatterns(patterns, ctx);
- PadTensorOp::getCanonicalizationPatterns(patterns, ctx);
+ tensor::PadOp::getCanonicalizationPatterns(patterns, ctx);
ctx->getLoadedDialect<LinalgDialect>()->getCanonicalizationPatterns(patterns);
CanonicalizationPatternList<
@@ -489,13 +492,13 @@ static void insertTilingPatterns(RewritePatternSet &patterns,
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
>::insert(patterns, options, f);
- patterns.add<PadTensorOpTilingPattern>(ctx, options);
+ patterns.add<PadOpTilingPattern>(ctx, options);
}
void mlir::linalg::populatePadTensorTilingPatterns(
RewritePatternSet &patterns, const LinalgTilingOptions &options) {
auto *ctx = patterns.getContext();
- patterns.add<PadTensorOpTilingPattern>(ctx, options);
+ patterns.add<PadOpTilingPattern>(ctx, options);
}
static void applyExtractSliceOfPadTensorSwapPattern(FuncOp funcOp) {
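
For completeness, a hedged sketch of driving the renamed tiling pattern outside of the strategy passes; `funcOp` and the setTileSizes call are assumptions, populatePadTensorTilingPatterns is the helper defined above:
```
// Assumed standalone driver; LinalgStrategyTilePass reaches the same helper
// when the anchor op name is tensor::PadOp.
RewritePatternSet patterns(funcOp.getContext());
auto options = linalg::LinalgTilingOptions().setTileSizes({8, 16});
linalg::populatePadTensorTilingPatterns(patterns, options);
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
```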
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 462f3c668f5c3..9eb7b7cfe751c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -160,9 +160,9 @@ LinalgTilingOptions &mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims() {
/// Helper function that tries to pad `opOperand`. Exit early for scalar
/// operands, if `paddingFunc` returns failure, or if `opOperand` is not defined
/// by an ExtractSliceOp. Otherwise, try to pad the operand even if it already
-/// has a static shape. Set `result` to the result of the created PadTensorOp or
-/// and return success if the operand either has been padded to a static shape
-/// or already had a static shape and failure otherwise.
+/// has a static shape. Set `result` to the result of the created tensor::PadOp
+/// and return success if the operand either has been padded to a static shape
+/// or already had a static shape; return failure otherwise.
static LogicalResult padOperandToSmallestStaticBoundingBox(
OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand,
const PaddingValueComputationFunction &paddingFunc,
@@ -528,10 +528,10 @@ mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite(
// Hoist the padding.
for (const auto &en : enumerate(depths)) {
OpOperand &opOperand = paddedOp->getOpOperand(en.index());
- auto padTensorOp = opOperand.get().getDefiningOp<PadTensorOp>();
+ auto padTensorOp = opOperand.get().getDefiningOp<tensor::PadOp>();
if (!padTensorOp || en.value() == 0)
continue;
- PadTensorOp hoistedOp;
+ tensor::PadOp hoistedOp;
FailureOr<Value> newResult =
hoistPaddingOnTensors(padTensorOp, en.value(), hoistedOp);
if (failed(newResult))
@@ -749,10 +749,11 @@ static SmallVector<StringRef> getNParallelLoopsAttrs(unsigned nParallelLoops) {
return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());
}
-/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp (to
+/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp (to
/// initialize with pad_val) and GenericOp (to copy contents).
-LogicalResult PadTensorOpTransformationPattern::matchAndRewrite(
- linalg::PadTensorOp padOp, PatternRewriter &rewriter) const {
+LogicalResult
+PadOpTransformationPattern::matchAndRewrite(tensor::PadOp padOp,
+ PatternRewriter &rewriter) const {
auto inputShapedType = padOp.source().getType().cast<ShapedType>();
auto resultShapedType = padOp.result().getType().cast<ShapedType>();
@@ -767,9 +768,8 @@ LogicalResult PadTensorOpTransformationPattern::matchAndRewrite(
// 1. A BBarg from a different block.
// 2. A value defined outside of the current block.
Block &block = padOp.region().front();
- auto yieldOp = cast<YieldOp>(block.getTerminator());
- assert(yieldOp.getNumOperands() == 1 && "expected single operand yield");
- Value padValue = yieldOp.values().front();
+ auto yieldOp = cast<tensor::YieldOp>(block.getTerminator());
+ Value padValue = yieldOp.value();
Operation *definingOp = padValue.getDefiningOp();
if (definingOp && definingOp->getBlock() == &block)
return failure();
@@ -812,8 +812,8 @@ LogicalResult PadTensorOpTransformationPattern::matchAndRewrite(
/// Filling `dest` using FillOp constant padding value if possible.
/// Otherwise, generate a tensor::GenerateOp.
-Value GeneralizePadTensorOpPattern::createFillOrGenerateOp(
- PatternRewriter &rewriter, PadTensorOp padOp, Value dest,
+Value GeneralizePadOpPattern::createFillOrGenerateOp(
+ PatternRewriter &rewriter, tensor::PadOp padOp, Value dest,
const SmallVector<Value> &dynSizes) const {
auto padValue = padOp.getConstantPaddingValue();
if (padValue)
@@ -825,20 +825,12 @@ Value GeneralizePadTensorOpPattern::createFillOrGenerateOp(
// Copy region to new op.
BlockAndValueMapping bvm;
padOp.region().cloneInto(&generateOp.getRegion(), bvm);
- // Rewrite linalg::YieldOp to tensor::YieldOp.
- OpBuilder::InsertionGuard guard(rewriter);
- auto yieldOp =
- dyn_cast<linalg::YieldOp>(generateOp.getRegion().front().getTerminator());
- assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator");
- assert(yieldOp.values().size() == 1);
- rewriter.setInsertionPoint(yieldOp);
- rewriter.replaceOpWithNewOp<tensor::YieldOp>(yieldOp, yieldOp.values()[0]);
return generateOp;
}
LogicalResult
-GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp,
- PatternRewriter &rewriter) const {
+GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp,
+ PatternRewriter &rewriter) const {
// Given an OpFoldResult, return an index-typed value.
auto getIdxValue = [&](OpFoldResult ofr) {
if (auto val = ofr.dyn_cast<Value>())
@@ -877,10 +869,10 @@ GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp,
if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded())
return success();
- // PadTensorOps cannot be optimized. Generate a InsertSliceOp instead
+ // tensor::PadOps cannot be optimized. Generate an InsertSliceOp instead
// for copying the PadOp source.
auto sourceType = padOp.getSourceType();
- // Compute size of source of PadTensorOp.
+ // Compute size of source of tensor::PadOp.
SmallVector<OpFoldResult> srcSizes;
for (unsigned dim = 0; dim < sourceType.getRank(); ++dim) {
if (sourceType.isDynamicDim(dim)) {
@@ -901,15 +893,17 @@ GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp,
LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
- auto padOp = sliceOp.source().getDefiningOp<PadTensorOp>();
+ auto padOp = sliceOp.source().getDefiningOp<tensor::PadOp>();
if (!padOp)
return failure();
// Only unit stride supported.
if (!sliceOp.hasUnitStride())
return failure();
+ TilingInterface tilingInterface =
+ dyn_cast<TilingInterface>(padOp.getOperation());
Operation *tiledPadOp =
- padOp
+ tilingInterface
.getTiledImplementation(
rewriter, /*dest=*/ValueRange{}, sliceOp.getMixedOffsets(),
sliceOp.getMixedSizes(), /*tileDestOperands=*/false)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 7472d9ee20898..e0b2c64056674 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -682,20 +682,19 @@ static SmallVector<Value> ofrToIndexValues(OpBuilder &builder, Location loc,
return result;
}
-/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and
+/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and
/// InsertSliceOp. For now, only constant padding values are supported.
/// If there is enough static type information, TransferReadOps and
/// TransferWriteOps may be generated instead of InsertSliceOps.
-struct GenericPadTensorOpVectorizationPattern
- : public GeneralizePadTensorOpPattern {
- GenericPadTensorOpVectorizationPattern(MLIRContext *context,
- PatternBenefit benefit = 1)
- : GeneralizePadTensorOpPattern(context, tryVectorizeCopy, benefit) {}
- /// Vectorize the copying of a PadTensorOp's source. This is possible if
+struct GenericPadOpVectorizationPattern : public GeneralizePadOpPattern {
+ GenericPadOpVectorizationPattern(MLIRContext *context,
+ PatternBenefit benefit = 1)
+ : GeneralizePadOpPattern(context, tryVectorizeCopy, benefit) {}
+ /// Vectorize the copying of a tensor::PadOp's source. This is possible if
/// each dimension size is statically known in the source type or the result
/// type (or both).
static LogicalResult tryVectorizeCopy(PatternRewriter &rewriter,
- PadTensorOp padOp, Value dest) {
+ tensor::PadOp padOp, Value dest) {
auto sourceType = padOp.getSourceType();
auto resultType = padOp.getResultType();
@@ -767,13 +766,13 @@ struct GenericPadTensorOpVectorizationPattern
}
};
-/// Base pattern for rewriting PadTensorOps whose result is consumed by a
+/// Base pattern for rewriting tensor::PadOps whose result is consumed by a
/// given operation type OpTy.
template <typename OpTy>
-struct VectorizePadTensorOpUserPattern : public OpRewritePattern<PadTensorOp> {
- using OpRewritePattern<PadTensorOp>::OpRewritePattern;
+struct VectorizePadOpUserPattern : public OpRewritePattern<tensor::PadOp> {
+ using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(PadTensorOp padOp,
+ LogicalResult matchAndRewrite(tensor::PadOp padOp,
PatternRewriter &rewriter) const final {
bool changed = false;
// Insert users in vector, because some users may be replaced/removed.
@@ -785,10 +784,10 @@ struct VectorizePadTensorOpUserPattern : public OpRewritePattern<PadTensorOp> {
protected:
virtual LogicalResult rewriteUser(PatternRewriter &rewriter,
- PadTensorOp padOp, OpTy op) const = 0;
+ tensor::PadOp padOp, OpTy op) const = 0;
};
-/// Rewrite use of PadTensorOp result in TransferReadOp. E.g.:
+/// Rewrite use of tensor::PadOp result in TransferReadOp. E.g.:
/// ```
/// %0 = linalg.pad_tensor %src ... : tensor<?x?xf32> to tensor<17x5xf32>
/// %r = vector.transfer_read %0[%c0, %c0], %cst
@@ -807,12 +806,12 @@ struct VectorizePadTensorOpUserPattern : public OpRewritePattern<PadTensorOp> {
/// - `xferOp` has no out-of-bounds dims or mask.
/// - Low padding is static 0.
/// - Single, scalar padding value.
-struct PadTensorOpVectorizationWithTransferReadPattern
- : public VectorizePadTensorOpUserPattern<vector::TransferReadOp> {
- using VectorizePadTensorOpUserPattern<
- vector::TransferReadOp>::VectorizePadTensorOpUserPattern;
+struct PadOpVectorizationWithTransferReadPattern
+ : public VectorizePadOpUserPattern<vector::TransferReadOp> {
+ using VectorizePadOpUserPattern<
+ vector::TransferReadOp>::VectorizePadOpUserPattern;
- LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp,
+ LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp,
vector::TransferReadOp xferOp) const override {
// Low padding must be static 0.
if (!padOp.hasZeroLowPad())
@@ -837,7 +836,7 @@ struct PadTensorOpVectorizationWithTransferReadPattern
}
};
-/// Rewrite use of PadTensorOp result in TransferWriteOp.
+/// Rewrite use of tensor::PadOp result in TransferWriteOp.
/// This pattern rewrites TransferWriteOps that write to a padded tensor
/// value, where the same amount of padding is immediately removed again after
/// the write. In such cases, the TransferWriteOp can write to the non-padded
@@ -869,12 +868,12 @@ struct PadTensorOpVectorizationWithTransferReadPattern
/// ExtractSliceOp trims the same amount of padding that was added
/// beforehand.
/// - Single, scalar padding value.
-struct PadTensorOpVectorizationWithTransferWritePattern
- : public VectorizePadTensorOpUserPattern<vector::TransferWriteOp> {
- using VectorizePadTensorOpUserPattern<
- vector::TransferWriteOp>::VectorizePadTensorOpUserPattern;
+struct PadOpVectorizationWithTransferWritePattern
+ : public VectorizePadOpUserPattern<vector::TransferWriteOp> {
+ using VectorizePadOpUserPattern<
+ vector::TransferWriteOp>::VectorizePadOpUserPattern;
- LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp,
+ LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp,
vector::TransferWriteOp xferOp) const override {
// TODO: support 0-d corner case.
if (xferOp.getTransferRank() == 0)
@@ -925,7 +924,7 @@ struct PadTensorOpVectorizationWithTransferWritePattern
/// sizes may turn out to be equal at runtime.
bool hasSameTensorSize(Value beforePadding,
tensor::ExtractSliceOp afterTrimming) const {
- // If the input to PadTensorOp is a CastOp, try with with both CastOp
+ // If the input to tensor::PadOp is a CastOp, try with both CastOp
// result and CastOp operand.
if (auto castOp = beforePadding.getDefiningOp<tensor::CastOp>())
if (hasSameTensorSize(castOp.source(), afterTrimming))
@@ -1000,7 +999,7 @@ struct PadTensorOpVectorizationWithTransferWritePattern
}
};
-/// Rewrite use of PadTensorOp result in InsertSliceOp. E.g.:
+/// Rewrite use of tensor::PadOp result in InsertSliceOp. E.g.:
/// ```
/// %0 = linalg.pad_tensor %src ... : tensor<?x?xf32> to tensor<17x5xf32>
/// %r = tensor.insert_slice %0
@@ -1023,12 +1022,12 @@ struct PadTensorOpVectorizationWithTransferWritePattern
/// - Only unit strides in `insertOp`.
/// - Single, scalar padding value.
/// - `padOp` result not used as destination.
-struct PadTensorOpVectorizationWithInsertSlicePattern
- : public VectorizePadTensorOpUserPattern<tensor::InsertSliceOp> {
- using VectorizePadTensorOpUserPattern<
- tensor::InsertSliceOp>::VectorizePadTensorOpUserPattern;
+struct PadOpVectorizationWithInsertSlicePattern
+ : public VectorizePadOpUserPattern<tensor::InsertSliceOp> {
+ using VectorizePadOpUserPattern<
+ tensor::InsertSliceOp>::VectorizePadOpUserPattern;
- LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp,
+ LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp,
tensor::InsertSliceOp insertOp) const override {
// Low padding must be static 0.
if (!padOp.hasZeroLowPad())
@@ -1087,14 +1086,14 @@ struct PadTensorOpVectorizationWithInsertSlicePattern
}
};
-void mlir::linalg::populatePadTensorOpVectorizationPatterns(
+void mlir::linalg::populatePadOpVectorizationPatterns(
RewritePatternSet &patterns, PatternBenefit baseBenefit) {
- patterns.add<GenericPadTensorOpVectorizationPattern>(patterns.getContext(),
- baseBenefit);
+ patterns.add<GenericPadOpVectorizationPattern>(patterns.getContext(),
+ baseBenefit);
// Try these specialized patterns first before resorting to the generic one.
- patterns.add<PadTensorOpVectorizationWithTransferReadPattern,
- PadTensorOpVectorizationWithTransferWritePattern,
- PadTensorOpVectorizationWithInsertSlicePattern>(
+ patterns.add<PadOpVectorizationWithTransferReadPattern,
+ PadOpVectorizationWithTransferWritePattern,
+ PadOpVectorizationWithInsertSlicePattern>(
patterns.getContext(), baseBenefit.getBenefit() + 1);
}
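
The benefit ordering here matters: the generic rewrite is registered at `baseBenefit` and the three specialized user patterns at `baseBenefit + 1`, so a standalone sketch (pass scaffolding assumed) looks like:
```
// Assumed driver; the specialized PadOpVectorizationWith*Pattern rewrites are
// tried first because they were added with baseBenefit + 1 above.
RewritePatternSet patterns(funcOp.getContext());
linalg::populatePadOpVectorizationPatterns(patterns, /*baseBenefit=*/1);
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
```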
diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
index a55955654dafe..1231f378a306d 100644
--- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
@@ -12,5 +12,6 @@ add_mlir_dialect_library(MLIRLinalgUtils
MLIRSCF
MLIRPass
MLIRStandard
+ MLIRTensorUtils
MLIRTransformUtils
)
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 61be7bc6c6461..bf37719325ccb 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -23,6 +23,7 @@
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineExprVisitor.h"
@@ -328,7 +329,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
// Exit if `source` is not defined by an ExtractSliceOp.
auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
if (!sliceOp)
- return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+ return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
// Search the `source` use-def chain for padded LinalgOps.
Value current = sliceOp.source();
@@ -339,22 +340,22 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
OpResult opResult = current.cast<OpResult>();
current = linalgOp.getOutputOperand(opResult.getResultNumber())->get();
}
- auto padTensorOp = current ? current.getDefiningOp<PadTensorOp>() : nullptr;
+ auto padTensorOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
- // Exit if the search fails to match a PadTensorOp at the end of the matched
+ // Exit if the search fails to match a tensor::PadOp at the end of the matched
// LinalgOp sequence.
if (!padTensorOp)
- return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+ return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
// Exit if the padded result type does not match.
if (sliceOp.source().getType() != type)
- return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+ return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
// Exit if the LinalgOps are not high padded.
if (llvm::any_of(padTensorOp.getMixedLowPad(), [](OpFoldResult ofr) {
return getConstantIntValue(ofr) != static_cast<int64_t>(0);
}))
- return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+ return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
// Exit if `padTensorOpSliceOp`, which defines the slice used by
// `padTensorOp`, is rank-reducing.
@@ -362,7 +363,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
padTensorOp.source().getDefiningOp<tensor::ExtractSliceOp>();
if (!padTensorOpSliceOp || sliceOp.getMixedSizes().size() !=
padTensorOpSliceOp.getMixedSizes().size())
- return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+ return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
// Exit if the sizes of the dynamic sizes of `sliceOp` do not match the size
// of the slice padded by `padTensorOp`.
@@ -372,7 +373,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
return !isEqualConstantIntOrValue(std::get<0>(it),
std::get<1>(it));
}))
- return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+ return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
// Exit if the padding values do not match.
Attribute padTensorOpPadAttr, padAttr;
@@ -380,7 +381,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
if (!padTensorOpPad ||
!matchPattern(padTensorOpPad, m_Constant(&padTensorOpPadAttr)) ||
!matchPattern(pad, m_Constant(&padAttr)) || padTensorOpPadAttr != padAttr)
- return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+ return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
// Return the padded result if the padding values and sizes match.
return sliceOp.source();
diff --git a/mlir/lib/Dialect/Tensor/CMakeLists.txt b/mlir/lib/Dialect/Tensor/CMakeLists.txt
index 9f57627c321fb..31167e6af908b 100644
--- a/mlir/lib/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/CMakeLists.txt
@@ -1,2 +1,3 @@
add_subdirectory(IR)
add_subdirectory(Transforms)
+add_subdirectory(Utils)
diff --git a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
index 87aeaab6ca976..df2807f318e04 100644
--- a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_OPTIONAL_SOURCES
TensorDialect.cpp
TensorInferTypeOpInterfaceImpl.cpp
TensorOps.cpp
+ TensorTilingInterfaceImpl.cpp
)
add_mlir_dialect_library(MLIRTensor
@@ -43,3 +44,20 @@ add_mlir_dialect_library(MLIRTensorInferTypeOpInterfaceImpl
MLIRSupport
MLIRTensor
)
+
+add_mlir_dialect_library(MLIRTensorTilingInterfaceImpl
+ TensorTilingInterfaceImpl.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor
+
+ LINK_LIBS PUBLIC
+ MLIRAffine
+ MLIRIR
+ MLIRLinalg
+ MLIRSCF
+ MLIRStandard
+ MLIRSupport
+ MLIRTensor
+ MLIRTilingInterface
+ )
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp
index 588b635805893..bb7bd82f40a68 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp
@@ -161,6 +161,48 @@ struct ReifyExpandOrCollapseShapeOp
}
};
+namespace {
+
+struct ReifyPadOp
+ : public ReifyRankedShapedTypeOpInterface::ExternalModel<ReifyPadOp,
+ PadOp> {
+ LogicalResult
+ reifyResultShapes(Operation *op, OpBuilder &b,
+ ReifiedRankedShapedTypeDims &reifiedReturnShapes) const {
+ auto padOp = cast<PadOp>(op);
+ Location loc = padOp.getLoc();
+ auto lowPad = padOp.getMixedLowPad();
+ auto highPad = padOp.getMixedHighPad();
+ SmallVector<Value> shapes;
+ for (auto dim : llvm::seq<int64_t>(0, padOp.getSourceType().getRank())) {
+ // Shape along each dimension is source dim + low pad + high pad.
+ SmallVector<Value> mapOperands;
+ mapOperands.push_back(
+ b.createOrFold<tensor::DimOp>(loc, padOp.source(), dim));
+ AffineExpr expr = b.getAffineDimExpr(0);
+ unsigned numSymbols = 0;
+ auto addOpFoldResult = [&](OpFoldResult valueOrAttr) {
+ if (Value v = valueOrAttr.dyn_cast<Value>()) {
+ expr = expr + b.getAffineSymbolExpr(numSymbols++);
+ mapOperands.push_back(v);
+ return;
+ }
+ int64_t staticValue =
+ valueOrAttr.get<Attribute>().cast<IntegerAttr>().getInt();
+ expr = expr + staticValue;
+ };
+ addOpFoldResult(lowPad[dim]);
+ addOpFoldResult(highPad[dim]);
+ shapes.push_back(applyMapToValues(
+ b, loc, AffineMap::get(1, numSymbols, expr), mapOperands)[0]);
+ }
+ reifiedReturnShapes.emplace_back(std::move(shapes));
+ return success();
+ }
+};
+
+} // namespace
+
void mlir::tensor::registerInferTypeOpInterfaceExternalModels(
DialectRegistry ®istry) {
registry
@@ -169,4 +211,5 @@ void mlir::tensor::registerInferTypeOpInterfaceExternalModels(
registry
.addOpInterface<tensor::CollapseShapeOp,
ReifyExpandOrCollapseShapeOp<tensor::CollapseShapeOp>>();
+ registry.addOpInterface<tensor::PadOp, ReifyPadOp>();
}
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 613edde638683..42f57a9cf99bd 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -476,6 +476,7 @@ static LogicalResult verify(GenerateOp op) {
// Ensure that the region yields an element of the right type.
auto yieldOp =
llvm::cast<YieldOp>(op.body().getBlocks().front().getTerminator());
+
if (yieldOp.value().getType() != resultTy.getElementType())
return op.emitOpError(
"body must be terminated with a `yield` operation of the tensor "
@@ -1482,6 +1483,258 @@ Value mlir::tensor::createCanonicalRankReducingInsertSliceOp(OpBuilder &b,
sizes, strides);
}
+//===----------------------------------------------------------------------===//
+// PadOp
+//===----------------------------------------------------------------------===//
+
+// TODO: Replace custom<InferType> directive with AllTypesMatch as soon as it
+// supports optional types.
+void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand,
+ Type typeToInfer, Type typeToInferFrom) {}
+
+ParseResult parseInferType(OpAsmParser &parser,
+ Optional<OpAsmParser::OperandType> optOperand,
+ Type &typeToInfer, Type typeToInferFrom) {
+ if (optOperand)
+ typeToInfer = typeToInferFrom;
+ return success();
+}
+
+static LogicalResult verify(PadOp op) {
+ auto sourceType = op.source().getType().cast<RankedTensorType>();
+ auto resultType = op.result().getType().cast<RankedTensorType>();
+ auto expectedType = PadOp::inferResultType(
+ sourceType, extractFromI64ArrayAttr(op.static_low()),
+ extractFromI64ArrayAttr(op.static_high()));
+ for (int i = 0, e = sourceType.getRank(); i < e; ++i) {
+ if (resultType.getDimSize(i) == expectedType.getDimSize(i))
+ continue;
+ if (expectedType.isDynamicDim(i))
+ continue;
+ return op.emitError("specified type ")
+ << resultType << " does not match the inferred type "
+ << expectedType;
+ }
+
+ auto ®ion = op.region();
+ unsigned rank = resultType.getRank();
+ Block &block = region.front();
+ if (block.getNumArguments() != rank)
+ return op.emitError("expected the block to have ") << rank << " arguments";
+
+ // Note: the number and type of yield values are checked in the YieldOp.
+ for (const auto &en : llvm::enumerate(block.getArgumentTypes())) {
+ if (!en.value().isIndex())
+ return op.emitOpError("expected block argument ")
+ << (en.index() + 1) << " to be an index";
+ }
+
+ // Ensure that the region yields an element of the right type.
+ auto yieldOp = llvm::cast<YieldOp>(block.getTerminator());
+ if (yieldOp.value().getType() !=
+ op.getType().cast<ShapedType>().getElementType())
+ return op.emitOpError("expected yield type to match shape element type");
+
+ return success();
+}
+
+RankedTensorType PadOp::inferResultType(RankedTensorType sourceType,
+ ArrayRef<int64_t> staticLow,
+ ArrayRef<int64_t> staticHigh,
+ ArrayRef<int64_t> resultShape) {
+ unsigned rank = sourceType.getRank();
+ assert(staticLow.size() == rank && "unexpected staticLow size mismatch");
+ assert(staticHigh.size() == rank && "unexpected staticHigh size mismatch");
+ assert((resultShape.empty() || resultShape.size() == rank) &&
+ "unexpected resultShape size mismatch");
+
+ SmallVector<int64_t, 4> inferredShape;
+ for (auto i : llvm::seq<unsigned>(0, rank)) {
+ if (sourceType.isDynamicDim(i) ||
+ staticLow[i] == ShapedType::kDynamicSize ||
+ staticHigh[i] == ShapedType::kDynamicSize) {
+ inferredShape.push_back(resultShape.empty() ? ShapedType::kDynamicSize
+ : resultShape[i]);
+ } else {
+ int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i];
+ assert((resultShape.empty() || size == resultShape[i] ||
+ resultShape[i] == ShapedType::kDynamicSize) &&
+ "mismatch between inferred shape and result shape");
+ inferredShape.push_back(size);
+ }
+ }
+
+ return RankedTensorType::get(inferredShape, sourceType.getElementType());
+}
+
+void PadOp::build(OpBuilder &b, OperationState &result, Value source,
+ ArrayRef<int64_t> staticLow, ArrayRef<int64_t> staticHigh,
+ ValueRange low, ValueRange high, bool nofold,
+ ArrayRef<NamedAttribute> attrs) {
+ auto sourceType = source.getType().cast<RankedTensorType>();
+ auto resultType = inferResultType(sourceType, staticLow, staticHigh);
+ build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow),
+ b.getI64ArrayAttr(staticHigh), nofold ? b.getUnitAttr() : UnitAttr());
+ result.addAttributes(attrs);
+}
+
+void PadOp::build(OpBuilder &b, OperationState &result, Value source,
+ ValueRange low, ValueRange high, bool nofold,
+ ArrayRef<NamedAttribute> attrs) {
+ auto sourceType = source.getType().cast<RankedTensorType>();
+ unsigned rank = sourceType.getRank();
+ SmallVector<int64_t, 4> staticVector(rank, ShapedType::kDynamicSize);
+ build(b, result, source, staticVector, staticVector, low, high, nofold,
+ attrs);
+}
+
+void PadOp::build(OpBuilder &b, OperationState &result, Type resultType,
+ Value source, ArrayRef<OpFoldResult> low,
+ ArrayRef<OpFoldResult> high, bool nofold,
+ ArrayRef<NamedAttribute> attrs) {
+ assert(resultType.isa<RankedTensorType>());
+ auto sourceType = source.getType().cast<RankedTensorType>();
+ SmallVector<Value, 4> dynamicLow, dynamicHigh;
+ SmallVector<int64_t, 4> staticLow, staticHigh;
+ // staticLow and staticHigh have full information of the padding config.
+ // This will grow staticLow and staticHigh with 1 value. If the config is
+ // dynamic (i.e., not a constant), dynamicLow and dynamicHigh will grow with 1
+ // value as well.
+ dispatchIndexOpFoldResults(low, dynamicLow, staticLow,
+ ShapedType::kDynamicSize);
+ dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh,
+ ShapedType::kDynamicSize);
+ if (!resultType) {
+ resultType = PadOp::inferResultType(sourceType, staticLow, staticHigh);
+ }
+ build(b, result, resultType, source, dynamicLow, dynamicHigh,
+ b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh),
+ nofold ? b.getUnitAttr() : UnitAttr());
+ result.addAttributes(attrs);
+}
+
+namespace {
+// Folds tensor.pad when padding is static zeros and the attribute
+// doesn't request otherwise.
+struct FoldStaticZeroPadding : public OpRewritePattern<PadOp> {
+ using OpRewritePattern<PadOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(PadOp padTensorOp,
+ PatternRewriter &rewriter) const override {
+ if (!padTensorOp.hasZeroLowPad() || !padTensorOp.hasZeroHighPad())
+ return failure();
+ if (padTensorOp.nofold())
+ return failure();
+ rewriter.replaceOpWithNewOp<tensor::CastOp>(
+ padTensorOp, padTensorOp.result().getType(), padTensorOp.source());
+ return success();
+ }
+};
+
+// Fold CastOp into PadOp when adding static information.
+struct FoldSourceTensorCast : public OpRewritePattern<PadOp> {
+ using OpRewritePattern<PadOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(PadOp padTensorOp,
+ PatternRewriter &rewriter) const override {
+ auto castOp = padTensorOp.source().getDefiningOp<tensor::CastOp>();
+ if (!tensor::canFoldIntoConsumerOp(castOp))
+ return failure();
+
+ auto newResultType = PadOp::inferResultType(
+ castOp.source().getType().cast<RankedTensorType>(),
+ extractFromI64ArrayAttr(padTensorOp.static_low()),
+ extractFromI64ArrayAttr(padTensorOp.static_high()),
+ padTensorOp.getResultType().getShape());
+
+ if (newResultType == padTensorOp.getResultType()) {
+ rewriter.updateRootInPlace(padTensorOp, [&]() {
+ padTensorOp.sourceMutable().assign(castOp.source());
+ });
+ } else {
+ auto newOp = rewriter.create<PadOp>(
+ padTensorOp->getLoc(), newResultType, padTensorOp.source(),
+ padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(),
+ padTensorOp.static_high(), padTensorOp.nofold());
+ BlockAndValueMapping mapper;
+ padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper);
+
+ rewriter.replaceOpWithNewOp<tensor::CastOp>(
+ padTensorOp, padTensorOp.getResultType(), newOp);
+ }
+ return success();
+ }
+};
+
+// Fold CastOp using the result of PadOp back into the latter if it adds
+// static information.
+struct FoldTargetTensorCast : public OpRewritePattern<PadOp> {
+ using OpRewritePattern<PadOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(PadOp padTensorOp,
+ PatternRewriter &rewriter) const override {
+ if (!padTensorOp.result().hasOneUse())
+ return failure();
+ auto tensorCastOp =
+ dyn_cast<tensor::CastOp>(*padTensorOp->getUsers().begin());
+ if (!tensorCastOp)
+ return failure();
+ if (!tensor::preservesStaticInformation(padTensorOp.result().getType(),
+ tensorCastOp.dest().getType()))
+ return failure();
+
+ auto replacementOp = rewriter.create<PadOp>(
+ padTensorOp.getLoc(), tensorCastOp.dest().getType(),
+ padTensorOp.source(), padTensorOp.low(), padTensorOp.high(),
+ padTensorOp.static_low(), padTensorOp.static_high(),
+ padTensorOp.nofold());
+ replacementOp.region().takeBody(padTensorOp.region());
+
+ rewriter.replaceOp(padTensorOp, replacementOp.result());
+ rewriter.replaceOp(tensorCastOp, replacementOp.result());
+ return success();
+ }
+};
+} // namespace
+
+void PadOp::getCanonicalizationPatterns(RewritePatternSet &results,
+ MLIRContext *context) {
+ results
+ .add<FoldStaticZeroPadding, FoldSourceTensorCast, FoldTargetTensorCast>(
+ context);
+}
+
+/// Return the padding value of the PadOp if it is constant. In this context,
+/// "constant" means an actual constant or "defined outside of the block".
+///
+/// Values are considered constant in three cases:
+/// - A ConstantLike value.
+/// - A basic block argument from a different block.
+/// - A value defined outside of the block.
+///
+/// If the padding value is not constant, an empty Value is returned.
+Value PadOp::getConstantPaddingValue() {
+ auto yieldOp = dyn_cast<YieldOp>(getRegion().front().getTerminator());
+ if (!yieldOp)
+ return {};
+ Value padValue = yieldOp.value();
+ // Check if yield value is a constant.
+ if (matchPattern(padValue, m_Constant()))
+ return padValue;
+ // Check if yield value is defined inside the PadOp block.
+ if (padValue.getParentBlock() == &getRegion().front())
+ return {};
+ // Else: Yield value defined outside of the PadOp block.
+ return padValue;
+}
+
+OpFoldResult PadOp::fold(ArrayRef<Attribute>) {
+ if (getResultType().hasStaticShape() && getResultType() == getSourceType() &&
+ !nofold())
+ return source();
+ return {};
+}
+
//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//
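
A tiny worked example of the shape arithmetic in PadOp::inferResultType may help reviewers; the builder `b` and the f32 element type are assumptions:
```
// dim 0: 4 (source) + 1 (low) + 2 (high) = 7; dim 1 stays dynamic because
// the source dimension is dynamic.
auto srcTy = RankedTensorType::get({4, ShapedType::kDynamicSize},
                                   b.getF32Type());
auto resTy = tensor::PadOp::inferResultType(srcTy, /*staticLow=*/{1, 0},
                                            /*staticHigh=*/{2, 3});
// resTy is tensor<7x?xf32>.
```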
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
new file mode 100644
index 0000000000000..d206dc2ce9e90
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -0,0 +1,279 @@
+//===- TensorTilingInterfaceImpl.cpp - Tiling Interface models -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/StandardOps/Utils/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Interfaces/TilingInterface.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+namespace {
+
+struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
+
+ SmallVector<Value> getDestinationOperands(Operation *op, OpBuilder &b) const {
+ ReifiedRankedShapedTypeDims reifiedShapes;
+ ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+ dyn_cast<ReifyRankedShapedTypeOpInterface>(op);
+ (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes);
+
+ auto padOp = cast<PadOp>(op);
+ SmallVector<OpFoldResult> mixedSizes = getAsOpFoldResult(reifiedShapes[0]);
+ Value initTensor = b.create<linalg::InitTensorOp>(
+ op->getLoc(), mixedSizes, padOp.getResultType().getElementType());
+ return {initTensor};
+ }
+
+ SmallVector<StringRef> getLoopIteratorTypes(Operation *op) const {
+ auto padOp = cast<PadOp>(op);
+ SmallVector<StringRef> iteratorTypes(padOp.getResultType().getRank(),
+ getParallelIteratorTypeName());
+ return iteratorTypes;
+ }
+
+ SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
+ ReifiedRankedShapedTypeDims reifiedShapes;
+ ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+ dyn_cast<ReifyRankedShapedTypeOpInterface>(op);
+ (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes);
+
+ Location loc = op->getLoc();
+ Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
+ Value one = b.create<arith::ConstantIndexOp>(loc, 1);
+ // Initialize all the ranges to {zero, one, one}. All the `ub`s are
+ // overwritten.
+ SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
+ for (const auto &ub : enumerate(reifiedShapes[0]))
+ loopRanges[ub.index()].size = ub.value();
+ return loopRanges;
+ }
+
+ SmallVector<Operation *>
+ getTiledImplementation(Operation *op, OpBuilder &b, ValueRange dest,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes,
+ bool /*tileDestOperands*/) const {
+ auto padOp = cast<PadOp>(op);
+ // Only constant padding value supported.
+ Value padValue = padOp.getConstantPaddingValue();
+ if (!padValue)
+ return {};
+
+ // Helper variables and functions for various arithmetic operations. These
+ // are used extensively for computing new offset/length and padding values.
+ Location loc = op->getLoc();
+ AffineExpr dim0, dim1;
+ bindDims(b.getContext(), dim0, dim1);
+ // Add two integers.
+ auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
+ auto add = [&](Value v1, Value v2) {
+ return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
+ };
+ // Subtract two integers.
+ auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
+ auto sub = [&](Value v1, Value v2) {
+ return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
+ };
+ // Take the minimum of two integers.
+ auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
+ auto min = [&](Value v1, Value v2) {
+ return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
+ };
+ // Take the maximum of two integers.
+ auto max = [&](Value v1, Value v2) {
+ return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
+ };
+ // Zero index-typed integer.
+ auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
+
+ // Helper function for filling static/dynamic low/high padding indices
+ // vectors of PadOp.
+ auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
+ SmallVector<int64_t> &staticIndices) {
+ if (auto constInt = getConstantIntValue(val)) {
+ staticIndices.push_back(*constInt);
+ } else {
+ staticIndices.push_back(ShapedType::kDynamicSize);
+ dynIndices.push_back(val);
+ }
+ };
+
+ // Compute new offsets, lengths, low padding, high padding.
+ SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
+ SmallVector<Value> newLows, newHighs;
+ SmallVector<int64_t> staticNewLows, staticNewHighs;
+ // Set to true if the original data source is not read at all.
+ bool hasZeroLen = false;
+ // Same as hasZeroLen, but for dynamic dimension sizes. This condition
+ // is true if the original data source turns out to be unused at runtime.
+ Value dynHasZeroLenCond;
+
+ int64_t rank = padOp.getSourceType().getRank();
+ for (unsigned dim = 0; dim < rank; ++dim) {
+ auto low =
+ getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedLowPad()[dim]);
+ bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
+ auto high =
+ getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedHighPad()[dim]);
+ bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
+ auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]);
+ auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]);
+ auto srcSize = b.createOrFold<tensor::DimOp>(loc, padOp.source(), dim);
+
+ // The new amount of low padding is `low - offset`. Except for the case
+ // where none of the low padding is read. In that case, the new amount of
+ // low padding is zero.
+ //
+ // Optimization: If low = 0, then newLow = 0.
+ Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+ appendIndex(newLow, newLows, staticNewLows);
+
+ // Start reading the data from position `offset - low`. Since the original
+ // read may have started in the low padding zone, this value could be
+ // negative. Therefore, start reading from:
+ //
+ // max(offset - low, 0)
+ //
+ // The original read could also have started in the high padding zone.
+ // In that case, set the offset to the end of the source tensor. The new
+ // ExtractSliceOp length will be zero in that case. (Effectively reading
+ // no data from the source.)
+ //
+ // Optimization: If low = 0, then the formula can be simplified.
+ Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
+ : min(offset, srcSize);
+ newOffsets.push_back(getAsOpFoldResult(newOffset));
+
+ // The original ExtractSliceOp was reading until position `offset +
+ // length`. Therefore, the corresponding position within the source tensor
+ // is:
+ //
+ // offset + length - low
+ //
+ // In case the original ExtractSliceOp stopped reading within the low
+ // padding zone, this value can be negative. In that case, the end
+ // position of the read should be zero. (Similar to newOffset.)
+ //
+ // The original read could also have stopped in the high padding zone.
+ // In that case, the end position of the read should be the end of
+ // the source tensor. (Similar to newOffset.)
+ //
+ // endLoc = min(max(offset - low + length, 0), srcSize)
+ //
+ // The new ExtractSliceOp length is `endLoc - newOffset`.
+ //
+ // Optimization: If low = 0, then the formula can be simplified.
+ Value endLoc =
+ hasLowPad ? min(max(add(sub(offset, low), length), zero), srcSize)
+ : min(add(offset, length), srcSize);
+ Value newLength = sub(endLoc, newOffset);
+ newLengths.push_back(getAsOpFoldResult(newLength));
+
+ // Check if newLength is zero. In that case, no SubTensorOp should be
+ // executed.
+ if (auto newLengthInt = getConstantIntValue(newLength)) {
+ hasZeroLen |= *newLengthInt == 0;
+ } else {
+ Value check = b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
+ newLength, zero);
+ dynHasZeroLenCond =
+ dynHasZeroLenCond
+ ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
+ : check;
+ }
+
+ // The amount of high padding is simply the number of elements remaining,
+ // so that the result has the same length as the original ExtractSliceOp.
+ // As an optimization, if the original high padding is zero, then the new
+ // high padding must also be zero.
+ Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
+ appendIndex(newHigh, newHighs, staticNewHighs);
+
+ // Only unit stride supported.
+ newStrides.push_back(b.getIndexAttr(1));
+ }
+
+ // The shape of the result can be obtained from the sizes passed in.
+ SmallVector<Value> dynDims;
+ SmallVector<int64_t> shape;
+ dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize);
+ RankedTensorType resultType =
+ RankedTensorType::get(shape, padOp.getResultType().getElementType());
+
+ // Insert cast to ensure that types match. (May be folded away.)
+ auto castResult = [&](Value val) -> Operation * {
+ auto castOp = b.create<tensor::CastOp>(loc, resultType, val);
+ return castOp;
+ };
+
+ // In cases where the original data source is unused: Emit a GenerateOp and
+ // do not generate a SliceOp. (The result shape of the SliceOp would
+ // have a dimension of size 0, the semantics of which is unclear.)
+ auto createGenerateOp = [&]() {
+ // Create GenerateOp.
+ auto generateOp = b.create<tensor::GenerateOp>(
+ loc, resultType, dynDims,
+ [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
+ builder.create<tensor::YieldOp>(gLoc, padValue);
+ });
+ return castResult(generateOp);
+ };
+
+ // Emit a SliceOp and a PadOp. Should not be used in cases where
+ // the result shape of the new SliceOp has a zero dimension.
+ auto createPadTensorOfSubTensor = [&]() {
+ // Create pad_tensor(subtensor(x)).
+ auto newSliceOp = b.create<tensor::ExtractSliceOp>(
+ loc, padOp.source(), newOffsets, newLengths, newStrides);
+ auto newPadOp = b.create<PadOp>(loc, newSliceOp, staticNewLows,
+ staticNewHighs, newLows, newHighs);
+
+ // Copy region to new PadOp.
+ BlockAndValueMapping bvm;
+ padOp.region().cloneInto(&newPadOp.getRegion(), bvm);
+
+ // Cast result and return.
+ return castResult(newPadOp);
+ };
+
+ // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically known
+ // that the original data source x is not used.
+ if (hasZeroLen)
+ return {createGenerateOp()};
+
+ // If there are dynamic dimensions: Generate an scf.if check to avoid
+ // creating SliceOps with result dimensions of size 0 at runtime.
+ if (dynHasZeroLenCond) {
+ auto result = b.create<scf::IfOp>(
+ loc, resultType, dynHasZeroLenCond,
+ /*thenBuilder=*/
+ [&](OpBuilder &b, Location loc) {
+ b.create<scf::YieldOp>(loc, createGenerateOp()->getResult(0));
+ },
+ /*elseBuilder=*/
+ [&](OpBuilder &b, Location loc) {
+ b.create<scf::YieldOp>(loc,
+ createPadTensorOfSubTensor()->getResult(0));
+ });
+ return {result};
+ }
+ return {createPadTensorOfSubTensor()};
+ }
+};
+
+} // namespace
+
+void mlir::tensor::registerTilingOpInterfaceExternalModels(
+ DialectRegistry &registry) {
+ registry.addOpInterface<tensor::PadOp, PadOpTiling>();
+}
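
For orientation, a minimal sketch (with placeholder names %src and %pad, not taken from the patch) of the rewrite the tiling interface above implements. Along dimension 0 of the example, offset = 2, low = 0, length = 3 and srcSize = 4, so newOffset = max(2 - 0, 0) = 2, endLoc = min(2 + 3, 4) = 4, newLength = 4 - 2 = 2 and newHigh = 3 - 2 = 1. Taking a slice of a padded tensor

  %0 = tensor.pad %src low[0, 0] high[7, 8] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad : f32
  } : tensor<4x5xf32> to tensor<11x13xf32>
  %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32>

becomes a pad of a smaller slice of the original source:

  %s = tensor.extract_slice %src[2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
  %1 = tensor.pad %s low[0, 0] high[1, 3] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad : f32
  } : tensor<2x1xf32> to tensor<3x4xf32>

If the computed length is zero in some dimension, only a tensor.generate of the padding value is emitted; when that can only be decided at runtime, the two variants are wrapped in the scf.if shown above. The @static_mixed_data_high_pad and @dynamic_high_pad tests updated below exercise exactly these cases.
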
diff --git a/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt
new file mode 100644
index 0000000000000..19a00b5bc6eb9
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_mlir_dialect_library(MLIRTensorUtils
+ Utils.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor
+
+ LINK_LIBS PUBLIC
+ MLIRAffine
+ MLIRArithmetic
+ MLIRIR
+ MLIRTensor
+)
diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
new file mode 100644
index 0000000000000..c7054cf50d060
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -0,0 +1,54 @@
+//===- Utils.cpp - Utilities to support the Tensor dialect ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for the Tensor dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+PadOp mlir::tensor::createPadScalarOp(Type type, Value source, Value pad,
+ ArrayRef<OpFoldResult> low,
+ ArrayRef<OpFoldResult> high, bool nofold,
+ Location loc, OpBuilder &builder) {
+ auto padTensorOp =
+ builder.create<PadOp>(loc, type, source, low, high, nofold);
+ int rank = padTensorOp.getResultType().getRank();
+ SmallVector<Type, 4> blockArgTypes(rank, builder.getIndexType());
+ SmallVector<Location, 4> blockArgLocs(rank, loc);
+ auto &region = padTensorOp.region();
+ // `builder.createBlock` changes the insertion point within the block. Create
+ // a guard to reset the insertion point of the builder after it is destroyed.
+ OpBuilder::InsertionGuard guard(builder);
+ builder.createBlock(&region, region.end(), blockArgTypes, blockArgLocs);
+ builder.create<YieldOp>(loc, pad);
+ return padTensorOp;
+}
+
+PadOp mlir::tensor::createPadHighOp(Type type, Value source, Value pad,
+ bool nofold, Location loc, OpBuilder &b) {
+ SmallVector<OpFoldResult, 4> low, high;
+ auto rankedTensorType = type.cast<RankedTensorType>();
+ assert(rankedTensorType.hasStaticShape());
+ for (const auto &en : enumerate(rankedTensorType.getShape())) {
+ AffineExpr d0;
+ bindDims(b.getContext(), d0);
+ auto dimOp = b.createOrFold<tensor::DimOp>(loc, source, en.index());
+ Value paddingWidth =
+ makeComposedAffineApply(b, loc, en.value() - d0, {dimOp});
+ high.push_back(paddingWidth);
+ low.push_back(b.createOrFold<arith::ConstantIndexOp>(loc, 0));
+ }
+ return createPadScalarOp(type, source, pad, low, high, nofold, loc, b);
+}
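
For illustration, a rough sketch (placeholder names %source and %cst; the exact printed form of the constants and affine maps may differ) of the IR createPadHighOp builds when growing a dynamically shaped source to a static tensor<8x16xf32>: each high padding amount is the target extent minus the corresponding source extent, the low padding is zero, and createPadScalarOp attaches the single-block region that yields the scalar padding value.

  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %d0 = tensor.dim %source, %c0 : tensor<?x?xf32>
  %d1 = tensor.dim %source, %c1 : tensor<?x?xf32>
  %h0 = affine.apply affine_map<(d0) -> (-d0 + 8)>(%d0)
  %h1 = affine.apply affine_map<(d0) -> (-d0 + 16)>(%d1)
  %0 = tensor.pad %source low[0, 0] high[%h0, %h1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %cst : f32
  } : tensor<?x?xf32> to tensor<8x16xf32>
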
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
index cac5cb5d7eb22..bd08b1ae2be4d 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
@@ -153,8 +153,8 @@ func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () {
// CHECK-LABEL: @max_pool_padded
func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () {
// CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32
- // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
- // CHECK-DAG: linalg.yield [[CONST]]
+ // CHECK-DAG: [[PAD:%.+]] = tensor.pad %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
+ // CHECK-DAG: tensor.yield [[CONST]]
// CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32
// CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62]
// CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]])
@@ -206,7 +206,7 @@ func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () {
func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) {
// Initial piece computes the sum of the pooling region, with appropriate padding.
// CHECK: [[CONST:%.+]] = arith.constant 0
- // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+ // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
// CHECK: [[CONST:%.+]] = arith.constant 0
// CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
// CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]])
@@ -268,7 +268,7 @@ func @avg_pool_dyn(%arg0: tensor<?x6x34x62xf32>) -> (tensor<?x5x33x62xf32>) {
// The calculations remain the same as above, only testing for dyn behavior
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[PAD:.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+ // CHECK: %[[PAD:.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
// CHECK: %[[POOLINIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 33, 62]
// CHECK: %[[FILL:.+]] = linalg.fill
// CHECK: %[[KERNEL:.+]] = linalg.init_tensor [4, 4]
@@ -386,8 +386,8 @@ func @conv2d_dyn(%input: tensor<?x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>
// CHECK-LABEL: @conv2d_padded_f32
func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
- // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
- // CHECK: linalg.yield %[[C0]]
+ // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+ // CHECK: tensor.yield %[[C0]]
// CHECK: linalg.conv_2d_nhwc_hwcf
%0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>)
return
@@ -398,8 +398,8 @@ func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x
// CHECK-LABEL: @conv2d_quant
func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () {
// CHECK: %[[C22:.+]] = arith.constant -22
- // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
- // CHECK: linalg.yield %[[C22]]
+ // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+ // CHECK: tensor.yield %[[C22]]
// CHECK: linalg.conv_2d_nhwc_hwcf_q
%0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32>
return
@@ -481,8 +481,8 @@ func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x
// CHECK-LABEL: @depthwise_conv_quant
func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
// CHECK: [[PADV:%.+]] = arith.constant -128
- // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
- // CHECK: linalg.yield [[PADV]]
+ // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+ // CHECK: tensor.yield [[PADV]]
// CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128]
// CHECK: [[CST0:%.+]] = arith.constant 0
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index 452c04b3489cd..55b8bce54b1a2 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -1158,9 +1158,9 @@ func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
// CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index
// CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
// CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32
- // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] {
+ // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] {
// CHECK: ^bb0(%arg1: index, %arg2: index):
- // CHECK: linalg.yield [[CST]]
+ // CHECK: tensor.yield [[CST]]
// CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
%1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xf32>, tensor<2x2xi32>) -> (tensor<4x9xf32>)
return %1 : tensor<4x9xf32>
@@ -1169,8 +1169,8 @@ func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
%0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
// CHECK: [[CST:%.+]] = arith.constant 0 : i32
- // CHECK: linalg.pad_tensor
- // CHECK: linalg.yield [[CST]]
+ // CHECK: tensor.pad
+ // CHECK: tensor.yield [[CST]]
%1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xi32>, tensor<2x2xi32>) -> (tensor<4x9xi32>)
return %1 : tensor<4x9xi32>
}
@@ -1178,8 +1178,8 @@ func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
func @pad_quant(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
%0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
// CHECK: [[CST:%.+]] = arith.constant 42 : i32
- // CHECK: linalg.pad_tensor
- // CHECK: linalg.yield [[CST]]
+ // CHECK: tensor.pad
+ // CHECK: tensor.yield [[CST]]
%1 = "tosa.pad"(%arg0, %0) { quantization_info = { input_zp = 42 : i32}} : (tensor<1x2xi32>, tensor<2x2xi32>) -> (tensor<4x9xi32>)
return %1 : tensor<4x9xi32>
}
@@ -1194,9 +1194,9 @@ func @pad_float_explicit(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
// CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index
// CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
// CHECK-DAG: [[CST:%.+]] = arith.constant 4.200000e+01 : f32
- // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] {
+ // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] {
// CHECK: ^bb0(%arg1: index, %arg2: index):
- // CHECK: linalg.yield [[CST]]
+ // CHECK: tensor.yield [[CST]]
// CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
%1 = arith.constant dense<42.0> : tensor<f32>
%2 = "tosa.pad"(%arg0, %0, %1) : (tensor<1x2xf32>, tensor<2x2xi32>, tensor<f32>) -> (tensor<4x9xf32>)
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
index 45e722d9f74b6..80bd5f8363e3e 100644
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -277,9 +277,9 @@ func @bufferize_tensor_collapse_shape(%arg0: tensor<4x5xf32>) -> tensor<20xf32>
func @pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tensor<4x?x?x?xf32> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
- %out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] {
+ %out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] {
^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32>
return %out : tensor<4x?x?x?xf32>
}
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 4d844b3035261..44cb18f11d152 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -282,7 +282,7 @@ func @fold_init_tensor_with_slice
// CHECK-NOT: linalg.fill
// CHECK-NOT: linalg.matmul
// CHECK-NOT: linalg.generic
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK: return
func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>,
%arg2: tensor<?x?xf32>, %high : index) {
@@ -296,146 +296,15 @@ func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>,
^bb(%3: i32) :
linalg.yield %3 : i32
} -> tensor<7x7xi32>
- %3 = linalg.pad_tensor %arg2 low[%c0, %c0] high[%high, %high] {
- ^bb0(%arg9: index, %arg10: index):
- linalg.yield %cst : f32
+ %3 = tensor.pad %arg2 low[%c0, %c0] high[%high, %high] {
+ ^bb0(%arg9: index, %arg10: index):
+ tensor.yield %cst : f32
} : tensor<?x?xf32> to tensor<2x4xf32>
return
}
// -----
-// CHECK-LABEL: func @pad_tensor_same_static_shape(
-// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
-// CHECK-NOT: linalg.pad_tensor
-// CHECK: return %[[ARG0]]
-func @pad_tensor_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
- -> tensor<5x6xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %0 = linalg.pad_tensor %arg0 low[%a, 0] high[0, %a] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %cst : f32
- } : tensor<5x6xf32> to tensor<5x6xf32>
- return %0 : tensor<5x6xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_tensor_nofold_same_static_shape(
-// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
-// CHECK: %[[PAD:.*]] = linalg.pad_tensor
-// CHECK: return %[[PAD]]
-func @pad_tensor_nofold_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
- -> tensor<5x6xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %0 = linalg.pad_tensor %arg0 nofold low[%a, 0] high[0, %a] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %cst : f32
- } : tensor<5x6xf32> to tensor<5x6xf32>
- return %0 : tensor<5x6xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_tensor_after_cast_different_shape(
-// CHECK-SAME: %[[INPUT:.*]]: tensor<?x64x?x?xf32>) -> tensor<?x?x?x?xf32> {
-// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]]
-// CHECK-SAME: low[0, 0, 1, 1] high[0, 0, 1, 1] {
-// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
-// CHECK: linalg.yield %[[CST]] : f32
-// CHECK: } : tensor<?x64x?x?xf32> to tensor<?x64x?x?xf32>
-// CHECK: %[[DYNAMIC:.*]] = tensor.cast %[[PADDED:.*]] :
-// CHECK-SAME: tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
-// CHECK: return %[[DYNAMIC]] : tensor<?x?x?x?xf32>
-// CHECK: }
-func @pad_tensor_after_cast_different_shape(%arg0: tensor<?x64x?x?xf32>)
- -> tensor<?x?x?x?xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
- %padded = linalg.pad_tensor %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
- linalg.yield %cst: f32
- } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
- return %padded: tensor<?x?x?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_tensor_after_cast_same_shape(
-// CHECK-SAME: %[[INPUT:.*]]: tensor<?x64x?x?xf32>,
-// CHECK-SAME: %[[PADDING:.*]]: index) -> tensor<?x?x?x?xf32> {
-// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]]
-// CHECK-SAME: low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1] {
-// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
-// CHECK: linalg.yield %[[CST]] : f32
-// CHECK: } : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
-// CHECK: return %[[PADDED:.*]] : tensor<?x?x?x?xf32>
-// CHECK: }
-func @pad_tensor_after_cast_same_shape(%arg0: tensor<?x64x?x?xf32>, %padding : index)
- -> tensor<?x?x?x?xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
- %padded = linalg.pad_tensor %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
- linalg.yield %cst: f32
- } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
- return %padded: tensor<?x?x?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_tensor_of_cast(
-// CHECK-NOT: tensor.cast
-// CHECK: linalg.pad_tensor
-// CHECK: tensor<8x?xf32> to tensor<8x32xf32>
-func @pad_tensor_of_cast(%t: tensor<8x?xf32>, %s: index) -> tensor<8x32xf32> {
- %c0 = arith.constant 0 : index
- %cst = arith.constant 0.000000e+00 : f32
- %0 = tensor.cast %t : tensor<8x?xf32> to tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %s] {
- ^bb0(%arg9: index, %arg10: index):
- linalg.yield %cst : f32
- } : tensor<?x?xf32> to tensor<8x32xf32>
- return %1 : tensor<8x32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @cast_of_pad_more_static
-func @cast_of_pad_more_static(%arg0: tensor<?x?xf32>, %padding: index) -> tensor<32x32xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- // CHECK: %[[PAD:.*]] = linalg.pad_tensor
- // CHECK: tensor<?x?xf32> to tensor<32x32xf32>
- %padded = linalg.pad_tensor %arg0 low[%padding, %padding] high[0, 0] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %cst : f32
- } : tensor<?x?xf32> to tensor<?x?xf32>
- // CHECK-NOT: tensor.cast
- %casted = tensor.cast %padded : tensor<?x?xf32> to tensor<32x32xf32>
- // CHECK: return %[[PAD]]
- return %casted : tensor<32x32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @cast_of_pad_less_static
-func @cast_of_pad_less_static(%arg0: tensor<32x?x?xf32>, %padding: index) -> tensor<?x32x32xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- // CHECK: linalg.pad_tensor
- %padded = linalg.pad_tensor %arg0 low[%padding, %padding, %padding] high[0, 0, 0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index):
- linalg.yield %cst : f32
- } : tensor<32x?x?xf32> to tensor<32x?x?xf32>
- // CHECK: %[[CAST:.*]] = tensor.cast
- %casted = tensor.cast %padded : tensor<32x?x?xf32> to tensor<?x32x32xf32>
- // CHECK: return %[[CAST]]
- return %casted : tensor<?x32x32xf32>
-}
-
-// -----
-
func @propogate_casts(%arg0 : tensor<?x?xf32>, %arg1 : f32, %arg2 : index,
%arg3 : index) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
@@ -579,71 +448,6 @@ func @fold_tiled_loop_inputs(%A: memref<192xf32>, %A_tensor: tensor<192xf32>,
// -----
-func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
- %c0 = arith.constant 0 : index
- %cst = arith.constant 0.0 : f32
- %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %c0] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %cst : f32
- } : tensor<?x?xf32> to tensor<4x4xf32>
- return %1 : tensor<4x4xf32>
-}
-// CHECK-LABEL: @tensor_pad_cast
-// CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32>
-// CHECK: return %[[ARG0]]
-
-// -----
-
-// CHECK-LABEL: func @fold_pad_tensor_source_cast(
-// CHECK-SAME: %[[ARG0:.*]]: tensor<4x?xf32>
-// CHECK-NOT: tensor.cast
-// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]]
-func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> {
- %cst = arith.constant 0.0 : f32
- %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low[0, 0] high[0, 1] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %cst : f32
- } : tensor<?x?xf32> to tensor<4x4xf32>
- return %1 : tensor<4x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_static_zero_cast(
-// CHECK-SAME: %[[ARG0:.*]]: tensor<?x?x?xf32>
-// CHECK-NOT: linalg.pad_tensor
-// CHECK: %[[RESULT:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
-// CHECK: return %[[RESULT]]
-func @pad_static_zero_cast(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
- %c0 = arith.constant 0 : index
- %0 = linalg.pad_tensor %arg0 low[0, %c0, 0] high[0, 0, %c0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index):
- linalg.yield %pad_value : f32
- } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
-
- return %0 : tensor<2x3x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_nofold_static_zero(
-// CHECK-SAME: %[[ARG0:.*]]: tensor<?x?x?xf32>
-// CHECK: %[[PAD:.*]] = linalg.pad_tensor
-// CHECK: return %[[PAD]]
-func @pad_nofold_static_zero(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
- %c0 = arith.constant 0 : index
- %0 = linalg.pad_tensor %arg0 nofold low[0, %c0, 0] high[0, 0, %c0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index):
- linalg.yield %pad_value : f32
- } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
-
- return %0 : tensor<2x3x4xf32>
-}
-
-// -----
-
func private @some_use(%i : index, %j : index)
// CHECK-LABEL: func @init_canonicalize
diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
index 7119db8f0ccd1..d698d63758e4c 100644
--- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir
+++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
@@ -48,7 +48,7 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7
func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
// Check the padding of the input operands has been hoisted out of the tile loop nest.
- // CHECK-PAD-COUNT=2: linalg.pad_tensor %{{.*}} nofold
+ // CHECK-PAD-COUNT=2: tensor.pad %{{.*}} nofold
// CHECK-PAD: scf.for
// Check CSE eliminates the duplicate min operations introduced by tiling.
// CHECK-PAD: affine.min #[[MAP0]]
diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
index 9dd1c1e1ef967..8b5e2a313d5a5 100644
--- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
@@ -9,9 +9,9 @@
// CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32>
func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
%cst = arith.constant 0.000000e+00 : f32
- %0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
+ %0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
return %0 : tensor<1x32x32x1xf32>
}
@@ -38,9 +38,9 @@ func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor
func @generalize_pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tensor<4x?x?x?xf32> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
- %out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] {
+ %out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] {
^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32>
return %out : tensor<4x?x?x?xf32>
}
diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir
index 566abcfbc39ec..416dfe37e93d0 100644
--- a/mlir/test/Dialect/Linalg/hoist-padding.mlir
+++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir
@@ -18,7 +18,7 @@ func @static_size_divisible(%arg0: tensor<24x12xf32>,
// MATVEC: %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] =
// MATVEC: %[[PIDX0:.*]] = affine.apply #[[DIV4]](%[[PIV0]])
// MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [4]
- // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]]
+ // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]]
// MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]]
// MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] =
@@ -29,9 +29,9 @@ func @static_size_divisible(%arg0: tensor<24x12xf32>,
// MATVEC-DAG: %[[IDX0:.*]] = affine.apply #[[DIV4]](%[[IV0]])
// MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
%2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
- %3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] {
+ %3 = tensor.pad %2 nofold low[%c0] high[%c0] {
^bb0(%arg5: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<4xf32> to tensor<4xf32>
// Check matvec uses the packed input vector.
@@ -67,7 +67,7 @@ func @static_size_not_divisible(%arg0: tensor<24x12xf32>,
// MATVEC: %[[TS0:.*]] = affine.min #[[MAP0]](%[[PIV0]])
// MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [%[[TS0]]]
// MATVEC: %[[HPD0:.*]] = affine.apply #[[MAP1]](%[[TS0]])
- // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]]{{.*}}high[%[[HPD0]]
+ // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]]{{.*}}high[%[[HPD0]]
// MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]]
// MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] =
@@ -80,13 +80,13 @@ func @static_size_not_divisible(%arg0: tensor<24x12xf32>,
// MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
%3 = tensor.extract_slice %arg1[%arg3] [%1] [1] : tensor<12xf32> to tensor<?xf32>
%4 = affine.apply #map1(%1)
- %5 = linalg.pad_tensor %2 low[%c0, %c0] high[%c0, %4] {
+ %5 = tensor.pad %2 low[%c0, %c0] high[%c0, %4] {
^bb0(%arg5: index, %arg6: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<24x?xf32> to tensor<24x5xf32>
- %6 = linalg.pad_tensor %3 low[%c0] high[%4] {
+ %6 = tensor.pad %3 low[%c0] high[%4] {
^bb0(%arg5: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?xf32> to tensor<5xf32>
// Check matvec uses the packed input vector.
@@ -127,7 +127,7 @@ func @dynamic_size(%arg0: tensor<24x?xf32>,
// MATVEC: %[[TS0:.*]] = affine.min #[[MAP0]](%[[PIV0]])[%[[D0]]]
// MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [%[[TS0]]]
// MATVEC: %[[HPD0:.*]] = affine.apply #[[MAP1]](%[[TS0]])
- // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]]{{.*}}high[%[[HPD0]]
+ // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]]{{.*}}high[%[[HPD0]]
// MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]]
// MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] =
@@ -140,13 +140,13 @@ func @dynamic_size(%arg0: tensor<24x?xf32>,
// MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
%4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor<?xf32> to tensor<?xf32>
%5 = affine.apply #map1(%2)
- %6 = linalg.pad_tensor %3 low[%c0, %c0] high[%c0, %5] {
+ %6 = tensor.pad %3 low[%c0, %c0] high[%c0, %5] {
^bb0(%arg5: index, %arg6: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<24x?xf32> to tensor<24x4xf32>
- %7 = linalg.pad_tensor %4 nofold low[%c0] high[%5] {
+ %7 = tensor.pad %4 nofold low[%c0] high[%5] {
^bb0(%arg5: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?xf32> to tensor<4xf32>
// Check matvec uses the packed input vector.
@@ -174,13 +174,13 @@ func @non_constant_padding(%arg0: tensor<24x12xf32>,
// Check the non constant padding is not hoisted.
// MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
- // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]
+ // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]
%2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
- %3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] {
+ %3 = tensor.pad %2 nofold low[%c0] high[%c0] {
^bb0(%arg5: index):
%5 = arith.index_cast %arg3 : index to i32
%6 = arith.sitofp %5 : i32 to f32
- linalg.yield %6 : f32
+ tensor.yield %6 : f32
} : tensor<4xf32> to tensor<4xf32>
// Check matvec uses the padded input vector.
@@ -209,13 +209,13 @@ func @non_constant_op_padding(%arg0: tensor<24x12xf32>,
// Check the non constant op padding is not hoisted.
// MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
// MATVEC: %[[V0:.*]] = tensor.extract %[[ARG1]][%[[IV0]]
- // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]
- // MATVEC: linalg.yield %[[V0]]
+ // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]
+ // MATVEC: tensor.yield %[[V0]]
%2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = tensor.extract %arg1[%arg3] : tensor<12xf32>
- %4 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] {
+ %4 = tensor.pad %2 nofold low[%c0] high[%c0] {
^bb0(%arg5: index):
- linalg.yield %3 : f32
+ tensor.yield %3 : f32
} : tensor<4xf32> to tensor<4xf32>
// Check matvec uses the padded input vector.
@@ -247,12 +247,12 @@ func @non_index_operand(%arg0: tensor<24x12xf32>,
// Check the index_cast prevents hoisting due to its non index operand.
// MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
// MATVEC: %[[IDX0:.*]] = arith.index_cast %[[ARG3]]
- // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]]
+ // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]]
%2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = arith.index_cast %arg3 : i32 to index
- %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] {
+ %4 = tensor.pad %2 nofold low[%3] high[%3] {
^bb0(%arg6: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<4xf32> to tensor<4xf32>
// Check matvec uses the padded input vector.
@@ -284,12 +284,12 @@ func @memory_effect(%arg0: tensor<24x12xf32>,
// Check the load prevents hoisting due to its memory effect.
// MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
// MATVEC: %[[IDX0:.*]] = memref.load %[[ARG3]]
- // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]]
+ // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]]
%2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = memref.load %arg3[%c0] : memref<?xindex>
- %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] {
+ %4 = tensor.pad %2 nofold low[%3] high[%3] {
^bb0(%arg6: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<4xf32> to tensor<4xf32>
// Check matvec uses the padded input vector.
@@ -321,15 +321,15 @@ func @index_result_loop(%arg0: tensor<24x12xf32>,
// Check the unexpected operation with a region prevents hoisting.
// MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
// MATVEC: %[[IDX0:.*]] = scf.for {{.*}} step %[[ARG3]]
- // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]]
+ // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]]
%2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = scf.for %arg6 = %c0 to %c12 step %arg3 iter_args(%arg7 = %c0) -> (index) {
%6 = arith.addi %arg3, %arg7 : index
scf.yield %6 : index
}
- %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] {
+ %4 = tensor.pad %2 nofold low[%3] high[%3] {
^bb0(%arg6: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<4xf32> to tensor<4xf32>
// Check matvec uses the padded input vector.
@@ -361,7 +361,7 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
// Check the second input operand is hoisted by two loop nests.
// MATMUL: %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] =
// MATMUL: %[[T1:.*]] = tensor.extract_slice %[[ARG1]]
- // MATMUL: %[[T2:.*]] = linalg.pad_tensor %[[T1]]
+ // MATMUL: %[[T2:.*]] = tensor.pad %[[T1]]
// MATMUL: scf.for %[[IV0:[0-9a-zA-Z]*]] =
%0 = scf.for %arg3 = %c0 to %c12 step %c5 iter_args(%arg4 = %arg2) -> (tensor<12x24xf32>) {
@@ -372,9 +372,9 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
%3 = affine.apply #map1(%1)
// Check the fused and padded fill op does not prevent hoisting.
- %4 = linalg.pad_tensor %2 nofold low[%c0, %c0] high[%3, %c0] {
+ %4 = tensor.pad %2 nofold low[%c0, %c0] high[%3, %c0] {
^bb0(%arg5: index, %arg6: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?x24xf32> to tensor<5x24xf32>
%5 = linalg.fill(%cst, %4) : f32, tensor<5x24xf32> -> tensor<5x24xf32>
%6 = tensor.extract_slice %5[0, 0] [%1, 24] [1, 1] : tensor<5x24xf32> to tensor<?x24xf32>
@@ -382,7 +382,7 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
// Check the first input operand is hoisted by one loop nest.
// MATMUL: %[[T3:.*]] = scf.for %[[PIV1:[0-9a-z]+]] =
// MATMUL: %[[T4:.*]] = tensor.extract_slice %[[ARG0]]
- // MATMUL: %[[T5:.*]] = linalg.pad_tensor %[[T4]]
+ // MATMUL: %[[T5:.*]] = tensor.pad %[[T4]]
// MATMUL: scf.for %[[IV1:[0-9a-zA-Z]*]] =
%7 = scf.for %arg5 = %c0 to %c6 step %c3 iter_args(%arg6 = %6) -> (tensor<?x24xf32>) {
@@ -393,20 +393,20 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
%9 = tensor.extract_slice %arg0[%arg3, %arg5] [%1, 3] [1, 1] : tensor<12x6xf32> to tensor<?x3xf32>
%10 = tensor.extract_slice %arg1[%arg5, 0] [3, 24] [1, 1] : tensor<6x24xf32> to tensor<3x24xf32>
%11 = tensor.extract_slice %arg6[0, 0] [%1, 24] [1, 1] : tensor<?x24xf32> to tensor<?x24xf32>
- %12 = linalg.pad_tensor %9 nofold low[%c0, %c0] high[%3, %c0] {
+ %12 = tensor.pad %9 nofold low[%c0, %c0] high[%3, %c0] {
^bb0(%arg7: index, %arg8: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?x3xf32> to tensor<5x3xf32>
- %13 = linalg.pad_tensor %10 nofold low[%c0, %c0] high[%c0, %c0] {
+ %13 = tensor.pad %10 nofold low[%c0, %c0] high[%c0, %c0] {
^bb0(%arg7: index, %arg8: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<3x24xf32> to tensor<3x24xf32>
// Check the output padding is not hoisted.
- // MATMUL: %[[T8:.*]] = linalg.pad_tensor
- %14 = linalg.pad_tensor %11 nofold low[%c0, %c0] high[%3, %c0] {
+ // MATMUL: %[[T8:.*]] = tensor.pad
+ %14 = tensor.pad %11 nofold low[%c0, %c0] high[%3, %c0] {
^bb0(%arg7: index, %arg8: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?x24xf32> to tensor<5x24xf32>
// Check matmul uses the padded operands.
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index b205e30213498..40defe47a1cb6 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -353,71 +353,6 @@ func @init_tensor_err(%arg0 : index)
// -----
-
-func @pad_result_type(%arg0: tensor<?x2x3x4xi32>, %arg1: index, %arg2: i32) -> tensor<?x?x?x8xf32> {
- // expected-error @+1 {{specified type 'tensor<?x?x?x8xf32>' does not match the inferred type 'tensor<?x?x?x9xi32>}}
- %0 = linalg.pad_tensor %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] {
- ^bb0(%arg3: index, %arg4: index):
- linalg.yield %arg2 : i32
- } : tensor<?x2x3x4xi32> to tensor<?x?x?x8xf32>
- return %0 : tensor<?x?x?x8xf32>
-}
-
-// -----
-
-func @pad_number_of_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
- // expected-error @+1 {{expected the block to have 2 arguments}}
- %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg2: index, %arg3: index, %arg4: index):
- linalg.yield %arg1 : i32
- } : tensor<?x4xi32> to tensor<?x9xi32>
- return %0 : tensor<?x9xi32>
-}
-
-// -----
-
-func @pad_no_block(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
- // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}}
- %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- } : tensor<?x4xi32> to tensor<?x9xi32>
- return %0 : tensor<?x9xi32>
-}
-
-// -----
-
-func @pad_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
- // expected-error @+1 {{op expected block argument 1 to be an index}}
- %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg2: i32, %arg3: i32):
- linalg.yield %arg1 : i32
- } : tensor<?x4xi32> to tensor<?x9xi32>
- return %0 : tensor<?x9xi32>
-}
-
-// -----
-
-func @pad_num_yields(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
- // expected-error @+3 {{op expected single yield operand (got 2)}}
- %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg2: index, %arg3: index):
- linalg.yield %arg1, %arg1 : i32, i32
- } : tensor<?x4xi32> to tensor<?x9xi32>
- return %0 : tensor<?x9xi32>
-}
-
-// -----
-
-func @pad_yield_type(%arg0: tensor<?x4xi32>, %arg1: i8) -> tensor<?x9xi32> {
- // expected-error @+3 {{op expected yield type to match shape element type}}
- %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg2: index, %arg3: index):
- linalg.yield %arg1 : i8
- } : tensor<?x4xi32> to tensor<?x9xi32>
- return %0 : tensor<?x9xi32>
-}
-
-// -----
-
func @illegal_fill_tensor_no_return(%arg0 : index, %arg1 : index, %arg2 : f32)
{
%0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
index 0e4c62447e507..c6a3b1eed30f1 100644
--- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
@@ -6,9 +6,9 @@
func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = bufferization.to_tensor %arg0 : memref<1x28x28x1xf32>
- %1 = linalg.pad_tensor %0 low[1, 1, 1, 2] high[0, 2, 2, 0] {
+ %1 = tensor.pad %0 low[1, 1, 1, 2] high[0, 2, 2, 0] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<2x31x31x3xf32>
%2 = bufferization.to_memref %1 : memref<2x31x31x3xf32>
return %2 : memref<2x31x31x3xf32>
@@ -25,9 +25,9 @@ func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3x
// CHECK-LABEL: func @pad_tensor_no_memrefs
func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
- %0 = linalg.pad_tensor %arg0 low[1, 2, 2] high[0, 2, 2] {
+ %0 = tensor.pad %arg0 low[1, 2, 2] high[0, 2, 2] {
^bb0(%arg1: index, %arg2: index, %arg3: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<1x28x28xf32> to tensor<2x32x32xf32>
return %0 : tensor<2x32x32xf32>
}
@@ -43,9 +43,9 @@ func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> {
// CHECK-LABEL: func @pad_tensor_detailed
func @pad_tensor_detailed(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
%cst = arith.constant 0.000000e+00 : f32
- %0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
+ %0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
return %0 : tensor<1x32x32x1xf32>
}
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
index 31163f5a6be8c..36879b7254a7e 100644
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -31,10 +31,10 @@ func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
// Check statically sized matmul inputs with partially divisible sizes are padded.
// MATMUL: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS2]]]
- // MATMUL: %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold
+ // MATMUL: %[[T3:.*]] = tensor.pad %[[T0]] nofold
// MATMUL-SAME: [%[[C0]], %[[C0]]]
// MATMUL-SAME: [%[[C0]], %[[V0]]
- // MATMUL: %[[T4:.*]] = linalg.pad_tensor %[[T1]] nofold
+ // MATMUL: %[[T4:.*]] = tensor.pad %[[T1]] nofold
// Check the statically sized matmul output with fully divisible sizes is not padded.
// MATMUL: %[[T5:.*]] = linalg.matmul
@@ -74,7 +74,7 @@ func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
// Check the statically sized matmul output with partially divisible sizes is padded.
// MATMUL: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS1]]]
- // MATMUL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] low
+ // MATMUL: %[[T1:.*]] = tensor.pad %[[T0]] low
// MATMUL-SAME: [%[[C0]], %[[C0]]]
// MATMUL-SAME: [%[[C0]], %[[V0]]
@@ -137,11 +137,11 @@ func @dynamic_sizes(%arg0: tensor<?x?xf32>,
// Check all matmul operands are padded.
// MATMUL: %[[V0:.*]] = affine.apply #[[MAP3]]()[%[[TS0]]]
// MATMUL: %[[V1:.*]] = affine.apply #[[MAP4]]()[%[[TS2]]]
- // MATMUL: %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold
+ // MATMUL: %[[T3:.*]] = tensor.pad %{{.*}} nofold
// MATMUL-SAME: [%[[C0]], %[[C0]]]
// MATMUL-SAME: [%[[V0]], %[[V1]]
- // MATMUL: %[[T4:.*]] = linalg.pad_tensor %{{.*}} nofold
- // MATMUL: %[[T5:.*]] = linalg.pad_tensor %{{.*}} low
+ // MATMUL: %[[T4:.*]] = tensor.pad %{{.*}} nofold
+ // MATMUL: %[[T5:.*]] = tensor.pad %{{.*}} low
// Check the dynamic matmul has been erased.
// MATMUL-NOT: = linalg.matmul {{.*}} tensor<?x?xf32>
@@ -172,7 +172,7 @@ func @pad_multiple(%arg0: tensor<64x64xf32>,
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Check both fill operations are padded by the same pad tensor operation.
- // FILL: %[[T0:.*]] = linalg.pad_tensor
+ // FILL: %[[T0:.*]] = tensor.pad
// FILL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
// FILL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]])
// FILL: = tensor.extract_slice %[[T2]]
@@ -197,20 +197,20 @@ func @compose_padding(%arg0: tensor<64x64xf32>,
// MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
// MATMUL-SAME: [0, 0]
// MATMUL-SAME: [%[[SIZE]], %[[SIZE]]]
- // MATMUL: %[[T1:.*]] = linalg.pad_tensor %[[T0]]
+ // MATMUL: %[[T1:.*]] = tensor.pad %[[T0]]
// MATMUL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]]
// MATMUL: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
+ %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?x?xf32> to tensor<64x64xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%4 = tensor.extract_slice %3[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Check there are no additional pad tensor operations.
- // MATMUL-NOT: linalg.pad_tensor
+ // MATMUL-NOT: tensor.pad
// Check the matmul directly uses the result of the fill operation.
// MATMUL: %[[T4:.*]] = linalg.matmul ins(%[[T3]]
@@ -233,16 +233,16 @@ func @different_padding_values(%arg0: tensor<64x64xf32>,
%cst = arith.constant 42.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
+ %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?x?xf32> to tensor<64x64xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Different padding values prevent composing the paddings (42.0 vs. 0.0).
// MATMUL: = linalg.fill
- // MATMUL: = linalg.pad_tensor
+ // MATMUL: = tensor.pad
// MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
@@ -258,16 +258,16 @@ func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
+ %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?x?xf32> to tensor<64x64xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Different dynamic sizes prevent composing the paddings (%iv0 vs %size).
// MATMUL: = linalg.fill
- // MATMUL: = linalg.pad_tensor
+ // MATMUL: = tensor.pad
// MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
@@ -283,16 +283,16 @@ func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0, 0] [%size, %size, 1] [1, 1, 1] : tensor<64x64x1xf32> to tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
+ %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?x?xf32> to tensor<64x64xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%3 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Different dynamic ranks prevent composing the paddings ([%size, %size, 1] vs [%size, %size]).
// MATMUL: = linalg.fill
- // MATMUL: = linalg.pad_tensor
+ // MATMUL: = tensor.pad
// MATMUL: = linalg.matmul
%4 = linalg.matmul ins(%3, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %4 : tensor<?x?xf32>
@@ -308,16 +308,16 @@ func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
+ %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?x?xf32> to tensor<62x62xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<62x62xf32> -> tensor<62x62xf32>
%4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
// Different static sizes prevent composing the paddings (62 vs 64 derived from #map0).
// MATMUL: = linalg.fill
- // MATMUL: = linalg.pad_tensor
+ // MATMUL: = tensor.pad
// MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
@@ -336,7 +336,7 @@ func @scalar_operand(%arg0: f32,
%0 = affine.min #map0()[%iv0]
// FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
- // FILL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold
+ // FILL: %[[T1:.*]] = tensor.pad %[[T0]] nofold
%1 = tensor.extract_slice %arg1[0, 0] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
// Check only the fill output operand is padded.
@@ -361,8 +361,8 @@ func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
%2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
// Check the matmul inputs are padded despite the missing slice for the static output.
- // MATMUL: %[[T0:.*]] = linalg.pad_tensor
- // MATMUL: %[[T1:.*]] = linalg.pad_tensor
+ // MATMUL: %[[T0:.*]] = tensor.pad
+ // MATMUL: %[[T1:.*]] = tensor.pad
// MATMUL: = linalg.matmul ins(%[[T0]], %[[T1]]
// MATMUL-SAME: outs(%[[ARG2]]
%3 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%arg2 : tensor<4x5xf32>) -> tensor<4x5xf32>
@@ -414,8 +414,8 @@ func @static_input_padding_only(%arg0: tensor<24x12xf32>,
%3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
// Check the matmul inputs are padded despite the failure to compute a padding value for the static output.
- // INPUTS-ONLY: %[[T1:.*]] = linalg.pad_tensor
- // INPUTS-ONLY: %[[T2:.*]] = linalg.pad_tensor
+ // INPUTS-ONLY: %[[T1:.*]] = tensor.pad
+ // INPUTS-ONLY: %[[T2:.*]] = tensor.pad
// INPUTS-ONLY: = linalg.matmul ins(%[[T1]], %[[T2]]
// INPUTS-ONLY-SAME: outs(%[[T0]]
%4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
@@ -465,7 +465,7 @@ func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
%0 = tensor.extract_slice %arg0[0, 0, 0, 0] [1, %size, 1, %size] [1, 1, 1, 1] : tensor<1x64x1x64xf32> to tensor<1x?x?xf32>
// Check the fill is padded despite the rank-reducing slice operation.
- // FILL: %[[T0:.*]] = linalg.pad_tensor
+ // FILL: %[[T0:.*]] = tensor.pad
// FILL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
// FILL-SAME: tensor<1x64x64xf32>
// FILL: = tensor.extract_slice %[[T1]]
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir
index 7f6bd150f3de9..90e6381f6f16a 100644
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -15,9 +15,9 @@ func @dynamic_pad_fusion(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index,
%1 = arith.mulf %arg6, %arg6 : f32
linalg.yield %1 : f32
} -> tensor<?x?xf32>
- %1 = linalg.pad_tensor %0 low [%arg1, %arg2] high [%arg3, %arg4] {
+ %1 = tensor.pad %0 low [%arg1, %arg2] high [%arg3, %arg4] {
^bb0(%arg6: index, %arg7 : index):
- linalg.yield %arg5 : f32
+ tensor.yield %arg5 : f32
} : tensor<?x?xf32> to tensor<?x?xf32>
return %1 : tensor<?x?xf32>
}
@@ -64,9 +64,9 @@ func @mixed_pad_fusion(%arg0 : tensor<?x42xf32>, %arg1 : index, %arg2 : index,
%1 = arith.mulf %arg4, %arg4 : f32
linalg.yield %1 : f32
} -> tensor<42x?xf32>
- %1 = linalg.pad_tensor %0 low [3, %arg1] high [4, %arg2] {
+ %1 = tensor.pad %0 low [3, %arg1] high [4, %arg2] {
^bb0(%arg4: index, %arg5 : index):
- linalg.yield %arg3 : f32
+ tensor.yield %arg3 : f32
} : tensor<42x?xf32> to tensor<49x?xf32>
return %1 : tensor<49x?xf32>
}
diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
index 9e0a672252104..27f014ed66147 100644
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -253,9 +253,9 @@ func @dim_of_pad_op(%arg0 : tensor<2x?x?xf32>, %arg1 : index, %arg2 : index,
%c3 = arith.constant 3 : index
%c4 = arith.constant 4 : index
%c5 = arith.constant 5 : index
- %0 = linalg.pad_tensor %arg0 low[%c3, %arg1, %c4] high[7, %c5, %arg2] {
+ %0 = tensor.pad %arg0 low[%c3, %arg1, %c4] high[7, %c5, %arg2] {
^bb0(%arg4: index, %arg5: index, %arg6: index):
- linalg.yield %arg3 : f32
+ tensor.yield %arg3 : f32
} : tensor<2x?x?xf32> to tensor<?x?x?xf32>
%1 = tensor.dim %0, %c0 : tensor<?x?x?xf32>
%2 = tensor.dim %0, %c1 : tensor<?x?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index 9e8dfb292032f..337b7c0ad2b7e 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -15,77 +15,6 @@
// CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>
// CHECK-DAG: #[[$strided3DT:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d1 * s2 + d0)>
-func @pad_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
- %pad_value: f32) -> tensor<6x?x?x?xf32> {
- %0 = linalg.pad_tensor %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
- linalg.yield %pad_value : f32
- } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
- return %0 : tensor<6x?x?x?xf32>
-}
-// CHECK-LABEL: func @pad_dynamic
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
-// CHECK-SAME: %[[LOW:[a-zA-Z0-9_]*]]
-// CHECK-SAME: %[[HIGH:[a-zA-Z0-9_]*]]
-// CHECK: linalg.pad_tensor %[[ARG0]]
-// CHECK-SAME: low[2, %[[LOW]], 3, 3]
-// CHECK-SAME: high[3, 3, %[[HIGH]], 2]
-// CHECK: : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
-
-// -----
-
-func @pad_static(%arg0: tensor<3x4xf32>, %pad_value: f32) -> tensor<6x9xf32> {
- %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg1 : index, %arg2 : index):
- linalg.yield %pad_value : f32
- } : tensor<3x4xf32> to tensor<6x9xf32>
- return %0 : tensor<6x9xf32>
-}
-// CHECK-LABEL: func @pad_static
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
-// CHECK: linalg.pad_tensor %[[ARG0]] low[1, 2] high[2, 3]
-// CHECK: : tensor<3x4xf32> to tensor<6x9xf32>
-
-// -----
-
-func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index,
- %pad_value: f32) -> tensor<?x?xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad_value : f32
- } : tensor<2x3xf32> to tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-// CHECK-LABEL: func @pad_asymmetrical
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
-// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
-// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
-// CHECK: linalg.pad_tensor %[[ARG0]]
-// CHECK-SAME: low[0, 0]
-// CHECK-SAME: high[%[[UB0]], %[[UB1]]]
-// CHECK: : tensor<2x3xf32> to tensor<?x?xf32>
-
-// -----
-
-func @pad_to_static_size(%arg0: tensor<?x?xf32>, %ub0: index, %ub1: index,
- %pad_value: f32) -> tensor<2x3xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] {
- ^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad_value : f32
- } : tensor<?x?xf32> to tensor<2x3xf32>
- return %0 : tensor<2x3xf32>
-}
-// CHECK-LABEL: func @pad_to_static_size
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
-// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
-// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
-// CHECK: linalg.pad_tensor %[[ARG0]]
-// CHECK-SAME: low[0, 0]
-// CHECK-SAME: high[%[[UB0]], %[[UB1]]]
-// CHECK: : tensor<?x?xf32> to tensor<2x3xf32>
-
-// -----
-
func @views(%arg0: index) {
%c0 = arith.constant 0 : index
%0 = arith.muli %arg0, %arg0 : index
diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
index a8e26baa2bded..64bb9d1ea9eff 100644
--- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
+++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
@@ -6,9 +6,9 @@
// CHECK: return %[[RESULT]]
func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32)
-> tensor<2x1xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+ %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<4x5xf32> to tensor<11x13xf32>
%1 = tensor.extract_slice %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32>
return %1 : tensor<2x1xf32>
@@ -18,16 +18,16 @@ func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_high_pad_only
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-NOT: tensor.extract_slice
// CHECK: %[[RESULT:.*]] = tensor.generate
// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<2x4xf32>
func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
-> tensor<2x4xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+ %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<4x5xf32> to tensor<11x13xf32>
%1 = tensor.extract_slice %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32>
return %1 : tensor<2x4xf32>
@@ -37,16 +37,16 @@ func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_low_pad_only
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-NOT: tensor.extract_slice
// CHECK: %[[RESULT:.*]] = tensor.generate
// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<2x3xf32>
func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
-> tensor<2x3xf32> {
- %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+ %0 = tensor.pad %arg0 low[3, 7] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<4x5xf32> to tensor<14x20xf32>
%1 = tensor.extract_slice %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32>
return %1 : tensor<2x3xf32>
@@ -56,16 +56,16 @@ func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_low_pad_only_2
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-NOT: tensor.extract_slice
// CHECK: %[[RESULT:.*]] = tensor.generate
// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<1x3xf32>
func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
-> tensor<1x3xf32> {
- %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+ %0 = tensor.pad %arg0 low[3, 7] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<4x5xf32> to tensor<14x20xf32>
%1 = tensor.extract_slice %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32>
return %1 : tensor<1x3xf32>
@@ -75,16 +75,16 @@ func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_mixed_data_high_pad
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
-// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[1, 3]
-// CHECK: linalg.yield %[[PAD]]
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[1, 3]
+// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<3x4xf32>
func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
-> tensor<3x4xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+ %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<4x5xf32> to tensor<11x13xf32>
%1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32>
return %1 : tensor<3x4xf32>
@@ -94,16 +94,16 @@ func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_mixed_data_low_pad
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
-// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0]
-// CHECK: linalg.yield %[[PAD]]
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[SUBTENSOR]] low[1, 3] high[0, 0]
+// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<3x4xf32>
func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
-> tensor<3x4xf32> {
- %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+ %0 = tensor.pad %arg0 low[3, 7] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<4x5xf32> to tensor<14x20xf32>
%1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32>
return %1 : tensor<3x4xf32>
@@ -113,15 +113,15 @@ func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_mixed_data_low_high_pad
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: linalg.pad_tensor
-// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]] low[1, 1] high[2, 3]
-// CHECK: linalg.yield %[[PAD]]
+// CHECK-NOT: tensor.pad
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[ARG0]] low[1, 1] high[2, 3]
+// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<7x9xf32>
func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
-> tensor<7x9xf32> {
- %0 = linalg.pad_tensor %arg0 low[2, 3] high[7, 8] {
+ %0 = tensor.pad %arg0 low[2, 3] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<4x5xf32> to tensor<13x16xf32>
%1 = tensor.extract_slice %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32>
return %1 : tensor<7x9xf32>
@@ -131,7 +131,7 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @dynamic_high_pad
// CHECK-SAME: %[[ARG0:.*]]: tensor<?x5xf32>
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: tensor.dim %[[ARG0]], %[[C0]]
// CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<3x4xf32>) {
@@ -139,14 +139,14 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK: scf.yield %[[GEN]]
// CHECK: } else {
// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor<?x5xf32> to tensor<?x1xf32>
-// CHECK: %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
+// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
// CHECK: scf.yield %[[PADTENSOR]]
// CHECK: }
// CHECK: return %[[RESULT]]
func @dynamic_high_pad(%arg0 : tensor<?x5xf32>, %h1: index, %pad : f32) -> tensor<3x4xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[%h1, 8] {
+ %0 = tensor.pad %arg0 low[0, 0] high[%h1, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<?x5xf32> to tensor<?x13xf32>
%1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<?x13xf32> to tensor<3x4xf32>
return %1 : tensor<3x4xf32>
@@ -156,7 +156,7 @@ func @dynamic_high_pad(%arg0 : tensor<?x5xf32>, %h1: index, %pad : f32) -> tenso
// CHECK-LABEL: @dynamic_extract_size
// CHECK-SAME: %[[ARG0:.*]]: tensor<?x5xf32>, %[[ARG1:.*]]: index
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: tensor.dim %[[ARG0]], %[[C0]]
// CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<?x4xf32>) {
@@ -164,14 +164,14 @@ func @dynamic_high_pad(%arg0 : tensor<?x5xf32>, %h1: index, %pad : f32) -> tenso
// CHECK: scf.yield %[[GEN]]
// CHECK: } else {
// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor<?x5xf32> to tensor<?x1xf32>
-// CHECK: %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
+// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
// CHECK: scf.yield %[[PADTENSOR]]
// CHECK: }
// CHECK: return %[[RESULT]]
func @dynamic_extract_size(%arg0 : tensor<?x5xf32>, %s1: index, %pad : f32) -> tensor<?x4xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+ %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<?x5xf32> to tensor<?x13xf32>
%1 = tensor.extract_slice %0[2, 4] [%s1, 4] [1, 1] : tensor<?x13xf32> to tensor<?x4xf32>
return %1 : tensor<?x4xf32>
@@ -184,14 +184,14 @@ func @dynamic_extract_size(%arg0 : tensor<?x5xf32>, %s1: index, %pad : f32) -> t
// CHECK: tensor.generate
// CHECK: else
// CHECK: %[[SLICE:.*]] = tensor.extract_slice
-// CHECK: linalg.pad_tensor %[[SLICE]] low[0, 0]
+// CHECK: tensor.pad %[[SLICE]] low[0, 0]
func @dynamic_zero_low_padding(%arg0 : tensor<?x?xf32>, %pad : f32,
%o1 : index, %o2 : index,
%s1 : index, %s2 : index)
-> tensor<?x?xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+ %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<?x?xf32> to tensor<?x?xf32>
%1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
return %1 : tensor<?x?xf32>
@@ -204,14 +204,14 @@ func @dynamic_zero_low_padding(%arg0 : tensor<?x?xf32>, %pad : f32,
// CHECK: tensor.generate
// CHECK: else
// CHECK: %[[SLICE:.*]] = tensor.extract_slice
-// CHECK: linalg.pad_tensor %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0]
+// CHECK: tensor.pad %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0]
func @dynamic_zero_high_padding(%arg0 : tensor<?x?xf32>, %pad : f32,
%o1 : index, %o2 : index,
%s1 : index, %s2 : index)
-> tensor<?x?xf32> {
- %0 = linalg.pad_tensor %arg0 low[7, 8] high[0, 0] {
+ %0 = tensor.pad %arg0 low[7, 8] high[0, 0] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad : f32
+ tensor.yield %pad : f32
} : tensor<?x?xf32> to tensor<?x?xf32>
%1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
return %1 : tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
index b60c8c466f154..ac94261a153f0 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -288,7 +288,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK: tensor.generate
// CHECK: else
// CHECK: tensor.extract_slice
-// CHECK: linalg.pad_tensor
+// CHECK: tensor.pad
// CHECK: tensor.extract_slice
// CHECK: tensor.extract_slice
// CHECK: linalg.generic
@@ -303,9 +303,9 @@ func @pad_generic_static(%small_input: tensor<58x1xf32>, %large_input: tensor<64
%d0 = tensor.dim %large_input, %c0 : tensor<64x128xf32>
%d1 = tensor.dim %large_input, %c1 : tensor<64x128xf32>
- %pad = linalg.pad_tensor %small_input low[4, 60] high[2, 67] {
+ %pad = tensor.pad %small_input low[4, 60] high[2, 67] {
^bb0(%arg0: index, %arg1: index):
- linalg.yield %zero : f32
+ tensor.yield %zero : f32
} : tensor<58x1xf32> to tensor<64x128xf32>
%fill = linalg.fill(%zero, %large_input) : f32, tensor<64x128xf32> -> tensor<64x128xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
index a83793544078c..a8dfdd940673a 100644
--- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
+++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
@@ -23,7 +23,7 @@
// TILE2: tensor.generate
// TILE2: else
// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]]
+// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]]
// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// TILE2: return %[[RESULT]]
@@ -43,15 +43,15 @@
// TILE1: tensor.generate
// TILE1: else
// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
+// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
// TILE1: return %[[RESULT]]
func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
%pad_value: f32) -> tensor<?x?xf32> {
- %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] {
+ %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad_value : f32
+ tensor.yield %pad_value : f32
} : tensor<?x?xf32> to tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
@@ -71,7 +71,7 @@ func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
// TILE2: tensor.generate
// TILE2: else
// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]]
+// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]]
// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// TILE2: return %[[RESULT]]
@@ -86,15 +86,15 @@ func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
// TILE1: tensor.generate
// TILE1: else
// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
// TILE1: return %[[RESULT]]
func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
%pad_value: f32) -> tensor<15x16xf32> {
- %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] {
+ %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad_value : f32
+ tensor.yield %pad_value : f32
} : tensor<7x9xf32> to tensor<15x16xf32>
return %0 : tensor<15x16xf32>
}
@@ -112,7 +112,7 @@ func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
// TILE1: scf.yield %[[GEN]] : tensor<14x3xf32>
// TILE1: else
// TILE1: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[0, 0] high[7, %{{.*}}]
+// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
// TILE1: scf.yield %[[PAD]] : tensor<14x3xf32>
// TILE1: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
// TILE1: scf.yield %[[R3]] : tensor<14x15xf32>
@@ -120,9 +120,9 @@ func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>,
%output_tensor: tensor<14x15xf32>,
%pad_value: f32) -> tensor<14x15xf32> {
- %0 = linalg.pad_tensor %input_tensor low[0, 0] high[7, 6] {
+ %0 = tensor.pad %input_tensor low[0, 0] high[7, 6] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %pad_value : f32
+ tensor.yield %pad_value : f32
} : tensor<7x9xf32> to tensor<14x15xf32>
return %0 : tensor<14x15xf32>
}
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index ee3ec0019a840..c9f50af28ef27 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -537,7 +537,7 @@ func @matmul_tensors(
// CHECK-LABEL: func @pad_static(
// CHECK-SAME: %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
@@ -547,9 +547,9 @@ func @matmul_tensors(
// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>
// CHECK: return %[[RESULT]]
func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {
+ %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] {
^bb0(%arg1: index, %arg2: index, %arg3: index):
- linalg.yield %pad_value : f32
+ tensor.yield %pad_value : f32
} : tensor<2x?x2xf32> to tensor<2x3x4xf32>
return %0 : tensor<2x3x4xf32>
}
@@ -558,7 +558,7 @@ func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32>
// CHECK-LABEL: func @pad_static_source(
// CHECK-SAME: %[[ARG0:.*]]: tensor<2x5x2xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK: %[[INIT:.*]] = linalg.init_tensor [2, 6, 4] : tensor<2x6x4xf32>
@@ -568,9 +568,9 @@ func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32>
// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x5x2xf32>, tensor<2x6x4xf32>
// CHECK: return %[[WRITE]]
func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6x4xf32> {
- %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {
+ %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] {
^bb0(%arg1: index, %arg2: index, %arg3: index):
- linalg.yield %pad_value : f32
+ tensor.yield %pad_value : f32
} : tensor<2x5x2xf32> to tensor<2x6x4xf32>
return %0 : tensor<2x6x4xf32>
}
@@ -579,7 +579,7 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6
// CHECK-LABEL: func @pad_static_dynamic(
// CHECK-SAME: %[[SRC:.*]]: tensor<1x2x2x?xf32>, %[[LOW:.*]]: index, %[[HIGH:.*]]: index
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
@@ -596,9 +596,9 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6
// CHECK: return %[[RESULT]]
func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
%pad_value: f32) -> tensor<6x?x?x?xf32> {
- %0 = linalg.pad_tensor %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
+ %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
- linalg.yield %pad_value : f32
+ tensor.yield %pad_value : f32
} : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
return %0 : tensor<6x?x?x?xf32>
}
@@ -607,7 +607,7 @@ func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
// CHECK-LABEL: func @pad_and_transfer_read
// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0
// CHECK: %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
@@ -616,9 +616,9 @@ func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> {
%c0 = arith.constant 0 : index
%c5 = arith.constant 5.0 : f32
%c6 = arith.constant 6.0 : f32
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[5, 7] {
+ %0 = tensor.pad %arg0 low[0, 0] high[5, 7] {
^bb0(%arg1: index, %arg2: index):
- linalg.yield %c5 : f32
+ tensor.yield %c5 : f32
} : tensor<5x6xf32> to tensor<10x13xf32>
%1 = vector.transfer_read %0[%c0, %c0], %c6
: tensor<10x13xf32>, vector<7x9xf32>
@@ -631,7 +631,7 @@ func private @make_vector() -> vector<7x9xf32>
// CHECK-LABEL: func @pad_and_transfer_write_static
// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32>
@@ -640,9 +640,9 @@ func @pad_and_transfer_write_static(
%arg0: tensor<5x6xf32>) -> tensor<5x6xf32> {
%c0 = arith.constant 0 : index
%c5 = arith.constant 5.0 : f32
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[5, 7] {
+ %0 = tensor.pad %arg0 low[0, 0] high[5, 7] {
^bb0(%arg2: index, %arg3: index):
- linalg.yield %c5 : f32
+ tensor.yield %c5 : f32
} : tensor<5x6xf32> to tensor<10x13xf32>
%1 = call @make_vector() : () -> vector<7x9xf32>
%2 = vector.transfer_write %1, %0[%c0, %c0]
@@ -657,7 +657,7 @@ func private @make_vector() -> vector<7x9xf32>
// CHECK-LABEL: func @pad_and_transfer_write_dynamic_static
// CHECK-SAME: %[[ARG0:.*]]: tensor<?x?xf32>, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
@@ -669,9 +669,9 @@ func @pad_and_transfer_write_dynamic_static(
%c5 = arith.constant 5.0 : f32
%s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1]
: tensor<?x?xf32> to tensor<?x6xf32>
- %0 = linalg.pad_tensor %s low[0, 0] high[%padding, 7] {
+ %0 = tensor.pad %s low[0, 0] high[%padding, 7] {
^bb0(%arg2: index, %arg3: index):
- linalg.yield %c5 : f32
+ tensor.yield %c5 : f32
} : tensor<?x6xf32> to tensor<?x13xf32>
%1 = call @make_vector() : () -> vector<7x9xf32>
%2 = vector.transfer_write %1, %0[%c0, %c0]
@@ -686,7 +686,7 @@ func private @make_vector() -> tensor<12x13xf32>
// CHECK-LABEL: func @pad_and_insert_slice_source
// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0
// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32>
@@ -697,9 +697,9 @@ func @pad_and_insert_slice_source(
%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> {
%c0 = arith.constant 0 : index
%c5 = arith.constant 5.0 : f32
- %0 = linalg.pad_tensor %arg0 low[0, 0] high[2, 3] {
+ %0 = tensor.pad %arg0 low[0, 0] high[2, 3] {
^bb0(%arg2: index, %arg3: index):
- linalg.yield %c5 : f32
+ tensor.yield %c5 : f32
} : tensor<5x6xf32> to tensor<7x9xf32>
%1 = call @make_vector() : () -> tensor<12x13xf32>
%r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32>
@@ -717,9 +717,9 @@ func private @make_vector() -> tensor<12x13xf32>
func @pad_and_insert_slice_dest(
%arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
%c5 = arith.constant 5.0 : f32
- %0 = linalg.pad_tensor %arg0 low[0, 0, 0] high[0, 7, 7] {
+ %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] {
^bb0(%arg2: index, %arg3: index, %arg4: index):
- linalg.yield %c5 : f32
+ tensor.yield %c5 : f32
} : tensor<1x5x6xf32> to tensor<1x12x13xf32>
%1 = call @make_vector() : () -> tensor<12x13xf32>
%r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32>
@@ -730,7 +730,7 @@ func @pad_and_insert_slice_dest(
// CHECK-LABEL: func @pad_tensor_non_const_pad_value
// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
-// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: tensor.pad
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
@@ -743,14 +743,14 @@ func @pad_and_insert_slice_dest(
func @pad_tensor_non_const_pad_value(%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> {
%c0 = arith.constant 0 : index
%c5 = arith.constant 5.0 : f32
- %0 = linalg.pad_tensor %arg0 low[3, 4] high[4, 3] {
+ %0 = tensor.pad %arg0 low[3, 4] high[4, 3] {
^bb0(%arg1: index, %arg2: index):
%i1 = arith.index_cast %arg1 : index to i32
%i2 = arith.index_cast %arg2 : index to i32
%f1 = arith.sitofp %i1 : i32 to f32
%f2 = arith.sitofp %i2 : i32 to f32
%m = arith.mulf %f1, %f2 : f32
- linalg.yield %m : f32
+ tensor.yield %m : f32
} : tensor<5x6xf32> to tensor<12x13xf32>
return %0 : tensor<12x13xf32>
}
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
index 82f880d098fdc..10d39132a1126 100644
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -982,3 +982,199 @@ func @fold_rank() -> (index) {
// CHECK-NEXT: return [[C3]]
return %rank_0 : index
}
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_same_static_shape(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
+// CHECK-NOT: tensor.pad
+// CHECK: return %[[ARG0]]
+func @pad_tensor_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
+ -> tensor<5x6xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.pad %arg0 low[%a, 0] high[0, %a] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %cst : f32
+ } : tensor<5x6xf32> to tensor<5x6xf32>
+ return %0 : tensor<5x6xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_nofold_same_static_shape(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
+// CHECK: %[[PAD:.*]] = tensor.pad
+// CHECK: return %[[PAD]]
+func @pad_tensor_nofold_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
+ -> tensor<5x6xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.pad %arg0 nofold low[%a, 0] high[0, %a] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %cst : f32
+ } : tensor<5x6xf32> to tensor<5x6xf32>
+ return %0 : tensor<5x6xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_after_cast_different_shape(
+// CHECK-SAME: %[[INPUT:.*]]: tensor<?x64x?x?xf32>) -> tensor<?x?x?x?xf32> {
+// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[PADDED:.*]] = tensor.pad %[[INPUT]]
+// CHECK-SAME: low[0, 0, 1, 1] high[0, 0, 1, 1] {
+// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
+// CHECK: tensor.yield %[[CST]] : f32
+// CHECK: } : tensor<?x64x?x?xf32> to tensor<?x64x?x?xf32>
+// CHECK: %[[DYNAMIC:.*]] = tensor.cast %[[PADDED:.*]] :
+// CHECK-SAME: tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
+// CHECK: return %[[DYNAMIC]] : tensor<?x?x?x?xf32>
+// CHECK: }
+func @pad_tensor_after_cast_different_shape(%arg0: tensor<?x64x?x?xf32>)
+ -> tensor<?x?x?x?xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
+ %padded = tensor.pad %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1] {
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+ tensor.yield %cst: f32
+ } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
+ return %padded: tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_after_cast_same_shape(
+// CHECK-SAME: %[[INPUT:.*]]: tensor<?x64x?x?xf32>,
+// CHECK-SAME: %[[PADDING:.*]]: index) -> tensor<?x?x?x?xf32> {
+// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[PADDED:.*]] = tensor.pad %[[INPUT]]
+// CHECK-SAME: low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1] {
+// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
+// CHECK: tensor.yield %[[CST]] : f32
+// CHECK: } : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
+// CHECK: return %[[PADDED:.*]] : tensor<?x?x?x?xf32>
+// CHECK: }
+func @pad_tensor_after_cast_same_shape(%arg0: tensor<?x64x?x?xf32>, %padding : index)
+ -> tensor<?x?x?x?xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
+ %padded = tensor.pad %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1] {
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+ tensor.yield %cst: f32
+ } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
+ return %padded: tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_of_cast(
+// CHECK-NOT: tensor.cast
+// CHECK: tensor.pad
+// CHECK: tensor<8x?xf32> to tensor<8x32xf32>
+func @pad_tensor_of_cast(%t: tensor<8x?xf32>, %s: index) -> tensor<8x32xf32> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.cast %t : tensor<8x?xf32> to tensor<?x?xf32>
+ %1 = tensor.pad %0 low[%c0, %c0] high[%c0, %s] {
+ ^bb0(%arg9: index, %arg10: index):
+ tensor.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<8x32xf32>
+ return %1 : tensor<8x32xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @cast_of_pad_more_static
+func @cast_of_pad_more_static(%arg0: tensor<?x?xf32>, %padding: index) -> tensor<32x32xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ // CHECK: %[[PAD:.*]] = tensor.pad
+ // CHECK: tensor<?x?xf32> to tensor<32x32xf32>
+ %padded = tensor.pad %arg0 low[%padding, %padding] high[0, 0] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<?x?xf32>
+ // CHECK-NOT: tensor.cast
+ %casted = tensor.cast %padded : tensor<?x?xf32> to tensor<32x32xf32>
+ // CHECK: return %[[PAD]]
+ return %casted : tensor<32x32xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @cast_of_pad_less_static
+func @cast_of_pad_less_static(%arg0: tensor<32x?x?xf32>, %padding: index) -> tensor<?x32x32xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ // CHECK: tensor.pad
+ %padded = tensor.pad %arg0 low[%padding, %padding, %padding] high[0, 0, 0] {
+ ^bb0(%arg1: index, %arg2: index, %arg3: index):
+ tensor.yield %cst : f32
+ } : tensor<32x?x?xf32> to tensor<32x?x?xf32>
+ // CHECK: %[[CAST:.*]] = tensor.cast
+ %casted = tensor.cast %padded : tensor<32x?x?xf32> to tensor<?x32x32xf32>
+ // CHECK: return %[[CAST]]
+ return %casted : tensor<?x32x32xf32>
+}
+
+// -----
+
+func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor<?x?xf32>
+ %1 = tensor.pad %0 low[%c0, %c0] high[%c0, %c0] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<4x4xf32>
+ return %1 : tensor<4x4xf32>
+}
+// CHECK-LABEL: @tensor_pad_cast
+// CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32>
+// CHECK: return %[[ARG0]]
+
+// -----
+
+// CHECK-LABEL: func @fold_pad_tensor_source_cast(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x?xf32>
+// CHECK-NOT: tensor.cast
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[ARG0]]
+func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> {
+ %cst = arith.constant 0.0 : f32
+ %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor<?x?xf32>
+ %1 = tensor.pad %0 low[0, 0] high[0, 1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<4x4xf32>
+ return %1 : tensor<4x4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_static_zero_cast(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<?x?x?xf32>
+// CHECK-NOT: tensor.pad
+// CHECK: %[[RESULT:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
+// CHECK: return %[[RESULT]]
+func @pad_static_zero_cast(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
+ %c0 = arith.constant 0 : index
+ %0 = tensor.pad %arg0 low[0, %c0, 0] high[0, 0, %c0] {
+ ^bb0(%arg1: index, %arg2: index, %arg3: index):
+ tensor.yield %pad_value : f32
+ } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
+
+ return %0 : tensor<2x3x4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_nofold_static_zero(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<?x?x?xf32>
+// CHECK: %[[PAD:.*]] = tensor.pad
+// CHECK: return %[[PAD]]
+func @pad_nofold_static_zero(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
+ %c0 = arith.constant 0 : index
+ %0 = tensor.pad %arg0 nofold low[0, %c0, 0] high[0, 0, %c0] {
+ ^bb0(%arg1: index, %arg2: index, %arg3: index):
+ tensor.yield %pad_value : f32
+ } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
+
+ return %0 : tensor<2x3x4xf32>
+}
diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
index 8cdab35fb5e20..cec4718595a45 100644
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -317,3 +317,58 @@ func @illegal_num_offsets(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?x?xf32>,
%0 = tensor.insert_slice %arg0 into %arg1[0, 0] [%arg2, %arg3] [1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
return
}
+
+// -----
+
+
+func @pad_result_type(%arg0: tensor<?x2x3x4xi32>, %arg1: index, %arg2: i32) -> tensor<?x?x?x8xf32> {
+ // expected-error @+1 {{specified type 'tensor<?x?x?x8xf32>' does not match the inferred type 'tensor<?x?x?x9xi32>}}
+ %0 = tensor.pad %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] {
+ ^bb0(%arg3: index, %arg4: index):
+ tensor.yield %arg2 : i32
+ } : tensor<?x2x3x4xi32> to tensor<?x?x?x8xf32>
+ return %0 : tensor<?x?x?x8xf32>
+}
+
+// -----
+
+func @pad_number_of_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
+ // expected-error @+1 {{expected the block to have 2 arguments}}
+ %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+ ^bb0(%arg2: index, %arg3: index, %arg4: index):
+ tensor.yield %arg1 : i32
+ } : tensor<?x4xi32> to tensor<?x9xi32>
+ return %0 : tensor<?x9xi32>
+}
+
+// -----
+
+func @pad_no_block(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
+ // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}}
+ %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+ } : tensor<?x4xi32> to tensor<?x9xi32>
+ return %0 : tensor<?x9xi32>
+}
+
+// -----
+
+func @pad_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
+ // expected-error @+1 {{op expected block argument 1 to be an index}}
+ %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+ ^bb0(%arg2: i32, %arg3: i32):
+ tensor.yield %arg1 : i32
+ } : tensor<?x4xi32> to tensor<?x9xi32>
+ return %0 : tensor<?x9xi32>
+}
+
+// -----
+
+func @pad_yield_type(%arg0: tensor<?x4xi32>, %arg1: i8) -> tensor<?x9xi32> {
+ // expected-error @+1 {{op expected yield type to match shape element type}}
+ %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+ ^bb0(%arg2: index, %arg3: index):
+ tensor.yield %arg1 : i8
+ } : tensor<?x4xi32> to tensor<?x9xi32>
+ return %0 : tensor<?x9xi32>
+}
+
diff --git a/mlir/test/Dialect/Tensor/ops.mlir b/mlir/test/Dialect/Tensor/ops.mlir
index d461dffeb6d5b..a76a18d190b57 100644
--- a/mlir/test/Dialect/Tensor/ops.mlir
+++ b/mlir/test/Dialect/Tensor/ops.mlir
@@ -176,3 +176,77 @@ func @rank(%t : tensor<4x4x?xf32>) {
%1 = tensor.rank %t : tensor<4x4x?xf32>
return
}
+
+// -----
+
+func @pad_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
+ %pad_value: f32) -> tensor<6x?x?x?xf32> {
+ %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+ tensor.yield %pad_value : f32
+ } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+ return %0 : tensor<6x?x?x?xf32>
+}
+// CHECK-LABEL: func @pad_dynamic
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[LOW:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[HIGH:[a-zA-Z0-9_]*]]
+// CHECK: tensor.pad %[[ARG0]]
+// CHECK-SAME: low[2, %[[LOW]], 3, 3]
+// CHECK-SAME: high[3, 3, %[[HIGH]], 2]
+// CHECK: : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+
+// -----
+
+func @pad_static(%arg0: tensor<3x4xf32>, %pad_value: f32) -> tensor<6x9xf32> {
+ %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+ ^bb0(%arg1 : index, %arg2 : index):
+ tensor.yield %pad_value : f32
+ } : tensor<3x4xf32> to tensor<6x9xf32>
+ return %0 : tensor<6x9xf32>
+}
+// CHECK-LABEL: func @pad_static
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+// CHECK: tensor.pad %[[ARG0]] low[1, 2] high[2, 3]
+// CHECK: : tensor<3x4xf32> to tensor<6x9xf32>
+
+// -----
+
+func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index,
+ %pad_value: f32) -> tensor<?x?xf32> {
+ %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : f32
+ } : tensor<2x3xf32> to tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+// CHECK-LABEL: func @pad_asymmetrical
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
+// CHECK: tensor.pad %[[ARG0]]
+// CHECK-SAME: low[0, 0]
+// CHECK-SAME: high[%[[UB0]], %[[UB1]]]
+// CHECK: : tensor<2x3xf32> to tensor<?x?xf32>
+
+// -----
+
+func @pad_to_static_size(%arg0: tensor<?x?xf32>, %ub0: index, %ub1: index,
+ %pad_value: f32) -> tensor<2x3xf32> {
+ %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : f32
+ } : tensor<?x?xf32> to tensor<2x3xf32>
+ return %0 : tensor<2x3xf32>
+}
+// CHECK-LABEL: func @pad_to_static_size
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
+// CHECK: tensor.pad %[[ARG0]]
+// CHECK-SAME: low[0, 0]
+// CHECK-SAME: high[%[[UB0]], %[[UB1]]]
+// CHECK: : tensor<?x?xf32> to tensor<2x3xf32>
+
+// -----
+
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
index 58a4b7630dcc1..d840491b89984 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
@@ -21,9 +21,9 @@ func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: tensor<f
%8 = affine.apply #map1(%arg3, %c0)[%c2]
%9 = tensor.extract_slice %arg1[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32>
%10 = tensor.cast %9 : tensor<2xf32> to tensor<?xf32>
- %11 = linalg.pad_tensor %10 low[%c0] high[%c0] {
+ %11 = tensor.pad %10 low[%c0] high[%c0] {
^bb0(%arg5: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?xf32> to tensor<2xf32>
%12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor<?x2xf32>
scf.yield %12 : tensor<?x2xf32>
@@ -38,9 +38,9 @@ func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: tensor<f
%8 = affine.apply #map1(%arg3, %c0)[%c2]
%9 = tensor.extract_slice %arg0[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32>
%10 = tensor.cast %9 : tensor<2xf32> to tensor<?xf32>
- %11 = linalg.pad_tensor %10 low[%c0] high[%c0] {
+ %11 = tensor.pad %10 low[%c0] high[%c0] {
^bb0(%arg5: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<?xf32> to tensor<2xf32>
%12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor<?x2xf32>
scf.yield %12 : tensor<?x2xf32>
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
index 05b7e1a7d2cac..ced7a49073b37 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
@@ -13,9 +13,9 @@ func @main() {
%offset = arith.constant 2 : index
%cst = arith.constant 2.3 : f32
%c0 = arith.constant 0 : index
- %out = linalg.pad_tensor %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset] {
+ %out = tensor.pad %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset] {
^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index):
- linalg.yield %cst : f32
+ tensor.yield %cst : f32
} : tensor<1x?x3xf32> to tensor<1x?x?xf32>
%unranked = tensor.cast %out: tensor<1x?x?xf32> to tensor<*xf32>
call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index 76d983d3892e4..eef99f82fcb3f 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -42,6 +42,7 @@ struct TestLinalgTransforms
memref::MemRefDialect,
scf::SCFDialect,
StandardOpsDialect,
+ linalg::LinalgDialect,
vector::VectorDialect,
gpu::GPUDialect>();
// clang-format on
@@ -549,20 +550,20 @@ static void applyLinalgToVectorPatterns(FuncOp funcOp) {
funcOp.getContext(),
LinalgTransformationFilter()
.addOpFilter<ContractionOpInterface, FillOp, CopyOp, GenericOp>());
- populatePadTensorOpVectorizationPatterns(patterns);
+ populatePadOpVectorizationPatterns(patterns);
populateConvolutionVectorizationPatterns(patterns);
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
static void applyPadTensorToGenericPatterns(FuncOp funcOp) {
RewritePatternSet patterns(funcOp.getContext());
- patterns.add<PadTensorOpTransformationPattern>(funcOp.getContext());
+ patterns.add<PadOpTransformationPattern>(funcOp.getContext());
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
static void applyGeneralizePadTensorPatterns(FuncOp funcOp) {
RewritePatternSet patterns(funcOp.getContext());
- patterns.add<GeneralizePadTensorOpPattern>(funcOp.getContext());
+ patterns.add<GeneralizePadOpPattern>(funcOp.getContext());
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index cd4d2e195d03a..e6b4654097d2c 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -4280,6 +4280,7 @@ td_library(
":InferTypeOpInterfaceTdFiles",
":OpBaseTdFiles",
":SideEffectInterfacesTdFiles",
+ ":TilingInterfaceTdFiles",
":ViewLikeInterfaceTdFiles",
],
)
@@ -4336,6 +4337,7 @@ cc_library(
":StandardOps",
":Support",
":TensorOpsIncGen",
+ ":TilingInterface",
":ViewLikeInterface",
"//llvm:Support",
],
@@ -4356,6 +4358,38 @@ cc_library(
],
)
+cc_library(
+ name = "TensorTilingInterfaceImpl",
+ srcs = ["lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp"],
+ hdrs = ["include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"],
+ includes = ["include"],
+ deps = [
+ ":Affine",
+ ":IR",
+ ":LinalgOps",
+ ":SCFDialect",
+ ":StandardOps",
+ ":TensorDialect",
+ ":TilingInterface",
+ "//llvm:Support",
+ ],
+)
+
+cc_library(
+ name = "TensorUtils",
+ srcs = ["lib/Dialect/Tensor/Utils/Utils.cpp"],
+ hdrs = ["include/mlir/Dialect/Tensor/Utils/Utils.h"],
+ includes = ["include"],
+ deps = [
+ ":Affine",
+ ":ArithmeticDialect",
+ ":IR",
+ ":Support",
+ ":TensorDialect",
+ "//llvm:Support",
+ ],
+)
+
gentbl_cc_library(
name = "TensorPassIncGen",
strip_include_prefix = "include",
@@ -5634,6 +5668,7 @@ cc_library(
":StandardToSPIRV",
":TensorDialect",
":TensorInferTypeOpInterfaceImpl",
+ ":TensorTilingInterfaceImpl",
":TensorTransforms",
":TosaDialect",
":TosaToLinalg",
@@ -6913,6 +6948,7 @@ cc_library(
":Support",
":TensorBufferizableOpInterfaceImpl",
":TensorDialect",
+ ":TensorUtils",
":TransformUtils",
":VectorBufferizableOpInterfaceImpl",
":VectorOps",
@@ -6952,7 +6988,6 @@ cc_library(
deps = [
":IR",
":Support",
- ":TensorDialect",
":TilingInterfaceIncGen",
":ViewLikeInterface",
"//llvm:Support",
@@ -7255,6 +7290,7 @@ cc_library(
":SCFDialect",
":StandardOps",
":TensorDialect",
+ ":TensorUtils",
":TosaDialect",
":Transforms",
],