[Mlir-commits] [mlir] [mlir][tensor][linalg] Move Pack/Unpack Ops to Linalg (PR #123902)
Andrzej Warzyński
llvmlistbot at llvm.org
Wed Jan 22 02:21:31 PST 2025
https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/123902
>From e9ed0a121ffa930d48a0a45ade10e19f1e197868 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 16 Jan 2025 12:20:43 +0000
Subject: [PATCH 1/4] [mlir][tensor][linalg] Move Pack/Unpack Ops to Linalg
(1/4)
This is merely moving code around, no new functionality is added.
PATCH 1: Copies `tensor.pack` and `tensor.unpack` as `linalg.pack` and
`linalg.unpack`, respectively. New Ops are defined in
LinalgRelayoutOps.td.
Note, `tensor.pack` and `tensor.unpack` are still present at this point.
CONTEXT:
This change was discussed in the following RFC:
* https://discourse.llvm.org/t/rfc-move-tensor-pack-and-tensor-unpack-into-linalg
---
.../mlir/Dialect/Linalg/IR/CMakeLists.txt | 7 +
mlir/include/mlir/Dialect/Linalg/IR/Linalg.h | 3 +
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 331 +++++++
.../mlir/Dialect/Utils/ReshapeOpsUtils.h | 7 +
mlir/lib/Dialect/Linalg/IR/CMakeLists.txt | 1 +
mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp | 15 +-
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 913 ++++++++++++++++++
mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 14 -
mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp | 10 +
9 files changed, 1286 insertions(+), 15 deletions(-)
create mode 100644 mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt
index 71214b4404c550..efd708c5e5a113 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt
@@ -65,6 +65,13 @@ add_public_tablegen_target(MLIRLinalgStructuredOpsIncGen)
add_dependencies(MLIRLinalgStructuredOpsIncGen LinalgOdsGen)
add_dependencies(mlir-headers MLIRLinalgStructuredOpsIncGen)
+set(LLVM_TARGET_DEFINITIONS LinalgRelayoutOps.td)
+mlir_tablegen(LinalgRelayoutOps.h.inc -gen-op-decls)
+mlir_tablegen(LinalgRelayoutOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRLinalgRelayoutOpsIncGen)
+add_dependencies(MLIRLinalgRelayoutOpsIncGen LinalgOdsGen)
+add_dependencies(mlir-headers MLIRLinalgRelayoutOpsIncGen)
+
set(LLVM_TARGET_DEFINITIONS LinalgInterfaces.td)
mlir_tablegen(LinalgInterfaces.h.inc -gen-op-interface-decls)
mlir_tablegen(LinalgInterfaces.cpp.inc -gen-op-interface-defs)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h b/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h
index 85f5ebeb8081ee..57bf6305a469d0 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h
+++ b/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h
@@ -123,4 +123,7 @@ OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val,
#define GET_OP_CLASSES
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.h.inc"
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.h.inc"
+
#endif // MLIR_DIALECT_LINALG_IR_LINALG_H
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
new file mode 100644
index 00000000000000..845a34e90bc097
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -0,0 +1,331 @@
+//===- LinalgRelayoutOps.td - Linalg dialect relayout ops -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the operation definition file for the relayout operations
+// (linalg.pack and linalg.unpack).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LINALG_RELEAYOUT_OPS
+#define LINALG_RELEAYOUT_OPS
+
+include "mlir/Dialect/Linalg/IR/LinalgBase.td"
+include "mlir/Interfaces/DestinationStyleOpInterface.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/IR/OpAsmInterface.td"
+
+//===----------------------------------------------------------------------===//
+// RelayoutOp
+//===----------------------------------------------------------------------===//
+
+class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
+ Op<Linalg_Dialect, mnemonic, !listconcat(traits, [
+ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
+ DestinationStyleOpInterface,
+ ConditionallySpeculatable, NoMemoryEffect,
+ DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
+ TypesMatchWith<"result type matches type of dest",
+ "dest", "result",
+ "$_self">])> {
+
+ code commonExtraClassDeclaration = [{
+ size_t getSourceRank() { return getSourceType().getRank(); };
+ size_t getDestRank() { return getDestType().getRank(); };
+ RankedTensorType getSourceType() {
+ return ::llvm::cast<RankedTensorType>(getSource().getType()); };
+ RankedTensorType getDestType() {
+ return ::llvm::cast<RankedTensorType>(getDest().getType()); };
+
+ MutableOperandRange getDpsInitsMutable() { return getDestMutable(); }
+
+ /// Interface method for ConditionallySpeculatable.
+ Speculation::Speculatability getSpeculatability();
+
+ /// Return a mapping from positions `inner_dims_pos` to their
+ /// tile factors.
+ DenseMap<int64_t, OpFoldResult> getDimAndTileMapping();
+
+ /// Return the tile sizes as OpFoldResult.
+ SmallVector<OpFoldResult> getMixedTiles();
+
+ /// Return the tile sizes as `int64_t`. If a tile size is dynamic
+ /// a sentinel `kDynamic` is introduced at that position in
+ /// the returned vector.
+ SmallVector<int64_t> getStaticTiles();
+
+ /// Retrieve all outer dims for this Pack/UnPack Op, i.e. all the leading
+ /// dims excluding the trailing dims corresponding to `innerTiles`. Note
+ /// that this will include both tiled and non-tiled dimensions. The order
+ /// of the output dimensions is consistent with the shape of the packed
+ /// tensor.
+ ArrayRef<int64_t> getAllOuterDims();
+
+ /// Similar to `getAllOuterDims`, but only retrieve the outer dims that
+ /// have been tiled. Also, the order of the output dimensions is consistent
+ /// with `inner_dims_pos` rather than the packed tensor.
+ SmallVector<int64_t> getTiledOuterDims();
+ }];
+
+ let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// PackOp
+//===----------------------------------------------------------------------===//
+
+def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
+ AttrSizedOperandSegments]> {
+ let summary = "linalg.pack operation";
+ let description = [{
+ The "pack" operation converts a source tensor of rank `n` into a result
+ tensor of rank `n + k` with a tiled and packed layout (maybe with padding)
+ and optionally transposes the tiled source tensor dimensions.
+
+ `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
+ being tiled, where `0 < k <= n`. The order of the dimensions matters:
+ - The tiled dimensions (of size `inner_tiles`) are added to the end of the result
+ tensor in the order in which they appear in `inner_dims_pos`.
+ - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
+ `inner_tiles[i]`.
+
+ `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
+ correspond to the least significant ("inner") result tensor dimension sizes,
+ in the same order. Tile sizes can be static or dynamic.
+
+ Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
+ `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
+ by 16 and the 1st source dimension is tiled by 32. Other source dimensions
+ (if any) are not tiled. If `inner_dims_pos = [1, 0]`, the 1st dimension is
+ tiled by 16 and the 0th dimension is tiled by 32.
+
+ Example:
+ ```mlir
+ // NC to NCnc
+ %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
+ // \ / \ /
+ // outer dims inner dims
+ ```
+
+ `outer_dims_perm` (optional) specifies a permutation for the outer
+ dimensions. If specified, it must have `n` elements.
+
+ Example:
+ ```mlir
+ // CK to KCck
+ %0 = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
+ inner_tiles = [8, 32] into %dest
+ : tensor<128x256xf32> -> tensor<8x16 x 8x32 xf32>
+ // \ /
+ // compare with "NC to NCnc": outer dims are transposed
+ ```
+
+ `padding_value` specifies a padding value at the boundary on non-perfectly
+ divisible dimensions. Padding is optional:
+ - If absent, it is UB if the tile does not perfectly divide the dimension.
+ - If present, it will pad along high dimensions (high-padding) to make the
+ tile complete.
+
+ Example:
+ ```mlir
+ %0 = linalg.pack %arg0 padding_value(%pad : f32) outer_dims_perm = [2, 1, 0]
+ inner_dims_pos = [1] inner_tiles = [2] into %arg1
+ : tensor<200x127x256xf32> -> tensor<256x64x200x2xf32>
+ // \
+ // padded and tiled dim
+ //
+ // Source dimension 1 is tiled. 64 does not divide 127 evenly, so 1 padded
+ // element is added at the end.
+ //
+ // Note: Only tiled dimensions can be padded.
+ ```
+ }];
+ let arguments = (ins AnyRankedTensor:$source,
+ AnyRankedTensor:$dest,
+ Optional<AnyType>:$padding_value,
+ DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
+ DenseI64ArrayAttr:$inner_dims_pos,
+ Variadic<Index>:$inner_tiles,
+ DenseI64ArrayAttr:$static_inner_tiles);
+ let results = (outs AnyRankedTensor:$result);
+ let assemblyFormat = [{
+ $source
+ (`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?
+ (`outer_dims_perm` `=` $outer_dims_perm^)?
+ `inner_dims_pos` `=` $inner_dims_pos
+ `inner_tiles` `=`
+ custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
+ `into` $dest attr-dict `:` type($source) `->` type($dest)
+ }];
+
+ let builders = [
+ OpBuilder<(ins "Value":$source, "Value":$dest,
+ "ArrayRef<int64_t>":$innerDimsPos,
+ "ArrayRef<OpFoldResult>":$innerTiles,
+ CArg<"std::optional<Value>", "std::nullopt">:$paddingValue,
+ CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)>
+ ];
+
+ let extraClassDeclaration = commonExtraClassDeclaration # [{
+ // Method to get the shape of the result as `SmallVector<OpFoldResult>`.
+ // This is a static method to allow getting the shape of the destination
+ // expected while creating a `pack` op.
+ static SmallVector<OpFoldResult> getResultShape(OpBuilder &builder,
+ Location loc, ArrayRef<OpFoldResult> sourceDims,
+ ArrayRef<OpFoldResult> innerTileDims, ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm = {});
+
+ // Method to get the `RankedTensorType` of the result based on the inner
+ // tiles, position of the inner tiles (innerDimsPos) and interchange vector
+ // of outer loops (outerDimsPerm).
+ static RankedTensorType inferPackedType(RankedTensorType sourceType,
+ ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm = {});
+
+ // Returns true if we have enough static information to catch undefined
+ // behavior when the tile size does not divide perfectly the dimension of
+ // the input tensor. Detecting UB requires that the input size and either
+ // corresponding tile or output size are static.
+ static bool requirePaddingValue(ArrayRef<int64_t> inputShape,
+ ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outputShape,
+ ArrayRef<int64_t> outerDimsPerm,
+ ArrayRef<OpFoldResult> innerTiles);
+
+ static Value createDestinationTensor(OpBuilder &b, Location loc,
+ Value source, ArrayRef<OpFoldResult> innerTileSizes,
+ ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
+
+ /// Build and return a new PackOp that is a clone of the current PackOp with
+ /// (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted by
+ /// innerPermutation (resp. outerPermutation).
+ /// A new `tensor.empty` of the proper shape is built in the process.
+ /// Asserts that:
+ /// - At least one of innerPermutation or outerPermutation is non-empty.
+ /// - If not empty, innerPermutation is a valid permutation of size
+ /// matching innerDimPos.
+ /// - If not empty, outerPermutation is a valid permutation of size
+ /// matching outerDimsPerm.
+ PackOp createTransposedClone(OpBuilder &b,
+ Location loc,
+ ArrayRef<int64_t> innerPermutation,
+ ArrayRef<int64_t> outerPermutation);
+
+ /// Check if this PackOp is like a simple pad operation.
+ /// In other words, this operation:
+ /// 1. adds useless dimensions (dimension of size 1),
+ /// 2. pads the other ones, and
+ /// 3. doesn't shuffle the dimensions
+ bool isLikePad();
+ }];
+
+ let hasCanonicalizeMethod = 1;
+
+ let hasFolder = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// UnPackOp
+//===----------------------------------------------------------------------===//
+
+def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
+ let summary = "linalg.unpack operation";
+ let description = [{
+ The "unpack" operation converts a source tensor of rank `n` with a tiled and
+ packed layout to a result tensor of rank `n - k`.
+
+ `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with
+ which the last `k` source tensor dimensions are combined, where
+ `0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`.
+ The order of the dimensions in `inner_dims_pos` matters: dimension
+ `inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that
+ `outer_dims_perm` is not specified).
+
+ `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
+ correspond to the least significant ("inner") source tensor dimension sizes.
+ The behavior of this op is undefined if:
+ - `inner_tiles` do not exactly match with the corresponding source tensor
+ dimension sizes.
+ - Or, `inner_tiles[i]` does not divide the size of dimension
+ `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
+ evenly.
+
+ `outer_dims_perm` (optional) specifies a permutation for the outer
+ dimensions. If specified, it must have `n - k` elements. If specified, this
+ permutation is applied before combining any dimensions.
+
+ Example:
+
+ ```mlir
+ // NCnc to NC:
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
+
+ // KCck to CK (unpack reverses the "CK to KCck" pack example):
+ %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
+ inner_tiles = [8, 32] into %dest
+ : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
+ ```
+ }];
+ let arguments = (ins AnyRankedTensor:$source,
+ AnyRankedTensor:$dest,
+ DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
+ DenseI64ArrayAttr:$inner_dims_pos,
+ Variadic<Index>:$inner_tiles,
+ DenseI64ArrayAttr:$static_inner_tiles);
+ let results = (outs AnyRankedTensor:$result);
+ let assemblyFormat = [{
+ $source
+ (`outer_dims_perm` `=` $outer_dims_perm^)?
+ `inner_dims_pos` `=` $inner_dims_pos
+ `inner_tiles` `=`
+ custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
+ `into` $dest attr-dict `:` type($source) `->` type($dest)
+ }];
+
+ let builders = [
+ OpBuilder<(ins "Value":$source, "Value":$dest,
+ "ArrayRef<int64_t>":$innerDimsPos,
+ "ArrayRef<OpFoldResult>":$innerTiles,
+ CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)>
+ ];
+
+ let extraClassDeclaration = commonExtraClassDeclaration # [{
+ static Value createDestinationTensor(OpBuilder &b, Location loc,
+ Value source, ArrayRef<OpFoldResult> innerTileSizes,
+ ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
+
+ /// Build and return a new UnPackOp that is a clone of the current UnPackOp
+ /// with (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted by
+ /// innerPermutation (resp. outerPermutation).
+ /// Asserts that:
+ /// - At least one of innerPermutation or outerPermutation is non-empty.
+ /// - If not empty, innerPermutation is a valid permutation of size
+ /// matching innerDimPos.
+ /// - If not empty, outerPermutation is a valid permutation of size
+ /// matching outerDimsPerm.
+ UnPackOp createTransposedClone(OpBuilder &b,
+ Location loc,
+ Value transposedSource,
+ ArrayRef<int64_t> innerPermutation,
+ ArrayRef<int64_t> outerPermutation);
+
+ /// Check if this UnPackOp is like a simple unpad operation.
+ /// In other words, this operation:
+ /// 1. drops useless dimensions (dimension of size 1), and
+ /// 2. reduces dimensions in place (i.e., no transpose.)
+ bool isLikeUnPad();
+ }];
+
+ let hasCanonicalizeMethod = 1;
+
+ let hasFolder = 1;
+}
+
+#endif // LINALG_RELEAYOUT_OPS
diff --git a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
index 3fa35bf1851a9c..3af89a6ab37991 100644
--- a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
@@ -568,6 +568,13 @@ struct PackingMetadata {
// repeated N^2 counts).
PackingMetadata computePackingMetadata(int64_t packedRank,
ArrayRef<int64_t> innerDimPos);
+
+/// Try to remove a tensor operation if it would only reshape a constant.
+/// Removes the op and replaces the constant with a new constant of the result
+/// shape. When an optional cst attribute is passed, it is reshaped only if the
+/// splat value matches the value in the attribute.
+OpFoldResult reshapeConstantSource(DenseElementsAttr source, TensorType result,
+ std::optional<Attribute> cst = std::nullopt);
} // namespace mlir
#endif // MLIR_DIALECT_UTILS_RESHAPEOPSUTILS_H
diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
index ce8dc6ccb0fa33..b4aeb44ac8fafd 100644
--- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
@@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRLinalgDialect
MLIRLinalgOpsEnumsIncGen
MLIRLinalgOpsIncGen
MLIRLinalgStructuredOpsIncGen
+ MLIRLinalgRelayoutOpsIncGen
MLIRShardingInterfaceIncGen
LINK_LIBS PUBLIC
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
index 9e50c355c50417..c256b18dd2b172 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
@@ -114,6 +114,10 @@ void mlir::linalg::LinalgDialect::initialize() {
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
>();
+ addOperations<
+#define GET_OP_LIST
+#include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.cpp.inc"
+ >();
// Fill the Linalg-specific OpName to RegionBuilder map.
addNamedOpBuilders<
@@ -130,13 +134,22 @@ void mlir::linalg::LinalgDialect::initialize() {
>();
declarePromisedInterface<SubsetOpInterface, CopyOp>();
declarePromisedInterface<SubsetInsertionOpInterface, CopyOp>();
+
+ // ValueBoundsOpInterface
declarePromisedInterface<ValueBoundsOpInterface, IndexOp>();
- declarePromisedInterface<TilingInterface, linalg::GenericOp>();
+
declarePromisedInterface<PartialReductionOpInterface, linalg::GenericOp>();
+
+ // Tiling Interface
+ declarePromisedInterface<TilingInterface, linalg::GenericOp>();
declarePromisedInterfaces<TilingInterface,
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
>();
+ declarePromisedInterfaces<TilingInterface,
+#define GET_OP_LIST
+#include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.cpp.inc"
+ >();
declarePromisedInterfaces<PartialReductionOpInterface,
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index c13b663dbf05b1..7e627c7da77e1b 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -2275,6 +2275,8 @@ LogicalResult IndexOp::verify() {
#define GET_OP_CLASSES
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.cpp.inc"
AffineMap mlir::linalg::extractOrIdentityMap(std::optional<AffineMap> maybeMap,
unsigned rank,
@@ -3611,5 +3613,916 @@ Speculation::Speculatability MatmulOp::getSpeculatability() {
return getGenericSpeculatabilityImpl(cast<LinalgOp>(getOperation()));
}
+//===----------------------------------------------------------------------===//
+// PackOp/UnPackOp Common
+//===----------------------------------------------------------------------===//
+
+template <typename OpTy>
+static LogicalResult
+reifyResultShapesImpl(OpTy op, OpBuilder &builder,
+ ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
+ static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+ "applies to only pack or unpack operations");
+ int64_t destRank = op.getDestRank();
+ reifiedReturnShapes.resize(1, SmallVector<OpFoldResult>(destRank));
+ reifiedReturnShapes[0] =
+ tensor::getMixedSizes(builder, op.getLoc(), op.getDest());
+ return success();
+}
+
+template <typename OpTy>
+static DenseMap<int64_t, OpFoldResult> getDimAndTileMappingImpl(OpTy op) {
+ static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+ "applies to only pack or unpack operations");
+ DenseMap<int64_t, OpFoldResult> dimAndTileMapping;
+ ArrayRef<int64_t> dimsToTile = op.getInnerDimsPos();
+ SmallVector<OpFoldResult> tiles = op.getMixedTiles();
+ assert(tiles.size() == dimsToTile.size() &&
+ "tiles must match indices of dimension to block");
+ // bind the dimension `i` with the tile factor.
+ for (auto i : llvm::seq<int64_t>(0, dimsToTile.size()))
+ dimAndTileMapping[dimsToTile[i]] = tiles[i];
+ return dimAndTileMapping;
+}
+
+template <typename OpTy>
+static SmallVector<OpFoldResult> getMixedTilesImpl(OpTy op) {
+ static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+ "applies to only pack or unpack operations");
+ Builder builder(op);
+ SmallVector<OpFoldResult> mixedInnerTiles;
+ unsigned dynamicValIndex = 0;
+ for (int64_t staticTile : op.getStaticInnerTiles()) {
+ if (!ShapedType::isDynamic(staticTile))
+ mixedInnerTiles.push_back(builder.getI64IntegerAttr(staticTile));
+ else
+ mixedInnerTiles.push_back(op.getInnerTiles()[dynamicValIndex++]);
+ }
+ return mixedInnerTiles;
+}
+
+template <typename OpTy>
+static SmallVector<int64_t> getStaticTilesImpl(OpTy op) {
+ static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+ "applies to only pack or unpack operations");
+ SmallVector<Value> dynamicTiles;
+ SmallVector<int64_t> staticTiles;
+ dispatchIndexOpFoldResults(op.getMixedTiles(), dynamicTiles, staticTiles);
+ return staticTiles;
+}
+
+/// Returns true if `dimsPos` is invalid. It is invalid when:
+/// a) It contains duplicates.
+/// b) At least one dimension is out of bounds (valid range: `0 <= dimPos < rank`).
+/// c) The number of elements in `dimsPos` is greater than `rank`.
+static bool isInvalidPackingPosSpecification(ArrayRef<int64_t> dimsPos,
+ size_t rank) {
+ size_t dimsPosSize = dimsPos.size();
+ if (dimsPosSize > rank)
+ return true;
+ DenseSet<int64_t> uniqued;
+ for (int64_t dim : dimsPos)
+ uniqued.insert(dim);
+ if (dimsPosSize != uniqued.size())
+ return true;
+ return llvm::any_of(dimsPos, [rank](int64_t dimPos) {
+ return dimPos < 0 || dimPos >= static_cast<int64_t>(rank);
+ });
+}
+
+/// Returns true if each dimension of `sourceShape` is no larger than the
+/// corresponding dimension of `limitShape` (dynamic dims always count as in bound).
+static bool areAllInBound(ArrayRef<int64_t> sourceShape,
+ ArrayRef<int64_t> limitShape) {
+ assert(
+ sourceShape.size() == limitShape.size() &&
+ "expected source shape rank, and limit of the shape to have same rank");
+ return llvm::all_of(
+ llvm::zip(sourceShape, limitShape), [](std::tuple<int64_t, int64_t> it) {
+ int64_t sourceExtent = std::get<0>(it);
+ int64_t limit = std::get<1>(it);
+ return ShapedType::isDynamic(sourceExtent) ||
+ ShapedType::isDynamic(limit) || sourceExtent <= limit;
+ });
+}
+
+template <typename OpTy>
+static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
+ static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+ "applies to only pack or unpack operations");
+ Operation *op = packOrUnPack.getOperation();
+
+ // Return true if we have a zero-value tile.
+ auto hasZeros = [&](ArrayRef<OpFoldResult> tiles) {
+ return llvm::any_of(
+ tiles, [](OpFoldResult tile) { return isConstantIntValue(tile, 0); });
+ };
+
+ // Verify tiles. Do not allow zero tiles.
+ SmallVector<OpFoldResult> mixedTiles = packOrUnPack.getMixedTiles();
+ if (hasZeros(mixedTiles))
+ return op->emitError("invalid zero tile factor");
+
+ // Verify inner_dims_pos and outer_dims_perm.
+ RankedTensorType unpackedType = (std::is_same<OpTy, PackOp>::value)
+ ? packOrUnPack.getSourceType()
+ : packOrUnPack.getDestType();
+ size_t unpackedRank = unpackedType.getRank();
+ ArrayRef<int64_t> innerDimsPos = packOrUnPack.getInnerDimsPos();
+ ArrayRef<int64_t> outerDimPerm = packOrUnPack.getOuterDimsPerm();
+ if (isInvalidPackingPosSpecification(innerDimsPos, unpackedRank))
+ return op->emitError("invalid inner_dims_pos vector");
+ if (isInvalidPackingPosSpecification(outerDimPerm, unpackedRank))
+ return op->emitError("invalid outer_dims_perm vector");
+ if (!outerDimPerm.empty() && outerDimPerm.size() != unpackedRank)
+ return op->emitError("outer_dims_perm must be a permutation or empty");
+
+ // Tiling factors must be less than or equal to the input rank for pack (or
+ // output rank for unpack), and must match the number of `inner_dims_pos`.
+ if (mixedTiles.size() > unpackedRank) {
+ return op->emitError("tiling factors must be less than or equal to the "
+ "input rank for pack or output rank for unpack");
+ }
+ if (mixedTiles.size() != innerDimsPos.size()) {
+ return op->emitError(
+ "tiling factors must equal the number of dimensions to tile");
+ }
+
+ ShapedType packedType = (std::is_same<OpTy, PackOp>::value)
+ ? packOrUnPack.getDestType()
+ : packOrUnPack.getSourceType();
+ size_t packedRank = packedType.getRank();
+ // Require output rank to match input rank + number of blocking factors.
+ size_t expectedPackedRank = unpackedRank + mixedTiles.size();
+ if (expectedPackedRank != packedRank) {
+ return op->emitError(
+ "packed rank != (unpacked rank + num tiling factors), got ")
+ << packedRank << " != " << expectedPackedRank;
+ }
+
+ // Verify result shape is greater than the minimum expected
+ // by the pack operation, and that the output shape
+ // represents full tiles.
+ RankedTensorType expectedPackedType = PackOp::inferPackedType(
+ unpackedType, packOrUnPack.getStaticTiles(), innerDimsPos, outerDimPerm);
+ if (!areAllInBound(expectedPackedType.getShape(), packedType.getShape())) {
+ return op->emitError("the shape of output is not large enough to hold the "
+ "packed data. Expected at least ")
+ << expectedPackedType << ", got " << packedType;
+ }
+ if (!llvm::all_of(
+ llvm::zip(packedType.getShape().take_back(mixedTiles.size()),
+ mixedTiles),
+ [](std::tuple<int64_t, OpFoldResult> it) {
+ int64_t shape = std::get<0>(it);
+ if (Attribute attr =
+ llvm::dyn_cast_if_present<Attribute>(std::get<1>(it))) {
+ IntegerAttr intAttr = dyn_cast_or_null<IntegerAttr>(attr);
+ int64_t staticTileSize = intAttr.getValue().getSExtValue();
+ return shape == staticTileSize;
+ }
+ return ShapedType::isDynamic(shape);
+ })) {
+ return op->emitError("mismatch in inner tile sizes specified and shaped of "
+ "tiled dimension in the packed type");
+ }
+ return success();
+}
+
+namespace {
+/// Subset of PackOp/UnPackOp fields used to compute the result of applying
+/// various permutations to the op.
+// TODO: Add linalg.transpose + pack/unpack folding patterns that just reuse
+// these. These may or may not become true foldings / canonicalizations
+// depending on how aggressive we want to be in automatically folding
+// transposes.
+struct PackOrUnPackTransposeResult {
+ SmallVector<int64_t> innerDimsPos;
+ SmallVector<OpFoldResult> innerTiles;
+ SmallVector<int64_t> outerDimsPerm;
+};
+} // namespace
+
+template <typename OpTy>
+static PackOrUnPackTransposeResult
+commonPermutationOfPackAndUnPackOp(OpTy packOrUnPackOp,
+ ArrayRef<int64_t> innerPermutation,
+ ArrayRef<int64_t> outerPermutation) {
+ static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+ "applies to only pack or unpack operations");
+ assert((!innerPermutation.empty() || !outerPermutation.empty()) &&
+ "some permutation must be non-empty");
+ PackOrUnPackTransposeResult metadata;
+ metadata.innerDimsPos =
+ SmallVector<int64_t>(packOrUnPackOp.getInnerDimsPos());
+ metadata.innerTiles =
+ SmallVector<OpFoldResult>(packOrUnPackOp.getMixedTiles());
+ int64_t numOuterDims = std::is_same<OpTy, PackOp>::value
+ ? packOrUnPackOp.getSourceRank()
+ : packOrUnPackOp.getDestRank();
+ metadata.outerDimsPerm =
+ packOrUnPackOp.getOuterDimsPerm().empty()
+ ? llvm::to_vector(llvm::seq<int64_t>(0, numOuterDims))
+ : SmallVector<int64_t>(packOrUnPackOp.getOuterDimsPerm());
+ if (!innerPermutation.empty()) {
+ assert(innerPermutation.size() == metadata.innerDimsPos.size() &&
+ isPermutationVector(innerPermutation) &&
+ "invalid inner permutation");
+ applyPermutationToVector(metadata.innerDimsPos, innerPermutation);
+ applyPermutationToVector(metadata.innerTiles, innerPermutation);
+ }
+ if (!outerPermutation.empty()) {
+ assert(outerPermutation.size() == metadata.outerDimsPerm.size() &&
+ isPermutationVector(outerPermutation) &&
+ "invalid outer permutation");
+ applyPermutationToVector(metadata.outerDimsPerm, outerPermutation);
+ }
+ return metadata;
+}
+
+//===----------------------------------------------------------------------===//
+// PackOp
+//===----------------------------------------------------------------------===//
+
+void PackOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
+ setNameFn(getResult(), "pack");
+}
+
+void PackOp::build(OpBuilder &builder, OperationState &state, Value source,
+ Value dest, ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<OpFoldResult> innerTiles,
+ std::optional<Value> paddingValue,
+ ArrayRef<int64_t> outerDimsPerm) {
+ assert(innerDimsPos.size() == innerTiles.size() &&
+ "number of tile sizes specified must match the specified number of "
+ "original dimensions to be tiled");
+ SmallVector<int64_t> staticTileSizes;
+ SmallVector<Value> dynamicTileSizes;
+ dispatchIndexOpFoldResults(innerTiles, dynamicTileSizes, staticTileSizes);
+ build(builder, state, dest.getType(), source, dest,
+ paddingValue ? *paddingValue : nullptr,
+ outerDimsPerm.empty() ? nullptr
+ : builder.getDenseI64ArrayAttr(outerDimsPerm),
+ builder.getDenseI64ArrayAttr(innerDimsPos), dynamicTileSizes,
+ builder.getDenseI64ArrayAttr(staticTileSizes));
+}
+
+LogicalResult
+PackOp::reifyResultShapes(OpBuilder &builder,
+ ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
+ return reifyResultShapesImpl(*this, builder, reifiedReturnShapes);
+}
+
+DenseMap<int64_t, OpFoldResult> PackOp::getDimAndTileMapping() {
+ return getDimAndTileMappingImpl(*this);
+}
+
+SmallVector<OpFoldResult> PackOp::getMixedTiles() {
+ return getMixedTilesImpl(*this);
+}
+
+SmallVector<int64_t> PackOp::getStaticTiles() {
+ return getStaticTilesImpl(*this);
+}
+
+ArrayRef<int64_t> PackOp::getAllOuterDims() {
+ ShapedType inputType = getSourceType();
+ int64_t inputRank = inputType.getRank();
+ return getDestType().getShape().take_front(inputRank);
+}
+
+SmallVector<int64_t> PackOp::getTiledOuterDims() {
+ auto innerDimsPos = getInnerDimsPos();
+ auto packedShape = getDestType().getShape();
+ SmallVector<int64_t> res;
+
+ for (auto index : innerDimsPos)
+ res.push_back(packedShape[index]);
+
+ return res;
+}
+
+bool PackOp::requirePaddingValue(ArrayRef<int64_t> inputShape,
+ ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outputShape,
+ ArrayRef<int64_t> outerDimsPerm,
+ ArrayRef<OpFoldResult> innerTiles) {
+ SmallVector<int64_t> outputTileSizes(
+ outputShape.take_front(inputShape.size()));
+ if (!outerDimsPerm.empty()) {
+ assert(outerDimsPerm.size() == outputTileSizes.size() &&
+ "expected output and outer_dims_perm to have same size");
+ applyPermutationToVector(outputTileSizes,
+ invertPermutationVector(outerDimsPerm));
+ }
+ for (auto [pos, tileSize] : llvm::zip_equal(innerDimsPos, innerTiles)) {
+ if (ShapedType::isDynamic(inputShape[pos]))
+ continue;
+ std::optional<int64_t> constantTile = getConstantIntValue(tileSize);
+
+ if (!constantTile) {
+ if (!ShapedType::isDynamic(outputTileSizes[pos]) &&
+ (inputShape[pos] % outputTileSizes[pos] != 0))
+ return true;
+ } else if (inputShape[pos] % (*constantTile) != 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+LogicalResult PackOp::verify() {
+ if (failed(commonVerifierPackAndUnPackOp(*this)))
+ return failure();
+
+ // Verify padding value, and bail out if the tile does not divide the
+ // dimension fully. In the case of dynamic tile factors or dimensions, having
+ // a partial tile is undefined behavior.
+ auto paddingValue = getPaddingValue();
+ if (paddingValue &&
+ paddingValue.getType() != getSourceType().getElementType()) {
+ return emitOpError("expected padding_value has ")
+ << getSourceType().getElementType()
+ << " but got: " << paddingValue.getType();
+ }
+
+ if (!paddingValue &&
+ requirePaddingValue(getSourceType().getShape(), getInnerDimsPos(),
+ getDestType().getShape(), getOuterDimsPerm(),
+ getMixedTiles())) {
+ return emitOpError(
+ "invalid tile factor or output size provided. Only full tiles are "
+ "supported when padding_value is not set");
+ }
+ return success();
+}
+
+/// Converts OpFoldResults to int64_t shape entries, unconditionally mapping all
+/// Value's to kDynamic, even if they are arith.constant values.
+static SmallVector<int64_t>
+asShapeWithAnyValueAsDynamic(ArrayRef<OpFoldResult> ofrs) {
+ SmallVector<int64_t> result;
+ for (auto o : ofrs) {
+ // Have to do this first, as getConstantIntValue special-cases constants.
+ if (llvm::dyn_cast_if_present<Value>(o))
+ result.push_back(ShapedType::kDynamic);
+ else
+ result.push_back(getConstantIntValue(o).value_or(ShapedType::kDynamic));
+ }
+ return result;
+}
+
+/// Helper for PackOp::{getResultShape,inferPackedType}. Returns the shape of
+/// the packed type. Having a shared helper helps implement these two methods in
+/// a way that ensures that they agree on which dimensions are dynamic.
+static SmallVector<int64_t> getPackOpResultTypeShape(
+ ArrayRef<int64_t> sourceShape, ArrayRef<int64_t> innerTileSizes,
+ ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm) {
+ SmallVector<int64_t> resultShape = llvm::to_vector(sourceShape);
+ for (auto tiledDim : llvm::enumerate(llvm::to_vector(innerDimsPos))) {
+ if (ShapedType::isDynamic(resultShape[tiledDim.value()]))
+ continue;
+ if (ShapedType::isDynamic(innerTileSizes[tiledDim.index()])) {
+ resultShape[tiledDim.value()] = ShapedType::kDynamic;
+ continue;
+ }
+ resultShape[tiledDim.value()] = llvm::divideCeilSigned(
+ resultShape[tiledDim.value()], innerTileSizes[tiledDim.index()]);
+ }
+
+ // Swap tile loops if outer_dims_perm is available.
+ if (!outerDimsPerm.empty())
+ applyPermutationToVector(resultShape, outerDimsPerm);
+
+ // Append the inner tile dimensions.
+ resultShape.append(innerTileSizes.begin(), innerTileSizes.end());
+ return resultShape;
+}
+
+SmallVector<OpFoldResult> PackOp::getResultShape(
+ OpBuilder &builder, Location loc, ArrayRef<OpFoldResult> sourceDims,
+ ArrayRef<OpFoldResult> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm) {
+ SmallVector<OpFoldResult> resultDims = llvm::to_vector(sourceDims);
+
+ AffineExpr s0, s1;
+ bindSymbols(builder.getContext(), s0, s1);
+ AffineExpr ceilDivExpr = s0.ceilDiv(s1);
+ for (auto tiledDim : llvm::enumerate(llvm::to_vector(innerDimsPos))) {
+ resultDims[tiledDim.value()] = affine::makeComposedFoldedAffineApply(
+ builder, loc, ceilDivExpr,
+ {resultDims[tiledDim.value()], innerTileSizes[tiledDim.index()]});
+ }
+ if (!outerDimsPerm.empty())
+ applyPermutationToVector(resultDims, outerDimsPerm);
+ resultDims.append(innerTileSizes.begin(), innerTileSizes.end());
+
+ SmallVector<int64_t> resultTypeShape =
+ getPackOpResultTypeShape(asShapeWithAnyValueAsDynamic(sourceDims),
+ asShapeWithAnyValueAsDynamic(innerTileSizes),
+ innerDimsPos, outerDimsPerm);
+
+ // Fix-up `resultDims` to ensure that they are Value's if and only if the
+ // result type shape says it's a dynamic dim. This is needed as callers may
+ // use dispatchIndexOpFoldResults on the result, and rely on exact number of
+ // dynamic dims returned by that.
+ for (unsigned i = 0; i < resultDims.size(); ++i) {
+ if (!ShapedType::isDynamic(resultTypeShape[i]))
+ continue;
+ resultDims[i] =
+ getValueOrCreateConstantIndexOp(builder, loc, resultDims[i]);
+ }
+
+ return resultDims;
+}
+
+/// Get the expected packed type based on source type, tile factors, position of
+/// the inner tiles and permutation of the outer tiled loop.
+RankedTensorType PackOp::inferPackedType(RankedTensorType sourceType,
+ ArrayRef<int64_t> innerTileSizes,
+ ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm) {
+ SmallVector<int64_t> resultShape = getPackOpResultTypeShape(
+ sourceType.getShape(), innerTileSizes, innerDimsPos, outerDimsPerm);
+ return RankedTensorType::get(resultShape, sourceType.getElementType());
+}
+
+Value PackOp::createDestinationTensor(OpBuilder &b, Location loc, Value source,
+ ArrayRef<OpFoldResult> innerTileSizes,
+ ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm) {
+ AffineExpr dim0, dim1;
+ bindDims(b.getContext(), dim0, dim1);
+ auto ceilDiv = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult {
+ return affine::makeComposedFoldedAffineApply(b, loc, dim0.ceilDiv(dim1),
+ {v1, v2});
+ };
+
+ SmallVector<OpFoldResult> mixedSizes;
+ for (auto [index, value] : llvm::enumerate(
+ llvm::cast<RankedTensorType>(source.getType()).getShape())) {
+ if (ShapedType::isDynamic(value))
+ mixedSizes.push_back(
+ b.create<tensor::DimOp>(loc, source, index).getResult());
+ else
+ mixedSizes.push_back(b.getIndexAttr(value));
+ }
+ for (auto it : llvm::zip(innerDimsPos, innerTileSizes)) {
+ int64_t dimPos = std::get<0>(it);
+ OpFoldResult tileSize = std::get<1>(it);
+ mixedSizes[dimPos] = ceilDiv(mixedSizes[dimPos], tileSize);
+ }
+ if (!outerDimsPerm.empty())
+ applyPermutationToVector<OpFoldResult>(mixedSizes, outerDimsPerm);
+
+ mixedSizes.append(innerTileSizes.begin(), innerTileSizes.end());
+ auto elemType = llvm::cast<ShapedType>(source.getType()).getElementType();
+ return b.create<tensor::EmptyOp>(loc, mixedSizes, elemType);
+}
+
+PackOp PackOp::createTransposedClone(OpBuilder &b, Location loc,
+ ArrayRef<int64_t> innerPermutation,
+ ArrayRef<int64_t> outerPermutation) {
+ PackOrUnPackTransposeResult metadata = commonPermutationOfPackAndUnPackOp(
+ *this, innerPermutation, outerPermutation);
+ Value transposedDest =
+ createDestinationTensor(b, loc, getSource(), metadata.innerTiles,
+ metadata.innerDimsPos, metadata.outerDimsPerm);
+ return b.create<PackOp>(loc, getSource(), transposedDest,
+ metadata.innerDimsPos, metadata.innerTiles,
+ getPaddingValue(), metadata.outerDimsPerm);
+}
+
+/// Returns true if the tiles and the tiled dims are constant.
+template <typename OpTy>
+bool areTilesAndTiledDimsAllConstant(OpTy op) {
+ static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+ "applies to only pack or unpack operations");
+ ShapedType packedType = (std::is_same<OpTy, PackOp>::value)
+ ? op.getDestType()
+ : op.getSourceType();
+ SmallVector<OpFoldResult> mixedTiles = op.getMixedTiles();
+ for (auto [dimDest, tile] : llvm::zip(
+ packedType.getShape().take_back(mixedTiles.size()), mixedTiles)) {
+ std::optional<int64_t> constTileSize = getConstantIntValue(tile);
+ if (!constTileSize || ShapedType::isDynamic(dimDest))
+ return false;
+ }
+ return true;
+}
+
+Speculation::Speculatability PackOp::getSpeculatability() {
+ if (getPaddingValue())
+ return Speculation::Speculatable;
+
+ // The verifier already rejects operations if we can statically prove that the
+ // sizes of the tiles do not divide perfectly the dimension; thus, check only
+ // to have constant tiles and tiled inner dimensions.
+ if (!areTilesAndTiledDimsAllConstant(*this))
+ return Speculation::NotSpeculatable;
+
+ return Speculation::Speculatable;
+}
+
+// Return true if `inner_dims_pos` and `outer_dims_perm` target the same
+// dimensions for pack and unpack.
+static bool hasSameInnerOuterAttribute(PackOp packOp, UnPackOp unPackOp) {
+ if (packOp.getInnerDimsPos() != unPackOp.getInnerDimsPos())
+ return false;
+ if (packOp.getOuterDimsPerm() == unPackOp.getOuterDimsPerm())
+ return true;
+ // Outer dims permutation is optional.
+ // To compare an unbalanced pack-unpack pair, treat no permutation as equal to
+ // identity permutation.
+ return isIdentityPermutation(packOp.getOuterDimsPerm()) &&
+ isIdentityPermutation(unPackOp.getOuterDimsPerm());
+}
+
+// Return true if pack and unpack have the same tiles.
+// Same SSA values or same integer constants.
+static bool haveSameTiles(PackOp packOp, UnPackOp unPackOp) {
+ auto packTiles = packOp.getMixedTiles();
+ auto unPackTiles = unPackOp.getMixedTiles();
+ if (packTiles.size() != unPackTiles.size())
+ return false;
+ for (size_t i = 0, e = packTiles.size(); i < e; i++) {
+ if (!isEqualConstantIntOrValue(packTiles[i], unPackTiles[i]))
+ return false;
+ }
+ return true;
+}
+
+/// Returns true if the pack op does not need a padding value.
+static bool paddingIsNotNeeded(PackOp op) {
+ auto srcType = op.getSourceType();
+ if (llvm::any_of(op.getInnerDimsPos(),
+ [&](int64_t pos) { return srcType.isDynamicDim(pos); }))
+ return false;
+ if (ShapedType::isDynamicShape(op.getStaticInnerTiles()))
+ return false;
+ return !PackOp::requirePaddingValue(
+ srcType.getShape(), op.getInnerDimsPos(), op.getDestType().getShape(),
+ op.getOuterDimsPerm(), op.getMixedTiles());
+}
+
+/// Returns true if the `srcShape` or `destShape` is different from the one in
+/// `packOp` and populates each with the inferred static shape.
+static bool inferStaticShape(PackOp packOp, SmallVectorImpl<int64_t> &srcShape,
+ SmallVectorImpl<int64_t> &destShape) {
+ bool changeNeeded = false;
+ srcShape.assign(packOp.getSourceType().getShape().begin(),
+ packOp.getSourceType().getShape().end());
+ destShape.assign(packOp.getDestType().getShape().begin(),
+ packOp.getDestType().getShape().end());
+ llvm::SmallSetVector<int64_t, 4> innerDims;
+ innerDims.insert(packOp.getInnerDimsPos().begin(),
+ packOp.getInnerDimsPos().end());
+ SmallVector<int64_t> inverseOuterDimsPerm;
+ if (!packOp.getOuterDimsPerm().empty())
+ inverseOuterDimsPerm = invertPermutationVector(packOp.getOuterDimsPerm());
+ int srcRank = packOp.getSourceRank();
+ for (auto i : llvm::seq<int64_t>(0, srcRank)) {
+ if (innerDims.contains(i))
+ continue;
+ int64_t srcPos = i;
+ int64_t destPos = i;
+ if (!inverseOuterDimsPerm.empty())
+ destPos = inverseOuterDimsPerm[srcPos];
+ if (ShapedType::isDynamic(srcShape[srcPos]) ==
+ ShapedType::isDynamic(destShape[destPos])) {
+ continue;
+ }
+ int64_t size = srcShape[srcPos];
+ if (ShapedType::isDynamic(size))
+ size = destShape[destPos];
+ srcShape[srcPos] = size;
+ destShape[destPos] = size;
+ changeNeeded = true;
+ }
+ return changeNeeded;
+}
+
+LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
+ // Fold a pack(unpack(x)) to x.
+ if (auto unPackOp = packOp.getSource().getDefiningOp<UnPackOp>()) {
+ if (unPackOp.getSourceType() != packOp.getDestType())
+ return failure();
+ if (packOp.getPaddingValue() ||
+ !hasSameInnerOuterAttribute(packOp, unPackOp) ||
+ !haveSameTiles(packOp, unPackOp))
+ return failure();
+ rewriter.replaceOp(packOp, unPackOp.getSource());
+ return success();
+ }
+
+ // Fold optional PaddingValue operand away if padding is not needed.
+ if (packOp.getPaddingValue() && paddingIsNotNeeded(packOp)) {
+ rewriter.startOpModification(packOp);
+ packOp.getPaddingValueMutable().clear();
+ rewriter.finalizeOpModification(packOp);
+ return success();
+ }
+
+ // Insert tensor.cast ops if static shape inference is available.
+ SmallVector<int64_t> srcShape, destShape;
+ if (inferStaticShape(packOp, srcShape, destShape)) {
+ Location loc = packOp.getLoc();
+ Value source = packOp.getSource();
+ if (srcShape != packOp.getSourceType().getShape()) {
+ auto newSrcType = packOp.getSourceType().clone(srcShape);
+ source =
+ rewriter.create<tensor::CastOp>(loc, newSrcType, packOp.getSource());
+ }
+ Value dest = packOp.getDest();
+ RankedTensorType originalResultType = packOp.getDestType();
+ bool needUpdateDestType = (destShape != originalResultType.getShape());
+ if (needUpdateDestType) {
+ auto newDestType = packOp.getDestType().clone(destShape);
+ dest =
+ rewriter.create<tensor::CastOp>(loc, newDestType, packOp.getDest());
+ }
+ rewriter.modifyOpInPlace(packOp, [&] {
+ packOp.getSourceMutable().assign(source);
+ packOp.getDestMutable().assign(dest);
+ packOp.getResult().setType(cast<RankedTensorType>(dest.getType()));
+ });
+ // Insert a cast if needed
+ if (needUpdateDestType) {
+ rewriter.setInsertionPointAfter(packOp);
+ auto castOp =
+ rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
+ rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
+ }
+ return success();
+ }
+
+ return failure();
+}
+
+template <typename PackOrUnpackOp>
+static bool isLikePadUnPad(PackOrUnpackOp packOp,
+ RankedTensorType packedTensorType) {
+ static_assert(std::is_same<PackOrUnpackOp, PackOp>::value ||
+ std::is_same<PackOrUnpackOp, UnPackOp>::value,
+ "Function meant for pack/unpack");
+ // This is a pad if packing only adds ones and we don't transpose dimensions.
+
+ // Check that we are not transposing any dimensions.
+ ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos();
+ int64_t numPackedDims = innerDimsPos.size();
+ auto orderedDims = llvm::to_vector<4>(llvm::seq<int64_t>(0, numPackedDims));
+ if (orderedDims != innerDimsPos) {
+ // Dimensions don't happen in order.
+ return false;
+ }
+
+ ArrayRef<int64_t> packedShape = packedTensorType.getShape();
+ int64_t packedRank = packedTensorType.getRank();
+ // At this point we know that we are taking numPackedDims outer
+ // dimensions and pushing them all the way as the inner most dimensions.
+ // What's left on the outer most dimensions is, in this order:
+ // - the factor of the packed dimensions, then
+ // - the untouched dimensions
+ // This shifting inward of dimensions is a no-op (as opposed to a transpose)
+ // if all the dimensions that bubble outward are ones.
+ // Therefore check that all the dimensions but the numPackedDims inner most
+ // ones are ones.
+ return llvm::all_of(
+ llvm::seq<int64_t>(0, packedRank - numPackedDims),
+ [&packedShape](int64_t i) { return packedShape[i] == 1; });
+}
+
+bool PackOp::isLikePad() {
+ auto packedTensorType =
+ llvm::cast<RankedTensorType>((*this)->getResultTypes().front());
+ return isLikePadUnPad(*this, packedTensorType);
+}
+
+OpFoldResult PackOp::fold(FoldAdaptor adaptor) {
+ std::optional<Attribute> paddingValue;
+ if (auto pad = adaptor.getPaddingValue())
+ paddingValue = pad;
+ if (OpFoldResult reshapedSource = reshapeConstantSource(
+ llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
+ getDestType(), paddingValue))
+ return reshapedSource;
+ return {};
+}
+
+//===----------------------------------------------------------------------===//
+// UnPackOp
+//===----------------------------------------------------------------------===//
+
+void UnPackOp::getAsmResultNames(
+ function_ref<void(Value, StringRef)> setNameFn) {
+ setNameFn(getResult(), "unpack");
+}
+
+LogicalResult
+UnPackOp::reifyResultShapes(OpBuilder &builder,
+ ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
+ return reifyResultShapesImpl(*this, builder, reifiedReturnShapes);
+}
+
+DenseMap<int64_t, OpFoldResult> UnPackOp::getDimAndTileMapping() {
+ return getDimAndTileMappingImpl(*this);
+}
+
+SmallVector<OpFoldResult> UnPackOp::getMixedTiles() {
+ return getMixedTilesImpl(*this);
+}
+
+SmallVector<int64_t> UnPackOp::getStaticTiles() {
+ return getStaticTilesImpl(*this);
+}
+
+ArrayRef<int64_t> UnPackOp::getAllOuterDims() {
+ ShapedType destType = getDestType();
+ int64_t destRank = destType.getRank();
+ return getSourceType().getShape().take_front(destRank);
+}
+
+SmallVector<int64_t> UnPackOp::getTiledOuterDims() {
+ auto innerDimsPos = getInnerDimsPos();
+ auto packedShape = getSourceType().getShape();
+ SmallVector<int64_t> res;
+
+ for (auto index : innerDimsPos)
+ res.push_back(packedShape[index]);
+
+ return res;
+}
+
+LogicalResult UnPackOp::verify() {
+ return commonVerifierPackAndUnPackOp(*this);
+}
+
+Speculation::Speculatability UnPackOp::getSpeculatability() {
+ // See PackOp::getSpeculatability.
+ if (!areTilesAndTiledDimsAllConstant(*this))
+ return Speculation::NotSpeculatable;
+
+ return Speculation::Speculatable;
+}
+
+void UnPackOp::build(OpBuilder &builder, OperationState &state, Value source,
+ Value dest, ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<OpFoldResult> innerTiles,
+ ArrayRef<int64_t> outerDimsPerm) {
+ assert(innerDimsPos.size() == innerTiles.size() &&
+ "number of tile sizes specified must match the specified number of "
+ "original dimensions to be tiled");
+ SmallVector<int64_t> staticTileSizes;
+ SmallVector<Value> dynamicTileSizes;
+ dispatchIndexOpFoldResults(innerTiles, dynamicTileSizes, staticTileSizes);
+ build(builder, state, dest.getType(), source, dest,
+ outerDimsPerm.empty() ? nullptr
+ : builder.getDenseI64ArrayAttr(outerDimsPerm),
+ builder.getDenseI64ArrayAttr(innerDimsPos), dynamicTileSizes,
+ builder.getDenseI64ArrayAttr(staticTileSizes));
+}
+
+Value UnPackOp::createDestinationTensor(OpBuilder &b, Location loc,
+ Value source,
+ ArrayRef<OpFoldResult> innerTileSizes,
+ ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm) {
+ AffineExpr sym0, sym1;
+ bindSymbols(b.getContext(), sym0, sym1);
+ auto dimMul = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult {
+ return affine::makeComposedFoldedAffineApply(b, loc, sym0 * sym1, {v1, v2});
+ };
+
+ SmallVector<OpFoldResult> mixedSizes;
+ auto srcType = llvm::cast<RankedTensorType>(source.getType());
+ for (auto i :
+ llvm::seq<unsigned>(0, srcType.getRank() - innerTileSizes.size())) {
+ if (srcType.isDynamicDim(i))
+ mixedSizes.push_back(b.create<tensor::DimOp>(loc, source, i).getResult());
+ else
+ mixedSizes.push_back(b.getIndexAttr(srcType.getDimSize(i)));
+ }
+ if (!outerDimsPerm.empty()) {
+ applyPermutationToVector<OpFoldResult>(
+ mixedSizes, invertPermutationVector(outerDimsPerm));
+ }
+
+ for (auto [dimPos, tileSize] : llvm::zip_equal(innerDimsPos, innerTileSizes))
+ mixedSizes[dimPos] = dimMul(mixedSizes[dimPos], tileSize);
+
+ auto elemType = srcType.getElementType();
+ return b.create<tensor::EmptyOp>(loc, mixedSizes, elemType);
+}
+
+UnPackOp UnPackOp::createTransposedClone(OpBuilder &b, Location loc,
+ Value transposedSource,
+ ArrayRef<int64_t> innerPermutation,
+ ArrayRef<int64_t> outerPermutation) {
+ PackOrUnPackTransposeResult metadata = commonPermutationOfPackAndUnPackOp(
+ *this, innerPermutation, outerPermutation);
+ return b.create<UnPackOp>(loc, transposedSource, getDest(),
+ metadata.innerDimsPos, metadata.innerTiles,
+ metadata.outerDimsPerm);
+}
+
+/// Returns true if the `srcShape` or `destShape` is different from the one in
+/// `op` and populates each with the inferred static shape.
+static bool inferStaticShape(UnPackOp op, SmallVectorImpl<int64_t> &srcShape,
+ SmallVectorImpl<int64_t> &destShape) {
+ bool changeNeeded = false;
+ srcShape.assign(op.getSourceType().getShape().begin(),
+ op.getSourceType().getShape().end());
+ destShape.assign(op.getDestType().getShape().begin(),
+ op.getDestType().getShape().end());
+ llvm::SmallSetVector<int64_t, 4> innerDims;
+ innerDims.insert(op.getInnerDimsPos().begin(), op.getInnerDimsPos().end());
+ SmallVector<int64_t> inverseOuterDimsPerm;
+ if (!op.getOuterDimsPerm().empty())
+ inverseOuterDimsPerm = invertPermutationVector(op.getOuterDimsPerm());
+ int destRank = op.getDestRank();
+ for (auto i : llvm::seq<int64_t>(0, destRank)) {
+ if (innerDims.contains(i))
+ continue;
+ int64_t srcPos = i;
+ int64_t destPos = i;
+ if (!inverseOuterDimsPerm.empty())
+ srcPos = inverseOuterDimsPerm[destPos];
+ if (ShapedType::isDynamic(srcShape[srcPos]) ==
+ ShapedType::isDynamic(destShape[destPos])) {
+ continue;
+ }
+ int64_t size = srcShape[srcPos];
+ if (ShapedType::isDynamic(size))
+ size = destShape[destPos];
+ srcShape[srcPos] = size;
+ destShape[destPos] = size;
+ changeNeeded = true;
+ }
+ return changeNeeded;
+}
+
+LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
+ PatternRewriter &rewriter) {
+ /// unpack(pack(x)) -> x
+ if (PackOp packOp = unPackOp.getSource().getDefiningOp<PackOp>()) {
+ if (packOp.getSourceType() != unPackOp.getDestType())
+ return failure();
+ if (packOp.getPaddingValue() ||
+ !hasSameInnerOuterAttribute(packOp, unPackOp) ||
+ !haveSameTiles(packOp, unPackOp))
+ return failure();
+ rewriter.replaceOp(unPackOp, packOp.getSource());
+ return success();
+ }
+ /// unpack(destinationStyleOp(x)) -> unpack(x)
+ if (auto dstStyleOp =
+ unPackOp.getDest().getDefiningOp<DestinationStyleOpInterface>()) {
+ auto destValue = cast<OpResult>(unPackOp.getDest());
+ Value newDest = dstStyleOp.getDpsInits()[destValue.getResultNumber()];
+ rewriter.modifyOpInPlace(unPackOp,
+ [&]() { unPackOp.setDpsInitOperand(0, newDest); });
+ return success();
+ }
+
+ // Insert tensor.cast ops if static shape inference is available.
+ SmallVector<int64_t> srcShape, destShape;
+ if (inferStaticShape(unPackOp, srcShape, destShape)) {
+ Location loc = unPackOp.getLoc();
+ Value source = unPackOp.getSource();
+ if (srcShape != unPackOp.getSourceType().getShape()) {
+ auto newSrcType = unPackOp.getSourceType().clone(srcShape);
+ source = rewriter.create<tensor::CastOp>(loc, newSrcType,
+ unPackOp.getSource());
+ }
+ Value dest = unPackOp.getDest();
+ if (destShape != unPackOp.getDestType().getShape()) {
+ auto newDestType = unPackOp.getDestType().clone(destShape);
+ dest =
+ rewriter.create<tensor::CastOp>(loc, newDestType, unPackOp.getDest());
+ }
+ Value newOp = rewriter.create<UnPackOp>(
+ loc, source, dest, unPackOp.getInnerDimsPos(), unPackOp.getMixedTiles(),
+ unPackOp.getOuterDimsPerm());
+ rewriter.replaceOpWithNewOp<tensor::CastOp>(
+ unPackOp, unPackOp.getResult().getType(), newOp);
+ return success();
+ }
+
+ return failure();
+}
+
+bool UnPackOp::isLikeUnPad() {
+ RankedTensorType packedTensorType = getSourceType();
+ return isLikePadUnPad(*this, packedTensorType);
+}
+
+OpFoldResult UnPackOp::fold(FoldAdaptor adaptor) {
+ if (OpFoldResult reshapedSource = reshapeConstantSource(
+ llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
+ getResult().getType()))
+ return reshapedSource;
+ return {};
+}
+
} // namespace linalg
} // namespace mlir
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 24a1d553153198..8a1392baf29983 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -1119,20 +1119,6 @@ void EmptyOp::getCanonicalizationPatterns(RewritePatternSet &results,
ReplaceEmptyTensorStaticShapeDims>(context);
}
-/// Try to remove a tensor operation if it would only reshape a constant.
-/// Removes the op and replaces the constant with a new constant of the result
-/// shape. When an optional cst attribute is passed, it is reshaped only if the
-/// splat value matches the value in the attribute.
-static OpFoldResult
-reshapeConstantSource(DenseElementsAttr source, TensorType result,
- std::optional<Attribute> cst = std::nullopt) {
- if (source && source.isSplat() && result.hasStaticShape() &&
- (!cst.has_value() || source.getSplatValue<Attribute>() == cst.value()))
- return source.resizeSplat(result);
-
- return {};
-}
-
//===----------------------------------------------------------------------===//
// ExtractOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
index 70b2aaf9a17e0b..0336423c57b1dc 100644
--- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
+++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
@@ -483,3 +483,13 @@ PackingMetadata mlir::computePackingMetadata(int64_t packedRank,
}
return res;
}
+
+OpFoldResult mlir::reshapeConstantSource(DenseElementsAttr source,
+ TensorType result,
+ std::optional<Attribute> cst) {
+ if (source && source.isSplat() && result.hasStaticShape() &&
+ (!cst.has_value() || source.getSplatValue<Attribute>() == cst.value()))
+ return source.resizeSplat(result);
+
+ return {};
+}
>From ee159c2f725345cffb8bc2ea08f9d040ff0fe4a8 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Tue, 21 Jan 2025 11:26:55 +0000
Subject: [PATCH 2/4] [mlir][tensor][linalg] Move Pack/Unpack Ops to Linalg
(2/4)
This is merely moving code around, no new functionality is added.
PATCH 2: To verify the newly added Ops (and to make the subsequent
change smaller), this PR:
1. Moves tests from:
* "mlir/test/Dialect/Tensor/ops.mlir"
to:
* "mlir/test/Dialect/Linalg/named-ops.mlir"
2. Moves tests from:
* "mlir/test/Dialect/Tensor/invalid.mlir"
to:
* "mlir/test/Dialect/Linalg/invalid.mlir:
In addition, I grouped "invalid" tests for `linalg.pack` and
`linalg.unpack` into two separate sets (as opposed to mixing them
together).
CONTEXT:
This change was discussed in the following RFC:
* https://discourse.llvm.org/t/rfc-move-tensor-pack-and-tensor-unpack-into-linalg
---
mlir/test/Dialect/Linalg/invalid.mlir | 183 ++++++++++++++++++++++++
mlir/test/Dialect/Linalg/named-ops.mlir | 105 ++++++++++++++
mlir/test/Dialect/Tensor/invalid.mlir | 175 ----------------------
mlir/test/Dialect/Tensor/ops.mlir | 103 -------------
4 files changed, 288 insertions(+), 278 deletions(-)
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index a59472377a732c..8b94ae5fa10a57 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -1142,3 +1142,186 @@ func.func @winograd_output_transform_output_width(%arg0: tensor<6x6x3x3x2x2xf32>
%0 = linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x6x3x3x2x2xf32>) outs(%arg1 : tensor<2x12x11x2xf32>) -> tensor<2x12x11x2xf32>
return %0 : tensor<2x12x11x2xf32>
}
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// linalg.pack
+//===----------------------------------------------------------------------===//
+
+func.func @pack_invalid_no_padding_no_full_tiles(%input: tensor<256x128xf32>, %output: tensor<8x8x16x33xf32>) -> tensor<8x8x16x33xf32> {
+ // expected-error at +1 {{invalid tile factor or output size provided. Only full tiles are supported when padding_value is not set}}
+ %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [16, 33] into %output : tensor<256x128xf32> -> tensor<8x8x16x33xf32>
+ return %0 : tensor<8x8x16x33xf32>
+}
+
+// -----
+
+func.func @pack_invalid_no_padding_no_full_tiles_dyn_tiles(%input: tensor<256x128xf32>, %output: tensor<10x8x?x?xf32>, %tile_size_0: index, %tile_size_1: index) -> tensor<10x8x?x?xf32> {
+ // expected-error at +1 {{invalid tile factor or output size provided. Only full tiles are supported when padding_value is not set}}
+ %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [%tile_size_0, %tile_size_1] into %output : tensor<256x128xf32> -> tensor<10x8x?x?xf32>
+ return %0 : tensor<10x8x?x?xf32>
+}
+
+// -----
+
+func.func @pack_invalid_no_padding_no_full_tiles_dyn_tiles_outperm(%input: tensor<256x128xf32>, %output: tensor<8x10x?x?xf32>, %tile_size_0: index, %tile_size_1: index) -> tensor<8x10x?x?xf32> {
+ // expected-error at +1 {{invalid tile factor or output size provided. Only full tiles are supported when padding_value is not set}}
+ %0 = linalg.pack %input outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [%tile_size_0, %tile_size_1] into %output : tensor<256x128xf32> -> tensor<8x10x?x?xf32>
+ return %0 : tensor<8x10x?x?xf32>
+}
+
+// -----
+
+func.func @pad_and_pack_invalid_type(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: i32) -> tensor<2x8x8x2xf32> {
+ // expected-error at +1 {{expected padding_value has 'f32' but got: 'i32'}}
+ %0 = linalg.pack %input padding_value(%pad: i32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
+ return %0 : tensor<2x8x8x2xf32>
+}
+
+// -----
+
+func.func @pack_invalid_inner_dims_pos_vector(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+ // expected-error at +1 {{invalid inner_dims_pos vector}}
+ %0 = linalg.pack %input inner_dims_pos = [2, 0] inner_tiles = [2, 2] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
+ return %0 : tensor<8x8x32x16xf32>
+}
+
+// -----
+
+func.func @pack_invalid_duplicate_element_in_inner_dims(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+ // expected-error at +1 {{invalid inner_dims_pos vector}}
+ %0 = linalg.pack %input inner_dims_pos = [1, 1] inner_tiles = [2, 2] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
+ return %0 : tensor<8x8x32x16xf32>
+}
+
+// -----
+
+func.func @pack_invalid_duplicate_element_in_outer_perm(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+ // expected-error at +1 {{invalid outer_dims_perm vector}}
+ %0 = linalg.pack %input outer_dims_perm = [1, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
+ return %0 : tensor<8x8x32x16xf32>
+}
+
+// -----
+
+func.func @pack_invalid_output_rank(%input: tensor<256x128xf32>, %output: tensor<64x32x16xf32>) -> tensor<64x32x16xf32> {
+ // expected-error at +1 {{packed rank != (unpacked rank + num tiling factors), got 3 != 4}}
+ %0 = linalg.pack %input inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %output : tensor<256x128xf32> -> tensor<64x32x16xf32>
+ return %0 : tensor<64x32x16xf32>
+}
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// linalg.unpack
+//===----------------------------------------------------------------------===//
+
+func.func @unpack_invalid_output_rank(%input: tensor<256x128xf32>, %output: tensor<64x32x16xf32>) -> tensor<256x128xf32> {
+ // expected-error at +1 {{packed rank != (unpacked rank + num tiling factors), got 3 != 4}}
+ %0 = linalg.unpack %output inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %input : tensor<64x32x16xf32> -> tensor<256x128xf32>
+ return %0 : tensor<256x128xf32>
+}
+
+// -----
+
+func.func @unpack_invalid_out_of_bound_outer_perm(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+ // expected-error at +1 {{invalid outer_dims_perm vector}}
+ %0 = linalg.unpack %output outer_dims_perm = [2, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %input : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+ return %0 : tensor<256x128xf32>
+}
+
+// -----
+
+func.func @pack_invalid_outer_dims_perm(%source: tensor<128x256xf32>, %dest: tensor<16x4x32x16xf32>) -> tensor<16x4x32x16xf32> {
+ // expected-error at +1 {{outer_dims_perm must be a permutation or empty}}
+ %0 = linalg.pack %source outer_dims_perm = [0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<16x4x32x16xf32>
+ return %0 : tensor<16x4x32x16xf32>
+}
+
+// -----
+
+func.func @unpack_invalid_outer_dims_perm(%source: tensor<128x256xf32>, %dest: tensor<16x4x32x16xf32>) -> tensor<128x256xf32> {
+ // expected-error at +1 {{outer_dims_perm must be a permutation or empty}}
+ %0 = linalg.unpack %dest outer_dims_perm = [1] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<16x4x32x16xf32> -> tensor<128x256xf32>
+ return %0 : tensor<128x256xf32>
+}
+
+// -----
+
+func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+ // expected-error at +1 {{the shape of output is not large enough to hold the packed data. Expected at least 'tensor<8x8x16x32xf32>', got 'tensor<8x8x32x16xf32>'}}
+ %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
+ return %0 : tensor<8x8x32x16xf32>
+}
+
+// -----
+
+func.func @unpack_invalid(%output: tensor<256x128xf32>, %input: tensor<8x8x32x16xf32>) -> tensor<256x128xf32> {
+ // expected-error at +1 {{the shape of output is not large enough to hold the packed data. Expected at least 'tensor<8x32x4x32xf32>', got 'tensor<8x8x32x16xf32>'}}
+ %0 = linalg.unpack %input inner_dims_pos = [1, 0] inner_tiles = [4, 32] into %output : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+ return %0 : tensor<256x128xf32>
+}
+
+// -----
+
+func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
+ // expected-error at +1 {{invalid zero tile factor}}
+ %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [0, 2] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
+ return %0 : tensor<8x8x32x16xf32>
+}
+
+// -----
+func.func @pack_mismatch_inner_tile_size_and_output_shape(
+ %input : tensor<?x?xf32>, %output : tensor<?x?x8x8xf32>) -> tensor<?x?x8x8xf32> {
+ // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
+ %0 = linalg.pack %input inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %output : tensor<?x?xf32> -> tensor<?x?x8x8xf32>
+ return %0 : tensor<?x?x8x8xf32>
+}
+
+// -----
+
+func.func @pack_dynamic_inner_tile_size_and_static_output_shape(
+ %input : tensor<?x?xf32>, %output : tensor<?x?x8x8xf32>) -> tensor<?x?x8x8xf32> {
+ %c8 = arith.constant 8 : index
+ // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
+ %0 = linalg.pack %input inner_dims_pos = [0, 1] inner_tiles = [8, %c8] into %output : tensor<?x?xf32> -> tensor<?x?x8x8xf32>
+ return %0 : tensor<?x?x8x8xf32>
+}
+
+// -----
+
+func.func @pack_static_inner_tile_size_and_dynamic_output_shape(
+ %input : tensor<?x?xf32>, %output : tensor<?x?x8x?xf32>) -> tensor<?x?x8x?xf32> {
+ // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
+ %0 = linalg.pack %input inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %output : tensor<?x?xf32> -> tensor<?x?x8x?xf32>
+ return %0 : tensor<?x?x8x?xf32>
+}
+
+// -----
+
+func.func @unpack_mismatch_inner_tile_size_and_output_shape(
+ %input : tensor<?x?x8x8xf32>, %output : tensor<?x?xf32>) -> tensor<?x?xf32> {
+ // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
+ %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %output : tensor<?x?x8x8xf32> -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
+// -----
+
+func.func @unpack_dynamic_inner_tile_size_and_static_output_shape(
+ %input : tensor<?x?x8x4xf32>, %output : tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %c8 = arith.constant 8 : index
+ // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
+ %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8, 4] into %output : tensor<?x?x8x4xf32> -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
+// -----
+
+func.func @unpack_static_inner_tile_size_and_dynamic_output_shape(
+ %input : tensor<?x?x?x4xf32>, %output : tensor<?x?xf32>) -> tensor<?x?xf32> {
+ // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
+ %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %output : tensor<?x?x?x4xf32> -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
index 68aa5a85b5e0e6..f2b6549db3073a 100644
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -2248,3 +2248,108 @@ func.func @select_tensor(%arg0: tensor<4x8x16xi1>, %arg1: tensor<4x8x16xf32>, %a
%1 = linalg.select ins(%arg0, %arg1, %arg2 : tensor<4x8x16xi1>, tensor<4x8x16xf32>, tensor<4x8x16xf32>) outs(%0: tensor<4x8x16xf32>) -> tensor<4x8x16xf32>
return %1 : tensor<4x8x16xf32>
}
+
+//===----------------------------------------------------------------------===//
+// linalg.pack + linalg.unpack
+//===----------------------------------------------------------------------===//
+
+func.func @pack_nc_to_ncnc(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) -> tensor<128x256xf32> {
+ %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
+ %1 = tensor.empty() : tensor<128x256xf32>
+ %2 = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %1 : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
+ return %2 : tensor<128x256xf32>
+}
+
+// CHECK-LABEL: func.func @pack_nc_to_ncnc(
+// CHECK-SAME: %[[SOURCE:.*]]: tensor<128x256xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<4x16x32x16xf32>)
+// CHECK: %[[PACKED:.*]] = linalg.pack %[[SOURCE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %[[DEST]] : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
+// CHECK: %[[BUFF:.*]] = tensor.empty() : tensor<128x256xf32>
+// CHECK: %{{.*}} = linalg.unpack %[[PACKED]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %[[BUFF]] : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
+
+// -----
+
+func.func @pack_nc_to_ncnc_with_padding(%source: tensor<13x15xf32>, %dest: tensor<2x8x8x2xf32>, %padding: f32) -> tensor<13x15xf32> {
+ %0 = linalg.pack %source padding_value(%padding : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
+ %1 = tensor.empty() : tensor<13x15xf32>
+ %2 = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32>
+ return %2 : tensor<13x15xf32>
+}
+
+// CHECK-LABEL: func.func @pack_nc_to_ncnc_with_padding(
+// CHECK-SAME: %[[SOURCE:.*]]: tensor<13x15xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<2x8x8x2xf32>,
+// CHECK-SAME: %[[PADDING:.*]]: f32)
+// CHECK: %[[PACKED:.*]] = linalg.pack %[[SOURCE]] padding_value(%[[PADDING]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %[[DEST]] : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
+// CHECK: %[[BUFF:.*]] = tensor.empty() : tensor<13x15xf32>
+// CHECK: %{{.*}} = linalg.unpack %[[PACKED]] inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %[[BUFF]] : tensor<2x8x8x2xf32> -> tensor<13x15xf32>
+
+// -----
+
+func.func @pack_ck_to_kcck(%source: tensor<128x256xf32>, %dest: tensor<16x4x32x16xf32>) -> tensor<128x256xf32> {
+ %0 = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<16x4x32x16xf32>
+ %1 = tensor.empty() : tensor<128x256xf32>
+ %2 = linalg.unpack %0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %1 : tensor<16x4x32x16xf32> -> tensor<128x256xf32>
+ return %2 : tensor<128x256xf32>
+}
+
+// CHECK-LABEL: func.func @pack_ck_to_kcck(
+// CHECK-SAME: %[[SOURCE:.*]]: tensor<128x256xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<16x4x32x16xf32>)
+// CHECK: %[[PACKED:.*]] = linalg.pack %[[SOURCE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %[[DEST]] : tensor<128x256xf32> -> tensor<16x4x32x16xf32>
+// CHECK: %[[BUFF:.*]] = tensor.empty() : tensor<128x256xf32>
+// CHECK: %{{.*}} = linalg.unpack %[[PACKED]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %[[BUFF]] : tensor<16x4x32x16xf32> -> tensor<128x256xf32>
+
+// -----
+
+func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x?x?xf32>, %pad: f32, %tile_n : index, %tile_m : index) -> tensor<?x?x?x?xf32> {
+ %0 = linalg.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
+ return %0 : tensor<?x?x?x?xf32>
+}
+
+// CHECK-LABEL: func.func @pad_and_pack_fully_dynamic(
+// CHECK-SAME: %[[SOURCE:.*]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<?x?x?x?xf32>,
+// CHECK-SAME: %[[PAD:.*]]: f32,
+// CHECK-SAME: %[[TILE_N:.*]]: index,
+// CHECK-SAME: %[[TILE_M:.*]]: index)
+// CHECK: %{{.*}} = linalg.pack %[[SOURCE]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_N]], %[[TILE_M]]] into %[[DEST]] : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
+
+// -----
+
+func.func @pad_and_pack_partially_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x8x2xf32>, %pad: f32) -> tensor<?x?x8x2xf32> {
+ %0 = linalg.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
+ return %0 : tensor<?x?x8x2xf32>
+}
+
+// CHECK-LABEL: func.func @pad_and_pack_partially_dynamic(
+// CHECK-SAME: %[[SOURCE:.*]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<?x?x8x2xf32>,
+// CHECK-SAME: %[[PAD:.*]]: f32)
+// CHECK: %{{.*}} = linalg.pack %[[SOURCE]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %[[DEST]] : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
+
+// -----
+
+func.func @unpack_fully_dynamic(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>, %tile_n : index, %tile_m : index) -> tensor<?x?xf32> {
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: func.func @unpack_fully_dynamic(
+// CHECK-SAME: %[[SOURCE:.*]]: tensor<?x?x?x?xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[TILE_N:.*]]: index,
+// CHECK-SAME: %[[TILE_M:.*]]: index)
+// CHECK: %{{.*}} = linalg.unpack %[[SOURCE]] inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_N]], %[[TILE_M]]] into %[[DEST]] : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
+
+// -----
+
+func.func @unpack_partially_dynamic(%source: tensor<?x?x8x2xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<?x?x8x2xf32> -> tensor<?x?xf32>
+ return %0: tensor<?x?xf32>
+}
+
+// CHECK-LABEL: func.func @unpack_partially_dynamic(
+// CHECK-SAME: %[[SOURCE:.*]]: tensor<?x?x8x2xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<?x?xf32>)
+// CHECK: %{{.*}} = linalg.unpack %[[SOURCE]] inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %[[DEST]] : tensor<?x?x8x2xf32> -> tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
index 1de3e281bc462b..d9c0c9904402be 100644
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -632,178 +632,3 @@ func.func @empty_wrong_number_of_operands(%sz : index) {
%out = tensor.empty(%sz) : tensor<2x?x?x5xf32>
return
}
-
-// -----
-
-func.func @pack_invalid_no_padding_no_full_tiles(%input: tensor<256x128xf32>, %output: tensor<8x8x16x33xf32>) -> tensor<8x8x16x33xf32> {
- // expected-error at +1 {{invalid tile factor or output size provided. Only full tiles are supported when padding_value is not set}}
- %0 = tensor.pack %input inner_dims_pos = [1, 0] inner_tiles = [16, 33] into %output : tensor<256x128xf32> -> tensor<8x8x16x33xf32>
- return %0 : tensor<8x8x16x33xf32>
-}
-
-// -----
-
-func.func @pack_invalid_no_padding_no_full_tiles_dyn_tiles(%input: tensor<256x128xf32>, %output: tensor<10x8x?x?xf32>, %tile_size_0: index, %tile_size_1: index) -> tensor<10x8x?x?xf32> {
- // expected-error at +1 {{invalid tile factor or output size provided. Only full tiles are supported when padding_value is not set}}
- %0 = tensor.pack %input inner_dims_pos = [1, 0] inner_tiles = [%tile_size_0, %tile_size_1] into %output : tensor<256x128xf32> -> tensor<10x8x?x?xf32>
- return %0 : tensor<10x8x?x?xf32>
-}
-
-// -----
-
-func.func @pack_invalid_no_padding_no_full_tiles_dyn_tiles_outperm(%input: tensor<256x128xf32>, %output: tensor<8x10x?x?xf32>, %tile_size_0: index, %tile_size_1: index) -> tensor<8x10x?x?xf32> {
- // expected-error at +1 {{invalid tile factor or output size provided. Only full tiles are supported when padding_value is not set}}
- %0 = tensor.pack %input outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [%tile_size_0, %tile_size_1] into %output : tensor<256x128xf32> -> tensor<8x10x?x?xf32>
- return %0 : tensor<8x10x?x?xf32>
-}
-
-// -----
-
-func.func @pad_and_pack_invalid_type(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: i32) -> tensor<2x8x8x2xf32> {
- // expected-error at +1 {{expected padding_value has 'f32' but got: 'i32'}}
- %0 = tensor.pack %input padding_value(%pad: i32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
- return %0 : tensor<2x8x8x2xf32>
-}
-
-// -----
-
-func.func @pack_invalid_inner_dims_pos_vector(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
- // expected-error at +1 {{invalid inner_dims_pos vector}}
- %0 = tensor.pack %input inner_dims_pos = [2, 0] inner_tiles = [2, 2] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
- return %0 : tensor<8x8x32x16xf32>
-}
-
-// -----
-
-func.func @pack_invalid_duplicate_element_in_inner_dims(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
- // expected-error at +1 {{invalid inner_dims_pos vector}}
- %0 = tensor.pack %input inner_dims_pos = [1, 1] inner_tiles = [2, 2] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
- return %0 : tensor<8x8x32x16xf32>
-}
-
-// -----
-
-func.func @pack_invalid_duplicate_element_in_outer_perm(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
- // expected-error at +1 {{invalid outer_dims_perm vector}}
- %0 = tensor.pack %input outer_dims_perm = [1, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
- return %0 : tensor<8x8x32x16xf32>
-}
-
-// -----
-
-func.func @pack_invalid_output_rank(%input: tensor<256x128xf32>, %output: tensor<64x32x16xf32>) -> tensor<64x32x16xf32> {
- // expected-error at +1 {{packed rank != (unpacked rank + num tiling factors), got 3 != 4}}
- %0 = tensor.pack %input inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %output : tensor<256x128xf32> -> tensor<64x32x16xf32>
- return %0 : tensor<64x32x16xf32>
-}
-
-// -----
-
-func.func @unpack_invalid_output_rank(%input: tensor<256x128xf32>, %output: tensor<64x32x16xf32>) -> tensor<256x128xf32> {
- // expected-error at +1 {{packed rank != (unpacked rank + num tiling factors), got 3 != 4}}
- %0 = tensor.unpack %output inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %input : tensor<64x32x16xf32> -> tensor<256x128xf32>
- return %0 : tensor<256x128xf32>
-}
-
-// -----
-
-func.func @unpack_invalid_out_of_bound_outer_perm(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
- // expected-error at +1 {{invalid outer_dims_perm vector}}
- %0 = tensor.unpack %output outer_dims_perm = [2, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %input : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
- return %0 : tensor<256x128xf32>
-}
-
-// -----
-
-func.func @pack_invalid_outer_dims_perm(%source: tensor<128x256xf32>, %dest: tensor<16x4x32x16xf32>) -> tensor<16x4x32x16xf32> {
- // expected-error at +1 {{outer_dims_perm must be a permutation or empty}}
- %0 = tensor.pack %source outer_dims_perm = [0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<16x4x32x16xf32>
- return %0 : tensor<16x4x32x16xf32>
-}
-
-// -----
-
-func.func @unpack_invalid_outer_dims_perm(%source: tensor<128x256xf32>, %dest: tensor<16x4x32x16xf32>) -> tensor<128x256xf32> {
- // expected-error at +1 {{outer_dims_perm must be a permutation or empty}}
- %0 = tensor.unpack %dest outer_dims_perm = [1] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<16x4x32x16xf32> -> tensor<128x256xf32>
- return %0 : tensor<128x256xf32>
-}
-
-// -----
-
-func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
- // expected-error at +1 {{the shape of output is not large enough to hold the packed data. Expected at least 'tensor<8x8x16x32xf32>', got 'tensor<8x8x32x16xf32>'}}
- %0 = tensor.pack %input inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
- return %0 : tensor<8x8x32x16xf32>
-}
-
-// -----
-
-func.func @unpack_invalid(%output: tensor<256x128xf32>, %input: tensor<8x8x32x16xf32>) -> tensor<256x128xf32> {
- // expected-error at +1 {{the shape of output is not large enough to hold the packed data. Expected at least 'tensor<8x32x4x32xf32>', got 'tensor<8x8x32x16xf32>'}}
- %0 = tensor.unpack %input inner_dims_pos = [1, 0] inner_tiles = [4, 32] into %output : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
- return %0 : tensor<256x128xf32>
-}
-
-// -----
-
-func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> {
- // expected-error at +1 {{invalid zero tile factor}}
- %0 = tensor.pack %input inner_dims_pos = [1, 0] inner_tiles = [0, 2] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32>
- return %0 : tensor<8x8x32x16xf32>
-}
-
-// -----
-func.func @pack_mismatch_inner_tile_size_and_output_shape(
- %input : tensor<?x?xf32>, %output : tensor<?x?x8x8xf32>) -> tensor<?x?x8x8xf32> {
- // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
- %0 = tensor.pack %input inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %output : tensor<?x?xf32> -> tensor<?x?x8x8xf32>
- return %0 : tensor<?x?x8x8xf32>
-}
-
-// -----
-
-func.func @pack_dynamic_inner_tile_size_and_static_output_shape(
- %input : tensor<?x?xf32>, %output : tensor<?x?x8x8xf32>) -> tensor<?x?x8x8xf32> {
- %c8 = arith.constant 8 : index
- // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
- %0 = tensor.pack %input inner_dims_pos = [0, 1] inner_tiles = [8, %c8] into %output : tensor<?x?xf32> -> tensor<?x?x8x8xf32>
- return %0 : tensor<?x?x8x8xf32>
-}
-
-// -----
-
-func.func @pack_static_inner_tile_size_and_dynamic_output_shape(
- %input : tensor<?x?xf32>, %output : tensor<?x?x8x?xf32>) -> tensor<?x?x8x?xf32> {
- // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
- %0 = tensor.pack %input inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %output : tensor<?x?xf32> -> tensor<?x?x8x?xf32>
- return %0 : tensor<?x?x8x?xf32>
-}
-
-// -----
-
-func.func @unpack_mismatch_inner_tile_size_and_output_shape(
- %input : tensor<?x?x8x8xf32>, %output : tensor<?x?xf32>) -> tensor<?x?xf32> {
- // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
- %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %output : tensor<?x?x8x8xf32> -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
-// -----
-
-func.func @unpack_dynamic_inner_tile_size_and_static_output_shape(
- %input : tensor<?x?x8x4xf32>, %output : tensor<?x?xf32>) -> tensor<?x?xf32> {
- %c8 = arith.constant 8 : index
- // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
- %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8, 4] into %output : tensor<?x?x8x4xf32> -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
-// -----
-
-func.func @unpack_static_inner_tile_size_and_dynamic_output_shape(
- %input : tensor<?x?x?x4xf32>, %output : tensor<?x?xf32>) -> tensor<?x?xf32> {
- // expected-error at +1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}}
- %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %output : tensor<?x?x?x4xf32> -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
diff --git a/mlir/test/Dialect/Tensor/ops.mlir b/mlir/test/Dialect/Tensor/ops.mlir
index 378137a14b59ff..930986211cb6d2 100644
--- a/mlir/test/Dialect/Tensor/ops.mlir
+++ b/mlir/test/Dialect/Tensor/ops.mlir
@@ -358,106 +358,3 @@ func.func @gather_scatter(
(tensor<1x3x4xf32>, tensor<4x5x6xf32>, tensor<1x3x2xi32>) -> tensor<4x5x6xf32>
return
}
-
-// -----
-
-func.func @pack_nc_to_ncnc(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) -> tensor<128x256xf32> {
- %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
- %1 = tensor.empty() : tensor<128x256xf32>
- %2 = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %1 : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
- return %2 : tensor<128x256xf32>
-}
-
-// CHECK-LABEL: func.func @pack_nc_to_ncnc(
-// CHECK-SAME: %[[SOURCE:.*]]: tensor<128x256xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<4x16x32x16xf32>)
-// CHECK: %[[PACKED:.*]] = tensor.pack %[[SOURCE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %[[DEST]] : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
-// CHECK: %[[BUFF:.*]] = tensor.empty() : tensor<128x256xf32>
-// CHECK: %{{.*}} = tensor.unpack %[[PACKED]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %[[BUFF]] : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
-
-// -----
-
-func.func @pack_nc_to_ncnc_with_padding(%source: tensor<13x15xf32>, %dest: tensor<2x8x8x2xf32>, %padding: f32) -> tensor<13x15xf32> {
- %0 = tensor.pack %source padding_value(%padding : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
- %1 = tensor.empty() : tensor<13x15xf32>
- %2 = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32>
- return %2 : tensor<13x15xf32>
-}
-
-// CHECK-LABEL: func.func @pack_nc_to_ncnc_with_padding(
-// CHECK-SAME: %[[SOURCE:.*]]: tensor<13x15xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<2x8x8x2xf32>,
-// CHECK-SAME: %[[PADDING:.*]]: f32)
-// CHECK: %[[PACKED:.*]] = tensor.pack %[[SOURCE]] padding_value(%[[PADDING]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %[[DEST]] : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
-// CHECK: %[[BUFF:.*]] = tensor.empty() : tensor<13x15xf32>
-// CHECK: %{{.*}} = tensor.unpack %[[PACKED]] inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %[[BUFF]] : tensor<2x8x8x2xf32> -> tensor<13x15xf32>
-
-// -----
-
-func.func @pack_ck_to_kcck(%source: tensor<128x256xf32>, %dest: tensor<16x4x32x16xf32>) -> tensor<128x256xf32> {
- %0 = tensor.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<16x4x32x16xf32>
- %1 = tensor.empty() : tensor<128x256xf32>
- %2 = tensor.unpack %0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %1 : tensor<16x4x32x16xf32> -> tensor<128x256xf32>
- return %2 : tensor<128x256xf32>
-}
-
-// CHECK-LABEL: func.func @pack_ck_to_kcck(
-// CHECK-SAME: %[[SOURCE:.*]]: tensor<128x256xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<16x4x32x16xf32>)
-// CHECK: %[[PACKED:.*]] = tensor.pack %[[SOURCE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %[[DEST]] : tensor<128x256xf32> -> tensor<16x4x32x16xf32>
-// CHECK: %[[BUFF:.*]] = tensor.empty() : tensor<128x256xf32>
-// CHECK: %{{.*}} = tensor.unpack %[[PACKED]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %[[BUFF]] : tensor<16x4x32x16xf32> -> tensor<128x256xf32>
-
-// -----
-
-func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x?x?xf32>, %pad: f32, %tile_n : index, %tile_m : index) -> tensor<?x?x?x?xf32> {
- %0 = tensor.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
- return %0 : tensor<?x?x?x?xf32>
-}
-
-// CHECK-LABEL: func.func @pad_and_pack_fully_dynamic(
-// CHECK-SAME: %[[SOURCE:.*]]: tensor<?x?xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<?x?x?x?xf32>,
-// CHECK-SAME: %[[PAD:.*]]: f32,
-// CHECK-SAME: %[[TILE_N:.*]]: index,
-// CHECK-SAME: %[[TILE_M:.*]]: index)
-// CHECK: %{{.*}} = tensor.pack %[[SOURCE]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_N]], %[[TILE_M]]] into %[[DEST]] : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
-
-// -----
-
-func.func @pad_and_pack_partially_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x8x2xf32>, %pad: f32) -> tensor<?x?x8x2xf32> {
- %0 = tensor.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
- return %0 : tensor<?x?x8x2xf32>
-}
-
-// CHECK-LABEL: func.func @pad_and_pack_partially_dynamic(
-// CHECK-SAME: %[[SOURCE:.*]]: tensor<?x?xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<?x?x8x2xf32>,
-// CHECK-SAME: %[[PAD:.*]]: f32)
-// CHECK: %{{.*}} = tensor.pack %[[SOURCE]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %[[DEST]] : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
-
-// -----
-
-func.func @unpack_fully_dynamic(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>, %tile_n : index, %tile_m : index) -> tensor<?x?xf32> {
- %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: func.func @unpack_fully_dynamic(
-// CHECK-SAME: %[[SOURCE:.*]]: tensor<?x?x?x?xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<?x?xf32>,
-// CHECK-SAME: %[[TILE_N:.*]]: index,
-// CHECK-SAME: %[[TILE_M:.*]]: index)
-// CHECK: %{{.*}} = tensor.unpack %[[SOURCE]] inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_N]], %[[TILE_M]]] into %[[DEST]] : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
-
-// -----
-
-func.func @unpack_partially_dynamic(%source: tensor<?x?x8x2xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> {
- %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<?x?x8x2xf32> -> tensor<?x?xf32>
- return %0: tensor<?x?xf32>
-}
-
-// CHECK-LABEL: func.func @unpack_partially_dynamic(
-// CHECK-SAME: %[[SOURCE:.*]]: tensor<?x?x8x2xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<?x?xf32>)
-// CHECK: %{{.*}} = tensor.unpack %[[SOURCE]] inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %[[DEST]] : tensor<?x?x8x2xf32> -> tensor<?x?xf32>
>From 372b709e8dd66fbe215a15b7c995e37f4505f432 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 16 Jan 2025 12:20:43 +0000
Subject: [PATCH 3/4] [mlir][tensor][linalg] Move Pack/Unpack Ops to Linalg
(3/4)
This is merely moving code around, no new functionality is added.
PATCH 3: Update/move/replace all tests for `tensor.{pack|unpack}` with
identical tests for `linalg.{pack|unpack}`. Updates the testing
infrastructure accordingly and copies all the required transformations.
To help reviewing, below is an overview of non-obvious code moves:
1. Tests from:
* "mlir/test/Dialect/Tensor/tiling.mlir"
are moved to:
* "mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir"
2. Tests from:
* "mlir/test/Dialect/Tensor/fold-empty-op.mlir"
are moved to:
* "mlir/test/Dialect/Linalg/fold-empty-op.mlir"
CONTEXT:
This change was discussed in the following RFC:
* https://discourse.llvm.org/t/rfc-move-tensor-pack-and-tensor-unpack-into-linalg
---
.../Linalg/TransformOps/LinalgTransformOps.td | 86 ++-
.../Linalg/Transforms/TilingInterfaceImpl.h | 5 +
.../Dialect/Linalg/Transforms/Transforms.h | 59 +-
.../include/mlir/Dialect/Linalg/Utils/Utils.h | 18 +
.../Tensor/TransformOps/TensorTransformOps.td | 10 -
.../Dialect/Tensor/Transforms/Transforms.h | 9 -
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 219 +++++-
.../TransformOps/LinalgTransformOps.cpp | 26 +-
.../Linalg/Transforms/BlockPackMatmul.cpp | 2 +-
.../Dialect/Linalg/Transforms/CMakeLists.txt | 1 +
.../Transforms/DataLayoutPropagation.cpp | 120 ++--
.../Transforms/PackAndUnpackPatterns.cpp | 65 +-
.../Linalg/Transforms/TilingInterfaceImpl.cpp | 655 ++++++++++++++++++
.../Dialect/Linalg/Transforms/Transforms.cpp | 40 +-
.../Linalg/Transforms/Vectorization.cpp | 32 +-
mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 54 ++
mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 20 +-
.../Tensor/IR/TensorTilingInterfaceImpl.cpp | 4 +-
.../TransformOps/TensorTransformOps.cpp | 5 -
.../Dialect/Tensor/Transforms/CMakeLists.txt | 1 -
.../Tensor/Transforms/EmptyOpPatterns.cpp | 48 +-
.../Linalg/block-pack-matmul-layout.mlir | 36 +-
.../Linalg/block-pack-matmul-padding.mlir | 20 +-
.../Dialect/Linalg/block-pack-matmul.mlir | 90 +--
mlir/test/Dialect/Linalg/canonicalize.mlir | 502 +++++++++++++-
.../Linalg/data-layout-propagation.mlir | 254 +++----
.../Linalg/decompose-tensor-pack-tile.mlir | 12 +-
.../Dialect/Linalg/decompose-tensor-pack.mlir | 22 +-
.../Linalg/decompose-tensor-unpack-tile.mlir | 12 +-
.../Linalg/decompose-tensor-unpack.mlir | 18 +-
mlir/test/Dialect/Linalg/fold-empty-op.mlir | 82 +++
.../simplify-pack-unpack.mlir | 92 +--
.../Dialect/Linalg/td/decompose-pack.mlir | 2 +-
.../Dialect/Linalg/td/decompose-unpack.mlir | 2 +-
.../Dialect/Linalg/transform-lower-pack.mlir | 172 ++---
.../Dialect/Linalg/transform-op-fuse.mlir | 12 +-
.../Dialect/Linalg/transform-op-pack.mlir | 124 ++--
.../Linalg/transform-op-tile-pack-unpack.mlir | 491 +++++++++++++
.../Linalg/transform-pack-greedily.mlir | 12 +-
.../transform-tile-and-fuse-pack-unpack.mlir | 32 +-
.../Linalg/vectorization-unsupported.mlir | 4 +-
.../Linalg/vectorization-with-patterns.mlir | 8 +-
mlir/test/Dialect/Linalg/vectorization.mlir | 48 +-
mlir/test/Dialect/Tensor/canonicalize.mlir | 474 -------------
mlir/test/Dialect/Tensor/fold-empty-op.mlir | 71 --
.../Tensor/fold-into-pack-and-unpack.mlir | 198 +++---
mlir/test/Dialect/Tensor/tiling.mlir | 492 -------------
.../CPU/ArmSVE/pack-scalable-inner-tile.mlir | 8 +-
.../Linalg/CPU/pack-dynamic-inner-tile.mlir | 8 +-
.../Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir | 30 +-
.../Linalg/CPU/unpack-dynamic-inner-tile.mlir | 8 +-
.../tile-and-fuse-consumer.mlir | 16 +-
.../tile-and-fuse-using-interface.mlir | 4 +-
.../loop-invariant-code-motion.mlir | 20 +-
.../Dialect/Linalg/TestLinalgTransforms.cpp | 28 +-
.../Dialect/Tensor/TestTensorTransforms.cpp | 26 -
56 files changed, 2941 insertions(+), 1968 deletions(-)
rename mlir/lib/Dialect/{Tensor => Linalg}/Transforms/PackAndUnpackPatterns.cpp (90%)
create mode 100644 mlir/test/Dialect/Linalg/fold-empty-op.mlir
rename mlir/test/Dialect/{Tensor => Linalg}/simplify-pack-unpack.mlir (86%)
create mode 100644 mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 081bf9b6d3b239..deee9a84aa6ae9 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -45,7 +45,7 @@ def ApplyDecomposeTensorPackUnpackPatternsOp
: Op<Transform_Dialect, "apply_patterns.linalg.decompose_pack_unpack",
[DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
let description = [{
- Collect patterns to decompose tensor.pack and tensor.unpack into e.g.
+ Collect patterns to decompose linalg.pack and linalg.unpack into e.g.
tensor::PadOp, linalg::transposeOp Ops. Requires all outer dims to be unit.
}];
@@ -126,6 +126,28 @@ def ApplyPadVectorizationPatternsOp : Op<Transform_Dialect,
let assemblyFormat = "attr-dict";
}
+def ApplyFoldIntoPackAndUnpackPatternsOp : Op<Transform_Dialect,
+ "apply_patterns.tensor.fold_into_pack_and_unpack",
+ [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+ let description = [{
+ Indicates that operations like tensor.pad and tensor.extract_slice should
+ be folded into tensor.pack and tensor.unpack operations, respectively.
+ }];
+
+ let assemblyFormat = "attr-dict";
+}
+
+def ApplyFoldPackUnpackIntoEmptyPatternsOp : Op<Transform_Dialect,
+ "apply_patterns.linalg.fold_pack_unpack_into_empty",
+ [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+  let description = [{
+    Populates patterns that fold `linalg.pack` and `linalg.unpack` Ops into
+    `tensor.empty`.
+  }];
+
+ let arguments = (ins DefaultValuedAttr<BoolAttr, "false">:$fold_single_use_only);
+ let assemblyFormat = "attr-dict";
+}
+
//===----------------------------------------------------------------------===//
// BufferizeToAllocationOp
//===----------------------------------------------------------------------===//
@@ -547,19 +569,18 @@ def LowerPackOp : Op<Transform_Dialect, "structured.lower_pack", [
TransformOpInterface,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
- Rewrite a tensor.pack into tensor.pad + tensor.expand_shape + linalg.transpose.
+ Rewrite a linalg.pack into tensor.pad + tensor.expand_shape + linalg.transpose.
#### Return modes
- This operation ignores non-pack ops and drops them in the return.
- This operation produces a silenceable failure if the rewrite fails for any
- reason.
- If all the operations referred to by the `target` are rewritten, the
- transform succeeds.
- Return handles to the newly produced pad, expand_shape and transpose ops.
+ This operation ignores non-pack ops and drops them in the return. This
+ operation produces a silenceable failure if the rewrite fails for any
+ reason. If all the operations referred to by the `target` are rewritten,
+ the transform succeeds. Return handles to the newly produced pad,
+ expand_shape and transpose ops.
}];
- let arguments = (ins Transform_ConcreteOpType<"tensor.pack">:$target,
+ let arguments = (ins Transform_ConcreteOpType<"linalg.pack">:$target,
DefaultValuedAttr<BoolAttr, "true">:$lowerPadLikeWithInsertSlice);
let results = (outs Transform_ConcreteOpType<"tensor.pad">:$pad_op,
Transform_ConcreteOpType<"tensor.expand_shape">:$expand_shape_op,
@@ -571,7 +592,7 @@ def LowerPackOp : Op<Transform_Dialect, "structured.lower_pack", [
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
::mlir::transform::TransformRewriter &rewriter,
- ::mlir::tensor::PackOp target,
+ ::mlir::linalg::PackOp target,
::mlir::transform::ApplyToEachResultList &transformResults,
::mlir::transform::TransformState &state);
}];
@@ -587,20 +608,19 @@ def LowerUnPackOp : Op<Transform_Dialect, "structured.lower_unpack", [
TransformOpInterface,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
- Lower a tensor.unpack into empty + linalg.transpose + tensor.collapse_shape +
+ Lower a linalg.unpack into empty + linalg.transpose + tensor.collapse_shape +
tensor.extract_slice.
#### Return modes
- This operation ignores non-unpack ops and drops them in the return.
- This operation produces a silenceable failure if the rewrite fails for any
- reason.
- If all the operations referred to by the `target` are rewritten, the
- transform succeeds.
- Return handles to the newly produced empty, transpose, collapse_shape and extract_slice ops.
+ This operation ignores non-unpack ops and drops them in the return. This
+ operation produces a silenceable failure if the rewrite fails for any
+ reason. If all the operations referred to by the `target` are rewritten,
+ the transform succeeds. Return handles to the newly produced empty,
+ transpose, collapse_shape and extract_slice ops.
}];
- let arguments = (ins Transform_ConcreteOpType<"tensor.unpack">:$target,
+ let arguments = (ins Transform_ConcreteOpType<"linalg.unpack">:$target,
DefaultValuedAttr<BoolAttr, "true">:$lowerUnpadLikeWithExtractSlice);
let results = (outs Transform_ConcreteOpType<"tensor.empty">:$empty_op,
Transform_ConcreteOpType<"linalg.transpose">:$transpose_op,
@@ -613,7 +633,7 @@ def LowerUnPackOp : Op<Transform_Dialect, "structured.lower_unpack", [
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
::mlir::transform::TransformRewriter &rewriter,
- ::mlir::tensor::UnPackOp target,
+ ::mlir::linalg::UnPackOp target,
::mlir::transform::ApplyToEachResultList &transformResults,
::mlir::transform::TransformState &state);
}];
@@ -791,7 +811,7 @@ def PackOp : Op<Transform_Dialect, "structured.pack", [
Specifying a packed size of 0 for an iterator removes it from consideration
for packing.
- `tensor.pack` (resp. `tensor.unpack`) operations are inserted for the operands
+ `linalg.pack` (resp. `linalg.unpack`) operations are inserted for the operands
(resp. results) that need to be packed (resp. unpacked) according to the
`packed_sizes` specification.
@@ -980,7 +1000,7 @@ def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [
DeclareOpInterfaceMethods<TransformOpInterface>,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
- Apply a transposition to a single `tensor.pack` (resp. `tensor.unpack`) and
+ Apply a transposition to a single `linalg.pack` (resp. `linalg.unpack`) and
update the `linalg.generic` op that consumes (resp. produces) the operation.
This transform allows composing a simple `structured.pack` with additional
@@ -989,19 +1009,19 @@ def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [
The transpose spec must specify at least one of `outer_perm` or `inner_perm`
attributes, which will act upon the `outer_dims_perm` or `inner_dims_pos` of
- the specified `tensor.pack` or `tensor.unpack` op.
+ the specified `linalg.pack` or `linalg.unpack` op.
- If the `target` of this op is a `tensor.pack` then a new `tensor.empty` will
- be created along with transposed versions of the `tensor.pack` and the
+ If the `target` of this op is a `linalg.pack` then a new `tensor.empty` will
+ be created along with transposed versions of the `linalg.pack` and the
consuming `linalg.generic`, which is expected to be the sole consumer.
- If the `target` of this op is a `tensor.unpack` then the whole pack / compute
- / unpack chain will be transposed and transposed clones of `tensor.pack`,
- the consuming `linalg.generic` and the tail `tensor.pack` will be created.
+ If the `target` of this op is a `linalg.unpack` then the whole pack / compute
+ / unpack chain will be transposed and transposed clones of `linalg.pack`,
+ the consuming `linalg.generic` and the tail `linalg.pack` will be created.
#### Return modes
- This operation targets a single `tensor.pack` / `tensor.unpack` op and a
+ This operation targets a single `linalg.pack` / `linalg.unpack` op and a
single matching `linalg.generic` that consumes / produces the op. Otherwise,
it produces a silenceableFailure.
@@ -1011,9 +1031,9 @@ def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [
reason.
This operation returns 3 handles, one to the transformed LinalgOp, one to
- the transformed `tensor.pack` and one to the transformed `tensor.unpack`.
- The last handle for `tensor.unpack` is empty if `target_pack_or_unpack_op`
- was not itself a `tensor.unpack`.
+ the transformed `linalg.pack` and one to the transformed `linalg.unpack`.
+ The last handle for `linalg.unpack` is empty if `target_pack_or_unpack_op`
+ was not itself a `linalg.unpack`.
}];
let arguments = (ins TransformHandleTypeInterface:$target_pack_or_un_pack_op,
@@ -1143,7 +1163,7 @@ def HoistPadBuildPackingLoopNestOp :
creates the packing loop nest required by the hoist_pad operation and makes
that functionality available independently.
- TODO: In the future, we should consider rewriting as a tensor.pack after
+ TODO: In the future, we should consider rewriting as a linalg.pack after
hoisting since this abstraction is now available.
#### Return modes
@@ -1182,7 +1202,7 @@ def HoistPadOp : Op<Transform_Dialect, "structured.hoist_pad",
Hoist the tensor.pad target operation by at most the given number of loops.
Optionally apply the transpose attribute to the inner dimensions.
- TODO: In the future, we should consider rewriting as a tensor.pack after
+ TODO: In the future, we should consider rewriting as a linalg.pack after
hoisting since this abstraction is now available.
TODO: Maybe also return the linalg.generic transpose created at some point.
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h
index 5b88f1d05ce84d..3cc30123afb25f 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h
@@ -14,6 +14,11 @@ class DialectRegistry;
namespace linalg {
 void registerTilingInterfaceExternalModels(DialectRegistry &registry);
 +
 +/// Similar to the above registration, but it is only for `tensor.pack` and
 +/// `tensor.unpack` ops.
 +void registerTilingInterfaceExternalModelsForPackUnPackOps(
 +    DialectRegistry &registry);
} // namespace linalg
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index eed279b6be34ac..3dd18b44a4c85c 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -572,7 +572,7 @@ buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist,
/// packed tensor. A `transposeVector` can change the storage order of the
/// padded tensor but does not change the order of the pack or compute loops.
///
-/// TODO: In the future, we should consider rewriting as a tensor.pack after
+/// TODO: In the future, we should consider rewriting as a linalg.pack after
/// hoisting since this abstraction is now available.
///
/// Example in pseudo-mlir:
@@ -1121,7 +1121,7 @@ struct LowerPackResult {
/// Rewrite pack as pad + reshape + transpose.
FailureOr<LowerPackResult> lowerPack(RewriterBase &rewriter,
- tensor::PackOp packOp,
+ linalg::PackOp packOp,
bool lowerPadLikeWithInsertSlice = true);
struct LowerUnPackOpResult {
@@ -1133,14 +1133,14 @@ struct LowerUnPackOpResult {
/// Rewrite pack as empty + transpose + reshape + extract_slice.
FailureOr<LowerUnPackOpResult>
-lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp,
+lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
bool lowerUnpadLikeWithExtractSlice = true);
/// Struct to hold the result of a `pack` call.
struct PackResult {
- SmallVector<tensor::PackOp> packOps;
+ SmallVector<linalg::PackOp> packOps;
linalg::LinalgOp packedLinalgOp;
- SmallVector<tensor::UnPackOp> unPackOps;
+ SmallVector<linalg::UnPackOp> unPackOps;
};
/// Implement packing of a single LinalgOp by `packedSizes`.
/// There must be one packedSizes entry per `linalgOp` iterator.
@@ -1150,9 +1150,9 @@ FailureOr<PackResult> pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
/// Struct to hold the result of a `packTranspose` call.
struct PackTransposeResult {
- tensor::PackOp transposedPackOp;
+ linalg::PackOp transposedPackOp;
linalg::LinalgOp transposedLinalgOp;
- tensor::UnPackOp transposedUnPackOp;
+ linalg::UnPackOp transposedUnPackOp;
};
/// Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the
/// transposed PackOp -> LinalgOp -> UnPackOp chain after replacements.
@@ -1163,8 +1163,8 @@ struct PackTransposeResult {
/// 3. `outerPerm` (resp. `innerPerm`) must be valid permutations of
/// `packOp.getOuterDimsPerm` (resp. `packOp.getInnerDimsPerm`) or empty.
FailureOr<PackTransposeResult>
-packTranspose(RewriterBase &rewriter, tensor::PackOp packOp,
- linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp,
+packTranspose(RewriterBase &rewriter, linalg::PackOp packOp,
+ linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp,
ArrayRef<int64_t> outerPerm, ArrayRef<int64_t> innerPerm);
/// Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m
@@ -1517,15 +1517,15 @@ struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> {
const SmallVector<Value> &dynSizes) const;
};
-/// Rewrites a tensor::PackOp into a sequence of:
+/// Rewrites a linalg::PackOp into a sequence of:
/// * tensor::PadOp + linalg::TransposeOp + tensor::EmptyOp +
/// tensor::InsertSliceOp ops.
///
-/// Requires that all the outer dims of the input tensor::PackOp are 1.
+/// Requires that all the outer dims of the input linalg::PackOp are 1.
///
/// Before:
/// ```
-/// %packed = tensor.pack %input
+/// %packed = linalg.pack %input
/// padding_value(%pad : f32)
/// inner_dims_pos = [1, 0]
/// inner_tiles = [2, %high]
@@ -1551,20 +1551,20 @@ struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> {
/// : tensor<2x?xf32> into tensor<1x1x2x?xf32>
/// ```
struct DecomposeOuterUnitDimsPackOpPattern
- : public OpRewritePattern<tensor::PackOp> {
- using OpRewritePattern<tensor::PackOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(tensor::PackOp packOp,
+ : public OpRewritePattern<linalg::PackOp> {
+ using OpRewritePattern<linalg::PackOp>::OpRewritePattern;
+ LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override;
};
-/// Rewrites a tensor::UnPackOp into a sequence of rank-reduced
+/// Rewrites a linalg::UnPackOp into a sequence of rank-reduced
/// * tensor::ExtractSliceOp + linalg::TransposeOp + tensor::InsertSliceOp
///
-/// Requires that all the outer dims of the input tensor::PackOp are 1.
+/// Requires that all the outer dims of the input linalg::PackOp are 1.
///
/// Before:
/// ```
-/// %packed = tensor.unpack %input
+/// %packed = linalg.unpack %input
/// inner_dims_pos = [1, 0]
/// inner_tiles = [2, 8]
/// into %output : tensor<1x1x2x8xf32> -> tensor<5x1xf32>
@@ -1585,9 +1585,9 @@ struct DecomposeOuterUnitDimsPackOpPattern
/// : tensor<8x2xf32> to tensor<5x1xf32>
/// ```
struct DecomposeOuterUnitDimsUnPackOpPattern
- : public OpRewritePattern<tensor::UnPackOp> {
- using OpRewritePattern<tensor::UnPackOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp,
+ : public OpRewritePattern<linalg::UnPackOp> {
+ using OpRewritePattern<linalg::UnPackOp>::OpRewritePattern;
+ LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp,
PatternRewriter &rewriter) const override;
};
@@ -1709,7 +1709,7 @@ void populateLinalgGenericOpsSpecializationPatterns(
void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
PatternBenefit benefit = 1);
-/// Populates patterns to decompose tensor.pack and tensor.unpack Ops into e.g.
+/// Populates patterns to decompose linalg.pack and linalg.unpack Ops into e.g.
/// tensor.pad, linalg.transpose, tensor.{insert|extract}_slice. Require all
/// outer dims to be unit.
void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns);
@@ -1776,7 +1776,7 @@ void populateElementwiseOpsFusionPatterns(
RewritePatternSet &patterns,
const ControlFusionFn &controlElementwiseOpFusion);
-/// Function type which is used to control propagation of tensor.pack/unpack
+/// Function type which is used to control propagation of linalg.pack/unpack
/// ops.
using ControlPropagationFn = std::function<bool(OpOperand *opOperand)>;
@@ -1885,6 +1885,19 @@ void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns);
/// convert to a `linalg.dot`.
void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns);
+/// Populates `patterns` with patterns that fold operations like `tensor.pad`
+/// and `tensor.extract_slice` into `tensor.pack` and `tensor.unpack` operations
+/// respectively.
+void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
+
+/// Populates `patterns` with patterns that fold operations like `linalg.pack`
+/// and `linalg.unpack` into `tensor.empty`.
+void populateFoldPackUnpackIntoTensorEmptyPatterns(RewritePatternSet &patterns);
+
+/// Populates `patterns` with patterns that simplify `tensor.pack` and
+/// `tensor.unpack` operations.
+void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns);
+
} // namespace linalg
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 1e4f3004dec7e7..80aa034d2199dc 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -33,6 +33,24 @@ namespace linalg {
//===----------------------------------------------------------------------===//
// Utilities for inferring various semantics properties of Linalg ops.
//===----------------------------------------------------------------------===//
+/// Shell function to compute the Destination Permutation of PackOp
+/// This function uses the helper function `computePackUnPackPerm` to get
+/// the permutation vector. Only major difference between UnPack and Pack is
+/// that packOp uses destination rank whereas unpack uses source rank.
+SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp);
+
+/// Shell function to compute the Source Permutation of unPackOp.
+/// This function, like the getPackInverseDestPerm uses the helper function
+/// `computePackUnPackPerm` to get the permutation vector.
+/// Only major difference between UnPack and Pack is that packOp uses
+/// destination rank whereas unpack uses source rank.
+SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp unpackOp);
+
+/// Shell function to compute the Source rank permutation for unpackOp.
+/// Unpack requires some packing metadata, so this overload takes that
+/// value by reference and fills it in.
+SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp,
+                                             PackingMetadata &metadata);
//===----------------------------------------------------------------------===//
// General utilities
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
index 81bab1b0c82f7a..fcb10f55d556d0 100644
--- a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -53,16 +53,6 @@ def ApplyFoldTensorEmptyPatternsOp : Op<Transform_Dialect,
let arguments = (ins DefaultValuedAttr<BoolAttr, "false">:$fold_single_use_only);
let assemblyFormat = "attr-dict";
}
-def ApplyFoldIntoPackAndUnpackPatternsOp : Op<Transform_Dialect,
- "apply_patterns.tensor.fold_into_pack_and_unpack",
- [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
- let description = [{
- Indicates that operations like tensor.pad and tensor.extract_slice should
- be folded into tensor.pack and tensor.unpack operations, respectively.
- }];
-
- let assemblyFormat = "attr-dict";
-}
def ApplyFoldTensorSubsetOpsPatternsOp : Op<Transform_Dialect,
"apply_patterns.tensor.fold_tensor_subset_ops",
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
index ae695e0326ca1a..905ab0577ccc13 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -86,15 +86,6 @@ void populateFoldTensorEmptyPatterns(RewritePatternSet &patterns,
/// that it can be bufferized into a sequence of copies.
void populateDecomposeTensorConcatPatterns(RewritePatternSet &patterns);
-/// Populates `patterns` with patterns that simplify `tensor.pack` and
-/// `tensor.unpack` operations.
-void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns);
-
-/// Populates `patterns` with patterns that fold operations like `tensor.pad`
-/// and `tensor.extract_slice` into `tensor.pack` and `tensor.unpack` operations
-/// respectively.
-void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
-
using ControlFoldFn = std::function<bool(OpOperand *)>;
/// Populates `patterns` with patterns that replace tensor ops (such as
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 7e627c7da77e1b..045b844de37811 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -22,6 +22,7 @@
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
@@ -833,7 +834,7 @@ struct FoldFillWithTensorExtract : public OpRewritePattern<tensor::ExtractOp> {
/// 1. The pack op does not have padding value, or
/// 2. The filled value and padding value are the same.
static FailureOr<FillOp> foldFillPackIntoFillOp(RewriterBase &rewriter,
- tensor::PackOp packOp) {
+ linalg::PackOp packOp) {
auto fillOp = packOp.getSource().getDefiningOp<FillOp>();
if (!fillOp)
return failure();
@@ -851,12 +852,12 @@ static FailureOr<FillOp> foldFillPackIntoFillOp(RewriterBase &rewriter,
}
/// Wrapper pattern that applies foldFillPackIntoFillOp method.
-struct FoldFillWithPack : public OpRewritePattern<tensor::PackOp> {
+struct FoldFillWithPack : public OpRewritePattern<linalg::PackOp> {
public:
FoldFillWithPack(MLIRContext *context)
- : OpRewritePattern<tensor::PackOp>(context) {}
+ : OpRewritePattern<linalg::PackOp>(context) {}
- LogicalResult matchAndRewrite(tensor::PackOp packOp,
+ LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
auto fillOp = foldFillPackIntoFillOp(rewriter, packOp);
if (failed(fillOp))
@@ -3395,19 +3396,6 @@ FailureOr<TilingResult> WinogradOutputTransformOp::getTiledImplementation(
//===----------------------------------------------------------------------===//
// LinalgDialect
//===----------------------------------------------------------------------===//
-
-void LinalgDialect::getCanonicalizationPatterns(
- RewritePatternSet &results) const {
- results.add<EraseDeadLinalgOp, FoldTensorCastConsumerOp,
- InferStaticShapeOfOperands>(getContext());
-}
-
-Operation *LinalgDialect::materializeConstant(OpBuilder &builder,
- Attribute value, Type type,
- Location loc) {
- return arith::ConstantOp::materialize(builder, value, type, loc);
-}
-
/// Returns true if the result AffineExpr of the \p explicitMap is same as \p
/// defaultMap.
static bool isValidResultDimExprs(AffineMap explictMap, AffineMap defaultMap) {
@@ -3616,6 +3604,78 @@ Speculation::Speculatability MatmulOp::getSpeculatability() {
//===----------------------------------------------------------------------===//
// PackOp/UnPackOp Common
//===----------------------------------------------------------------------===//
+// FIXME: Duplicates similar hook from TensorOps.cpp!
+bool foldTensorCastPrecondition(DestinationStyleOpInterface op) {
+ // If no operand comes from a tensor::CastOp and can be folded then fail.
+ bool hasTensorCastOperand =
+ llvm::any_of(op->getOpOperands(), [&](OpOperand &opOperand) {
+ if (llvm::isa<BlockArgument>(opOperand.get()))
+ return false;
+ auto castOp = opOperand.get().getDefiningOp<tensor::CastOp>();
+ return castOp && canFoldIntoConsumerOp(castOp);
+ });
+
+ return hasTensorCastOperand;
+}
+
+// FIXME: Duplicates similar hook from TensorOps.cpp!
+static SmallVector<Value> getNewOperands(DestinationStyleOpInterface op,
+ SmallVector<Type> &newResTy) {
+ SmallVector<Value> newOperands;
+ newOperands.reserve(op->getNumOperands());
+
+ // Assumes that the result has dpsInits followed by nonDpsInits.
+ int64_t dpsInitIdx = 0;
+ for (OpOperand &opOperand : op->getOpOperands()) {
+ auto tensorCastOp = opOperand.get().getDefiningOp<tensor::CastOp>();
+ bool fold = canFoldIntoConsumerOp(tensorCastOp);
+ newOperands.push_back(fold ? tensorCastOp.getOperand() : opOperand.get());
+ if (op.isDpsInit(&opOperand) &&
+ !llvm::isa<MemRefType>(newOperands.back().getType()))
+ newResTy[dpsInitIdx++] = newOperands.back().getType();
+ }
+ return newOperands;
+}
+
+// Given the (potentially) updated packed type, `newPackedTy`, generates an
+// updated mixed-tile-sizes attribute. A tile size is updated only
+// when:
+// * a dim from newPackedTy is static, and
+// * the corresponding size from mixedTiles is still dynamic.
+// Otherwise, the original tile size is preserved.
+// Note - packed-type-dim and mixed-tile-size should always match!
+//
+// FIXME: Duplicates similar hook from TensorOps.cpp!
+static SmallVector<OpFoldResult>
+getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy,
+ SmallVector<OpFoldResult> mixedTiles) {
+ SmallVector<OpFoldResult> newMixedTileSizes;
+ for (auto it : llvm::zip(cast<ShapedType>(newPackedTy)
+ .getShape()
+ .take_back(mixedTiles.size()),
+ mixedTiles)) {
+ int64_t shape = std::get<0>(it);
+ if (shape == ShapedType::kDynamic) {
+ newMixedTileSizes.push_back(std::get<1>(it));
+ continue;
+ }
+
+ // If the current result dim is static, update the dynamic mixed-size
+ // (provided the original value is dynamic).
+ OpFoldResult tile = std::get<1>(it);
+ if (Attribute attr = llvm::dyn_cast_if_present<Attribute>(tile)) {
+ // Already a constant
+ newMixedTileSizes.push_back(tile);
+ } else {
+ assert(getConstantIntValue(tile).value() == shape &&
+ "tile size and dim size don't match!");
+ newMixedTileSizes.push_back(
+ (rewriter.getIntegerAttr(rewriter.getIndexType(), shape)));
+ }
+ }
+
+ return newMixedTileSizes;
+}
template <typename OpTy>
static LogicalResult
@@ -4302,13 +4362,65 @@ OpFoldResult PackOp::fold(FoldAdaptor adaptor) {
std::optional<Attribute> paddingValue;
if (auto pad = adaptor.getPaddingValue())
paddingValue = pad;
- if (OpFoldResult reshapedSource = reshapeConstantSource(
+ if (OpFoldResult reshapedSource = tensor::reshapeConstantSource(
llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
getDestType(), paddingValue))
return reshapedSource;
return {};
}
+/// Folds a tensor.cast op into a consuming PackOp op if the
+/// `tensor.cast` has source that is more static than the consuming op.
+///
+/// Example:
+/// ```mlir
+/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
+/// %2 = tensor.pack %1 ... : tensor<?x?xf32> ...
+/// ```
+///
+/// folds into:
+///
+/// ```mlir
+/// %2 = tensor.pack %0 ... : tensor<8x16xf32> ...
+/// ```
+struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> {
+ using OpRewritePattern<PackOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(PackOp op,
+ PatternRewriter &rewriter) const override {
+ if (!foldTensorCastPrecondition(op))
+ return failure();
+
+ SmallVector<Type> newResultTypes(op->getResultTypes());
+ SmallVector<Value> newOperands = getNewOperands(op, newResultTypes);
+
+ // Get the updated mixed-tile-sizes attribute.
+ SmallVector<OpFoldResult> newMixedTileSizes =
+ getNewMixedTileSizes(rewriter, newResultTypes[0], op.getMixedTiles());
+
+ // Clone op.
+ // TODO: Strictly speaking, discardable attributes should be _discarded_ at
+ // this point. However, in practice, we use them for things that we'd like
+ // to preserve. Implement a better abstraction.
+ PackOp newOp = rewriter.create<PackOp>(
+ op.getLoc(), newOperands[0], newOperands[1], op.getInnerDimsPos(),
+ newMixedTileSizes, op.getPaddingValue(), op.getOuterDimsPerm());
+ newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary());
+
+ // Replace op.
+ Value oldResult = op.getResult();
+ Value newResult = newOp.getResult();
+ Value replacement = (newResult.getType() != oldResult.getType())
+ ? rewriter.create<tensor::CastOp>(
+ op->getLoc(), oldResult.getType(), newResult)
+ : newResult;
+
+ rewriter.replaceOp(op, {replacement});
+
+ return success();
+ }
+};
+
//===----------------------------------------------------------------------===//
// UnPackOp
//===----------------------------------------------------------------------===//
@@ -4517,12 +4629,81 @@ bool UnPackOp::isLikeUnPad() {
}
OpFoldResult UnPackOp::fold(FoldAdaptor adaptor) {
- if (OpFoldResult reshapedSource = reshapeConstantSource(
+ if (OpFoldResult reshapedSource = tensor::reshapeConstantSource(
llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
getResult().getType()))
return reshapedSource;
return {};
}
+/// Folds a tensor.cast op into a consuming UnPackOp op if the
+/// `tensor.cast` has source that is more static than the consuming op.
+///
+/// Example:
+/// ```mlir
+/// %1 = tensor.cast %0 : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32>
+/// %2 = tensor.unpack %1 ... : tensor<1x1x?x1xi32> -> tensor<7x?xi32>
+/// ```
+///
+/// folds into:
+///
+/// ```mlir
+/// %2 = tensor.unpack %0 ... tensor<1x1x8x1xi32> -> tensor<7x?xi32>
+/// ```
+struct FoldTensorCastUnPackOp : public OpRewritePattern<UnPackOp> {
+ using OpRewritePattern<UnPackOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(UnPackOp op,
+ PatternRewriter &rewriter) const override {
+ if (!foldTensorCastPrecondition(op))
+ return failure();
+
+ SmallVector<Type> newResultTypes(op->getResultTypes());
+ SmallVector<Value> newOperands = getNewOperands(op, newResultTypes);
+ Value sourceTensor = newOperands[0];
+
+ // Get the updated mixed-tile-sizes attribute.
+ SmallVector<OpFoldResult> newMixedTileSizes = getNewMixedTileSizes(
+ rewriter, sourceTensor.getType(), op.getMixedTiles());
+
+ // Clone op.
+ // TODO: Strictly speaking, discardable attributes should be _discarded_ at
+ // this point. However, in practice, we use them for things that we'd like
+ // to preserve. Implement a better abstraction.
+ UnPackOp newOp = rewriter.create<UnPackOp>(
+ op.getLoc(), sourceTensor, newOperands[1], op.getInnerDimsPos(),
+ newMixedTileSizes, op.getOuterDimsPerm());
+ newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary());
+
+ // Replace op.
+ Value oldResult = op.getResult();
+ Value newResult = newOp.getResult();
+ Value replacement = (newResult.getType() != oldResult.getType())
+ ? rewriter.create<tensor::CastOp>(
+ op->getLoc(), oldResult.getType(), newResult)
+ : newResult;
+
+ rewriter.replaceOp(op, {replacement});
+
+ return success();
+ }
+};
+
} // namespace linalg
} // namespace mlir
+
+//===----------------------------------------------------------------------===//
+// LinalgDialect
+//===----------------------------------------------------------------------===//
+
+void LinalgDialect::getCanonicalizationPatterns(
+ RewritePatternSet &results) const {
+ results.add<EraseDeadLinalgOp, FoldTensorCastConsumerOp, FoldTensorCastPackOp,
+ FoldTensorCastUnPackOp, InferStaticShapeOfOperands>(getContext());
+}
+
+Operation *LinalgDialect::materializeConstant(OpBuilder &builder,
+ Attribute value, Type type,
+ Location loc) {
+ return arith::ConstantOp::materialize(builder, value, type, loc);
+}
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 8f5b49e0c21306..ad6c1d00d0b8ff 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -268,6 +268,16 @@ void transform::ApplyPadVectorizationPatternsOp::populatePatterns(
linalg::populateInsertSliceVectorizationPatterns(patterns);
}
+void transform::ApplyFoldIntoPackAndUnpackPatternsOp::populatePatterns(
+ RewritePatternSet &patterns) {
+ linalg::populateFoldIntoPackAndUnpackPatterns(patterns);
+}
+
+void transform::ApplyFoldPackUnpackIntoEmptyPatternsOp::populatePatterns(
+ RewritePatternSet &patterns) {
+ linalg::populateFoldPackUnpackIntoTensorEmptyPatterns(patterns);
+}
+
//===----------------------------------------------------------------------===//
// BufferizeToAllocationOp
//===----------------------------------------------------------------------===//
@@ -1171,7 +1181,7 @@ LogicalResult transform::InterchangeOp::verify() {
//===----------------------------------------------------------------------===//
DiagnosedSilenceableFailure transform::LowerPackOp::applyToOne(
- transform::TransformRewriter &rewriter, tensor::PackOp target,
+ transform::TransformRewriter &rewriter, linalg::PackOp target,
transform::ApplyToEachResultList &transformResults,
transform::TransformState &state) {
rewriter.setInsertionPoint(target);
@@ -1193,7 +1203,7 @@ DiagnosedSilenceableFailure transform::LowerPackOp::applyToOne(
//===----------------------------------------------------------------------===//
DiagnosedSilenceableFailure transform::LowerUnPackOp::applyToOne(
- transform::TransformRewriter &rewriter, tensor::UnPackOp target,
+ transform::TransformRewriter &rewriter, linalg::UnPackOp target,
transform::ApplyToEachResultList &transformResults,
transform::TransformState &state) {
rewriter.setInsertionPoint(target);
@@ -1623,7 +1633,7 @@ bool isValidPackingPermutation(
RelayoutOpTy op, ArrayRef<int64_t> permutation,
OuterOrInnerPerm outerOrInnerPerm = OuterOrInnerPerm::Outer) {
static_assert(
- llvm::is_one_of<RelayoutOpTy, tensor::PackOp, tensor::UnPackOp>::value,
+ llvm::is_one_of<RelayoutOpTy, linalg::PackOp, linalg::UnPackOp>::value,
"applies to only pack or unpack operations");
if (!op || permutation.empty())
return true;
@@ -1632,7 +1642,7 @@ bool isValidPackingPermutation(
return permutation.size() == innerRank && isPermutationVector(permutation);
// op.getOuterDimsPerm() may be empty, in which case it is identity.
// Don't rely on it.
- if (std::is_same<RelayoutOpTy, tensor::PackOp>::value) {
+ if (std::is_same<RelayoutOpTy, linalg::PackOp>::value) {
return permutation.size() == op.getSourceRank() &&
isPermutationVector(permutation);
}
@@ -1666,11 +1676,11 @@ transform::PackTransposeOp::apply(transform::TransformRewriter &rewriter,
}
// Step 2.2. Fail on wrong type.
- auto packOp = dyn_cast<tensor::PackOp>(*packOrUnpackOps.begin());
- auto unPackOp = dyn_cast<tensor::UnPackOp>(*packOrUnpackOps.begin());
+ auto packOp = dyn_cast<linalg::PackOp>(*packOrUnpackOps.begin());
+ auto unPackOp = dyn_cast<linalg::UnPackOp>(*packOrUnpackOps.begin());
if ((!packOp && !unPackOp)) {
return emitSilenceableError() << "requires target to map to a "
- "tensor.pack or tensor.unpack";
+ "linalg.pack or linalg.unpack";
}
LinalgOp linalgOpTarget = dyn_cast<LinalgOp>(*linalgOps.begin());
if (!linalgOpTarget)
@@ -1695,7 +1705,7 @@ transform::PackTransposeOp::apply(transform::TransformRewriter &rewriter,
assert(!packOp && "packOp must be null on entry when unPackOp is not null");
OpOperand *packUse = linalgOp.getDpsInitOperand(
cast<OpResult>(unPackOp.getSource()).getResultNumber());
- packOp = dyn_cast_or_null<tensor::PackOp>(packUse->get().getDefiningOp());
+ packOp = dyn_cast_or_null<linalg::PackOp>(packUse->get().getDefiningOp());
if (!packOp || !packOp.getResult().hasOneUse())
return emitSilenceableError() << "could not find matching pack op";
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
index ed1685a9cb9e69..12c1b2389c2ff1 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
@@ -88,7 +88,7 @@ static bool validateFullTilesOnDims(linalg::LinalgOp linalgOp,
/// Return failure or packed matmul with one of its operands transposed.
static FailureOr<PackTransposeResult>
transposePackedMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
- tensor::PackOp packOp, AffineMap operandMap,
+ linalg::PackOp packOp, AffineMap operandMap,
ArrayRef<unsigned> blocksStartDimPos,
bool transposeOuterBlocks, bool transposeInnerBlocks) {
assert(operandMap.getNumDims() >= 4 &&
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
index 3594b084138124..d18b6f8afc43b7 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -26,6 +26,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
MeshShardingInterfaceImpl.cpp
NamedOpConversions.cpp
BlockPackMatmul.cpp
+ PackAndUnpackPatterns.cpp
Padding.cpp
Promotion.cpp
RuntimeOpVerification.cpp
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index d79399b6588be3..d826f72afa1c10 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -61,7 +61,7 @@ template <typename OpTy>
static FailureOr<PackInfo>
getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp,
OpTy packOrUnPackOp) {
- static_assert(llvm::is_one_of<OpTy, tensor::PackOp, tensor::UnPackOp>::value,
+ static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value,
"applies to only pack or unpack operations");
LLVM_DEBUG(
{ llvm::dbgs() << "--- Construct PackInfo From an operand ---\n"; });
@@ -210,7 +210,7 @@ static SmallVector<int64_t> computeOuterDims(ArrayRef<int64_t> perm,
/// %4 = arith.addf %arg3, %arg4 : f32
/// linalg.yield %4 : f32
/// } -> tensor<?x?xf32>
-/// %1 = tensor.pack %0
+/// %1 = linalg.pack %0
/// inner_dims_pos = [0, 1]
/// inner_tiles = [8, 2]
/// into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
@@ -219,7 +219,7 @@ static SmallVector<int64_t> computeOuterDims(ArrayRef<int64_t> perm,
/// 8. Thus, the below operation and `affine_map<(d0, d1, d2, d3)> ->
/// affine_map<(d1, d3)>` will be returned.
///
-/// %pack = tensor.pack %arg0
+/// %pack = linalg.pack %arg0
/// inner_dims_pos = [0]
/// inner_tiles = [8]
/// into %init : tensor<?xf32> -> tensor<?x8xf32>
@@ -290,9 +290,9 @@ getOrCreatePackedViewOfOperand(OpBuilder &b, Location loc, PackInfo packInfo,
if (innerDimsPos.empty() && outerDimsPerm.empty())
return std::make_tuple(opOperand->get(), indexingMap);
- auto empty = tensor::PackOp::createDestinationTensor(
+ auto empty = linalg::PackOp::createDestinationTensor(
b, loc, opOperand->get(), innerTileSizes, innerDimsPos, outerDimsPerm);
- auto packedOperand = b.create<tensor::PackOp>(
+ auto packedOperand = b.create<linalg::PackOp>(
loc, opOperand->get(), empty, innerDimsPos, innerTileSizes,
/*padding=*/std::nullopt, outerDimsPerm);
return std::make_tuple(packedOperand, indexingMap);
@@ -327,7 +327,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp,
return newGenericOp;
}
-/// Bubbles up tensor.pack op through a producer generic op. This
+/// Bubbles up a linalg.pack op through a producer generic op. This
/// swap pack(generic) to generic(pack). The new generic op works on packed
/// domain; pack ops are created for input and output operands. E.g.,
///
@@ -343,7 +343,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp,
/// %4 = arith.addf %arg3, %arg3 : f32
/// linalg.yield %4 : f32
/// } -> tensor<?x?xf32>
-/// %4 = tensor.pack %3
+/// %4 = linalg.pack %3
/// inner_dims_pos = [0, 1]
/// inner_tiles = [8, 2]
/// into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
@@ -358,7 +358,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp,
/// %0 = affine.apply #map()[%dim]
/// %1 = affine.apply #map1()[%dim_0]
/// %2 = tensor.empty(%0, %1) : tensor<?x?x8x2xf32>
-/// %pack = tensor.pack %arg0
+/// %pack = linalg.pack %arg0
/// inner_dims_pos = [0, 1]
/// inner_tiles = [8, 2]
/// into %2 : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
@@ -371,7 +371,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp,
/// linalg.yield %4 : f32
/// } -> tensor<?x?x8x2xf32>
static FailureOr<GenericOp>
-bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp,
+bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, linalg::PackOp packOp,
const ControlPropagationFn &controlFn) {
auto genericOp = packOp.getSource().getDefiningOp<GenericOp>();
if (!genericOp)
@@ -404,11 +404,11 @@ bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp,
rewriter.setInsertionPoint(genericOp);
// We need to handle two cases:
- // 1) The tensor.pack destination is a tensor.empty. If this is the case, we
+ // 1) The linalg.pack destination is a tensor.empty. If this is the case, we
// create a new tensor.empty to avoid breaking dominance, as we are moving the
- // tensor.pack above the linalg.generic.
+ // linalg.pack above the linalg.generic.
// 2) The destination is not a tensor.empty. In this case we can replace only
- // if the destination of the tensor.pack dominates the linalg.generic.
+ // if the destination of the linalg.pack dominates the linalg.generic.
Value packOpDest = packOp.getDest();
if (!packOpDest.hasOneUse())
return failure();
@@ -453,13 +453,13 @@ bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp,
/// Wrapper pattern that applies bubbleUpPackOpThroughGenericOp method.
struct BubbleUpPackOpThroughGenericOpPattern
- : public OpRewritePattern<tensor::PackOp> {
+ : public OpRewritePattern<linalg::PackOp> {
public:
BubbleUpPackOpThroughGenericOpPattern(MLIRContext *context,
ControlPropagationFn fun)
- : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {}
+ : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {}
- LogicalResult matchAndRewrite(tensor::PackOp packOp,
+ LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
auto genericOp =
bubbleUpPackOpThroughGenericOp(rewriter, packOp, controlFn);
@@ -473,15 +473,15 @@ struct BubbleUpPackOpThroughGenericOpPattern
ControlPropagationFn controlFn;
};
-/// Propagate a tensor.pack operation up through a tensor.pad. The idea is to
+/// Propagate a linalg.pack operation up through a tensor.pad. The idea is to
/// add as many zero padding dimensions in `high` and `low` based on the number
/// of point loops.
-class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> {
+class BubbleUpPackThroughPadOp final : public OpRewritePattern<linalg::PackOp> {
public:
BubbleUpPackThroughPadOp(MLIRContext *context, ControlPropagationFn fun)
- : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {}
+ : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {}
- LogicalResult matchAndRewrite(tensor::PackOp packOp,
+ LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
auto padOp = packOp.getSource().getDefiningOp<tensor::PadOp>();
if (!padOp)
@@ -522,10 +522,10 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> {
ArrayRef<int64_t> outerDimsPerm = packOp.getOuterDimsPerm();
SmallVector<OpFoldResult> mixedTiles = packOp.getMixedTiles();
- auto empty = tensor::PackOp::createDestinationTensor(
+ auto empty = linalg::PackOp::createDestinationTensor(
rewriter, loc, padOp.getSource(), mixedTiles, innerDimsPos,
outerDimsPerm);
- auto sourcePack = rewriter.create<tensor::PackOp>(
+ auto sourcePack = rewriter.create<linalg::PackOp>(
loc, padOp.getSource(), empty, innerDimsPos, mixedTiles,
/*padding=*/std::nullopt, outerDimsPerm);
@@ -549,9 +549,9 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> {
// If the pad has more than one user, create an unpack on the new pad to
// replace the other uses.
if (!padOp->hasOneUse()) {
- auto unpackEmpty = tensor::UnPackOp::createDestinationTensor(
+ auto unpackEmpty = linalg::UnPackOp::createDestinationTensor(
rewriter, loc, newPadOp, mixedTiles, innerDimsPos, outerDimsPerm);
- Value unpackedPad = rewriter.create<tensor::UnPackOp>(
+ Value unpackedPad = rewriter.create<linalg::UnPackOp>(
loc, newPadOp, unpackEmpty, innerDimsPos, mixedTiles, outerDimsPerm);
rewriter.replaceAllUsesExcept(padOp, unpackedPad, sourcePack);
}
@@ -636,20 +636,20 @@ static int64_t applyPermutationAndReindexReassoc(
///
/// %collapsed = tensor.collapse_shape %in [[0, 1], 2]
/// : tensor<?x16x4xf32> into tensor<?x4xf32>
-/// %pack = tensor.pack %collapsed outer_dims_perm = [0, 1]
+/// %pack = linalg.pack %collapsed outer_dims_perm = [0, 1]
/// inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %empty
/// : tensor<?x4xf32> -> tensor<?x4x8x1xf32>
///
/// can be transformed into:
///
-/// %pack = tensor.pack %in outer_dims_perm = [1, 2]
+/// %pack = linalg.pack %in outer_dims_perm = [1, 2]
/// inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %empty
/// : tensor<?x16x4xf32> -> tensor<?x2x4x8x1xf32>
/// %collapsed = tensor.collapse_shape %pack [[0, 1], 2, 3, 4]
/// : tensor<?x2x4x8x1xf32> into tensor<?x4x8x1>
static LogicalResult
bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp,
- tensor::PackOp packOp,
+ linalg::PackOp packOp,
PatternRewriter &rewriter) {
SmallVector<int64_t> innerTileSizes = packOp.getStaticTiles();
ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos();
@@ -682,10 +682,10 @@ bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp,
reassocIndices[outerPos].end());
}
- auto emptyOp = tensor::PackOp::createDestinationTensor(
+ auto emptyOp = linalg::PackOp::createDestinationTensor(
rewriter, packOp.getLoc(), collapseOp.getSrc(), packOp.getMixedTiles(),
projectedInnerDimsPos, newOuterDimsPerm);
- auto newPackOp = rewriter.create<tensor::PackOp>(
+ auto newPackOp = rewriter.create<linalg::PackOp>(
packOp.getLoc(), collapseOp.getSrc(), emptyOp, projectedInnerDimsPos,
packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPerm);
@@ -742,20 +742,20 @@ projectDimsPosIntoReassocPos(ArrayRef<int64_t> dimsPos,
///
/// %expand = tensor.expand_shape %in [[0], [1, 2]]
/// : tensor<?x64xf32> into tensor<?x4x16xf32>
-/// %pack = tensor.pack %expand outer_dims_perm = [0, 1]
+/// %pack = linalg.pack %expand outer_dims_perm = [0, 1]
/// inner_dims_pos = [2] inner_tiles = [8] into %empty
/// : tensor<?x4x16xf32> -> tensor<?x4x2x8xf32>
///
/// can be transformed into:
///
-/// %pack = tensor.pack %in outer_dims_perm = [1, 2]
+/// %pack = linalg.pack %in outer_dims_perm = [1, 2]
/// inner_dims_pos = [1] inner_tiles = [8] into %empty
/// : tensor<?x64xf32> -> tensor<?x8x8xf32>
/// %expand = tensor.expand_shape %pack [[0], [1, 2], [3]]
/// : tensor<?x8x8xf32> into tensor<?x4x2x8xf32>
static LogicalResult
bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp,
- tensor::PackOp packOp,
+ linalg::PackOp packOp,
PatternRewriter &rewriter) {
// Outer dimensions permutation is not supported currently.
// TODO: Handle outer_dims_perm variants.
@@ -808,7 +808,7 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp,
// If reassociation is not possible, then reordering cannot happen.
// This can be caused by pack padding affecting previously expanded
// dimensions or packing extending dimensions.
- RankedTensorType newPackType = tensor::PackOp::inferPackedType(
+ RankedTensorType newPackType = linalg::PackOp::inferPackedType(
expandOp.getSrcType(), packOp.getStaticInnerTiles(),
projectedInnerDimsPos, /*outerDimsPerm=*/SmallVector<int64_t>{});
auto reassocExpand =
@@ -817,10 +817,10 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp,
return rewriter.notifyMatchFailure(
packOp, "could not reassociate dims after bubbling up");
- Value destTensor = tensor::PackOp::createDestinationTensor(
+ Value destTensor = linalg::PackOp::createDestinationTensor(
rewriter, packOp.getLoc(), expandOp.getSrc(), packOp.getMixedTiles(),
projectedInnerDimsPos, /*outerDimsPerm=*/SmallVector<int64_t>{});
- Value packedVal = rewriter.create<tensor::PackOp>(
+ Value packedVal = rewriter.create<linalg::PackOp>(
packOp.getLoc(), expandOp.getSrc(), destTensor, projectedInnerDimsPos,
packOp.getMixedTiles(), packOp.getPaddingValue(),
/*outerDimsPerm=*/SmallVector<int64_t>{});
@@ -833,12 +833,12 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp,
}
class BubbleUpPackOpThroughReshapeOp final
- : public OpRewritePattern<tensor::PackOp> {
+ : public OpRewritePattern<linalg::PackOp> {
public:
BubbleUpPackOpThroughReshapeOp(MLIRContext *context, ControlPropagationFn fun)
- : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {}
+ : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {}
- LogicalResult matchAndRewrite(tensor::PackOp packOp,
+ LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
Operation *srcOp = packOp.getSource().getDefiningOp();
// Currently only support when the pack op is the only user.
@@ -877,7 +877,7 @@ class BubbleUpPackOpThroughReshapeOp final
///
/// For example:
///
-/// %unpack = tensor.unpack %in outer_dims_perm = [0, 1]
+/// %unpack = linalg.unpack %in outer_dims_perm = [0, 1]
/// inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %empty
/// : tensor<?x32x8x8xf32> -> tensor<?x256xf32>
/// %expanded = tensor.expand_shape %unpack [[0, 1], [2]]
@@ -887,11 +887,11 @@ class BubbleUpPackOpThroughReshapeOp final
///
/// %expanded = tensor.expand_shape %ain [[0, 1], [2], [3], [4]]
/// : tensor<?x32x8x8xf32> into tensor<?x32x32x8x8xf32>
-/// %unpack = tensor.unpack %expanded outer_dims_perm = [0, 1, 2]
+/// %unpack = linalg.unpack %expanded outer_dims_perm = [0, 1, 2]
/// inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %empty
/// : tensor<?x32x32x8x8xf32> -> tensor<?x256x256xf32>
static LogicalResult pushDownUnPackOpThroughExpandShape(
- tensor::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp,
+ linalg::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp,
PatternRewriter &rewriter, ControlPropagationFn controlFn) {
// User controlled propagation function.
if (!controlFn(&expandOp.getSrcMutable()))
@@ -943,16 +943,16 @@ static LogicalResult pushDownUnPackOpThroughExpandShape(
nextPos += 1;
}
- RankedTensorType newExpandType = tensor::PackOp::inferPackedType(
+ RankedTensorType newExpandType = linalg::PackOp::inferPackedType(
expandTy, innerTileSizes, projectedInnerDimsPos, newOuterDimsPerm);
auto newExpandOp = rewriter.create<tensor::ExpandShapeOp>(
expandOp.getLoc(), newExpandType, unPackOp.getSource(),
newReassocIndices);
- auto emptyOp = tensor::UnPackOp::createDestinationTensor(
+ auto emptyOp = linalg::UnPackOp::createDestinationTensor(
rewriter, unPackOp.getLoc(), newExpandOp, unPackOp.getMixedTiles(),
projectedInnerDimsPos, newOuterDimsPerm);
- auto newUnPackOp = rewriter.create<tensor::UnPackOp>(
+ auto newUnPackOp = rewriter.create<linalg::UnPackOp>(
unPackOp.getLoc(), newExpandOp.getResult(), emptyOp,
projectedInnerDimsPos, unPackOp.getMixedTiles(), newOuterDimsPerm);
rewriter.replaceOp(expandOp, newUnPackOp);
@@ -961,14 +961,14 @@ static LogicalResult pushDownUnPackOpThroughExpandShape(
}
class PushDownUnPackOpThroughReshapeOp final
- : public OpRewritePattern<tensor::UnPackOp> {
+ : public OpRewritePattern<linalg::UnPackOp> {
public:
PushDownUnPackOpThroughReshapeOp(MLIRContext *context,
ControlPropagationFn fun)
- : OpRewritePattern<tensor::UnPackOp>(context), controlFn(std::move(fun)) {
+ : OpRewritePattern<linalg::UnPackOp>(context), controlFn(std::move(fun)) {
}
- LogicalResult matchAndRewrite(tensor::UnPackOp unPackOp,
+ LogicalResult matchAndRewrite(linalg::UnPackOp unPackOp,
PatternRewriter &rewriter) const override {
Value result = unPackOp.getResult();
// Currently only support unpack op with the single user.
@@ -1001,7 +1001,7 @@ class PushDownUnPackOpThroughReshapeOp final
static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) {
OpOperand *unPackedOperand = nullptr;
for (OpOperand &operand : genericOp->getOpOperands()) {
- auto unPackOp = operand.get().getDefiningOp<tensor::UnPackOp>();
+ auto unPackOp = operand.get().getDefiningOp<linalg::UnPackOp>();
if (!unPackOp)
continue;
if (unPackedOperand)
@@ -1013,9 +1013,9 @@ static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) {
return unPackedOperand;
}
-/// Push down a tensor.unpack op through a generic op.
+/// Push down a linalg.unpack op through a generic op.
/// The new generic op works on packed domain; pack ops are created for input
-/// and output operands. A tensor.unpack op is inserted right after the packed
+/// and output operands. A linalg.unpack op is inserted right after the packed
/// generic. E.g.
///
/// #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
@@ -1023,7 +1023,7 @@ static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) {
/// %arg0 = tensor<12x2x56x56x32xf32> // packed arg.
///
/// %0 = tensor.empty() : tensor<12x56x56x64xf32>
-/// %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2]
+/// %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2]
/// inner_dims_pos = [3] inner_tiles = [32] into %0
/// %2 = linalg.generic {indexing_maps = [#map],
/// iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
@@ -1044,7 +1044,7 @@ static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) {
/// ^bb0(%out : f32):
/// linalg.yield %out : f32
/// } -> tensor<12x2x56x56x32xf32>
-/// %2 = tensor.unpack %1 outer_dims_perm = [0, 3, 1, 2]
+/// %2 = linalg.unpack %1 outer_dims_perm = [0, 3, 1, 2]
/// inner_dims_pos = [3] inner_tiles = [32] into %0
///
static FailureOr<std::tuple<GenericOp, Value>>
@@ -1063,8 +1063,8 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp,
OpOperand *unPackedOperand = *(maybeUnPackedOperand);
// Extract packing information.
- tensor::UnPackOp producerUnPackOp =
- unPackedOperand->get().getDefiningOp<tensor::UnPackOp>();
+ linalg::UnPackOp producerUnPackOp =
+ unPackedOperand->get().getDefiningOp<linalg::UnPackOp>();
assert(producerUnPackOp && "expect a valid UnPackOp");
if (!controlFn(unPackedOperand))
@@ -1079,7 +1079,7 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp,
auto [packedOutOperand, packedOutIndexingMap] =
getOrCreatePackedViewOfOperand(rewriter, genericOp.getLoc(), *packInfo,
genericOp, genericOp.getDpsInitOperand(0));
- auto destPack = packedOutOperand.getDefiningOp<tensor::PackOp>();
+ auto destPack = packedOutOperand.getDefiningOp<linalg::PackOp>();
// If the dps init operand of the generic is a tensor.empty, do not pack it
// and forward the new tensor.empty as a destination.
@@ -1108,7 +1108,7 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp,
// Insert an unPackOp right after the packed generic.
Value unPackOpRes =
rewriter
- .create<tensor::UnPackOp>(genericOp.getLoc(), newResult,
+ .create<linalg::UnPackOp>(genericOp.getLoc(), newResult,
destPack.getSource(), innerDimsPos,
mixedTiles, outerDimsPerm)
.getResult();
@@ -1137,7 +1137,7 @@ struct PushDownUnPackOpThroughGenericOp : public OpRewritePattern<GenericOp> {
ControlPropagationFn controlFn;
};
-/// Propagate a tensor.unpack operation through a tensor.pad. The idea is to
+/// Propagate a linalg.unpack operation through a tensor.pad. The idea is to
/// add as many zero padding dimensions in `high` and `low` based on the number
/// of point loops.
struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> {
@@ -1146,8 +1146,8 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> {
LogicalResult matchAndRewrite(tensor::PadOp padOp,
PatternRewriter &rewriter) const override {
- tensor::UnPackOp unpackOp =
- padOp.getSource().getDefiningOp<tensor::UnPackOp>();
+ linalg::UnPackOp unpackOp =
+ padOp.getSource().getDefiningOp<linalg::UnPackOp>();
if (!unpackOp)
return failure();
@@ -1185,12 +1185,12 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> {
loc, /*result=*/Type(), unpackOp.getSource(), lowPad, highPad,
paddingVal, padOp.getNofold());
- // Inject the tensor.unpack right after the packed padOp.
+ // Inject the linalg.unpack right after the packed padOp.
Value outputUnPack = rewriter.create<tensor::EmptyOp>(
loc, padOp.getResultType().getShape(),
padOp.getResultType().getElementType());
- Value replacement = rewriter.create<tensor::UnPackOp>(
+ Value replacement = rewriter.create<linalg::UnPackOp>(
loc, newPadOp.getResult(), outputUnPack, innerDimsPos,
unpackOp.getMixedTiles(), outerDimsPerm);
rewriter.replaceOp(padOp, replacement);
diff --git a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
similarity index 90%
rename from mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
rename to mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
index 3566714c6529e3..0984b6988b93b6 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
@@ -13,7 +13,7 @@
#include "mlir/IR/PatternMatch.h"
namespace mlir {
-namespace tensor {
+namespace linalg {
namespace {
/// Returns the number of shape sizes that is either dynamic or greater than 1.
@@ -201,7 +201,7 @@ struct FoldPadWithPackOp : public OpRewritePattern<PackOp> {
LogicalResult matchAndRewrite(PackOp packOp,
PatternRewriter &rewriter) const override {
- auto padOp = packOp.getSource().getDefiningOp<PadOp>();
+ auto padOp = packOp.getSource().getDefiningOp<tensor::PadOp>();
if (!padOp || padOp.getNofold() || !padOp.hasZeroLowPad())
return failure();
@@ -224,10 +224,11 @@ struct FoldPadWithPackOp : public OpRewritePattern<PackOp> {
/// Fold a `unpack` -> `extract_slice` into the `unpack` since it already
/// has extract_slice semantics.
-struct FoldUnpackWithExtractSliceOp : public OpRewritePattern<ExtractSliceOp> {
- using OpRewritePattern<ExtractSliceOp>::OpRewritePattern;
+struct FoldUnpackWithExtractSliceOp
+ : public OpRewritePattern<tensor::ExtractSliceOp> {
+ using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(ExtractSliceOp sliceOp,
+ LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
PatternRewriter &rewriter) const override {
auto unpackOp = sliceOp.getSource().getDefiningOp<UnPackOp>();
if (!unpackOp)
@@ -247,7 +248,7 @@ struct FoldUnpackWithExtractSliceOp : public OpRewritePattern<ExtractSliceOp> {
// Create a new empty output tensor.
Type elementType = unpackOp.getDestType().getElementType();
- Value output = rewriter.create<EmptyOp>(
+ Value output = rewriter.create<tensor::EmptyOp>(
sliceOp.getLoc(), sliceOp.getMixedSizes(), elementType);
rewriter.replaceOpWithNewOp<UnPackOp>(
sliceOp, unpackOp.getSource(), output, unpackOp.getInnerDimsPos(),
@@ -474,6 +475,50 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp
return success();
}
};
+
+/// tensor.empty does not define any tensor contents, so an unpadded pack
+/// can be folded away.
+struct FoldEmptyTensorWithPackOp : public OpRewritePattern<PackOp> {
+ using OpRewritePattern<PackOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(PackOp packOp,
+ PatternRewriter &rewriter) const override {
+ // Check for tensor.empty source.
+ auto emptyOp = packOp.getSource().getDefiningOp<tensor::EmptyOp>();
+ if (!emptyOp)
+ return failure();
+
+    // Bail out on padding: a padded pack defines its padding elements,
+    // so it cannot simply be replaced by its (undefined) destination.
+ if (packOp.getPaddingValue())
+ return rewriter.notifyMatchFailure(packOp, "expects no padding value");
+
+ // Replace the pack directly with its destination.
+ rewriter.replaceOp(packOp, packOp.getDest());
+
+ return success();
+ }
+};
+
+/// tensor.empty does not define any tensor contents, so unpacking from it
+/// yields undefined values; the unpack folds away to its destination.
+struct FoldEmptyTensorWithUnPackOp : public OpRewritePattern<UnPackOp> {
+ using OpRewritePattern<UnPackOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(UnPackOp unPackOp,
+ PatternRewriter &rewriter) const override {
+ // Check for tensor.empty source.
+ auto emptyOp = unPackOp.getSource().getDefiningOp<tensor::EmptyOp>();
+ if (!emptyOp)
+ return failure();
+
+ // Replace the unpack directly with its destination.
+ rewriter.replaceOp(unPackOp, unPackOp.getDest());
+
+ return success();
+ }
+};
+
} // namespace
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns) {
@@ -490,5 +535,11 @@ void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns) {
patterns.getContext());
}
-} // namespace tensor
+void populateFoldPackUnpackIntoTensorEmptyPatterns(
+ RewritePatternSet &patterns) {
+ patterns.add<FoldEmptyTensorWithPackOp, FoldEmptyTensorWithUnPackOp>(
+ patterns.getContext());
+}
+
+} // namespace linalg
} // namespace mlir
diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
index b7764da26a7f47..faa7bbf9d168a1 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
@@ -10,14 +10,17 @@
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Interfaces/TilingInterface.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include <optional>
using namespace mlir;
@@ -563,6 +566,648 @@ struct LinalgOpPartialReductionInterface
}
};
+template <typename OpTy>
+static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
+ OpBuilder &builder) {
+ static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+ "applies to only pack or unpack operations");
+ OpBuilder::InsertionGuard g(builder);
+ int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
+ : op.getDestRank();
+ OpFoldResult zero = builder.getIndexAttr(0);
+ OpFoldResult one = builder.getIndexAttr(1);
+ ReifiedRankedShapedTypeDims resultShape;
+ (void)reifyResultShapes(builder, op, resultShape);
+ SmallVector<Range> loopBounds(rank);
+ for (auto dim : llvm::seq<int64_t>(0, rank)) {
+ loopBounds[dim].offset = zero;
+ loopBounds[dim].stride = one;
+ loopBounds[dim].size = resultShape[0][dim];
+ }
+ return loopBounds;
+}
+
+static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
+ SmallVector<OpFoldResult> &sizes,
+ ArrayRef<int64_t> permutation) {
+ if (permutation.empty())
+ return;
+ applyPermutationToVector<OpFoldResult>(offsets, permutation);
+ applyPermutationToVector<OpFoldResult>(sizes, permutation);
+}
+
+struct PackOpTiling
+ : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {
+
+ SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
+ // Note that here we only consider untiled dimensions and outer tiled data
+ // dimensions, the inner tiled data dimensions are materialized when
+ // building the body of the operation.
+ auto packOp = cast<PackOp>(op);
+ SmallVector<utils::IteratorType> iteratorTypes(
+ packOp.getSourceRank(), utils::IteratorType::parallel);
+ return iteratorTypes;
+ }
+
+ SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
+ return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
+ }
+
+ FailureOr<TilingResult>
+ getTiledImplementation(Operation *op, OpBuilder &b,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes) const {
+ auto packOp = cast<PackOp>(op);
+ Location loc = packOp.getLoc();
+
+ // The tiling is applied on interchanged dimensions. We have to undo the
+ // interchange to map sizes and offsets to the original input.
+ int64_t inputRank = packOp.getSourceRank();
+ SmallVector<OpFoldResult> origOffsets(offsets);
+ SmallVector<OpFoldResult> origSizes(sizes);
+ applyPermToRange(origOffsets, origSizes,
+ invertPermutationVector(packOp.getOuterDimsPerm()));
+
+ DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
+ packOp.getDimAndTileMapping();
+ SmallVector<OpFoldResult> srcDimValues =
+ tensor::getMixedSizes(b, loc, packOp.getSource());
+ SmallVector<OpFoldResult> inputIndices, inputSizes;
+ for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
+ using AV = affine::AffineValueExpr;
+ affine::AffineBuilder ab(b, loc);
+ AffineExpr dim0, dim1, sym;
+ bindDims(b.getContext(), dim0, dim1);
+ bindSymbols(b.getContext(), sym);
+ if (dimAndTileMapping.count(dim)) {
+ // If the data dimension is tiled, the i-th index is the product of
+ // offset_i and tile_i, and the i-th size is the product of sizes_i and
+ // tile_i.
+ auto avOffset = AV(dim0).bind(origOffsets[dim]);
+ auto avSize = AV(dim0).bind(origSizes[dim]);
+ auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
+ inputIndices.push_back(ab.mul(avOffset, avTileSize));
+ inputSizes.push_back(ab.mul(avSize, avTileSize));
+ } else {
+ inputIndices.push_back(origOffsets[dim]);
+ inputSizes.push_back(origSizes[dim]);
+ }
+
+ // Limit the size of the input operand for incomplete tiles.
+ if (packOp.getPaddingValue()) {
+ OpFoldResult dimSize = srcDimValues[dim];
+ auto avDimSize = AV(dim0).bind(dimSize);
+ auto avInputIdx = AV(dim1).bind(inputIndices.back());
+ inputSizes.back() =
+ ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
+ }
+ }
+
+ auto oneAttr = b.getI64IntegerAttr(1);
+ SmallVector<OpFoldResult> strides(inputRank, oneAttr);
+
+ SmallVector<Value> tiledOperands;
+ auto sourceSlice = b.create<tensor::ExtractSliceOp>(
+ loc, packOp.getSource(), inputIndices, inputSizes, strides);
+ tiledOperands.push_back(sourceSlice);
+
+ SmallVector<OpFoldResult> outputOffsets, outputSizes;
+ if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
+ outputSizes)))
+ return {};
+
+ strides.append(packOp.getDestRank() - inputRank, oneAttr);
+ auto outSlice = b.create<tensor::ExtractSliceOp>(
+ loc, packOp.getDest(), outputOffsets, outputSizes, strides);
+ tiledOperands.push_back(outSlice);
+
+ if (auto val = packOp.getPaddingValue())
+ tiledOperands.push_back(val);
+ for (auto tile : packOp.getInnerTiles())
+ tiledOperands.push_back(tile);
+
+ Operation *tiledPackOp = b.create<PackOp>(
+ loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
+
+ return TilingResult{
+ {tiledPackOp},
+ SmallVector<Value>(tiledPackOp->getResults()),
+ llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
+ }
+
+ LogicalResult
+ getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes,
+ SmallVector<OpFoldResult> &resultOffsets,
+ SmallVector<OpFoldResult> &resultSizes) const {
+ // The iteration domain is over outer dimensions of packed layout. In this
+ // context, the outer dimensions of `resultOffsets` are `offsets`. The
+ // inner dimensions of `resultOffsets` are zeros because tiling is not
+ // applied to them.
+ auto packOp = cast<PackOp>(op);
+ int64_t inputRank = packOp.getSourceRank();
+ int64_t outputRank = packOp.getDestRank();
+ auto zeroAttr = b.getI64IntegerAttr(0);
+ resultOffsets.assign(offsets.begin(), offsets.end());
+ resultOffsets.append(outputRank - inputRank, zeroAttr);
+
+ ReifiedRankedShapedTypeDims outputShape;
+ (void)reifyResultShapes(b, packOp, outputShape);
+ resultSizes.assign(sizes.begin(), sizes.end());
+ for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
+ resultSizes.push_back(outputShape[0][dataTileDim]);
+
+ return success();
+ }
+
+ FailureOr<TilingResult>
+ generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes) const {
+ auto packOp = cast<PackOp>(op);
+ int64_t numTiles = packOp.getInnerDimsPos().size();
+
+ // tensor.pack op is fusible (as a producer) only if full inner tiles are
+ // iterated or inner dims are not tiled. Otherwise, it will generate a
+ // sequence of non-trivial ops (for partial tiles).
+ for (auto offset : offsets.take_back(numTiles))
+ if (!isConstantIntValue(offset, 0))
+ return failure();
+
+ for (auto iter :
+ llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
+ if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
+ return failure();
+
+ FailureOr<TilingResult> tilingResult = getTiledImplementation(
+ op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
+ if (failed(tilingResult))
+ return failure();
+ return tilingResult.value();
+ }
+
+ /// Method to return the position of iteration domain tile computed by the
+ /// tiled operation. In current `tensor.pack` context, the `resultOffsets` and
+ /// `resultSizes` only cover outer dimensions.
+ LogicalResult getIterationDomainTileFromOperandTile(
+ Operation *op, OpBuilder &b, unsigned operandNumber,
+ ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
+ SmallVectorImpl<OpFoldResult> &resultOffsets,
+ SmallVectorImpl<OpFoldResult> &resultSizes) const {
+ if (operandNumber != 0)
+ return failure();
+
+ auto packOp = cast<PackOp>(op);
+ // It is not trivial to infer dest tile from source tile if `packOp` has
+ // padding semantic.
+ if (packOp.getPaddingValue())
+ return failure();
+
+ Location loc = packOp.getLoc();
+
+ SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
+ DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
+ packOp.getDimAndTileMapping();
+ for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
+ if (dimAndTileMapping.count(dim)) {
+ FailureOr<int64_t> cstSize =
+ ValueBoundsConstraintSet::computeConstantBound(
+ presburger::BoundType::UB, sizes[dim],
+ /*stopCondition=*/nullptr, /*closedUB=*/true);
+ std::optional<int64_t> cstInnerSize =
+ getConstantIntValue(dimAndTileMapping[dim]);
+ // Currently fusing `packOp` as consumer only expects perfect tiling
+ // scenario because even if without padding semantic, the `packOp` may
+ // also yield incomplete tiles. E.g. tensor<30xf32> -> tensor<5x6xf32>,
+ // where the `tileSize` from the operand of `packOp` is 5, which is not
+ // exactly divisible by `innerTile`(=6) of `packOp`. As a result:
+ // 1. the first slice is extracted from (0) to (4) and inserted into
+ // (0,0)~(0,4) at first row.
+ // 2. the second slice is extracted from (5) to (9) and SHOULD BE
+ // respectively inserted into two rows with different lengths, including
+ // first row: (0,5) and second row (1,0)~(1,3). It is hard to coordinate
+ // them, thus adding the constraint below to bypass them temporarily. In
+ // other words, we can only support tiling with a consumer if the tile
+ // size for the producer is a multiple of the inner tile size for the
+ // packed dimensions at this moment.
+ if (failed(cstSize) || !cstInnerSize || *cstSize % *cstInnerSize != 0) {
+ return failure();
+ }
+
+ using AV = affine::AffineValueExpr;
+ affine::AffineBuilder ab(b, loc);
+ AffineExpr dim0, sym;
+ bindDims(b.getContext(), dim0);
+ bindSymbols(b.getContext(), sym);
+ auto avOffset = AV(dim0).bind(offsets[dim]);
+ auto avSize = AV(dim0).bind(sizes[dim]);
+ auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
+ outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
+ outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
+ } else {
+ outerDimOffsets.push_back(offsets[dim]);
+ outerDimSizes.push_back(sizes[dim]);
+ }
+ }
+ applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
+ resultOffsets = outerDimOffsets;
+ resultSizes = outerDimSizes;
+ return success();
+ }
+
+ /// Method to return the tiled implementation of tensor.pack as a consumer.
+ FailureOr<TilingResult> getTiledImplementationFromOperandTile(
+ Operation *op, OpBuilder &b, unsigned operandNumber,
+ ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes) const {
+ if (operandNumber != 0)
+ return failure();
+
+ auto packOp = cast<PackOp>(op);
+ Location loc = packOp.getLoc();
+
+ int64_t inputRank = packOp.getSourceRank();
+ auto oneAttr = b.getI64IntegerAttr(1);
+ SmallVector<OpFoldResult> strides(inputRank, oneAttr);
+
+ SmallVector<Value> tiledOperands;
+ auto sourceSlice = b.create<tensor::ExtractSliceOp>(
+ loc, packOp.getSource(), offsets, sizes, strides);
+ tiledOperands.push_back(sourceSlice);
+
+ SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
+ if (failed(getIterationDomainTileFromOperandTile(
+ op, b, /*operandNumber=*/0, offsets, sizes, outerDimOffsets,
+ outerDimSizes)))
+ return failure();
+
+ SmallVector<OpFoldResult> outputOffsets, outputSizes;
+ if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
+ outputOffsets, outputSizes)))
+ return failure();
+
+ strides.append(packOp.getDestRank() - inputRank, oneAttr);
+ auto outSlice = b.create<tensor::ExtractSliceOp>(
+ loc, packOp.getDest(), outputOffsets, outputSizes, strides);
+ tiledOperands.push_back(outSlice);
+
+ assert(!packOp.getPaddingValue() && "Expect no padding semantic");
+ for (auto tile : packOp.getInnerTiles())
+ tiledOperands.push_back(tile);
+
+ Operation *tiledPackOp = b.create<PackOp>(
+ loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
+
+ return TilingResult{
+ {tiledPackOp},
+ SmallVector<Value>(tiledPackOp->getResults()),
+ llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
+ }
+};
+
+struct UnpackTileDimInfo {
+ bool isAlignedToInnerTileSize;
+ OpFoldResult sourceOffset;
+ OpFoldResult sourceSize;
+ OpFoldResult resultOffset;
+ OpFoldResult destExpandedSize;
+};
+
+/// Returns the needed information for tiling unpack op on `tileDim` with given
+/// `tileOffset` and `tileSize`. For more details, see the comment of the
+/// `getTiledImplementation`.
+static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
+ int64_t tileDim,
+ OpFoldResult tileOffset,
+ OpFoldResult tileSize) {
+ UnpackTileDimInfo info;
+ Attribute zeroAttr = b.getIndexAttr(0);
+ Attribute oneAttr = b.getIndexAttr(1);
+ DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
+ unpackOp.getDimAndTileMapping();
+ // The dimension is not one of packed data dimension.
+ if (!dimAndTileMapping.count(tileDim)) {
+ info.isAlignedToInnerTileSize = true;
+ info.sourceOffset = tileOffset;
+ info.sourceSize = tileSize;
+ info.resultOffset = zeroAttr;
+ info.destExpandedSize = tileSize;
+ return info;
+ }
+
+ Location loc = unpackOp.getLoc();
+ using AV = affine::AffineValueExpr;
+ affine::AffineBuilder ab(b, loc);
+ AffineExpr dim0, dim1, sym0;
+ bindDims(b.getContext(), dim0, dim1);
+ bindSymbols(b.getContext(), sym0);
+
+ OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
+
+ info.isAlignedToInnerTileSize = false;
+ FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
+ presburger::BoundType::UB, tileSize,
+ /*stopCondition=*/nullptr, /*closedUB=*/true);
+ std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
+ if (!failed(cstSize) && cstInnerSize) {
+ if (*cstSize % *cstInnerSize == 0)
+ info.isAlignedToInnerTileSize = true;
+
+ // If the tiling size equals the inner tiling size, the outer dims are
+ // always 1.
+ if (*cstInnerSize == *cstSize) {
+ auto lhs = AV(dim0).bind(tileOffset);
+ auto rhs = AV(dim1).bind(innerTileSize);
+ info.sourceOffset = ab.floor(lhs, rhs);
+ info.sourceSize = oneAttr;
+ info.resultOffset = zeroAttr;
+ info.destExpandedSize = tileSize;
+ return info;
+ }
+ }
+
+ if (info.isAlignedToInnerTileSize) {
+ info.sourceOffset =
+ ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
+ info.resultOffset = zeroAttr;
+ info.destExpandedSize = tileSize;
+
+ // The ceilDiv is needed here because there could be an incomplete tile
+ // even in perfect tiling cases. E.g.,
+ // %0 = unpack tensor<33x2xf32> into tensor<64xf32>
+ // If the tiling size is 32, there will be 3 tiles. Two of them have
+ // size=32; one of them has size=2. The size is represented using an
+ // affine_min op; we need ceilDiv.
+ info.sourceSize =
+ ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
+ return info;
+ }
+
+ affine::DivModValue firstCoord = affine::getDivMod(
+ b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
+ getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+ OpFoldResult tileExclusiveBound =
+ ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
+ affine::DivModValue lastCoord = affine::getDivMod(
+ b, loc,
+ getValueOrCreateConstantIndexOp(
+ b, loc,
+ ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
+ getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+
+ OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
+ AV(dim1).bind(firstCoord.quotient));
+ info.sourceSize =
+ ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
+ info.sourceOffset = firstCoord.quotient;
+ info.resultOffset = firstCoord.remainder;
+ // Do not create affine ops for the expanded size because the affine op is too
+ // complicated, which would trigger an issue in affine ops simplification.
+ info.destExpandedSize = b.createOrFold<arith::MulIOp>(
+ loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
+ getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+ return info;
+}
+
+struct UnPackOpTiling
+ : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {
+
+ SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
+ auto unpackOp = cast<UnPackOp>(op);
+ SmallVector<utils::IteratorType> iteratorTypes(
+ unpackOp.getDestRank(), utils::IteratorType::parallel);
+ return iteratorTypes;
+ }
+
+ SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
+ return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
+ }
+
+ /// There are two cases in tiling unpack ops. If the tiling size is aligned to
+ /// the inner tile size, the corresponding tiles of the source are all complete.
+ /// Otherwise, there are incomplete tiles. We will need to expand the slice
+ /// of the source to get complete tiles. The tiled unpack op unpacks more
+ /// data from the source, so we'll need an extract_slice op to shift and
+ /// truncate the output.
+ /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
+ /// coordinates of the second tile (i.e., result[15..31]) are
+ /// [(1, 7), (2, 0), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
+ /// row are incomplete tiles. To represent the unpack op, we have to complete
+ /// the rows. I.e., the input coordinates would start with (1, 0) and end with
+ /// (3, 7). In this context, the tiled unpack produces (3 * n) elements
+ /// because there are 3 rows in total. Followed by a tensor.extract_slice op,
+ /// we can get the actual result.
+ FailureOr<TilingResult>
+ getTiledImplementation(Operation *op, OpBuilder &b,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes) const {
+ auto unpackOp = cast<UnPackOp>(op);
+ int64_t srcRank = unpackOp.getSourceRank();
+ int64_t destRank = unpackOp.getDestRank();
+ int64_t numInnerTiles = srcRank - destRank;
+ Location loc = unpackOp.getLoc();
+
+ // The perfect tiling case indicates that the tiling sizes are multiples of
+ // inner_tile_size. In this context, no extra data is needed when
+ // representing the tiled unpack op.
+ bool isPerfectTilingCase = true;
+ Attribute oneAttr = b.getIndexAttr(1);
+ SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
+ SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
+ SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
+ for (auto dim : llvm::seq<int64_t>(0, destRank)) {
+ UnpackTileDimInfo info =
+ getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
+ if (!info.isAlignedToInnerTileSize)
+ isPerfectTilingCase = false;
+ sliceSrcIndices.push_back(info.sourceOffset);
+ sliceSrcSizes.push_back(info.sourceSize);
+ destExpandedSizes.push_back(info.destExpandedSize);
+ resultOffsetsFromDest.push_back(info.resultOffset);
+ }
+
+ // The tiling is applied on destination dimensions. We have to apply the
+ // interchange on source dimensions if outer_dims_perm is set.
+ applyPermToRange(sliceSrcIndices, sliceSrcSizes,
+ unpackOp.getOuterDimsPerm());
+ Attribute zeroAttr = b.getIndexAttr(0);
+ sliceSrcIndices.append(numInnerTiles, zeroAttr);
+ sliceSrcSizes.append(unpackOp.getMixedTiles());
+ sliceSrcStrides.append(numInnerTiles, oneAttr);
+ SmallVector<Operation *> generatedSlices;
+ tensor::ExtractSliceOp sliceSource = b.create<tensor::ExtractSliceOp>(
+ loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
+ sliceSrcStrides);
+ generatedSlices.push_back(sliceSource);
+
+ SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
+ Value sliceDest;
+ if (isPerfectTilingCase) {
+ auto destSliceOp = b.create<tensor::ExtractSliceOp>(
+ loc, unpackOp.getDest(), offsets, sizes, destStrides);
+ sliceDest = destSliceOp;
+ generatedSlices.push_back(destSliceOp);
+ } else {
+ sliceDest = b.create<tensor::EmptyOp>(
+ loc, destExpandedSizes, unpackOp.getDestType().getElementType());
+ }
+
+ SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
+ for (auto tile : unpackOp.getInnerTiles())
+ tiledOperands.push_back(tile);
+
+ Operation *tiledUnpackOp = b.create<UnPackOp>(
+ loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());
+
+ if (isPerfectTilingCase)
+ return TilingResult{{tiledUnpackOp},
+ SmallVector<Value>(tiledUnpackOp->getResults()),
+ generatedSlices};
+
+ auto extractSlice = b.create<tensor::ExtractSliceOp>(
+ loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
+ destStrides);
+ return TilingResult{
+ {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
+ }
+
+ LogicalResult
+ getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes,
+ SmallVector<OpFoldResult> &resultOffsets,
+ SmallVector<OpFoldResult> &resultSizes) const {
+ resultOffsets = llvm::to_vector(offsets);
+ resultSizes = llvm::to_vector(sizes);
+ return success();
+ }
+
+ FailureOr<TilingResult>
+ generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<OpFoldResult> sizes) const {
+ FailureOr<TilingResult> tilingResult =
+ getTiledImplementation(op, b, offsets, sizes);
+ if (failed(tilingResult))
+ return failure();
+ return tilingResult.value();
+ }
+
+ /// Method to return the position of iteration domain tile computed by the
+ /// tiled operation.
+ LogicalResult getIterationDomainTileFromOperandTile(
+ Operation *op, OpBuilder &b, unsigned operandNumber,
+ ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
+ SmallVectorImpl<OpFoldResult> &resultOffsets,
+ SmallVectorImpl<OpFoldResult> &resultSizes) const {
+ auto unPackOp = cast<UnPackOp>(op);
+ // If the operand tile is the dest, then no adjustment is needed.
+ if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
+ resultOffsets = llvm::to_vector(offsets);
+ resultSizes = llvm::to_vector(sizes);
+ return success();
+ }
+ Location loc = unPackOp.getLoc();
+
+ int64_t numTiles = unPackOp.getInnerDimsPos().size();
+ auto destOffsets = offsets.drop_back(numTiles);
+ auto destSizes = sizes.drop_back(numTiles);
+ // The tiling is applied on interchanged dimensions. We have to undo the
+ // interchange to map sizes and offsets to the original input.
+ int64_t outputRank = unPackOp.getDestRank();
+ ReifiedRankedShapedTypeDims reifiedReturnShapes;
+ if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
+ return failure();
+ SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
+ SmallVector<OpFoldResult> origOffsets(destOffsets);
+ SmallVector<OpFoldResult> origSizes(destSizes);
+ applyPermToRange(origOffsets, origSizes,
+ invertPermutationVector(unPackOp.getOuterDimsPerm()));
+
+ DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
+ unPackOp.getDimAndTileMapping();
+
+ for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
+ using AV = affine::AffineValueExpr;
+ affine::AffineBuilder ab(b, loc);
+ AffineExpr dim0, dim1, sym0;
+ bindDims(b.getContext(), dim0, dim1);
+ bindSymbols(b.getContext(), sym0);
+ if (dimAndTileMapping.count(dim)) {
+ // If the data dimension is tiled, the i-th index is the product of
+ // offset_i and tile_i, and the i-th size is the product of sizes_i and
+ // tile_i. The sizes must be clamped to the sizes of the unpack result.
+ auto avOffset = AV(dim0).bind(origOffsets[dim]);
+ auto avSize = AV(dim0).bind(origSizes[dim]);
+ auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
+ auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
+ resultOffsets.push_back(ab.mul(avOffset, avTileSize));
+ auto avResultOffset = AV(dim1).bind(resultOffsets.back());
+ resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
+ ab.sub(avResultSize, avResultOffset)}));
+ } else {
+ resultOffsets.push_back(origOffsets[dim]);
+ resultSizes.push_back(origSizes[dim]);
+ }
+ }
+ return success();
+ }
+
+ /// Method to return the tiled implementation of tensor.unpack as a consumer.
+ FailureOr<TilingResult> getTiledImplementationFromOperandTile(
+ Operation *op, OpBuilder &b, unsigned operandNumber,
+ ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes) const {
+ auto unPackOp = cast<UnPackOp>(op);
+ // tensor.unpack op is fusible (as a consumer) only if inner dims are not
+ // tiled.
+ int64_t numTiles = unPackOp.getInnerDimsPos().size();
+ for (auto iter :
+ llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
+ if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
+ return failure();
+ }
+
+ Location loc = unPackOp.getLoc();
+
+ // Fetch offset/size for creating the slice of the dest operand of
+ // unpack op.
+ SmallVector<OpFoldResult> outputOffsets, outputSizes;
+ if (failed(getIterationDomainTileFromOperandTile(
+ op, b, /*operandNumber=*/0, offsets, sizes, outputOffsets,
+ outputSizes)))
+ return failure();
+
+ auto oneAttr = b.getI64IntegerAttr(1);
+ int64_t outputRank = unPackOp.getDestRank();
+ SmallVector<OpFoldResult> strides(outputRank, oneAttr);
+
+ SmallVector<Value> tiledOperands;
+ // Create slice of the dest operand.
+ auto extractDestSlice = b.create<tensor::ExtractSliceOp>(
+ loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
+ tiledOperands.push_back(extractDestSlice);
+
+ SmallVector<OpFoldResult> inputOffsets, inputSizes;
+ strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
+ // Create slice of the source operand.
+ auto extractSourceSlice = b.create<tensor::ExtractSliceOp>(
+ loc, unPackOp.getSource(), offsets, sizes, strides);
+ tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
+ for (auto tile : unPackOp.getInnerTiles())
+ tiledOperands.push_back(tile);
+
+ // Create tiled unpack op.
+ Operation *tiledUnPackOp =
+ b.create<UnPackOp>(loc, TypeRange{extractDestSlice.getType()},
+ tiledOperands, op->getAttrs());
+
+ return TilingResult{{tiledUnPackOp},
+ SmallVector<Value>(tiledUnPackOp->getResults()),
+ llvm::to_vector(ArrayRef<Operation *>{
+ extractSourceSlice, extractDestSlice})};
+ }
+};
+
} // namespace
template <typename OpType>
@@ -584,8 +1229,18 @@ void mlir::linalg::registerTilingInterfaceExternalModels(
DialectRegistry ®istry) {
registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
registerOne<linalg::GenericOp>(ctx);
+ linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
+ linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
registerAll<
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
>(ctx);
});
}
+
+void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(
+ DialectRegistry ®istry) {
+ registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) {
+ linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
+ linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
+ });
+}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 50593b08ad74b5..dcd50cc44f81bc 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -217,7 +217,7 @@ struct PackedOperandsDimList {
} // namespace
FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
- tensor::PackOp packOp,
+ linalg::PackOp packOp,
bool lowerPadLikeWithInsertSlice) {
// 1. Filter out NYI cases.
auto packedTensorType =
@@ -238,7 +238,7 @@ FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
PackingMetadata packingMetadata = computePackingMetadata(
packedTensorType.getRank(), packOp.getInnerDimsPos());
SmallVector<int64_t> packedToStripMinedShapePerm =
- tensor::getPackInverseDestPerm(packOp);
+ getPackInverseDestPerm(packOp);
// 3. Compute the stripMinedShape: this is the packed shape before any outer
// or inner permutations have been applied.
@@ -353,7 +353,7 @@ FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
}
FailureOr<LowerUnPackOpResult>
-linalg::lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp,
+linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
bool lowerUnpadLikeWithExtractSlice) {
Location loc = unPackOp->getLoc();
OpBuilder::InsertionGuard g(rewriter);
@@ -388,7 +388,7 @@ linalg::lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp,
// before any outer or inner permutations have been applied.
PackingMetadata packingMetadata;
SmallVector<int64_t> packedToStripMinedShapePerm =
- tensor::getUnPackInverseSrcPerm(unPackOp, packingMetadata);
+ getUnPackInverseSrcPerm(unPackOp, packingMetadata);
// 2. Compute the stripMinedShape: this is the packed shape without outer and
// inner permutations.
@@ -493,8 +493,8 @@ FailureOr<PackResult> linalg::pack(RewriterBase &rewriter,
llvm::interleaveComma(iteratorTypes, DBGS() << "iterators: ");
DBGSNL(););
- SmallVector<tensor::PackOp> packOps;
- SmallVector<tensor::UnPackOp> unPackOps;
+ SmallVector<linalg::PackOp> packOps;
+ SmallVector<linalg::UnPackOp> unPackOps;
// Step 1. Pack each dim of the LinalgOp metadata by packedSizes[i].
PackedOperandsDimList listOfPackedOperandsDim;
for (int64_t i = 0, e = packedSizes.size(); i < e; ++i) {
@@ -545,7 +545,7 @@ FailureOr<PackResult> linalg::pack(RewriterBase &rewriter,
inputsAndInits.push_back(operand);
continue;
}
- Value dest = tensor::PackOp::createDestinationTensor(
+ Value dest = linalg::PackOp::createDestinationTensor(
rewriter, loc, operand, innerPackSizes, innerPos,
/*outerDimsPerm=*/{});
ShapedType operandType = cast<ShapedType>(operand.getType());
@@ -554,11 +554,11 @@ FailureOr<PackResult> linalg::pack(RewriterBase &rewriter,
return getConstantIntValue(tile).has_value();
});
if (areConstantTiles && operandType.hasStaticShape() &&
- !tensor::PackOp::requirePaddingValue(
+ !linalg::PackOp::requirePaddingValue(
operandType.getShape(), innerPos,
cast<ShapedType>(dest.getType()).getShape(), {},
innerPackSizes)) {
- packOps.push_back(rewriter.create<tensor::PackOp>(
+ packOps.push_back(rewriter.create<linalg::PackOp>(
loc, operand, dest, innerPos, innerPackSizes));
} else {
// TODO: value of the padding attribute should be determined by
@@ -566,7 +566,7 @@ FailureOr<PackResult> linalg::pack(RewriterBase &rewriter,
auto zeroAttr =
rewriter.getZeroAttr(getElementTypeOrSelf(dest.getType()));
Value zero = rewriter.create<arith::ConstantOp>(loc, zeroAttr);
- packOps.push_back(rewriter.create<tensor::PackOp>(
+ packOps.push_back(rewriter.create<linalg::PackOp>(
loc, operand, dest, innerPos, innerPackSizes, zero));
}
inputsAndInits.push_back(packOps.back());
@@ -586,14 +586,14 @@ FailureOr<PackResult> linalg::pack(RewriterBase &rewriter,
// Step 4. Propagate packing to all the op results.
for (OpResult result : packedLinalgOp->getResults()) {
int64_t resultNum = result.getResultNumber();
- tensor::PackOp maybePackedInit =
- inits[resultNum].getDefiningOp<tensor::PackOp>();
+ linalg::PackOp maybePackedInit =
+ inits[resultNum].getDefiningOp<linalg::PackOp>();
if (!maybePackedInit) {
results.push_back(result);
continue;
}
// Build the symmetrical UnPackOp to the existing PackOp.
- unPackOps.push_back(rewriter.create<tensor::UnPackOp>(
+ unPackOps.push_back(rewriter.create<linalg::UnPackOp>(
packedLinalgOp->getLoc(), result, maybePackedInit.getSource(),
maybePackedInit.getInnerDimsPos(), maybePackedInit.getMixedTiles()));
results.push_back(unPackOps.back());
@@ -674,15 +674,15 @@ static LinalgOp transposeOneLinalgOperandAndReplace(
}
FailureOr<PackTransposeResult>
-linalg::packTranspose(RewriterBase &rewriter, tensor::PackOp packOp,
- linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp,
+linalg::packTranspose(RewriterBase &rewriter, linalg::PackOp packOp,
+ linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp,
ArrayRef<int64_t> outerPerm,
ArrayRef<int64_t> innerPerm) {
Location loc = linalgOp.getLoc();
// Step 1. Transpose packOp.
rewriter.setInsertionPoint(packOp);
- tensor::PackOp transposedPackOp =
+ linalg::PackOp transposedPackOp =
packOp.createTransposedClone(rewriter, loc, innerPerm, outerPerm);
if (!packOp.getResult().hasOneUse())
@@ -733,7 +733,7 @@ linalg::packTranspose(RewriterBase &rewriter, tensor::PackOp packOp,
rewriter, linalgOp, packUse, permutation, transposedPackOp.getResult());
// Step 3. Maybe transpose unPackOp.
- tensor::UnPackOp transposedUnPackOp;
+ linalg::UnPackOp transposedUnPackOp;
if (maybeUnPackOp) {
OpOperand &opOperand =
transposedLinalgOp->getOpOperand(packUseOperandNumber);
@@ -1024,7 +1024,7 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
///
/// This method assumes that all outer dims for this pack Op are 1.
static Value getPackOpSourceOrPaddedSource(OpBuilder &builder,
- tensor::PackOp packOp) {
+ linalg::PackOp packOp) {
Value input = packOp.getSource();
if (!packOp.getPaddingValue()) {
return input;
@@ -1141,7 +1141,7 @@ getPackUnpackRankReducedPerm(ArrayRef<int64_t> shape,
}
LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
- tensor::PackOp packOp, PatternRewriter &rewriter) const {
+ linalg::PackOp packOp, PatternRewriter &rewriter) const {
// TODO: support the case that outer dimensions are not all 1s. A
// tensor.expand_shape will be generated in this case.
if (llvm::any_of(packOp.getAllOuterDims(),
@@ -1242,7 +1242,7 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
}
LogicalResult DecomposeOuterUnitDimsUnPackOpPattern::matchAndRewrite(
- tensor::UnPackOp unpackOp, PatternRewriter &rewriter) const {
+ linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const {
int64_t srcRank = unpackOp.getSourceRank();
int64_t destRank = unpackOp.getDestRank();
ArrayRef<int64_t> srcShape = unpackOp.getSourceType().getShape();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 299bbc226dec8b..f39934ae05eb40 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1468,11 +1468,11 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state,
return success();
}
-/// Given a tensor::PackOp, return the `dest` shape before any packing
+/// Given a linalg::PackOp, return the `dest` shape before any packing
/// permutations.
-static SmallVector<int64_t> getTiledPackShape(tensor::PackOp packOp,
+static SmallVector<int64_t> getTiledPackShape(linalg::PackOp packOp,
ArrayRef<int64_t> destShape) {
- return applyPermutation(destShape, tensor::getPackInverseDestPerm(packOp));
+ return applyPermutation(destShape, linalg::getPackInverseDestPerm(packOp));
}
/// Given an input, the mixed destSizes, and the vector sizes for vectorization,
@@ -1527,7 +1527,7 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
return write;
}
-/// Vectorize tensor::PackOp with (1) static innerTiles (2) constant
+/// Vectorize linalg::PackOp with (1) static innerTiles (2) constant
/// padding value and (3) input vector sizes into:
/// masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds
/// As in the following example:
@@ -1554,7 +1554,7 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
/// determined by the result tensor shape. Also, we update the inBounds
/// attribute instead of masking.
static LogicalResult
-vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
+vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
OpBuilder::InsertionGuard g(rewriter);
@@ -1607,7 +1607,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
// Create TransposeOp.
auto destPermutation =
- invertPermutationVector(tensor::getPackInverseDestPerm(packOp));
+ invertPermutationVector(getPackInverseDestPerm(packOp));
auto transposeOp = rewriter.create<vector::TransposeOp>(
loc, shapeCastOp.getResult(), destPermutation);
@@ -1619,7 +1619,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
return success();
}
-/// Vectorize a `tensor::UnPackOp` to these 4 Ops:
+/// Vectorize a `linalg::UnPackOp` to these 4 Ops:
/// Vector::TransferReadOp - Reads a vector from the source tensor
/// vector::TransposeOp - Transpose the Source tensor
/// ShapeCastOp - Reshape the data based on the target.
@@ -1629,7 +1629,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
/// * the vector sizes are determined by the input operand and attributes,
/// * update the inBounds attribute instead of masking.
static LogicalResult
-vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
+vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
@@ -1721,7 +1721,7 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
PackingMetadata packMetadata;
SmallVector<int64_t> lastDimToInsertPosPerm =
- tensor::getUnPackInverseSrcPerm(unpackOp, packMetadata);
+ getUnPackInverseSrcPerm(unpackOp, packMetadata);
ShapedType maskedOpShapedType = cast<ShapedType>(readResult.getType());
SmallVector<int64_t> stripMineShape(maskedOpShapedType.getShape());
mlir::Type stripMineElemType = maskedOpShapedType.getElementType();
@@ -1854,7 +1854,7 @@ vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op,
/// Need to check if the inner-tiles are static/constant.
static LogicalResult
-vectorizeUnPackOpPrecondition(tensor::UnPackOp unpackOp,
+vectorizeUnPackOpPrecondition(linalg::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes) {
if (llvm::any_of(unpackOp.getInnerTiles(), [](OpFoldResult res) {
@@ -1942,7 +1942,7 @@ static LogicalResult vectorizeLinalgOpPrecondition(
}
static LogicalResult
-vectorizePackOpPrecondition(tensor::PackOp packOp,
+vectorizePackOpPrecondition(linalg::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes) {
auto padValue = packOp.getPaddingValue();
Attribute cstAttr;
@@ -2138,10 +2138,10 @@ LogicalResult mlir::linalg::vectorizeOpPrecondition(
.Case<tensor::PadOp>([&](auto padOp) {
return vectorizePadOpPrecondition(padOp, inputVectorSizes);
})
- .Case<tensor::PackOp>([&](auto packOp) {
+ .Case<linalg::PackOp>([&](auto packOp) {
return vectorizePackOpPrecondition(packOp, inputVectorSizes);
})
- .Case<tensor::UnPackOp>([&](auto unpackOp) {
+ .Case<linalg::UnPackOp>([&](auto unpackOp) {
return vectorizeUnPackOpPrecondition(unpackOp, inputVectorSizes);
})
.Default([](auto) { return failure(); });
@@ -2163,7 +2163,7 @@ static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) {
}
bool mlir::linalg::hasVectorizationImpl(Operation *op) {
- return isa<linalg::LinalgOp, tensor::PadOp, tensor::PackOp, tensor::UnPackOp>(
+ return isa<linalg::LinalgOp, tensor::PadOp, linalg::PackOp, linalg::UnPackOp>(
op);
}
@@ -2240,11 +2240,11 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
return vectorizeAsTensorPadOp(rewriter, padOp, inputVectorSizes,
results);
})
- .Case<tensor::PackOp>([&](auto packOp) {
+ .Case<linalg::PackOp>([&](auto packOp) {
return vectorizeAsTensorPackOp(rewriter, packOp, inputVectorSizes,
results);
})
- .Case<tensor::UnPackOp>([&](auto unpackOp) {
+ .Case<linalg::UnPackOp>([&](auto unpackOp) {
return vectorizeAsTensorUnpackOp(rewriter, unpackOp,
inputVectorSizes, results);
})
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index d148067fe63433..d3d301ca093b16 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -142,10 +142,64 @@ static void unpackRanges(OpBuilder &builder, Location loc,
//===----------------------------------------------------------------------===//
// General utilities
//===----------------------------------------------------------------------===//
+//
+/// The permutation can be obtained from two permutations:
+/// a) Compute the permutation vector to move the last `numPackedDims` into
+/// the `innerPosDims` of a shape of rank `rank`.
+/// b) Compute the permutation vector to move outer dims if the
+/// `outerPerm` parameter is not empty.
+/// Apply (b) permutation on (a) permutation to get the final permutation.
+static SmallVector<int64_t>
+computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
+ ArrayRef<int64_t> &outerPerm,
+ PackingMetadata &packingMetadata) {
+ int64_t numPackedDims = innerDimsPos.size();
+ auto lastDims =
+ llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
+ packingMetadata = computePackingMetadata(rank, innerDimsPos);
+ SmallVector<int64_t> innerPositionsPerm =
+ computePermutationVector(rank, lastDims, packingMetadata.insertPositions);
+
+ SmallVector<int64_t> outerPos = packingMetadata.outerPositions;
+ if (!outerPerm.empty())
+ applyPermutationToVector(outerPos, outerPerm);
+ SmallVector<int64_t> outerPositionPerm =
+ computePermutationVector(rank, packingMetadata.outerPositions, outerPos);
+
+ SmallVector<int64_t> packInverseDestPermutation = innerPositionsPerm;
+ applyPermutationToVector(packInverseDestPermutation, outerPositionPerm);
+ return packInverseDestPermutation;
+}
namespace mlir {
namespace linalg {
+SmallVector<int64_t> getPackInverseDestPerm(PackOp packOp) {
+
+ PackingMetadata pMetadata;
+ int64_t packedRank = packOp.getDestType().getRank();
+ ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
+ ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
+ SmallVector<int64_t> packInvDestPerm =
+ computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata);
+ return packInvDestPerm;
+}
+
+SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp) {
+ PackingMetadata metadata;
+ return getUnPackInverseSrcPerm(unpackOp, metadata);
+}
+
+SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp,
+ PackingMetadata &metadata) {
+ int64_t unpackRank = unpackOp.getSourceType().getRank();
+ ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
+ ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
+ SmallVector<int64_t> unpackInvSrcPerm =
+ computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata);
+ return unpackInvSrcPerm;
+}
+
bool allIndexingsAreProjectedPermutation(LinalgOp op) {
return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
return m.isProjectedPermutation(/*allowZeroInResults=*/true);
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 8a1392baf29983..dfe342b3e743bb 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -10,7 +10,9 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
@@ -4478,8 +4480,8 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
template <typename PackOrUnpackOp>
static bool isLikePadUnPad(PackOrUnpackOp packOp,
RankedTensorType packedTensorType) {
- static_assert(std::is_same<PackOrUnpackOp, tensor::PackOp>::value ||
- std::is_same<PackOrUnpackOp, tensor::UnPackOp>::value,
+ static_assert(std::is_same<PackOrUnpackOp, PackOp>::value ||
+ std::is_same<PackOrUnpackOp, UnPackOp>::value,
"Function meant for pack/unpack");
// This is a pad if packing only adds ones and we don't transpose dimensions.
@@ -4680,7 +4682,7 @@ static bool inferStaticShape(UnPackOp op, SmallVectorImpl<int64_t> &srcShape,
LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
PatternRewriter &rewriter) {
/// unpack(pack(x)) -> x
- if (PackOp packOp = unPackOp.getSource().getDefiningOp<tensor::PackOp>()) {
+ if (PackOp packOp = unPackOp.getSource().getDefiningOp<PackOp>()) {
if (packOp.getSourceType() != unPackOp.getDestType())
return failure();
if (packOp.getPaddingValue() ||
@@ -4716,7 +4718,7 @@ LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
dest =
rewriter.create<tensor::CastOp>(loc, newDestType, unPackOp.getDest());
}
- Value newOp = rewriter.create<tensor::UnPackOp>(
+ Value newOp = rewriter.create<UnPackOp>(
loc, source, dest, unPackOp.getInnerDimsPos(), unPackOp.getMixedTiles(),
unPackOp.getOuterDimsPerm());
rewriter.replaceOpWithNewOp<tensor::CastOp>(
@@ -4819,7 +4821,7 @@ getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy,
return newMixedTileSizes;
}
-/// Folds a tensor.cast op into a consuming tensor::PackOp op if the
+/// Folds a tensor.cast op into a consuming PackOp op if the
/// `tensor.cast` has source that is more static than the consuming op.
///
/// Example:
@@ -4871,7 +4873,7 @@ struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> {
}
};
-/// Folds a tensor.cast op into a consuming tensor::UnPackOp op if the
+/// Folds a tensor.cast op into a consuming UnPackOp op if the
/// `tensor.cast` has source that is more static than the consuming op.
///
/// Example:
@@ -4948,9 +4950,9 @@ struct FoldTensorCastProducerOp
LogicalResult matchAndRewrite(DestinationStyleOpInterface op,
PatternRewriter &rewriter) const override {
- // Reject tensor::PackOp - there's dedicated pattern for that instead.
- if (!foldTensorCastPrecondition(op) ||
- isa<tensor::PackOp, tensor::UnPackOp>(*op))
+ // Reject PackOp/UnpackOp - there are dedicated patterns for that instead.
+ if (!foldTensorCastPrecondition(op) || isa<PackOp, UnPackOp>(*op) ||
+ isa<linalg::PackOp, linalg::UnPackOp>(*op))
return failure();
SmallVector<Type> newResultTypes(op->getResultTypes());
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
index 052dee402b79ed..bd1a09be6b9bca 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -118,7 +118,7 @@ static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
}
struct PackOpTiling
- : public TilingInterface::ExternalModel<PackOpTiling, PackOp> {
+ : public TilingInterface::ExternalModel<PackOpTiling, tensor::PackOp> {
SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
// Note that here we only consider untiled dimensions and outer tiled data
@@ -491,7 +491,7 @@ static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
}
struct UnPackOpTiling
- : public TilingInterface::ExternalModel<UnPackOpTiling, UnPackOp> {
+ : public TilingInterface::ExternalModel<UnPackOpTiling, tensor::UnPackOp> {
SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
auto unpackOp = cast<UnPackOp>(op);
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
index 99199252710f99..f3560d08ff7693 100644
--- a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -100,11 +100,6 @@ void transform::ApplyFoldTensorEmptyPatternsOp::populatePatterns(
tensor::populateFoldTensorEmptyPatterns(patterns, getFoldSingleUseOnly());
}
-void transform::ApplyFoldIntoPackAndUnpackPatternsOp::populatePatterns(
- RewritePatternSet &patterns) {
- tensor::populateFoldIntoPackAndUnpackPatterns(patterns);
-}
-
void transform::ApplyFoldTensorSubsetOpsPatternsOp::populatePatterns(
RewritePatternSet &patterns) {
tensor::populateFoldTensorSubsetOpPatterns(patterns);
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
index cc6275fee671aa..7880d1c5a0c5d7 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -6,7 +6,6 @@ add_mlir_dialect_library(MLIRTensorTransforms
FoldTensorSubsetOps.cpp
IndependenceTransforms.cpp
MergeConsecutiveInsertExtractSlicePatterns.cpp
- PackAndUnpackPatterns.cpp
ReshapePatterns.cpp
RewriteAsConstant.cpp
SwapExtractSliceWithProducerPatterns.cpp
diff --git a/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp
index 60b0c3e759b6c7..fa748cf01977fa 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp
@@ -93,49 +93,6 @@ struct FoldEmptyTensorWithExtractSliceOp
bool foldSingleUseOnly = false;
};
-/// tensor.empty does not define any tensor contents, so an unpadded pack
-/// can be folded away.
-struct FoldEmptyTensorWithPackOp : public OpRewritePattern<PackOp> {
- using OpRewritePattern<PackOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(PackOp packOp,
- PatternRewriter &rewriter) const override {
- // Check for tensor.empty source.
- auto emptyOp = packOp.getSource().getDefiningOp<EmptyOp>();
- if (!emptyOp)
- return failure();
-
- // Check for padding.
- // Packing with padding cannot be simply removed.
- if (packOp.getPaddingValue())
- return rewriter.notifyMatchFailure(packOp, "expects no padding value");
-
- // Replace the pack directly with its destination.
- rewriter.replaceOp(packOp, packOp.getDest());
-
- return success();
- }
-};
-
-/// tensor.empty does not define any tensor contents, so an unpack
-/// can be folded away.
-struct FoldEmptyTensorWithUnPackOp : public OpRewritePattern<UnPackOp> {
- using OpRewritePattern<UnPackOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(UnPackOp unPackOp,
- PatternRewriter &rewriter) const override {
- // Check for tensor.empty source.
- auto emptyOp = unPackOp.getSource().getDefiningOp<EmptyOp>();
- if (!emptyOp)
- return failure();
-
- // Replace the unpack directly with its destination.
- rewriter.replaceOp(unPackOp, unPackOp.getDest());
-
- return success();
- }
-};
-
// Fold concat operation where all the operands are empty.
struct FoldConcatsOfEmpty : public OpRewritePattern<ConcatOp> {
using OpRewritePattern<ConcatOp>::OpRewritePattern;
@@ -176,7 +133,6 @@ void mlir::tensor::populateFoldTensorEmptyPatterns(RewritePatternSet &patterns,
FoldEmptyTensorWithReshapeOp<tensor::ExpandShapeOp>,
FoldEmptyTensorWithReshapeOp<tensor::CollapseShapeOp>>(
patterns.getContext(), /*benefit=*/1, foldSingleUseOnly);
- patterns.add<FoldConcatsOfEmpty, FoldEmptyTensorWithPackOp,
- FoldEmptyTensorWithUnPackOp>(patterns.getContext(),
- /*benefit=*/1);
+ patterns.add<FoldConcatsOfEmpty>(patterns.getContext(),
+ /*benefit=*/1);
}
diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir
index 01ca4374da046f..4ba4b09f52163b 100644
--- a/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir
+++ b/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir
@@ -38,64 +38,64 @@ func.func @block_matmul_transpose_b(
// MMT4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
// MMT4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
// MMT4D-LABEL: func @block_matmul
-// MMT4D-COUNT-3: tensor.pack
+// MMT4D-COUNT-3: linalg.pack
// MMT4D: linalg.generic
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MMT4D-COUNT-1: tensor.unpack
+// MMT4D-COUNT-1: linalg.unpack
// MMT4D-LABEL: func @block_matmul_transpose_a
-// MMT4D-COUNT-3: tensor.pack
+// MMT4D-COUNT-3: linalg.pack
// MMT4D: linalg.generic
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MMT4D-COUNT-1: tensor.unpack
+// MMT4D-COUNT-1: linalg.unpack
// MMT4D-LABEL: func @block_matmul_transpose_b
-// MMT4D-COUNT-3: tensor.pack
+// MMT4D-COUNT-3: linalg.pack
// MMT4D: linalg.generic
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MMT4D-COUNT-1: tensor.unpack
+// MMT4D-COUNT-1: linalg.unpack
// MM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
// MM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)>
// MM4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
// MM4D-LABEL: func @block_matmul
-// MM4D-COUNT-3: tensor.pack
+// MM4D-COUNT-3: linalg.pack
// MM4D: linalg.generic
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MM4D-COUNT-1: tensor.unpack
+// MM4D-COUNT-1: linalg.unpack
// MM4D-LABEL: func @block_matmul_transpose_a
-// MM4D-COUNT-3: tensor.pack
+// MM4D-COUNT-3: linalg.pack
// MM4D: linalg.generic
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MM4D-COUNT-1: tensor.unpack
+// MM4D-COUNT-1: linalg.unpack
// MM4D-LABEL: func @block_matmul_transpose_b
-// MM4D-COUNT-3: tensor.pack
+// MM4D-COUNT-3: linalg.pack
// MM4D: linalg.generic
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MM4D-COUNT-1: tensor.unpack
+// MM4D-COUNT-1: linalg.unpack
// MTM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d0, d5, d3)>
// MTM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)>
// MTM4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
// MTM4D-LABEL: func @block_matmul
-// MTM4D-COUNT-3: tensor.pack
+// MTM4D-COUNT-3: linalg.pack
// MTM4D: linalg.generic
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MTM4D-COUNT-1: tensor.unpack
+// MTM4D-COUNT-1: linalg.unpack
// MTM4D-LABEL: func @block_matmul_transpose_a
-// MTM4D-COUNT-3: tensor.pack
+// MTM4D-COUNT-3: linalg.pack
// MTM4D: linalg.generic
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MTM4D-COUNT-1: tensor.unpack
+// MTM4D-COUNT-1: linalg.unpack
// MTM4D-LABEL: func @block_matmul_transpose_b
-// MTM4D-COUNT-3: tensor.pack
+// MTM4D-COUNT-3: linalg.pack
// MTM4D: linalg.generic
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
-// MTM4D-COUNT-1: tensor.unpack
+// MTM4D-COUNT-1: linalg.unpack
diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir
index 9e396ba08d2460..e667879ceea0e9 100644
--- a/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir
+++ b/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir
@@ -21,17 +21,17 @@ func.func @block_matmul_padding(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32>
// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: padding_value(%[[ZERO]] : f32)
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<123x125xf32> -> tensor<4x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: padding_value(%[[ZERO]] : f32)
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<125x124xf32> -> tensor<8x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: padding_value(%[[ZERO]] : f32)
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<123x124xf32> -> tensor<4x8x32x16xf32>
@@ -39,17 +39,17 @@ func.func @block_matmul_padding(
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<4x8x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<123x124xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<123x124xf32>
// NOPAD-LABEL: func @block_matmul_padding(
// NOPAD-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32>
-// NOPAD-NOT: tensor.pack
+// NOPAD-NOT: linalg.pack
// NOPAD: linalg.matmul ins(%[[A]], %[[B]] : tensor<123x125xf32>, tensor<125x124xf32>)
// NOPAD-SAME: outs(%[[C]] : tensor<123x124xf32>) -> tensor<123x124xf32>
-// NOPAD-NOT: tensor.unpack
+// NOPAD-NOT: linalg.unpack
// PAD-MULT-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
// PAD-MULT-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
@@ -58,17 +58,17 @@ func.func @block_matmul_padding(
// PAD-MULT-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32>
// PAD-MULT-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32
// PAD-MULT: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<1x1x256x384xf32>
-// PAD-MULT: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// PAD-MULT: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// PAD-MULT-SAME: padding_value(%[[ZERO]] : f32)
// PAD-MULT-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [256, 384]
// PAD-MULT-SAME: into %[[PACK_DST_0]] : tensor<123x125xf32> -> tensor<1x1x256x384xf32>
// PAD-MULT: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<1x1x512x384xf32>
-// PAD-MULT: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// PAD-MULT: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// PAD-MULT-SAME: padding_value(%[[ZERO]] : f32)
// PAD-MULT-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [512, 384]
// PAD-MULT-SAME: into %[[PACK_DST_1]] : tensor<125x124xf32> -> tensor<1x1x512x384xf32>
// PAD-MULT: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<1x1x256x512xf32>
-// PAD-MULT: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// PAD-MULT: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// PAD-MULT-SAME: padding_value(%[[ZERO]] : f32)
// PAD-MULT-SAME: inner_dims_pos = [0, 1] inner_tiles = [256, 512]
// PAD-MULT-SAME: into %[[PACK_DST_2]] : tensor<123x124xf32> -> tensor<1x1x256x512xf32>
@@ -76,7 +76,7 @@ func.func @block_matmul_padding(
// PAD-MULT-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// PAD-MULT-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// PAD-MULT-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<1x1x256x384xf32>, tensor<1x1x512x384xf32>) outs(%[[C_PACKED]] : tensor<1x1x256x512xf32>)
-// PAD-MULT: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// PAD-MULT: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// PAD-MULT-SAME: inner_dims_pos = [0, 1] inner_tiles = [256, 512]
// PAD-MULT-SAME: into %[[C]] : tensor<1x1x256x512xf32> -> tensor<123x124xf32>
// PAD-MULT: return %[[RES_UNPACKED]] : tensor<123x124xf32>
diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul.mlir
index 8a82608177692b..aa860dbd581a9e 100644
--- a/mlir/test/Dialect/Linalg/block-pack-matmul.mlir
+++ b/mlir/test/Dialect/Linalg/block-pack-matmul.mlir
@@ -14,22 +14,22 @@ func.func @block_matmul(
// CHECK-LABEL: func @block_matmul(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x128xf32>, %[[B:[0-9a-z]+]]: tensor<128x128xf32>, %[[C:[0-9a-z]+]]: tensor<128x128xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x128xf32> -> tensor<4x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x128xf32> -> tensor<8x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<128x128xf32> -> tensor<4x8x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<4x8x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32>
@@ -60,7 +60,7 @@ func.func @block_matmul_dynamic(
// CHECK-DAG: %[[A_OUTER_TILE_M:.+]] = affine.apply #[[$MAP_M]]()[%[[A_M]]]
// CHECK-DAG: %[[A_OUTER_TILE_K:.+]] = affine.apply #[[$MAP_K]]()[%[[A_K]]]
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty(%[[A_OUTER_TILE_M]], %[[A_OUTER_TILE_K]]) : tensor<?x?x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: padding_value(%[[ZERO]] : f32)
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<?x?xf32> -> tensor<?x?x32x64xf32>
@@ -69,7 +69,7 @@ func.func @block_matmul_dynamic(
// CHECK-DAG: %[[B_OUTER_TILE_K:.+]] = affine.apply #[[$MAP_K]]()[%[[B_K]]]
// CHECK-DAG: %[[B_OUTER_TILE_N:.+]] = affine.apply #[[$MAP_N]]()[%[[B_N]]]
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty(%[[B_OUTER_TILE_N]], %[[B_OUTER_TILE_K]]) : tensor<?x?x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: padding_value(%[[ZERO]] : f32)
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<?x?xf32> -> tensor<?x?x16x64xf32>
@@ -78,7 +78,7 @@ func.func @block_matmul_dynamic(
// CHECK-DAG: %[[C_OUTER_TILE_M:.+]] = affine.apply #[[$MAP_M]]()[%[[C_M]]]
// CHECK-DAG: %[[C_OUTER_TILE_N:.+]] = affine.apply #[[$MAP_N]]()[%[[C_N]]]
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty(%[[C_OUTER_TILE_M]], %[[C_OUTER_TILE_N]]) : tensor<?x?x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: padding_value(%[[ZERO]] : f32)
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<?x?xf32> -> tensor<?x?x32x16xf32>
@@ -86,7 +86,7 @@ func.func @block_matmul_dynamic(
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<?x?x32x64xf32>, tensor<?x?x16x64xf32>) outs(%[[C_PACKED]] : tensor<?x?x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<?x?x32x16xf32> -> tensor<?x?xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<?x?xf32>
@@ -107,7 +107,7 @@ func.func @block_matmul_with_constant(
// CHECK-DAG: %[[RES_DST:.+]] = arith.constant dense<0.000000e+00> : tensor<128x128xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: ins({{.*}} : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[CST_ACC_PACKED]] : tensor<4x8x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[RES_DST]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32>
@@ -130,7 +130,7 @@ func.func @block_matmul_with_producer(
// CHECK: %[[ACC_PACKED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[FILL_DST_PACKED]] : tensor<4x8x32x16xf32>) -> tensor<4x8x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: ins({{.*}} : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[ACC_PACKED]] : tensor<4x8x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32>
@@ -152,7 +152,7 @@ func.func @block_matmul_with_consumer(
// CHECK-DAG: %[[RES_DST:.+]] = tensor.empty() : tensor<128x128xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: outs({{.*}} : tensor<4x8x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32>
// CHECK: %[[ADD_RES:.+]] = linalg.add
@@ -175,22 +175,22 @@ func.func @block_batch_matmul(
// CHECK-LABEL: func @block_batch_matmul(
// CHECK-SAME: %[[A:.+]]: tensor<512x64x128xf32>, %[[B:.+]]: tensor<512x128x64xf32>, %[[C:.+]]: tensor<512x64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x64x128xf32> -> tensor<512x2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x128x64xf32> -> tensor<512x4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32>
@@ -211,22 +211,22 @@ func.func @block_matmul_transpose_a(
// CHECK-LABEL: func @block_matmul_transpose_a(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x64xf32>, %[[B:[0-9a-z]+]]: tensor<128x64xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x64xf32> -> tensor<2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x64xf32> -> tensor<4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32>
@@ -247,22 +247,22 @@ func.func @block_batch_matmul_transpose_a(
// CHECK-LABEL: func @block_batch_matmul_transpose_a(
// CHECK-SAME: %[[A:.+]]: tensor<512x128x64xf32>, %[[B:.+]]: tensor<512x128x64xf32>, %[[C:.+]]: tensor<512x64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x128x64xf32> -> tensor<512x2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x128x64xf32> -> tensor<512x4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32>
@@ -283,22 +283,22 @@ func.func @block_matmul_transpose_b(
// CHECK-LABEL: func @block_matmul_transpose_b(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>, %[[B:[0-9a-z]+]]: tensor<64x128xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<64x128xf32> -> tensor<2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<64x128xf32> -> tensor<4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32>
@@ -319,22 +319,22 @@ func.func @block_batch_matmul_transpose_b(
// CHECK-LABEL: func @block_batch_matmul_transpose_b(
// CHECK-SAME: %[[A:.+]]: tensor<512x64x128xf32>, %[[B:.+]]: tensor<512x64x128xf32>, %[[C:.+]]: tensor<512x64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x64x128xf32> -> tensor<512x2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x64x128xf32> -> tensor<512x4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32>
@@ -365,22 +365,22 @@ func.func @block_generic_matmul(
// CHECK-LABEL: func @block_generic_matmul(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x128xf32>, %[[B:[0-9a-z]+]]: tensor<128x128xf32>, %[[C:[0-9a-z]+]]: tensor<128x128xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x128xf32> -> tensor<4x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x128xf32> -> tensor<8x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<128x128xf32> -> tensor<4x8x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<4x8x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32>
@@ -411,22 +411,22 @@ func.func @block_generic_matmul_transpose_a(
// CHECK-LABEL: func @block_generic_matmul_transpose_a(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x64xf32>, %[[B:[0-9a-z]+]]: tensor<128x64xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x64xf32> -> tensor<2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x64xf32> -> tensor<4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32>
@@ -457,22 +457,22 @@ func.func @block_generic_matmul_transpose_b(
// CHECK-LABEL: func @block_generic_matmul_transpose_b(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>, %[[B:[0-9a-z]+]]: tensor<64x128xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32>
-// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]]
+// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<64x128xf32> -> tensor<2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32>
-// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]]
+// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<64x128xf32> -> tensor<4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32>
-// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]]
+// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>)
-// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]]
+// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32>
@@ -498,10 +498,10 @@ func.func @non_contraction_generic(
// CHECK-LABEL: func @non_contraction_generic(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK-NOT: tensor.pack
+// CHECK-NOT: linalg.pack
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel"]
// CHECK-SAME: outs(%[[A]] : tensor<64x128xf32>)
-// CHECK-NOT: tensor.unpack
+// CHECK-NOT: linalg.unpack
// CHECK: return %[[GENERIC]] : tensor<64x128xf32>
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index cd439cd23ecd0c..db4f6181f517c5 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -357,7 +357,7 @@ func.func @fill_pack() -> tensor<24x32x16x16xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<24x32x16x16xf32>
%1 = linalg.fill ins(%cst : f32) outs(%dest : tensor<384x512xf32>) -> tensor<384x512xf32>
- %pack = tensor.pack %1 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<384x512xf32> -> tensor<24x32x16x16xf32>
+ %pack = linalg.pack %1 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<384x512xf32> -> tensor<24x32x16x16xf32>
return %pack : tensor<24x32x16x16xf32>
}
// CHECK-LABEL: func.func @fill_pack
@@ -374,7 +374,7 @@ func.func @fill_pack_general() -> tensor<1x1x8x4x4x8xi32>{
%extracted_slice_15 = tensor.extract_slice %9[0, 0, 0, 0] [1, 1, 16, 64] [1, 1, 1, 1] : tensor<1x1x16x64xi32> to tensor<1x1x16x64xi32>
%16 = linalg.fill ins(%c0_i32 : i32) outs(%extracted_slice_15 : tensor<1x1x16x64xi32>) -> tensor<1x1x16x64xi32>
%0 = bufferization.to_tensor %alloc restrict writable : memref<1x1x8x4x4x8xi32> to tensor<1x1x8x4x4x8xi32>
- %pack_18 = tensor.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32>
+ %pack_18 = linalg.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32>
return %pack_18 : tensor<1x1x8x4x4x8xi32>
}
@@ -397,7 +397,7 @@ func.func @dynamic_fill_pack(%arg0: tensor<?x?xf32>) -> tensor<?x?x16x16xf32> {
%1 = affine.apply #map()[%dim]
%2 = affine.apply #map()[%dim_0]
%3 = tensor.empty(%1, %2) : tensor<?x?x16x16xf32>
- %pack = tensor.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<?x?xf32> -> tensor<?x?x16x16xf32>
+ %pack = linalg.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<?x?xf32> -> tensor<?x?x16x16xf32>
return %pack : tensor<?x?x16x16xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)>
@@ -1249,3 +1249,499 @@ func.func @recursive_effect(%arg : tensor<1xf32>) {
// CHECK-LABEL: @recursive_effect
// CHECK: linalg.map
+
+//===----------------------------------------------------------------------===//
+// linalg.pack
+//===----------------------------------------------------------------------===//
+
+// CHECK-LABEL: func @fold_pack_constant_splat
+// CHECK-NOT: linalg.pack
+// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32>
+func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> {
+ %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32>
+ %0 = linalg.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
+ inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32>
+ return %0 : tensor<8x16x8x32xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @fold_padding_value_pack_constant_splat
+// CHECK-NOT: linalg.pack
+// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32>
+func.func @fold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> {
+ %pad = arith.constant 1.000000e-01 : f32
+ %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32>
+ %0 = linalg.pack %cst
+ padding_value(%pad : f32)
+ outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
+ inner_tiles = [8, 32] into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32>
+ return %0 : tensor<8x16x8x32xf32>
+}
+
+
+// -----
+
+// CHECK-LABEL: func @nofold_padding_value_pack_constant_splat
+// CHECK: arith.constant dense<1.000000e-01> : tensor<63x127xf32>
+// CHECK: linalg.pack
+func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> {
+ %pad = arith.constant 0.0 : f32
+ %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32>
+ %0 = linalg.pack %cst
+ padding_value(%pad : f32)
+ outer_dims_perm = [1, 0]
+ inner_dims_pos = [0, 1]
+ inner_tiles = [8, 32]
+ into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32>
+ return %0 : tensor<8x16x8x32xf32>
+}
+
+// -----
+
+func.func @fold_padding_value_pack(%arg0: tensor<1200x500000xf32>) -> tensor<31250x1200x16x1xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.empty() : tensor<31250x1200x16x1xf32>
+ %pack = linalg.pack %arg0
+ padding_value(%cst : f32)
+ outer_dims_perm = [1, 0]
+ inner_dims_pos = [1, 0]
+ inner_tiles = [16, 1]
+ into %0 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32>
+ return %pack : tensor<31250x1200x16x1xf32>
+}
+// CHECK-LABEL: func @fold_padding_value_pack
+// CHECK-NOT: padding_value
+
+// -----
+
+func.func @infer_src_shape_pack(%src: tensor<?x?x?x?xf32>, %dest: tensor<10x20x30x40x16xf32>) -> tensor<10x20x30x40x16xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %pack = linalg.pack %src
+ padding_value(%cst : f32)
+ outer_dims_perm = [2, 1, 3, 0]
+ inner_dims_pos = [2]
+ inner_tiles = [16]
+ into %dest : tensor<?x?x?x?xf32> -> tensor<10x20x30x40x16xf32>
+ return %pack : tensor<10x20x30x40x16xf32>
+}
+// CHECK-LABEL: func.func @infer_src_shape_pack
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
+// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]]
+// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor<?x?x?x?xf32> to tensor<40x20x?x30xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[CAST_SRC]] {{.+}} into %[[DEST]]
+// CHECK: return %[[PACK]]
+
+// -----
+
+func.func @infer_dest_shape_pack(%src: tensor<30x20x?x10xf32>, %dest: tensor<?x?x?x?x16xf32>) -> tensor<?x?x?x?x16xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %pack = linalg.pack %src
+ padding_value(%cst : f32)
+ outer_dims_perm = [2, 1, 3, 0]
+ inner_dims_pos = [2]
+ inner_tiles = [16]
+ into %dest : tensor<30x20x?x10xf32> -> tensor<?x?x?x?x16xf32>
+ return %pack : tensor<?x?x?x?x16xf32>
+}
+// CHECK-LABEL: func.func @infer_dest_shape_pack
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
+// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]]
+// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor<?x?x?x?x16xf32> to tensor<?x20x10x30x16xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] {{.+}} into %[[CAST_DEST]]
+// CHECK: %[[CAST_PACK:.+]] = tensor.cast %[[PACK]] : tensor<?x20x10x30x16xf32> to tensor<?x?x?x?x16xf32>
+// CHECK: return %[[CAST_PACK]]
+
+// -----
+
+func.func @no_infer_pack_shape(%arg0: tensor<?x32x100xf32>, %arg1: index) -> tensor<32x7x?x16x1xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.empty(%arg1) : tensor<32x7x?x16x1xf32>
+ %pack = linalg.pack %arg0 padding_value(%cst : f32) outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<?x32x100xf32> -> tensor<32x7x?x16x1xf32>
+ return %pack : tensor<32x7x?x16x1xf32>
+}
+// CHECK-LABEL: func.func @no_infer_pack_shape
+// CHECK-NOT: tensor.cast
+
+// -----
+
+func.func @fold_padding_value_pack_negative1(%arg0: tensor<1200x499999xf32>) -> tensor<31250x1200x16x1xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.empty() : tensor<31250x1200x16x1xf32>
+ %pack = linalg.pack %arg0
+ padding_value(%cst : f32)
+ outer_dims_perm = [1, 0]
+ inner_dims_pos = [1, 0]
+ inner_tiles = [16, 1]
+ into %0 : tensor<1200x499999xf32> -> tensor<31250x1200x16x1xf32>
+ return %pack : tensor<31250x1200x16x1xf32>
+}
+// CHECK-LABEL: func @fold_padding_value_pack_negative1
+// CHECK: linalg.pack
+// CHECK-SAME: padding_value
+
+// -----
+
+func.func @fold_padding_value_pack_negative2(%arg0: tensor<1200x?xf32>, %arg1: tensor<?x1200x16x1xf32>) -> tensor<?x1200x16x1xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %pack = linalg.pack %arg0
+ padding_value(%cst : f32)
+ outer_dims_perm = [1, 0]
+ inner_dims_pos = [1, 0]
+ inner_tiles = [16, 1]
+ into %arg1 : tensor<1200x?xf32> -> tensor<?x1200x16x1xf32>
+ return %pack : tensor<?x1200x16x1xf32>
+}
+// CHECK-LABEL: func @fold_padding_value_pack_negative2
+// CHECK: linalg.pack
+// CHECK-SAME: padding_value
+
+// -----
+
+func.func @fold_padding_value_pack_negative3(%arg0: tensor<1200x500000xf32>, %arg1: tensor<?x1200x?x1xf32>, %tile : index) -> tensor<?x1200x?x1xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %pack = linalg.pack %arg0
+ padding_value(%cst : f32)
+ outer_dims_perm = [1, 0]
+ inner_dims_pos = [1, 0]
+ inner_tiles = [%tile, 1]
+ into %arg1 : tensor<1200x500000xf32> -> tensor<?x1200x?x1xf32>
+ return %pack : tensor<?x1200x?x1xf32>
+}
+// CHECK-LABEL: func @fold_padding_value_pack_negative3
+// CHECK: linalg.pack
+// CHECK-SAME: padding_value
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// linalg.unpack
+//===----------------------------------------------------------------------===//
+
+
+// CHECK-LABEL: func @fold_unpack_constant_splat
+// CHECK-NOT: linalg.unpack
+// CHECK: arith.constant dense<1.000000e-01> : tensor<128x256xf32>
+func.func @fold_unpack_constant_splat(%dest : tensor<128x256xf32>) -> tensor<128x256xf32> {
+ %cst = arith.constant dense<1.000000e-01> : tensor<16x8x8x32xf32>
+ %0 = linalg.unpack %cst inner_dims_pos = [0, 1]
+ inner_tiles = [8, 32] into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
+ return %0 : tensor<128x256xf32>
+}
+
+// -----
+
+func.func @infer_dest_shape_unpack(%src: tensor<10x20x30x40x16xf32>, %dest: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
+ %unpack = linalg.unpack %src
+ outer_dims_perm = [2, 1, 3, 0]
+ inner_dims_pos = [2]
+ inner_tiles = [16]
+ into %dest : tensor<10x20x30x40x16xf32> -> tensor<?x?x?x?xf32>
+ return %unpack : tensor<?x?x?x?xf32>
+}
+// CHECK-LABEL: func.func @infer_dest_shape_unpack
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
+// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]]
+// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor<?x?x?x?xf32> to tensor<40x20x?x30xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SRC]] {{.+}} into %[[CAST_DEST]]
+// CHECK: %[[CAST_UNPACK:.+]] = tensor.cast %[[UNPACK]] : tensor<40x20x?x30xf32> to tensor<?x?x?x?xf32>
+// CHECK: return %[[CAST_UNPACK]]
+
+// -----
+
+func.func @infer_src_shape_unpack(%src: tensor<?x?x?x?x16xf32>, %dest: tensor<30x20x?x10xf32>) -> tensor<30x20x?x10xf32> {
+ %unpack = linalg.unpack %src
+ outer_dims_perm = [2, 1, 3, 0]
+ inner_dims_pos = [2]
+ inner_tiles = [16]
+ into %dest : tensor<?x?x?x?x16xf32> -> tensor<30x20x?x10xf32>
+ return %unpack : tensor<30x20x?x10xf32>
+}
+// CHECK-LABEL: func.func @infer_src_shape_unpack
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
+// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]]
+// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor<?x?x?x?x16xf32> to tensor<?x20x10x30x16xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[CAST_SRC]]
+// CHECK: return %[[UNPACK]]
+
+// -----
+
+func.func @no_infer_unpack_shape(%arg1: tensor<32x7x?x16x1xf32>, %arg2: index) -> tensor<?x32x100xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.empty(%arg2) : tensor<?x32x100xf32>
+ %unpack = linalg.unpack %arg1 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<32x7x?x16x1xf32> -> tensor<?x32x100xf32>
+ return %unpack : tensor<?x32x100xf32>
+}
+// CHECK-LABEL: func.func @no_infer_unpack_shape
+// CHECK-NOT: tensor.cast
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// linalg.pack + linalg.unpack
+//===----------------------------------------------------------------------===//
+
+// Chain: NC -> NCnc -> NCnc -> NC
+// CHECK: func.func @unpack_pack(
+// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>)
+// CHECK: return %[[T]] : tensor<128x128xf32>
+func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> {
+ %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32>
+ %packed = linalg.pack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
+ %tensor_empty1 = tensor.empty() : tensor<128x128xf32>
+ %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
+ return %unpacked : tensor<128x128xf32>
+}
+
+// -----
+
+// Chain: NC -> NCcn -> NCnc -> NC
+// CHECK: func.func @unpack_pack(
+// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>)
+// CHECK-NOT: return %[[T]] : tensor<128x128xf32>
+func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> {
+ %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32>
+ %packed = linalg.pack %t inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
+ %tensor_empty1 = tensor.empty() : tensor<128x128xf32>
+  %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
+ return %unpacked : tensor<128x128xf32>
+}
+
+// -----
+
+// Chain: NC -> CNcn -> NCnc -> NC
+// CHECK: func.func @unpack_pack(
+// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>)
+// CHECK-NOT: return %[[T]] : tensor<128x128xf32>
+func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> {
+ %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32>
+ %packed = linalg.pack %t outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
+ %tensor_empty1 = tensor.empty() : tensor<128x128xf32>
+  %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
+ return %unpacked : tensor<128x128xf32>
+}
+
+// -----
+
+// Chain: NC -> NCnc -> NCnc -> NC
+// CHECK: func.func @unpack_pack(
+// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>,
+// CHECK: return %[[T]] : tensor<128x128xf32>
+func.func @unpack_pack(%t: tensor<128x128xf32>, %tile1: index, %tile2: index) -> tensor<128x128xf32> {
+ %tensor_empty = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32>
+ %packed = linalg.pack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x?x?xf32>
+ %tensor_empty1 = tensor.empty() : tensor<128x128xf32>
+  %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<16x16x?x?xf32> -> tensor<128x128xf32>
+ return %unpacked : tensor<128x128xf32>
+}
+
+// -----
+
+// CHECK: func.func @unpack_pack_with_padding_no_canonicalization(
+// CHECK: linalg.pack
+// CHECK: linalg.unpack
+func.func @unpack_pack_with_padding_no_canonicalization(%t: tensor<256x512xbf16>) -> tensor<224x512xbf16> {
+ %tensor_empty = tensor.empty() : tensor<4x16x64x32xbf16>
+ %tensor_empty1 = tensor.empty() : tensor<224x512xbf16>
+ %packed = linalg.pack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty : tensor<256x512xbf16> -> tensor<4x16x64x32xbf16>
+ %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty1 : tensor<4x16x64x32xbf16> -> tensor<224x512xbf16>
+ return %unpacked : tensor<224x512xbf16>
+}
+
+// -----
+
+// Chain NCnc -> NC -> NC -> NCnc
+// CHECK: func.func @pack_unpack(
+// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>,
+// CHECK: return %[[T]] : tensor<16x16x?x?xf32>
+func.func @pack_unpack(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> {
+ %tensor_empty = tensor.empty() : tensor<128x128xf32>
+ %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32>
+ %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32>
+ %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32>
+ return %packed : tensor<16x16x?x?xf32>
+}
+
+// -----
+
+// Chain NCnc -> NC -> NC -> NCnc
+// CHECK: func.func @pack_unpack(
+// CHECK-SAME: %[[T:.+]]: tensor<16x16x8x8xf32>
+// CHECK: return %[[T]] : tensor<16x16x8x8xf32>
+func.func @pack_unpack(%t: tensor<16x16x8x8xf32>) -> tensor<16x16x8x8xf32> {
+ %tensor_empty = tensor.empty() : tensor<128x128xf32>
+ %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
+ %tensor_empty1 = tensor.empty() : tensor<16x16x8x8xf32>
+ %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
+ return %packed : tensor<16x16x8x8xf32>
+}
+
+// -----
+
+// CHECK: func.func @pack_unpack_same_tiles(
+// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>,
+// CHECK: return %[[T]] : tensor<?x?x?x?xf32>
+func.func @pack_unpack_same_tiles(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index,
+ %tile1: index, %tile2: index) -> tensor<?x?x?x?xf32> {
+ %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32>
+ %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
+ %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32>
+ %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
+ return %packed : tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK: func.func @pack_unpack_different_tiles(
+// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>,
+// CHECK-NOT: return %[[T]] : tensor<?x?x?x?xf32>
+func.func @pack_unpack_different_tiles(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index,
+ %tile1: index, %tile2: index) -> tensor<?x?x?x?xf32> {
+ %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32>
+ %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
+ %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32>
+ %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile2, %tile1] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
+ return %packed : tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK: func.func @pack_unpack_dynamic_with_padding(
+// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>,
+// CHECK-NOT: return %[[T]] : tensor<?x?x?x?xf32>
+func.func @pack_unpack_dynamic_with_padding(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index,
+ %tile1: index, %tile2: index, %pad: f32) -> tensor<?x?x?x?xf32> {
+ %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32>
+ %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
+ %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32>
+ %packed = linalg.pack %unpacked padding_value(%pad: f32) inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
+ return %packed : tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK: func.func @pack_outer_dims_unpack_no_outer_dims(
+// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>,
+// CHECK: return %[[T]] : tensor<16x16x?x?xf32>
+func.func @pack_outer_dims_unpack_no_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> {
+ %tensor_empty = tensor.empty() : tensor<128x128xf32>
+ %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32>
+ %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32>
+ %packed = linalg.pack %unpacked outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32>
+ return %packed : tensor<16x16x?x?xf32>
+}
+
+// -----
+
+// CHECK: func.func @pack_no_outer_dims_unpack_outer_dims(
+// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>,
+// CHECK: return %[[T]] : tensor<16x16x?x?xf32>
+func.func @pack_no_outer_dims_unpack_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> {
+ %tensor_empty = tensor.empty() : tensor<128x128xf32>
+ %unpacked = linalg.unpack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32>
+ %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32>
+ %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32>
+ return %packed : tensor<16x16x?x?xf32>
+}
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// tensor.cast + linalg.pack
+//===----------------------------------------------------------------------===//
+
+// CHECK-LABEL: func.func @fold_cast_pack_dynamic_tile_size
+// CHECK-SAME: %[[DEST:.*]]: tensor<1x1x8x1xi32>,
+// CHECK-SAME: %[[SRC:.*]]: tensor<7x?xi32>,
+// CHECK-SAME: %[[PAD:.*]]: i32) -> tensor<1x1x8x1xi32> {
+// CHECK: %[[PACK:.*]] = linalg.pack %[[SRC]] padding_value(%[[PAD]] : i32)
+// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]]
+// CHECK-SAME: test_attr
+// CHECK-SAME: : tensor<7x?xi32> -> tensor<1x1x8x1xi32>
+// CHECK: return %[[PACK]] : tensor<1x1x8x1xi32>
+func.func @fold_cast_pack_dynamic_tile_size(
+ %dest: tensor<1x1x8x1xi32>,
+ %src: tensor<7x?xi32>,
+ %pad: i32) -> tensor<1x1x8x1xi32> {
+
+ %cast = tensor.cast %dest : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32>
+ %c8 = arith.constant 8 : index
+ %pack = linalg.pack %src padding_value(%pad : i32)
+ inner_dims_pos = [0, 1]
+ inner_tiles = [%c8, 1]
+ into %cast {test_attr} : tensor<7x?xi32> -> tensor<1x1x?x1xi32>
+ %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32>
+ return %res : tensor<1x1x8x1xi32>
+}
+
+// -----
+
+func.func @infer_and_fold_pack_unpack_same_tiles(%t: tensor<10x20x4x4xf32>) -> tensor<10x20x4x4xf32> {
+ %dim1 = arith.constant 40 : index
+ %dim2 = arith.constant 80 : index
+ %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32>
+ %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty : tensor<10x20x4x4xf32> -> tensor<?x?xf32>
+ %cast = tensor.cast %unpacked : tensor<?x?xf32> to tensor<40x80xf32>
+ %tensor_empty1 = tensor.empty() : tensor<10x20x4x4xf32>
+ %packed = linalg.pack %cast inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty1 : tensor<40x80xf32> -> tensor<10x20x4x4xf32>
+ return %packed : tensor<10x20x4x4xf32>
+}
+// CHECK-LABEL: func.func @infer_and_fold_pack_unpack_same_tiles
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
+// CHECK: return %[[SRC]]
+
+// -----
+
+// CHECK-LABEL: func.func @pack_dont_drop_attributes(
+// CHECK: linalg.pack {{.*}} {test_attr}
+func.func @pack_dont_drop_attributes(%arg0: tensor<?x?x?xf16>, %arg1: tensor<128x?x100x16x1xf16>) -> tensor<128x?x100x16x1xf16> {
+ %c32_i64 = arith.constant 32 : i64
+ %cst = arith.constant 0.000000e+00 : f16
+ %pack = linalg.pack %arg0 padding_value(%cst : f16) outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %arg1 {test_attr} : tensor<?x?x?xf16> -> tensor<128x?x100x16x1xf16>
+ return %pack : tensor<128x?x100x16x1xf16>
+}
+// -----
+
+//===----------------------------------------------------------------------===//
+// linalg.fill + linalg.unpack
+//===----------------------------------------------------------------------===//
+// Fold DstStyleOp -> linalg.unpack operations.
+func.func @fold_dst_style_ops_into_unpack(%arg0 : tensor<?x?x16x64xf32>, %init : tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %cst = arith.constant 0.0 : f32
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 64] into %fill : tensor<?x?x16x64xf32> -> tensor<?x?xf32>
+ return %unpack : tensor<?x?xf32>
+}
+// CHECK-LABEL: func @fold_dst_style_ops_into_unpack
+// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x16x64xf32>
+// CHECK-SAME: %[[INIT:.+]]: tensor<?x?xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
+// CHECK-SAME: into %[[INIT]]
+// CHECK: return %[[UNPACK]]
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// tensor.cast + linalg.unpack
+//===----------------------------------------------------------------------===//
+
+// CHECK-LABEL: func.func @fold_cast_unpack_dynamic_tile_size(
+// CHECK-SAME: %[[SRC:.*]]: tensor<1x1x8x1xi32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<7x?xi32>) -> tensor<7x?xi32> {
+// CHECK: %[[RES:.*]] = linalg.unpack %[[SRC]] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] {test_attr} : tensor<1x1x8x1xi32> -> tensor<7x?xi32>
+// CHECK: return %[[RES]] : tensor<7x?xi32>
+func.func @fold_cast_unpack_dynamic_tile_size(
+ %src: tensor<1x1x8x1xi32>,
+ %res: tensor<7x?xi32>) -> tensor<7x?xi32> {
+
+ %cast = tensor.cast %src : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32>
+ %c8 = arith.constant 8 : index
+ %unpack = linalg.unpack %cast
+ inner_dims_pos = [0, 1]
+ inner_tiles = [%c8, 1]
+ into %res {test_attr} : tensor<1x1x?x1xi32> -> tensor<7x?xi32>
+ return %unpack : tensor<7x?xi32>
+}
diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir
index 07708231a6e2f6..9bbe70daad3f1a 100644
--- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir
+++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir
@@ -15,7 +15,7 @@ func.func @dynamic_elem_pack(%arg0: tensor<?x?xf32>, %dest: tensor<?x?x8x2xf32>)
%4 = arith.addf %arg3, %arg3 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
- %4 = tensor.pack %3
+ %4 = linalg.pack %3
inner_dims_pos = [0, 1]
inner_tiles = [8, 2]
into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
@@ -34,7 +34,7 @@ func.func @dynamic_elem_pack(%arg0: tensor<?x?xf32>, %dest: tensor<?x?x8x2xf32>)
// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]]
// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]], %[[OUTER_D1]]) : tensor<?x?x8x2xf32>
-// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 2]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[ELEM:.+]] = linalg.generic
@@ -56,7 +56,7 @@ func.func @elem_pack_transpose_inner_dims(%arg0: tensor<128x256xi32>, %dest: ten
%4 = arith.addi %arg3, %arg3 : i32
linalg.yield %4 : i32
} -> tensor<128x256xi32>
- %pack = tensor.pack %elem
+ %pack = linalg.pack %elem
inner_dims_pos = [1, 0]
inner_tiles = [16, 32]
into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32>
@@ -67,7 +67,7 @@ func.func @elem_pack_transpose_inner_dims(%arg0: tensor<128x256xi32>, %dest: ten
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32>
-// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[ELEM:.+]] = linalg.generic
@@ -89,7 +89,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %dest: ten
%4 = arith.addi %arg3, %arg3 : i32
linalg.yield %4 : i32
} -> tensor<128x256xi32>
- %pack = tensor.pack %elem
+ %pack = linalg.pack %elem
outer_dims_perm = [1, 0]
inner_dims_pos = [0, 1]
inner_tiles = [32, 16]
@@ -101,7 +101,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %dest: ten
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32>
-// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[ARG0_EMPTY]] : tensor<128x256xi32> -> tensor<16x4x32x16xi32>
// CHECK: %[[ELEM:.+]] = linalg.generic
@@ -123,7 +123,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims(%arg0: tensor<128x256xi32>,
%4 = arith.addi %arg3, %arg3 : i32
linalg.yield %4 : i32
} -> tensor<128x256xi32>
- %pack = tensor.pack %elem
+ %pack = linalg.pack %elem
outer_dims_perm = [1, 0]
inner_dims_pos = [1, 0]
inner_tiles = [16, 32]
@@ -135,7 +135,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims(%arg0: tensor<128x256xi32>,
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x16x32xi32>
-// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[ELEM:.+]] = linalg.generic
@@ -163,7 +163,7 @@ func.func @dynamic_broadcast_pack(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %d
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
- %4 = tensor.pack %3
+ %4 = linalg.pack %3
inner_dims_pos = [0, 1]
inner_tiles = [8, 2]
into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
@@ -182,13 +182,13 @@ func.func @dynamic_broadcast_pack(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %d
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]]) : tensor<?x8xf32>
-// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C0]]
// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]]
// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty(%[[OUTER_D1]]) : tensor<?x2xf32>
-// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]]
+// CHECK: %[[PACK_ARG1:.+]] = linalg.pack %[[ARG1]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2]
// CHECK-SAME: into %[[ARG1_EMPTY]]
// CHECK: %[[ELEM:.+]] = linalg.generic
@@ -212,7 +212,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims2(%arg0: tensor<64xf32>, %des
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<1x56x57x64xf32>
- %2 = tensor.pack %1 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %dest : tensor<1x56x57x64xf32> -> tensor<1x2x56x57x32xf32>
+ %2 = linalg.pack %1 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %dest : tensor<1x56x57x64xf32> -> tensor<1x2x56x57x32xf32>
return %2 : tensor<1x2x56x57x32xf32>
}
// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d4)>
@@ -221,7 +221,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims2(%arg0: tensor<64xf32>, %des
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<2x32xf32>
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
@@ -247,7 +247,7 @@ func.func @transpose_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: tensor<100x
%1 = arith.addi %0, %b2 : i32
linalg.yield %1 : i32
} -> tensor<100x200x128x256xi32>
- %4 = tensor.pack %transpose
+ %4 = linalg.pack %transpose
inner_dims_pos = [3, 2]
inner_tiles = [16, 32]
into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32>
@@ -263,11 +263,11 @@ func.func @transpose_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: tensor<100x
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32>
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32>
-// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]]
+// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32]
// CHECK-SAME: into %[[ARG2_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
@@ -293,7 +293,7 @@ func.func @affine_constant_expr_pack(%arg0: tensor<100x128x200x256xi32>, %arg1:
%1 = arith.addi %0, %b2 : i32
linalg.yield %1 : i32
} -> tensor<100x200x128x256xi32>
- %4 = tensor.pack %transpose
+ %4 = linalg.pack %transpose
inner_dims_pos = [3, 2]
inner_tiles = [16, 32]
into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32>
@@ -309,11 +309,11 @@ func.func @affine_constant_expr_pack(%arg0: tensor<100x128x200x256xi32>, %arg1:
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32>
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<1x4x1x1x32xi32>
-// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]]
+// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]]
// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [32]
// CHECK-SAME: into %[[ARG2_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
@@ -339,7 +339,7 @@ func.func @transpose_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a
%1 = arith.addi %0, %b2 : i32
linalg.yield %1 : i32
} -> tensor<100x200x128x256xi32>
- %4 = tensor.pack %transpose
+ %4 = linalg.pack %transpose
outer_dims_perm = [1, 2, 3, 0]
inner_dims_pos = [3, 2]
inner_tiles = [16, 32]
@@ -356,11 +356,11 @@ func.func @transpose_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<200x4x16x100x16x32xi32>
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32>
-// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]]
+// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32]
// CHECK-SAME: into %[[ARG2_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
@@ -380,7 +380,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten
linalg.yield %4 : i32
} -> tensor<128x256xi32>
%empty = tensor.empty() : tensor<16x4x32x16xi32>
- %pack = tensor.pack %elem
+ %pack = linalg.pack %elem
outer_dims_perm = [1, 0]
inner_dims_pos = [0, 1]
inner_tiles = [32, 16]
@@ -393,11 +393,11 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32>
-// CHECK: %[[PACKED_ARG1:.+]] = tensor.pack %[[ARG1]]
+// CHECK: %[[PACKED_ARG1:.+]] = linalg.pack %[[ARG1]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[ARG1_EMPTY]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32>
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
@@ -411,7 +411,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten
func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56x64xf32> {
%0 = tensor.empty() : tensor<12x56x56x64xf32>
- %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32>
+ %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32>
%2 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%1 : tensor<12x56x56x64xf32>) {
^bb0(%out: f32):
%3 = arith.addf %out, %out : f32
@@ -424,17 +424,17 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56
// CHECK-LABEL: func.func @unpack_on_output
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf32>
-// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]]
// CHECK: %[[ARG0_EMPTY_PACK:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32>
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_EMPTY_PACK]]
// CHECK: %[[RES:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]]]
// CHECK-SAME: outs(%[[PACKED_ARG0]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[UNPACKED_ARG0]]
@@ -444,7 +444,7 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56
func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf32>) -> tensor<12x56x56x64xf32> {
%0 = tensor.empty() : tensor<12x56x56x64xf32>
- %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32>
+ %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32>
%2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) {
^bb0(%in: f32, %out: f32):
%3 = arith.addf %in, %out : f32
@@ -458,22 +458,22 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32>
-// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]]
// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32>
-// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]]
+// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG1_PACK_EMPTY]]
// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32>
-// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]]
+// CHECK: %[[ARG0_PACK:.+]] = linalg.pack %[[UNPACKED_ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_PACK_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]]
// CHECK-SAME: ins(%[[ARG0_PACK]]
// CHECK-SAME: outs(%[[ARG1_PACK]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG1]]
@@ -483,7 +483,7 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56
func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf16>) -> tensor<12x56x56x64xf16> {
%0 = tensor.empty() : tensor<12x56x56x64xf32>
- %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32>
+ %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32>
%2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf16>) {
^bb0(%in: f32, %out: f16):
%3 = arith.truncf %in : f32 to f16
@@ -497,22 +497,22 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32>
-// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]]
// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16>
-// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]]
+// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG1_PACK_EMPTY]]
// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32>
-// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]]
+// CHECK: %[[ARG0_PACK:.+]] = linalg.pack %[[UNPACKED_ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_PACK_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]]
// CHECK-SAME: ins(%[[ARG0_PACK]]
// CHECK-SAME: outs(%[[ARG1_PACK]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG1]]
@@ -523,7 +523,7 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t
func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56x64xf32> {
%init = tensor.empty() : tensor<12x56x56x64xf32>
%0 = tensor.empty() : tensor<12x56x56x64xf32>
- %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32>
+ %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32>
%2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) {
^bb0(%in: f32, %out: f32):
%3 = arith.addf %in, %in : f32
@@ -537,19 +537,19 @@ func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x5
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[FINAL_RES:.+]] = tensor.empty() : tensor<12x56x56x64xf32>
// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32>
-// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]]
// CHECK: %[[DEST:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32>
// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32>
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[ARG0_PACK_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]]
// CHECK-SAME: ins(%[[PACKED_ARG0]]
// CHECK-SAME: outs(%[[DEST]]
-// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]]
+// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[RES]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[FINAL_RES]]
@@ -558,7 +558,7 @@ func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x5
func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x64xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x56x56x64xf32>
- %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32>
+ %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32>
%padded = tensor.pad %1 low[0, 1, 1, 0] high[0, 1, 1, 0] {
^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index):
tensor.yield %cst : f32
@@ -571,7 +571,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x58x58x64xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[PADDED]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x58x58x32xf32> -> tensor<1x58x58x64xf32>
@@ -580,7 +580,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens
func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58x58x64xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x56x56x64xf32>
- %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32>
+ %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32>
%padded = tensor.pad %1 low[1, 1, 1, 0] high[0, 1, 1, 0] {
^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index):
tensor.yield %cst : f32
@@ -593,7 +593,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[1, 0, 1, 1, 0] high[0, 0, 1, 1, 0]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x58x58x64xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[PADDED]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[EMPTY]] : tensor<2x2x58x58x32xf32> -> tensor<2x58x58x64xf32>
@@ -602,7 +602,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens
func.func @pad_along_unpacked_dim(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x66xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x56x56x64xf32>
- %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32>
+ %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32>
%padded = tensor.pad %1 low[0, 1, 1, 1] high[0, 1, 1, 1] {
^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index):
tensor.yield %cst : f32
@@ -614,7 +614,7 @@ func.func @pad_along_unpacked_dim(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x5
// CHECK: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>)
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x64xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32>
// CHECK: %[[PADDED:.+]] = tensor.pad %[[UNPACK]] low[0, 1, 1, 1] high[0, 1, 1, 1]
@@ -628,7 +628,7 @@ func.func @pad_valid_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1
tensor.yield %cst : f32
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32>
%0 = tensor.empty() : tensor<1x2x58x58x32xf32>
- %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32>
+ %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32>
return %1 : tensor<1x2x58x58x32xf32>
}
@@ -636,7 +636,7 @@ func.func @pad_valid_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>)
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x56x56x32xf32>
-// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32]
+// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32]
// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x2x56x56x32xf32>
// CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0]
// CHECK: return %[[PADDED]]
@@ -650,7 +650,7 @@ func.func @pad_valid_outer_dims_pack_propagation(%arg0: tensor<1x64x56x56xf32>)
tensor.yield %cst : f32
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32>
%0 = tensor.empty() : tensor<1x58x58x2x32xf32>
- %1 = tensor.pack %padded outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x58x58x2x32xf32>
+ %1 = linalg.pack %padded outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x58x58x2x32xf32>
return %1 : tensor<1x58x58x2x32xf32>
}
@@ -658,7 +658,7 @@ func.func @pad_valid_outer_dims_pack_propagation(%arg0: tensor<1x64x56x56xf32>)
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>)
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x2x32xf32>
-// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32]
// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x56x56x2x32xf32>
// CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 1, 1, 0, 0] high[0, 1, 1, 0, 0]
@@ -673,7 +673,7 @@ func.func @pad_along_packed_dim(%arg0: tensor<1x60x56x56xf32>) -> tensor<1x2x58x
tensor.yield %cst : f32
} : tensor<1x60x56x56xf32> to tensor<1x64x58x58xf32>
%0 = tensor.empty() : tensor<1x2x58x58x32xf32>
- %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32>
+ %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32>
return %1 : tensor<1x2x58x58x32xf32>
}
@@ -682,7 +682,7 @@ func.func @pad_along_packed_dim(%arg0: tensor<1x60x56x56xf32>) -> tensor<1x2x58x
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 2, 1, 1] high[0, 2, 1, 1]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x58x58x32xf32>
-// CHECK: tensor.pack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32]
+// CHECK: linalg.pack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32]
// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32>
// -----
@@ -694,7 +694,7 @@ func.func @multi_use_pad_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> (ten
tensor.yield %cst : f32
} : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32>
%0 = tensor.empty() : tensor<1x2x58x58x32xf32>
- %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32>
+ %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32>
return %padded, %1 : tensor<1x64x58x58xf32>, tensor<1x2x58x58x32xf32>
}
@@ -702,10 +702,10 @@ func.func @multi_use_pad_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> (ten
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>)
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x56x56x32xf32>
-// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32]
+// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32]
// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x2x56x56x32xf32>
// CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0]
-// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32]
+// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32]
// CHECK: return %[[UNPACKED]], %[[PADDED]]
// -----
@@ -721,7 +721,7 @@ func.func @would_break_dominance(%arg0: tensor<128x256xi32>) -> tensor<4x16x16x3
linalg.yield %4 : i32
} -> tensor<128x256xi32>
%dest = bufferization.alloc_tensor() : tensor<4x16x16x32xi32>
- %pack = tensor.pack %elem
+ %pack = linalg.pack %elem
inner_dims_pos = [1, 0]
inner_tiles = [16, 32]
into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32>
@@ -735,7 +735,7 @@ func.func @would_break_dominance(%arg0: tensor<128x256xi32>) -> tensor<4x16x16x3
// CHECK-SAME: ins(%[[ARG0]]
// CHECK-SAME: outs(%[[EMPTY]]
// CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32>
-// CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]]
+// CHECK-NEXT: %{{.+}} = linalg.pack %[[GEN]]
// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ALLOC]]
@@ -751,7 +751,7 @@ func.func @scalar_tensor(%arg0 : tensor<f32>) -> tensor<1x32x7x7x32xf32> {
linalg.yield %in : f32
} -> tensor<1x7x7x1024xf32>
%empty_pack = tensor.empty() : tensor<1x32x7x7x32xf32>
- %pack = tensor.pack %gen outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %empty_pack : tensor<1x7x7x1024xf32> -> tensor<1x32x7x7x32xf32>
+ %pack = linalg.pack %gen outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %empty_pack : tensor<1x7x7x1024xf32> -> tensor<1x32x7x7x32xf32>
return %pack : tensor<1x32x7x7x32xf32>
}
@@ -772,7 +772,7 @@ func.func @scalar_tensor(%arg0 : tensor<f32>) -> tensor<1x32x7x7x32xf32> {
func.func @unpack_empty_inner_dims(%arg0: tensor<12x64x56x56xf32>) -> tensor<12x56x56x64xf32> {
%init = tensor.empty() : tensor<12x56x56x64xf32>
%0 = tensor.empty() : tensor<12x56x56x64xf32>
- %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] into %0 : tensor<12x64x56x56xf32> -> tensor<12x56x56x64xf32>
+ %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] into %0 : tensor<12x64x56x56xf32> -> tensor<12x56x56x64xf32>
%2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) {
^bb0(%in: f32, %out: f32):
%3 = arith.addf %in, %in : f32
@@ -782,13 +782,13 @@ func.func @unpack_empty_inner_dims(%arg0: tensor<12x64x56x56xf32>) -> tensor<12x
}
// CHECK-LABEL: func.func @unpack_empty_inner_dims
-// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack
+// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = []
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = []
// CHECK: %[[RES:.+]] = linalg.generic
// CHECK-SAME: ins(%[[PACKED_ARG0]]
-// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]]
+// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[RES]]
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = []
// -----
@@ -805,7 +805,7 @@ func.func @reduction_pack_transpose_inner_dims(%arg0: tensor<128x256x32xi32>,
linalg.yield %4 : i32
} -> tensor<128x256xi32>
%dest = tensor.empty() : tensor<4x16x16x32xi32>
- %pack = tensor.pack %elem
+ %pack = linalg.pack %elem
inner_dims_pos = [1, 0]
inner_tiles = [16, 32]
into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32>
@@ -817,11 +817,11 @@ func.func @reduction_pack_transpose_inner_dims(%arg0: tensor<128x256x32xi32>,
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32>
-// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]]
+// CHECK: %[[PACK_ARG1:.+]] = linalg.pack %[[ARG1]]
// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG1_EMPTY]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x32x16x32xi32>
-// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[RED:.+]] = linalg.generic
@@ -851,7 +851,7 @@ func.func @reduction_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a
linalg.yield %2 : i32
} -> tensor<100x128x256xi32>
%init_pack = tensor.empty() : tensor<4x16x100x16x32xi32>
- %4 = tensor.pack %reduction
+ %4 = linalg.pack %reduction
outer_dims_perm = [1, 2, 0]
inner_dims_pos = [2, 1]
inner_tiles = [16, 32]
@@ -869,15 +869,15 @@ func.func @reduction_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]
// CHECK: %[[ARG3_EMPTY:.+]] = tensor.empty() : tensor<4x16x100x16x32xi32>
-// CHECK: %[[PACKED_ARG3:.+]] = tensor.pack %[[ARG3]]
+// CHECK: %[[PACKED_ARG3:.+]] = linalg.pack %[[ARG3]]
// CHECK-SAME: outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG3_EMPTY]]
// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x200x100x16x32xi32>
-// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 3, 2, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32]
// CHECK-SAME: into %[[ARG0_EMPTY]]
// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32>
-// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]]
+// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32]
// CHECK-SAME: into %[[ARG2_EMPTY]]
// CHECK: %[[RES:.+]] = linalg.generic
@@ -894,7 +894,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32
%filter: tensor<2x2xi32>) -> tensor<16x540x960xi32>{
%init = tensor.empty() : tensor<16x540x960xi32>
%empty = tensor.empty() : tensor<1x16x1080x1920xi32>
- %unpack = tensor.unpack %arg0
+ %unpack = linalg.unpack %arg0
inner_dims_pos = [1]
inner_tiles = [16]
into %empty : tensor<1x1x1080x1920x16xi32> -> tensor<1x16x1080x1920xi32>
@@ -916,7 +916,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32
// CHECK: %[[FINAL_RES:.+]] = tensor.empty() : tensor<16x540x960xi32>
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x540x960x16xi32>
// CHECK: %[[PACK_EMPTY:.+]] = tensor.empty() : tensor<1x1x1080x1920x16xi32>
-// CHECK: %[[PACK_ARG0:.+]] = tensor.pack
+// CHECK: %[[PACK_ARG0:.+]] = linalg.pack
// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [16]
// CHECK-SAME: into %[[PACK_EMPTY]]
// CHECK: %[[POOL:.+]] = linalg.generic
@@ -924,7 +924,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"]
// CHECK-SAME: ins(%[[PACK_ARG0]], %[[ARG1]]
// CHECK-SAME: outs(%[[INIT]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[POOL]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[POOL]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16]
// CHECK-SAME: into %[[FINAL_RES]]
// CHECK: return %[[UNPACK]] : tensor<16x540x960xi32>
@@ -934,7 +934,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32
func.func @bubble_up_pack_through_collapse(%1: tensor<?x16x4xf32>, %dim : index) -> tensor<?x4x8x1xf32> {
%collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor<?x16x4xf32> into tensor<?x4xf32>
%2 = tensor.empty(%dim) : tensor<?x4x8x1xf32>
- %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<?x4xf32> -> tensor<?x4x8x1xf32>
+ %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<?x4xf32> -> tensor<?x4x8x1xf32>
func.return %pack : tensor<?x4x8x1xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_through_collapse
@@ -943,7 +943,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor<?x16x4xf32>, %dim : index)
// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x16x4xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<?x2x4x8x1xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<?x16x4xf32> -> tensor<?x2x4x8x1xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<?x16x4xf32> -> tensor<?x2x4x8x1xf32>
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor<?x2x4x8x1xf32> into tensor<?x4x8x1xf32>
// CHECK: return %[[COLLAPSED]] : tensor<?x4x8x1xf32>
@@ -952,7 +952,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor<?x16x4xf32>, %dim : index)
func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor<?x16x4xf32>, %dim : index) -> tensor<?x4x8x1xf32> {
%collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor<?x16x4xf32> into tensor<?x4xf32>
%2 = tensor.empty(%dim) : tensor<?x4x8x1xf32>
- %pack = tensor.pack %collapsed inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<?x4xf32> -> tensor<?x4x8x1xf32>
+ %pack = linalg.pack %collapsed inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<?x4xf32> -> tensor<?x4x8x1xf32>
func.return %pack : tensor<?x4x8x1xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm
@@ -961,7 +961,7 @@ func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor<?x16
// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x16x4xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<?x2x4x8x1xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<?x16x4xf32> -> tensor<?x2x4x8x1xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<?x16x4xf32> -> tensor<?x2x4x8x1xf32>
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor<?x2x4x8x1xf32> into tensor<?x4x8x1xf32>
// CHECK: return %[[COLLAPSED]] : tensor<?x4x8x1xf32>
@@ -970,13 +970,13 @@ func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor<?x16
func.func @bubble_up_permuted_pack_through_collapse(%1: tensor<4x192x16x256xf32>) -> tensor<4x32x3072x8x1xf32> {
%collapsed = tensor.collapse_shape %1 [[0], [1, 2], [3]] : tensor<4x192x16x256xf32> into tensor<4x3072x256xf32>
%2 = tensor.empty() : tensor<4x32x3072x8x1xf32>
- %pack = tensor.pack %collapsed outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %2 : tensor<4x3072x256xf32> -> tensor<4x32x3072x8x1xf32>
+ %pack = linalg.pack %collapsed outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %2 : tensor<4x3072x256xf32> -> tensor<4x32x3072x8x1xf32>
func.return %pack : tensor<4x32x3072x8x1xf32>
}
// CHECK-LABEL: func.func @bubble_up_permuted_pack_through_collapse
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x32x192x16x8x1xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<4x192x16x256xf32> -> tensor<4x32x192x16x8x1xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<4x192x16x256xf32> -> tensor<4x32x192x16x8x1xf32>
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %pack {{\[}}[0], [1], [2, 3], [4], [5]] : tensor<4x32x192x16x8x1xf32> into tensor<4x32x3072x8x1xf32>
// CHECK: return %[[COLLAPSED]] : tensor<4x32x3072x8x1xf32>
@@ -985,13 +985,13 @@ func.func @bubble_up_permuted_pack_through_collapse(%1: tensor<4x192x16x256xf32>
func.func @bubble_up_pack_through_unit_collapse(%1: tensor<1x64x1x4xf32>) -> tensor<8x4x8x1xf32> {
%collapsed = tensor.collapse_shape %1 [[0, 1, 2], [3]] : tensor<1x64x1x4xf32> into tensor<64x4xf32>
%2 = tensor.empty() : tensor<8x4x8x1xf32>
- %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<64x4xf32> -> tensor<8x4x8x1xf32>
+ %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<64x4xf32> -> tensor<8x4x8x1xf32>
func.return %pack : tensor<8x4x8x1xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_through_unit_collapse
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x8x1x4x8x1xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<1x64x1x4xf32> -> tensor<1x8x1x4x8x1xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<1x64x1x4xf32> -> tensor<1x8x1x4x8x1xf32>
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1, 2], [3], [4], [5]] : tensor<1x8x1x4x8x1xf32> into tensor<8x4x8x1xf32>
// CHECK: return %[[COLLAPSED]] : tensor<8x4x8x1xf32>
@@ -1000,7 +1000,7 @@ func.func @bubble_up_pack_through_unit_collapse(%1: tensor<1x64x1x4xf32>) -> ten
func.func @bubble_up_pack_through_collapse_on_outer_dims(%1: tensor<?x16x4xf32>, %dim : index) -> tensor<?x1x4xf32> {
%collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor<?x16x4xf32> into tensor<?x4xf32>
%2 = tensor.empty(%dim) : tensor<?x1x4xf32>
- %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [4] into %2 : tensor<?x4xf32> -> tensor<?x1x4xf32>
+ %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [4] into %2 : tensor<?x4xf32> -> tensor<?x1x4xf32>
func.return %pack : tensor<?x1x4xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_through_collapse_on_outer_dims
@@ -1009,7 +1009,7 @@ func.func @bubble_up_pack_through_collapse_on_outer_dims(%1: tensor<?x16x4xf32>,
// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x16x4xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<?x16x1x4xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [4] into %[[EMPTY]] : tensor<?x16x4xf32> -> tensor<?x16x1x4xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [4] into %[[EMPTY]] : tensor<?x16x4xf32> -> tensor<?x16x1x4xf32>
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3]] : tensor<?x16x1x4xf32> into tensor<?x1x4xf32>
// CHECK: return %[[COLLAPSED]] : tensor<?x1x4xf32>
@@ -1018,13 +1018,13 @@ func.func @bubble_up_pack_through_collapse_on_outer_dims(%1: tensor<?x16x4xf32>,
func.func @no_bubble_up_pack_through_non_divisible_collapse(%1: tensor<3072x64x4xf32>) -> tensor<384x32x8x8xf32> {
%collapsed = tensor.collapse_shape %1 [[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32>
%2 = tensor.empty() : tensor<384x32x8x8xf32>
- %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %2 : tensor<3072x256xf32> -> tensor<384x32x8x8xf32>
+ %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %2 : tensor<3072x256xf32> -> tensor<384x32x8x8xf32>
func.return %pack : tensor<384x32x8x8xf32>
}
// CHECK-LABEL: func.func @no_bubble_up_pack_through_non_divisible_collapse
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[COLLAPSED]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[COLLAPSED]]
// CHECK: return %[[PACK]] : tensor<384x32x8x8xf32>
// -----
@@ -1032,13 +1032,13 @@ func.func @no_bubble_up_pack_through_non_divisible_collapse(%1: tensor<3072x64x4
func.func @bubble_up_pack_outer_expanded_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x64x4xf32> {
%empty = tensor.empty() : tensor<4x2x64x4xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [1] inner_tiles = [4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x64x4xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [1] inner_tiles = [4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x64x4xf32>
return %pack : tensor<4x2x64x4xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_outer_expanded_through_expand(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x64x4xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<8x64x4xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3]]
// CHECK-SAME: output_shape [4, 2, 64, 4] : tensor<8x64x4xf32> into tensor<4x2x64x4xf32>
@@ -1049,13 +1049,13 @@ func.func @bubble_up_pack_outer_expanded_through_expand(%arg0: tensor<32x64xf32>
func.func @bubble_up_pack_inner_expanded_through_expand(%arg0: tensor<32x64xf32>) -> tensor<32x4x4x4xf32> {
%empty = tensor.empty() : tensor<32x4x4x4xf32>
%expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [2] inner_tiles = [4] into %empty : tensor<32x4x16xf32> -> tensor<32x4x4x4xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [2] inner_tiles = [4] into %empty : tensor<32x4x16xf32> -> tensor<32x4x4x4xf32>
return %pack : tensor<32x4x4x4xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_inner_expanded_through_expand(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x16x4xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]]
// CHECK-SAME: : tensor<32x64xf32> -> tensor<32x16x4xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3]]
@@ -1067,13 +1067,13 @@ func.func @bubble_up_pack_inner_expanded_through_expand(%arg0: tensor<32x64xf32>
func.func @bubble_up_pack_non_expanded_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<8x2x32x16x4xf32> {
%empty = tensor.empty() : tensor<8x2x32x16x4xf32>
%expanded = tensor.expand_shape %arg0 [[0], [1, 2], [3]] output_shape [32, 2, 32, 16] : tensor<32x64x16xf32> into tensor<32x2x32x16xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [4] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x32x16x4xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [4] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x32x16x4xf32>
return %pack : tensor<8x2x32x16x4xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_non_expanded_dims_through_expand(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x64x16x4xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack
+// CHECK: %[[PACK:.+]] = linalg.pack
// CHECK-SAME: %[[ARG0]] inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]]
// CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x64x16x4xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3], [4]]
@@ -1087,7 +1087,7 @@ func.func @bubble_up_pack_through_expand_dynamic(%arg0: tensor<?x64xf32>) -> ten
%dim = tensor.dim %arg0, %c0 : tensor<?x64xf32>
%empty = tensor.empty(%dim) : tensor<?x4x2x8xf32>
%expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [%dim, 4, 16] : tensor<?x64xf32> into tensor<?x4x16xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [2] inner_tiles = [8] into %empty : tensor<?x4x16xf32> -> tensor<?x4x2x8xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [2] inner_tiles = [8] into %empty : tensor<?x4x16xf32> -> tensor<?x4x2x8xf32>
return %pack : tensor<?x4x2x8xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_through_expand_dynamic(
@@ -1095,7 +1095,7 @@ func.func @bubble_up_pack_through_expand_dynamic(%arg0: tensor<?x64xf32>) -> ten
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[DIM_INPUT:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x64xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM_INPUT]]) : tensor<?x8x8xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [8] into %[[EMPTY]]
// CHECK-SAME: : tensor<?x64xf32> -> tensor<?x8x8xf32>
// CHECK: %[[DIM_PACK:.+]] = tensor.dim %[[PACK]], %[[C0]] : tensor<?x8x8xf32>
@@ -1109,14 +1109,14 @@ func.func @bubble_up_pack_non_expanded_padding_through_expand(%arg0: tensor<32x6
%cst = arith.constant 3.000000e+00 : f32
%empty = tensor.empty() : tensor<4x2x8x4x8xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x60xf32> into tensor<4x8x60xf32>
- %pack = tensor.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1, 2] inner_tiles = [4, 8] into %empty : tensor<4x8x60xf32> -> tensor<4x2x8x4x8xf32>
+ %pack = linalg.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1, 2] inner_tiles = [4, 8] into %empty : tensor<4x8x60xf32> -> tensor<4x2x8x4x8xf32>
return %pack : tensor<4x2x8x4x8xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_non_expanded_padding_through_expand(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-DAG: %[[CST:.+]] = arith.constant 3.000000e+00 : f32
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x8x4x8xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[CST]] : f32)
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[CST]] : f32)
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 8] into %[[EMPTY]]
// CHECK-SAME: : tensor<32x60xf32> -> tensor<8x8x4x8xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]]
@@ -1128,13 +1128,13 @@ func.func @bubble_up_pack_non_expanded_padding_through_expand(%arg0: tensor<32x6
func.func @bubble_up_pack_outer_dims_perm_identity_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x32x4x2xf32> {
%empty = tensor.empty() : tensor<4x2x32x4x2xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
- %pack = tensor.pack %expanded outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<4x2x32x4x2xf32>
+ %pack = linalg.pack %expanded outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<4x2x32x4x2xf32>
return %pack : tensor<4x2x32x4x2xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_outer_dims_perm_identity_through_expand(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32x4x2xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 2] into %[[EMPTY]]
// CHECK-SAME: : tensor<32x64xf32> -> tensor<8x32x4x2xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]]
@@ -1146,13 +1146,13 @@ func.func @bubble_up_pack_outer_dims_perm_identity_through_expand(%arg0: tensor<
func.func @bubble_up_pack_multiple_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<8x2x4x8x4x8x2xf32> {
%empty = tensor.empty() : tensor<8x2x4x8x4x8x2xf32>
%expanded = tensor.expand_shape %arg0 [[0], [1, 2], [3]] output_shape [32, 2, 32, 16] : tensor<32x64x16xf32> into tensor<32x2x32x16xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [0, 2, 3] inner_tiles = [4, 8, 2] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x4x8x4x8x2xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [0, 2, 3] inner_tiles = [4, 8, 2] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x4x8x4x8x2xf32>
return %pack : tensor<8x2x4x8x4x8x2xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_multiple_dims_through_expand(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x8x8x4x8x2xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [0, 1, 2] inner_tiles = [4, 8, 2] into %[[EMPTY]]
// CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x8x8x4x8x2xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3], [4], [5], [6]]
@@ -1164,13 +1164,13 @@ func.func @bubble_up_pack_multiple_dims_through_expand(%arg0: tensor<32x64x16xf3
func.func @bubble_up_pack_inner_dims_reorder_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x4x16x4xf32> {
%empty = tensor.empty() : tensor<4x2x4x16x4xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [2, 1] inner_tiles = [16, 4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x4x16x4xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [2, 1] inner_tiles = [16, 4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x4x16x4xf32>
return %pack : tensor<4x2x4x16x4xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_inner_dims_reorder_through_expand(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x4x16x4xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 4] into %[[EMPTY]]
// CHECK-SAME: : tensor<32x64xf32> -> tensor<8x4x16x4xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]]
@@ -1182,13 +1182,13 @@ func.func @bubble_up_pack_inner_dims_reorder_through_expand(%arg0: tensor<32x64x
func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<4x2x2x8x16x4x4xf32> {
%empty = tensor.empty() : tensor<4x2x2x8x16x4x4xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2, 3], [4]] output_shape [4, 8, 2, 32, 16] : tensor<32x64x16xf32> into tensor<4x8x2x32x16xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [1, 3] inner_tiles = [4, 4] into %empty : tensor<4x8x2x32x16xf32> -> tensor<4x2x2x8x16x4x4xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [1, 3] inner_tiles = [4, 4] into %empty : tensor<4x8x2x32x16xf32> -> tensor<4x2x2x8x16x4x4xf32>
return %pack : tensor<4x2x2x8x16x4x4xf32>
}
// CHECK-LABEL: func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x16x16x4x4xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %[[EMPTY]]
// CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x16x16x4x4xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2, 3], [4], [5], [6]]
@@ -1200,7 +1200,7 @@ func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand(%arg0:
func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor<32x64xf32>) -> tensor<32x4x2x4x2xf32> {
%empty = tensor.empty() : tensor<32x4x2x4x2xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
- %pack = tensor.pack %expanded outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32>
+ %pack = linalg.pack %expanded outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32>
return %pack : tensor<32x4x2x4x2xf32>
}
// CHECK-LABEL: func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(
@@ -1208,7 +1208,7 @@ func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x4x2x4x2xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]]
// CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]]
// CHECK-SAME: outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %[[EMPTY]]
// CHECK-SAME: : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32>
// CHECK: return %[[PACK]] : tensor<32x4x2x4x2xf32>
@@ -1218,7 +1218,7 @@ func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor
func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: tensor<32x64xf32>) -> tensor<2x2x64x2x4xf32> {
%empty = tensor.empty() : tensor<2x2x64x2x4xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %empty : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %empty : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32>
return %pack : tensor<2x2x64x2x4xf32>
}
// CHECK-LABEL: func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(
@@ -1226,7 +1226,7 @@ func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: te
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x2x64x2x4xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]]
// CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %[[EMPTY]]
// CHECK-SAME: : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32>
// CHECK: return %[[PACK]] : tensor<2x2x64x2x4xf32>
@@ -1236,7 +1236,7 @@ func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: te
func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand(%arg0: tensor<32x64xf32>) -> tensor<2x8x64x2xf32> {
%empty = tensor.empty() : tensor<2x8x64x2xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [2] into %empty : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [2] into %empty : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32>
return %pack : tensor<2x8x64x2xf32>
}
// CHECK-LABEL: func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand(
@@ -1244,7 +1244,7 @@ func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand(%arg0: te
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x8x64x2xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]]
// CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] into %[[EMPTY]]
// CHECK-SAME: : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32>
// CHECK: return %[[PACK]] : tensor<2x8x64x2xf32>
@@ -1255,7 +1255,7 @@ func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate(
%cst = arith.constant 3.000000e+00 : f32
%empty = tensor.empty() : tensor<3x2x60x8xf32>
%expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [3, 10, 60] : tensor<30x60xf32> into tensor<3x10x60xf32>
- %pack = tensor.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1] inner_tiles = [8] into %empty : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32>
+ %pack = linalg.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1] inner_tiles = [8] into %empty : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32>
return %pack : tensor<3x2x60x8xf32>
}
// CHECK-LABEL: func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate(
@@ -1264,7 +1264,7 @@ func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate(
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x2x60x8xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]]
// CHECK-SAME: output_shape [3, 10, 60] : tensor<30x60xf32> into tensor<3x10x60xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] padding_value(%[[CST]] : f32)
+// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] padding_value(%[[CST]] : f32)
// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [8] into %[[EMPTY]]
// CHECK-SAME: : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32>
// CHECK: return %[[PACK]] : tensor<3x2x60x8xf32>
@@ -1274,7 +1274,7 @@ func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate(
func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassociate(%arg0: tensor<32x64xf32>) -> tensor<8x4x16x8xf32> {
%empty = tensor.empty() : tensor<8x4x16x8xf32>
%expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32>
- %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [8] into %empty : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32>
+ %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [8] into %empty : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32>
return %pack : tensor<8x4x16x8xf32>
}
// CHECK-LABEL: func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassociate(
@@ -1282,7 +1282,7 @@ func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassocia
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x4x16x8xf32>
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1, 2]]
// CHECK-SAME: output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] into %[[EMPTY]]
// CHECK-SAME: : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32>
// CHECK: return %[[PACK]] : tensor<8x4x16x8xf32>
@@ -1291,7 +1291,7 @@ func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassocia
func.func @push_down_unpack_through_expand(%5: tensor<?x32x8x8xf32>, %dim: index, %sz0: index) -> tensor<?x256x256xf32> {
%6 = tensor.empty(%dim) : tensor<?x256xf32>
- %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<?x32x8x8xf32> -> tensor<?x256xf32>
+ %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<?x32x8x8xf32> -> tensor<?x256xf32>
%expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor<?x256xf32> into tensor<?x256x256xf32>
func.return %expanded : tensor<?x256x256xf32>
}
@@ -1305,14 +1305,14 @@ func.func @push_down_unpack_through_expand(%5: tensor<?x32x8x8xf32>, %dim: index
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3], [4]] output_shape [%[[SZ0]], 32, 32, 8, 8] : tensor<?x32x8x8xf32> into tensor<?x32x32x8x8xf32>
// CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor<?x32x32x8x8xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<?x256x256xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<?x32x32x8x8xf32> -> tensor<?x256x256xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<?x32x32x8x8xf32> -> tensor<?x256x256xf32>
// CHECK: return %[[UNPACK]] : tensor<?x256x256xf32>
// -----
func.func @push_down_unpack_through_expand_empty_outer_dims_perm(%5: tensor<?x32x8x8xf32>, %dim: index, %sz0: index) -> tensor<?x256x256xf32> {
%6 = tensor.empty(%dim) : tensor<?x256xf32>
- %unpack = tensor.unpack %5 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<?x32x8x8xf32> -> tensor<?x256xf32>
+ %unpack = linalg.unpack %5 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<?x32x8x8xf32> -> tensor<?x256xf32>
%expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor<?x256xf32> into tensor<?x256x256xf32>
func.return %expanded : tensor<?x256x256xf32>
}
@@ -1326,14 +1326,14 @@ func.func @push_down_unpack_through_expand_empty_outer_dims_perm(%5: tensor<?x32
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3], [4]] output_shape [%[[SZ0]], 32, 32, 8, 8] : tensor<?x32x8x8xf32> into tensor<?x32x32x8x8xf32>
// CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor<?x32x32x8x8xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<?x256x256xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<?x32x32x8x8xf32> -> tensor<?x256x256xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED:.+]] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<?x32x32x8x8xf32> -> tensor<?x256x256xf32>
// CHECK: return %[[UNPACK]] : tensor<?x256x256xf32>
// -----
func.func @push_down_permuted_unpack_through_expand(%5: tensor<4x32x384x8x8xf32>) -> tensor<4x12x256x256xf32> {
%6 = tensor.empty() : tensor<4x3072x256xf32>
- %unpack = tensor.unpack %5 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 8] into %6 : tensor<4x32x384x8x8xf32> -> tensor<4x3072x256xf32>
+ %unpack = linalg.unpack %5 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 8] into %6 : tensor<4x32x384x8x8xf32> -> tensor<4x3072x256xf32>
%expanded = tensor.expand_shape %unpack [[0], [1, 2], [3]] output_shape [4, 12, 256, 256] : tensor<4x3072x256xf32> into tensor<4x12x256x256xf32>
func.return %expanded : tensor<4x12x256x256xf32>
}
@@ -1341,14 +1341,14 @@ func.func @push_down_permuted_unpack_through_expand(%5: tensor<4x32x384x8x8xf32>
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3], [4], [5]] output_shape [4, 32, 12, 32, 8, 8] : tensor<4x32x384x8x8xf32> into tensor<4x32x12x32x8x8xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x12x256x256xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<4x32x12x32x8x8xf32> -> tensor<4x12x256x256xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<4x32x12x32x8x8xf32> -> tensor<4x12x256x256xf32>
// CHECK: return %[[UNPACK]] : tensor<4x12x256x256xf32>
// -----
func.func @push_down_unpack_through_unit_expand(%5: tensor<6x32x8x8xf32>) -> tensor<3x16x1x256xf32> {
%6 = tensor.empty() : tensor<48x256xf32>
- %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<6x32x8x8xf32> -> tensor<48x256xf32>
+ %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<6x32x8x8xf32> -> tensor<48x256xf32>
%expanded = tensor.expand_shape %unpack [[0, 1, 2], [3]] output_shape [3, 16, 1, 256] : tensor<48x256xf32> into tensor<3x16x1x256xf32>
func.return %expanded : tensor<3x16x1x256xf32>
}
@@ -1356,14 +1356,14 @@ func.func @push_down_unpack_through_unit_expand(%5: tensor<6x32x8x8xf32>) -> ten
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1, 2], [3], [4], [5]] output_shape [3, 2, 1, 32, 8, 8] : tensor<6x32x8x8xf32> into tensor<3x2x1x32x8x8xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x16x1x256xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<3x2x1x32x8x8xf32> -> tensor<3x16x1x256xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<3x2x1x32x8x8xf32> -> tensor<3x16x1x256xf32>
// CHECK: return %[[UNPACK]] : tensor<3x16x1x256xf32>
// -----
func.func @push_down_unpack_through_expand_on_outer_dims(%5: tensor<?x32x8xf32>, %dim: index, %sz0: index) -> tensor<?x256x256xf32> {
%6 = tensor.empty(%dim) : tensor<?x256xf32>
- %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %6 : tensor<?x32x8xf32> -> tensor<?x256xf32>
+ %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %6 : tensor<?x32x8xf32> -> tensor<?x256xf32>
%expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor<?x256xf32> into tensor<?x256x256xf32>
func.return %expanded : tensor<?x256x256xf32>
}
@@ -1377,19 +1377,19 @@ func.func @push_down_unpack_through_expand_on_outer_dims(%5: tensor<?x32x8xf32>,
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3]] output_shape [%[[SZ0]], 256, 32, 8] : tensor<?x32x8xf32> into tensor<?x256x32x8xf32>
// CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor<?x256x32x8xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<?x256x256xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [8] into %[[EMPTY]] : tensor<?x256x32x8xf32> -> tensor<?x256x256xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [8] into %[[EMPTY]] : tensor<?x256x32x8xf32> -> tensor<?x256x256xf32>
// CHECK: return %[[UNPACK]] : tensor<?x256x256xf32>
// -----
func.func @no_push_down_unpack_through_non_divisible_expand(%5: tensor<384x32x8x8xf32>) -> tensor<256x12x256xf32> {
%6 = tensor.empty() : tensor<3072x256xf32>
- %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32>
+ %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32>
%expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [256, 12, 256] : tensor<3072x256xf32> into tensor<256x12x256xf32>
func.return %expanded : tensor<256x12x256xf32>
}
// CHECK-LABEL: func.func @no_push_down_unpack_through_non_divisible_expand
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[UNPACK]] {{\[}}[0, 1], [2]] output_shape [256, 12, 256] : tensor<3072x256xf32> into tensor<256x12x256xf32>
// CHECK: return %[[EXPANDED]] : tensor<256x12x256xf32>
diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir
index ec761d9a494362..72fde5490a305e 100644
--- a/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir
@@ -4,7 +4,7 @@
// RUN: -transform-interpreter %s | FileCheck %s
func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8x32xf32>) -> tensor<1x1x4x8x8x32xf32> {
- %0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32>
return %0 : tensor<1x1x4x8x8x32xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)>
@@ -27,7 +27,7 @@ func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -36,7 +36,7 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %arg2: f32) -> tensor<2x8x8x2xf32> {
- %0 = tensor.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
+ %0 = linalg.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
return %0 : tensor<2x8x8x2xf32>
}
// CHECK: func.func @pad_and_pack
@@ -54,7 +54,7 @@ func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %a
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -64,7 +64,7 @@ module attributes {transform.with_named_sequence} {
func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> {
- %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32>
+ %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32>
return %0 : tensor<32x4x32x8xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)>
@@ -85,7 +85,7 @@ func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>)
// CHECK-SAME: [%[[C]], %[[K]], 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<1x1x32x8xf32> into tensor<32x4x32x8xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir
index 1cc1484ed40951..911b453f919c36 100644
--- a/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir
@@ -5,7 +5,7 @@
func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<?x?xi32>, %arg1: tensor<1x1x?x1xi32>) -> tensor<1x1x?x1xi32> {
%c8 = arith.constant 8 : index
%c5 = arith.constant 5 : i32
- %pack = tensor.pack %arg0 padding_value(%c5 : i32) inner_dims_pos = [0, 1] inner_tiles = [%c8, 1] into %arg1 : tensor<?x?xi32> -> tensor<1x1x?x1xi32>
+ %pack = linalg.pack %arg0 padding_value(%c5 : i32) inner_dims_pos = [0, 1] inner_tiles = [%c8, 1] into %arg1 : tensor<?x?xi32> -> tensor<1x1x?x1xi32>
return %pack : tensor<1x1x?x1xi32>
}
@@ -32,7 +32,7 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<?x?xi32>, %arg1: tensor<1x1x?x1xi
// -----
func.func @simple_pad_and_pack_static_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> {
- %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32>
+ %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32>
return %0 : tensor<1x1x8x2xf32>
}
// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 - 5)>
@@ -52,7 +52,7 @@ func.func @simple_pad_and_pack_static_tiles(%input: tensor<5x1xf32>, %output: te
/// Same as example above, but with 1 dynamic tile size.
func.func @simple_pad_and_pack_dynamic_tile(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32, %tile_dim_0: index) -> tensor<1x1x?x2xf32> {
- %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32>
+ %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32>
return %0 : tensor<1x1x?x2xf32>
}
// CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile(
@@ -72,7 +72,7 @@ func.func @simple_pad_and_pack_dynamic_tile(%input: tensor<5x1xf32>, %output: te
func.func @simple_pad_and_pack_dynamic_tile_cst(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32) -> tensor<1x1x?x2xf32> {
%tile_dim_0 = arith.constant 8 : index
- %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32>
+ %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32>
return %0 : tensor<1x1x?x2xf32>
}
// CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile_cst(
@@ -86,7 +86,7 @@ func.func @simple_pad_and_pack_dynamic_tile_cst(%input: tensor<5x1xf32>, %output
// CHECK: return %[[RES]] : tensor<1x1x?x2xf32>
func.func @simple_pad_and_pack_dynamic_tile_transpose(%input: tensor<5x1xf32>, %output: tensor<1x1x2x?xf32>, %pad: f32, %tile_dim_1: index) -> tensor<1x1x2x?xf32> {
- %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x2x?xf32>
+ %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x2x?xf32>
return %0 : tensor<1x1x2x?xf32>
}
// CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile_transpose(
@@ -116,7 +116,7 @@ func.func @simple_pad_and_pack_scalable_tile(%input: tensor<5x1xf32>, %output: t
%c8 = arith.constant 8 : index
%vscale = vector.vscale
%c8_vscale = arith.muli %vscale, %c8 : index
- %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32>
+ %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32>
return %0 : tensor<1x1x?x2xf32>
}
@@ -138,7 +138,7 @@ func.func @simple_pad_and_pack_scalable_tile(%input: tensor<5x1xf32>, %output: t
/// Same as example above, but with both tile sizes dynamic.
func.func @simple_pad_and_pack_dynamic_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x?x?xf32>, %pad: f32, %tile_dim_0: index, %tile_dim_1: index) -> tensor<1x1x?x?xf32> {
- %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x?x?xf32>
+ %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x?x?xf32>
return %0 : tensor<1x1x?x?xf32>
}
// CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tiles(
@@ -158,7 +158,7 @@ func.func @simple_pad_and_pack_dynamic_tiles(%input: tensor<5x1xf32>, %output: t
// -----
func.func @simple_pad_and_pack_dynamic_tile_not_all_dims_tiled(%input: tensor<1x1x5x1xf32>, %output: tensor<1x1x1x1x2x?xf32>, %pad: f32, %high: index) -> tensor<1x1x1x1x2x?xf32> {
- %0 = tensor.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<1x1x5x1xf32> -> tensor<1x1x1x1x2x?xf32>
+ %0 = linalg.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<1x1x5x1xf32> -> tensor<1x1x1x1x2x?xf32>
return %0 : tensor<1x1x1x1x2x?xf32>
}
// CHECK: #[[$ATTR_2:.+]] = affine_map<()[s0] -> (s0 - 5)>
@@ -183,7 +183,7 @@ func.func @simple_pad_and_pack_dynamic_tile_not_all_dims_tiled(%input: tensor<1x
// -----
func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32>{
- %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32>
+ %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32>
return %0 : tensor<1x1x32x8xf32>
}
// CHECK-LABEL: func.func @simple_NC_to_CNnc
@@ -197,7 +197,7 @@ func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32
// -----
func.func @simple_CHW_to_CHWhwc(%arg0: tensor<3x5x7xf32>, %arg1: tensor<1x1x1x5x7x3xf32>) -> tensor<1x1x1x5x7x3xf32> {
- %0 = tensor.pack %arg0 inner_dims_pos = [1, 2, 0] inner_tiles = [5, 7, 3] into %arg1 : tensor<3x5x7xf32> -> tensor<1x1x1x5x7x3xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [1, 2, 0] inner_tiles = [5, 7, 3] into %arg1 : tensor<3x5x7xf32> -> tensor<1x1x1x5x7x3xf32>
return %0 : tensor<1x1x1x5x7x3xf32>
}
// CHECK-LABEL: func.func @simple_CHW_to_CHWhwc
@@ -215,7 +215,7 @@ func.func @simple_CHW_to_CHWhwc(%arg0: tensor<3x5x7xf32>, %arg1: tensor<1x1x1x5x
// -----
func.func @simple_KCRS_to_KRSCsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x1x1x8x32xf32>) -> tensor<1x1x1x1x8x32xf32> {
- %0 = tensor.pack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32>
+ %0 = linalg.pack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32>
return %0 : tensor<1x1x1x1x8x32xf32>
}
// CHECK-LABEL: func.func @simple_KCRS_to_KRSCsr
diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir
index 0dbdf470bbfc96..03437223f0d45d 100644
--- a/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir
@@ -4,13 +4,13 @@
// RUN: -transform-interpreter %s | FileCheck %s
func.func @KCRSsr_to_KCRS(%arg0: tensor<1x1x4x8x8x32xf32>, %arg1: tensor<1x1x128x64xf32>) -> tensor<1x1x128x64xf32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x4x8x8x32xf32> -> tensor<1x1x128x64xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x4x8x8x32xf32> -> tensor<1x1x128x64xf32>
return %0 : tensor<1x1x128x64xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -38,7 +38,7 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13x15xf32>) -> tensor<13x15xf32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32>
return %0 : tensor<13x15xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (-d0 + 13, 8)>
@@ -70,7 +70,7 @@ func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [8, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -79,7 +79,7 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @CKkc_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) -> tensor<128x256xf32> {
- %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32>
+ %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32>
return %0 : tensor<128x256xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)>
@@ -102,7 +102,7 @@ func.func @CKkc_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>)
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir
index ba1f214952562c..d460c506d6e182 100644
--- a/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir
@@ -3,7 +3,7 @@
// RUN: -transform-interpreter=entry-point=decompose_unpack %s | FileCheck %s
func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32>
return %0 : tensor<1x1x32x8xf32>
}
// CHECK-LABEL: func.func @simple_KCRSsr_to_KCRS
@@ -22,7 +22,7 @@ func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor<
// -----
func.func @simple_unpack_static_tiles(%input: tensor<1x1x8x2xf32>, %output: tensor<5x1xf32>) -> tensor<5x1xf32> {
- %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32>
+ %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32>
return %0 : tensor<5x1xf32>
}
// CHECK-LABEL: func.func @simple_unpack_static_tiles
@@ -38,7 +38,7 @@ func.func @simple_unpack_static_tiles(%input: tensor<1x1x8x2xf32>, %output: tens
/// Same as example above, but with 1 dynamic tile size.
func.func @simple_unpack_dynamic_tile(%input: tensor<1x1x?x2xf32>, %output: tensor<5x1xf32>, %tile_dim: index) -> tensor<5x1xf32> {
- %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%tile_dim, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32>
+ %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%tile_dim, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32>
return %0 : tensor<5x1xf32>
}
// CHECK-LABEL: func.func @simple_unpack_dynamic_tile
@@ -55,7 +55,7 @@ func.func @simple_unpack_dynamic_tile(%input: tensor<1x1x?x2xf32>, %output: tens
/// Same as example above, but with 1 dynamic tile size and a transpose
func.func @simple_unpack_dynamic_tile_transpose(%src: tensor<1x1x2x?xf32>, %dest: tensor<5x1xf32>, %tile_dim: index) -> tensor<5x1xf32> {
- %0 = tensor.unpack %src inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim] into %dest : tensor<1x1x2x?xf32> -> tensor<5x1xf32>
+ %0 = linalg.unpack %src inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim] into %dest : tensor<1x1x2x?xf32> -> tensor<5x1xf32>
return %0 : tensor<5x1xf32>
}
// CHECK-LABEL: func.func @simple_unpack_dynamic_tile_transpose
@@ -78,7 +78,7 @@ func.func @simple_unpack_scalable_tile(%input: tensor<1x1x?x2xf32>, %output: ten
%c8 = arith.constant 8 : index
%vscale = vector.vscale
%c8_vscale = arith.muli %vscale, %c8 : index
- %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32>
+ %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32>
return %0 : tensor<5x1xf32>
}
// CHECK-LABEL: func.func @simple_unpack_scalable_tile
@@ -97,7 +97,7 @@ func.func @simple_unpack_scalable_tile(%input: tensor<1x1x?x2xf32>, %output: ten
// -----
func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32>) -> tensor<32x8xf32>{
- %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32> -> tensor<32x8xf32>
+ %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32> -> tensor<32x8xf32>
return %0 : tensor<32x8xf32>
}
// CHECK-LABEL: func.func @simple_CNnc_to_NC
@@ -112,7 +112,7 @@ func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32
// -----
func.func @simple_NCHWc_to_NCHW(%arg0: tensor<2x1x16x8x32xf32>, %arg1: tensor<2x32x16x8xf32>) -> tensor<2x32x16x8xf32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %arg1 : tensor<2x1x16x8x32xf32> -> tensor<2x32x16x8xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %arg1 : tensor<2x1x16x8x32xf32> -> tensor<2x32x16x8xf32>
return %0 : tensor<2x32x16x8xf32>
}
// CHECK-LABEL: func.func @simple_NCHWc_to_NCHW
@@ -131,7 +131,7 @@ func.func @simple_NCHWc_to_NCHW(%arg0: tensor<2x1x16x8x32xf32>, %arg1: tensor<2x
// -----
func.func @simple_NHWC_to_NCHW(%arg0: tensor<1x16x8x32xf32>, %arg1: tensor<1x32x16x8xf32>) -> tensor<1x32x16x8xf32> {
- %0 = tensor.unpack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [] inner_tiles = [] into %arg1 : tensor<1x16x8x32xf32> -> tensor<1x32x16x8xf32>
+ %0 = linalg.unpack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [] inner_tiles = [] into %arg1 : tensor<1x16x8x32xf32> -> tensor<1x32x16x8xf32>
return %0 : tensor<1x32x16x8xf32>
}
// CHECK-LABEL: func.func @simple_NHWC_to_NCHW
@@ -150,7 +150,7 @@ func.func @simple_NHWC_to_NCHW(%arg0: tensor<1x16x8x32xf32>, %arg1: tensor<1x32x
// -----
func.func @unpack_with_dynamic_dims(%arg0: tensor<?x1x1x1x8x32xf32>, %arg1: tensor<?x1x32x8xf32>) -> tensor<?x1x32x8xf32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<?x1x1x1x8x32xf32> -> tensor<?x1x32x8xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<?x1x1x1x8x32xf32> -> tensor<?x1x32x8xf32>
return %0 : tensor<?x1x32x8xf32>
}
// CHECK-LABEL: func.func @unpack_with_dynamic_dims
diff --git a/mlir/test/Dialect/Linalg/fold-empty-op.mlir b/mlir/test/Dialect/Linalg/fold-empty-op.mlir
new file mode 100644
index 00000000000000..5ce19d70913183
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/fold-empty-op.mlir
@@ -0,0 +1,82 @@
+// RUN: mlir-opt -split-input-file -transform-interpreter %s | FileCheck %s
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func">
+ transform.apply_patterns to %func_op {
+ transform.apply_patterns.linalg.fold_pack_unpack_into_empty
+ } : !transform.op<"func.func">
+ transform.yield
+ }
+}
+
+func.func @pack_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> {
+ %empty_unpacked = tensor.empty() : tensor<256x256xf32>
+ %packed = linalg.pack %empty_unpacked
+ inner_dims_pos = [0, 1] inner_tiles = [32, 32]
+ into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32>
+ return %packed : tensor<8x8x32x32xf32>
+}
+
+// CHECK-LABEL: func.func @pack_empty(
+// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32>
+// CHECK-NOT: linalg.pack
+// CHECK: return %[[T]] : tensor<8x8x32x32xf32>
+
+func.func @pack_empty_dynamic(%arg0: tensor<?x?x32x32xf32>, %dim0: index, %dim1: index) -> tensor<?x?x32x32xf32> {
+ %empty_unpacked = tensor.empty(%dim0, %dim1) : tensor<?x?xf32>
+ %packed = linalg.pack %empty_unpacked
+ inner_dims_pos = [0, 1] inner_tiles = [32, 32]
+ into %arg0 : tensor<?x?xf32> -> tensor<?x?x32x32xf32>
+ return %packed : tensor<?x?x32x32xf32>
+}
+
+// CHECK-LABEL: func.func @pack_empty_dynamic(
+// CHECK-SAME: %[[T:.+]]: tensor<?x?x32x32xf32>,
+// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index,
+// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index
+// CHECK-NOT: linalg.pack
+// CHECK: return %[[T]] : tensor<?x?x32x32xf32>
+
+func.func @unpack_empty(%arg0: tensor<256x256xf32>) -> tensor<256x256xf32> {
+ %empty_packed = tensor.empty() : tensor<8x8x32x32xf32>
+ %unpacked = linalg.unpack %empty_packed
+ inner_dims_pos = [0, 1] inner_tiles = [32, 32]
+ into %arg0 : tensor<8x8x32x32xf32> -> tensor<256x256xf32>
+ return %unpacked : tensor<256x256xf32>
+}
+
+// CHECK-LABEL: func.func @unpack_empty(
+// CHECK-SAME: %[[T:.+]]: tensor<256x256xf32>
+// CHECK-NOT: linalg.unpack
+// CHECK: return %[[T]] : tensor<256x256xf32>
+
+func.func @unpack_empty_dynamic(%arg0: tensor<?x?xf32>, %dim0: index, %dim1: index) -> tensor<?x?xf32> {
+ %empty_packed = tensor.empty(%dim0, %dim1) : tensor<?x?x32x32xf32>
+ %unpacked = linalg.unpack %empty_packed
+ inner_dims_pos = [0, 1] inner_tiles = [32, 32]
+ into %arg0 : tensor<?x?x32x32xf32> -> tensor<?x?xf32>
+ return %unpacked : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: func.func @unpack_empty_dynamic(
+// CHECK-SAME: %[[T:.+]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index,
+// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index
+// CHECK-NOT: linalg.unpack
+// CHECK: return %[[T]] : tensor<?x?xf32>
+
+func.func @pack_padded_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> {
+ %pad = arith.constant 1.0 : f32
+ %empty_unpacked = tensor.empty() : tensor<256x256xf32>
+ %packed = linalg.pack %empty_unpacked
+ padding_value(%pad : f32)
+ inner_dims_pos = [0, 1] inner_tiles = [32, 32]
+ into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32>
+ return %packed : tensor<8x8x32x32xf32>
+}
+
+// CHECK-LABEL: func.func @pack_padded_empty(
+// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack
+// CHECK: return %[[PACK]] : tensor<8x8x32x32xf32>
diff --git a/mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir b/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir
similarity index 86%
rename from mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir
rename to mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir
index f9e51ae52a74b0..51350e5bc84989 100644
--- a/mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir
+++ b/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -split-input-file -test-tensor-transform-patterns="test-simplify-pack-unpack-patterns" %s | FileCheck %s
+// RUN: mlir-opt -split-input-file -test-linalg-transform-patterns="test-simplify-pack-unpack-patterns" %s | FileCheck %s
// CHECK-LABEL: func.func @single_dim_packing(
// CHECK-SAME: %[[ARG0:.+]]: tensor<256xf32>)
@@ -6,7 +6,7 @@
// CHECK: return %[[EXPANDED]] : tensor<8x32xf32>
func.func @single_dim_packing(%arg0: tensor<256xf32>) -> tensor<8x32xf32> {
%empty = tensor.empty() : tensor<8x32xf32>
- %0 = tensor.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256xf32> -> tensor<8x32xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256xf32> -> tensor<8x32xf32>
return %0 : tensor<8x32xf32>
}
@@ -15,11 +15,11 @@ func.func @single_dim_packing(%arg0: tensor<256xf32>) -> tensor<8x32xf32> {
// CHECK-LABEL: func.func @single_dim_packing_with_padding(
// CHECK-SAME: %[[ARG0:.+]]: tensor<255xf32>)
// CHECK-NOT: tensor.expand_shape
-// CHECK: tensor.pack
+// CHECK: linalg.pack
func.func @single_dim_packing_with_padding(%arg0: tensor<255xf32>) -> tensor<8x32xf32> {
%empty = tensor.empty() : tensor<8x32xf32>
%cst = arith.constant 0.000000e+00 : f32
- %0 = tensor.pack %arg0 padding_value(%cst : f32) inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<255xf32> -> tensor<8x32xf32>
+ %0 = linalg.pack %arg0 padding_value(%cst : f32) inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<255xf32> -> tensor<8x32xf32>
return %0 : tensor<8x32xf32>
}
@@ -31,7 +31,7 @@ func.func @single_dim_packing_with_padding(%arg0: tensor<255xf32>) -> tensor<8x3
// CHECK: return %[[EXPANDED]] : tensor<5x8x32xf32>
func.func @single_last_inner_dim_packing(%arg0: tensor<5x256xf32>) -> tensor<5x8x32xf32> {
%empty = tensor.empty() : tensor<5x8x32xf32>
- %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32>
return %0 : tensor<5x8x32xf32>
}
@@ -43,7 +43,7 @@ func.func @single_last_inner_dim_packing(%arg0: tensor<5x256xf32>) -> tensor<5x8
// CHECK: return %[[EXPANDED]] : tensor<2x32xf32>
func.func @pack_1d_with_outer_dims_perm(%arg0: tensor<64xf32>) -> tensor<2x32xf32> {
%empty = tensor.empty() : tensor<2x32xf32>
- %pack = tensor.pack %arg0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<64xf32> -> tensor<2x32xf32>
+ %pack = linalg.pack %arg0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<64xf32> -> tensor<2x32xf32>
return %pack : tensor<2x32xf32>
}
@@ -55,7 +55,7 @@ func.func @pack_1d_with_outer_dims_perm(%arg0: tensor<64xf32>) -> tensor<2x32xf3
// CHECK: return %[[EXPANDED]] : tensor<5x8x32xf32>
func.func @single_last_inner_dim_packing_with_identity_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<5x8x32xf32> {
%empty = tensor.empty() : tensor<5x8x32xf32>
- %0 = tensor.pack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32>
+ %0 = linalg.pack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32>
return %0 : tensor<5x8x32xf32>
}
@@ -63,10 +63,10 @@ func.func @single_last_inner_dim_packing_with_identity_outer_dims_perm(%arg0: te
// CHECK-LABEL: func.func @packing_with_outer_dims_perm(
// CHECK-NOT: tensor.expand_shape
-// CHECK: tensor.pack
+// CHECK: linalg.pack
func.func @packing_with_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<8x5x32xf32> {
%empty = tensor.empty() : tensor<8x5x32xf32>
- %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<8x5x32xf32>
+ %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<8x5x32xf32>
return %0 : tensor<8x5x32xf32>
}
@@ -74,10 +74,10 @@ func.func @packing_with_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<8x5x
// CHECK-LABEL: func.func @single_first_inner_dim_packing(
// CHECK-NOT: tensor.expand_shape
-// CHECK: tensor.pack
+// CHECK: linalg.pack
func.func @single_first_inner_dim_packing(%arg0: tensor<256x5xf32>) -> tensor<8x5x32xf32> {
%empty = tensor.empty() : tensor<8x5x32xf32>
- %0 = tensor.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256x5xf32> -> tensor<8x5x32xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256x5xf32> -> tensor<8x5x32xf32>
return %0 : tensor<8x5x32xf32>
}
@@ -89,7 +89,7 @@ func.func @single_first_inner_dim_packing(%arg0: tensor<256x5xf32>) -> tensor<8x
// CHECK: return %[[EXPANDED]]
func.func @pack_1x32_to_1x32x1x1(%arg0 : tensor<1x32xf32>) -> tensor<1x32x1x1xf32> {
%empty = tensor.empty() : tensor<1x32x1x1xf32>
- %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty
+ %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty
: tensor<1x32xf32> -> tensor<1x32x1x1xf32>
return %pack : tensor<1x32x1x1xf32>
}
@@ -102,7 +102,7 @@ func.func @pack_1x32_to_1x32x1x1(%arg0 : tensor<1x32xf32>) -> tensor<1x32x1x1xf3
// CHECK: return %[[EXPANDED]]
func.func @pack_1x32_to_1x16x1x2(%arg0 : tensor<1x32xf32>) -> tensor<1x16x1x2xf32> {
%empty = tensor.empty() : tensor<1x16x1x2xf32>
- %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 2] into %empty
+ %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 2] into %empty
: tensor<1x32xf32> -> tensor<1x16x1x2xf32>
return %pack : tensor<1x16x1x2xf32>
}
@@ -115,7 +115,7 @@ func.func @pack_1x32_to_1x16x1x2(%arg0 : tensor<1x32xf32>) -> tensor<1x16x1x2xf3
// CHECK: return %[[EXPANDED]]
func.func @pack_32x1_to_16x1x2x1(%arg0 : tensor<32x1xf32>) -> tensor<1x16x2x1xf32> {
%empty = tensor.empty() : tensor<1x16x2x1xf32>
- %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty
+ %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty
: tensor<32x1xf32> -> tensor<1x16x2x1xf32>
return %pack : tensor<1x16x2x1xf32>
}
@@ -124,10 +124,10 @@ func.func @pack_32x1_to_16x1x2x1(%arg0 : tensor<32x1xf32>) -> tensor<1x16x2x1xf3
// CHECK-LABEL: func.func @pack_32x1_to_16x1x1x2
// CHECK-NOT: tensor.expand_shape
-// CHECK: tensor.pack
+// CHECK: linalg.pack
func.func @pack_32x1_to_16x1x1x2(%arg0 : tensor<32x1xf32>) -> tensor<16x1x1x2xf32> {
%empty = tensor.empty() : tensor<16x1x1x2xf32>
- %pack = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty
+ %pack = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty
: tensor<32x1xf32> -> tensor<16x1x1x2xf32>
return %pack : tensor<16x1x1x2xf32>
}
@@ -140,7 +140,7 @@ func.func @pack_32x1_to_16x1x1x2(%arg0 : tensor<32x1xf32>) -> tensor<16x1x1x2xf3
// CHECK: return %[[COLLAPSED]]
func.func @unpack_1d_to_collapse(%arg0: tensor<8x32xf32>) -> tensor<256xf32> {
%empty = tensor.empty() : tensor<256xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<256xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<256xf32>
return %0 : tensor<256xf32>
}
@@ -148,10 +148,10 @@ func.func @unpack_1d_to_collapse(%arg0: tensor<8x32xf32>) -> tensor<256xf32> {
// CHECK-LABEL: func.func @unpack_to_partial_slice
// CHECK-NOT: tensor.collapse
-// CHECK: tensor.unpack
+// CHECK: linalg.unpack
func.func @unpack_to_partial_slice(%arg0: tensor<8x32xf32>) -> tensor<255xf32> {
%empty = tensor.empty() : tensor<255xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<255xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<255xf32>
return %0 : tensor<255xf32>
}
@@ -159,14 +159,14 @@ func.func @unpack_to_partial_slice(%arg0: tensor<8x32xf32>) -> tensor<255xf32> {
// CHECK-LABEL: func.func @unpack_dynamic
// CHECK-NOT: tensor.collapse
-// CHECK: tensor.unpack
+// CHECK: linalg.unpack
func.func @unpack_dynamic(%arg0: tensor<?x32xf32>) -> tensor<?xf32> {
%c32 = arith.constant 32 : index
%c0 = arith.constant 0 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x32xf32>
%size = arith.muli %d0, %c32 : index
%empty = tensor.empty(%size) : tensor<?xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<?x32xf32> -> tensor<?xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<?x32xf32> -> tensor<?xf32>
return %0 : tensor<?xf32>
}
@@ -178,7 +178,7 @@ func.func @unpack_dynamic(%arg0: tensor<?x32xf32>) -> tensor<?xf32> {
// CHECK: return %[[COLLAPSED]] : tensor<5x256xf32>
func.func @single_last_inner_dim_unpacking(%arg0: tensor<5x8x32xf32>) -> tensor<5x256xf32> {
%empty = tensor.empty() : tensor<5x256xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32>
return %0 : tensor<5x256xf32>
}
@@ -190,7 +190,7 @@ func.func @single_last_inner_dim_unpacking(%arg0: tensor<5x8x32xf32>) -> tensor<
// CHECK: return %[[COLLAPSED]] : tensor<5x256xf32>
func.func @single_last_inner_dim_unpacking_with_identity_outer_dims_perm(%arg0: tensor<5x8x32xf32>) -> tensor<5x256xf32> {
%empty = tensor.empty() : tensor<5x256xf32>
- %0 = tensor.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32>
+ %0 = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32>
return %0 : tensor<5x256xf32>
}
@@ -198,10 +198,10 @@ func.func @single_last_inner_dim_unpacking_with_identity_outer_dims_perm(%arg0:
// CHECK-LABEL: func.func @unpacking_with_outer_dims_perm(
// CHECK-NOT:   tensor.collapse_shape
-// CHECK: tensor.unpack
+// CHECK: linalg.unpack
func.func @unpacking_with_outer_dims_perm(%arg0: tensor<8x5x32xf32>) -> tensor<5x256xf32> {
%empty = tensor.empty() : tensor<5x256xf32>
- %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<5x256xf32>
+ %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<5x256xf32>
return %0 : tensor<5x256xf32>
}
@@ -209,10 +209,10 @@ func.func @unpacking_with_outer_dims_perm(%arg0: tensor<8x5x32xf32>) -> tensor<5
// CHECK-LABEL: func.func @single_first_inner_dim_unpacking(
// CHECK-NOT: tensor.collapse_shape
-// CHECK: tensor.unpack
+// CHECK: linalg.unpack
func.func @single_first_inner_dim_unpacking(%arg0: tensor<8x5x32xf32>) -> tensor<256x5xf32> {
%empty = tensor.empty() : tensor<256x5xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<256x5xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<256x5xf32>
return %0 : tensor<256x5xf32>
}
@@ -224,7 +224,7 @@ func.func @single_first_inner_dim_unpacking(%arg0: tensor<8x5x32xf32>) -> tensor
// CHECK: return %[[COLLAPSED]]
func.func @unpack_1x32x1x1_to_1x32(%arg0 : tensor<1x32x1x1xf32>) -> tensor<1x32xf32> {
%empty = tensor.empty() : tensor<1x32xf32>
- %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty
+ %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty
: tensor<1x32x1x1xf32> -> tensor<1x32xf32>
return %unpack : tensor<1x32xf32>
}
@@ -237,7 +237,7 @@ func.func @unpack_1x32x1x1_to_1x32(%arg0 : tensor<1x32x1x1xf32>) -> tensor<1x32x
// CHECK: return %[[COLLAPSED]]
func.func @unpack_1x2x1x16_to_1x32(%arg0 : tensor<1x2x1x16xf32>) -> tensor<1x32xf32> {
%empty = tensor.empty() : tensor<1x32xf32>
- %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [1, 16] into %empty
+ %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [1, 16] into %empty
: tensor<1x2x1x16xf32> -> tensor<1x32xf32>
return %unpack : tensor<1x32xf32>
}
@@ -250,7 +250,7 @@ func.func @unpack_1x2x1x16_to_1x32(%arg0 : tensor<1x2x1x16xf32>) -> tensor<1x32x
// CHECK: return %[[COLLAPSED]]
func.func @unpack_16x1x2x1_to_32x1(%arg0 : tensor<1x16x2x1xf32>) -> tensor<32x1xf32> {
%empty = tensor.empty() : tensor<32x1xf32>
- %unpack = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty
+ %unpack = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty
: tensor<1x16x2x1xf32> -> tensor<32x1xf32>
return %unpack : tensor<32x1xf32>
}
@@ -259,10 +259,10 @@ func.func @unpack_16x1x2x1_to_32x1(%arg0 : tensor<1x16x2x1xf32>) -> tensor<32x1x
// CHECK-LABEL: func.func @unpack_16x1x1x2_to_32x1
// CHECK-NOT: tensor.collapse_shape
-// CHECK: tensor.unpack
+// CHECK: linalg.unpack
func.func @unpack_16x1x1x2_to_32x1(%arg0 : tensor<16x1x1x2xf32>) -> tensor<32x1xf32> {
%empty = tensor.empty() : tensor<32x1xf32>
- %unpack = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty
+ %unpack = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty
: tensor<16x1x1x2xf32> -> tensor<32x1xf32>
return %unpack : tensor<32x1xf32>
}
@@ -275,7 +275,7 @@ func.func @unpack_16x1x1x2_to_32x1(%arg0 : tensor<16x1x1x2xf32>) -> tensor<32x1x
// CHECK: return %[[EXPANDED]] : tensor<1x1x32x64xf32>
func.func @pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> {
%empty = tensor.empty() : tensor<1x1x32x64xf32>
- %0 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32>
return %0 : tensor<1x1x32x64xf32>
}
@@ -287,7 +287,7 @@ func.func @pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> {
// CHECK: return %[[EXPANDED]] : tensor<1x1x32x64xf32>
func.func @pad_like_pack_with_outer_dims_perm(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> {
%empty = tensor.empty() : tensor<1x1x32x64xf32>
- %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32>
+ %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32>
return %0 : tensor<1x1x32x64xf32>
}
@@ -299,7 +299,7 @@ func.func @pad_like_pack_with_outer_dims_perm(%arg0: tensor<32x64xf32>) -> tenso
// CHECK: return %[[EXPANDED]] : tensor<32x1x64xf32>
func.func @inner_pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<32x1x64xf32> {
%empty = tensor.empty() : tensor<32x1x64xf32>
- %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64xf32> -> tensor<32x1x64xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64xf32> -> tensor<32x1x64xf32>
return %0 : tensor<32x1x64xf32>
}
@@ -309,11 +309,11 @@ func.func @inner_pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<32x1x64xf32>
// CHECK-LABEL: func.func @pad_and_inner_dim_shuffle_pack(
// CHECK-SAME: %[[ARG0:.+]]: tensor<32x64xf32>)
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x1x64x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<1x1x64x32xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<1x1x64x32xf32>
// CHECK: return %[[PACK]] : tensor<1x1x64x32xf32>
func.func @pad_and_inner_dim_shuffle_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x64x32xf32> {
%empty = tensor.empty() : tensor<1x1x64x32xf32>
- %0 = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %empty : tensor<32x64xf32> -> tensor<1x1x64x32xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %empty : tensor<32x64xf32> -> tensor<1x1x64x32xf32>
return %0 : tensor<1x1x64x32xf32>
}
@@ -323,11 +323,11 @@ func.func @pad_and_inner_dim_shuffle_pack(%arg0: tensor<32x64xf32>) -> tensor<1x
// CHECK-LABEL: func.func @pad_like_pack_with_transpose(
// CHECK-SAME: %[[ARG0:.+]]: tensor<32x64x16xf32>)
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x1x16x64xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32>
// CHECK: return %[[PACK]] : tensor<32x1x16x64xf32>
func.func @pad_like_pack_with_transpose(%arg0: tensor<32x64x16xf32>) -> tensor<32x1x16x64xf32> {
%empty = tensor.empty() : tensor<32x1x16x64xf32>
- %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32>
+ %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32>
return %0 : tensor<32x1x16x64xf32>
}
@@ -339,7 +339,7 @@ func.func @pad_like_pack_with_transpose(%arg0: tensor<32x64x16xf32>) -> tensor<3
// CHECK: return %[[COLLAPSED]] : tensor<32x64xf32>
func.func @unpad_like_unpack(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32> {
%empty = tensor.empty() : tensor<32x64xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32>
return %0 : tensor<32x64xf32>
}
@@ -351,7 +351,7 @@ func.func @unpad_like_unpack(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32>
// CHECK: return %[[COLLAPSED]] : tensor<32x64xf32>
func.func @unpad_like_unpack_with_outer_dims_perm(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32> {
%empty = tensor.empty() : tensor<32x64xf32>
- %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32>
+ %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32>
return %0 : tensor<32x64xf32>
}
@@ -363,7 +363,7 @@ func.func @unpad_like_unpack_with_outer_dims_perm(%arg0: tensor<1x1x32x64xf32>)
// CHECK: return %[[COLLAPSED]] : tensor<32x64xf32>
func.func @inner_unpad_like_unpack(%arg0: tensor<32x1x64xf32>) -> tensor<32x64xf32> {
%empty = tensor.empty() : tensor<32x64xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x64xf32> -> tensor<32x64xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x64xf32> -> tensor<32x64xf32>
return %0 : tensor<32x64xf32>
}
@@ -373,11 +373,11 @@ func.func @inner_unpad_like_unpack(%arg0: tensor<32x1x64xf32>) -> tensor<32x64xf
// CHECK-LABEL: func.func @unpad_and_inner_dim_shuffle_pack(
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x32x64xf32>)
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<64x32xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %[[EMPTY]] : tensor<1x1x32x64xf32> -> tensor<64x32xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %[[EMPTY]] : tensor<1x1x32x64xf32> -> tensor<64x32xf32>
// CHECK: return %[[UNPACK]] : tensor<64x32xf32>
func.func @unpad_and_inner_dim_shuffle_pack(%arg0: tensor<1x1x32x64xf32>) -> tensor<64x32xf32> {
%empty = tensor.empty() : tensor<64x32xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<64x32xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<64x32xf32>
return %0 : tensor<64x32xf32>
}
@@ -387,10 +387,10 @@ func.func @unpad_and_inner_dim_shuffle_pack(%arg0: tensor<1x1x32x64xf32>) -> ten
// CHECK-LABEL: func.func @unpad_like_unpack_with_transpose(
// CHECK-SAME: %[[ARG0:.+]]: tensor<32x1x16x64xf32>)
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x64x16xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32>
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32>
// CHECK: return %[[UNPACK]] : tensor<32x64x16xf32>
func.func @unpad_like_unpack_with_transpose(%arg0: tensor<32x1x16x64xf32>) -> tensor<32x64x16xf32> {
%empty = tensor.empty() : tensor<32x64x16xf32>
- %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32>
+ %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32>
return %0 : tensor<32x64x16xf32>
}
diff --git a/mlir/test/Dialect/Linalg/td/decompose-pack.mlir b/mlir/test/Dialect/Linalg/td/decompose-pack.mlir
index 49c45e29d5a145..32054134266c74 100644
--- a/mlir/test/Dialect/Linalg/td/decompose-pack.mlir
+++ b/mlir/test/Dialect/Linalg/td/decompose-pack.mlir
@@ -1,6 +1,6 @@
module @transforms attributes { transform.with_named_sequence } {
transform.named_sequence @decompose_pack(%module: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op
%1 = transform.get_parent_op %pack {isolated_from_above} : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %1 {
diff --git a/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir b/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir
index 11243634262e0e..f5b8403af5e580 100644
--- a/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir
+++ b/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir
@@ -1,6 +1,6 @@
module @transforms attributes { transform.with_named_sequence } {
transform.named_sequence @decompose_unpack(%module: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.unpack"]} in %module : (!transform.any_op) -> !transform.any_op
+ %pack = transform.structured.match ops{["linalg.unpack"]} in %module : (!transform.any_op) -> !transform.any_op
%1 = transform.get_parent_op %pack {isolated_from_above} : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %1 {
diff --git a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
index 5f8ff36a165786..81fd7a8a947d7d 100644
--- a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
+++ b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
@@ -4,7 +4,7 @@
func.func @pack(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<17x2x16x16x32x8xf32>) -> tensor<17x2x16x16x32x8xf32> {
%cst_0 = arith.constant 0.0 : f32
- // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose
+ // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose
// CHECK: tensor.pad {{.*}} low[0, 0, 0, 0]
// CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32>
// CHECK: tensor.expand_shape %{{.*}} [{{.*}}[0, 1], [2, 3], [4], [5]]
@@ -13,16 +13,16 @@ func.func @pack(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<17x2x16x16x32x8xf
// CHECK-SAME: ins(%{{.*}} : tensor<17x8x2x32x16x16xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<17x2x16x16x32x8xf32>)
// CHECK-SAME: permutation = [0, 2, 4, 5, 3, 1]
- %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1
+ %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1
: tensor<129x47x16x16xf32> -> tensor<17x2x16x16x32x8xf32>
return %pack : tensor<17x2x16x16x32x8xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -33,7 +33,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-LABEL: func.func @pack(
func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor<8x8x16x1xf32> {
- // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose
+ // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose
// CHECK: tensor.pad {{.*}} low[0, 0]
// CHECK: : tensor<128x8xf32> to tensor<128x8xf32>
// CHECK: tensor.expand_shape %{{.*}} [{{.*}}[0, 1], [2, 3]]
@@ -43,7 +43,7 @@ func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor
// CHECK-SAME: outs(%{{.*}} : tensor<8x8x16x1xf32>)
// CHECK-SAME: permutation = [0, 2, 1, 3]
- %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %arg1
+ %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %arg1
: tensor<128x8xf32> -> tensor<8x8x16x1xf32>
return %pack : tensor<8x8x16x1xf32>
@@ -51,9 +51,9 @@ func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -67,7 +67,7 @@ module attributes {transform.with_named_sequence} {
func.func @pack_as_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> {
%cst_0 = arith.constant 0.0 : f32
- // tensor.pack is lowered to tensor.pad + tensor.insert_slice
+ // linalg.pack is lowered to tensor.pad + tensor.insert_slice
// CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0, 0, 0] high[7, 17, 0, 0]
// CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32>
// CHECK: %[[RES:.*]] = tensor.insert_slice %[[PAD]] into %[[OUT]]
@@ -79,16 +79,16 @@ func.func @pack_as_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x13
// CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1]
// CHECK-SAME: : tensor<136x64x16x16xf32> into tensor<1x1x1x1x136x64x16x16xf32>
// CHECK: return %[[RES]]
- %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
+ %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
: tensor<129x47x16x16xf32> -> tensor<1x1x1x1x136x64x16x16xf32>
return %pack : tensor<1x1x1x1x136x64x16x16xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -101,22 +101,22 @@ module attributes {transform.with_named_sequence} {
// CHECK-LABEL: func.func @pack_as_pad_disabled_insert_slice(
func.func @pack_as_pad_disabled_insert_slice(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> {
%cst_0 = arith.constant 0.0 : f32
- // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose
+ // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose
// CHECK-SAME: %[[ARG0:[^:]*]]: tensor<129x47x16x16xf32>
// CHECK-DAG: %[[PAD:.*]] = tensor.pad %[[ARG0]]
// CHECK-NOT: %[[RES:.*]] = tensor.insert_slice %[[PAD]]
// CHECK: %[[PAD_EXPANDED:.*]] = tensor.expand_shape %[[PAD]]
// CHECK-DAG: %[[RES:.*]] = linalg.transpose ins(%[[PAD_EXPANDED]]
- %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
+ %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
: tensor<129x47x16x16xf32> -> tensor<1x1x1x1x136x64x16x16xf32>
return %pack : tensor<1x1x1x1x136x64x16x16xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false}: (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false}: (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -141,16 +141,16 @@ func.func @pack_not_a_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x16x
// CHECK-SAME: outs(%{{.*}} : tensor<1x1x16x16x136x64xf32>)
// CHECK-SAME: permutation = [0, 2, 4, 5, 1, 3]
- %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1] inner_tiles = [136, 64] into %arg1
+ %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1] inner_tiles = [136, 64] into %arg1
: tensor<129x47x16x16xf32> -> tensor<1x1x16x16x136x64xf32>
return %pack : tensor<1x1x16x16x136x64xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -172,16 +172,16 @@ func.func @unpack(%arg0: tensor<17x2x16x16x32x8xf32>, %arg1: tensor<129x47x16x16
// CHECK-SAME: : tensor<136x64x16x16xf32> to tensor<129x47x16x16xf32>
// CHECK: linalg.copy ins(%[[SLICE]] : tensor<129x47x16x16xf32>)
// CHECK-SAME: outs(%[[ARG1]] : tensor<129x47x16x16xf32>)
- %unpack = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1
+ %unpack = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1
: tensor<17x2x16x16x32x8xf32> -> tensor<129x47x16x16xf32>
return %unpack : tensor<129x47x16x16xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -207,16 +207,16 @@ func.func @unpack_with_identity_outer_dims_perm(%arg0: tensor<17x2x16x16x32x8xf3
// CHECK-SAME: : tensor<136x64x16x16xf32> to tensor<129x47x16x16xf32>
// CHECK: linalg.copy ins(%[[SLICE]] : tensor<129x47x16x16xf32>)
// CHECK-SAME: outs(%[[ARG1]] : tensor<129x47x16x16xf32>)
- %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1
+ %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1
: tensor<17x2x16x16x32x8xf32> -> tensor<129x47x16x16xf32>
return %unpack : tensor<129x47x16x16xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -241,16 +241,16 @@ func.func @unpack_as_pad(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor<
// strides multiplers.
// CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1]
// CHECK-SAME: : tensor<1x1x1x1x136x64x16x16xf32> to tensor<129x47x16x16xf32>
- %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
+ %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
: tensor<1x1x1x1x136x64x16x16xf32> -> tensor<129x47x16x16xf32>
return %pack : tensor<129x47x16x16xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -267,22 +267,22 @@ module attributes {transform.with_named_sequence} {
func.func @unpack_as_pad_disabled_extract_slice(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor<129x47x16x16xf32>) -> tensor<129x47x16x16xf32> {
%cst_0 = arith.constant 0.0 : f32
- // tensor.unpack is lowered to tensor.extract_slice + linalg.transpose + tensor.collapse_shape
+ // linalg.unpack is lowered to tensor.extract_slice + linalg.transpose + tensor.collapse_shape
// CHECK-DAG: %[[ARG0:[^:]*]]: tensor<1x1x1x1x136x64x16x16xf32>
// CHECK-NOT: %[[RES:.*]] = tensor.extract_slice %[[ARG0]]
// CHECK: %[[TRANSPOSED:.*]] = linalg.transpose ins(%[[ARG0]]
// CHECK: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[TRANSPOSED]]
// CHECK-DAG: %[[RES:.*]] = tensor.extract_slice %[[COLLAPSED]]
- %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
+ %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
: tensor<1x1x1x1x136x64x16x16xf32> -> tensor<129x47x16x16xf32>
return %pack : tensor<129x47x16x16xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false}: (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false}: (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -305,7 +305,7 @@ func.func @pack_with_outer_dims_perm(%src: tensor<100x200x128x256xi32>,
// CHECK-SAME: ins(%{{.*}} : tensor<100x200x4x32x16x16xi32>)
// CHECK-SAME: outs(%{{.*}} : tensor<200x4x16x100x16x32xi32>)
// CHECK-SAME: permutation = [1, 2, 4, 0, 5, 3]
- %0 = tensor.pack %src
+ %0 = linalg.pack %src
outer_dims_perm = [1, 2, 3, 0]
inner_dims_pos = [3, 2]
inner_tiles = [16, 32]
@@ -315,9 +315,9 @@ func.func @pack_with_outer_dims_perm(%src: tensor<100x200x128x256xi32>,
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -337,7 +337,7 @@ func.func @pack_with_pad(%src: tensor<4225x12xf32>, %dest: tensor<265x16x16x1xf3
// CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<265x16x16x1xf32>)
// CHECK-SAME: permutation = [0, 2, 1, 3]
%cst = arith.constant 0.000000e+00 : f32
- %0 = tensor.pack %src
+ %0 = linalg.pack %src
padding_value(%cst : f32)
inner_dims_pos = [0, 1]
inner_tiles = [16, 1] into %dest
@@ -347,9 +347,9 @@ func.func @pack_with_pad(%src: tensor<4225x12xf32>, %dest: tensor<265x16x16x1xf3
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -370,7 +370,7 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>,
// CHECK-SAME: outs(%{{.*}} : tensor<200x4x16x100x16x32xi32>)
// CHECK-SAME: permutation = [1, 2, 4, 0, 5, 3]
%cst_0 = arith.constant 0 : i32
- %0 = tensor.pack %src
+ %0 = linalg.pack %src
padding_value(%cst_0 : i32)
outer_dims_perm = [1, 2, 3, 0]
inner_dims_pos = [3, 2]
@@ -381,9 +381,9 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>,
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -429,7 +429,7 @@ func.func @dynamic_pack_pad_transpose_inner_and_outer_dims(%source: tensor<?x?xf
%tiled_d0 = arith.ceildivui %d0, %c32 : index
%tiled_d1 = arith.ceildivui %d1, %c16 : index
%init_pack = tensor.empty(%tiled_d1, %tiled_d0) : tensor<?x?x16x32xf32>
- %pack = tensor.pack %source padding_value(%padding_value : f32)
+ %pack = linalg.pack %source padding_value(%padding_value : f32)
outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack
: tensor<?x?xf32> -> tensor<?x?x16x32xf32>
return %pack : tensor<?x?x16x32xf32>
@@ -437,9 +437,9 @@ func.func @dynamic_pack_pad_transpose_inner_and_outer_dims(%source: tensor<?x?xf
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -453,7 +453,7 @@ module attributes {transform.with_named_sequence} {
func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> {
%cst_0 = arith.constant 0.0 : f32
- // tensor.pack is lowered to tensor.pad + tensor.insert_slice
+ // linalg.pack is lowered to tensor.pad + tensor.insert_slice
// CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0, 0, 0] high[7, 17, 0, 0]
// CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32>
// CHECK: %[[RES:.*]] = tensor.insert_slice %[[PAD]] into %[[OUT]]
@@ -465,7 +465,7 @@ func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %ar
// CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1]
// CHECK-SAME: : tensor<136x64x16x16xf32> into tensor<1x1x1x1x136x64x16x16xf32>
// CHECK: return %[[RES]]
- %pack = tensor.pack %arg0
+ %pack = linalg.pack %arg0
padding_value(%cst_0 : f32)
outer_dims_perm = [1, 2, 3, 0]
inner_dims_pos = [0, 1, 2, 3]
@@ -476,9 +476,9 @@ func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %ar
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -501,7 +501,7 @@ func.func @pack_as_pad_with_unit_dims(%arg0: tensor<3x1x1x1xf32>, %arg1: tensor<
// CHECK-SAME: outs(%[[OUT]] : tensor<1x1x1x1x8x1xf32>)
// CHECK-SAME: permutation = [0, 2, 4, 5, 1, 3]
// CHECK: return %[[TRANSPOSED]] : tensor<1x1x1x1x8x1xf32>
- %pack = tensor.pack %arg0
+ %pack = linalg.pack %arg0
padding_value(%zero : f32)
inner_dims_pos = [0, 1]
inner_tiles = [8, 1] into %arg1 : tensor<3x1x1x1xf32> -> tensor<1x1x1x1x8x1xf32>
@@ -512,9 +512,9 @@ func.func @pack_as_pad_with_unit_dims(%arg0: tensor<3x1x1x1xf32>, %arg1: tensor<
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
transform.yield
}
@@ -541,16 +541,16 @@ module attributes {transform.with_named_sequence} {
// CHECK: linalg.copy ins(%[[SLICE]] : tensor<32x?x?xf32>)
// CHECK-SAME: outs(%[[ARG1]] : tensor<32x?x?xf32>)
func.func @unpack_with_dynamic_dest(%arg0: tensor<32x2x49x16x16xf32>, %arg1: tensor<32x?x?xf32>) -> tensor<32x?x?xf32> {
- %pack = tensor.unpack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %arg1
+ %pack = linalg.unpack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %arg1
: tensor<32x2x49x16x16xf32> -> tensor<32x?x?xf32>
return %pack : tensor<32x?x?xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -582,15 +582,15 @@ module attributes {transform.with_named_sequence} {
// CHECK: linalg.copy ins(%[[SLICE]] : tensor<?x?xf32>)
// CHECK-SAME: outs(%[[ARG1]] : tensor<?x?xf32>)
func.func @unpack_with_dynamic_input_dest(%arg0: tensor<?x?x8x16xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
- %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %arg1 : tensor<?x?x8x16xf32> -> tensor<?x?xf32>
+ %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %arg1 : tensor<?x?x8x16xf32> -> tensor<?x?xf32>
return %unpack : tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -626,14 +626,14 @@ module attributes {transform.with_named_sequence} {
// CHECK: linalg.copy ins(%[[SLICE]] : tensor<?x?xf32>)
// CHECK-SAME: outs(%[[ARG1]] : tensor<?x?xf32>)
func.func @unpack_fully_dynamic(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>, %tile_n : index, %tile_m : index) -> tensor<?x?xf32> {
- %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -664,16 +664,16 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1]
// CHECK-SAME: : tensor<1x1x1x1x136x64x16x16xf32> to tensor<?x?x?x?xf32>
func.func @unpack_as_pad_dynamic(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
- %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
+ %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1
: tensor<1x1x1x1x136x64x16x16xf32> -> tensor<?x?x?x?xf32>
return %pack : tensor<?x?x?x?xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -698,16 +698,16 @@ module attributes {transform.with_named_sequence} {
// CHECK: linalg.copy ins(%[[SLICE]]
// CHECK-SAME: : tensor<32x64xf32>) outs(%[[ARG0]] : tensor<32x64xf32>) -> tensor<32x64xf32>
func.func @unpack_with_outer_dims_perm(%arg0: tensor<32x64xf32>, %arg1: tensor<2x4x32x8xf32>) -> tensor<32x64xf32> {
- %unpack = tensor.unpack %arg1 outer_dims_perm = [1, 0]
+ %unpack = linalg.unpack %arg1 outer_dims_perm = [1, 0]
inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg0 : tensor<2x4x32x8xf32> -> tensor<32x64xf32>
return %unpack : tensor<32x64xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
index ac1ca9319d3354..20019424e8d3c2 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -106,12 +106,12 @@ module attributes {transform.with_named_sequence} {
// CHECK-LABEL: func.func @unpack_elemwise
// CHECK: %[[RES:.*]] = scf.for
// CHECK: scf.for
-// CHECK: tensor.unpack
+// CHECK: linalg.unpack
// CHECK: linalg.elemwise_unary
// CHECK: return %[[RES]]
func.func @unpack_elemwise(%arg0: tensor<16x48x8x8xf32>, %arg1: tensor<128x384xf32>) -> tensor<128x384xf32> {
%0 = tensor.empty() : tensor<128x384xf32>
- %1 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
+ %1 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
: tensor<16x48x8x8xf32> -> tensor<128x384xf32>
%2 = linalg.elemwise_unary ins(%1: tensor<128x384xf32>)
outs(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32>
@@ -132,12 +132,12 @@ module attributes {transform.with_named_sequence} {
// CHECK-LABEL: func.func @pack_elemwise
// CHECK: %[[RES:.*]] = scf.for
// CHECK: scf.for
-// CHECK: tensor.pack
+// CHECK: linalg.pack
// CHECK: linalg.elemwise_unary
// CHECK: return %[[RES]]
func.func @pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> {
%0 = tensor.empty() : tensor<16x48x8x8xf32>
- %1 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
+ %1 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
: tensor<128x384xf32> -> tensor<16x48x8x8xf32>
%2 = linalg.elemwise_unary ins(%1: tensor<16x48x8x8xf32>)
outs(%arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32>
@@ -156,14 +156,14 @@ module attributes {transform.with_named_sequence} {
// -----
// CHECK-LABEL: func.func @nofuse_pack_elemwise
-// CHECK: tensor.pack
+// CHECK: linalg.pack
// CHECK: %[[RES:.*]] = scf.for
// CHECK: scf.for
// CHECK: linalg.elemwise_unary
// CHECK: return %[[RES]]
func.func @nofuse_pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> {
%0 = tensor.empty() : tensor<16x48x8x8xf32>
- %1 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
+ %1 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0
: tensor<128x384xf32> -> tensor<16x48x8x8xf32>
%2 = linalg.elemwise_unary ins(%1: tensor<16x48x8x8xf32>)
outs(%arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-pack.mlir b/mlir/test/Dialect/Linalg/transform-op-pack.mlir
index 6c26ebd0a5b845..b3ad73e8df8e75 100644
--- a/mlir/test/Dialect/Linalg/transform-op-pack.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pack.mlir
@@ -15,9 +15,9 @@
// CHECK-SAME: %[[T1:.+]]: tensor<3xf16>
func.func @reduction_2d_static(%t0: tensor<3x7xf16>, %t1: tensor<3xf16>) -> tensor<3xf16> {
// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<3x2x4xf16>
- // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16)
+ // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16)
// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]] : tensor<3x7xf16> -> tensor<3x2x4xf16>
- // CHECK-NOT: tensor.pack
+ // CHECK-NOT: linalg.pack
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
// CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"]
@@ -29,7 +29,7 @@ func.func @reduction_2d_static(%t0: tensor<3x7xf16>, %t1: tensor<3xf16>) -> tens
linalg.yield %3 : f16
} -> tensor<3xf16>
- // CHECK-NOT: tensor.unpack
+ // CHECK-NOT: linalg.unpack
return %2 : tensor<3xf16>
}
@@ -59,9 +59,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[T1:.+]]: tensor<3xf16>
func.func @col_reduction_2d_static(%t0: tensor<7x3xf16>, %t1: tensor<3xf16>) -> tensor<3xf16> {
// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<3x2x4xf16>
- // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16)
+ // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16)
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]] : tensor<7x3xf16> -> tensor<3x2x4xf16>
- // CHECK-NOT: tensor.pack
+ // CHECK-NOT: linalg.pack
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
// CHECK-SAME: iterator_types = ["reduction", "parallel", "reduction"]
@@ -73,7 +73,7 @@ func.func @col_reduction_2d_static(%t0: tensor<7x3xf16>, %t1: tensor<3xf16>) ->
linalg.yield %3 : f16
} -> tensor<3xf16>
- // CHECK-NOT: tensor.unpack
+ // CHECK-NOT: linalg.unpack
return %2 : tensor<3xf16>
}
@@ -83,12 +83,12 @@ module attributes {transform.with_named_sequence} {
%1 = transform.structured.pack %0 packed_sizes = [4, 0]
: (!transform.any_op) -> (!transform.op<"linalg.generic">)
%pack = transform.get_producer_of_operand %1[0]
- : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">)
+ : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">)
%2, %pack_2, %empty_unpack_2 =
transform.structured.pack_transpose %pack with_compute_op(%1)
outer_perm = [1, 0]
- : (!transform.op<"tensor.pack">, !transform.op<"linalg.generic">)
- -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op)
+ : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">)
+ -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op)
transform.yield
}
}
@@ -116,9 +116,9 @@ func.func @reduction_2d_dynamic(%t0: tensor<?x?xf16>, %t1: tensor<?xf16>) -> ten
// CHECK-DAG: %[[D1:.*]] = tensor.dim %[[T0]], %[[C1]] : tensor<?x?xf16>
// CHECK: %[[D1B4:.*]] = affine.apply #[[$DIV4]]()[%[[D1]]]
// CHECK: %[[EMPTY:.*]] = tensor.empty(%[[D0]], %[[D1B4]]) : tensor<?x?x4xf16>
- // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16)
+ // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16)
// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]] : tensor<?x?xf16> -> tensor<?x?x4xf16>
- // CHECK-NOT: tensor.pack
+ // CHECK-NOT: linalg.pack
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
// CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"]
@@ -130,7 +130,7 @@ func.func @reduction_2d_dynamic(%t0: tensor<?x?xf16>, %t1: tensor<?xf16>) -> ten
linalg.yield %3 : f16
} -> tensor<?xf16>
- // CHECK-NOT: tensor.unpack
+ // CHECK-NOT: linalg.unpack
return %2 : tensor<?xf16>
}
@@ -162,11 +162,11 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[T0:.+]]: tensor<?x?xf16>,
// CHECK-SAME: %[[T1:.+]]: tensor<?xf16>
func.func @reduction_2d_dynamic(%t0: tensor<?x?xf16>, %t1: tensor<?xf16>) -> tensor<?xf16> {
- // CHECK: %[[PACKED_0:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16)
+ // CHECK: %[[PACKED_0:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16)
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 4] into %{{.*}} : tensor<?x?xf16> -> tensor<?x?x3x4xf16>
- // CHECK: %[[PACKED_1:.*]] = tensor.pack %[[T1]] padding_value(%{{.*}} : f16)
+ // CHECK: %[[PACKED_1:.*]] = linalg.pack %[[T1]] padding_value(%{{.*}} : f16)
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor<?xf16> -> tensor<?x3xf16>
- // CHECK-NOT: tensor.pack
+ // CHECK-NOT: linalg.pack
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
// CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel", "reduction"]
@@ -178,7 +178,7 @@ func.func @reduction_2d_dynamic(%t0: tensor<?x?xf16>, %t1: tensor<?xf16>) -> ten
linalg.yield %3 : f16
} -> tensor<?xf16>
- // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor<?x3xf16> -> tensor<?xf16>
+ // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor<?x3xf16> -> tensor<?xf16>
return %2 : tensor<?xf16>
}
@@ -207,11 +207,11 @@ module attributes {transform.with_named_sequence} {
func.func @matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>)
-> tensor<?x?xf32> {
- // CHECK: %[[PACK_A:.*]] = tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [2, 4]
+ // CHECK: %[[PACK_A:.*]] = linalg.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [2, 4]
// CHECK-SAME: : tensor<?x?xf32> -> tensor<?x?x2x4xf32>
- // CHECK: %[[PACK_B:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [3, 4]
+ // CHECK: %[[PACK_B:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [3, 4]
// CHECK-SAME: : tensor<?x?xf32> -> tensor<?x?x3x4xf32>
- // CHECK: %[[PACK_C:.*]] = tensor.pack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2]
+ // CHECK: %[[PACK_C:.*]] = linalg.pack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2]
// CHECK-SAME: : tensor<?x?xf32> -> tensor<?x?x3x2xf32>
// CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
@@ -222,7 +222,7 @@ func.func @matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>)
outs(%C: tensor<?x?xf32>)
-> tensor<?x?xf32>
- // CHECK: tensor.unpack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2]
+ // CHECK: linalg.unpack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2]
// CHECK-SAME: : tensor<?x?x3x2xf32> -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
@@ -235,12 +235,12 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> (!transform.op<"linalg.generic">)
%unpack = transform.get_consumers_of_result %1[0]
- : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">)
+ : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">)
%2, %pack_2, %unpack_2 =
transform.structured.pack_transpose %unpack with_compute_op(%1)
outer_perm = [1, 0] inner_perm = [1, 0]
- : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">)
- -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">)
+ : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">)
+ -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">)
transform.yield
}
}
@@ -259,11 +259,11 @@ module attributes {transform.with_named_sequence} {
func.func @conv_2d_nchw_fchw(%i: tensor<14x512x28x28xf32>, %f: tensor<1024x512x1x1xf32>,
%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32> {
- // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [8]
+ // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [8]
// CHECK-SAME: : tensor<14x512x28x28xf32> -> tensor<14x64x28x28x8xf32>
- // CHECK: %[[PACK_FILTER:.*]] = tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [4, 8]
+ // CHECK: %[[PACK_FILTER:.*]] = linalg.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [4, 8]
// CHECK-SAME: : tensor<1024x512x1x1xf32> -> tensor<256x64x1x1x4x8xf32>
- // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [4]
+ // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [4]
// CHECK-SAME: : tensor<14x1024x28x28xf32> -> tensor<14x256x28x28x4xf32>
// CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "reduction"]}
@@ -272,7 +272,7 @@ func.func @conv_2d_nchw_fchw(%i: tensor<14x512x28x28xf32>, %f: tensor<1024x512x1
%0 = linalg.conv_2d_nchw_fchw ins(%i, %f: tensor<14x512x28x28xf32>, tensor<1024x512x1x1xf32>)
outs(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
- // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [4]
+ // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [4]
// CHECK-SAME: : tensor<14x256x28x28x4xf32> -> tensor<14x1024x28x28xf32>
return %0: tensor<14x1024x28x28xf32>
}
@@ -300,11 +300,11 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[INIT:.+]]: tensor<?x1x?x?xf32>
func.func @conv_2d_nhwc_hwcf(%input: tensor<?x1x?x?xf32>, %filter: tensor<1x?x?x?xf32>, %init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32> {
- // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [6]
+ // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [6]
// CHECK-SAME: : tensor<?x1x?x?xf32> -> tensor<?x1x?x?x6xf32>
- // CHECK: %[[PACK_FILTER:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3, 2] inner_tiles = [4, 6]
+ // CHECK: %[[PACK_FILTER:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3, 2] inner_tiles = [4, 6]
// CHECK-SAME: : tensor<1x?x?x?xf32> -> tensor<1x?x?x?x4x6xf32>
- // CHECK: %[[PACK_OUTPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [4]
+ // CHECK: %[[PACK_OUTPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [4]
// CHECK-SAME: : tensor<?x1x?x?xf32> -> tensor<?x1x?x?x4xf32>
// CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
@@ -315,7 +315,7 @@ func.func @conv_2d_nhwc_hwcf(%input: tensor<?x1x?x?xf32>, %filter: tensor<1x?x?x
ins (%input, %filter: tensor<?x1x?x?xf32>, tensor<1x?x?x?xf32>)
outs (%init: tensor<?x1x?x?xf32>) -> tensor<?x1x?x?xf32>
- // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [3] inner_tiles = [4]
+ // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [3] inner_tiles = [4]
// CHECK-SAME: : tensor<?x1x?x?x4xf32> -> tensor<?x1x?x?xf32>
return %0 : tensor<?x1x?x?xf32>
}
@@ -349,11 +349,11 @@ func.func @matmul_dynamic_pack_size(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C
// CHECK: %[[TS:.*]] = "some_tile_size"() : () -> index
%sz = "some_tile_size"() : () -> (index)
- // CHECK: %[[PACK_A:.*]] = tensor.pack %[[A]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]]
+ // CHECK: %[[PACK_A:.*]] = linalg.pack %[[A]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]]
// CHECK-SAME: : tensor<?x?xf32> -> tensor<?x?x?xf32>
- // CHECK: %[[PACK_B:.*]] = tensor.pack %[[B]] {{.*}} inner_dims_pos = [1, 0] inner_tiles = [%[[TS]], %[[TS]]]
+ // CHECK: %[[PACK_B:.*]] = linalg.pack %[[B]] {{.*}} inner_dims_pos = [1, 0] inner_tiles = [%[[TS]], %[[TS]]]
// CHECK-SAME: : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
- // CHECK: %[[PACK_C:.*]] = tensor.pack %[[C]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]]
+ // CHECK: %[[PACK_C:.*]] = linalg.pack %[[C]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]]
// CHECK-SAME: : tensor<?x?xf32> -> tensor<?x?x?xf32>
// CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "reduction"]}
@@ -363,7 +363,7 @@ func.func @matmul_dynamic_pack_size(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C
outs(%C: tensor<?x?xf32>)
-> tensor<?x?xf32>
- // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] into %[[C]]
+ // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] into %[[C]]
// CHECK-SAME: : tensor<?x?x?xf32> -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
@@ -445,16 +445,16 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @no_single_packing_op(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) {
- %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
- %1 = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
- %2 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
+ %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
+ %1 = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
+ %2 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
return
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{requires target to map to exactly 1 packing op and 1 packed op (got 2 and 1)}}
transform.structured.pack_transpose %0 with_compute_op(%1)
inner_perm = [0]
@@ -476,7 +476,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // expected-error @below {{requires target to map to a tensor.pack or tensor.unpack}}
+ // expected-error @below {{requires target to map to a linalg.pack or linalg.unpack}}
transform.structured.pack_transpose %0 with_compute_op(%1)
inner_perm = [0]
: (!transform.any_op, !transform.any_op)
@@ -488,14 +488,14 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @no_linalg_target(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) {
- %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
+ %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
%1 = arith.constant 0 : index
return
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{requires a LinalgOp target}}
transform.structured.pack_transpose %0 with_compute_op(%1)
@@ -509,7 +509,7 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @no_single_use_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) {
- %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
+ %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
%f0 = arith.constant 0.0 : f32
%1 = tensor.empty() : tensor<f32>
%2 = linalg.fill ins(%f0: f32) outs(%1 : tensor<f32>) -> tensor<f32>
@@ -518,7 +518,7 @@ func.func @no_single_use_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{not a single use by the LinalgOp target}}
transform.structured.pack_transpose %0 with_compute_op(%1)
@@ -532,8 +532,8 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @not_produced_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) {
- %a = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
- %b = tensor.unpack %a inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
+ %a = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
+ %b = linalg.unpack %a inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
%f0 = arith.constant 0.0 : f32
%1 = tensor.empty() : tensor<f32>
%2 = linalg.fill ins(%f0: f32) outs(%1 : tensor<f32>) -> tensor<f32>
@@ -542,7 +542,7 @@ func.func @not_produced_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{not produced by the LinalgOp target}}
transform.structured.pack_transpose %0 with_compute_op(%1)
@@ -559,13 +559,13 @@ func.func @no_matching_pack(%source: tensor<16xf32>) {
%f0 = arith.constant 0.0 : f32
%1 = tensor.empty() : tensor<4x4xf32>
%2 = linalg.fill ins(%f0: f32) outs(%1 : tensor<4x4xf32>) -> tensor<4x4xf32>
- %b = tensor.unpack %2 inner_dims_pos = [0] inner_tiles = [4] into %source : tensor<4x4xf32> -> tensor<16xf32>
+ %b = linalg.unpack %2 inner_dims_pos = [0] inner_tiles = [4] into %source : tensor<4x4xf32> -> tensor<16xf32>
return
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{could not find matching pack op}}
transform.structured.pack_transpose %0 with_compute_op(%1)
@@ -593,13 +593,13 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> (!transform.op<"linalg.generic">)
%unpack = transform.get_consumers_of_result %1[0]
- : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">)
+ : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">)
%2, %pack_2, %unpack_2 =
// expected-error @below {{invalid outer_perm}}
transform.structured.pack_transpose %unpack with_compute_op(%1)
outer_perm = [1]
- : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">)
- -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">)
+ : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">)
+ -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">)
transform.yield
}
}
@@ -621,13 +621,13 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> (!transform.op<"linalg.generic">)
%unpack = transform.get_consumers_of_result %1[0]
- : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">)
+ : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">)
%2, %pack_2, %unpack_2 =
// expected-error @below {{invalid inner_perm}}
transform.structured.pack_transpose %unpack with_compute_op(%1)
inner_perm = [1]
- : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">)
- -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">)
+ : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">)
+ -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">)
transform.yield
}
}
@@ -643,12 +643,12 @@ func.func @no_padding_on_packs(%A: tensor<32x32xf32>, %B: tensor<32x32xf32>, %C:
}
// CHECK-LABEL: no_padding_on_packs
-// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8]
+// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8]
// CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<8x4x4x8xf32>
-// CHECK: tensor.pack %{{.+}} outer_dims_perm = [1, 0]
+// CHECK: linalg.pack %{{.+}} outer_dims_perm = [1, 0]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 8]
// CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x4x8x8xf32>
-// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8]
+// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8]
// CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<8x4x4x8xf32>
module attributes {transform.with_named_sequence} {
@@ -657,12 +657,12 @@ module attributes {transform.with_named_sequence} {
%1 = transform.structured.pack %0 packed_sizes = [4, 8, 8]
: (!transform.any_op) -> (!transform.op<"linalg.generic">)
%pack = transform.get_producer_of_operand %1[1]
- : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">)
+ : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">)
%2, %pack_2, %empty_unpack_2 =
transform.structured.pack_transpose %pack with_compute_op(%1)
outer_perm = [1, 0] inner_perm = [1, 0]
- : (!transform.op<"tensor.pack">, !transform.op<"linalg.generic">)
- -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op)
+ : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">)
+ -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op)
transform.yield
}
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir b/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir
new file mode 100644
index 00000000000000..456a5ea453963d
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir
@@ -0,0 +1,491 @@
+// RUN: mlir-opt %s -transform-interpreter -canonicalize -cse -split-input-file | FileCheck %s
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)>
+// CHECK: func.func @NC_to_NCnc
+// CHECK-SAME: %[[IN:.*]]: tensor<128x256xf32>,
+// CHECK-SAME: %[[OUT:.*]]: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> {
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK: %[[RES0:.*]] = scf.for %[[N:.*]] = %[[C0]] to %[[C4]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<4x8x32x32xf32>) {
+// CHECK: %[[RES1:.+]] = scf.for %[[C:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<4x8x32x32xf32>) {
+// CHECK-DAG: %[[IN_N:.+]] = affine.apply #[[MAP0]](%[[N]])
+// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]])
+// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_N]], %[[IN_C]]] [64, 128] [1, 1] : tensor<128x256xf32> to tensor<64x128xf32>
+// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[N]], %[[C]], 0, 0] [2, 4, 32, 32] [1, 1, 1, 1] : tensor<4x8x32x32xf32> to tensor<2x4x32x32xf32>
+// CHECK: %[[SUB_RES:.*]] = linalg.pack
+// CHECK-SAME: %[[SUB_IN]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[SUB_OUT]]
+// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
+// CHECK: scf.yield %[[INSERT]] : tensor<4x8x32x32xf32>
+// CHECK: }
+// CHECK: scf.yield %[[RES1:.*]] : tensor<4x8x32x32xf32>
+// CHECK: }
+// CHECK: return %[[RES0:.*]] : tensor<4x8x32x32xf32>
+// CHECK: }
+func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> {
+ %0 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %arg1 : tensor<128x256xf32> -> tensor<4x8x32x32xf32>
+ return %0 : tensor<4x8x32x32xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 8)>
+// CHECK: func.func @KC_to_CKkc
+// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
+// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index
+// CHECK: scf.for %[[C:.+]] = %[[C0]] to %[[C32]] step %[[C2]]
+// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]])
+// CHECK: %[[INPUT_SLICE:.+]] = tensor.extract_slice %[[IN]]
+// CHECK-SAME: [0, %[[IN_C]]] [128, 16]
+// CHECK: %[[OUTPUT_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[C]], 0, 0, 0] [2, 4, 32, 8]
+// CHECK: linalg.pack
+// CHECK-SAME: %[[INPUT_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8]
+// CHECK-SAME: into %[[OUTPUT_SLICE]]
+func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> {
+ %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32>
+ return %0 : tensor<32x4x32x8xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -2 + 15, 8)>
+// CHECK: func.func @pad_and_pack_static(
+// CHECK-SAME: %[[IN:.*]]: tensor<13x15xf32>,
+// CHECK-SAME: %[[OUT:.*]]: tensor<2x8x8x2xf32>,
+// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<2x8x8x2xf32> {
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
+// CHECK-DAG: %[[RES0:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[OUT]]) -> (tensor<2x8x8x2xf32>) {
+// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP0]](%[[J]])
+// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])
+// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][0, %[[IN_J]]] [13, %[[IN_J_SZ]]] [1, 1]
+// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][0, %[[J]], 0, 0] [2, 4, 8, 2] [1, 1, 1, 1]
+// CHECK: %[[SUB_RES:.*]] = linalg.pack
+// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2]
+// CHECK-SAME: into %[[SUB_OUT]]
+// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
+// CHECK: scf.yield %[[INSERT]] : tensor<2x8x8x2xf32>
+// CHECK: }
+// CHECK: return %[[RES0:.*]] : tensor<2x8x8x2xf32>
+// CHECK: }
+func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> {
+ %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
+ return %0 : tensor<2x8x8x2xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
+// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)>
+// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -8 + s0, d0 * 8)>
+// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 * 2)>
+// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -2 + s0, d0 * 2)>
+// CHECK: func.func @pad_and_pack_partially_dynamic(
+// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x8x2xf32>,
+// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<?x?x8x2xf32> {
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x8x2xf32>
+// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x8x2xf32>
+// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x8x2xf32>) {
+// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x8x2xf32>) {
+// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]]
+// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]]
+// CHECK-DAG: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])
+// CHECK-DAG: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]]
+// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP4]](%[[J]])
+// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP5]]
+// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], 8, 2] [1, 1, 1, 1] : tensor<?x?x8x2xf32> to tensor<?x?x8x2xf32>
+// CHECK: %[[SUB_RES:.*]] = linalg.pack
+// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2]
+// CHECK-SAME: into %[[SUB_OUT]]
+// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
+// CHECK: scf.yield %[[INSERT]] : tensor<?x?x8x2xf32>
+// CHECK: }
+// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x8x2xf32>
+// CHECK: }
+// CHECK:           return %[[RES0:.*]] : tensor<?x?x8x2xf32>
+// CHECK: }
+func.func @pad_and_pack_partially_dynamic(%input: tensor<?x?xf32>, %output: tensor<?x?x8x2xf32>, %pad: f32) -> tensor<?x?x8x2xf32> {
+ %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
+ return %0 : tensor<?x?x8x2xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
+// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0, -(d1 * s0) + s1)>
+// CHECK: func.func @pad_and_pack_fully_dynamic(
+// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x?x?xf32>,
+// CHECK-SAME: %[[PAD:.*]]: f32,
+// CHECK-SAME: %[[TILE_0:.*]]: index,
+// CHECK-SAME: %[[TILE_1:.*]]: index) -> tensor<?x?x?x?xf32> {
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x?x?xf32>
+// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x?x?xf32>
+// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x?x?xf32>) {
+// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x?x?xf32>) {
+// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]]
+// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]]
+// CHECK-DAG: %[[IN_D0:.*]] = tensor.dim %[[IN]], %[[C0]]
+// CHECK-DAG: %[[IN_D1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// CHECK: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])[%[[TILE_0]]]
+// CHECK: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_I_SZ]], %[[I]])[%[[TILE_0]], %[[IN_D0]]]
+// CHECK: %[[IN_J:.*]] = affine.apply #[[MAP2]](%[[J]])[%[[TILE_1]]]
+// CHECK: %[[IN_J_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_J_SZ]], %[[J]])[%[[TILE_1]], %[[IN_D1]]]
+// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[OUT_D2:.+]] = tensor.dim %[[ITER1]], %[[C2]]
+// CHECK: %[[OUT_D3:.+]] = tensor.dim %[[ITER1]], %[[C3]]
+// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], %[[OUT_D2]], %[[OUT_D3]]] [1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
+// CHECK: %[[PACK:.*]] = linalg.pack
+// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_0]], %[[TILE_1]]]
+// CHECK-SAME: into %[[SUB_OUT]]
+// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[PACK]] into %[[ITER1]]
+// CHECK: scf.yield %[[INSERT]] : tensor<?x?x?x?xf32>
+// CHECK: }
+// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x?x?xf32>
+// CHECK: }
+// CHECK: return %[[RES0:.*]] : tensor<?x?x?x?xf32>
+// CHECK: }
+func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x?x?xf32>, %pad: f32, %tile_n : index, %tile_m : index) -> tensor<?x?x?x?xf32> {
+ %0 = linalg.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
+ return %0 : tensor<?x?x?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)>
+// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)>
+// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 16)>
+// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 16)>
+// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 16 - d0 floordiv 16 + 1)>
+// CHECK: func.func @NCnc_to_NC
+// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
+// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index
+// CHECK: %{{.+}} = scf.for %[[I:.+]] = %[[C0]] to %[[C256]] step %[[C2]]
+// CHECK: %{{.+}} = scf.for %[[J:.+]] = %[[C0]] to %[[C128]] step %[[C4]]
+// CHECK-DAG: %[[IN_I:.+]] = affine.apply #[[MAP0]](%[[I]])
+// CHECK-DAG: %[[OFFSET_I:.+]] = affine.apply #[[MAP1]](%[[I]])
+// CHECK-DAG: %[[IN_I_SZ:.+]] = affine.apply #[[MAP2]](%[[I]])
+// CHECK-DAG: %[[IN_J:.+]] = affine.apply #[[MAP4]](%[[J]])
+// CHECK-DAG: %[[OFFSET_J:.+]] = affine.apply #[[MAP5]](%[[J]])
+// CHECK-DAG: %[[IN_J_SZ:.+]] = affine.apply #[[MAP6]](%[[J]])
+// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[IN]]
+// CHECK-SAME: [%[[IN_I]], %[[IN_J]], 0, 0] [%[[IN_I_SZ]], %[[IN_J_SZ]], 32, 16]
+// CHECK-SAME: : tensor<8x8x32x16xf32> to tensor<?x?x32x16xf32>
+// CHECK: %[[EMPTY:.+]] = tensor.empty
+// CHECK: %[[UNPACK:.+]] = linalg.unpack
+// CHECK-SAME: %[[SLICE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16]
+// CHECK-SAME: into %[[EMPTY]]
+// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
+// CHECK-SAME: [%[[OFFSET_I]], %[[OFFSET_J]]] [2, 4]
+// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]]
+// CHECK-SAME: into %{{.+}}[%[[I]], %[[J]]] [2, 4]
+// CHECK: scf.yield %[[RES]]
+func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+ return %0 : tensor<256x128xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)>
+// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)>
+// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 8)>
+// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 8)>
+// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 8 - d0 floordiv 8 + 1)>
+// CHECK: func.func @CKkc_to_KC
+// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
+// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index
+// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C128]] step %[[C2]]
+// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C256]] step %[[C4]]
+// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]])
+// CHECK-DAG: %[[OFFSET_K:.+]] = affine.apply #[[MAP1]](%[[K]])
+// CHECK-DAG: %[[IN_K_SZ:.+]] = affine.apply #[[MAP2]](%[[K]])
+// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP4]](%[[C]])
+// CHECK-DAG: %[[OFFSET_C:.+]] = affine.apply #[[MAP5]](%[[C]])
+// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP6]](%[[C]])
+// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
+// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], %[[IN_K_SZ]], 32, 8]
+// CHECK: %[[EMPTY:.+]] = tensor.empty
+// CHECK: %[[UNPACK:.+]] = linalg.unpack
+// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8]
+// CHECK-SAME: into %[[EMPTY]]
+// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
+// CHECK-SAME: [%[[OFFSET_K]], %[[OFFSET_C]]] [2, 4]
+// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]]
+// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4]
+// CHECK: scf.yield %[[RES]]
+func.func @CKkc_to_KC(%source: tensor<32x4x32x8xf32>, %dest: tensor<128x256xf32>) -> tensor<128x256xf32> {
+ %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %dest : tensor<32x4x32x8xf32> -> tensor<128x256xf32>
+ return %0 : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 4)>
+// CHECK: func.func @perfect_CKkc_to_KC
+// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
+// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
+// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C2]]
+// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C128]] step %[[C4]]
+// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]])
+// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP1]](%[[C]])
+// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
+// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [1, 1, 2, 4]
+// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [2, 4]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack
+// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4]
+// CHECK-SAME: into %[[ITER_SLICE]]
+// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
+// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4]
+// CHECK: scf.yield %[[RES]]
+func.func @perfect_CKkc_to_KC(%source: tensor<32x4x2x4xf32>, %dest: tensor<8x128xf32>) -> tensor<8x128xf32> {
+ %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %dest : tensor<32x4x2x4xf32> -> tensor<8x128xf32>
+ return %0 : tensor<8x128xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
+// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
+// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 ceildiv 2)>
+// CHECK: func.func @dynamic_perfect_CKkc_to_KC
+// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
+// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG:     %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG:     %[[C4:.*]] = arith.constant 4 : index
+// CHECK-DAG:     %[[DIM_0:.+]] = tensor.dim %[[OUT]], %[[C0]]
+// CHECK-DAG:     %[[DIM_1:.+]] = tensor.dim %[[OUT]], %[[C1]]
+// CHECK:         %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[DIM_0]] step %[[C2]]
+// CHECK:           %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[DIM_1]] step %[[C4]]
+// CHECK-DAG:         %[[OUT_K_SZ:.+]] = affine.min #[[MAP0]](%[[K]])[%[[DIM_0]]]
+// CHECK-DAG:         %[[OUT_C_SZ:.+]] = affine.min #[[MAP1]](%[[C]])[%[[DIM_1]]]
+// CHECK-DAG:         %[[IN_K:.+]] = affine.apply #[[MAP2]](%[[K]])
+// CHECK-DAG:         %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]])
+// CHECK-DAG:         %[[IN_C_SZ:.+]] = affine.apply #[[MAP3]](%[[OUT_C_SZ]])
+// CHECK:             %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
+// CHECK:               [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], 1, 2, 2]
+// CHECK:             %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]]
+// CHECK:             %[[UNPACK:.+]] = linalg.unpack
+// CHECK-SAME:          %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2]
+// CHECK-SAME:          into %[[ITER_SLICE]]
+// CHECK:             %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
+// CHECK-SAME:          into %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]]
+// CHECK:             scf.yield %[[RES]]
+func.func @dynamic_perfect_CKkc_to_KC(%source: tensor<?x?x2x2xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %dest : tensor<?x?x2x2xf32> -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK: #[[MAP:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
+// CHECK: func.func @perfect_NKPQk_to_NPQK(
+// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x4x6x6x2xf32>,
+// CHECK-SAME: %{{.+}}: tensor<1x6x6x8xf32>)
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
+// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK: %{{.+}} = scf.for %[[P:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
+// CHECK: %{{.+}} = scf.for %[[Q:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
+// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C4]]
+// CHECK: %[[K_SZ:.+]] = affine.apply #[[MAP]](%[[K]])
+// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[K_SZ]], %[[P]], %[[Q]], 0]
+// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[P]], %[[Q]], %[[K]]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack
+// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2]
+// CHECK-SAME: into %[[SLICE_DEST]]
+// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
+// CHECK-SAME: into %{{.+}}[0, %[[P]], %[[Q]], %[[K]]]
+// CHECK: scf.yield %[[RES]]
+
+func.func @perfect_NKPQk_to_NPQK(%source: tensor<1x4x6x6x2xf32>, %dest: tensor<1x6x6x8xf32>) -> tensor<1x6x6x8xf32> {
+ %0 = linalg.unpack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x4x6x6x2xf32> -> tensor<1x6x6x8xf32>
+ return %0 : tensor<1x6x6x8xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+func.func private @get_dynamic_tile_size() -> index
+
+// CHECK-LABEL: func.func @fully_dynamic_unpack
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
+// CHECK-SAME: %[[DST:[0-9a-zA-Z]+]]
+// CHECK: %[[INNER_TS:.+]] = call @get_dynamic_tile_size() : () -> index
+// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[DST]])
+// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]])
+// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[SRC]]
+// CHECK: %[[EMPTY:.+]] = tensor.empty
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SLICE]]
+// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [%[[INNER_TS]], %[[INNER_TS]]] into %[[EMPTY]]
+func.func @fully_dynamic_unpack(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = func.call @get_dynamic_tile_size() : () -> index
+ %1 = linalg.unpack %source inner_dims_pos = [1, 0] inner_tiles = [%0, %0] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
+ return %1 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * 2)>
+// CHECK: func.func @perfect_NPQK_to_NKPQk
+// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x6x6x8xf32>,
+// CHECK-SAME: %{{.+}}: tensor<1x4x6x6x2xf32>)
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index
+// CHECK: %{{.+}} = scf.for %[[ARG2:.+]] = %[[C0]] to %[[C4]] step %[[C1]]
+// CHECK: %{{.+}} = scf.for %[[ARG4:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
+// CHECK: %{{.+}} = scf.for %[[ARG6:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
+// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP1]](%[[ARG2]])
+// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[ARG4]], %[[ARG6]], %[[APPLY]]]
+// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0]
+// CHECK: %[[PACK:.+]] = linalg.pack
+// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2]
+// CHECK-SAME: into %[[SLICE_DEST]]
+// CHECK: %[[RES:.+]] = tensor.insert_slice %[[PACK]]
+// CHECK-SAME: into %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0]
+// CHECK: scf.yield %[[RES]]
+
+func.func @perfect_NPQK_to_NKPQk(%source: tensor<1x6x6x8xf32>, %dest: tensor<1x4x6x6x2xf32>) -> tensor<1x4x6x6x2xf32> {
+ %0 = linalg.pack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x6x6x8xf32> -> tensor<1x4x6x6x2xf32>
+ return %0 : tensor<1x4x6x6x2xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
diff --git a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir
index 100692426ef44c..5812c4db88247f 100644
--- a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir
+++ b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir
@@ -378,11 +378,11 @@ func.func @no_padding_on_packs(%A: tensor<32x32xf32>, %B: tensor<32x32xf32>, %C:
}
// CHECK-LABEL: no_padding_on_packs
-// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 4]
+// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 4]
// CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x8x8x4xf32>
-// CHECK: tensor.pack %{{.+}} outer_dims_perm = [1, 0]
+// CHECK: linalg.pack %{{.+}} outer_dims_perm = [1, 0]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 16] into %{{.+}} : tensor<32x32xf32> -> tensor<2x8x4x16xf32>
-// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 16]
+// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 16]
// CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x2x8x16xf32>
module attributes {transform.with_named_sequence} {
@@ -393,12 +393,12 @@ module attributes {transform.with_named_sequence} {
matmul_packed_sizes = [8, 16, 4] matmul_inner_dims_order = [0, 1, 2]
: (!transform.op<"linalg.matmul">) -> !transform.op<"linalg.generic">
%pack = transform.get_producer_of_operand %1[1]
- : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">)
+ : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">)
%2, %pack_2, %empty_unpack_2 =
transform.structured.pack_transpose %pack with_compute_op(%1)
outer_perm = [1, 0] inner_perm = [1, 0]
- : (!transform.op<"tensor.pack">, !transform.op<"linalg.generic">)
- -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op)
+ : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">)
+ -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op)
transform.yield
}
}
diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir
index faf7ff9ad7ed09..5d4ae4f15d3fd1 100644
--- a/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir
+++ b/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir
@@ -14,7 +14,7 @@ module {
func.func @fuse_pack_as_producer(%src: tensor<128x256xf32>, %other: tensor<4x4x128x256xf32>)
-> tensor<4x4x128x256xf32> {
%dest = tensor.empty() : tensor<1x1x128x256xf32>
- %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256]
+ %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256]
into %dest : tensor<128x256xf32> -> tensor<1x1x128x256xf32>
%out = tensor.empty() : tensor<4x4x128x256xf32>
@@ -36,10 +36,10 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
// Find and lower pack operation.
- %pack = transform.structured.match ops{["tensor.pack"]} in %arg1
- : (!transform.any_op) -> !transform.op<"tensor.pack">
+ %pack = transform.structured.match ops{["linalg.pack"]} in %arg1
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
%paded, %expanded, %transpose = transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false}
- : (!transform.op<"tensor.pack">)
+ : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">,
!transform.op<"tensor.expand_shape">,
!transform.op<"linalg.transpose">)
@@ -72,7 +72,7 @@ module {
func.func @fuse_pack_as_producer_blocked_by_insert_slice(%src: tensor<128x256xf32>, %other: tensor<4x4x128x256xf32>)
-> tensor<4x4x128x256xf32> {
%dest = tensor.empty() : tensor<1x1x128x256xf32>
- %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256]
+ %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256]
into %dest : tensor<128x256xf32> -> tensor<1x1x128x256xf32>
%out = tensor.empty() : tensor<4x4x128x256xf32>
@@ -94,10 +94,10 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
// Find and lower pack operation.
- %pack = transform.structured.match ops{["tensor.pack"]} in %arg1
- : (!transform.any_op) -> !transform.op<"tensor.pack">
+ %pack = transform.structured.match ops{["linalg.pack"]} in %arg1
+ : (!transform.any_op) -> !transform.op<"linalg.pack">
%paded, %expanded, %transpose = transform.structured.lower_pack %pack
- : (!transform.op<"tensor.pack">)
+ : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">,
!transform.op<"tensor.expand_shape">,
!transform.op<"linalg.transpose">)
@@ -143,7 +143,7 @@ module {
} -> tensor<1x1x128x256xf32>
%dest = tensor.empty() : tensor<128x256xf32>
- %unpack = tensor.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256]
+ %unpack = linalg.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256]
into %dest : tensor<1x1x128x256xf32> -> tensor<128x256xf32>
return %unpack : tensor<128x256xf32>
@@ -152,10 +152,10 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
// Find and lower unpack operation.
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %arg1
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %arg1
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false}
- : (!transform.op<"tensor.unpack">)
+ : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
@@ -204,7 +204,7 @@ module {
} -> tensor<1x1x128x256xf32>
%dest = tensor.empty() : tensor<128x256xf32>
- %unpack = tensor.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256]
+ %unpack = linalg.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256]
into %dest : tensor<1x1x128x256xf32> -> tensor<128x256xf32>
return %unpack : tensor<128x256xf32>
@@ -213,10 +213,10 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
// Find and lower unpack operation.
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %arg1
- : (!transform.any_op) -> !transform.op<"tensor.unpack">
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %arg1
+ : (!transform.any_op) -> !transform.op<"linalg.unpack">
transform.structured.lower_unpack %unpack
- : (!transform.op<"tensor.unpack">)
+ : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 8fbc74ec345c6b..8f3b199145ce03 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -115,13 +115,13 @@ module attributes {transform.with_named_sequence} {
func.func @test_pack_no_vectorize_dynamic_shape(%arg0: tensor<?xf32>, %arg1: tensor<4x16xf32>) -> tensor<4x16xf32> {
%pad = arith.constant 0.000000e+00 : f32
// expected-error @+1 {{Attempted to vectorize, but failed}}
- %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [0] inner_tiles = [16] into %arg1 : tensor<?xf32> -> tensor<4x16xf32>
+ %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [0] inner_tiles = [16] into %arg1 : tensor<?xf32> -> tensor<4x16xf32>
return %pack : tensor<4x16xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 : !transform.any_op
transform.yield
}
diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
index b688a677500c22..1b234cffa212da 100644
--- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
@@ -1911,13 +1911,13 @@ module attributes {transform.with_named_sequence} {
// masking was used.
func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
- %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
+ %pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
return %pack : tensor<4x1x32x16x2xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
%1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
%2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
transform.yield
@@ -1944,7 +1944,7 @@ module attributes {transform.with_named_sequence} {
func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
%pad = arith.constant 0.000000e+00 : f32
- %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
+ %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
return %pack : tensor<32x4x1x16x2xf32>
}
@@ -1962,7 +1962,7 @@ func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
%1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
%2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
transform.yield
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index 0f2abe06569d64..31ca5ab84ea179 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -671,7 +671,7 @@ module attributes {transform.with_named_sequence} {
// masking was used.
func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
- %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
+ %pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
return %pack : tensor<4x1x32x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
@@ -688,7 +688,7 @@ func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 vector_sizes [4, 1, 32] : !transform.any_op
transform.yield
}
@@ -702,7 +702,7 @@ module attributes {transform.with_named_sequence} {
func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
%pad = arith.constant 0.000000e+00 : f32
- %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
+ %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
return %pack : tensor<32x4x1x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
@@ -725,7 +725,7 @@ func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 vector_sizes [32, 4, 1] : !transform.any_op
transform.yield
}
@@ -734,7 +734,7 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @test_vectorize_dynamic_pack(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> {
- %pack = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg1 : tensor<?x?xf32> -> tensor<?x?x16x2xf32>
+ %pack = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg1 : tensor<?x?xf32> -> tensor<?x?x16x2xf32>
return %pack : tensor<?x?x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
@@ -766,7 +766,7 @@ func.func @test_vectorize_dynamic_pack(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 vector_sizes [4, 1] : !transform.any_op
transform.yield
}
@@ -893,12 +893,12 @@ func.func @test_vectorize_dynamic_shapes_unpack(%arg0: tensor<?x?xf32>, %arg1: t
// CHECK: %[[writeMsk0:.*]] = vector.create_mask {{.*}} : vector<4x16xi1>
// CHECK: %[[write0:.*]] = vector.mask %[[writeMsk0:.*]] {{.*}} vector.transfer_write %[[sc0]], %[[empt0]]
// CHECK: return %[[write0]]
- %ret = tensor.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg0 : tensor<?x?x16x2xf32> -> tensor<?x?xf32>
+ %ret = linalg.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg0 : tensor<?x?x16x2xf32> -> tensor<?x?xf32>
return %ret : tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 vector_sizes [4, 16] : !transform.any_op
transform.yield
}
@@ -925,12 +925,12 @@ func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<2
// CHECK: %[[WRITEMSK:.*]] = vector.create_mask %[[C256]], %[[C128]] : vector<512x128xi1>
// CHECK: %[[WRIT:.*]] = vector.mask %[[WRITEMSK]] {{.*}} : vector<512x128xi1> -> tensor<256x128xf32>
// CHECK: return %[[WRIT]] : tensor<256x128xf32>
- %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
return %0 : tensor<256x128xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 vector_sizes [512, 128] : !transform.any_op
transform.yield
}
@@ -949,12 +949,12 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest:
// CHECK: %[[C00:.*]] = arith.constant 0 : index
// CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32>
// CHECK: return %[[WRIT]] : tensor<256x128xf32>
- %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
return %0 : tensor<256x128xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
transform.yield
}
@@ -973,12 +973,12 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest:
// CHECK: %[[C00:.*]] = arith.constant 0 : index
// CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32>
// CHECK: return %[[WRIT]] : tensor<256x128xf32>
- %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+ %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
return %0 : tensor<256x128xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
transform.yield
}
@@ -988,7 +988,7 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest:
// CHECK-LABEL: test_vectorize_pack_no_vector_sizes
func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> {
- %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
+ %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
return %pack : tensor<2x4x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
@@ -1005,7 +1005,7 @@ func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: t
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 : !transform.any_op
transform.yield
}
@@ -1016,7 +1016,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes
func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
%pad = arith.constant 0.000000e+00 : f32
- %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
+ %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
return %pack : tensor<32x4x1x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
@@ -1033,7 +1033,7 @@ func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 : !transform.any_op
transform.yield
}
@@ -1051,12 +1051,12 @@ func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>,
// CHECK: %[[C00:.*]] = arith.constant 0 : index
// CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32>
// CHECK: return %[[WRIT]] : tensor<256x128xf32>
- %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
return %0 : tensor<256x128xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 : !transform.any_op
transform.yield
}
@@ -1075,12 +1075,12 @@ func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x
// CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[EMPT]]{{\[}}%[[C00]], %[[C00]]]
// CHECK-SAME: {in_bounds = [true, false]} : vector<64x128xf32>, tensor<64x127xf32>
// CHECK: return %[[WRIT]] : tensor<64x127xf32>
- %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
+ %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
return %0 : tensor<64x127xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 : !transform.any_op
transform.yield
}
@@ -1089,7 +1089,7 @@ func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x
// -----
func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf32>, %dest: tensor<7x16xf32>) -> tensor<7x16xf32> {
- %0 = tensor.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32>
+ %0 = linalg.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32>
return %0 : tensor<7x16xf32>
}
// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
@@ -1103,7 +1103,7 @@ func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf
// CHECK: return %[[WRIT]] : tensor<7x16xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize %0 : !transform.any_op
transform.yield
}
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
index 01d14871072cdf..90cc0ca658ffb6 100644
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -899,225 +899,6 @@ func.func @fold_extract_constant_splat() -> (tensor<4x4xi32>) {
// -----
-// CHECK-LABEL: func @fold_pack_constant_splat
-// CHECK-NOT: tensor.pack
-// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32>
-func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> {
- %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32>
- %0 = tensor.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
- inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32>
- return %0 : tensor<8x16x8x32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @fold_padding_value_pack_constant_splat
-// CHECK-NOT: tensor.pack
-// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32>
-func.func @fold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> {
- %pad = arith.constant 1.000000e-01 : f32
- %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32>
- %0 = tensor.pack %cst
- padding_value(%pad : f32)
- outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
- inner_tiles = [8, 32] into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32>
- return %0 : tensor<8x16x8x32xf32>
-}
-
-
-// -----
-
-// CHECK-LABEL: func @nofold_padding_value_pack_constant_splat
-// CHECK: arith.constant dense<1.000000e-01> : tensor<63x127xf32>
-// CHECK: tensor.pack
-func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> {
- %pad = arith.constant 0.0 : f32
- %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32>
- %0 = tensor.pack %cst
- padding_value(%pad : f32)
- outer_dims_perm = [1, 0]
- inner_dims_pos = [0, 1]
- inner_tiles = [8, 32]
- into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32>
- return %0 : tensor<8x16x8x32xf32>
-}
-
-// -----
-
-func.func @fold_padding_value_pack(%arg0: tensor<1200x500000xf32>) -> tensor<31250x1200x16x1xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %0 = tensor.empty() : tensor<31250x1200x16x1xf32>
- %pack = tensor.pack %arg0
- padding_value(%cst : f32)
- outer_dims_perm = [1, 0]
- inner_dims_pos = [1, 0]
- inner_tiles = [16, 1]
- into %0 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32>
- return %pack : tensor<31250x1200x16x1xf32>
-}
-// CHECK-LABEL: func @fold_padding_value_pack
-// CHECK-NOT: padding_value
-
-// -----
-
-func.func @infer_src_shape_pack(%src: tensor<?x?x?x?xf32>, %dest: tensor<10x20x30x40x16xf32>) -> tensor<10x20x30x40x16xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %pack = tensor.pack %src
- padding_value(%cst : f32)
- outer_dims_perm = [2, 1, 3, 0]
- inner_dims_pos = [2]
- inner_tiles = [16]
- into %dest : tensor<?x?x?x?xf32> -> tensor<10x20x30x40x16xf32>
- return %pack : tensor<10x20x30x40x16xf32>
-}
-// CHECK-LABEL: func.func @infer_src_shape_pack
-// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
-// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]]
-// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor<?x?x?x?xf32> to tensor<40x20x?x30xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[CAST_SRC]] {{.+}} into %[[DEST]]
-// CHECK: return %[[PACK]]
-
-// -----
-
-func.func @infer_dest_shape_pack(%src: tensor<30x20x?x10xf32>, %dest: tensor<?x?x?x?x16xf32>) -> tensor<?x?x?x?x16xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %pack = tensor.pack %src
- padding_value(%cst : f32)
- outer_dims_perm = [2, 1, 3, 0]
- inner_dims_pos = [2]
- inner_tiles = [16]
- into %dest : tensor<30x20x?x10xf32> -> tensor<?x?x?x?x16xf32>
- return %pack : tensor<?x?x?x?x16xf32>
-}
-// CHECK-LABEL: func.func @infer_dest_shape_pack
-// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
-// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]]
-// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor<?x?x?x?x16xf32> to tensor<?x20x10x30x16xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] {{.+}} into %[[CAST_DEST]]
-// CHECK: %[[CAST_PACK:.+]] = tensor.cast %[[PACK]] : tensor<?x20x10x30x16xf32> to tensor<?x?x?x?x16xf32>
-// CHECK: return %[[CAST_PACK]]
-
-// -----
-
-func.func @no_infer_pack_shape(%arg0: tensor<?x32x100xf32>, %arg1: index) -> tensor<32x7x?x16x1xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %0 = tensor.empty(%arg1) : tensor<32x7x?x16x1xf32>
- %pack = tensor.pack %arg0 padding_value(%cst : f32) outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<?x32x100xf32> -> tensor<32x7x?x16x1xf32>
- return %pack : tensor<32x7x?x16x1xf32>
-}
-// CHECK-LABEL: func.func @no_infer_pack_shape
-// CHECK-NOT: tensor.cast
-
-// -----
-
-func.func @fold_padding_value_pack_negative1(%arg0: tensor<1200x499999xf32>) -> tensor<31250x1200x16x1xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %0 = tensor.empty() : tensor<31250x1200x16x1xf32>
- %pack = tensor.pack %arg0
- padding_value(%cst : f32)
- outer_dims_perm = [1, 0]
- inner_dims_pos = [1, 0]
- inner_tiles = [16, 1]
- into %0 : tensor<1200x499999xf32> -> tensor<31250x1200x16x1xf32>
- return %pack : tensor<31250x1200x16x1xf32>
-}
-// CHECK-LABEL: func @fold_padding_value_pack_negative1
-// CHECK: tensor.pack
-// CHECK-SAME: padding_value
-
-// -----
-
-func.func @fold_padding_value_pack_negative2(%arg0: tensor<1200x?xf32>, %arg1: tensor<?x1200x16x1xf32>) -> tensor<?x1200x16x1xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %pack = tensor.pack %arg0
- padding_value(%cst : f32)
- outer_dims_perm = [1, 0]
- inner_dims_pos = [1, 0]
- inner_tiles = [16, 1]
- into %arg1 : tensor<1200x?xf32> -> tensor<?x1200x16x1xf32>
- return %pack : tensor<?x1200x16x1xf32>
-}
-// CHECK-LABEL: func @fold_padding_value_pack_negative2
-// CHECK: tensor.pack
-// CHECK-SAME: padding_value
-
-// -----
-
-func.func @fold_padding_value_pack_negative3(%arg0: tensor<1200x500000xf32>, %arg1: tensor<?x1200x?x1xf32>, %tile : index) -> tensor<?x1200x?x1xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %pack = tensor.pack %arg0
- padding_value(%cst : f32)
- outer_dims_perm = [1, 0]
- inner_dims_pos = [1, 0]
- inner_tiles = [%tile, 1]
- into %arg1 : tensor<1200x500000xf32> -> tensor<?x1200x?x1xf32>
- return %pack : tensor<?x1200x?x1xf32>
-}
-// CHECK-LABEL: func @fold_padding_value_pack_negative3
-// CHECK: tensor.pack
-// CHECK-SAME: padding_value
-
-// -----
-
-// CHECK-LABEL: func @fold_unpack_constant_splat
-// CHECK-NOT: tensor.unpack
-// CHECK: arith.constant dense<1.000000e-01> : tensor<128x256xf32>
-func.func @fold_unpack_constant_splat(%dest : tensor<128x256xf32>) -> tensor<128x256xf32> {
- %cst = arith.constant dense<1.000000e-01> : tensor<16x8x8x32xf32>
- %0 = tensor.unpack %cst inner_dims_pos = [0, 1]
- inner_tiles = [8, 32] into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
- return %0 : tensor<128x256xf32>
-}
-
-// -----
-
-func.func @infer_dest_shape_unpack(%src: tensor<10x20x30x40x16xf32>, %dest: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
- %unpack = tensor.unpack %src
- outer_dims_perm = [2, 1, 3, 0]
- inner_dims_pos = [2]
- inner_tiles = [16]
- into %dest : tensor<10x20x30x40x16xf32> -> tensor<?x?x?x?xf32>
- return %unpack : tensor<?x?x?x?xf32>
-}
-// CHECK-LABEL: func.func @infer_dest_shape_unpack
-// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
-// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]]
-// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor<?x?x?x?xf32> to tensor<40x20x?x30xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SRC]] {{.+}} into %[[CAST_DEST]]
-// CHECK: %[[CAST_UNPACK:.+]] = tensor.cast %[[UNPACK]] : tensor<40x20x?x30xf32> to tensor<?x?x?x?xf32>
-// CHECK: return %[[CAST_UNPACK]]
-
-// -----
-
-func.func @infer_src_shape_unpack(%src: tensor<?x?x?x?x16xf32>, %dest: tensor<30x20x?x10xf32>) -> tensor<30x20x?x10xf32> {
- %unpack = tensor.unpack %src
- outer_dims_perm = [2, 1, 3, 0]
- inner_dims_pos = [2]
- inner_tiles = [16]
- into %dest : tensor<?x?x?x?x16xf32> -> tensor<30x20x?x10xf32>
- return %unpack : tensor<30x20x?x10xf32>
-}
-// CHECK-LABEL: func.func @infer_src_shape_unpack
-// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
-// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]]
-// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor<?x?x?x?x16xf32> to tensor<?x20x10x30x16xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[CAST_SRC]]
-// CHECK: return %[[UNPACK]]
-
-// -----
-
-func.func @no_infer_unpack_shape(%arg1: tensor<32x7x?x16x1xf32>, %arg2: index) -> tensor<?x32x100xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %0 = tensor.empty(%arg2) : tensor<?x32x100xf32>
- %unpack = tensor.unpack %arg1 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<32x7x?x16x1xf32> -> tensor<?x32x100xf32>
- return %unpack : tensor<?x32x100xf32>
-}
-// CHECK-LABEL: func.func @no_infer_unpack_shape
-// CHECK-NOT: tensor.cast
-
-// -----
-
-
// CHECK-LABEL: func @fold_overlapping_insert
// CHECK-SAME: %[[INPUT:.+]]: tensor<?x?x?xf32>, %{{.+}}: tensor<4x?x8xf32>, %[[SLICE2:.+]]: tensor<4x?x8xf32>
func.func @fold_overlapping_insert(%input : tensor<?x?x?xf32>, %slice1: tensor<4x?x8xf32>, %slice2: tensor<4x?x8xf32>, %i: index, %size: index) -> (tensor<?x?x?xf32>) {
@@ -2370,174 +2151,6 @@ func.func @collapse_expand_fold_to_cast(%t: tensor<?xf32>, %sz0: index) -> (tens
// -----
-// Chain: NC -> NCnc -> NCnc -> NC
-// CHECK: func.func @unpack_pack(
-// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>)
-// CHECK: return %[[T]] : tensor<128x128xf32>
-func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> {
- %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32>
- %packed = tensor.pack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
- %tensor_empty1 = tensor.empty() : tensor<128x128xf32>
- %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
- return %unpacked : tensor<128x128xf32>
-}
-
-// -----
-
-// Chain: NC -> NCcn -> NCnc -> NC
-// CHECK: func.func @unpack_pack(
-// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>)
-// CHECK-NOT: return %[[T]] : tensor<128x128xf32>
-func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> {
- %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32>
- %packed = tensor.pack %t inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
- %tensor_empty1 = tensor.empty() : tensor<128x128xf32>
- %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor
-<128x128xf32>
- return %unpacked : tensor<128x128xf32>
-}
-
-// -----
-
-// Chain: NC -> CNcn -> NCnc -> NC
-// CHECK: func.func @unpack_pack(
-// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>)
-// CHECK-NOT: return %[[T]] : tensor<128x128xf32>
-func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> {
- %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32>
- %packed = tensor.pack %t outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
- %tensor_empty1 = tensor.empty() : tensor<128x128xf32>
- %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor
-<128x128xf32>
- return %unpacked : tensor<128x128xf32>
-}
-
-// -----
-
-// Chain: NC -> NCnc -> NCnc -> NC
-// CHECK: func.func @unpack_pack(
-// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>,
-// CHECK: return %[[T]] : tensor<128x128xf32>
-func.func @unpack_pack(%t: tensor<128x128xf32>, %tile1: index, %tile2: index) -> tensor<128x128xf32> {
- %tensor_empty = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32>
- %packed = tensor.pack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x?x?xf32>
- %tensor_empty1 = tensor.empty() : tensor<128x128xf32>
- %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<16x16x?x?xf32> -> tensor
-<128x128xf32>
- return %unpacked : tensor<128x128xf32>
-}
-
-// -----
-
-// CHECK: func.func @unpack_pack_with_padding_no_canonicalization(
-// CHECK: tensor.pack
-// CHECK: tensor.unpack
-func.func @unpack_pack_with_padding_no_canonicalization(%t: tensor<256x512xbf16>) -> tensor<224x512xbf16> {
- %tensor_empty = tensor.empty() : tensor<4x16x64x32xbf16>
- %tensor_empty1 = tensor.empty() : tensor<224x512xbf16>
- %packed = tensor.pack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty : tensor<256x512xbf16> -> tensor<4x16x64x32xbf16>
- %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty1 : tensor<4x16x64x32xbf16> -> tensor<224x512xbf16>
- return %unpacked : tensor<224x512xbf16>
-}
-
-// -----
-
-// Chain NCnc -> NC -> NC -> NCnc
-// CHECK: func.func @pack_unpack(
-// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>,
-// CHECK: return %[[T]] : tensor<16x16x?x?xf32>
-func.func @pack_unpack(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> {
- %tensor_empty = tensor.empty() : tensor<128x128xf32>
- %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32>
- %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32>
- %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32>
- return %packed : tensor<16x16x?x?xf32>
-}
-
-// -----
-
-// Chain NCnc -> NC -> NC -> NCnc
-// CHECK: func.func @pack_unpack(
-// CHECK-SAME: %[[T:.+]]: tensor<16x16x8x8xf32>
-// CHECK: return %[[T]] : tensor<16x16x8x8xf32>
-func.func @pack_unpack(%t: tensor<16x16x8x8xf32>) -> tensor<16x16x8x8xf32> {
- %tensor_empty = tensor.empty() : tensor<128x128xf32>
- %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
- %tensor_empty1 = tensor.empty() : tensor<16x16x8x8xf32>
- %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
- return %packed : tensor<16x16x8x8xf32>
-}
-
-// -----
-
-// CHECK: func.func @pack_unpack_same_tiles(
-// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>,
-// CHECK: return %[[T]] : tensor<?x?x?x?xf32>
-func.func @pack_unpack_same_tiles(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index,
- %tile1: index, %tile2: index) -> tensor<?x?x?x?xf32> {
- %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32>
- %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
- %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32>
- %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
- return %packed : tensor<?x?x?x?xf32>
-}
-
-// -----
-
-// CHECK: func.func @pack_unpack_different_tiles(
-// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>,
-// CHECK-NOT: return %[[T]] : tensor<?x?x?x?xf32>
-func.func @pack_unpack_different_tiles(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index,
- %tile1: index, %tile2: index) -> tensor<?x?x?x?xf32> {
- %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32>
- %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
- %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32>
- %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile2, %tile1] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
- return %packed : tensor<?x?x?x?xf32>
-}
-
-// -----
-
-// CHECK: func.func @pack_unpack_dynamic_with_padding(
-// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>,
-// CHECK-NOT: return %[[T]] : tensor<?x?x?x?xf32>
-func.func @pack_unpack_dynamic_with_padding(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index,
- %tile1: index, %tile2: index, %pad: f32) -> tensor<?x?x?x?xf32> {
- %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32>
- %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
- %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32>
- %packed = tensor.pack %unpacked padding_value(%pad: f32) inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
- return %packed : tensor<?x?x?x?xf32>
-}
-
-// -----
-
-// CHECK: func.func @pack_outer_dims_unpack_no_outer_dims(
-// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>,
-// CHECK: return %[[T]] : tensor<16x16x?x?xf32>
-func.func @pack_outer_dims_unpack_no_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> {
- %tensor_empty = tensor.empty() : tensor<128x128xf32>
- %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32>
- %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32>
- %packed = tensor.pack %unpacked outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32>
- return %packed : tensor<16x16x?x?xf32>
-}
-
-// -----
-
-// CHECK: func.func @pack_no_outer_dims_unpack_outer_dims(
-// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>,
-// CHECK: return %[[T]] : tensor<16x16x?x?xf32>
-func.func @pack_no_outer_dims_unpack_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> {
- %tensor_empty = tensor.empty() : tensor<128x128xf32>
- %unpacked = tensor.unpack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32>
- %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32>
- %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32>
- return %packed : tensor<16x16x?x?xf32>
-}
-
-// -----
-
// CHECK: func.func @invalid_empty_negative_size
// CHECK: %[[IDX:.*]] = index.constant
// CHECK: %[[T:.*]] = tensor.empty(%[[IDX]]) : tensor<4x5x?xf32>
@@ -2551,22 +2164,6 @@ func.func @invalid_empty_negative_size() -> (tensor<4x5x?xf32>) {
// -----
-// Fold DstStyleOp -> tensor.unpack operations.
-func.func @fold_dst_style_ops_into_unpack(%arg0 : tensor<?x?x16x64xf32>, %init : tensor<?x?xf32>) -> tensor<?x?xf32> {
- %cst = arith.constant 0.0 : f32
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
- %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 64] into %fill : tensor<?x?x16x64xf32> -> tensor<?x?xf32>
- return %unpack : tensor<?x?xf32>
-}
-// CHECK-LABEL: func @fold_dst_style_ops_into_unpack
-// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x16x64xf32>
-// CHECK-SAME: %[[INIT:.+]]: tensor<?x?xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
-// CHECK-SAME: into %[[INIT]]
-// CHECK: return %[[UNPACK]]
-
-// -----
-
// The IR in this test case in invalid. This test tests that the canonicalizer
// does not crash.
@@ -2598,21 +2195,6 @@ func.func @generate_negative_size_verifies() -> tensor<?x8xi32> {
return %tensor : tensor<?x8xi32>
}
-// -----
-
-func.func @infer_and_fold_pack_unpack_same_tiles(%t: tensor<10x20x4x4xf32>) -> tensor<10x20x4x4xf32> {
- %dim1 = arith.constant 40 : index
- %dim2 = arith.constant 80 : index
- %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32>
- %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty : tensor<10x20x4x4xf32> -> tensor<?x?xf32>
- %cast = tensor.cast %unpacked : tensor<?x?xf32> to tensor<40x80xf32>
- %tensor_empty1 = tensor.empty() : tensor<10x20x4x4xf32>
- %packed = tensor.pack %cast inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty1 : tensor<40x80xf32> -> tensor<10x20x4x4xf32>
- return %packed : tensor<10x20x4x4xf32>
-}
-// CHECK-LABEL: func.func @infer_and_fold_pack_unpack_same_tiles
-// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
-// CHECK: return %[[SRC]]
// -----
@@ -2787,62 +2369,6 @@ func.func @fold_cast_multiple_results(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2x
return %0#1 : index
}
-// -----
-
-// CHECK-LABEL: func.func @fold_cast_pack_dynamic_tile_size
-// CHECK-SAME: %[[DEST:.*]]: tensor<1x1x8x1xi32>,
-// CHECK-SAME: %[[SRC:.*]]: tensor<7x?xi32>,
-// CHECK-SAME: %[[PAD:.*]]: i32) -> tensor<1x1x8x1xi32> {
-// CHECK: %[[PACK:.*]] = tensor.pack %[[SRC]] padding_value(%[[PAD]] : i32)
-// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]]
-// CHECK-SAME: test_attr
-// CHECK-SAME: : tensor<7x?xi32> -> tensor<1x1x8x1xi32>
-// CHECK: return %[[PACK]] : tensor<1x1x8x1xi32>
-func.func @fold_cast_pack_dynamic_tile_size(
- %dest: tensor<1x1x8x1xi32>,
- %src: tensor<7x?xi32>,
- %pad: i32) -> tensor<1x1x8x1xi32> {
-
- %cast = tensor.cast %dest : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32>
- %c8 = arith.constant 8 : index
- %pack = tensor.pack %src padding_value(%pad : i32)
- inner_dims_pos = [0, 1]
- inner_tiles = [%c8, 1]
- into %cast {test_attr} : tensor<7x?xi32> -> tensor<1x1x?x1xi32>
- %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32>
- return %res : tensor<1x1x8x1xi32>
-}
-
-// -----
-
-// CHECK-LABEL: func.func @fold_cast_unpack_dynamic_tile_size(
-// CHECK-SAME: %[[SRC:.*]]: tensor<1x1x8x1xi32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<7x?xi32>) -> tensor<7x?xi32> {
-// CHECK: %[[RES:.*]] = tensor.unpack %[[SRC]] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] {test_attr} : tensor<1x1x8x1xi32> -> tensor<7x?xi32>
-// CHECK: return %[[RES]] : tensor<7x?xi32>
-func.func @fold_cast_unpack_dynamic_tile_size(
- %src: tensor<1x1x8x1xi32>,
- %res: tensor<7x?xi32>) -> tensor<7x?xi32> {
-
- %cast = tensor.cast %src : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32>
- %c8 = arith.constant 8 : index
- %unpack = tensor.unpack %cast
- inner_dims_pos = [0, 1]
- inner_tiles = [%c8, 1]
- into %res {test_attr} : tensor<1x1x?x1xi32> -> tensor<7x?xi32>
- return %unpack : tensor<7x?xi32>
-}
-
-// -----
-
-// CHECK-LABEL: func.func @pack_dont_drop_attributes(
-// CHECK: tensor.pack {{.*}} {test_attr}
-func.func @pack_dont_drop_attributes(%arg0: tensor<?x?x?xf16>, %arg1: tensor<128x?x100x16x1xf16>) -> tensor<128x?x100x16x1xf16> {
- %c32_i64 = arith.constant 32 : i64
- %cst = arith.constant 0.000000e+00 : f16
- %pack = tensor.pack %arg0 padding_value(%cst : f16) outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %arg1 {test_attr} : tensor<?x?x?xf16> -> tensor<128x?x100x16x1xf16>
- return %pack : tensor<128x?x100x16x1xf16>
-}
// -----
diff --git a/mlir/test/Dialect/Tensor/fold-empty-op.mlir b/mlir/test/Dialect/Tensor/fold-empty-op.mlir
index 65ceb4ff3e3df4..87164a2332a380 100644
--- a/mlir/test/Dialect/Tensor/fold-empty-op.mlir
+++ b/mlir/test/Dialect/Tensor/fold-empty-op.mlir
@@ -64,77 +64,6 @@ func.func @rank_reducing_empty_tensor_extract(%sz : index, %idx : index) -> tens
return %r: tensor<2xf32>
}
-func.func @pack_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> {
- %empty_unpacked = tensor.empty() : tensor<256x256xf32>
- %packed = tensor.pack %empty_unpacked
- inner_dims_pos = [0, 1] inner_tiles = [32, 32]
- into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32>
- return %packed : tensor<8x8x32x32xf32>
-}
-
-// CHECK-LABEL: func.func @pack_empty(
-// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32>
-// CHECK-NOT: tensor.pack
-// CHECK: return %[[T]] : tensor<8x8x32x32xf32>
-
-func.func @pack_empty_dynamic(%arg0: tensor<?x?x32x32xf32>, %dim0: index, %dim1: index) -> tensor<?x?x32x32xf32> {
- %empty_unpacked = tensor.empty(%dim0, %dim1) : tensor<?x?xf32>
- %packed = tensor.pack %empty_unpacked
- inner_dims_pos = [0, 1] inner_tiles = [32, 32]
- into %arg0 : tensor<?x?xf32> -> tensor<?x?x32x32xf32>
- return %packed : tensor<?x?x32x32xf32>
-}
-
-// CHECK-LABEL: func.func @pack_empty_dynamic(
-// CHECK-SAME: %[[T:.+]]: tensor<?x?x32x32xf32>,
-// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index,
-// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index
-// CHECK-NOT: tensor.pack
-// CHECK: return %[[T]] : tensor<?x?x32x32xf32>
-
-func.func @unpack_empty(%arg0: tensor<256x256xf32>) -> tensor<256x256xf32> {
- %empty_packed = tensor.empty() : tensor<8x8x32x32xf32>
- %unpacked = tensor.unpack %empty_packed
- inner_dims_pos = [0, 1] inner_tiles = [32, 32]
- into %arg0 : tensor<8x8x32x32xf32> -> tensor<256x256xf32>
- return %unpacked : tensor<256x256xf32>
-}
-
-// CHECK-LABEL: func.func @unpack_empty(
-// CHECK-SAME: %[[T:.+]]: tensor<256x256xf32>
-// CHECK-NOT: tensor.unpack
-// CHECK: return %[[T]] : tensor<256x256xf32>
-
-func.func @unpack_empty_dynamic(%arg0: tensor<?x?xf32>, %dim0: index, %dim1: index) -> tensor<?x?xf32> {
- %empty_packed = tensor.empty(%dim0, %dim1) : tensor<?x?x32x32xf32>
- %unpacked = tensor.unpack %empty_packed
- inner_dims_pos = [0, 1] inner_tiles = [32, 32]
- into %arg0 : tensor<?x?x32x32xf32> -> tensor<?x?xf32>
- return %unpacked : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: func.func @unpack_empty_dynamic(
-// CHECK-SAME: %[[T:.+]]: tensor<?x?xf32>,
-// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index,
-// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index
-// CHECK-NOT: tensor.unpack
-// CHECK: return %[[T]] : tensor<?x?xf32>
-
-func.func @pack_padded_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> {
- %pad = arith.constant 1.0 : f32
- %empty_unpacked = tensor.empty() : tensor<256x256xf32>
- %packed = tensor.pack %empty_unpacked
- padding_value(%pad : f32)
- inner_dims_pos = [0, 1] inner_tiles = [32, 32]
- into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32>
- return %packed : tensor<8x8x32x32xf32>
-}
-
-// CHECK-LABEL: func.func @pack_padded_empty(
-// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack
-// CHECK: return %[[PACK]] : tensor<8x8x32x32xf32>
-
// -----
module attributes {transform.with_named_sequence} {
diff --git a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
index bff913f5f55feb..84eb60248b8bea 100644
--- a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
+++ b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
@@ -1,8 +1,8 @@
-// RUN: mlir-opt -split-input-file -test-tensor-transform-patterns=test-fold-into-pack-and-unpack %s | FileCheck %s
+// RUN: mlir-opt -split-input-file -test-linalg-transform-patterns=test-fold-into-pack-and-unpack %s | FileCheck %s
func.func @fold_unpack_slice(%arg0 : tensor<?x?x8x4xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : index, %arg3 : index) -> tensor<?x?xf32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
: tensor<?x?x8x4xf32> -> tensor<?x?xf32>
%1 = tensor.extract_slice %0[0, 0] [%arg2, %arg3] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
return %1 : tensor<?x?xf32>
@@ -13,7 +13,7 @@ func.func @fold_unpack_slice(%arg0 : tensor<?x?x8x4xf32>, %arg1 : tensor<?x?xf32
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
// CHECK: %[[INIT:.+]] = tensor.empty(%[[ARG2]], %[[ARG3]]) : tensor<?x?xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [0, 1] inner_tiles = [8, 4]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [0, 1] inner_tiles = [8, 4]
// CHECK-SAME: into %[[INIT]]
// CHECK: return %[[UNPACK]]
@@ -21,39 +21,39 @@ func.func @fold_unpack_slice(%arg0 : tensor<?x?x8x4xf32>, %arg1 : tensor<?x?xf32
func.func @nofold_unpack_slice_non_zero_offset(%arg0 : tensor<?x?x8x4xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : index, %arg3 : index, %arg4 : index) -> tensor<?x?xf32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
: tensor<?x?x8x4xf32> -> tensor<?x?xf32>
%1 = tensor.extract_slice %0[0, %arg4] [%arg2, %arg3] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
return %1 : tensor<?x?xf32>
}
// CHECK-LABEL: func @nofold_unpack_slice_non_zero_offset(
-// CHECK: %[[UNPACK:.+]] = tensor.unpack
+// CHECK: %[[UNPACK:.+]] = linalg.unpack
// CHECK: tensor.extract_slice %[[UNPACK]]
// -----
func.func @nofold_unpack_slice_non_unit_stride(%arg0 : tensor<?x?x8x4xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : index, %arg3 : index, %arg4 : index) -> tensor<?x?xf32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
: tensor<?x?x8x4xf32> -> tensor<?x?xf32>
%1 = tensor.extract_slice %0[0, 0] [%arg2, %arg3] [%arg4, 1] : tensor<?x?xf32> to tensor<?x?xf32>
return %1 : tensor<?x?xf32>
}
// CHECK-LABEL: func @nofold_unpack_slice_non_unit_stride(
-// CHECK: %[[UNPACK:.+]] = tensor.unpack
+// CHECK: %[[UNPACK:.+]] = linalg.unpack
// CHECK: tensor.extract_slice %[[UNPACK]]
// -----
func.func @nofold_unpack_slice_rank_reduced(%arg0 : tensor<?x?x8x4xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : index, %arg3 : index) -> tensor<f32> {
- %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
+ %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
: tensor<?x?x8x4xf32> -> tensor<?x?xf32>
%1 = tensor.extract_slice %0[0, 0] [1, 1] [1, 1] : tensor<?x?xf32> to tensor<f32>
return %1 : tensor<f32>
}
// CHECK-LABEL: func @nofold_unpack_slice_rank_reduced(
-// CHECK: %[[UNPACK:.+]] = tensor.unpack
+// CHECK: %[[UNPACK:.+]] = linalg.unpack
// CHECK: tensor.extract_slice %[[UNPACK]]
// -----
@@ -66,7 +66,7 @@ func.func @pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32> {
tensor.yield %cst : f32
} : tensor<16641x16xf32> to tensor<16656x16xf32>
%empty = tensor.empty() : tensor<2082x1x8x32xf32>
- %pack = tensor.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty
+ %pack = linalg.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty
: tensor<16656x16xf32> -> tensor<2082x1x8x32xf32>
return %pack : tensor<2082x1x8x32xf32>
}
@@ -74,7 +74,7 @@ func.func @pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32> {
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK: %[[PAD_VAL:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[DEST:.+]] = tensor.empty() : tensor<2082x1x8x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]]
// CHECK-SAME: padding_value(%[[PAD_VAL]] : f32)
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %[[DEST]]
@@ -88,13 +88,13 @@ func.func @nofold_pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32
tensor.yield %cst : f32
} : tensor<16641x16xf32> to tensor<16656x16xf32>
%empty = tensor.empty() : tensor<2082x1x8x32xf32>
- %pack = tensor.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty
+ %pack = linalg.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty
: tensor<16656x16xf32> -> tensor<2082x1x8x32xf32>
return %pack : tensor<2082x1x8x32xf32>
}
// CHECK-LABEL: func.func @nofold_pad_pack
// CHECK: tensor.pad
-// CHECK: tensor.pack
+// CHECK: linalg.pack
// -----
@@ -107,19 +107,19 @@ func.func @pad_pack_different_padding_value(%src: tensor<16641x16xf32>) -> tenso
tensor.yield %cst0 : f32
} : tensor<16641x16xf32> to tensor<16656x16xf32>
%empty = tensor.empty() : tensor<2082x1x8x32xf32>
- %pack = tensor.pack %padded padding_value(%cst1 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty
+ %pack = linalg.pack %padded padding_value(%cst1 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty
: tensor<16656x16xf32> -> tensor<2082x1x8x32xf32>
return %pack : tensor<2082x1x8x32xf32>
}
// CHECK-LABEL: func.func @pad_pack_different_padding_value
// CHECK: tensor.pad
-// CHECK: tensor.pack
+// CHECK: linalg.pack
// -----
-func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> {
+func.func @linalg_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> {
%0 = tensor.empty() : tensor<56x2x1x57x32xf32>
- %pack = tensor.pack %arg0
+ %pack = linalg.pack %arg0
outer_dims_perm = [0, 3, 2, 1]
inner_dims_pos = [3]
inner_tiles = [32]
@@ -132,10 +132,10 @@ func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> t
permutation = [2, 3, 0, 1, 4]
return %transposed : tensor<1x57x56x2x32xf32>
}
-// CHECK: func @tensor_pack_linalg_transpose_fold(
+// CHECK: func @linalg_pack_linalg_transpose_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 1, 0, 3]
// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[INIT]]
@@ -143,9 +143,9 @@ func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> t
// -----
-func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> {
+func.func @linalg_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> {
%0 = tensor.empty() : tensor<56x2x1x57x32xf32>
- %pack = tensor.pack %arg0 padding_value(%padding : f32)
+ %pack = linalg.pack %arg0 padding_value(%padding : f32)
outer_dims_perm = [0, 3, 2, 1]
inner_dims_pos = [3]
inner_tiles = [32]
@@ -158,10 +158,10 @@ func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x
permutation = [2, 3, 0, 1, 4]
return %transposed : tensor<1x57x56x2x32xf32>
}
-// CHECK: func @tensor_pack_linalg_transpose_fold_with_padding(
+// CHECK: func @linalg_pack_linalg_transpose_fold_with_padding(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x55xf32>, %[[PADDING:.+]]: f32)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[PADDING]] : f32)
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[PADDING]] : f32)
// CHECK-SAME: outer_dims_perm = [2, 1, 0, 3]
// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[INIT]]
@@ -169,9 +169,9 @@ func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x
// -----
-func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x2x56x57x32xf32> {
+func.func @linalg_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x2x56x57x32xf32> {
%0 = tensor.empty() : tensor<56x57x1x2x32xf32>
- %pack = tensor.pack %arg0
+ %pack = linalg.pack %arg0
inner_dims_pos = [3]
inner_tiles = [32]
into %0 : tensor<56x57x1x64xf32> -> tensor<56x57x1x2x32xf32>
@@ -183,10 +183,10 @@ func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56
permutation = [2, 3, 0, 1, 4]
return %transposed : tensor<1x2x56x57x32xf32>
}
-// CHECK: func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(
+// CHECK: func @linalg_pack_linalg_transpose_fold_no_outer_dims_perm(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x2x56x57x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 3, 0, 1]
// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[INIT]]
@@ -194,9 +194,9 @@ func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56
// -----
-func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<12x56x4x9x32x8x2xf32> {
+func.func @linalg_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<12x56x4x9x32x8x2xf32> {
%0 = tensor.empty() : tensor<4x9x12x56x8x2x32xf32>
- %pack = tensor.pack %arg0
+ %pack = linalg.pack %arg0
outer_dims_perm = [3, 1, 2, 0]
inner_dims_pos = [1, 2, 3]
inner_tiles = [8, 2, 32]
@@ -209,10 +209,10 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<5
permutation = [2, 3, 0, 1, 6, 4, 5]
return %transposed : tensor<12x56x4x9x32x8x2xf32>
}
-// CHECK: func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(
+// CHECK: func @linalg_pack_linalg_transpose_fold_tile_dims_transpose(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x72x24x128xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<12x56x4x9x32x8x2xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 0, 3, 1]
// CHECK-SAME: inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2]
// CHECK-SAME: into %[[INIT]]
@@ -220,9 +220,9 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<5
// -----
-func.func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<9x56x2x12x32x8x4xf32> {
+func.func @linalg_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<9x56x2x12x32x8x4xf32> {
%0 = tensor.empty() : tensor<4x12x9x56x8x2x32xf32>
- %pack = tensor.pack %arg0
+ %pack = linalg.pack %arg0
outer_dims_perm = [3, 2, 1, 0]
inner_dims_pos = [1, 2, 3]
inner_tiles = [8, 2, 32]
@@ -235,16 +235,16 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg
permutation = [2, 3, 5, 1, 6, 4, 0]
return %transposed : tensor<9x56x2x12x32x8x4xf32>
}
-// CHECK: func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(
+// CHECK: func @linalg_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x72x24x128xf32>)
-// CHECK: tensor.pack
+// CHECK: linalg.pack
// CHECK: linalg.transpose
// -----
-func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56x?x?x64xf32>) -> tensor<?x?x56x2x32xf32> {
+func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56x?x?x64xf32>) -> tensor<?x?x56x2x32xf32> {
%0 = tensor.empty() : tensor<56x2x1x57x32xf32>
- %pack = tensor.pack %arg0
+ %pack = linalg.pack %arg0
outer_dims_perm = [0, 3, 2, 1]
inner_dims_pos = [3]
inner_tiles = [32]
@@ -259,14 +259,14 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56
%return_value = tensor.cast %transposed : tensor<1x57x56x2x32xf32> to tensor<?x?x56x2x32xf32>
return %return_value : tensor<?x?x56x2x32xf32>
}
-// CHECK: func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(
+// CHECK: func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x?x?x64xf32>)
// CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index
// CHECK: %[[dim:.+]] = tensor.dim %[[ARG0]], %[[c1]] : tensor<56x?x?x64xf32>
// CHECK: %[[dim_0:.+]] = tensor.dim %[[ARG0]], %[[c2]] : tensor<56x?x?x64xf32>
// CHECK: %[[INIT:.+]] = tensor.empty(%[[dim_0]], %[[dim]]) : tensor<?x?x56x2x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 1, 0, 3]
// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[INIT]]
@@ -274,9 +274,9 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56
// -----
-func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: tensor<56x?x?x128xf32>) -> tensor<?x?x56x9x32x8x2xf32> {
+func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: tensor<56x?x?x128xf32>) -> tensor<?x?x56x9x32x8x2xf32> {
%0 = tensor.empty() : tensor<56x9x12x4x8x2x32xf32>
- %pack = tensor.pack %arg0
+ %pack = linalg.pack %arg0
inner_dims_pos = [1, 2, 3]
inner_tiles = [8, 2, 32]
into %0 : tensor<56x?x?x128xf32> -> tensor<56x9x12x4x8x2x32xf32>
@@ -292,7 +292,7 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0:
}
// CHECK-DAG: #[[$MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)>
-// CHECK-LABEL: func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(
+// CHECK-LABEL: func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x?x?x128xf32>)
// CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index
@@ -301,15 +301,15 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0:
// CHECK: %[[mapped_dim1:.+]] = affine.apply #[[$MAP0]]()[%[[dim]]]
// CHECK: %[[mapped_dim2:.+]] = affine.apply #[[$MAP1]]()[%[[dim_0]]]
// CHECK: %[[INIT:.+]] = tensor.empty(%[[mapped_dim2]], %[[mapped_dim1]]) : tensor<?x4x56x?x32x8x2xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [2, 3, 0, 1] inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] into %[[INIT]] : tensor<56x?x?x128xf32> -> tensor<?x4x56x?x32x8x2xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [2, 3, 0, 1] inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] into %[[INIT]] : tensor<56x?x?x128xf32> -> tensor<?x4x56x?x32x8x2xf32>
// CHECK: %[[CAST:.+]] = tensor.cast %[[PACK]] : tensor<?x4x56x?x32x8x2xf32> to tensor<?x?x56x9x32x8x2xf32>
// CHECK: return %[[CAST]] : tensor<?x?x56x9x32x8x2xf32>
// CHECK: }
// -----
-func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> {
- %pack = tensor.pack %arg0
+func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> {
+ %pack = linalg.pack %arg0
outer_dims_perm = [3, 0, 2, 1]
inner_dims_pos = [1, 2, 3]
inner_tiles = [%tile_p, %tile_q, %tile_r]
@@ -324,7 +324,7 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_s
}
// CHECK: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)>
// CHECK: module {
-// CHECK: func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(
+// CHECK: func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>,
// CHECK-SAME: %[[PACK_DEST:.+]]: tensor<?x?x?x?x?x?x?xf32>, %[[TRANSPOSE_DEST:.+]]: tensor<?x?x?x?x?x?x?xf32>,
// CHECK-SAME: %[[ARG1:.+]]: index, %[[ARG2:.+]]: index,
@@ -341,13 +341,13 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_s
// CHECK: %[[mapped_dim1:.+]] = affine.apply #[[$MAP]]()[%[[dim_0]], %[[ARG1]]]
// CHECK: %[[mapped_dim2:.+]] = affine.apply #[[$MAP]]()[%[[dim_1]], %[[ARG2]]]
// CHECK: %[[INIT:.+]] = tensor.empty(%[[mapped_dim2]], %[[mapped_dim1]], %[[mapped_dim0]], %[[dim]], %[[ARG3]], %[[ARG1]], %[[ARG2]]) : tensor<?x?x?x?x?x?x?xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1, 2] inner_tiles = [%[[ARG3]], %[[ARG1]], %[[ARG2]]] into %[[INIT]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1, 2] inner_tiles = [%[[ARG3]], %[[ARG1]], %[[ARG2]]] into %[[INIT]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32>
// CHECK: return %[[PACK]] : tensor<?x?x?x?x?x?x?xf32>
// CHECK: }
// -----
-func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> {
+func.func @linalg_transpose_linalg_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> {
%0 = tensor.empty() : tensor<1x56x57x64xf32>
%transposed = linalg.transpose
ins(%arg0 : tensor<56x57x1x64xf32>)
@@ -355,17 +355,17 @@ func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> t
permutation = [2, 0, 1, 3]
%1 = tensor.empty() : tensor<1x57x56x2x32xf32>
- %pack = tensor.pack %transposed
+ %pack = linalg.pack %transposed
outer_dims_perm = [0, 2, 1, 3]
inner_dims_pos = [3]
inner_tiles = [32]
into %1 : tensor<1x56x57x64xf32> -> tensor<1x57x56x2x32xf32>
return %pack : tensor<1x57x56x2x32xf32>
}
-//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold(
+//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 1, 0, 3]
// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[INIT]]
@@ -373,7 +373,7 @@ func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> t
// -----
-func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> {
+func.func @linalg_transpose_linalg_pack_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> {
%0 = tensor.empty() : tensor<1x56x57x55xf32>
%transpose = linalg.transpose
ins(%arg0 : tensor<56x57x1x55xf32>)
@@ -381,17 +381,17 @@ func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x
permutation = [2, 0, 1, 3]
%1 = tensor.empty() : tensor<1x57x56x2x32xf32>
- %pack = tensor.pack %transpose padding_value(%padding : f32)
+ %pack = linalg.pack %transpose padding_value(%padding : f32)
outer_dims_perm = [0, 2, 1, 3]
inner_dims_pos = [3]
inner_tiles = [32]
into %1 : tensor<1x56x57x55xf32> -> tensor<1x57x56x2x32xf32>
return %pack : tensor<1x57x56x2x32xf32>
}
-//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold_with_padding(
+//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold_with_padding(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x55xf32>, %[[PADDING:.+]]: f32)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[PADDING]] : f32)
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[PADDING]] : f32)
// CHECK-SAME: outer_dims_perm = [2, 1, 0, 3]
// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[INIT]]
@@ -399,7 +399,7 @@ func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x
// -----
-func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x56x57x2x32xf32> {
+func.func @linalg_transpose_linalg_pack_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x56x57x2x32xf32> {
%0 = tensor.empty() : tensor<1x56x57x64xf32>
%transposed = linalg.transpose
ins(%arg0 : tensor<56x57x1x64xf32>)
@@ -407,16 +407,16 @@ func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56
permutation = [2, 0, 1, 3]
%1 = tensor.empty() : tensor<1x56x57x2x32xf32>
- %pack = tensor.pack %transposed
+ %pack = linalg.pack %transposed
inner_dims_pos = [3]
inner_tiles = [32]
into %1 : tensor<1x56x57x64xf32> -> tensor<1x56x57x2x32xf32>
return %pack : tensor<1x56x57x2x32xf32>
}
-//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(
+//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold_no_outer_dims_perm(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x56x57x2x32xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 0, 1, 3]
// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
// CHECK-SAME: into %[[INIT]]
@@ -424,25 +424,25 @@ func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56
// -----
-func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change(%arg0: tensor<25x30x35x40xf32>, %transpose_dest: tensor<35x40x25x30xf32>, %pack_dest: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> {
+func.func @linalg_transpose_linalg_pack_fold_complex_inner_dims_change(%arg0: tensor<25x30x35x40xf32>, %transpose_dest: tensor<35x40x25x30xf32>, %pack_dest: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> {
%transposed = linalg.transpose
ins(%arg0 : tensor<25x30x35x40xf32>)
outs(%transpose_dest : tensor<35x40x25x30xf32>)
permutation = [2, 3, 0, 1]
- %pack = tensor.pack %transposed
+ %pack = linalg.pack %transposed
outer_dims_perm = [3, 0, 2, 1]
inner_dims_pos = [1, 3, 2]
inner_tiles = [5, 10, 5]
into %pack_dest : tensor<35x40x25x30xf32> -> tensor<3x35x5x8x5x10x5xf32>
return %pack : tensor<3x35x5x8x5x10x5xf32>
}
-//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change(
+//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_fold_complex_inner_dims_change(
// CHECK-SAME: %[[ARG0:.+]]: tensor<25x30x35x40xf32>,
// CHECK-SAME: %[[ARG1:.+]]: tensor<35x40x25x30xf32>,
// CHECK-SAME: %[[ARG2:.+]]: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> {
// CHECK: %[[VAL0:.+]] = tensor.empty() : tensor<3x35x5x8x5x10x5xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 2, 0, 3]
// CHECK-SAME: inner_dims_pos = [3, 1, 0]
// CHECK-SAME: inner_tiles = [5, 10, 5]
@@ -451,13 +451,13 @@ func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change(%arg0: te
// -----
-func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> {
+func.func @linalg_transpose_linalg_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> {
%transposed = linalg.transpose
ins(%arg0 : tensor<?x?x?x?xf32>)
outs(%transpose_dest : tensor<?x?x?x?xf32>)
permutation = [2, 3, 0, 1]
- %pack = tensor.pack %transposed
+ %pack = linalg.pack %transposed
outer_dims_perm = [3, 0, 2, 1]
inner_dims_pos = [1, 3, 2]
inner_tiles = [%tile_p, %tile_q, %tile_r]
@@ -465,7 +465,7 @@ func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_s
return %pack : tensor<?x?x?x?x?x?x?xf32>
}
// CHECK: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)>
-//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes(
+//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?x?xf32>,
// CHECK-SAME: %[[ARG2:.+]]: tensor<?x?x?x?x?x?x?xf32>, %[[ARG3:.+]]: index, %[[ARG4:.+]]: index, %[[ARG5:.+]]: index) -> tensor<?x?x?x?x?x?x?xf32> {
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -480,12 +480,12 @@ func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_s
// CHECK: %[[VAL1:.+]] = affine.apply #[[$MAP]]()[%[[DIM0]], %[[ARG4]]]
// CHECK: %[[VAL2:.+]] = affine.apply #[[$MAP]]()[%[[DIM]], %[[ARG5]]]
// CHECK: %[[VAL3:.+]] = tensor.empty(%[[VAL1]], %[[DIM1]], %[[VAL2]], %[[VAL0]], %[[ARG3]], %[[ARG4]], %[[ARG5]]) : tensor<?x?x?x?x?x?x?xf32>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [1, 2, 0, 3] inner_dims_pos = [3, 1, 0] inner_tiles = [%[[ARG3]], %[[ARG4]], %[[ARG5]]] into %[[VAL3]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32>
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [1, 2, 0, 3] inner_dims_pos = [3, 1, 0] inner_tiles = [%[[ARG3]], %[[ARG4]], %[[ARG5]]] into %[[VAL3]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32>
// CHECK: return %[[PACK]] : tensor<?x?x?x?x?x?x?xf32>
// -----
-func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> {
+func.func @linalg_transpose_linalg_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : bf16
%dim = tensor.dim %arg0, %c0 : tensor<?x32x128xbf16>
@@ -497,7 +497,7 @@ func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf
permutation = [1, 2, 0]
%2 = tensor.empty(%dim) : tensor<32x?x64x16x2xbf16>
- %pack = tensor.pack %transposed
+ %pack = linalg.pack %transposed
padding_value(%cst : bf16)
outer_dims_perm = [0, 2, 1]
inner_dims_pos = [2, 1]
@@ -506,14 +506,14 @@ func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf
return %pack : tensor<32x?x64x16x2xbf16>
}
// CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)>
-//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_multiple_tiles(
+//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_multiple_tiles(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> {
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : bf16
// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x32x128xbf16>
// CHECK: %[[VAL0:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]]
// CHECK: %[[VAL1:.+]] = tensor.empty(%[[VAL0]]) : tensor<32x?x64x16x2xbf16>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]]
// CHECK-SAME: padding_value(%[[CST]] : bf16)
// CHECK-SAME: outer_dims_perm = [1, 0, 2]
// CHECK-SAME: inner_dims_pos = [0, 2]
@@ -524,23 +524,23 @@ func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf
// -----
-func.func @linalg_transpose_tensor_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> {
+func.func @linalg_transpose_linalg_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> {
%0 = tensor.empty() : tensor<1x1x16x4xi32>
%transposed = linalg.transpose ins(%arg0 : tensor<1x1x4x16xi32>)
outs(%0 : tensor<1x1x16x4xi32>)
permutation = [1, 0, 3, 2]
%1 = tensor.empty() : tensor<16x4xi32>
- %unpack = tensor.unpack %transposed
+ %unpack = linalg.unpack %transposed
outer_dims_perm = [0, 1]
inner_dims_pos = [0, 1]
inner_tiles = [16, 4] into
%1 : tensor<1x1x16x4xi32> -> tensor<16x4xi32>
return %unpack : tensor<16x4xi32>
}
-//CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold(
+//CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> {
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<16x4xi32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 0]
// CHECK-SAME: inner_dims_pos = [1, 0]
// CHECK-SAME: inner_tiles = [4, 16]
@@ -550,23 +550,23 @@ func.func @linalg_transpose_tensor_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> t
// -----
-func.func @linalg_transpose_tensor_unpack_fold_partial_tile(%arg0: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> {
+func.func @linalg_transpose_linalg_unpack_fold_partial_tile(%arg0: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> {
%0 = tensor.empty() : tensor<1x1x16x4xi32>
%transposed = linalg.transpose ins(%arg0 : tensor<1x1x4x16xi32>)
outs(%0 : tensor<1x1x16x4xi32>)
permutation = [1, 0, 3, 2]
%1 = tensor.empty() : tensor<15x3xi32>
- %unpack = tensor.unpack %transposed
+ %unpack = linalg.unpack %transposed
outer_dims_perm = [0, 1]
inner_dims_pos = [0, 1]
inner_tiles = [16, 4] into
%1 : tensor<1x1x16x4xi32> -> tensor<15x3xi32>
return %unpack : tensor<15x3xi32>
}
-//CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold_partial_tile(
+//CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold_partial_tile(
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> {
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<15x3xi32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 0]
// CHECK-SAME: inner_dims_pos = [1, 0]
// CHECK-SAME: inner_tiles = [4, 16]
@@ -576,20 +576,20 @@ func.func @linalg_transpose_tensor_unpack_fold_partial_tile(%arg0: tensor<1x1x4x
// -----
-func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %unpack_dest: tensor<?x?xf32>, %tile_p : index, %tile_q : index) -> tensor<?x?xf32> {
+func.func @linalg_transpose_linalg_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %unpack_dest: tensor<?x?xf32>, %tile_p : index, %tile_q : index) -> tensor<?x?xf32> {
%transposed = linalg.transpose
ins(%arg0 : tensor<?x?x?x?xf32>)
outs(%transpose_dest : tensor<?x?x?x?xf32>)
permutation = [1, 0, 3, 2]
- %unpack = tensor.unpack %transposed
+ %unpack = linalg.unpack %transposed
outer_dims_perm = [1, 0]
inner_dims_pos = [0, 1]
inner_tiles = [%tile_p, %tile_q]
into %unpack_dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
return %unpack : tensor<?x?xf32>
}
-// CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes(
+// CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>,
// CHECK-SAME: %[[IDX1:.+]]: index, %[[IDX2:.+]]: index) -> tensor<?x?xf32> {
// CHECK-DAG: %[[CST1:.+]] = arith.constant 1 : index
@@ -597,7 +597,7 @@ func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile
// CHECK-DAG: %[[DIM0:.+]] = tensor.dim %[[ARG2]], %[[CST0]] : tensor<?x?xf32>
// CHECK-DAG: %[[DIM1:.+]] = tensor.dim %[[ARG2]], %[[CST1]] : tensor<?x?xf32>
// CHECK: %[[OUT:.+]] = tensor.empty(%[[DIM0]], %[[DIM1]]) : tensor<?x?xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 1]
// CHECK-SAME: inner_dims_pos = [1, 0]
// CHECK-SAME: inner_tiles = [%[[IDX2]], %[[IDX1]]]
@@ -607,9 +607,9 @@ func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile
// -----
-func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> {
+func.func @linalg_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> {
%0 = tensor.empty() : tensor<56x3648xf32>
- %pack = tensor.unpack %arg0
+ %pack = linalg.unpack %arg0
outer_dims_perm = [0, 1]
inner_dims_pos = [0, 1]
inner_tiles = [1, 64]
@@ -622,10 +622,10 @@ func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) ->
permutation = [1,0]
return %transposed : tensor<3648x56xf32>
}
-// CHECK-LABEL: func.func @tensor_unpack_linalg_transpose_fold(
+// CHECK-LABEL: func.func @linalg_unpack_linalg_transpose_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> {
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x56xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 0]
// CHECK-SAME: inner_dims_pos = [1, 0]
// CHECK-SAME: inner_tiles = [1, 64]
@@ -637,7 +637,7 @@ func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) ->
func.func @tensor_padded_unpack_linalg_transpose_fold(%arg0: tensor<71x7x4x16x16xf32>) -> tensor<100x71x64xf32> {
%0 = tensor.empty() : tensor<71x100x64xf32>
- %pack = tensor.unpack %arg0
+ %pack = linalg.unpack %arg0
inner_dims_pos = [1, 2]
inner_tiles = [16, 16]
into %0 : tensor<71x7x4x16x16xf32> -> tensor<71x100x64xf32>
@@ -652,7 +652,7 @@ func.func @tensor_padded_unpack_linalg_transpose_fold(%arg0: tensor<71x7x4x16x16
// CHECK-LABEL: func.func @tensor_padded_unpack_linalg_transpose_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<71x7x4x16x16xf32>) -> tensor<100x71x64xf32> {
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<100x71x64xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [1, 0, 2]
// CHECK-SAME: inner_dims_pos = [0, 2]
// CHECK-SAME: inner_tiles = [16, 16]
@@ -668,7 +668,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -
outs(%0 : tensor<5x2x3x16x4xi32>)
permutation = [2, 0, 1, 4, 3]
%1 = tensor.empty() : tensor<5x48x8xi32>
- %unpack = tensor.unpack %transposed
+ %unpack = linalg.unpack %transposed
outer_dims_perm = [0, 2, 1]
inner_dims_pos = [1, 2]
inner_tiles = [16, 4] into
@@ -678,7 +678,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -
//CHECK-LABEL: func.func @non_involution_transpose_unpack_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 1, 0]
// CHECK-SAME: inner_dims_pos = [2, 1]
// CHECK-SAME: inner_tiles = [4, 16]
@@ -690,7 +690,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -
func.func @unpack_non_involution_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
%0 = tensor.empty() : tensor<3x56x3648xf32>
- %unpack = tensor.unpack %arg0
+ %unpack = linalg.unpack %arg0
outer_dims_perm = [2, 0, 1]
inner_dims_pos = [1, 2]
inner_tiles = [1, 64]
@@ -706,7 +706,7 @@ func.func @unpack_non_involution_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>)
// CHECK-LABEL: func.func @unpack_non_involution_transpose_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 1, 2]
// CHECK-SAME: inner_dims_pos = [2, 0]
// CHECK-SAME: inner_tiles = [1, 64]
@@ -722,7 +722,7 @@ func.func @transpose_unpacked_dims_no_fold(%arg0: tensor<2x16x5x4x3xi32>) -> ten
outs(%0 : tensor<5x2x3x16x4xi32>)
permutation = [2, 0, 4, 1, 3]
%1 = tensor.empty() : tensor<5x32x12xi32>
- %unpack = tensor.unpack %transposed
+ %unpack = linalg.unpack %transposed
inner_dims_pos = [1, 2]
inner_tiles = [16, 4] into
%1 : tensor<5x2x3x16x4xi32> -> tensor<5x32x12xi32>
@@ -730,7 +730,7 @@ func.func @transpose_unpacked_dims_no_fold(%arg0: tensor<2x16x5x4x3xi32>) -> ten
}
//CHECK-LABEL: func.func @transpose_unpacked_dims_no_fold(
// CHECK: linalg.transpose
-// CHECK: tensor.unpack
+// CHECK: linalg.unpack
// -----
@@ -747,7 +747,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso
linalg.yield %in : i32
} -> tensor<5x2x3x16x4xi32>
%1 = tensor.empty() : tensor<5x48x8xi32>
- %unpack = tensor.unpack %transposed
+ %unpack = linalg.unpack %transposed
outer_dims_perm = [0, 2, 1]
inner_dims_pos = [1, 2]
inner_tiles = [16, 4] into
@@ -757,7 +757,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso
//CHECK-LABEL: func.func @generic_transpose_unpack_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> {
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [2, 1, 0]
// CHECK-SAME: inner_dims_pos = [2, 1]
// CHECK-SAME: inner_tiles = [4, 16]
@@ -771,7 +771,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso
#map1 = affine_map<(d0, d1, d2)->(d0, d1, d2)>
func.func @unpack_generic_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
%0 = tensor.empty() : tensor<3x56x3648xf32>
- %unpack = tensor.unpack %arg0
+ %unpack = linalg.unpack %arg0
outer_dims_perm = [2, 0, 1]
inner_dims_pos = [1, 2]
inner_tiles = [1, 64]
@@ -791,7 +791,7 @@ func.func @unpack_generic_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> ten
// CHECK-LABEL: func.func @unpack_generic_transpose_fold(
// CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> {
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32>
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]]
// CHECK-SAME: outer_dims_perm = [0, 1, 2]
// CHECK-SAME: inner_dims_pos = [2, 0]
// CHECK-SAME: inner_tiles = [1, 64]
diff --git a/mlir/test/Dialect/Tensor/tiling.mlir b/mlir/test/Dialect/Tensor/tiling.mlir
index 193fbe93e0f9ee..04a99b5fd0d686 100644
--- a/mlir/test/Dialect/Tensor/tiling.mlir
+++ b/mlir/test/Dialect/Tensor/tiling.mlir
@@ -224,495 +224,3 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)>
-// CHECK: func.func @NC_to_NCnc
-// CHECK-SAME: %[[IN:.*]]: tensor<128x256xf32>,
-// CHECK-SAME: %[[OUT:.*]]: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> {
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK: %[[RES0:.*]] = scf.for %[[N:.*]] = %[[C0]] to %[[C4]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<4x8x32x32xf32>) {
-// CHECK: %[[RES1:.+]] = scf.for %[[C:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<4x8x32x32xf32>) {
-// CHECK-DAG: %[[IN_N:.+]] = affine.apply #[[MAP0]](%[[N]])
-// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]])
-// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_N]], %[[IN_C]]] [64, 128] [1, 1] : tensor<128x256xf32> to tensor<64x128xf32>
-// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[N]], %[[C]], 0, 0] [2, 4, 32, 32] [1, 1, 1, 1] : tensor<4x8x32x32xf32> to tensor<2x4x32x32xf32>
-// CHECK: %[[SUB_RES:.*]] = tensor.pack
-// CHECK-SAME: %[[SUB_IN]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[SUB_OUT]]
-// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
-// CHECK: scf.yield %[[INSERT]] : tensor<4x8x32x32xf32>
-// CHECK: }
-// CHECK: scf.yield %[[RES1:.*]] : tensor<4x8x32x32xf32>
-// CHECK: }
-// CHECK: return %[[RES0:.*]] : tensor<4x8x32x32xf32>
-// CHECK: }
-func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> {
- %0 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %arg1 : tensor<128x256xf32> -> tensor<4x8x32x32xf32>
- return %0 : tensor<4x8x32x32xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 8)>
-// CHECK: func.func @KC_to_CKkc
-// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
-// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
-// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index
-// CHECK: scf.for %[[C:.+]] = %[[C0]] to %[[C32]] step %[[C2]]
-// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]])
-// CHECK: %[[INPUT_SLICE:.+]] = tensor.extract_slice %[[IN]]
-// CHECK-SAME: [0, %[[IN_C]]] [128, 16]
-// CHECK: %[[OUTPUT_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[C]], 0, 0, 0] [2, 4, 32, 8]
-// CHECK: tensor.pack
-// CHECK-SAME: %[[INPUT_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8]
-// CHECK-SAME: into %[[OUTPUT_SLICE]]
-func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> {
- %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32>
- return %0 : tensor<32x4x32x8xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -2 + 15, 8)>
-// CHECK: func.func @pad_and_pack_static(
-// CHECK-SAME: %[[IN:.*]]: tensor<13x15xf32>,
-// CHECK-SAME: %[[OUT:.*]]: tensor<2x8x8x2xf32>,
-// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<2x8x8x2xf32> {
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK-DAG: %[[RES0:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[OUT]]) -> (tensor<2x8x8x2xf32>) {
-// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP0]](%[[J]])
-// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])
-// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][0, %[[IN_J]]] [13, %[[IN_J_SZ]]] [1, 1]
-// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][0, %[[J]], 0, 0] [2, 4, 8, 2] [1, 1, 1, 1]
-// CHECK: %[[SUB_RES:.*]] = tensor.pack
-// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2]
-// CHECK-SAME: into %[[SUB_OUT]]
-// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
-// CHECK: scf.yield %[[INSERT]] : tensor<2x8x8x2xf32>
-// CHECK: }
-// CHECK: return %[[RES0:.*]] : tensor<2x8x8x2xf32>
-// CHECK: }
-func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> {
- %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
- return %0 : tensor<2x8x8x2xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)>
-// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -8 + s0, d0 * 8)>
-// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 * 2)>
-// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -2 + s0, d0 * 2)>
-// CHECK: func.func @pad_and_pack_partially_dynamic(
-// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>,
-// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x8x2xf32>,
-// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<?x?x8x2xf32> {
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x8x2xf32>
-// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x8x2xf32>
-// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x8x2xf32>) {
-// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x8x2xf32>) {
-// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]]
-// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]]
-// CHECK-DAG: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])
-// CHECK-DAG: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]]
-// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP4]](%[[J]])
-// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP5]]
-// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], 8, 2] [1, 1, 1, 1] : tensor<?x?x8x2xf32> to tensor<?x?x8x2xf32>
-// CHECK: %[[SUB_RES:.*]] = tensor.pack
-// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2]
-// CHECK-SAME: into %[[SUB_OUT]]
-// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
-// CHECK: scf.yield %[[INSERT]] : tensor<?x?x8x2xf32>
-// CHECK: }
-// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x8x2xf32>
-// CHECK: }
-// CHECK: return %[[VAL_34:.*]] : tensor<?x?x8x2xf32>
-// CHECK: }
-func.func @pad_and_pack_partially_dynamic(%input: tensor<?x?xf32>, %output: tensor<?x?x8x2xf32>, %pad: f32) -> tensor<?x?x8x2xf32> {
- %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
- return %0 : tensor<?x?x8x2xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0, -(d1 * s0) + s1)>
-// CHECK: func.func @pad_and_pack_fully_dynamic(
-// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>,
-// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x?x?xf32>,
-// CHECK-SAME: %[[PAD:.*]]: f32,
-// CHECK-SAME: %[[TILE_0:.*]]: index,
-// CHECK-SAME: %[[TILE_1:.*]]: index) -> tensor<?x?x?x?xf32> {
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x?x?xf32>
-// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x?x?xf32>
-// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x?x?xf32>) {
-// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x?x?xf32>) {
-// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]]
-// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]]
-// CHECK-DAG: %[[IN_D0:.*]] = tensor.dim %[[IN]], %[[C0]]
-// CHECK-DAG: %[[IN_D1:.*]] = tensor.dim %[[IN]], %[[C1]]
-// CHECK: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])[%[[TILE_0]]]
-// CHECK: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_I_SZ]], %[[I]])[%[[TILE_0]], %[[IN_D0]]]
-// CHECK: %[[IN_J:.*]] = affine.apply #[[MAP2]](%[[J]])[%[[TILE_1]]]
-// CHECK: %[[IN_J_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_J_SZ]], %[[J]])[%[[TILE_1]], %[[IN_D1]]]
-// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK: %[[OUT_D2:.+]] = tensor.dim %[[ITER1]], %[[C2]]
-// CHECK: %[[OUT_D3:.+]] = tensor.dim %[[ITER1]], %[[C3]]
-// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], %[[OUT_D2]], %[[OUT_D3]]] [1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
-// CHECK: %[[PACK:.*]] = tensor.pack
-// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_0]], %[[TILE_1]]]
-// CHECK-SAME: into %[[SUB_OUT]]
-// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[PACK]] into %[[ITER1]]
-// CHECK: scf.yield %[[INSERT]] : tensor<?x?x?x?xf32>
-// CHECK: }
-// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x?x?xf32>
-// CHECK: }
-// CHECK: return %[[RES0:.*]] : tensor<?x?x?x?xf32>
-// CHECK: }
-func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x?x?xf32>, %pad: f32, %tile_n : index, %tile_m : index) -> tensor<?x?x?x?xf32> {
- %0 = tensor.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
- return %0 : tensor<?x?x?x?xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)>
-// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 16)>
-// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 16)>
-// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 16 - d0 floordiv 16 + 1)>
-// CHECK: func.func @NCnc_to_NC
-// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
-// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index
-// CHECK: %{{.+}} = scf.for %[[I:.+]] = %[[C0]] to %[[C256]] step %[[C2]]
-// CHECK: %{{.+}} = scf.for %[[J:.+]] = %[[C0]] to %[[C128]] step %[[C4]]
-// CHECK-DAG: %[[IN_I:.+]] = affine.apply #[[MAP0]](%[[I]])
-// CHECK-DAG: %[[OFFSET_I:.+]] = affine.apply #[[MAP1]](%[[I]])
-// CHECK-DAG: %[[IN_I_SZ:.+]] = affine.apply #[[MAP2]](%[[I]])
-// CHECK-DAG: %[[IN_J:.+]] = affine.apply #[[MAP4]](%[[J]])
-// CHECK-DAG: %[[OFFSET_J:.+]] = affine.apply #[[MAP5]](%[[J]])
-// CHECK-DAG: %[[IN_J_SZ:.+]] = affine.apply #[[MAP6]](%[[J]])
-// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[IN]]
-// CHECK-SAME: [%[[IN_I]], %[[IN_J]], 0, 0] [%[[IN_I_SZ]], %[[IN_J_SZ]], 32, 16]
-// CHECK-SAME: : tensor<8x8x32x16xf32> to tensor<?x?x32x16xf32>
-// CHECK: %[[EMPTY:.+]] = tensor.empty
-// CHECK: %[[UNPACK:.+]] = tensor.unpack
-// CHECK-SAME: %[[SLICE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16]
-// CHECK-SAME: into %[[EMPTY]]
-// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
-// CHECK-SAME: [%[[OFFSET_I]], %[[OFFSET_J]]] [2, 4]
-// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]]
-// CHECK-SAME: into %{{.+}}[%[[I]], %[[J]]] [2, 4]
-// CHECK: scf.yield %[[RES]]
-func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
- %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
- return %0 : tensor<256x128xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)>
-// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 8)>
-// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 8)>
-// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 8 - d0 floordiv 8 + 1)>
-// CHECK: func.func @CKkc_to_KC
-// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
-// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index
-// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C128]] step %[[C2]]
-// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C256]] step %[[C4]]
-// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]])
-// CHECK-DAG: %[[OFFSET_K:.+]] = affine.apply #[[MAP1]](%[[K]])
-// CHECK-DAG: %[[IN_K_SZ:.+]] = affine.apply #[[MAP2]](%[[K]])
-// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP4]](%[[C]])
-// CHECK-DAG: %[[OFFSET_C:.+]] = affine.apply #[[MAP5]](%[[C]])
-// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP6]](%[[C]])
-// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
-// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], %[[IN_K_SZ]], 32, 8]
-// CHECK: %[[EMPTY:.+]] = tensor.empty
-// CHECK: %[[UNPACK:.+]] = tensor.unpack
-// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8]
-// CHECK-SAME: into %[[EMPTY]]
-// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
-// CHECK-SAME: [%[[OFFSET_K]], %[[OFFSET_C]]] [2, 4]
-// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]]
-// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4]
-// CHECK: scf.yield %[[RES]]
-func.func @CKkc_to_KC(%source: tensor<32x4x32x8xf32>, %dest: tensor<128x256xf32>) -> tensor<128x256xf32> {
- %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %dest : tensor<32x4x32x8xf32> -> tensor<128x256xf32>
- return %0 : tensor<128x256xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 4)>
-// CHECK: func.func @perfect_CKkc_to_KC
-// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
-// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C2]]
-// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C128]] step %[[C4]]
-// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]])
-// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP1]](%[[C]])
-// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
-// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [1, 1, 2, 4]
-// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [2, 4]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack
-// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4]
-// CHECK-SAME: into %[[ITER_SLICE]]
-// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
-// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4]
-// CHECK: scf.yield %[[RES]]
-func.func @perfect_CKkc_to_KC(%source: tensor<32x4x2x4xf32>, %dest: tensor<8x128xf32>) -> tensor<8x128xf32> {
- %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %dest : tensor<32x4x2x4xf32> -> tensor<8x128xf32>
- return %0 : tensor<8x128xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
-// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 ceildiv 2)>
-// CHECK: func.func @dynamic_perfect_CKkc_to_KC
-// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[DIM_0:.+]] = tensor.dim %[[OUT]], %[[C0]]
-// CHECK-DAG: %[[DIM_1:.+]] = tensor.dim %[[OUT]], %[[C1]]
-// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[DIM_0]] step %[[C2]]
-// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[DIM_1]] step %[[C4]]
-// CHECK-DAG: %[[OUT_K_SZ:.+]] = affine.min #[[MAP0]](%[[K]])[%[[DIM_0]]]
-// CHECK-DAG: %[[OUT_C_SZ:.+]] = affine.min #[[MAP1]](%[[C]])[%[[DIM_1]]]
-// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP2]](%[[K]])
-// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]])
-// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP3]](%[[OUT_C_SZ]])
-// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
-// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], 1, 2, 2]
-// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack
-// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2]
-// CHECK-SAME: into %[[ITER_SLICE]]
-// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
-// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]]
-// CHECK: scf.yield %[[RES]]
-
-func.func @dynamic_perfect_CKkc_to_KC(%source: tensor<?x?x2x2xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> {
- %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %dest : tensor<?x?x2x2xf32> -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK: #[[MAP:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
-// CHECK: func.func @perfect_NKPQk_to_NPQK(
-// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x4x6x6x2xf32>,
-// CHECK-SAME: %{{.+}}: tensor<1x6x6x8xf32>)
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK: %{{.+}} = scf.for %[[P:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
-// CHECK: %{{.+}} = scf.for %[[Q:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
-// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C4]]
-// CHECK: %[[K_SZ:.+]] = affine.apply #[[MAP]](%[[K]])
-// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[K_SZ]], %[[P]], %[[Q]], 0]
-// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[P]], %[[Q]], %[[K]]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack
-// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2]
-// CHECK-SAME: into %[[SLICE_DEST]]
-// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
-// CHECK-SAME: into %{{.+}}[0, %[[P]], %[[Q]], %[[K]]]
-// CHECK: scf.yield %[[RES]]
-
-func.func @perfect_NKPQk_to_NPQK(%source: tensor<1x4x6x6x2xf32>, %dest: tensor<1x6x6x8xf32>) -> tensor<1x6x6x8xf32> {
- %0 = tensor.unpack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x4x6x6x2xf32> -> tensor<1x6x6x8xf32>
- return %0 : tensor<1x6x6x8xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-func.func private @get_dynamic_tile_size() -> index
-
-// CHECK-LABEL: func.func @fully_dynamic_unpack
-// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
-// CHECK-SAME: %[[DST:[0-9a-zA-Z]+]]
-// CHECK: %[[INNER_TS:.+]] = call @get_dynamic_tile_size() : () -> index
-// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[DST]])
-// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]])
-// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[SRC]]
-// CHECK: %[[EMPTY:.+]] = tensor.empty
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SLICE]]
-// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [%[[INNER_TS]], %[[INNER_TS]]] into %[[EMPTY]]
-func.func @fully_dynamic_unpack(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> {
- %0 = func.call @get_dynamic_tile_size() : () -> index
- %1 = tensor.unpack %source inner_dims_pos = [1, 0] inner_tiles = [%0, %0] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * 2)>
-// CHECK: func.func @perfect_NPQK_to_NKPQk
-// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x6x6x8xf32>,
-// CHECK-SAME: %{{.+}}: tensor<1x4x6x6x2xf32>)
-// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index
-// CHECK: %{{.+}} = scf.for %[[ARG2:.+]] = %[[C0]] to %[[C4]] step %[[C1]]
-// CHECK: %{{.+}} = scf.for %[[ARG4:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
-// CHECK: %{{.+}} = scf.for %[[ARG6:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
-// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP1]](%[[ARG2]])
-// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[ARG4]], %[[ARG6]], %[[APPLY]]]
-// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0]
-// CHECK: %[[PACK:.+]] = tensor.pack
-// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2]
-// CHECK-SAME: into %[[SLICE_DEST]]
-// CHECK: %[[RES:.+]] = tensor.insert_slice %[[PACK]]
-// CHECK-SAME: into %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0]
-// CHECK: scf.yield %[[RES]]
-
-func.func @perfect_NPQK_to_NKPQk(%source: tensor<1x6x6x8xf32>, %dest: tensor<1x4x6x6x2xf32>) -> tensor<1x4x6x6x2xf32> {
- %0 = tensor.pack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x6x6x8xf32> -> tensor<1x4x6x6x2xf32>
- return %0 : tensor<1x4x6x6x2xf32>
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
- transform.yield
- }
-}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir
index a0fd3f7d87083c..bca94d4a64416b 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir
@@ -22,7 +22,7 @@
// RUN: rm -f %t && %{compile} && %{run} | FileCheck %s
-/// End-to-end test for tensor.pack where one of the inner tile sizes is
+/// End-to-end test for linalg.pack where one of the inner tile sizes is
/// scalable.
func.func @main() {
@@ -60,7 +60,7 @@ func.func private @pack(%A: tensor<7x16xi32>) {
%A_pack_empty = tensor.empty(%c1, %tile_size) : tensor<?x16x?x1xi32>
- %A_pack = tensor.pack %A
+ %A_pack = linalg.pack %A
padding_value(%pad_val : i32)
inner_dims_pos = [0, 1]
inner_tiles = [%tile_size, 1]
@@ -117,9 +117,9 @@ func.func private @pack(%A: tensor<7x16xi32>) {
module @transforms attributes { transform.with_named_sequence } {
transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op
- // 1. Tile so that we can decompose tensor.pack into tensor.pad and other
+ // 1. Tile so that we can decompose linalg.pack into tensor.pad and other
// Ops (see step 2)
%tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
index 3a9f214ff43c30..ed3564b960c094 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
@@ -8,7 +8,7 @@
// RUN: rm -f %t && %{compile} && %{run} | FileCheck %s
-/// End-to-end test for tensor.pack where one of the inner tile sizes is
+/// End-to-end test for linalg.pack where one of the inner tile sizes is
/// dynamic.
func.func @main() {
@@ -38,7 +38,7 @@ func.func private @pack(%A: tensor<7x16xi32>) {
%tile_size = arith.constant 8 : index
%A_pack_empty = tensor.empty(%c1, %tile_size) : tensor<?x16x?x1xi32>
- %A_pack = tensor.pack %A
+ %A_pack = linalg.pack %A
padding_value(%pad_val : i32)
inner_dims_pos = [0, 1]
inner_tiles = [%tile_size, 1]
@@ -78,9 +78,9 @@ func.func private @pack(%A: tensor<7x16xi32>) {
module @transforms attributes { transform.with_named_sequence } {
transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) {
- %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op
+ %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op
- // 1. Tile so that we can decompose tensor.pack into tensor.pad and other
+ // 1. Tile so that we can decompose linalg.pack into tensor.pad and other
// Ops (see step 2)
%tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
index 10b29dd70177b5..c816a07e1e90a9 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
@@ -12,9 +12,9 @@
/// End-to-end test for computing matrix-multiplication using linalg.mmt4d. In
/// particular, demonstrates how the following MLIR sequence (implemented in @mmt4d):
///
-/// A_pack = tensor.pack A
-/// B_pack = tensor.pack B
-/// C_pack = tensor.pack C
+/// A_pack = linalg.pack A
+/// B_pack = linalg.pack B
+/// C_pack = linalg.pack C
/// out_pack = linalg.mmt4d(A_pack, B_pack, C_pack)
///
/// is equivalent to:
@@ -86,16 +86,16 @@ func.func private @mmt4d(%A: tensor<7x16xi32>, %B: tensor<16x13xi32>, %C: tensor
%C_pack_empty = tensor.empty() : tensor<2x2x8x8xi32>
// Pack matrices
- %A_pack = tensor.pack %A padding_value(%zero : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %A_pack_empty : tensor<7x16xi32> -> tensor<2x16x8x1xi32>
- %B_pack = tensor.pack %B padding_value(%zero : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %B_pack_empty : tensor<16x13xi32> -> tensor<2x16x8x1xi32>
- %C_pack = tensor.pack %C padding_value(%zero : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32>
+ %A_pack = linalg.pack %A padding_value(%zero : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %A_pack_empty : tensor<7x16xi32> -> tensor<2x16x8x1xi32>
+ %B_pack = linalg.pack %B padding_value(%zero : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %B_pack_empty : tensor<16x13xi32> -> tensor<2x16x8x1xi32>
+ %C_pack = linalg.pack %C padding_value(%zero : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32>
// MMT4D
%mmt4d = linalg.mmt4d ins(%A_pack, %B_pack : tensor<2x16x8x1xi32>, tensor<2x16x8x1xi32>) outs(%C_pack : tensor<2x2x8x8xi32>) -> tensor<2x2x8x8xi32>
// Unpack output
%C_out_empty = tensor.empty() : tensor<7x13xi32>
- %C_out_unpack = tensor.unpack %mmt4d outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32>
+ %C_out_unpack = linalg.unpack %mmt4d outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32>
return %C_out_unpack : tensor<7x13xi32>
}
@@ -146,16 +146,16 @@ module @transforms attributes { transform.with_named_sequence } {
transform.apply_patterns.canonicalization
} : !transform.op<"func.func">
- // Step 4. Lower tensor.pack
- %pack = transform.structured.match ops{["tensor.pack"]} in %func_h
- : (!transform.op<"func.func">) -> !transform.op<"tensor.pack">
- transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+ // Step 4. Lower linalg.pack
+ %pack = transform.structured.match ops{["linalg.pack"]} in %func_h
+ : (!transform.op<"func.func">) -> !transform.op<"linalg.pack">
+ transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
-> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">)
- // Step 5. Lower tensor.unpack
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %func_h
- : (!transform.op<"func.func">) -> !transform.op<"tensor.unpack">
- transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+ // Step 5. Lower linalg.unpack
+ %unpack = transform.structured.match ops{["linalg.unpack"]} in %func_h
+ : (!transform.op<"func.func">) -> !transform.op<"linalg.unpack">
+ transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
-> (!transform.op<"tensor.empty">,
!transform.op<"linalg.transpose">,
!transform.op<"tensor.collapse_shape">,
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
index cae572ff3696b8..ebc4479d74b1db 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
@@ -8,7 +8,7 @@
// RUN: rm -f %t && %{compile} && %{run} | FileCheck %s
-/// End-to-end test for tensor.unpack where one of the inner tile sizes is
+/// End-to-end test for linalg.unpack where one of the inner tile sizes is
/// dynamic.
func.func @main() {
@@ -56,7 +56,7 @@ func.func private @unpack(%A: tensor<?x3x?x1xi32>) {
%tile_size = arith.constant 8 : index
%A_unpack_empty = tensor.empty() : tensor<7x3xi32>
- %A_unpack = tensor.unpack %A
+ %A_unpack = linalg.unpack %A
inner_dims_pos = [0, 1]
inner_tiles = [%tile_size, 1]
into %A_unpack_empty : tensor<?x3x?x1xi32> -> tensor<7x3xi32>
@@ -78,9 +78,9 @@ func.func private @unpack(%A: tensor<?x3x?x1xi32>) {
module @transforms attributes { transform.with_named_sequence } {
transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) {
- %pack = transform.structured.match ops{["tensor.unpack"]} in %module : (!transform.any_op) -> !transform.any_op
+ %pack = transform.structured.match ops{["linalg.unpack"]} in %module : (!transform.any_op) -> !transform.any_op
- // 1. Tile so that we can decompose tensor.pack
+ // 1. Tile so that we can decompose linalg.pack
// Ops (see step 2)
%c8 = transform.param.constant 8 : i64 -> !transform.param<i64>
%tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [%c8, 1]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
index a2871b30698c52..d570fdeba8e2d8 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
@@ -211,7 +211,7 @@ module {
linalg.yield %7, %8 : f32, f32
} -> (tensor<64x64xf32>, tensor<64x64xf32>)
%5 = tensor.empty() : tensor<2048xf32>
- %unpack = tensor.unpack %0#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %5 : tensor<64x32xf32> -> tensor<2048xf32>
+ %unpack = linalg.unpack %0#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %5 : tensor<64x32xf32> -> tensor<2048xf32>
return %4#1, %unpack : tensor<64x64xf32>, tensor<2048xf32>
}
}
@@ -254,7 +254,7 @@ module attributes {transform.with_named_sequence} {
// CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]]#1 into %[[ELEM_OUT_ARG_1]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1]
// CHECK: }
// CHECK: }
-// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[FINAL_RESULT]]#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %{{.*}} : tensor<64x32xf32> -> tensor<2048xf32>
+// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[FINAL_RESULT]]#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %{{.*}} : tensor<64x32xf32> -> tensor<2048xf32>
// CHECK: return %[[FINAL_RESULT]]#3, %[[UNPACK]] :
// -----
@@ -278,7 +278,7 @@ module {
}
}
%output = tensor.empty() : tensor<2048xf32>
- %unpack = tensor.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2048xf32>
+ %unpack = linalg.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2048xf32>
return %unpack : tensor<2048xf32>
}
}
@@ -308,7 +308,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[UNPACK_RESULT_OFFSET:.*]] = affine.apply #[[UNPACK_RESULT_OFFSET_MAP]](%[[IV1]])
// CHECK-DAG: %[[UNPACK_RESULT_SIZE:.*]] = affine.min #[[UNPACK_RESULT_SIZE_MAP]](%[[IV1]])
// CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[UNPACK_OUT_ARG]][%[[UNPACK_RESULT_OFFSET]]] [%[[UNPACK_RESULT_SIZE]]] [1]
-// CHECK: %[[TILED_UNPACK_OUT:.*]] = tensor.unpack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_UNPACK_OUT:.*]] = linalg.unpack %[[GENERIC_OUT]]
// CHECK-SAME: outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32]
// CHECK-SAME: into %[[TILED_UNPACK_DEST]]
// CHECK: scf.forall.in_parallel {
@@ -339,7 +339,7 @@ module {
}
}
%output = tensor.empty() : tensor<2047xf32>
- %unpack = tensor.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2047xf32>
+ %unpack = linalg.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2047xf32>
return %unpack : tensor<2047xf32>
}
}
@@ -369,7 +369,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[UNPACK_RESULT_OFFSET:.*]] = affine.apply #[[UNPACK_RESULT_OFFSET_MAP]](%[[IV1]])
// CHECK-DAG: %[[UNPACK_RESULT_SIZE:.*]] = affine.min #[[UNPACK_RESULT_SIZE_MAP]](%[[IV1]])
// CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[UNPACK_OUT_ARG]][%[[UNPACK_RESULT_OFFSET]]] [%[[UNPACK_RESULT_SIZE]]] [1]
-// CHECK: %[[TILED_UNPACK_OUT:.*]] = tensor.unpack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_UNPACK_OUT:.*]] = linalg.unpack %[[GENERIC_OUT]]
// CHECK-SAME: outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32]
// CHECK-SAME: into %[[TILED_UNPACK_DEST]]
// CHECK: scf.forall.in_parallel {
@@ -400,7 +400,7 @@ module {
}
}
%output = tensor.empty() : tensor<4x32x16xf32>
- %pack = tensor.pack %1 inner_dims_pos = [0] inner_tiles = [16] into %output : tensor<64x32xf32> -> tensor<4x32x16xf32>
+ %pack = linalg.pack %1 inner_dims_pos = [0] inner_tiles = [16] into %output : tensor<64x32xf32> -> tensor<4x32x16xf32>
return %pack : tensor<4x32x16xf32>
}
}
@@ -428,7 +428,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: outs(%[[GENERIC_OUT_SLICE]] :
// CHECK: %[[PACK_RESULT_OFFSET:.*]] = affine.apply #[[PACK_RESULT_MAP]](%[[IV1]])
// CHECK: %[[TILED_PACK_DEST:.*]] = tensor.extract_slice %[[PACK_OUT_ARG]][%[[PACK_RESULT_OFFSET]], %[[IV2]], 0] [2, 32, 16] [1, 1, 1]
-// CHECK: %[[TILED_PACK_OUT:.*]] = tensor.pack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_PACK_OUT:.*]] = linalg.pack %[[GENERIC_OUT]]
// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16]
// CHECK-SAME: into %[[TILED_PACK_DEST]]
// CHECK: scf.forall.in_parallel {
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
index 5f7663af773a4a..bc27840fdf5e9f 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -591,7 +591,7 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @imperfect_unpack_producer_fusion(%source: tensor<1x1x288x8x4xf32>, %dest: tensor<1x2x1152xf32>) -> tensor<1x2x1152xf32> {
- %0 = tensor.unpack %source
+ %0 = linalg.unpack %source
outer_dims_perm = [0, 1, 2]
inner_dims_pos = [1, 2]
inner_tiles = [8, 4] into %dest
@@ -625,7 +625,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[ARG1:.+]]: tensor<1x2x1152xf32>
// CHECK: %[[FOR_RESULT:.+]] = scf.for{{.*}}iter_args(%[[ITER_ARG:.+]] = {{.*}})
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SLICE]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SLICE]]
// CHECK-DAG: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
// CHECK-DAG: %[[INIT_SLICE:.+]] = tensor.extract_slice %[[ITER_ARG]]
// CHECK: %[[GENERIC:.+]] = linalg.generic
diff --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir
index 5133c14414c978..c1604e226a334f 100644
--- a/mlir/test/Transforms/loop-invariant-code-motion.mlir
+++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir
@@ -1163,18 +1163,18 @@ func.func @speculate_ceildivsi_range(
func.func @speculate_static_pack_and_unpack(%source: tensor<128x256xf32>,
%dest: tensor<4x16x32x16xf32>, %lb: index, %ub: index, %step: index) {
- // CHECK: tensor.pack
+ // CHECK: linalg.pack
// CHECK-NEXT: scf.for
scf.for %i = %lb to %ub step %step {
- %packed = tensor.pack %source
+ %packed = linalg.pack %source
inner_dims_pos = [0, 1]
inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
}
- // CHECK: tensor.unpack
+ // CHECK: linalg.unpack
// CHECK-NEXT: scf.for
scf.for %i = %lb to %ub step %step {
- %unpacked = tensor.unpack %dest
+ %unpacked = linalg.unpack %dest
inner_dims_pos = [0, 1]
inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
}
@@ -1188,25 +1188,25 @@ func.func @speculate_dynamic_pack_and_unpack(%source: tensor<?x?xf32>,
%tile_m: index, %tile_n: index, %pad: f32) {
// CHECK: scf.for
- // CHECK-NEXT: tensor.pack
+ // CHECK-NEXT: linalg.pack
scf.for %i = %lb to %ub step %step {
- %packed = tensor.pack %source
+ %packed = linalg.pack %source
inner_dims_pos = [0, 1]
inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
}
// CHECK: scf.for
- // CHECK-NEXT: tensor.unpack
+ // CHECK-NEXT: linalg.unpack
scf.for %i = %lb to %ub step %step {
- %unpacked = tensor.unpack %dest
+ %unpacked = linalg.unpack %dest
inner_dims_pos = [0, 1]
inner_tiles = [%tile_n, %tile_m] into %source : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
}
- // CHECK: tensor.pack
+ // CHECK: linalg.pack
// CHECK-NEXT: scf.for
scf.for %i = %lb to %ub step %step {
- %packed = tensor.pack %source padding_value(%pad : f32)
+ %packed = linalg.pack %source padding_value(%pad : f32)
inner_dims_pos = [0, 1]
inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
}
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index fa2a27dcfa9914..046b9a65f3359f 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -74,8 +74,9 @@ struct TestLinalgTransforms
*this, "test-decompose-pad-tensor",
llvm::cl::desc("Test transform pad tensor by copying with generic ops"),
llvm::cl::init(false)};
+ // TODO: This is not used - delete.
Option<bool> testDecomposeTensorPackOp{
- *this, "test-decompose-tensor-pack",
+ *this, "test-decompose-linalg-pack",
llvm::cl::desc("Test transform that generalizes pack ops into a sequence "
"of tensor and Linalg ops"),
llvm::cl::init(false)};
@@ -130,6 +131,14 @@ struct TestLinalgTransforms
Option<bool> testDecomposeWinogradOps{
*this, "test-decompose-winograd-ops",
llvm::cl::desc("Test decompose Winograd ops"), llvm::cl::init(false)};
+ Option<bool> testFoldIntoPackAndUnpack{
+ *this, "test-fold-into-pack-and-unpack",
+ llvm::cl::desc("Test folding ops into linalg.pack and linalg.unpack"),
+ llvm::cl::init(false)};
+ Option<bool> testSimplifyPackUnpackPatterns{
+ *this, "test-simplify-pack-unpack-patterns",
+ llvm::cl::desc("Test patterns to simplify linalg.pack and linalg.unpack"),
+ llvm::cl::init(false)};
};
} // namespace
@@ -227,6 +236,18 @@ static void applyDecomposeWinogradOps(func::FuncOp funcOp) {
(void)applyPatternsGreedily(funcOp, std::move(patterns));
}
+static void applyFoldIntoPackAndUnpackPatterns(Operation *rootOp) {
+ RewritePatternSet patterns(rootOp->getContext());
+ linalg::populateFoldIntoPackAndUnpackPatterns(patterns);
+ (void)applyPatternsGreedily(rootOp, std::move(patterns));
+}
+
+static void applySimplifyPackUnpackPatterns(Operation *rootOp) {
+ RewritePatternSet patterns(rootOp->getContext());
+ linalg::populateSimplifyPackAndUnpackPatterns(patterns);
+ (void)applyPatternsGreedily(rootOp, std::move(patterns));
+}
+
/// Apply transformations specified as patterns.
void TestLinalgTransforms::runOnOperation() {
if (testPatterns)
@@ -255,6 +276,11 @@ void TestLinalgTransforms::runOnOperation() {
return applyWinogradConv2D(getOperation());
if (testDecomposeWinogradOps)
return applyDecomposeWinogradOps(getOperation());
+ Operation *rootOp = getOperation();
+ if (testFoldIntoPackAndUnpack)
+ applyFoldIntoPackAndUnpackPatterns(rootOp);
+ if (testSimplifyPackUnpackPatterns)
+ applySimplifyPackUnpackPatterns(rootOp);
}
namespace mlir {
diff --git a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
index 173bfd8955f2b7..e435130c2a4170 100644
--- a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
@@ -77,11 +77,6 @@ struct TestTensorTransforms
llvm::cl::desc("Test folding of expand_shape/collapse_shape"),
llvm::cl::init(false)};
- Option<bool> testFoldIntoPackAndUnpack{
- *this, "test-fold-into-pack-and-unpack",
- llvm::cl::desc("Test folding ops into tensor.pack and tensor.unpack"),
- llvm::cl::init(false)};
-
Option<bool> useForeach{
*this, "use-foreach",
llvm::cl::desc(
@@ -89,11 +84,6 @@ struct TestTensorTransforms
"the extract_slice of collapse_shape pattern"),
llvm::cl::init(false)};
- Option<bool> testSimplifyPackUnpackPatterns{
- *this, "test-simplify-pack-unpack-patterns",
- llvm::cl::desc("Test patterns to simplify tensor.pack and tensor.unpack"),
- llvm::cl::init(false)};
-
Option<bool> testTrackingListener{
*this, "test-tracking-listener",
llvm::cl::desc("Test tensor TrackingListener for the transform dialect"),
@@ -113,12 +103,6 @@ static void applyBubbleUpExpandShapePatterns(Operation *rootOp) {
(void)applyPatternsGreedily(rootOp, std::move(patterns));
}
-static void applyFoldIntoPackAndUnpackPatterns(Operation *rootOp) {
- RewritePatternSet patterns(rootOp->getContext());
- tensor::populateFoldIntoPackAndUnpackPatterns(patterns);
- (void)applyPatternsGreedily(rootOp, std::move(patterns));
-}
-
static void applyFoldConstantExtractSlicePatterns(Operation *rootOp) {
RewritePatternSet patterns(rootOp->getContext());
tensor::ControlConstantExtractSliceFusionFn controlFn =
@@ -148,12 +132,6 @@ applyDropRedundantInsertSliceRankExpansionPatterns(Operation *rootOp) {
(void)applyPatternsGreedily(rootOp, std::move(patterns));
}
-static void applySimplifyPackUnpackPatterns(Operation *rootOp) {
- RewritePatternSet patterns(rootOp->getContext());
- tensor::populateSimplifyPackAndUnpackPatterns(patterns);
- (void)applyPatternsGreedily(rootOp, std::move(patterns));
-}
-
namespace {
/// Base pattern to rewrite a `tensor.collapse_shape -> tensor.extract_slice`.
/// The `tensor.extract_slice` is replaced by a loop or gather operation that
@@ -387,8 +365,6 @@ static LogicalResult testTrackingListenerReplacements(Operation *rootOp) {
void TestTensorTransforms::runOnOperation() {
Operation *rootOp = getOperation();
- if (testSimplifyPackUnpackPatterns)
- applySimplifyPackUnpackPatterns(rootOp);
if (testFoldConstantExtractSlice)
applyFoldConstantExtractSlicePatterns(rootOp);
if (testFoldConsecutiveInsertExtractSlice)
@@ -399,8 +375,6 @@ void TestTensorTransforms::runOnOperation() {
applyReassociativeReshapeFoldingPatterns(rootOp);
if (testBubbleUpExpandShapePatterns)
applyBubbleUpExpandShapePatterns(rootOp);
- if (testFoldIntoPackAndUnpack)
- applyFoldIntoPackAndUnpackPatterns(rootOp);
if (testRewriteExtractSliceWithTiledCollapseShape) {
if (failed(
applyRewriteExtractFromCollapseShapePatterns(rootOp, useForeach)))
>From 1fca4aa45f8d4dbef88c5110b13ab82631d4bb19 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Mon, 20 Jan 2025 11:47:27 +0000
Subject: [PATCH 4/4] [mlir][tensor][linalg] Move Pack/Unpack Ops to Linalg
(4/4)
This is merely moving code around, no new functionality is added.
PATCH 4: Remove `tensor.{pack|unpack}` and all the associated code (e.g.
transformations, verifiers, etc).
CONTEXT:
This change was discussed in the following RFC:
* https://discourse.llvm.org/t/rfc-move-tensor-pack-and-tensor-unpack-into-linalg
---
.../mlir/Dialect/Tensor/IR/TensorOps.td | 308 -----
.../include/mlir/Dialect/Tensor/Utils/Utils.h | 19 -
mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp | 2 +-
mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 1021 +----------------
.../Tensor/IR/TensorTilingInterfaceImpl.cpp | 652 -----------
mlir/lib/Dialect/Tensor/Utils/Utils.cpp | 55 -
6 files changed, 2 insertions(+), 2055 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
index 812ac209845020..e77901457cb9df 100644
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -1816,314 +1816,6 @@ def Tensor_SplatOp : Tensor_Op<"splat", [
let hasVerifier = 1;
}
-//===----------------------------------------------------------------------===//
-// RelayoutOp
-//===----------------------------------------------------------------------===//
-
-class Tensor_RelayoutOp<string mnemonic, list<Trait> traits = []> :
- Tensor_Op<mnemonic, !listconcat(traits, [
- DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
- DestinationStyleOpInterface,
- ConditionallySpeculatable, NoMemoryEffect,
- DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
- TypesMatchWith<"result type matches type of dest",
- "dest", "result",
- "$_self">])> {
-
- code commonExtraClassDeclaration = [{
- size_t getSourceRank() { return getSourceType().getRank(); };
- size_t getDestRank() { return getDestType().getRank(); };
- RankedTensorType getSourceType() {
- return ::llvm::cast<RankedTensorType>(getSource().getType()); };
- RankedTensorType getDestType() {
- return ::llvm::cast<RankedTensorType>(getDest().getType()); };
-
- MutableOperandRange getDpsInitsMutable() { return getDestMutable(); }
-
- /// Interface method for ConditionallySpeculatable.
- Speculation::Speculatability getSpeculatability();
-
- /// Return a mapping from positions `inner_dims_pos` to their
- /// tile factors.
- DenseMap<int64_t, OpFoldResult> getDimAndTileMapping();
-
- /// Return the tile sizes as OpFoldResult.
- SmallVector<OpFoldResult> getMixedTiles();
-
- /// Return the tile sizes as `int64_t`. If a tile size is dynamic
- /// a sentinel `kDynamic` is introduced at that position in
- /// the returned vector.
- SmallVector<int64_t> getStaticTiles();
-
- /// Retrieve all outer dims for this Pack/UnPack Op, i.e. all the leading
- /// dims excluding the trailing dims corresponding to `innerTiles`. Note
- /// that this will include both tiled and non-tiled dimensions. The order
- /// of the output dimensions is consistent with the shape of the packed
- /// tensor.
- ArrayRef<int64_t> getAllOuterDims();
-
- /// Similar to `getAllOuterDims`, but only retrieve the outer dims that
- /// have been tiled. Also, the order of the output dimensions is consistent
- /// with `inner_dims_pos` rather than the packed tensor.
- SmallVector<int64_t> getTiledOuterDims();
- }];
-
- let hasVerifier = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// PackOp
-//===----------------------------------------------------------------------===//
-
-def Tensor_PackOp : Tensor_RelayoutOp<"pack", [
- AttrSizedOperandSegments]> {
- let summary = "tensor pack operation";
- let description = [{
- The "pack" operation converts a source tensor of rank `n` into a result
- tensor of rank `n + k` with a tiled and packed layout (maybe with padding)
- and optionally transposes the tiled source tensor dimensions.
-
- `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
- being tiled, where `0 < k <= n`. The order of the dimensions matters:
- - The tiled dimensions (of size `inner_tiles`) are added to the end of the result
- tensor in the order in which they appear in `inner_dims_pos`.
- - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
- `inner_tiles[i]`.
-
- `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
- correspond to the least significant ("inner") result tensor dimension sizes,
- in the same order. Tile sizes can be static or dynamic.
-
- Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
- `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
- by 16 and the 1st source dimension is tiled by 32. Other source dimensions
- (if any) are not tiled. If `inner_dims_pos = [1, 0]`, the 1st dimension is
- tiled by 16 and the 0th dimension is tiled by 32.
-
- Example:
- ```mlir
- // NC to NCnc
- %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
- into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
- // \ / \ /
- // outer dims inner dims
- ```
-
- `outer_dims_perm` (optional) specifies a permutation for the outer
- dimensions. If specified, it must have `n` elements.
-
- Example:
- ```mlir
- // CK to KCck
- %0 = tensor.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
- inner_tiles = [8, 32] into %dest
- : tensor<128x256xf32> -> tensor<8x16 x 8x32 xf32>
- // \ /
- // compare with "NC to NCnc": outer dims are transposed
- ```
-
- `padding_value` specifies a padding value at the boundary on non-perfectly
- divisible dimensions. Padding is optional:
- - If absent, it is UB if the tile does not perfectly divide the dimension.
- - If present, it will pad along high dimensions (high-padding) to make the
- tile complete.
-
- Example:
- ```mlir
- %0 = tensor.pack %arg0 padding_value(%pad : f32) outer_dims_perm = [2, 1, 0]
- inner_dims_pos = [1] inner_tiles = [2] into %arg1
- : tensor<200x127x256xf32> -> tensor<256x64x200x2xf32>
- // \
- // padded and tiled dim
- //
- // Source dimension 1 is tiled. 64 does not divide 127 evenly, so 1 padded
- // element is added at the end.
- //
- // Note: Only tiled dimensions can be padded.
- ```
- }];
- let arguments = (ins AnyRankedTensor:$source,
- AnyRankedTensor:$dest,
- Optional<AnyType>:$padding_value,
- DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
- DenseI64ArrayAttr:$inner_dims_pos,
- Variadic<Index>:$inner_tiles,
- DenseI64ArrayAttr:$static_inner_tiles);
- let results = (outs AnyRankedTensor:$result);
- let assemblyFormat = [{
- $source
- (`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?
- (`outer_dims_perm` `=` $outer_dims_perm^)?
- `inner_dims_pos` `=` $inner_dims_pos
- `inner_tiles` `=`
- custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
- `into` $dest attr-dict `:` type($source) `->` type($dest)
- }];
-
- let builders = [
- OpBuilder<(ins "Value":$source, "Value":$dest,
- "ArrayRef<int64_t>":$innerDimsPos,
- "ArrayRef<OpFoldResult>":$innerTiles,
- CArg<"std::optional<Value>", "std::nullopt">:$paddingValue,
- CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)>
- ];
-
- let extraClassDeclaration = commonExtraClassDeclaration # [{
- // Method to get the shape of the result as `SmallVector<OpFoldResult>`.
- // This is a static method to allow getting the shape of the destination
- // expected while creating a `pack` op.
- static SmallVector<OpFoldResult> getResultShape(OpBuilder &builder,
- Location loc, ArrayRef<OpFoldResult> sourceDims,
- ArrayRef<OpFoldResult> innerTileDims, ArrayRef<int64_t> innerDimsPos,
- ArrayRef<int64_t> outerDimsPerm = {});
-
- // Method to get the `RankedTensorType` of the result based on the inner
- // tiles, position of the inner tiles (innerDimsPos) and interchange vector
- // of outer loops (outerDimsPerm).
- static RankedTensorType inferPackedType(RankedTensorType sourceType,
- ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
- ArrayRef<int64_t> outerDimsPerm = {});
-
- // Returns true if we have enough static information to catch undefined
- // behavior when the tile size does not divide perfectly the dimension of
- // the input tensor. Detecting UB requires that the input size and either
- // corresponding tile or output size are static.
- static bool requirePaddingValue(ArrayRef<int64_t> inputShape,
- ArrayRef<int64_t> innerDimsPos,
- ArrayRef<int64_t> outputShape,
- ArrayRef<int64_t> outerDimsPerm,
- ArrayRef<OpFoldResult> innerTiles);
-
- static Value createDestinationTensor(OpBuilder &b, Location loc,
- Value source, ArrayRef<OpFoldResult> innerTileSizes,
- ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
-
- /// Build and return a new PackOp that is a clone of the current PackOp with
- /// (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted by
- /// innerPermutation (resp. outerPermutation).
- /// A new `tensor.empty` of the proper shape is built in the process.
- /// Asserts that:
- /// - At least one of innerPermutation or outerPermutation is non-empty.
- /// - If not empty, innerPermutation is a valid permutation of size
- /// matching innerDimPos.
- /// - If not empty, outerPermutation is a valid permutation of size
- /// matching outerDimsPerm.
- PackOp createTransposedClone(OpBuilder &b,
- Location loc,
- ArrayRef<int64_t> innerPermutation,
- ArrayRef<int64_t> outerPermutation);
-
- /// Check if this PackOp is like a simple pad operation.
- /// In other words, this operation:
- /// 1. adds useless dimensions (dimension of size 1),
- /// 2. pads the other ones, and
- /// 3. doesn't shuffle the dimensions
- bool isLikePad();
- }];
-
- let hasCanonicalizeMethod = 1;
-
- let hasFolder = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// UnPackOp
-//===----------------------------------------------------------------------===//
-
-def Tensor_UnPackOp : Tensor_RelayoutOp<"unpack"> {
- let summary = "tensor unpack operation";
- let description = [{
- The "unpack" operation converts a source tensor of rank `n` with a tiled and
- packed layout to a result tensor of rank `n - k`.
-
- `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with
- which the last `k` source tensor dimensions are combined, where
- `0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`.
- The order of the dimensions in `inner_dims_pos` matters: dimension
- `inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that
- `outer_dims_perm` is not specified).
-
- `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
- correspond to the least significant ("inner") source tensor dimension sizes.
- The behavior of this op is undefined if:
- - `inner_tiles` do not exactly match with the corresponding source tensor
- dimension sizes.
- - Or, `inner_tiles[i]` does not divide the size of dimension
- `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
- evenly.
-
- `outer_dims_perm` (optional) specifies a permutation for the outer
- dimensions. If specified, it must have `n - k` elements. If specified, this
- permutation is applied before combining any dimensions.
-
- Example:
-
- ```mlir
- // NCnc to NC:
- %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
- into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
-
- // CK to KCck:
- %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
- inner_tiles = [8, 32] into %dest
- : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
- ```
- }];
- let arguments = (ins AnyRankedTensor:$source,
- AnyRankedTensor:$dest,
- DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
- DenseI64ArrayAttr:$inner_dims_pos,
- Variadic<Index>:$inner_tiles,
- DenseI64ArrayAttr:$static_inner_tiles);
- let results = (outs AnyRankedTensor:$result);
- let assemblyFormat = [{
- $source
- (`outer_dims_perm` `=` $outer_dims_perm^)?
- `inner_dims_pos` `=` $inner_dims_pos
- `inner_tiles` `=`
- custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
- `into` $dest attr-dict `:` type($source) `->` type($dest)
- }];
-
- let builders = [
- OpBuilder<(ins "Value":$source, "Value":$dest,
- "ArrayRef<int64_t>":$innerDimsPos,
- "ArrayRef<OpFoldResult>":$innerTiles,
- CArg<"ArrayRef<int64_t>", "{}">:$outerDimsPerm)>
- ];
-
- let extraClassDeclaration = commonExtraClassDeclaration # [{
- static Value createDestinationTensor(OpBuilder &b, Location loc,
- Value source, ArrayRef<OpFoldResult> innerTileSizes,
- ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
-
- /// Build and return a new UnPackOp that is a clone of the current UnPackOp
- /// with (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted by
- /// innerPermutation (resp. outerPermutation).
- /// Asserts that:
- /// - At least one of innerPermutation or outerPermutation is non-empty.
- /// - If not empty, innerPermutation is a valid permutation of size
- /// matching innerDimPos.
- /// - If not empty, outerPermutation is a valid permutation of size
- /// matching outerDimsPerm.
- UnPackOp createTransposedClone(OpBuilder &b,
- Location loc,
- Value transposedSource,
- ArrayRef<int64_t> innerPermutation,
- ArrayRef<int64_t> outerPermutation);
-
- /// Check if this UnPackOp is like a simple unpad operation.
- /// In other words, this operation:
- /// 1. drops useless dimensions (dimension of size 1), and
- /// 2. reduces dimensions in place (i.e., no transpose.)
- bool isLikeUnPad();
- }];
-
- let hasCanonicalizeMethod = 1;
-
- let hasFolder = 1;
-}
-
//===----------------------------------------------------------------------===//
// YieldOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
index ed1ec1e871482d..83cc665b5a4fb4 100644
--- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
@@ -42,25 +42,6 @@ FailureOr<RankedTensorType>
computeTransposedType(RankedTensorType rankedTensorType,
ArrayRef<int64_t> transposeVector);
-/// Shell function to compute the Destination Permutation of PackOp
-/// This function uses the helper function `computePackUnPackPerm` to get
-/// the permutation vector. Only major difference between UnPack and Pack is
-/// that packOp uses destination rank whereas unpack Uses source rank.
-SmallVector<int64_t> getPackInverseDestPerm(tensor::PackOp packOp);
-
-/// Shell function to compute the Source Permutation of unPackOp.
-/// This function, like the getPackInverseDestPerm uses the helper function
-/// computePackUnPackPerm` to get the permutation vector.
-/// Only major difference between UnPack and Pack is that packOp uses
-/// destination rank whereas unpack Uses source rank.
-SmallVector<int64_t> getUnPackInverseSrcPerm(tensor::UnPackOp unpackOp);
-
-/// Shell function to compute the Source rank permutation for unpackOp
-/// Unpack requires some packing metadata data information, so created
-/// another function where this value is passed by reference.
-SmallVector<int64_t> getUnPackInverseSrcPerm(tensor::UnPackOp,
- PackingMetadata &metadata);
-
/// A tensor.insert_slice is a cast-like operation if it merely rank-extends the
/// source tensor or inserts the source tensor into a destination tensor with
/// the same shape.
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
index 002077753b1324..8af087cbf0f612 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp
@@ -63,7 +63,7 @@ void TensorDialect::initialize() {
declarePromisedInterfaces<SubsetInsertionOpInterface, InsertSliceOp,
ParallelInsertSliceOp>();
declarePromisedInterface<SubsetExtractionOpInterface, ExtractSliceOp>();
- declarePromisedInterfaces<TilingInterface, PadOp, PackOp, UnPackOp>();
+ declarePromisedInterfaces<TilingInterface, PadOp>();
declarePromisedInterfaces<ValueBoundsOpInterface, CastOp, DimOp, EmptyOp,
ExtractSliceOp, PadOp, RankOp>();
}
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index dfe342b3e743bb..92075d7a5e861e 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -3832,916 +3832,6 @@ OpFoldResult SplatOp::fold(FoldAdaptor adaptor) {
return SplatElementsAttr::get(getType(), {constOperand});
}
-//===----------------------------------------------------------------------===//
-// PackOp/UnPackOp Common
-//===----------------------------------------------------------------------===//
-
-template <typename OpTy>
-static LogicalResult
-reifyResultShapesImpl(OpTy op, OpBuilder &builder,
- ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
- static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
- "applies to only pack or unpack operations");
- int64_t destRank = op.getDestRank();
- reifiedReturnShapes.resize(1, SmallVector<OpFoldResult>(destRank));
- reifiedReturnShapes[0] =
- tensor::getMixedSizes(builder, op.getLoc(), op.getDest());
- return success();
-}
-
-template <typename OpTy>
-static DenseMap<int64_t, OpFoldResult> getDimAndTileMappingImpl(OpTy op) {
- static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
- "applies to only pack or unpack operations");
- DenseMap<int64_t, OpFoldResult> dimAndTileMapping;
- ArrayRef<int64_t> dimsToTile = op.getInnerDimsPos();
- SmallVector<OpFoldResult> tiles = op.getMixedTiles();
- assert(tiles.size() == dimsToTile.size() &&
- "tiles must match indices of dimension to block");
- // bind the dimension `i` with the tile factor.
- for (auto i : llvm::seq<int64_t>(0, dimsToTile.size()))
- dimAndTileMapping[dimsToTile[i]] = tiles[i];
- return dimAndTileMapping;
-}
-
-template <typename OpTy>
-static SmallVector<OpFoldResult> getMixedTilesImpl(OpTy op) {
- static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
- "applies to only pack or unpack operations");
- Builder builder(op);
- SmallVector<OpFoldResult> mixedInnerTiles;
- unsigned dynamicValIndex = 0;
- for (int64_t staticTile : op.getStaticInnerTiles()) {
- if (!ShapedType::isDynamic(staticTile))
- mixedInnerTiles.push_back(builder.getI64IntegerAttr(staticTile));
- else
- mixedInnerTiles.push_back(op.getInnerTiles()[dynamicValIndex++]);
- }
- return mixedInnerTiles;
-}
-
-template <typename OpTy>
-static SmallVector<int64_t> getStaticTilesImpl(OpTy op) {
- static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
- "applies to only pack or unpack operations");
- SmallVector<Value> dynamicTiles;
- SmallVector<int64_t> staticTiles;
- dispatchIndexOpFoldResults(op.getMixedTiles(), dynamicTiles, staticTiles);
- return staticTiles;
-}
-
-/// Returns true if `dimsPos` is invalid. It is invalid when:
-/// a) It contains duplicate.
-/// b) At least one dimension is out of bound (`dimPos` is >= 0 and < rank).
-/// c) The number of elements in `dimsPos` is > than `rank`.
-static bool isInvalidPackingPosSpecification(ArrayRef<int64_t> dimsPos,
- size_t rank) {
- size_t dimsPosSize = dimsPos.size();
- if (dimsPosSize > rank)
- return true;
- DenseSet<int64_t> uniqued;
- for (int64_t dim : dimsPos)
- uniqued.insert(dim);
- if (dimsPosSize != uniqued.size())
- return true;
- return llvm::any_of(dimsPos, [rank](int64_t dimPos) {
- return dimPos < 0 || dimPos >= static_cast<int64_t>(rank);
- });
-}
-
-/// Returns true if the dimension of `sourceShape` is smaller than the dimension
-/// of the `limitShape`.
-static bool areAllInBound(ArrayRef<int64_t> sourceShape,
- ArrayRef<int64_t> limitShape) {
- assert(
- sourceShape.size() == limitShape.size() &&
- "expected source shape rank, and limit of the shape to have same rank");
- return llvm::all_of(
- llvm::zip(sourceShape, limitShape), [](std::tuple<int64_t, int64_t> it) {
- int64_t sourceExtent = std::get<0>(it);
- int64_t limit = std::get<1>(it);
- return ShapedType::isDynamic(sourceExtent) ||
- ShapedType::isDynamic(limit) || sourceExtent <= limit;
- });
-}
-
-template <typename OpTy>
-static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
- static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
- "applies to only pack or unpack operations");
- Operation *op = packOrUnPack.getOperation();
-
- // Return true if we have a zero-value tile.
- auto hasZeros = [&](ArrayRef<OpFoldResult> tiles) {
- return llvm::any_of(
- tiles, [](OpFoldResult tile) { return isConstantIntValue(tile, 0); });
- };
-
- // Verify tiles. Do not allow zero tiles.
- SmallVector<OpFoldResult> mixedTiles = packOrUnPack.getMixedTiles();
- if (hasZeros(mixedTiles))
- return op->emitError("invalid zero tile factor");
-
- // Verify inner_dims_pos and outer_dims_perm.
- RankedTensorType unpackedType = (std::is_same<OpTy, PackOp>::value)
- ? packOrUnPack.getSourceType()
- : packOrUnPack.getDestType();
- size_t unpackedRank = unpackedType.getRank();
- ArrayRef<int64_t> innerDimsPos = packOrUnPack.getInnerDimsPos();
- ArrayRef<int64_t> outerDimPerm = packOrUnPack.getOuterDimsPerm();
- if (isInvalidPackingPosSpecification(innerDimsPos, unpackedRank))
- return op->emitError("invalid inner_dims_pos vector");
- if (isInvalidPackingPosSpecification(outerDimPerm, unpackedRank))
- return op->emitError("invalid outer_dims_perm vector");
- if (!outerDimPerm.empty() && outerDimPerm.size() != unpackedRank)
- return op->emitError("outer_dims_perm must be a permutation or empty");
-
- // Tiling factors must be less than or equal to the input rank for pack (or
- // output rank for unpack), and must match the number of `inner_dims_pos`.
- if (mixedTiles.size() > unpackedRank) {
- return op->emitError("tiling factors must be less than or equal to the "
- "input rank for pack or output rank for unpack");
- }
- if (mixedTiles.size() != innerDimsPos.size()) {
- return op->emitError(
- "tiling factors must equal the number of dimensions to tile");
- }
-
- ShapedType packedType = (std::is_same<OpTy, PackOp>::value)
- ? packOrUnPack.getDestType()
- : packOrUnPack.getSourceType();
- size_t packedRank = packedType.getRank();
- // Require output rank to match input rank + number of blocking factors.
- size_t expectedPackedRank = unpackedRank + mixedTiles.size();
- if (expectedPackedRank != packedRank) {
- return op->emitError(
- "packed rank != (unpacked rank + num tiling factors), got ")
- << packedRank << " != " << expectedPackedRank;
- }
-
- // Verify result shape is greater than the minimum expected
- // by the pack operation, and that the output shape
- // represents full tiles.
- RankedTensorType expectedPackedType = PackOp::inferPackedType(
- unpackedType, packOrUnPack.getStaticTiles(), innerDimsPos, outerDimPerm);
- if (!areAllInBound(expectedPackedType.getShape(), packedType.getShape())) {
- return op->emitError("the shape of output is not large enough to hold the "
- "packed data. Expected at least ")
- << expectedPackedType << ", got " << packedType;
- }
- if (!llvm::all_of(
- llvm::zip(packedType.getShape().take_back(mixedTiles.size()),
- mixedTiles),
- [](std::tuple<int64_t, OpFoldResult> it) {
- int64_t shape = std::get<0>(it);
- if (Attribute attr =
- llvm::dyn_cast_if_present<Attribute>(std::get<1>(it))) {
- IntegerAttr intAttr = dyn_cast_or_null<IntegerAttr>(attr);
- int64_t staticTileSize = intAttr.getValue().getSExtValue();
- return shape == staticTileSize;
- }
- return ShapedType::isDynamic(shape);
- })) {
- return op->emitError("mismatch in inner tile sizes specified and shaped of "
- "tiled dimension in the packed type");
- }
- return success();
-}
-
-namespace {
-/// Subset of PackOp/UnPackOp fields used to compute the result of applying
-/// various permutations to the op.
-// TODO: Add linalg.transpose + pack/unpack folding patterns that just reuse
-// these. These may or may not become true foldings / canonicalizations
-// depending on how aggressive we want to be in automatically folding
-// transposes.
-struct PackOrUnPackTransposeResult {
- SmallVector<int64_t> innerDimsPos;
- SmallVector<OpFoldResult> innerTiles;
- SmallVector<int64_t> outerDimsPerm;
-};
-} // namespace
-
-template <typename OpTy>
-static PackOrUnPackTransposeResult
-commonPermutationOfPackAndUnPackOp(OpTy packOrUnPackOp,
- ArrayRef<int64_t> innerPermutation,
- ArrayRef<int64_t> outerPermutation) {
- static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
- "applies to only pack or unpack operations");
- assert((!innerPermutation.empty() || !outerPermutation.empty()) &&
- "some permutation must be non-empty");
- PackOrUnPackTransposeResult metadata;
- metadata.innerDimsPos =
- SmallVector<int64_t>(packOrUnPackOp.getInnerDimsPos());
- metadata.innerTiles =
- SmallVector<OpFoldResult>(packOrUnPackOp.getMixedTiles());
- int64_t numOuterDims = std::is_same<OpTy, PackOp>::value
- ? packOrUnPackOp.getSourceRank()
- : packOrUnPackOp.getDestRank();
- metadata.outerDimsPerm =
- packOrUnPackOp.getOuterDimsPerm().empty()
- ? llvm::to_vector(llvm::seq<int64_t>(0, numOuterDims))
- : SmallVector<int64_t>(packOrUnPackOp.getOuterDimsPerm());
- if (!innerPermutation.empty()) {
- assert(innerPermutation.size() == metadata.innerDimsPos.size() &&
- isPermutationVector(innerPermutation) &&
- "invalid inner permutation");
- applyPermutationToVector(metadata.innerDimsPos, innerPermutation);
- applyPermutationToVector(metadata.innerTiles, innerPermutation);
- }
- if (!outerPermutation.empty()) {
- assert(outerPermutation.size() == metadata.outerDimsPerm.size() &&
- isPermutationVector(outerPermutation) &&
- "invalid outer permutation");
- applyPermutationToVector(metadata.outerDimsPerm, outerPermutation);
- }
- return metadata;
-}
-
-//===----------------------------------------------------------------------===//
-// PackOp
-//===----------------------------------------------------------------------===//
-
-void PackOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
- setNameFn(getResult(), "pack");
-}
-
-void PackOp::build(OpBuilder &builder, OperationState &state, Value source,
- Value dest, ArrayRef<int64_t> innerDimsPos,
- ArrayRef<OpFoldResult> innerTiles,
- std::optional<Value> paddingValue,
- ArrayRef<int64_t> outerDimsPerm) {
- assert(innerDimsPos.size() == innerTiles.size() &&
- "number of tile sizes specified must match the specified number of "
- "original dimensions to be tiled");
- SmallVector<int64_t> staticTileSizes;
- SmallVector<Value> dynamicTileSizes;
- dispatchIndexOpFoldResults(innerTiles, dynamicTileSizes, staticTileSizes);
- build(builder, state, dest.getType(), source, dest,
- paddingValue ? *paddingValue : nullptr,
- outerDimsPerm.empty() ? nullptr
- : builder.getDenseI64ArrayAttr(outerDimsPerm),
- builder.getDenseI64ArrayAttr(innerDimsPos), dynamicTileSizes,
- builder.getDenseI64ArrayAttr(staticTileSizes));
-}
-
-LogicalResult
-PackOp::reifyResultShapes(OpBuilder &builder,
- ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
- return reifyResultShapesImpl(*this, builder, reifiedReturnShapes);
-}
-
-DenseMap<int64_t, OpFoldResult> PackOp::getDimAndTileMapping() {
- return getDimAndTileMappingImpl(*this);
-}
-
-SmallVector<OpFoldResult> PackOp::getMixedTiles() {
- return getMixedTilesImpl(*this);
-}
-
-SmallVector<int64_t> PackOp::getStaticTiles() {
- return getStaticTilesImpl(*this);
-}
-
-ArrayRef<int64_t> PackOp::getAllOuterDims() {
- ShapedType inputType = getSourceType();
- int64_t inputRank = inputType.getRank();
- return getDestType().getShape().take_front(inputRank);
-}
-
-SmallVector<int64_t> PackOp::getTiledOuterDims() {
- auto innerDimsPos = getInnerDimsPos();
- auto packedShape = getDestType().getShape();
- SmallVector<int64_t> res;
-
- for (auto index : innerDimsPos)
- res.push_back(packedShape[index]);
-
- return res;
-}
-
-bool PackOp::requirePaddingValue(ArrayRef<int64_t> inputShape,
- ArrayRef<int64_t> innerDimsPos,
- ArrayRef<int64_t> outputShape,
- ArrayRef<int64_t> outerDimsPerm,
- ArrayRef<OpFoldResult> innerTiles) {
- SmallVector<int64_t> outputTileSizes(
- outputShape.take_front(inputShape.size()));
- if (!outerDimsPerm.empty()) {
- assert(outerDimsPerm.size() == outputTileSizes.size() &&
- "expected output and outer_dims_perm to have same size");
- applyPermutationToVector(outputTileSizes,
- invertPermutationVector(outerDimsPerm));
- }
- for (auto [pos, tileSize] : llvm::zip_equal(innerDimsPos, innerTiles)) {
- if (ShapedType::isDynamic(inputShape[pos]))
- continue;
- std::optional<int64_t> constantTile = getConstantIntValue(tileSize);
-
- if (!constantTile) {
- if (!ShapedType::isDynamic(outputTileSizes[pos]) &&
- (inputShape[pos] % outputTileSizes[pos] != 0))
- return true;
- } else if (inputShape[pos] % (*constantTile) != 0) {
- return true;
- }
- }
- return false;
-}
-
-LogicalResult PackOp::verify() {
- if (failed(commonVerifierPackAndUnPackOp(*this)))
- return failure();
-
- // Verify padding value, and bail out if the tile does not divide the
- // dimension fully. In the case of dynamic tile factors or dimensions, having
- // a partial tile is undefined behavior.
- auto paddingValue = getPaddingValue();
- if (paddingValue &&
- paddingValue.getType() != getSourceType().getElementType()) {
- return emitOpError("expected padding_value has ")
- << getSourceType().getElementType()
- << " but got: " << paddingValue.getType();
- }
-
- if (!paddingValue &&
- requirePaddingValue(getSourceType().getShape(), getInnerDimsPos(),
- getDestType().getShape(), getOuterDimsPerm(),
- getMixedTiles())) {
- return emitOpError(
- "invalid tile factor or output size provided. Only full tiles are "
- "supported when padding_value is not set");
- }
- return success();
-}
-
-/// Converts OpFoldResults to int64_t shape entries, unconditionally mapping all
-/// Value's to kDynamic, even if they are arith.constant values.
-static SmallVector<int64_t>
-asShapeWithAnyValueAsDynamic(ArrayRef<OpFoldResult> ofrs) {
- SmallVector<int64_t> result;
- for (auto o : ofrs) {
- // Have to do this first, as getConstantIntValue special-cases constants.
- if (llvm::dyn_cast_if_present<Value>(o))
- result.push_back(ShapedType::kDynamic);
- else
- result.push_back(getConstantIntValue(o).value_or(ShapedType::kDynamic));
- }
- return result;
-}
-
-/// Helper for PackOp::{getResultShape,inferPackedType}. Returns the shape of
-/// the packed type. Having a shared helper helps implement these two methods in
-/// a way that ensures that they agree on which dimensions are dynamic.
-static SmallVector<int64_t> getPackOpResultTypeShape(
- ArrayRef<int64_t> sourceShape, ArrayRef<int64_t> innerTileSizes,
- ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm) {
- SmallVector<int64_t> resultShape = llvm::to_vector(sourceShape);
- for (auto tiledDim : llvm::enumerate(llvm::to_vector(innerDimsPos))) {
- if (ShapedType::isDynamic(resultShape[tiledDim.value()]))
- continue;
- if (ShapedType::isDynamic(innerTileSizes[tiledDim.index()])) {
- resultShape[tiledDim.value()] = ShapedType::kDynamic;
- continue;
- }
- resultShape[tiledDim.value()] = divideCeilSigned(
- resultShape[tiledDim.value()], innerTileSizes[tiledDim.index()]);
- }
-
- // Swap tile loops if outer_dims_perm is available.
- if (!outerDimsPerm.empty())
- applyPermutationToVector(resultShape, outerDimsPerm);
-
- // Append the inner tile dimensions.
- resultShape.append(innerTileSizes.begin(), innerTileSizes.end());
- return resultShape;
-}
-
-SmallVector<OpFoldResult> PackOp::getResultShape(
- OpBuilder &builder, Location loc, ArrayRef<OpFoldResult> sourceDims,
- ArrayRef<OpFoldResult> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
- ArrayRef<int64_t> outerDimsPerm) {
- SmallVector<OpFoldResult> resultDims = llvm::to_vector(sourceDims);
-
- AffineExpr s0, s1;
- bindSymbols(builder.getContext(), s0, s1);
- AffineExpr ceilDivExpr = s0.ceilDiv(s1);
- for (auto tiledDim : llvm::enumerate(llvm::to_vector(innerDimsPos))) {
- resultDims[tiledDim.value()] = affine::makeComposedFoldedAffineApply(
- builder, loc, ceilDivExpr,
- {resultDims[tiledDim.value()], innerTileSizes[tiledDim.index()]});
- }
- if (!outerDimsPerm.empty())
- applyPermutationToVector(resultDims, outerDimsPerm);
- resultDims.append(innerTileSizes.begin(), innerTileSizes.end());
-
- SmallVector<int64_t> resultTypeShape =
- getPackOpResultTypeShape(asShapeWithAnyValueAsDynamic(sourceDims),
- asShapeWithAnyValueAsDynamic(innerTileSizes),
- innerDimsPos, outerDimsPerm);
-
- // Fix-up `resultDims` to ensure that they are Value's if and only if the
- // result type shape says it's a dynamic dim. This is needed as callers may
- // use dispatchIndexOpFoldResults on the result, and rely on exact number of
- // dynamic dims returned by that.
- for (unsigned i = 0; i < resultDims.size(); ++i) {
- if (!ShapedType::isDynamic(resultTypeShape[i]))
- continue;
- resultDims[i] =
- getValueOrCreateConstantIndexOp(builder, loc, resultDims[i]);
- }
-
- return resultDims;
-}
-
-/// Get the expected packed type based on source type, tile factors, position of
-/// the inner tiles and permutation of the outer tiled loop.
-RankedTensorType PackOp::inferPackedType(RankedTensorType sourceType,
- ArrayRef<int64_t> innerTileSizes,
- ArrayRef<int64_t> innerDimsPos,
- ArrayRef<int64_t> outerDimsPerm) {
- SmallVector<int64_t> resultShape = getPackOpResultTypeShape(
- sourceType.getShape(), innerTileSizes, innerDimsPos, outerDimsPerm);
- return RankedTensorType::get(resultShape, sourceType.getElementType());
-}
-
-Value PackOp::createDestinationTensor(OpBuilder &b, Location loc, Value source,
- ArrayRef<OpFoldResult> innerTileSizes,
- ArrayRef<int64_t> innerDimsPos,
- ArrayRef<int64_t> outerDimsPerm) {
- AffineExpr dim0, dim1;
- bindDims(b.getContext(), dim0, dim1);
- auto ceilDiv = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult {
- return affine::makeComposedFoldedAffineApply(b, loc, dim0.ceilDiv(dim1),
- {v1, v2});
- };
-
- SmallVector<OpFoldResult> mixedSizes;
- for (auto [index, value] : llvm::enumerate(
- llvm::cast<RankedTensorType>(source.getType()).getShape())) {
- if (ShapedType::isDynamic(value))
- mixedSizes.push_back(b.create<DimOp>(loc, source, index).getResult());
- else
- mixedSizes.push_back(b.getIndexAttr(value));
- }
- for (auto it : llvm::zip(innerDimsPos, innerTileSizes)) {
- int64_t dimPos = std::get<0>(it);
- OpFoldResult tileSize = std::get<1>(it);
- mixedSizes[dimPos] = ceilDiv(mixedSizes[dimPos], tileSize);
- }
- if (!outerDimsPerm.empty())
- applyPermutationToVector<OpFoldResult>(mixedSizes, outerDimsPerm);
-
- mixedSizes.append(innerTileSizes.begin(), innerTileSizes.end());
- auto elemType = llvm::cast<ShapedType>(source.getType()).getElementType();
- return b.create<tensor::EmptyOp>(loc, mixedSizes, elemType);
-}
-
-PackOp PackOp::createTransposedClone(OpBuilder &b, Location loc,
- ArrayRef<int64_t> innerPermutation,
- ArrayRef<int64_t> outerPermutation) {
- PackOrUnPackTransposeResult metadata = commonPermutationOfPackAndUnPackOp(
- *this, innerPermutation, outerPermutation);
- Value transposedDest =
- createDestinationTensor(b, loc, getSource(), metadata.innerTiles,
- metadata.innerDimsPos, metadata.outerDimsPerm);
- return b.create<PackOp>(loc, getSource(), transposedDest,
- metadata.innerDimsPos, metadata.innerTiles,
- getPaddingValue(), metadata.outerDimsPerm);
-}
-
-/// Returns true if the tiles and the tiled dims are constant.
-template <typename OpTy>
-bool areTilesAndTiledDimsAllConstant(OpTy op) {
- static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
- "applies to only pack or unpack operations");
- ShapedType packedType = (std::is_same<OpTy, PackOp>::value)
- ? op.getDestType()
- : op.getSourceType();
- SmallVector<OpFoldResult> mixedTiles = op.getMixedTiles();
- for (auto [dimDest, tile] : llvm::zip(
- packedType.getShape().take_back(mixedTiles.size()), mixedTiles)) {
- std::optional<int64_t> constTileSize = getConstantIntValue(tile);
- if (!constTileSize || ShapedType::isDynamic(dimDest))
- return false;
- }
- return true;
-}
-
-Speculation::Speculatability PackOp::getSpeculatability() {
- if (getPaddingValue())
- return Speculation::Speculatable;
-
- // The verifier rejects already operations if we can statically prove that the
- // sizes of the tiles do not divide perfectly the dimension; thus, check only
- // to have constant tiles and tiled inner dimensions.
- if (!areTilesAndTiledDimsAllConstant(*this))
- return Speculation::NotSpeculatable;
-
- return Speculation::Speculatable;
-}
-
-// Return true if `inner_dims_pos` and `outer_dims_perm` target the same
-// dimensions for pack and unpack.
-static bool hasSameInnerOuterAttribute(PackOp packOp, UnPackOp unPackOp) {
- if (packOp.getInnerDimsPos() != unPackOp.getInnerDimsPos())
- return false;
- if (packOp.getOuterDimsPerm() == unPackOp.getOuterDimsPerm())
- return true;
- // Outer dims permutation is optional.
- // To compare unbalanced pack-unpack pair, treat no permutation as equal to
- // identity permutation.
- return isIdentityPermutation(packOp.getOuterDimsPerm()) &&
- isIdentityPermutation(unPackOp.getOuterDimsPerm());
-}
-
-// Return true if pack and unpack have the same tiles.
-// Same SSA values or same integer constants.
-static bool haveSameTiles(PackOp packOp, UnPackOp unPackOp) {
- auto packTiles = packOp.getMixedTiles();
- auto unPackTiles = unPackOp.getMixedTiles();
- if (packTiles.size() != unPackTiles.size())
- return false;
- for (size_t i = 0, e = packTiles.size(); i < e; i++) {
- if (!isEqualConstantIntOrValue(packTiles[i], unPackTiles[i]))
- return false;
- }
- return true;
-}
-
-/// Returns true if the pack op does not need a padding value.
-static bool paddingIsNotNeeded(PackOp op) {
- auto srcType = op.getSourceType();
- if (llvm::any_of(op.getInnerDimsPos(),
- [&](int64_t pos) { return srcType.isDynamicDim(pos); }))
- return false;
- if (ShapedType::isDynamicShape(op.getStaticInnerTiles()))
- return false;
- return !PackOp::requirePaddingValue(
- srcType.getShape(), op.getInnerDimsPos(), op.getDestType().getShape(),
- op.getOuterDimsPerm(), op.getMixedTiles());
-}
-
-/// Returns true if the `srcShape` or `destShape` is different from the one in
-/// `packOp` and populates each with the inferred static shape.
-static bool inferStaticShape(PackOp packOp, SmallVectorImpl<int64_t> &srcShape,
- SmallVectorImpl<int64_t> &destShape) {
- bool changeNeeded = false;
- srcShape.assign(packOp.getSourceType().getShape().begin(),
- packOp.getSourceType().getShape().end());
- destShape.assign(packOp.getDestType().getShape().begin(),
- packOp.getDestType().getShape().end());
- llvm::SmallSetVector<int64_t, 4> innerDims;
- innerDims.insert(packOp.getInnerDimsPos().begin(),
- packOp.getInnerDimsPos().end());
- SmallVector<int64_t> inverseOuterDimsPerm;
- if (!packOp.getOuterDimsPerm().empty())
- inverseOuterDimsPerm = invertPermutationVector(packOp.getOuterDimsPerm());
- int srcRank = packOp.getSourceRank();
- for (auto i : llvm::seq<int64_t>(0, srcRank)) {
- if (innerDims.contains(i))
- continue;
- int64_t srcPos = i;
- int64_t destPos = i;
- if (!inverseOuterDimsPerm.empty())
- destPos = inverseOuterDimsPerm[srcPos];
- if (ShapedType::isDynamic(srcShape[srcPos]) ==
- ShapedType::isDynamic(destShape[destPos])) {
- continue;
- }
- int64_t size = srcShape[srcPos];
- if (ShapedType::isDynamic(size))
- size = destShape[destPos];
- srcShape[srcPos] = size;
- destShape[destPos] = size;
- changeNeeded = true;
- }
- return changeNeeded;
-}
-
-LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
- // Fold an pack(unpack(x)) to x.
- if (auto unPackOp = packOp.getSource().getDefiningOp<UnPackOp>()) {
- if (unPackOp.getSourceType() != packOp.getDestType())
- return failure();
- if (packOp.getPaddingValue() ||
- !hasSameInnerOuterAttribute(packOp, unPackOp) ||
- !haveSameTiles(packOp, unPackOp))
- return failure();
- rewriter.replaceOp(packOp, unPackOp.getSource());
- return success();
- }
-
- // Fold optional PaddingValue operand away if padding is not needed.
- if (packOp.getPaddingValue() && paddingIsNotNeeded(packOp)) {
- rewriter.startOpModification(packOp);
- packOp.getPaddingValueMutable().clear();
- rewriter.finalizeOpModification(packOp);
- return success();
- }
-
- // Insert tensor.cast ops if static shape inference is available..
- SmallVector<int64_t> srcShape, destShape;
- if (inferStaticShape(packOp, srcShape, destShape)) {
- Location loc = packOp.getLoc();
- Value source = packOp.getSource();
- if (srcShape != packOp.getSourceType().getShape()) {
- auto newSrcType = packOp.getSourceType().clone(srcShape);
- source =
- rewriter.create<tensor::CastOp>(loc, newSrcType, packOp.getSource());
- }
- Value dest = packOp.getDest();
- RankedTensorType originalResultType = packOp.getDestType();
- bool needUpdateDestType = (destShape != originalResultType.getShape());
- if (needUpdateDestType) {
- auto newDestType = packOp.getDestType().clone(destShape);
- dest =
- rewriter.create<tensor::CastOp>(loc, newDestType, packOp.getDest());
- }
- rewriter.modifyOpInPlace(packOp, [&] {
- packOp.getSourceMutable().assign(source);
- packOp.getDestMutable().assign(dest);
- packOp.getResult().setType(cast<RankedTensorType>(dest.getType()));
- });
- // Insert a cast if needed
- if (needUpdateDestType) {
- rewriter.setInsertionPointAfter(packOp);
- auto castOp =
- rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
- rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
- }
- return success();
- }
-
- return failure();
-}
-
-template <typename PackOrUnpackOp>
-static bool isLikePadUnPad(PackOrUnpackOp packOp,
- RankedTensorType packedTensorType) {
- static_assert(std::is_same<PackOrUnpackOp, PackOp>::value ||
- std::is_same<PackOrUnpackOp, UnPackOp>::value,
- "Function meant for pack/unpack");
- // This is a pad if packing only adds ones and we don't transpose dimensions.
-
- // Check that we are not transposing any dimensions.
- ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos();
- int64_t numPackedDims = innerDimsPos.size();
- auto orderedDims = llvm::to_vector<4>(llvm::seq<int64_t>(0, numPackedDims));
- if (orderedDims != innerDimsPos) {
- // Dimensions don't happen in order.
- return false;
- }
-
- ArrayRef<int64_t> packedShape = packedTensorType.getShape();
- int64_t packedRank = packedTensorType.getRank();
- // At this point we know that we are taking numPackedDims outer
- // dimensions and pushing them all the way as the inner most dimensions.
- // What's left on the outer most dimensions is, in this order:
- // - the factor of the packed dimensions, then
- // - the untouched dimensions
- // This shifting inward of dimensions is a no-op (as opposed to a transpose)
- // if all the dimensions that bubble outerward are ones.
- // Therefore check that all the dimensions but the numPackedDims inner most
- // ones are ones.
- return llvm::all_of(
- llvm::seq<int64_t>(0, packedRank - numPackedDims),
- [&packedShape](int64_t i) { return packedShape[i] == 1; });
-}
-
-bool PackOp::isLikePad() {
- auto packedTensorType =
- llvm::cast<RankedTensorType>((*this)->getResultTypes().front());
- return isLikePadUnPad(*this, packedTensorType);
-}
-
-OpFoldResult PackOp::fold(FoldAdaptor adaptor) {
- std::optional<Attribute> paddingValue;
- if (auto pad = adaptor.getPaddingValue())
- paddingValue = pad;
- if (OpFoldResult reshapedSource = reshapeConstantSource(
- llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
- getDestType(), paddingValue))
- return reshapedSource;
- return {};
-}
-
-//===----------------------------------------------------------------------===//
-// UnPackOp
-//===----------------------------------------------------------------------===//
-
-void UnPackOp::getAsmResultNames(
- function_ref<void(Value, StringRef)> setNameFn) {
- setNameFn(getResult(), "unpack");
-}
-
-LogicalResult
-UnPackOp::reifyResultShapes(OpBuilder &builder,
- ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
- return reifyResultShapesImpl(*this, builder, reifiedReturnShapes);
-}
-
-DenseMap<int64_t, OpFoldResult> UnPackOp::getDimAndTileMapping() {
- return getDimAndTileMappingImpl(*this);
-}
-
-SmallVector<OpFoldResult> UnPackOp::getMixedTiles() {
- return getMixedTilesImpl(*this);
-}
-
-SmallVector<int64_t> UnPackOp::getStaticTiles() {
- return getStaticTilesImpl(*this);
-}
-
-ArrayRef<int64_t> UnPackOp::getAllOuterDims() {
- ShapedType destType = getDestType();
- int64_t destRank = destType.getRank();
- return getSourceType().getShape().take_front(destRank);
-}
-
-SmallVector<int64_t> UnPackOp::getTiledOuterDims() {
- auto innerDimsPos = getInnerDimsPos();
- auto packedShape = getSourceType().getShape();
- SmallVector<int64_t> res;
-
- for (auto index : innerDimsPos)
- res.push_back(packedShape[index]);
-
- return res;
-}
-
-LogicalResult UnPackOp::verify() {
- return commonVerifierPackAndUnPackOp(*this);
-}
-
-Speculation::Speculatability UnPackOp::getSpeculatability() {
- // See PackOp::getSpeculatability.
- if (!areTilesAndTiledDimsAllConstant(*this))
- return Speculation::NotSpeculatable;
-
- return Speculation::Speculatable;
-}
-
-void UnPackOp::build(OpBuilder &builder, OperationState &state, Value source,
- Value dest, ArrayRef<int64_t> innerDimsPos,
- ArrayRef<OpFoldResult> innerTiles,
- ArrayRef<int64_t> outerDimsPerm) {
- assert(innerDimsPos.size() == innerTiles.size() &&
- "number of tile sizes specified must match the specified number of "
- "original dimensions to be tiled");
- SmallVector<int64_t> staticTileSizes;
- SmallVector<Value> dynamicTileSizes;
- dispatchIndexOpFoldResults(innerTiles, dynamicTileSizes, staticTileSizes);
- build(builder, state, dest.getType(), source, dest,
- outerDimsPerm.empty() ? nullptr
- : builder.getDenseI64ArrayAttr(outerDimsPerm),
- builder.getDenseI64ArrayAttr(innerDimsPos), dynamicTileSizes,
- builder.getDenseI64ArrayAttr(staticTileSizes));
-}
-
-Value UnPackOp::createDestinationTensor(OpBuilder &b, Location loc,
- Value source,
- ArrayRef<OpFoldResult> innerTileSizes,
- ArrayRef<int64_t> innerDimsPos,
- ArrayRef<int64_t> outerDimsPerm) {
- AffineExpr sym0, sym1;
- bindSymbols(b.getContext(), sym0, sym1);
- auto dimMul = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult {
- return affine::makeComposedFoldedAffineApply(b, loc, sym0 * sym1, {v1, v2});
- };
-
- SmallVector<OpFoldResult> mixedSizes;
- auto srcType = llvm::cast<RankedTensorType>(source.getType());
- for (auto i :
- llvm::seq<unsigned>(0, srcType.getRank() - innerTileSizes.size())) {
- if (srcType.isDynamicDim(i))
- mixedSizes.push_back(b.create<DimOp>(loc, source, i).getResult());
- else
- mixedSizes.push_back(b.getIndexAttr(srcType.getDimSize(i)));
- }
- if (!outerDimsPerm.empty()) {
- applyPermutationToVector<OpFoldResult>(
- mixedSizes, invertPermutationVector(outerDimsPerm));
- }
-
- for (auto [dimPos, tileSize] : llvm::zip_equal(innerDimsPos, innerTileSizes))
- mixedSizes[dimPos] = dimMul(mixedSizes[dimPos], tileSize);
-
- auto elemType = srcType.getElementType();
- return b.create<tensor::EmptyOp>(loc, mixedSizes, elemType);
-}
-
-UnPackOp UnPackOp::createTransposedClone(OpBuilder &b, Location loc,
- Value transposedSource,
- ArrayRef<int64_t> innerPermutation,
- ArrayRef<int64_t> outerPermutation) {
- PackOrUnPackTransposeResult metadata = commonPermutationOfPackAndUnPackOp(
- *this, innerPermutation, outerPermutation);
- return b.create<UnPackOp>(loc, transposedSource, getDest(),
- metadata.innerDimsPos, metadata.innerTiles,
- metadata.outerDimsPerm);
-}
-
-/// Returns true if the `srcShape` or `destShape` is different from the one in
-/// `op` and populates each with the inferred static shape.
-static bool inferStaticShape(UnPackOp op, SmallVectorImpl<int64_t> &srcShape,
- SmallVectorImpl<int64_t> &destShape) {
- bool changeNeeded = false;
- srcShape.assign(op.getSourceType().getShape().begin(),
- op.getSourceType().getShape().end());
- destShape.assign(op.getDestType().getShape().begin(),
- op.getDestType().getShape().end());
- llvm::SmallSetVector<int64_t, 4> innerDims;
- innerDims.insert(op.getInnerDimsPos().begin(), op.getInnerDimsPos().end());
- SmallVector<int64_t> inverseOuterDimsPerm;
- if (!op.getOuterDimsPerm().empty())
- inverseOuterDimsPerm = invertPermutationVector(op.getOuterDimsPerm());
- int destRank = op.getDestRank();
- for (auto i : llvm::seq<int64_t>(0, destRank)) {
- if (innerDims.contains(i))
- continue;
- int64_t srcPos = i;
- int64_t destPos = i;
- if (!inverseOuterDimsPerm.empty())
- srcPos = inverseOuterDimsPerm[destPos];
- if (ShapedType::isDynamic(srcShape[srcPos]) ==
- ShapedType::isDynamic(destShape[destPos])) {
- continue;
- }
- int64_t size = srcShape[srcPos];
- if (ShapedType::isDynamic(size))
- size = destShape[destPos];
- srcShape[srcPos] = size;
- destShape[destPos] = size;
- changeNeeded = true;
- }
- return changeNeeded;
-}
-
-LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
- PatternRewriter &rewriter) {
- /// unpack(pack(x)) -> x
- if (PackOp packOp = unPackOp.getSource().getDefiningOp<PackOp>()) {
- if (packOp.getSourceType() != unPackOp.getDestType())
- return failure();
- if (packOp.getPaddingValue() ||
- !hasSameInnerOuterAttribute(packOp, unPackOp) ||
- !haveSameTiles(packOp, unPackOp))
- return failure();
- rewriter.replaceOp(unPackOp, packOp.getSource());
- return success();
- }
- /// unpack(destinationStyleOp(x)) -> unpack(x)
- if (auto dstStyleOp =
- unPackOp.getDest().getDefiningOp<DestinationStyleOpInterface>()) {
- auto destValue = cast<OpResult>(unPackOp.getDest());
- Value newDest = dstStyleOp.getDpsInits()[destValue.getResultNumber()];
- rewriter.modifyOpInPlace(unPackOp,
- [&]() { unPackOp.setDpsInitOperand(0, newDest); });
- return success();
- }
-
- // Insert tensor.cast ops if static shape inference is available..
- SmallVector<int64_t> srcShape, destShape;
- if (inferStaticShape(unPackOp, srcShape, destShape)) {
- Location loc = unPackOp.getLoc();
- Value source = unPackOp.getSource();
- if (srcShape != unPackOp.getSourceType().getShape()) {
- auto newSrcType = unPackOp.getSourceType().clone(srcShape);
- source = rewriter.create<tensor::CastOp>(loc, newSrcType,
- unPackOp.getSource());
- }
- Value dest = unPackOp.getDest();
- if (destShape != unPackOp.getDestType().getShape()) {
- auto newDestType = unPackOp.getDestType().clone(destShape);
- dest =
- rewriter.create<tensor::CastOp>(loc, newDestType, unPackOp.getDest());
- }
- Value newOp = rewriter.create<UnPackOp>(
- loc, source, dest, unPackOp.getInnerDimsPos(), unPackOp.getMixedTiles(),
- unPackOp.getOuterDimsPerm());
- rewriter.replaceOpWithNewOp<tensor::CastOp>(
- unPackOp, unPackOp.getResult().getType(), newOp);
- return success();
- }
-
- return failure();
-}
-
-bool UnPackOp::isLikeUnPad() {
- RankedTensorType packedTensorType = getSourceType();
- return isLikePadUnPad(*this, packedTensorType);
-}
-
-OpFoldResult UnPackOp::fold(FoldAdaptor adaptor) {
- if (OpFoldResult reshapedSource = reshapeConstantSource(
- llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
- getResult().getType()))
- return reshapedSource;
- return {};
-}
-
//===----------------------------------------------------------------------===//
// Common Canonicalizers and Folders.
//===----------------------------------------------------------------------===//
@@ -4821,111 +3911,6 @@ getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy,
return newMixedTileSizes;
}
-/// Folds a tensor.cast op into a consuming PackOp op if the
-/// `tensor.cast` has source that is more static than the consuming op.
-///
-/// Example:
-/// ```mlir
-/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
-/// %2 = tensor.pack %1 ... : tensor<?x?xf32> ...
-/// ```
-///
-/// folds into:
-///
-/// ```mlir
-/// %2 = tensor.pack %0 ... : tensor<8x16xf32> ...
-/// ```
-struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> {
- using OpRewritePattern<PackOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(PackOp op,
- PatternRewriter &rewriter) const override {
- if (!foldTensorCastPrecondition(op))
- return failure();
-
- SmallVector<Type> newResultTypes(op->getResultTypes());
- SmallVector<Value> newOperands = getNewOperands(op, newResultTypes);
-
- // Get the updated mixed-tile-sizes attribute.
- SmallVector<OpFoldResult> newMixedTileSizes =
- getNewMixedTileSizes(rewriter, newResultTypes[0], op.getMixedTiles());
-
- // Clone op.
- // TODO: Strictly speaking, discardable attributes should be _discarded_ at
- // this point. However, in practice, we use them for things that we'd like
- // to preserve. Implement a better abstraction.
- PackOp newOp = rewriter.create<PackOp>(
- op.getLoc(), newOperands[0], newOperands[1], op.getInnerDimsPos(),
- newMixedTileSizes, op.getPaddingValue(), op.getOuterDimsPerm());
- newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary());
-
- // Replace op.
- Value oldResult = op.getResult();
- Value newResult = newOp.getResult();
- Value replacement = (newResult.getType() != oldResult.getType())
- ? rewriter.create<tensor::CastOp>(
- op->getLoc(), oldResult.getType(), newResult)
- : newResult;
-
- rewriter.replaceOp(op, {replacement});
-
- return success();
- }
-};
-
-/// Folds a tensor.cast op into a consuming UnPackOp op if the
-/// `tensor.cast` has source that is more static than the consuming op.
-///
-/// Example:
-/// ```mlir
-/// %1 = tensor.cast %0 : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32>
-/// %2 = tensor.unpack %1 ... : tensor<1x1x?x1xi32> -> tensor<7x?xi32>
-/// ```
-///
-/// folds into:
-///
-/// ```mlir
-/// %2 = tensor.unpack %0 ... tensor<1x1x8x1xi32> -> tensor<7x?xi32>
-/// ```
-struct FoldTensorCastUnPackOp : public OpRewritePattern<UnPackOp> {
- using OpRewritePattern<UnPackOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(UnPackOp op,
- PatternRewriter &rewriter) const override {
- if (!foldTensorCastPrecondition(op))
- return failure();
-
- SmallVector<Type> newResultTypes(op->getResultTypes());
- SmallVector<Value> newOperands = getNewOperands(op, newResultTypes);
- Value sourceTensor = newOperands[0];
-
- // Get the updated mixed-tile-sizes attribute.
- SmallVector<OpFoldResult> newMixedTileSizes = getNewMixedTileSizes(
- rewriter, sourceTensor.getType(), op.getMixedTiles());
-
- // Clone op.
- // TODO: Strictly speaking, discardable attributes should be _discarded_ at
- // this point. However, in practice, we use them for things that we'd like
- // to preserve. Implement a better abstraction.
- UnPackOp newOp = rewriter.create<UnPackOp>(
- op.getLoc(), sourceTensor, newOperands[1], op.getInnerDimsPos(),
- newMixedTileSizes, op.getOuterDimsPerm());
- newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary());
-
- // Replace op.
- Value oldResult = op.getResult();
- Value newResult = newOp.getResult();
- Value replacement = (newResult.getType() != oldResult.getType())
- ? rewriter.create<tensor::CastOp>(
- op->getLoc(), oldResult.getType(), newResult)
- : newResult;
-
- rewriter.replaceOp(op, {replacement});
-
- return success();
- }
-};
-
/// Folds a tensor.cast op into a consuming DestinationStyleOpInterface op if
/// the `tensor.cast` has source that is more static than the consuming op.
///
@@ -4950,9 +3935,7 @@ struct FoldTensorCastProducerOp
LogicalResult matchAndRewrite(DestinationStyleOpInterface op,
PatternRewriter &rewriter) const override {
- // Reject PackOp/UnpackOp - there are dedicated patterns for that instead.
- if (!foldTensorCastPrecondition(op) || isa<PackOp, UnPackOp>(*op) ||
- isa<linalg::PackOp, linalg::UnPackOp>(*op))
+ if (!foldTensorCastPrecondition(op))
return failure();
SmallVector<Type> newResultTypes(op->getResultTypes());
@@ -4984,8 +3967,6 @@ struct FoldTensorCastProducerOp
void TensorDialect::getCanonicalizationPatterns(
RewritePatternSet &results) const {
- results.add<FoldTensorCastPackOp>(getContext());
- results.add<FoldTensorCastUnPackOp>(getContext());
results.add<FoldTensorCastProducerOp>(getContext());
}
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
index bd1a09be6b9bca..138e4be6b18e99 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -87,648 +87,6 @@ struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
}
};
-template <typename OpTy>
-static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
- OpBuilder &builder) {
- static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
- "applies to only pack or unpack operations");
- OpBuilder::InsertionGuard g(builder);
- int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
- : op.getDestRank();
- OpFoldResult zero = builder.getIndexAttr(0);
- OpFoldResult one = builder.getIndexAttr(1);
- ReifiedRankedShapedTypeDims resultShape;
- (void)reifyResultShapes(builder, op, resultShape);
- SmallVector<Range> loopBounds(rank);
- for (auto dim : llvm::seq<int64_t>(0, rank)) {
- loopBounds[dim].offset = zero;
- loopBounds[dim].stride = one;
- loopBounds[dim].size = resultShape[0][dim];
- }
- return loopBounds;
-}
-
-static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
- SmallVector<OpFoldResult> &sizes,
- ArrayRef<int64_t> permutation) {
- if (permutation.empty())
- return;
- applyPermutationToVector<OpFoldResult>(offsets, permutation);
- applyPermutationToVector<OpFoldResult>(sizes, permutation);
-}
-
-struct PackOpTiling
- : public TilingInterface::ExternalModel<PackOpTiling, tensor::PackOp> {
-
- SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
- // Note that here we only consider untiled dimensions and outer tiled data
- // dimensions, the inner tiled data dimensions are materialized when
- // building the body of the operation.
- auto packOp = cast<PackOp>(op);
- SmallVector<utils::IteratorType> iteratorTypes(
- packOp.getSourceRank(), utils::IteratorType::parallel);
- return iteratorTypes;
- }
-
- SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
- return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
- }
-
- FailureOr<TilingResult>
- getTiledImplementation(Operation *op, OpBuilder &b,
- ArrayRef<OpFoldResult> offsets,
- ArrayRef<OpFoldResult> sizes) const {
- auto packOp = cast<PackOp>(op);
- Location loc = packOp.getLoc();
-
- // The tiling is applied on interchanged dimensions. We have to undo the
- // interchange to map sizes and offsets to the original input.
- int64_t inputRank = packOp.getSourceRank();
- SmallVector<OpFoldResult> origOffsets(offsets);
- SmallVector<OpFoldResult> origSizes(sizes);
- applyPermToRange(origOffsets, origSizes,
- invertPermutationVector(packOp.getOuterDimsPerm()));
-
- DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
- packOp.getDimAndTileMapping();
- SmallVector<OpFoldResult> srcDimValues =
- tensor::getMixedSizes(b, loc, packOp.getSource());
- SmallVector<OpFoldResult> inputIndices, inputSizes;
- for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
- using AV = affine::AffineValueExpr;
- affine::AffineBuilder ab(b, loc);
- AffineExpr dim0, dim1, sym;
- bindDims(b.getContext(), dim0, dim1);
- bindSymbols(b.getContext(), sym);
- if (dimAndTileMapping.count(dim)) {
- // If the data dimension is tiled, the i-th index is the product of
- // offset_i and tile_i, and the i-th size is the product of sizes_i and
- // tile_i.
- auto avOffset = AV(dim0).bind(origOffsets[dim]);
- auto avSize = AV(dim0).bind(origSizes[dim]);
- auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
- inputIndices.push_back(ab.mul(avOffset, avTileSize));
- inputSizes.push_back(ab.mul(avSize, avTileSize));
- } else {
- inputIndices.push_back(origOffsets[dim]);
- inputSizes.push_back(origSizes[dim]);
- }
-
- // Limit the size of the input operand for incomplete tiles.
- if (packOp.getPaddingValue()) {
- OpFoldResult dimSize = srcDimValues[dim];
- auto avDimSize = AV(dim0).bind(dimSize);
- auto avInputIdx = AV(dim1).bind(inputIndices.back());
- inputSizes.back() =
- ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
- }
- }
-
- auto oneAttr = b.getI64IntegerAttr(1);
- SmallVector<OpFoldResult> strides(inputRank, oneAttr);
-
- SmallVector<Value> tiledOperands;
- auto sourceSlice = b.create<ExtractSliceOp>(
- loc, packOp.getSource(), inputIndices, inputSizes, strides);
- tiledOperands.push_back(sourceSlice);
-
- SmallVector<OpFoldResult> outputOffsets, outputSizes;
- if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
- outputSizes)))
- return {};
-
- strides.append(packOp.getDestRank() - inputRank, oneAttr);
- auto outSlice = b.create<ExtractSliceOp>(
- loc, packOp.getDest(), outputOffsets, outputSizes, strides);
- tiledOperands.push_back(outSlice);
-
- if (auto val = packOp.getPaddingValue())
- tiledOperands.push_back(val);
- for (auto tile : packOp.getInnerTiles())
- tiledOperands.push_back(tile);
-
- Operation *tiledPackOp = b.create<PackOp>(
- loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
-
- return TilingResult{
- {tiledPackOp},
- SmallVector<Value>(tiledPackOp->getResults()),
- llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
- }
-
- LogicalResult
- getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
- ArrayRef<OpFoldResult> offsets,
- ArrayRef<OpFoldResult> sizes,
- SmallVector<OpFoldResult> &resultOffsets,
- SmallVector<OpFoldResult> &resultSizes) const {
- // The iteration domain is over outer dimensions of packed layout. In this
- // context, the outer dimensions of `resultOffsets` are `offsets`. The
- // inner dimensions of `resultOffsets` are zeros because tiling is not
- // applied to them.
- auto packOp = cast<PackOp>(op);
- int64_t inputRank = packOp.getSourceRank();
- int64_t outputRank = packOp.getDestRank();
- auto zeroAttr = b.getI64IntegerAttr(0);
- resultOffsets.assign(offsets.begin(), offsets.end());
- resultOffsets.append(outputRank - inputRank, zeroAttr);
-
- ReifiedRankedShapedTypeDims outputShape;
- (void)reifyResultShapes(b, packOp, outputShape);
- resultSizes.assign(sizes.begin(), sizes.end());
- for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
- resultSizes.push_back(outputShape[0][dataTileDim]);
-
- return success();
- }
-
- FailureOr<TilingResult>
- generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
- ArrayRef<OpFoldResult> offsets,
- ArrayRef<OpFoldResult> sizes) const {
- auto packOp = cast<PackOp>(op);
- int64_t numTiles = packOp.getInnerDimsPos().size();
-
- // tensor.pack op is fusible (as a producer) only if full inner tiles are
- // iterated or inner dims are not tiled. Otherwise, it will generate a
- // sequence of non-trivial ops (for partial tiles).
- for (auto offset : offsets.take_back(numTiles))
- if (!isConstantIntValue(offset, 0))
- return failure();
-
- for (auto iter :
- llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
- if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
- return failure();
-
- FailureOr<TilingResult> tilingResult = getTiledImplementation(
- op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
- if (failed(tilingResult))
- return failure();
- return tilingResult.value();
- }
-
- /// Method to return the position of iteration domain tile computed by the
- /// tiled operation. In current `tensor.pack` context, the `resultOffsets` and
- /// `resultSizes` only cover outer dimensions.
- LogicalResult getIterationDomainTileFromOperandTile(
- Operation *op, OpBuilder &b, unsigned operandNumber,
- ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
- SmallVectorImpl<OpFoldResult> &resultOffsets,
- SmallVectorImpl<OpFoldResult> &resultSizes) const {
- if (operandNumber != 0)
- return failure();
-
- auto packOp = cast<PackOp>(op);
- // It is not trivial to infer dest tile from source tile if `packOp` has
- // padding semantic.
- if (packOp.getPaddingValue())
- return failure();
-
- Location loc = packOp.getLoc();
-
- SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
- DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
- packOp.getDimAndTileMapping();
- for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
- if (dimAndTileMapping.count(dim)) {
- FailureOr<int64_t> cstSize =
- ValueBoundsConstraintSet::computeConstantBound(
- presburger::BoundType::UB, sizes[dim],
- /*stopCondition=*/nullptr, /*closedUB=*/true);
- std::optional<int64_t> cstInnerSize =
- getConstantIntValue(dimAndTileMapping[dim]);
- // Currently fusing `packOp` as consumer only expects perfect tiling
- // scenario because even if without padding semantic, the `packOp` may
- // also yield incomplete tiles. E.g. tensor<30xf32> -> tensor<5x6xf32>,
- // where the `tileSize` from operand of `packOp` is 5, which is not
- // exactly divided by `innerTile`(=6) of `packOp`. As the result:
- // 1. the first slice is extracted from (0) to (4) and inserted into
- // (0,0)~(0,4) at first row.
- // 2. the second slice is extracted from (5) to (9) and SHOULD BE
- // respectively inserted into two rows with different length, including
- // first row: (0,5) and second row (1,0)~(1,3). It is hard to coordinate
- // them, thus adding below constraint to bypass them temporarily. In
- // another word, we can only support tiling with consumer if the tile
- // size for the producer is a multiple of the inner tile size for the
- // packed dimensions at this moment.
- if (failed(cstSize) || !cstInnerSize || *cstSize % *cstInnerSize != 0) {
- return failure();
- }
-
- using AV = affine::AffineValueExpr;
- affine::AffineBuilder ab(b, loc);
- AffineExpr dim0, sym;
- bindDims(b.getContext(), dim0);
- bindSymbols(b.getContext(), sym);
- auto avOffset = AV(dim0).bind(offsets[dim]);
- auto avSize = AV(dim0).bind(sizes[dim]);
- auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
- outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
- outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
- } else {
- outerDimOffsets.push_back(offsets[dim]);
- outerDimSizes.push_back(sizes[dim]);
- }
- }
- applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
- resultOffsets = outerDimOffsets;
- resultSizes = outerDimSizes;
- return success();
- }
-
- /// Method to return the tiled implementation of tensor.pack as a consumer.
- FailureOr<TilingResult> getTiledImplementationFromOperandTile(
- Operation *op, OpBuilder &b, unsigned operandNumber,
- ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes) const {
- if (operandNumber != 0)
- return failure();
-
- auto packOp = cast<PackOp>(op);
- Location loc = packOp.getLoc();
-
- int64_t inputRank = packOp.getSourceRank();
- auto oneAttr = b.getI64IntegerAttr(1);
- SmallVector<OpFoldResult> strides(inputRank, oneAttr);
-
- SmallVector<Value> tiledOperands;
- auto sourceSlice = b.create<ExtractSliceOp>(loc, packOp.getSource(),
- offsets, sizes, strides);
- tiledOperands.push_back(sourceSlice);
-
- SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
- if (failed(getIterationDomainTileFromOperandTile(
- op, b, /*operandNumber=*/0, offsets, sizes, outerDimOffsets,
- outerDimSizes)))
- return failure();
-
- SmallVector<OpFoldResult> outputOffsets, outputSizes;
- if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
- outputOffsets, outputSizes)))
- return failure();
-
- strides.append(packOp.getDestRank() - inputRank, oneAttr);
- auto outSlice = b.create<ExtractSliceOp>(
- loc, packOp.getDest(), outputOffsets, outputSizes, strides);
- tiledOperands.push_back(outSlice);
-
- assert(!packOp.getPaddingValue() && "Expect no padding semantic");
- for (auto tile : packOp.getInnerTiles())
- tiledOperands.push_back(tile);
-
- Operation *tiledPackOp = b.create<PackOp>(
- loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
-
- return TilingResult{
- {tiledPackOp},
- SmallVector<Value>(tiledPackOp->getResults()),
- llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
- }
-};
-
-struct UnpackTileDimInfo {
- bool isAlignedToInnerTileSize;
- OpFoldResult sourceOffset;
- OpFoldResult sourceSize;
- OpFoldResult resultOffset;
- OpFoldResult destExpandedSize;
-};
-
-/// Returns the needed information for tiling unpack op on `tileDim` with given
-/// `tileOffset` and `tileSize`. For more details, see the comment of the
-/// `getTiledImplementation`.
-static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
- int64_t tileDim,
- OpFoldResult tileOffset,
- OpFoldResult tileSize) {
- UnpackTileDimInfo info;
- Attribute zeroAttr = b.getIndexAttr(0);
- Attribute oneAttr = b.getIndexAttr(1);
- DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
- unpackOp.getDimAndTileMapping();
- // The dimension is not one of packed data dimension.
- if (!dimAndTileMapping.count(tileDim)) {
- info.isAlignedToInnerTileSize = true;
- info.sourceOffset = tileOffset;
- info.sourceSize = tileSize;
- info.resultOffset = zeroAttr;
- info.destExpandedSize = tileSize;
- return info;
- }
-
- Location loc = unpackOp.getLoc();
- using AV = affine::AffineValueExpr;
- affine::AffineBuilder ab(b, loc);
- AffineExpr dim0, dim1, sym0;
- bindDims(b.getContext(), dim0, dim1);
- bindSymbols(b.getContext(), sym0);
-
- OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
-
- info.isAlignedToInnerTileSize = false;
- FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
- presburger::BoundType::UB, tileSize,
- /*stopCondition=*/nullptr, /*closedUB=*/true);
- std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
- if (!failed(cstSize) && cstInnerSize) {
- if (*cstSize % *cstInnerSize == 0)
- info.isAlignedToInnerTileSize = true;
-
- // If the tiling size equals to the inner tiling size, the outer dims are
- // always 1.
- if (*cstInnerSize == *cstSize) {
- auto lhs = AV(dim0).bind(tileOffset);
- auto rhs = AV(dim1).bind(innerTileSize);
- info.sourceOffset = ab.floor(lhs, rhs);
- info.sourceSize = oneAttr;
- info.resultOffset = zeroAttr;
- info.destExpandedSize = tileSize;
- return info;
- }
- }
-
- if (info.isAlignedToInnerTileSize) {
- info.sourceOffset =
- ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
- info.resultOffset = zeroAttr;
- info.destExpandedSize = tileSize;
-
- // The ceilDiv is needed here because there could be incomplete tile even
- // it is perfect tiling cases. E.g.,
- // %0 = unpack tensor<33x2xf32> into tensor<64xf32>
- // If the tiling size is 32, there will be 3 tiles. Two of them have
- // size=32; one of them have size=2. The size is represented using
- // affine_min op; we need ceilDiv.
- info.sourceSize =
- ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
- return info;
- }
-
- affine::DivModValue firstCoord = affine::getDivMod(
- b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
- getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
- OpFoldResult tileExclusiveBound =
- ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
- affine::DivModValue lastCoord = affine::getDivMod(
- b, loc,
- getValueOrCreateConstantIndexOp(
- b, loc,
- ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
- getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
-
- OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
- AV(dim1).bind(firstCoord.quotient));
- info.sourceSize =
- ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
- info.sourceOffset = firstCoord.quotient;
- info.resultOffset = firstCoord.remainder;
- // Do not create an Affine ops for expanded size because the affine op is too
- // complicated which would trigger an issue in affine ops simplification.
- info.destExpandedSize = b.createOrFold<arith::MulIOp>(
- loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
- getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
- return info;
-}
-
-struct UnPackOpTiling
- : public TilingInterface::ExternalModel<UnPackOpTiling, tensor::UnPackOp> {
-
- SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
- auto unpackOp = cast<UnPackOp>(op);
- SmallVector<utils::IteratorType> iteratorTypes(
- unpackOp.getDestRank(), utils::IteratorType::parallel);
- return iteratorTypes;
- }
-
- SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
- return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
- }
-
- /// There are two cases in tiling unpack ops. If the tiling size is aligned to
- /// the inner tile size, the corresponding tiles of source are all complete.
- /// Otherwise, there are in-complete tiles. We will need to expand the slice
- /// of source for getting complete tiles. The tiled unpack op unpacks more
- /// data from source, so We'll need an extract_slice op to shift and truncate
- /// the output.
- /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
- /// coordinates of second tile (i.e., result[15..31]) are
- /// [(1, 7), (2, 0,), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
- /// row are incomplete tiles. To represent the unpack op, we have to complete
- /// the rows. I.e., the input coordinates would start with (1, 0); end with
- /// (3, 7). In this context, the tiled unpack produces a (3 * n) elements
- /// because there are 3 rows in total. Follow by a tensor.extract_slice op, we
- /// can get the actual result.
- FailureOr<TilingResult>
- getTiledImplementation(Operation *op, OpBuilder &b,
- ArrayRef<OpFoldResult> offsets,
- ArrayRef<OpFoldResult> sizes) const {
- auto unpackOp = cast<UnPackOp>(op);
- int64_t srcRank = unpackOp.getSourceRank();
- int64_t destRank = unpackOp.getDestRank();
- int64_t numInnerTiles = srcRank - destRank;
- Location loc = unpackOp.getLoc();
-
- // The perfect tiling case indicates that the tiling sizes are multiple of
- // inner_tile_size. In this context, no extra data is needed when
- // representing the tiled unpack op.
- bool isPerfectTilingCase = true;
- Attribute oneAttr = b.getIndexAttr(1);
- SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
- SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
- SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
- for (auto dim : llvm::seq<int64_t>(0, destRank)) {
- UnpackTileDimInfo info =
- getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
- if (!info.isAlignedToInnerTileSize)
- isPerfectTilingCase = false;
- sliceSrcIndices.push_back(info.sourceOffset);
- sliceSrcSizes.push_back(info.sourceSize);
- destExpandedSizes.push_back(info.destExpandedSize);
- resultOffsetsFromDest.push_back(info.resultOffset);
- }
-
- // The tiling is applied on destination dimensions. We have to apply the
- // interchange on source dimensions if outer_dims_perm is set.
- applyPermToRange(sliceSrcIndices, sliceSrcSizes,
- unpackOp.getOuterDimsPerm());
- Attribute zeroAttr = b.getIndexAttr(0);
- sliceSrcIndices.append(numInnerTiles, zeroAttr);
- sliceSrcSizes.append(unpackOp.getMixedTiles());
- sliceSrcStrides.append(numInnerTiles, oneAttr);
- SmallVector<Operation *> generatedSlices;
- ExtractSliceOp sliceSource =
- b.create<ExtractSliceOp>(loc, unpackOp.getSource(), sliceSrcIndices,
- sliceSrcSizes, sliceSrcStrides);
- generatedSlices.push_back(sliceSource);
-
- SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
- Value sliceDest;
- if (isPerfectTilingCase) {
- auto destSliceOp = b.create<ExtractSliceOp>(loc, unpackOp.getDest(),
- offsets, sizes, destStrides);
- sliceDest = destSliceOp;
- generatedSlices.push_back(destSliceOp);
- } else {
- sliceDest = b.create<EmptyOp>(loc, destExpandedSizes,
- unpackOp.getDestType().getElementType());
- }
-
- SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
- for (auto tile : unpackOp.getInnerTiles())
- tiledOperands.push_back(tile);
-
- Operation *tiledUnpackOp = b.create<UnPackOp>(
- loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());
-
- if (isPerfectTilingCase)
- return TilingResult{{tiledUnpackOp},
- SmallVector<Value>(tiledUnpackOp->getResults()),
- generatedSlices};
-
- auto extractSlice =
- b.create<ExtractSliceOp>(loc, tiledUnpackOp->getResult(0),
- resultOffsetsFromDest, sizes, destStrides);
- return TilingResult{
- {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
- }
-
- LogicalResult
- getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
- ArrayRef<OpFoldResult> offsets,
- ArrayRef<OpFoldResult> sizes,
- SmallVector<OpFoldResult> &resultOffsets,
- SmallVector<OpFoldResult> &resultSizes) const {
- resultOffsets = llvm::to_vector(offsets);
- resultSizes = llvm::to_vector(sizes);
- return success();
- }
-
- FailureOr<TilingResult>
- generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
- ArrayRef<OpFoldResult> offsets,
- ArrayRef<OpFoldResult> sizes) const {
- FailureOr<TilingResult> tilingResult =
- getTiledImplementation(op, b, offsets, sizes);
- if (failed(tilingResult))
- return failure();
- return tilingResult.value();
- }
-
- /// Method to return the position of iteration domain tile computed by the
- /// tiled operation.
- LogicalResult getIterationDomainTileFromOperandTile(
- Operation *op, OpBuilder &b, unsigned operandNumber,
- ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
- SmallVectorImpl<OpFoldResult> &resultOffsets,
- SmallVectorImpl<OpFoldResult> &resultSizes) const {
- auto unPackOp = cast<UnPackOp>(op);
- // If the operand tile is the dest, then no adjustment is needed.
- if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
- resultOffsets = llvm::to_vector(offsets);
- resultSizes = llvm::to_vector(sizes);
- return success();
- }
- Location loc = unPackOp.getLoc();
-
- int64_t numTiles = unPackOp.getInnerDimsPos().size();
- auto destOffsets = offsets.drop_back(numTiles);
- auto destSizes = sizes.drop_back(numTiles);
- // The tiling is applied on interchanged dimensions. We have to undo the
- // interchange to map sizes and offsets to the original input.
- int64_t outputRank = unPackOp.getDestRank();
- ReifiedRankedShapedTypeDims reifiedReturnShapes;
- if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
- return failure();
- SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
- SmallVector<OpFoldResult> origOffsets(destOffsets);
- SmallVector<OpFoldResult> origSizes(destSizes);
- applyPermToRange(origOffsets, origSizes,
- invertPermutationVector(unPackOp.getOuterDimsPerm()));
-
- DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
- unPackOp.getDimAndTileMapping();
-
- for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
- using AV = affine::AffineValueExpr;
- affine::AffineBuilder ab(b, loc);
- AffineExpr dim0, dim1, sym0;
- bindDims(b.getContext(), dim0, dim1);
- bindSymbols(b.getContext(), sym0);
- if (dimAndTileMapping.count(dim)) {
- // If the data dimension is tiled, the i-th index is the product of
- // offset_i and tile_i, and the i-th size is the product of sizes_i and
- // tile_i. The sizes must be clamped to the sizes of the unpack result.
- auto avOffset = AV(dim0).bind(origOffsets[dim]);
- auto avSize = AV(dim0).bind(origSizes[dim]);
- auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
- auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
- resultOffsets.push_back(ab.mul(avOffset, avTileSize));
- auto avResultOffset = AV(dim1).bind(resultOffsets.back());
- resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
- ab.sub(avResultSize, avResultOffset)}));
- } else {
- resultOffsets.push_back(origOffsets[dim]);
- resultSizes.push_back(origSizes[dim]);
- }
- }
- return success();
- }
-
- /// Method to return the tiled implementation of tensor.unpack as a consumer.
- FailureOr<TilingResult> getTiledImplementationFromOperandTile(
- Operation *op, OpBuilder &b, unsigned operandNumber,
- ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes) const {
- auto unPackOp = cast<UnPackOp>(op);
- // tensor.unpack op is fusible (as a consumer) only if inner dims are not
- // tiled.
- int64_t numTiles = unPackOp.getInnerDimsPos().size();
- for (auto iter :
- llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
- if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
- return failure();
- }
-
- Location loc = unPackOp.getLoc();
-
- // Fetch offset/size for creating the slice of the dest operand of
- // unpack op.
- SmallVector<OpFoldResult> outputOffsets, outputSizes;
- if (failed(getIterationDomainTileFromOperandTile(
- op, b, /*operandNumber=*/0, offsets, sizes, outputOffsets,
- outputSizes)))
- return failure();
-
- auto oneAttr = b.getI64IntegerAttr(1);
- int64_t outputRank = unPackOp.getDestRank();
- SmallVector<OpFoldResult> strides(outputRank, oneAttr);
-
- SmallVector<Value> tiledOperands;
- // Create slice of the dest operand.
- auto extractDestSlice = b.create<ExtractSliceOp>(
- loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
- tiledOperands.push_back(extractDestSlice);
-
- SmallVector<OpFoldResult> inputOffsets, inputSizes;
- strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
- // Create slice of the source operand.
- auto extractSourceSlice = b.create<ExtractSliceOp>(
- loc, unPackOp.getSource(), offsets, sizes, strides);
- tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
- for (auto tile : unPackOp.getInnerTiles())
- tiledOperands.push_back(tile);
-
- // Create tiled unpack op.
- Operation *tiledUnPackOp =
- b.create<UnPackOp>(loc, TypeRange{extractDestSlice.getType()},
- tiledOperands, op->getAttrs());
-
- return TilingResult{{tiledUnPackOp},
- SmallVector<Value>(tiledUnPackOp->getResults()),
- llvm::to_vector(ArrayRef<Operation *>{
- extractSourceSlice, extractDestSlice})};
- }
-};
-
} // namespace
FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
@@ -949,15 +307,5 @@ void mlir::tensor::registerTilingInterfaceExternalModels(
DialectRegistry ®istry) {
registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) {
tensor::PadOp::attachInterface<PadOpTiling>(*ctx);
- tensor::PackOp::attachInterface<PackOpTiling>(*ctx);
- tensor::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
- });
-}
-
-void mlir::tensor::registerTilingInterfaceExternalModelsForPackUnPackOps(
- DialectRegistry ®istry) {
- registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) {
- tensor::PackOp::attachInterface<PackOpTiling>(*ctx);
- tensor::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
});
}
diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
index 5c16e538ac2420..52462aae4bc803 100644
--- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -92,61 +92,6 @@ mlir::tensor::computeTransposedType(RankedTensorType rankedTensorType,
return transposedTensorType;
}
-/// The permutation can be obtained from two permutations:
-/// a) Compute the permutation vector to move the last `numPackedDims` into
-/// the `innerPosDims` of a shape of rank `rank`.
-/// b) Compute the permutation vector to move outer dims if the
-/// `outerPerm` parameter is not empty.
-/// Apply (b) permutation on (a) permutation to get the final permutation.
-static SmallVector<int64_t>
-computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
- ArrayRef<int64_t> &outerPerm,
- PackingMetadata &packingMetadata) {
- int64_t numPackedDims = innerDimsPos.size();
- auto lastDims =
- llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
- packingMetadata = computePackingMetadata(rank, innerDimsPos);
- SmallVector<int64_t> innerPositionsPerm =
- computePermutationVector(rank, lastDims, packingMetadata.insertPositions);
-
- SmallVector<int64_t> outerPos = packingMetadata.outerPositions;
- if (!outerPerm.empty())
- applyPermutationToVector(outerPos, outerPerm);
- SmallVector<int64_t> outerPositionPerm =
- computePermutationVector(rank, packingMetadata.outerPositions, outerPos);
-
- SmallVector<int64_t> packInverseDestPermutation = innerPositionsPerm;
- applyPermutationToVector(packInverseDestPermutation, outerPositionPerm);
- return packInverseDestPermutation;
-}
-
-SmallVector<int64_t> mlir::tensor::getPackInverseDestPerm(PackOp packOp) {
-
- PackingMetadata pMetadata;
- int64_t packedRank = packOp.getDestType().getRank();
- ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
- ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
- SmallVector<int64_t> packInvDestPerm =
- computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata);
- return packInvDestPerm;
-}
-
-SmallVector<int64_t> mlir::tensor::getUnPackInverseSrcPerm(UnPackOp unpackOp) {
- PackingMetadata metadata;
- return mlir::tensor::getUnPackInverseSrcPerm(unpackOp, metadata);
-}
-
-SmallVector<int64_t>
-mlir::tensor::getUnPackInverseSrcPerm(UnPackOp unpackOp,
- PackingMetadata &metadata) {
- int64_t unpackRank = unpackOp.getSourceType().getRank();
- ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
- ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
- SmallVector<int64_t> unpackInvSrcPerm =
- computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata);
- return unpackInvSrcPerm;
-}
-
bool mlir::tensor::isCastLikeInsertSliceOp(InsertSliceOp op) {
llvm::SmallBitVector droppedDims = op.getDroppedDims();
int64_t srcDim = 0;
More information about the Mlir-commits
mailing list